def clear_un_img():
    """Delete images under collect/img that no weibo record references.

    If the collect directory holds no weibo data at all, every image is
    unreferenced and the whole image directory is emptied; otherwise each
    image file is kept only when some weibo entry lists it in its "img"
    field.
    """
    # Directory where all downloaded images are stored.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # Weibo data under this directory references the images to keep.
    leave_img_url = os.path.join(RESOURCE_BASE_URL, "collect")
    if FileUtil.isempty(leave_img_url):
        # No weibo data: nothing is referenced, wipe the image directory.
        FileUtil.empty(all_img_url)
    else:
        all_imgs = FileUtil.listdir(all_img_url)
        # Gather leave_img_url plus all of its subdirectories.
        dirs = [leave_img_url]
        for parent, dirnames, filenames in os.walk(leave_img_url):
            for dirname in dirnames:
                dirs.append(os.path.join(parent, dirname))
        # Referenced image paths; a set gives O(1) membership tests instead
        # of the original list's O(n) scan per image.
        leave_imgs = set()
        for dir_ in dirs:
            imglist = collect.read_weibo(dir_, isreadimg=True)
            leave_imgs.update(
                flatten([img.get("img") for img in imglist if img.get("img")]))
        # Remove every image that no weibo entry references.
        # BUG FIX: the original used map() with a side-effecting lambda;
        # that only runs eagerly on Python 2 and would silently delete
        # nothing under Python 3's lazy map — use an explicit loop.
        for path in all_imgs:
            if path not in leave_imgs:
                os.remove(path)
def get_classificator(self):
    """Fit the image classifier and return self (fluent style).

    Uses cached training data from ``image_train_path`` when available;
    otherwise labels raw weibo records first and persists the result.
    """
    def build_from_weibo():
        # Cold path: label raw weibo records (text sentiment via the CHI
        # feature classifier) and persist them as the training set.
        records = collect.read_weibo(ImageClassification.weibo_path,
                                     isreadimg=True)
        plain_texts = [record.get("sentence") for record in records]
        predictions = self.__classifict(CHIFeature(), plain_texts, incr=True)
        labelled = self.__reconstruct(records, predictions)
        self.__save_result(labelled)
        return labelled

    sentences = self.read_train(ImageClassification.image_train_path)
    if not sentences:
        sentences = build_from_weibo()

    # Train the neural net on the image features with the derived labels.
    texts, imgs, labels = self.split(sentences)
    img_feature = self.__get_feature_from_img(imgs)
    self.nn.get_classificator(img_feature, labels)
    return self
def get_classificator(self):
    """Train the image classifier and return self for chaining.

    Loads prepared training sentences from ``image_train_path``; when none
    exist, builds them from raw weibo data (labelling the texts with the
    CHI-feature classifier) and saves the result before training.
    """
    sentences = self.read_train(ImageClassification.image_train_path)
    if not sentences:
        # Read weibo records; isreadimg=True presumably also loads the
        # attached images — confirm against collect.read_weibo.
        sentences = collect.read_weibo(ImageClassification.weibo_path,
                                       isreadimg=True)
        pure_sentences = [
            sentence.get("sentence") for sentence in sentences
        ]
        # Predict a label for each plain text.
        c_pred = self.__classifict(CHIFeature(), pure_sentences, incr=True)
        # Fold the predictions back into the sentence records.
        sentences = self.__reconstruct(sentences, c_pred)
        # Persist so later runs can skip the labelling step.
        self.__save_result(sentences)
    # Train the neural net on image features with the derived labels.
    texts, imgs, labels = self.split(sentences)
    img_feature = self.__get_feature_from_img(imgs)
    self.nn.get_classificator(img_feature, labels)
    return self
def clear_un_img():
    """Remove image files in collect/img not referenced by any weibo entry.

    When the collect directory is empty there are no references at all, so
    the image directory is wiped wholesale; otherwise only images missing
    from every weibo record's "img" list are deleted.
    """
    # Directory where the downloaded images live.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # Weibo data under this directory decides which images survive.
    leave_img_url = os.path.join(RESOURCE_BASE_URL, "collect")
    if FileUtil.isempty(leave_img_url):
        # No weibo data at all — every image is unreferenced.
        FileUtil.empty(all_img_url)
    else:
        all_imgs = FileUtil.listdir(all_img_url)
        # leave_img_url and all of its subdirectories.
        dirs = [leave_img_url]
        for parent, dirnames, filenames in os.walk(leave_img_url):
            for dirname in dirnames:
                dirs.append(os.path.join(parent, dirname))
        # Set of referenced paths: O(1) lookups instead of the original
        # O(n) list scan for every candidate image.
        leave_imgs = set()
        for dir_ in dirs:
            imglist = collect.read_weibo(dir_, isreadimg=True)
            leave_imgs.update(
                flatten([img.get("img") for img in imglist if img.get("img")]))
        # BUG FIX: originally map() drove a side-effecting lambda to delete
        # files; map is lazy in Python 3, so the deletions would silently
        # never run there. An explicit loop is correct on both versions.
        for path in all_imgs:
            if path not in leave_imgs:
                os.remove(path)
%s </sentence> </weibo> """ % ("None", "None", "N", s + "\n Can't recognize because it has insufficient key_words"))
        else:
            print c_pred


# NOTE(review): the fragment above is the tail of a function whose opening is
# not visible in this chunk (it begins inside a triple-quoted XML template);
# its indentation was reconstructed from a mangled paste — confirm against
# the original file before relying on it.

if __name__ == "__main__":
    # Ad-hoc driver: flip the if False / if True guards to choose a task.
    if False:
        # Crawl weibo posts ten times.
        [collect.collect_weibo() for i in range(10)]
    if True:
        # Classify the texts of locally collected weibo data.
        feature = CHIFeature()
        path = "collect"
        sentences = collect.read_weibo(path)
        sentences = [s.get("sentence") for s in sentences]
        classifict(feature, sentences, incr=True, out=True)
        # test_classification(feature, incr=True)
    if False:
        test_classification(CHIFeature(subjective=False))
    # s1 = "寂寞人生爱无休,寂寞是爱永远的主题、我和我的影子独处、它说它有悄悄话想跟我说、" \
    #      "它说它很想念你,原来我和我的影子,都在想你。"
    # classifict(CHIFeature(), [s1,s1], out=True)
    # print
    # print
    # # # test fasttfidf