Beispiel #1
0
def clear_un_img():
    """Delete collected images that no weibo record refers to.

    The entries that ``FileUtil.listdir`` returns for
    ``<RESOURCE_BASE_URL>/collect/img`` are compared against the ``"img"``
    values of every record that ``collect.read_weibo`` yields for
    ``<RESOURCE_BASE_URL>/collect`` and each directory below it; entries
    that are not referenced are passed to ``os.remove``.  When
    ``FileUtil.isempty`` reports the collect directory as empty, the image
    directory is cleared through ``FileUtil.empty`` instead.
    """
    # Directory in which the images are stored.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # Records below this directory name the images that must be kept.
    leave_img_url = os.path.join(RESOURCE_BASE_URL, "collect")

    if FileUtil.isempty(leave_img_url):
        FileUtil.empty(all_img_url)
    else:
        all_imgs = FileUtil.listdir(all_img_url)

        # The collect directory itself plus every directory nested in it.
        dirs = [leave_img_url]
        for parent, dirnames, _ in os.walk(leave_img_url):
            for dirname in dirnames:
                dirs.append(os.path.join(parent, dirname))

        leave_imgs = []
        for dir_ in dirs:
            records = collect.read_weibo(dir_, isreadimg=True)
            # Records without an "img" value contribute nothing.
            leave_imgs += flatten(
                [record.get("img") for record in records if record.get("img")])

        # Delete the surplus images.  An explicit loop is used on purpose:
        # map() is lazy on Python 3, so using it for its side effect would
        # silently remove nothing there.
        for img_path in all_imgs:
            if img_path not in leave_imgs:
                os.remove(img_path)
    def get_classificator(self):
        """Train the classifier held in ``self.nn`` on image features.

        Training samples are loaded with ``self.read_train`` from
        ``ImageClassification.image_train_path``.  If that yields nothing,
        the weibo records under ``ImageClassification.weibo_path`` are read
        (with images), their sentences are classified with a ``CHIFeature``,
        the predictions are merged back into the records and the result is
        saved.

        Returns:
            ``self``, so that calls can be chained.
        """
        samples = self.read_train(ImageClassification.image_train_path)
        if not samples:
            # No stored training data: read the weibo data instead.
            samples = collect.read_weibo(
                ImageClassification.weibo_path, isreadimg=True)
            raw_texts = []
            for sample in samples:
                raw_texts.append(sample.get("sentence"))

            # Predict a class for every sentence.
            predicted = self.__classifict(CHIFeature(), raw_texts, incr=True)

            # Rebuild the records from the predictions, then save them.
            samples = self.__reconstruct(samples, predicted)
            self.__save_result(samples)

        # The text part of the split is not needed for image training.
        _, imgs, labels = self.split(samples)
        features = self.__get_feature_from_img(imgs)
        self.nn.get_classificator(features, labels)
        return self
Beispiel #3
0
    def get_classificator(self):
        """Fit ``self.nn`` on features extracted from the training images.

        ``self.read_train`` is asked for the samples stored at
        ``ImageClassification.image_train_path``.  When it returns nothing,
        the samples are built instead: weibo records (including images) are
        read from ``ImageClassification.weibo_path``, their sentences are
        classified using a ``CHIFeature``, the predictions are folded back
        into the records, and the records are saved.

        Returns:
            ``self``, so that calls can be chained.
        """
        records = self.read_train(ImageClassification.image_train_path)
        if not records:
            # Nothing stored yet: read the weibo data.
            records = collect.read_weibo(
                ImageClassification.weibo_path, isreadimg=True)
            plain_sentences = []
            for record in records:
                plain_sentences.append(record.get("sentence"))

            # Predict.
            predictions = self.__classifict(
                CHIFeature(), plain_sentences, incr=True)

            # Reconstruct the records with the predictions and save them.
            records = self.__reconstruct(records, predictions)
            self.__save_result(records)

        # Only the images and labels are used below; the texts are dropped.
        _, imgs, labels = self.split(records)
        img_features = self.__get_feature_from_img(imgs)
        self.nn.get_classificator(img_features, labels)
        return self
def clear_un_img():
    """Delete collected images that no weibo record refers to.

    The entries that ``FileUtil.listdir`` returns for
    ``<RESOURCE_BASE_URL>/collect/img`` are compared against the ``"img"``
    values of every record that ``collect.read_weibo`` yields for
    ``<RESOURCE_BASE_URL>/collect`` and each directory below it; entries
    that are not referenced are passed to ``os.remove``.  When
    ``FileUtil.isempty`` reports the collect directory as empty, the image
    directory is cleared through ``FileUtil.empty`` instead.
    """
    # Directory in which the images are stored.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # Records below this directory name the images that must be kept.
    leave_img_url = os.path.join(RESOURCE_BASE_URL, "collect")

    if FileUtil.isempty(leave_img_url):
        FileUtil.empty(all_img_url)
    else:
        all_imgs = FileUtil.listdir(all_img_url)

        # The collect directory itself plus every directory nested in it.
        dirs = [leave_img_url]
        for parent, dirnames, _ in os.walk(leave_img_url):
            for dirname in dirnames:
                dirs.append(os.path.join(parent, dirname))

        leave_imgs = []
        for dir_ in dirs:
            records = collect.read_weibo(dir_, isreadimg=True)
            # Records without an "img" value contribute nothing.
            leave_imgs += flatten(
                [record.get("img") for record in records if record.get("img")])

        # Delete the surplus images.  An explicit loop is used on purpose:
        # map() is lazy on Python 3, so using it for its side effect would
        # silently remove nothing there.
        for img_path in all_imgs:
            if img_path not in leave_imgs:
                os.remove(img_path)
Beispiel #5
0
        %s
    </sentence>
</weibo>
""" % ("None", "None", "N", s + "\n Can't recognize because it has insufficient key_words"))

    else:
        print c_pred

# Script entry point.  The ``if False`` / ``if True`` guards below are manual
# switches: flip them to choose which step runs.
if __name__ == "__main__":
    # Disabled: would call collect.collect_weibo() ten times.
    if False:
        [collect.collect_weibo() for i in range(10)]

    # Enabled: read the records under "collect", keep only their "sentence"
    # values and classify them with a CHIFeature (incr=True, out=True).
    if True:
        feature = CHIFeature()
        path = "collect"
        sentences = collect.read_weibo(path)
        sentences = [s.get("sentence") for s in sentences]
        classifict(feature, sentences, incr=True, out=True)

#        test_classification(feature, incr=True)

    # Disabled: would run test_classification with CHIFeature(subjective=False).
    if False:
        test_classification(CHIFeature(subjective=False))

#    s1 = "寂寞人生爱无休,寂寞是爱永远的主题、我和我的影子独处、它说它有悄悄话想跟我说、" \
#         "它说它很想念你,原来我和我的影子,都在想你。"
#    classifict(CHIFeature(), [s1,s1], out=True)
#    print
#    print
#
#    # test fasttfidf