Example #1
0
    def extract_image(self, image):
        """Run the ExtractImage pipeline on one image and save the results.

        The image is handed to ``ExtractImage``; its horizontal pass,
        horizontal merge and vertical pass are called in that order. The
        first element of every vertical-pass result is merged with
        ``image_merge`` and the processed ``extract.image`` is saved under
        ``static/images/dealimg/``.

        Args:
            image: the image object passed straight to ``ExtractImage``.

        Returns:
            A one-element list ``[[deal_path, merge_path, extract_paths]]``:
            ``deal_path`` is the relative path of the saved intermediate
            image, ``merge_path`` is whatever ``image_merge`` returned (or
            ``""`` when the vertical pass produced nothing), and
            ``extract_paths`` holds the second element of every vertical-pass
            result.
        """
        extractor = ExtractImage(image)

        # Horizontal extraction, then merge of the resulting lines.
        drawn_lines = extractor.horizontal_draw_line(1, 2)
        split_lines = extractor.horizontal_merger_line(drawn_lines)

        # Vertical extraction; each entry is indexed as (image, path).
        extracted = list(extractor.vertical_line(1, 2, split_lines))
        extracted_images = [entry[0] for entry in extracted]
        extracted_paths = [entry[1] for entry in extracted]

        # Merge the extracted images only when there is something to merge.
        merged_path = ""
        if extracted_images:
            merged_path = extractor.image_merge(extracted_images)

        # Save the intermediate processed image.
        # NOTE(review): the file name has one-second resolution, so two calls
        # within the same second appear to target the same file - confirm.
        deal_path = "static/images/dealimg/" + time.ctime() + ".jpg"
        extractor.image.save("../%s" % deal_path)

        return [[deal_path, merged_path, extracted_paths]]
Example #2
0
    def extract_image(self):
        """Unzip pending patents, extract their images and record the paths.

        Queries ``self.db.urlno`` for documents with ``kind == "patent"``,
        ``downflag == 1`` and ``extractflag == 0``. For each one the archive
        at ``zippath`` is unpacked with ``PatentClass.unzip_patent``; the
        unpacked pages are visited last-to-first, written to a temporary JPEG
        and passed to ``PatentClass.extract_image``. The per-page merged
        images are combined with ``ExtractImage.image_merge`` and the
        resulting paths are written back to the ``urlno`` document together
        with ``extractflag = 1``.

        Patents whose archive yields no files are skipped and keep
        ``extractflag == 0``. A failing database update is reported on stdout
        and does not stop the remaining patents.

        Returns:
            None.
        """
        # Read the urlno collection to get the pending patent numbers.
        results = self.db.urlno.find(
            {"kind": "patent", "downflag": 1, "extractflag": 0},
            {"_id": 0, "indexflag": 1, "zippath": 1})
        if not results:
            return

        patent_handler = PatentClass()
        # Collect every (patent number, zip path) pair before processing.
        pending = [(result["indexflag"], result["zippath"])
                   for result in results]

        for patentno, zip_path in pending:
            # Unzip the archive.
            list_unzip_save_path = patent_handler.unzip_patent(zip_path)
            print("解压完成")
            # Unzip produced nothing: leave the document untouched.
            if not list_unzip_save_path:
                continue

            count_none = 0
            list_deal_save_path = []
            list_merge_save_path = []
            list_extract_save_path = []
            # Extract images walking the unpacked files in reverse order.
            for image_path in reversed(list_unzip_save_path):
                # Temporarily convert the tif to jpg so it can be opened.
                PythonMagick.Image("../%s" % image_path).write("temptiftojpg.jpg")
                image = Image.open("temptiftojpg.jpg").convert("L")

                deal_path, merge_path, extract_paths = \
                    patent_handler.extract_image(image)[0][:3]
                if merge_path:
                    list_deal_save_path.append(deal_path)
                    list_merge_save_path.append(merge_path)
                    list_extract_save_path.append(extract_paths)
                else:
                    count_none += 1
                # Stop once two pages have produced no merged image.
                if count_none == 2:
                    break

            # Combine the per-page merged images into one large image.
            final_merge_save_path = ""
            if list_merge_save_path:
                list_merge_image = [Image.open("../%s" % merge_image_path)
                                    for merge_image_path in list_merge_save_path]
                final_merge_save_path = ExtractImage().image_merge(
                    list_merge_image, patentno)

            # Store the paths and mark the urlno document as extracted.
            try:
                self.db.urlno.update(
                    {"indexflag": patentno},
                    {"$set": {"extractflag": 1,
                              "dealpath": list_deal_save_path,
                              "extractpath": list_extract_save_path,
                              "mergepath": final_merge_save_path}})
            except Exception as err:
                # Best effort: report the failure and keep going, instead of
                # hiding it behind a bare ``except: pass``.
                print("urlno update failed for %r: %r" % (patentno, err))
            else:
                print("提取成功")
        return
Example #3
0
    def extract_image(self):
        """Extract images from converted pages for up to ten pending URLs.

        Reads ``self.db.urlno`` for documents with ``convertflag == 1`` and
        ``extractflag == 0``. For each one, the ``convertpath`` list of the
        matching ``convertimg`` document is opened page by page, converted to
        mode ``"L"`` and run through ``ExtractImage.main``. The resulting
        paths are inserted into the ``dealimg``, ``extractimg`` and
        ``mergeimg`` collections (only where no document for the URL exists
        yet) and the ``urlno`` document gets ``extractflag = 1``.

        A URL without any ``convertimg`` document is skipped and left
        unflagged; previously this raised ``UnboundLocalError`` or silently
        reused the previous URL's pages.

        Returns:
            None.
        """
        # URLs that are converted but whose images are not extracted yet.
        pending = self.db.urlno.find(
            {"convertflag": 1, "extractflag": 0}, {"indexflag": 1})

        processed = 0
        for record in pending:
            url = record["indexflag"]

            # Converted page images for this URL; the last document wins.
            list_convert_imgs = None
            for item in self.db.convertimg.find({"indexflag": url},
                                                {"convertpath": 1}):
                list_convert_imgs = item["convertpath"]
            if list_convert_imgs is None:
                # No conversion record: nothing to extract for this URL.
                continue

            # Extract every page and sort the returned paths by kind.
            list_deal_image_path = []
            list_extract_image_path = []
            list_merge_image_path = []
            for convert_img in list_convert_imgs:
                image = Image.open(convert_img).convert("L")
                each_save_path = ExtractImage(image).main()
                deal_path, merge_path, extract_paths = each_save_path[0][:3]
                list_deal_image_path.append(deal_path)
                if merge_path:
                    list_merge_image_path.append(merge_path)
                list_extract_image_path.extend(extract_paths)

            # Combine the per-page merged images into one.
            final_merge_image_path = ""
            if list_merge_image_path:
                list_merge_image = [Image.open(each_merge_image_path)
                                    for each_merge_image_path
                                    in list_merge_image_path]
                final_merge_image_path = ExtractImage().image_merge(
                    list_merge_image)

            # Write to dealimg, extractimg and mergeimg unless the URL is
            # already present in the respective collection.
            for collection, field, value in (
                    (self.db.dealimg, "dealpath", list_deal_image_path),
                    (self.db.extractimg, "extractpath", list_extract_image_path),
                    (self.db.mergeimg, "mergepath", final_merge_image_path)):
                if not collection.find_one({"indexflag": url}):
                    collection.insert(
                        {"indexflag": url, field: value, "time": time.ctime()})

            self.db.urlno.update({"indexflag": url},
                                 {"$set": {"extractflag": 1}})

            # Cap one run at ten processed URLs.
            processed += 1
            print(processed)
            if processed > 9:
                break
        return