def gen_table_gt(dir_path, pdf_img_save_dir, table_img_save_dir):
    """Convert each PDF under ``dir_path`` to images and crop out its tables.

    Walks ``dir_path`` recursively. For every file whose extension is exactly
    ``.pdf`` the PDF is rendered to images under
    ``<pdf_img_save_dir>/<pdf name>/`` and the sibling ground-truth files
    ``<name>-reg.xml`` / ``<name>-str.xml`` are parsed. Each parsed entry is
    then passed to ``extract_table`` together with the image selected by the
    entry's ``'page'`` value.

    Args:
        dir_path: Root directory searched for ``.pdf`` files.
        pdf_img_save_dir: Directory that receives one sub-directory of
            rendered images per PDF.
        table_img_save_dir: Directory handed to ``extract_table`` for the
            cropped table images.
    """
    # NOTE(review): the crop paths are collected here but nothing in the
    # visible code returns or otherwise consumes this list -- confirm intent.
    table_img_list = []

    for cur_dir, _sub_dirs, file_names in os.walk(dir_path):
        for file_name in file_names:
            stem, suffix = os.path.splitext(file_name)
            if suffix == '.pdf':
                pdf_path = os.path.join(cur_dir, file_name)
                print('process {}'.format(pdf_path))

                # The ground-truth XML files sit next to the PDF.
                reg_xml_path = os.path.join(cur_dir, stem + '-reg.xml')
                str_xml_path = os.path.join(cur_dir, stem + '-str.xml')

                page_img_dir = os.path.join(pdf_img_save_dir, stem)
                os.makedirs(page_img_dir, exist_ok=True)

                # Render the PDF to images.
                images_path_list = ICDAR2013(
                    pdf_path=pdf_path, save_dir=page_img_dir
                ).gen_img()

                # Pull the ground-truth entries out of the XML files.
                xml_reader = ReadXml(
                    reg_xml_path=reg_xml_path, str_xml_path=str_xml_path
                )
                for table_info in xml_reader.get_result():
                    page_image = images_path_list[table_info['page']]
                    # The second return value (the gt) is unused in the
                    # visible code.
                    img_path, img_gt = extract_table(
                        page_image, table_info, table_img_save_dir
                    )
                    table_img_list.append(img_path)
import json
import logging
import os
import sys
import time
import xml.dom.minidom

from google_translate import TranslateGoogle
from read_xml import ReadXml
from utils import init_logging

# Script entry point: read the strings from the XML file named on the command
# line, load the language map from lang_map.json, and start building one
# Android <resources> XML document per language-map entry.
if __name__ == '__main__':
    init_logging()
    # Use setLevel() rather than assigning ``.level`` directly so the logging
    # module can invalidate its cached effective levels.
    logging.getLogger().setLevel(logging.INFO)

    # The source XML path is a mandatory first command-line argument.
    if len(sys.argv) > 1:
        string_list = ReadXml().read_xml(sys.argv[1])
    else:
        raise Exception("You must add a xml path!")

    trans = TranslateGoogle()

    # lang_map.json is iterated below as (lang, documentName) pairs.
    # NOTE(review): presumably language code -> output document name; confirm
    # against the JSON file.
    with open('lang_map.json', 'r', encoding='utf8') as fp:
        languages = json.load(fp)

    for lang, documentName in languages.items():
        # Fresh DOM document with a <resources> root carrying the Android and
        # XLIFF namespace declarations.
        doc = xml.dom.minidom.Document()
        root = doc.createElement('resources')
        root.setAttribute('xmlns:android', "http://schemas.android.com/apk/res/android")
        root.setAttribute('xmlns:xliff', "urn:oasis:names:tc:xliff:document:1.2")
def init_value(self):
    """Set up the XML reader, an empty model dict, and the brand-name table.

    Sets three attributes on ``self``:

    * ``read_xml`` -- a ``ReadXml`` built on the ``xml`` directory that sits
      next to this source file.
    * ``model_dict`` -- an empty dict.
    * ``brand_dict`` -- maps each brand key to ``_(key)``.
      NOTE(review): ``_`` is presumably a gettext-style translation
      function installed elsewhere; confirm.
    """
    read_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "xml")
    self.read_xml = ReadXml(read_path)
    self.model_dict = {}

    # init brand EN list.
    # Every entry stays a literal ``_("...")`` call (instead of being built in
    # a loop) so message-extraction tools can still see each string.
    # The original literal listed "malata" twice; the redundant second entry
    # is dropped, which leaves the resulting dict unchanged.
    # NOTE(review): the keys "sangsung", "meizu " (trailing space), "Pantech"
    # (capitalised) and "cherr" look like typos, but they are runtime keys and
    # translation msgids, so they are kept byte-for-byte -- confirm against
    # the callers and the message catalogs before changing them.
    self.brand_dict = {
        "intel": _("intel"),
        "zte": _("zte"),
        "hasee": _("hasee"),
        "apple": _("apple"),
        "nokia": _("nokia"),
        "alcatel": _("alcatel"),
        "google": _("google"),
        "asus": _("asus"),
        "hp": _("hp"),
        "sony": _("sony"),
        "hedy": _("hedy"),
        "archos": _("archos"),
        "boway": _("boway"),
        "bird": _("bird"),
        "vivo": _("vivo"),
        "great wall": _("great wall"),
        "changhong": _("changhong"),
        "skyworth": _("skyworth"),
        "dell": _("dell"),
        "philips": _("philips"),
        "toshiba": _("toshiba"),
        "amazon": _("amazon"),
        "sdo": _("sdo"),
        "disney": _("disney"),
        "haier": _("haier"),
        "fujitsu": _("fujitsu"),
        "acer": _("acer"),
        "lenovo": _("lenovo"),
        "gigabyte": _("gigabyte"),
        "xiaomi": _("xiaomi"),
        "huawei": _("huawei"),
        "blackberry": _("blackberry"),
        "motorola": _("motorola"),
        "sangsung": _("sangsung"),
        "meizu ": _("meizu "),
        "benq": _("benq"),
        "panasonic": _("panasonic"),
        "sony ericsson": _("sony ericsson"),
        "pioneer": _("pioneer"),
        "hyundai": _("hyundai"),
        "newman": _("newman"),
        "coolpad": _("coolpad"),
        "malata": _("malata"),
        "sharp": _("sharp"),
        "gionee": _("gionee"),
        "k-touch": _("k-touch"),
        "Pantech": _("Pantech"),
        "hisense": _("hisense"),
        "teclast": _("teclast"),
        "cube": _("cube"),
        "amoi": _("amoi"),
        "doov": _("doov"),
        "minte": _("minte"),
        "dopod": _("dopod"),
        "eton": _("eton"),
        "cherr": _("cherr"),
        "gaoxinqi": _("gaoxinqi"),
        "konka": _("konka"),
        "viewsonic": _("viewsonic"),
        "xibo": _("xibo"),
        "hosin": _("hosin"),
        "apanda": _("apanda"),
        "iocean": _("iocean"),
        "mastone": _("mastone"),
    }