Ejemplo n.º 1
0
def gen_table_gt(dir_path, pdf_img_save_dir, table_img_save_dir):
    """Render each PDF under ``dir_path`` to images and crop out its tables.

    ``dir_path`` is walked recursively. For every file whose extension is
    exactly ``.pdf`` (the comparison is case-sensitive):

    * the PDF is rendered through ``ICDAR2013(...).gen_img()`` into
      ``pdf_img_save_dir/<pdf name>`` (the folder is created if missing);
    * the ground truth is read from the sibling ``<pdf name>-reg.xml`` and
      ``<pdf name>-str.xml`` files through ``ReadXml(...).get_result()``;
    * ``extract_table`` is called once per ground-truth entry, with
      ``table_img_save_dir`` as its last argument.

    NOTE(review): the table image paths collected below are not returned by
    the code visible here -- confirm whether callers expect them.
    """
    cropped_tables = []
    for folder, _sub_dirs, entries in os.walk(dir_path):
        for entry in entries:
            stem, suffix = os.path.splitext(entry)
            if suffix == '.pdf':
                source_pdf = os.path.join(folder, entry)
                print('process {}'.format(source_pdf))

                # One output folder per PDF, named after the file stem.
                page_dir = os.path.join(pdf_img_save_dir, stem)
                os.makedirs(page_dir, exist_ok=True)

                # Convert the PDF into images.
                page_images = ICDAR2013(pdf_path=source_pdf, save_dir=page_dir).gen_img()

                # Extract the information from the XML ground truth, which
                # lives next to the PDF.
                gt_reader = ReadXml(
                    reg_xml_path=os.path.join(folder, stem + '-reg.xml'),
                    str_xml_path=os.path.join(folder, stem + '-str.xml'),
                )
                for table_info in gt_reader.get_result():
                    # The entry's 'page' value is used directly as an index
                    # into the rendered images.
                    page_image = page_images[table_info['page']]
                    table_path, _table_gt = extract_table(page_image, table_info, table_img_save_dir)
                    cropped_tables.append(table_path)
Ejemplo n.º 2
0
import json
import logging
import os
import sys
import time
import xml.dom.minidom

from google_translate import TranslateGoogle
from read_xml import ReadXml
from utils import init_logging

if __name__ == '__main__':
    # Script entry point: parse the XML file given as the first command-line
    # argument, then start one <resources> DOM document per entry of
    # lang_map.json.
    init_logging()
    # Use the public setter rather than assigning ``.level`` directly, so the
    # logging module can do its own bookkeeping for the level change.
    logging.getLogger().setLevel(logging.INFO)

    # The source XML path is mandatory.
    if len(sys.argv) < 2:
        raise Exception("You must add a xml path!")
    string_list = ReadXml().read_xml(sys.argv[1])

    trans = TranslateGoogle()
    # lang_map.json is read relative to the current working directory; its
    # items are iterated below as (lang, documentName) pairs.
    with open('lang_map.json', 'r', encoding='utf8') as fp:
        languages = json.load(fp)

    for lang, documentName in languages.items():
        # Fresh DOM per language, rooted at a <resources> element that
        # declares the Android and XLIFF namespaces.
        doc = xml.dom.minidom.Document()
        root = doc.createElement('resources')
        root.setAttribute('xmlns:android',
                          "http://schemas.android.com/apk/res/android")
        root.setAttribute('xmlns:xliff',
                          "urn:oasis:names:tc:xliff:document:1.2")
Ejemplo n.º 3
0
    def init_value(self):
        """Set up the XML reader, the model dictionary and the brand table.

        * ``self.read_xml``: a ``ReadXml`` built on the ``xml`` directory
          that sits next to this source file.
        * ``self.model_dict``: starts out empty.
        * ``self.brand_dict``: maps each brand key to ``_(<same literal>)``.
        """
        read_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "xml")
        self.read_xml = ReadXml(read_path)
        self.model_dict = {}

        # init brand EN list.
        # Every name is passed to ``_`` as a string literal (presumably the
        # gettext translation function -- confirm), so the entries are kept
        # spelled out rather than generated in a loop.
        # NOTE(review): "sangsung", "meizu " (trailing space), "Pantech"
        # (capitalised) and "cherr" look like typos, but keys and message ids
        # may be matched against external data, so they are left unchanged --
        # confirm before correcting.
        self.brand_dict = {
            "intel": _("intel"),
            "zte": _("zte"),
            "hasee": _("hasee"),
            "apple": _("apple"),
            "nokia": _("nokia"),
            "alcatel": _("alcatel"),
            "google": _("google"),
            "asus": _("asus"),
            "hp": _("hp"),
            "sony": _("sony"),
            "hedy": _("hedy"),
            "archos": _("archos"),
            "boway": _("boway"),
            "bird": _("bird"),
            "vivo": _("vivo"),
            "great wall": _("great wall"),
            "changhong": _("changhong"),
            "skyworth": _("skyworth"),
            "dell": _("dell"),
            "philips": _("philips"),
            "toshiba": _("toshiba"),
            "amazon": _("amazon"),
            "sdo": _("sdo"),
            "disney": _("disney"),
            "haier": _("haier"),
            "fujitsu": _("fujitsu"),
            "acer": _("acer"),
            "lenovo": _("lenovo"),
            "gigabyte": _("gigabyte"),
            "xiaomi": _("xiaomi"),
            "huawei": _("huawei"),
            "blackberry": _("blackberry"),
            "motorola": _("motorola"),
            "sangsung": _("sangsung"),
            "meizu ": _("meizu "),
            "benq": _("benq"),
            "panasonic": _("panasonic"),
            "sony ericsson": _("sony ericsson"),
            "pioneer": _("pioneer"),
            "hyundai": _("hyundai"),
            "newman": _("newman"),
            "coolpad": _("coolpad"),
            # "malata" was listed twice in a row; the duplicate key only
            # overwrote this entry with the same value, so it was dropped.
            "malata": _("malata"),
            "sharp": _("sharp"),
            "gionee": _("gionee"),
            "k-touch": _("k-touch"),
            "Pantech": _("Pantech"),
            "hisense": _("hisense"),
            "teclast": _("teclast"),
            "cube": _("cube"),
            "amoi": _("amoi"),
            "doov": _("doov"),
            "minte": _("minte"),
            "dopod": _("dopod"),
            "eton": _("eton"),
            "cherr": _("cherr"),
            "gaoxinqi": _("gaoxinqi"),
            "konka": _("konka"),
            "viewsonic": _("viewsonic"),
            "xibo": _("xibo"),
            "hosin": _("hosin"),
            "apanda": _("apanda"),
            "iocean": _("iocean"),
            "mastone": _("mastone"),
        }