Ejemplo n.º 1
0
def get_translated_lines_converter(file_id_to_lines, category_to_translated):
    """Convert per-category translation data into an original-to-translated line map.

    Args:
        file_id_to_lines (dict[int, list[str]]):
                key is the file_id,
                value is a list of lines, each formatted as
                "ID","Unknown","Index","Offset","Text"
        category_to_translated (dict[str, list]):
                key is the category,
                value is a list of [file_id, unknown, index, text]

    Returns:
        en_line_to_zh_line (dict[str, str]): key is the original line,
                value is the translated line
    """

    # Sum of the per-category row counts; logged next to the merged dict size.
    translated_count_dry = 0

    # translated_dict, {"en_line": "zh_line"}
    en_line_to_zh_line = {}

    # Walk the data read from each xls, in sorted category order
    for category, translated_data in sorted(category_to_translated.items()):
        translated_count_dry += len(translated_data)
        # The category decides which file ids may hold its lines; the tables
        # are consulted in the order list -> array -> pair.
        if category in file_id_of_list:
            possible_file_ids = file_id_of_list[category]
        elif category in file_id_of_array:
            possible_file_ids = file_id_of_array[category]
        elif category in file_id_of_pair:
            possible_file_ids = file_id_of_pair[category]
        else:
            possible_file_ids = []
        # Candidate lines: everything stored under the known file ids;
        # ids without an entry in file_id_to_lines contribute nothing.
        possible_lines = []
        for file_id in possible_file_ids:
            possible_lines.extend(file_id_to_lines.get(int(file_id), ()))
        # load translation
        en_line_to_zh_line_of_category = get_en_line_to_zh_line(
            possible_lines, translated_data)
        # merge translation
        en_line_to_zh_line = merge_dict(en_line_to_zh_line,
                                        en_line_to_zh_line_of_category)

    log.info('%d(%d) lines translated' %
             (translated_count_dry, len(en_line_to_zh_line)))
    return en_line_to_zh_line
Ejemplo n.º 2
0
def get_translated_lines_converter(file_id_to_lines, category_to_translated):
    """Build the mapping from original lines to translated lines.

    Args:
        file_id_to_lines (dict[int, list[str]]):
                key is the file_id,
                value is a list of lines, each formatted as
                "ID","Unknown","Index","Offset","Text"
        category_to_translated (dict[str, list]):
                key is the category,
                value is a list of [file_id, unknown, index, text]

    Returns:
        en_line_to_zh_line (dict[str, str]): key is the original line,
                value is the translated line
    """

    # Running total of rows over all categories; reported together with the
    # size of the merged dict in the final log message.
    translated_count_dry = 0

    # translated_dict, {"en_line": "zh_line"}
    en_line_to_zh_line = {}

    # Iterate over the data read from each xls, sorted by category
    for category, translated_data in sorted(category_to_translated.items()):
        translated_count_dry += len(translated_data)
        # Choose the file ids by category; lookup order is
        # file_id_of_list, then file_id_of_array, then file_id_of_pair.
        if category in file_id_of_list:
            possible_file_ids = file_id_of_list[category]
        elif category in file_id_of_array:
            possible_file_ids = file_id_of_array[category]
        elif category in file_id_of_pair:
            possible_file_ids = file_id_of_pair[category]
        else:
            possible_file_ids = []
        # Lines that need to be checked; unknown file ids are skipped.
        possible_lines = []
        for file_id in possible_file_ids:
            possible_lines.extend(file_id_to_lines.get(int(file_id), ()))
        # load translation
        en_line_to_zh_line_of_category = get_en_line_to_zh_line(possible_lines, translated_data)
        # merge translation
        en_line_to_zh_line = merge_dict(en_line_to_zh_line, en_line_to_zh_line_of_category)

    log.info('%d(%d) lines translated' % (translated_count_dry, len(en_line_to_zh_line)))
    return en_line_to_zh_line
Ejemplo n.º 3
0
    def merge_pipeline(self, target):
        """Return a new Pipeline combining this pipeline's JSON with ``target``.

        Starts from a shallow copy of ``self.to_json()`` and applies each
        entry of ``target``:

        * key not present yet: the value is stored as-is;
        * value is a list: it is appended to the existing value with ``+``;
        * value is a dict: the existing value and it go through ``merge_dict``;
        * anything else: the value replaces the existing one.

        Args:
            target: mapping of keys to merge in (only ``.items()`` is used).

        Returns:
            Pipeline: built from the combined mapping.
        """
        combined = self.to_json().copy()

        for key, value in target.items():
            if key not in combined:
                combined[key] = value
            elif isinstance(value, list):
                combined[key] = combined[key] + value
            elif isinstance(value, dict):
                combined[key] = merge_dict(combined[key], value)
            else:
                combined[key] = value

        return Pipeline(combined)
Ejemplo n.º 4
0
def chart_ocr(img, word_bbox_role, tool, pad=False):
    """Run OCR on each word bounding box and record the majority-voted text.

    For every entry of ``word_bbox_role`` the region given by its ``'bbox'``
    (``minr, minc, maxr, maxc``) is cropped from ``img``, turned into several
    variants by ``ocr_image_preprocess`` and each non-``None`` variant is
    passed to ``ocr``. The most frequent non-empty result wins; on a tie the
    candidate that was seen first is kept. The winner then goes through
    ``ocr_postprocess``, which may instead register extra boxes.

    Args:
        img: image indexed as ``img[rows, cols]`` (assumed to be a 2-D numpy
            array -- confirm against the caller).
        word_bbox_role (dict): maps an index to a dict holding at least
            ``'bbox'`` and ``'role'``; a ``'txt'`` entry is written in place.
        tool: OCR backend handle, forwarded unchanged to ``ocr``.
        pad (bool): when true, the crop is centred on a canvas of ones twice
            its height and width; otherwise the crop is widened by a 2-pixel
            margin on every side (clamped at the top/left image border).

    Returns:
        dict: ``word_bbox_role`` itself, or the result of
        ``utils.merge_dict`` with the extra boxes when any were produced.
    """
    word_bbox_except_len = len(word_bbox_role)
    word_bbox_unexcept = {}
    for idx, prop in word_bbox_role.items():
        minr, minc, maxr, maxc = prop['bbox']
        if pad:
            height, width = maxr - minr, maxc - minc
            top, left = height // 2, width // 2
            word_img = np.ones((2 * height, 2 * width))
            word_img[top:top + height, left:left + width] = img[minr:maxr, minc:maxc]
        else:
            pad_size = 2
            # Clamp the start at 0: a negative slice start would wrap around
            # to the opposite edge and yield an empty or wrong crop for boxes
            # closer than pad_size to the top/left border. Overshooting the
            # bottom/right border is already truncated by slicing.
            word_img = img[max(minr - pad_size, 0):maxr + pad_size,
                           max(minc - pad_size, 0):maxc + pad_size]

        img_aug = ocr_image_preprocess(word_img, rotate_aug=True)
        # OCR result -> number of variants that produced it
        txt_cand = {}
        for word_img_aug in img_aug:
            if word_img_aug is None:
                continue
            txt_tmp = ocr(tool, word_img_aug, 'txt')
            if txt_tmp != '':
                txt_cand[txt_tmp] = txt_cand.get(txt_tmp, 0) + 1

        if not txt_cand:
            word_bbox_role[idx]['txt'] = 'UNKNOWN'
        else:
            # Highest vote count; max() keeps the first key among equals.
            ocr_voted = max(txt_cand, key=txt_cand.get)
            word_bbox_unexcept, ocr_voted, add_flag = \
                ocr_postprocess(ocr_voted, prop['bbox'], prop['bbox'], word_bbox_unexcept, prop['role'], word_bbox_except_len)
            if not add_flag:
                word_bbox_role[idx]['txt'] = ocr_voted

    if word_bbox_unexcept:
        word_bbox_role = utils.merge_dict(word_bbox_role, word_bbox_unexcept)

    return word_bbox_role
Ejemplo n.º 5
0
    def get_sum(self):
        """Aggregate every entry of ``self.result_list`` into one summary dict.

        The ``take_count`` (8 slots), ``record_count`` (11 slots) and
        ``live_count`` (8 slots) lists are summed element-wise, the
        ``record_option`` / ``live_option`` / timelapse ``option`` lists are
        concatenated, and the ``wb``, ``aaa_mode`` and timelapse ``interval``
        dicts are handed to ``merge_dict`` together with the running total.

        NOTE(review): the return value of ``merge_dict`` is ignored, so the
        dict totals only grow if ``merge_dict`` updates its first argument in
        place -- confirm against its definition.

        Returns:
            dict: with keys ``take_count``, ``record_count``, ``live_count``,
            ``option_count``, ``record_option``, ``live_option`` and
            ``timelapse_option``.
        """
        take_total = [0] * 8
        record_total = [0] * 11
        live_total = [0] * 8

        option_total = {'wb': {}, 'aaa_mode': {}}
        record_options = []
        live_options = []
        timelapse_total = {'option': [], 'interval': {}}

        for entry in self.result_list:
            take_total = [
                subtotal + entry['take_count'][pos]
                for pos, subtotal in enumerate(take_total)
            ]
            record_total = [
                subtotal + entry['record_count'][pos]
                for pos, subtotal in enumerate(record_total)
            ]
            live_total = [
                subtotal + entry['live_count'][pos]
                for pos, subtotal in enumerate(live_total)
            ]

            merge_dict(option_total['wb'], entry['option_count']['wb'])
            merge_dict(option_total['aaa_mode'], entry['option_count']['aaa_mode'])

            record_options.extend(entry['record_option'])
            live_options.extend(entry['live_option'])

            timelapse_total['option'].extend(entry['timelapse_option']['option'])
            merge_dict(timelapse_total['interval'], entry['timelapse_option']['interval'])

        return {
            'take_count': take_total,
            'record_count': record_total,
            'live_count': live_total,
            'option_count': option_total,
            'record_option': record_options,
            'live_option': live_options,
            'timelapse_option': timelapse_total,
        }
Ejemplo n.º 6
0
def bboxes_postprocess(bboxes, fig_type):
    '''Add bboxes for legend packers, path points and axis lines; tighten axis/tick bboxes.

    Args:
        bboxes (dict): maps an element-type string to its bbox, a mutable
            4-item sequence (merged as min of items 0/1 and max of items
            2/3). For line paths of Line_chart / Area_chart figures the
            value is instead an SVG-like path string whose commands are
            separated by two spaces. Axis and tick bboxes are updated in
            place and axis-line entries are added to this dict.
        fig_type (str): figure type; 'Line_chart' and 'Area_chart' enable
            the path expansion, 'Pie_chart' skips the axis-line entries.

    Returns:
        The result of merging ``bboxes`` with the legend-packer bboxes and
        the per-path point locations through ``merge_dict`` (defined
        elsewhere in the project).
    '''

    def bbox_merge(bbox_cur, bbox_add):
        # Smallest box that covers both inputs.
        return [min(bbox_cur[0], bbox_add[0]), min(bbox_cur[1], bbox_add[1]),
                max(bbox_cur[2], bbox_add[2]), max(bbox_cur[3], bbox_add[3])]

    bboxes_packer = {}
    bboxes_path = {}
    for element_type, element_bbox in bboxes.items():
        if FID.LEGEND_SYMBOL_ID in element_type:
            # add bbox for legend packer
            idx = element_type.split('_')[-1]
            bboxes_packer[FID.LEGEND_PACKER_ID + idx] = bbox_merge(element_bbox, bboxes[FID.LEGEND_TEXT_ID + idx])

        if FID.DRAWING_OBJECT_ID in element_type:
            if (fig_type == 'Line_chart' and 'path' in element_type) or (fig_type == 'Area_chart' and 'line_path' in element_type):
                # add location for every point in path of a line element
                bbox_path = {}
                # Split the text directly: encoding it to bytes first makes
                # split('  ') raise TypeError on Python 3.
                path_splits = element_bbox.split('  ')
                if '' in path_splits:
                    path_splits.remove('')  # the last element in path
                for path_idx, path_split in enumerate(path_splits):
                    path_cur = path_split.split(' ')
                    assert path_cur[0] in ['M', 'L', 'z'], ' !! Error: unexpection type: {} in svg path'.format(path_cur[0])
                    point_loc = path_cur[1:]
                    if point_loc:
                        # remove the 'z' element (just in case)
                        bbox_path[element_type + '_%d' % path_idx] = list(map(float, point_loc))
                bboxes_path[element_type] = bbox_path

        if FID.X_AXIS_ID == element_type:
            # use top of tickline as axis bbox (offset label included)
            element_bbox[1] = bboxes[FID.X_AXIS_MAJOR_TICKLINE_ID + str(1)][1]
            if FID.X_AXIS_OFFSET_ID in bboxes:
                # Store the merged box back; rebinding the loop variable
                # alone would silently drop the offset label. Replacing the
                # value of an existing key is safe while iterating.
                element_bbox = bbox_merge(element_bbox, bboxes[FID.X_AXIS_OFFSET_ID])
                bboxes[element_type] = element_bbox
        if FID.Y_AXIS_ID == element_type:
            # use right of tickline as axis bbox (offset label included)
            element_bbox[2] = bboxes[FID.Y_AXIS_MAJOR_TICKLINE_ID + str(1)][2]
            if FID.Y_AXIS_OFFSET_ID in bboxes:
                element_bbox = bbox_merge(element_bbox, bboxes[FID.Y_AXIS_OFFSET_ID])
                bboxes[element_type] = element_bbox
        if FID.X_AXIS_MAJOR_TICK_ID in element_type:
            # use top of tickline as axis bbox
            element_bbox[1] = bboxes[FID.X_AXIS_MAJOR_TICKLINE_ID + element_type.split('_')[-1]][1]
        if FID.X_AXIS_MINOR_TICK_ID in element_type:
            # use top of tickline as axis bbox
            element_bbox[1] = bboxes[FID.X_AXIS_MINOR_TICKLINE_ID + element_type.split('_')[-1]][1]
        if FID.Y_AXIS_MAJOR_TICK_ID in element_type:
            # use right of tickline as axis bbox
            element_bbox[2] = bboxes[FID.Y_AXIS_MAJOR_TICKLINE_ID + element_type.split('_')[-1]][2]
        if FID.Y_AXIS_MINOR_TICK_ID in element_type:
            # use right of tickline as axis bbox
            element_bbox[2] = bboxes[FID.Y_AXIS_MINOR_TICKLINE_ID + element_type.split('_')[-1]][2]

    # Compare by value: identity comparison against a string literal is
    # implementation-dependent and can be true for an equal string.
    if fig_type != 'Pie_chart':
        # add in bbox for axis line; work on a copy so that narrowing the
        # line box does not also narrow the axis box it was derived from
        x_axis_line = list(bboxes[FID.X_AXIS_ID])
        x_axis_line[3] = bboxes[FID.X_AXIS_MAJOR_TICKLINE_ID + str(1)][3]
        bboxes[FID.X_AXIS_LINE_ID] = x_axis_line
        y_axis_line = list(bboxes[FID.Y_AXIS_ID])
        y_axis_line[0] = bboxes[FID.Y_AXIS_MAJOR_TICKLINE_ID + str(1)][0]
        bboxes[FID.Y_AXIS_LINE_ID] = y_axis_line

    return merge_dict(merge_dict(bboxes, bboxes_packer), bboxes_path)
Ejemplo n.º 7
0
 def clone(self):
     """Return a new Step built from ``merge_dict({}, self.to_json())``.

     How deep the copy is depends on ``merge_dict``, which is defined
     elsewhere.
     """
     snapshot = merge_dict({}, self.to_json())
     return Step(snapshot)
Ejemplo n.º 8
0
 def clone(self):
     """Return a new Pipeline built from ``merge_dict({}, self.to_json())``.

     How deep the copy is depends on ``merge_dict``, which is defined
     elsewhere.
     """
     snapshot = merge_dict({}, self.to_json())
     return Pipeline(snapshot)