Example #1
    def patch(self, json_file):
        with open(json_file) as json_data:
            d = json.load(json_data)
        if u'file_name' not in d:
            return
        file_name = d[u'file_name']
        path = d[u'path']
        encoding = d[u'encoding']
        data = d[u'data']
        # data: {line_index: [{'start': ..., 'end': ..., 'trans': ...}, ...]}
        md5 = d[u'md5']
        print file_name
        print path
        print encoding
        print data
        if not os.path.exists(path):
            print 'Error: source file not found!!'
            return
        file_md5 = get_file_md5(path)
        if md5 != file_md5:
            print 'Error: source file has been modified, cannot overwrite!!'
            return
        print '=> verification passed, starting to write data back'
        backup = path + '.backup'
        # shutil.copy(path, backup)  # back up the original file
        with open(path) as f:
            lines = f.readlines()
        unicode_lines = map(lambda s: s.decode(encoding), lines)
        print '======'
        print unicode_lines
        for (k, v) in data.items():
            # Apply replacements from right to left so earlier edits do not
            # shift the offsets of the items that follow on the same line.
            v.sort(reverse=True, key=lambda x: x['start'])
            for item in v:
                line = unicode_lines[int(k)]
                start = item['start']
                end = item['end']
                trans = item['trans']
                # print start, end, '|', origin, '|', trans
                new_line = line[:start] + trans + line[end:]
                unicode_lines[int(k)] = new_line
        print unicode_lines
        lines = map(lambda s: s.encode(encoding), unicode_lines)
        with open(path, 'w') as f:
            f.writelines(lines)
        print '<= write-back finished'
        print ''
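Both examples call a get_file_md5() helper whose definition is not shown. Below is a minimal sketch of what it presumably does, assuming it simply returns the MD5 hex digest of the file's raw bytes, which is consistent with how patch() compares it against the md5 field that resolve() stores:

import hashlib

def get_file_md5(path):
    # Assumed implementation: hash the raw file contents so that patch()
    # can detect whether the source file changed after resolve() ran.
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()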
Example #2
    def resolve(self):
        file_name = unicode(os.path.basename(self.path))
        f = open(self.path)
        code = chardet.detect(f.read())
        f.seek(0, 0)
        lines = f.readlines()
        f.close()
        try:
            unicode_lines = map(lambda s: s.decode(code['encoding']), lines)
        except (UnicodeDecodeError, TypeError) as e:
            print 'fail to decode file: ', file_name
            print 'except: ', e
            return {}
        data = {}
        # dict_map collects the Chinese strings that were found
        dict_map = {}
        sort_order_data = ['text', 'start', 'end', 'origin', 'trans', 'auto']

        for seq in xrange(len(unicode_lines)):
            if self.is_comment(unicode_lines[seq]):
                continue
            # match the first regular expression
            m = re.finditer(self.pattern, unicode_lines[seq])
            m_list = [i.span() for i in m]
            if len(m_list) > 0:
                data[seq] = []
                for item in m_list:
                    item_data = {
                        'text': unicode_lines[seq],
                        'start': item[0],
                        'end': item[1],
                        'origin': unicode_lines[seq][item[0]:item[1]],
                        'trans': self.transform((unicode_lines[seq][item[0]:item[1]]), unicode_lines[seq], item[0],
                                                item[1], unicode_lines, seq),
                        'auto': ''
                    }
                    # record the Chinese string (surrounding quotes stripped)
                    dict_map[unicode_lines[seq][item[0]:item[1]][1:-1]] = unicode_lines[seq][item[0]:item[1]][1:-1]
                    # order the item's fields
                    data_ordered = OrderedDict(
                        sorted(item_data.iteritems(), key=lambda (k, v): sort_order_data.index(k)))
                    data[seq].append(data_ordered)

            # match the second regular expression
            # print 'second'
            m = re.finditer(self.pattern_plus, unicode_lines[seq])
            m_list = [i.span() for i in m]
            if len(m_list) > 0:
                if seq not in data:
                    data[seq] = []
                for item in m_list:
                    start = item[0]
                    end = item[1]
                    if not self.is_item_include(start, end, data[seq]):
                        # print item
                        # print unicode_lines[seq][item[0]:item[1]]
                        item_data = {
                            'text': unicode_lines[seq],
                            'start': item[0],
                            'end': item[1],
                            'origin': unicode_lines[seq][item[0]:item[1]],
                            'trans': self.transform_simple((unicode_lines[seq][item[0]:item[1]])),
                            'auto': ''
                        }
                        # record the Chinese string
                        dict_map[unicode_lines[seq][item[0]:item[1]]] = unicode_lines[seq][item[0]:item[1]]
                        # order the item's fields
                        data_ordered = OrderedDict(
                            sorted(item_data.iteritems(), key=lambda (k, v): sort_order_data.index(k)))
                        data[seq].append(data_ordered)

        ret = {
            'path': unicode(self.path),
            'file_name': file_name,
            'md5': get_file_md5(self.path),
            'encoding': code['encoding'],
            'data': data,
        }
        sort_order = ['file_name', 'md5', 'path', 'encoding', 'data']
        ret_ordered = OrderedDict(sorted(ret.iteritems(), key=lambda (k, v): sort_order.index(k)))

        if not os.path.exists(self.output_path):
            os.makedirs(self.output_path)

        if len(data) <= 0:
            # No Chinese strings found: nothing to write; return an empty map
            # for consistency with the error paths above.
            return {}

        f = codecs.open(
            os.path.join(self.output_path, file_name + '-' + hashlib.md5(self.path).hexdigest() + '-output.json'), 'w',
            encoding="utf-8")
        f.write(json.dumps(ret_ordered, encoding='utf-8', ensure_ascii=False, indent=4))
        f.close()
        return dict_map
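For reference, the JSON file that resolve() writes (and that patch() reads back) has the following shape. The field names and their order come from the ret dict and the two sort_order lists above; the concrete values are illustrative only, and the exact form of the trans replacement depends on transform(), which is not shown:

{
    "file_name": "demo.py",
    "md5": "<md5 of the source file>",
    "path": "/path/to/demo.py",
    "encoding": "utf-8",
    "data": {
        "12": [
            {
                "text": "    msg = '你好'\n",
                "start": 10,
                "end": 14,
                "origin": "'你好'",
                "trans": "<replacement produced by transform()>",
                "auto": ""
            }
        ]
    }
}

patch() then looks up unicode_lines[int(k)] for each key of data, replaces the [start:end] slice with trans (highest start first), and writes the file back in its original encoding.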