def test_insert_2(self, segment_list):
    """Bulk-insert ``segment_list`` with a single ``executemany`` call and time it.

    The ``{field}`` placeholders of the generated insert template (together
    with any single quotes wrapped around them) are rewritten to ``%s`` so the
    values are passed to the driver as parameters instead of being formatted
    into the SQL text.

    The batch is committed only when ``executemany`` succeeds; on failure the
    error is printed and the transaction is rolled back so that a partially
    applied batch is not persisted.

    Args:
        segment_list: Sequence of segments to insert; each item must provide
            ``generate_insert_formatter_dict()``.
    """
    insert_sql = Segment().generate_insert_formatter_sql()
    insert_sql = re.sub("'?{.*?}'?", '%s', insert_sql)
    print(insert_sql)

    mysql_helper = MySqlHelper()
    time_logger = TimeLogger('executemany')
    conn = mysql_helper.conn

    length = len(segment_list)
    args = []
    for i, segment in enumerate(segment_list):
        if i % 1000 == 0:
            print(f'execute {i+1}/{length}')
        # Order matters: the values are bound positionally to the %s
        # placeholders, so they are taken in the dict's own order.
        args.append(list(segment.generate_insert_formatter_dict().values()))

    with conn.cursor() as cursor:
        try:
            cursor.executemany(insert_sql, args)
        except Exception as e:
            print(e)
            # Do not commit whatever part of the batch made it through.
            conn.rollback()
        else:
            conn.commit()
    time_logger.stop()
class TestSegmentUtil(TestSql):
    """SQL tests that use a ``Segment`` as the object under test."""

    test_object = Segment()

    def test_insert_sql(self):
        """Print the insert formatter SQL (it may include the update time)."""
        formatter_sql = self.test_object.generate_insert_formatter_sql()
        print(formatter_sql)

    def test_insert(self, execute=True):
        """Run the inherited insert test with a source containing a single quote."""
        self.test_object.source = "it's a test"
        super().test_insert(execute)

    def test_import_from_tmx_and_save(self):
        """Parse a local TMX file into segments, then save them, timing both steps."""
        tmx_file = r'D:\workspace\TranslatorX\JetBrains\omegat\project_save.tmx'

        parse_timer = TimeLogger('解析 tmx ')
        segment_list = SegmentUtil().load_segments_from_tmx_file(tmx_file)
        print(f'共 {len(segment_list)} 条')
        parse_timer.stop()

        # The intermediate "save the segment list to a JSON file" step that
        # used to sit here is no longer needed: parsing is already fast.

        save_timer = TimeLogger('保存数据')
        SegmentUtil.save_segments(segment_list)
        save_timer.stop()
def test_project_statistics(self):
    """Collect project statistics from the segment table and print them.

    In production these figures should be kept up to date by the individual
    operations; after an import they are refreshed by reading the database,
    which is what this test does.
    """
    segment_model_helper = MySqlModelHelper(Segment())
    table_name = segment_model_helper.table_name

    project_statistics = ProjectStatistics()
    # Target id.
    project_statistics.target_id = 1

    # Id range of the inserted rows, as the half-open interval (start, end].
    range_start, range_end = 0, 999999
    id_range_condition = f'id>{range_start} AND id<={range_end}'

    # Number of segments.
    count_row = segment_model_helper.fetchone(
        f"SELECT COUNT(*) FROM {table_name} WHERE {id_range_condition}")
    print(f'count={count_row[0]}')
    project_statistics.segments_all = count_row[0]

    # Number of translators.
    translator_rows = segment_model_helper.fetchall(
        f'SELECT update_user,update_user_id,COUNT(1) FROM {table_name}'
        f' WHERE {id_range_condition} GROUP BY update_user,update_user_id')
    for user, user_id, count in translator_rows:
        # Every author listed here still needs to be inserted into the
        # author table.
        print(user, user_id, count)
    project_statistics.translator_count = len(translator_rows)

    # Segment counts per status.
    status_rows = segment_model_helper.fetchall(
        f'SELECT `status`,COUNT(1) FROM {table_name} WHERE {id_range_condition} GROUP BY `status`'
    )
    # (status value, statistics attribute) pairs, checked in order; the first
    # matching status wins and any other status is only printed.
    status_fields = (
        (SegmentStatusEnum.MACHINE_TRANSLATED_2.value, 'segments_machine'),
        (SegmentStatusEnum.MANUAL_TRANSLATED_3.value, 'segments_manual'),
        (SegmentStatusEnum.ADVICE_4.value, 'segments_advice'),
    )
    for status, count in status_rows:
        print(status, count)
        for status_value, field_name in status_fields:
            if status == status_value:
                setattr(project_statistics, field_name, count)
                break

    print(project_statistics)
def load_segments_from_tmx_file(self, file_path):
    """Read translation segments from a TMX file.

    One ``Segment`` is produced per ``<tu>`` element. The ``EN-US`` variant
    supplies ``source``; the ``ZH-CN`` variant supplies ``target`` together
    with the creation/change user and time.

    The language of a ``<tuv>`` is read from its ``lang`` attribute, falling
    back to ``xml:lang``. Attributes or ``<seg>`` children that are absent are
    skipped, leaving the corresponding ``Segment`` fields untouched, instead of
    aborting the whole import.

    Args:
        file_path: Path (or file object) accepted by ``ElementTree.parse``.

    Returns:
        list: The segments in document order; empty when the file has no
        ``<body>`` element.
    """
    # ElementTree exposes ``xml:lang`` under the XML namespace URI.
    xml_lang_key = '{http://www.w3.org/XML/1998/namespace}lang'

    tmx = ElementTree.parse(file_path).getroot()
    body = tmx.find('body')
    if body is None:
        return []

    result = []
    for tu in body.iter('tu'):
        segment = Segment()
        for tuv in tu.iter('tuv'):
            attrib = tuv.attrib
            lang = attrib.get('lang', attrib.get(xml_lang_key))
            seg = tuv.find('seg')
            if lang == 'EN-US':
                # English variant: the source text.
                if seg is not None:
                    segment.source = seg.text
            elif lang == 'ZH-CN':
                # Chinese variant: the translation plus its audit metadata.
                if seg is not None:
                    segment.target = seg.text
                if 'creationid' in attrib:
                    segment.create_user = attrib['creationid']
                if 'creationdate' in attrib:
                    segment.create_time = self.parse_tmx_time(
                        attrib['creationdate'])
                if 'changeid' in attrib:
                    segment.update_user = attrib['changeid']
                if 'changedate' in attrib:
                    segment.update_time = self.parse_tmx_time(
                        attrib['changedate'])
        result.append(segment)
    return result
def test_create_table_segment(self):
    """Execute the SQL returned by ``Segment().generate_create_table_sql()``."""
    create_table_sql = Segment().generate_create_table_sql()
    self.execute(create_table_sql)
def test_multi_object_field_helper(self):
    """Check whether the field_helper of several model objects interfere.

    Prints the insert SQL of a fresh ``BaseModel``, ``Project`` and
    ``Segment``, in that order, for manual comparison.
    """
    for model_class in (BaseModel, Project, Segment):
        print(model_class().generate_insert_sql())
def test_generate_insert_formatter_dict(self):
    """Print the insert SQL of a ``Segment`` whose user fields are masked."""
    masked_user = '******'
    model = Segment()
    # Same assignment order as a chained ``a = b = c = value``.
    model.create_user = masked_user
    model.update_user = masked_user
    model.review_user = masked_user
    print(model.generate_insert_sql())
def dict_to_segment(data):
    """Build a ``Segment`` whose attributes are exactly the entries of ``data``.

    ``data`` is installed as the instance ``__dict__`` itself, not copied, so
    it replaces whatever attributes the constructor set and stays shared with
    the caller's mapping.
    """
    restored = Segment()
    restored.__dict__ = data
    return restored
def save_segments(segment_list):
    """Save ``segment_list`` through a ``MySqlModelHelper`` built for ``Segment``."""
    MySqlModelHelper(Segment()).insert_item_list(segment_list)