def test_split(self):
    """
    Check ``CUtils.split`` on a sample that mixes five different separators.

    The sample holds six fragments separated by '/', a backslash, '_', '-'
    and a space; the result must have six entries, contain 'ff' exactly once
    and be truthy.  The split result is printed for manual inspection.
    """
    separators = ['\\', '/', '-', '_', ' ']
    sample = 'aa/bb\\cc_dd-ee ff'

    pieces = CUtils.split(sample, separators)

    assert len(pieces) == 6
    assert pieces.count('ff') == 1
    assert pieces
    print(pieces)
def process(self) -> str:
    """
    Recognise tags for the file or directory described by ``self.file_info``,
    driven by the rules stored in ``self._tags_parser_rule``.

    Every rule is read with ``CUtils.dict_value_by_name``.  A rule is skipped
    when it is disabled, when its tag field name or catalog is empty, or when
    its keyword list is missing or empty.  For each remaining rule a sample
    string is built, split with the rule's separator through ``CUtils.split``
    and handed to ``self.process_tag``.  An exception raised while splitting
    or tagging is recorded and the loop moves on to the next rule.

    :return: ``CResult.merge_result(self.Failure, ...)`` when the rule set is
        not a list; otherwise ``CResult.merge_result(self.Success, ...)``,
        whose message lists the per-rule errors when any were collected.
    """
    # Run the parent class processing first; its result is not used here.
    super().process()

    if not isinstance(self._tags_parser_rule, list):
        return CResult.merge_result(self.Failure, '标签解析规则必须是一个数组, 您的配置有误, 请检查!')

    failures = []
    for rule in self._tags_parser_rule:
        # Read every setting of the rule up front, each with its default.
        catalog_name = CUtils.any_2_str(
            CUtils.dict_value_by_name(rule, self.Name_Catalog, ''))
        tag_field = CUtils.any_2_str(
            CUtils.dict_value_by_name(rule, self.Name_Tag, ''))
        keywords = CUtils.dict_value_by_name(rule, self.Name_Keyword, None)
        sample_kind = CUtils.any_2_str(
            CUtils.dict_value_by_name(rule, self.Name_Data_Sample, ''))
        split_chars = CUtils.dict_value_by_name(rule, self.Name_Separator, None)
        is_enabled = CUtils.dict_value_by_name(rule, self.Name_Enable, True)
        is_fuzzy = CUtils.dict_value_by_name(rule, self.Name_Fuzzy_Matching, False)

        # Skip rules that are disabled or incomplete.  The checks short-circuit
        # in this order, so ``len`` is only reached for an enabled rule whose
        # keyword list is not None.
        if (
                not is_enabled
                or CUtils.equal_ignore_case(tag_field, '')
                or CUtils.equal_ignore_case(catalog_name, '')
                or keywords is None
                or len(keywords) == 0
        ):
            continue

        by_main_name = CUtils.equal_ignore_case(sample_kind, self.Tag_DataSample_MainName)
        if not by_main_name and CUtils.equal_ignore_case(
                sample_kind, self.Tag_DataSample_RelationPath):
            # Relative-path sample: the path with its relative path is used as is.
            sample_text = CUtils.any_2_str(self.file_info.file_path_with_rel_path)
        else:
            # Main-name sample uses the bare file main name; any other sample
            # kind falls back to the main name with its relative path.  In both
            # cases the object name and the alias are appended in file-path
            # form so that all three take part in the recognition.
            if by_main_name:
                name_root = self.file_info.file_main_name
            else:
                name_root = self.file_info.file_main_name_with_rel_path
            named_path = CFile.join_file(CUtils.any_2_str(name_root), self.object_name)
            sample_text = CFile.join_file(named_path, self.__file_alias_name)

        try:
            sample_parts = CUtils.split(sample_text, split_chars)
            self.process_tag(catalog_name, tag_field, keywords, sample_parts, is_fuzzy)
        except Exception as error:
            # Best effort: remember the failure and carry on with the next rule.
            failures.append(
                '对象[{0}]在处理标签库[{1}]分类[{2}]有误, 详细错误信息为: {3}'.format(
                    self.object_name, catalog_name, sample_text, error.__str__()))

    if not failures:
        return CResult.merge_result(
            self.Success,
            '文件或目录[{0}]对象业务分类解析成功完成!'.format(
                self.file_info.file_main_name_with_rel_path))

    # Errors were collected: the result is still reported as Success, with
    # the individual error texts appended to the message.
    report = '文件或目录[{0}]的业务分类解析处理完毕, 但解析过程中出现了错误, 具体如下: \n'.format(
        self.file_info.file_main_name_with_rel_path)
    for failure in failures:
        report = CUtils.str_append(report, failure)
    return CResult.merge_result(self.Success, report)
:param data_sample_list: :param keyword_value: :param fuzzy_matching: :return: """ keyword_text = CUtils.any_2_str(keyword_value).lower() if fuzzy_matching: for data_sample in data_sample_list: data_sample_text = CUtils.any_2_str(data_sample).lower() if data_sample_text.find(keyword_text) > -1: return True else: if CUtils.list_count(data_sample_list, keyword_value) > 0: return True return False if __name__ == '__main__': """ Job对象的简洁测试模式 创建时, 以sch_center_mission表的scmid, scmParams的内容初始化即可, 调用其execute方法, 即是一次并行调度要运行的主要过程 """ # tags_parser_text = r'县界' tags_parser_text = r'县界_昆明市-2019-2018 2017' # 标签分隔符 tags_parser_split_list: list = ['\\', '_', '/', '-', ' '] text_part_list = CUtils.split(tags_parser_text, tags_parser_split_list) for item in text_part_list: print(item)