def classified(self):
    """
    Decide whether the current path is a dataset described by the 21AT
    business metadata file.

    The metadata file named by ``FileName_MetaData_Bus_21AT`` is looked up
    under ``file_info.file_name_with_full_path``. When it exists it is loaded,
    and the product type read from it is compared (ignoring case) with the
    type this plugin reports through ``get_information()``. On a match the
    object is marked as known and its name is read from the product-name node.

    If loading or reading the metadata fails, the cached XML object is reset
    to ``None`` and a warning naming the offending file is logged.

    :return: tuple ``(object_confirm, object_name)``
    """
    self._object_confirm = self.Object_Confirm_IUnKnown
    self._object_name = None

    current_path = self.file_info.file_name_with_full_path
    metadata_file_name = CFile.join_file(
        current_path, self.FileName_MetaData_Bus_21AT)

    if CFile.file_or_path_exist(metadata_file_name):
        self.__bus_metadata_xml_file_name__ = metadata_file_name
        self.__metadata_xml_obj__ = CXml()
        try:
            self.__metadata_xml_obj__.load_file(metadata_file_name)
            self.__classified_object_type = CXml.get_element_text(
                self.__metadata_xml_obj__.xpath_one(
                    self.Path_21AT_MD_Content_ProductType))

            if CUtils.equal_ignore_case(
                    self.__classified_object_type,
                    CUtils.dict_value_by_name(
                        self.get_information(), self.Plugins_Info_Type, None)):
                self._object_confirm = self.Object_Confirm_IKnown
                self._object_name = CXml.get_element_text(
                    self.__metadata_xml_obj__.xpath_one(
                        self.Path_21AT_MD_Content_ProductName))
        except Exception:
            # Best effort: an unreadable metadata file must not abort the
            # classification, but Ctrl-C / SystemExit are no longer swallowed.
            self.__metadata_xml_obj__ = None
            # The message carries a {0} placeholder, so it has to be formatted
            # with the file name; otherwise the literal "{0}" is logged.
            CLogger().warning(
                '发现文件{0}符合二十一世纪业务数据集标准, 但该文件格式有误, 无法打开! '.format(
                    metadata_file_name))

    return self._object_confirm, self._object_name
def __plugins_classified_by_plugin_node_list__(
        cls, file_info: CDMFilePathInfoEx,
        plugin_node_list: list) -> CPlugins:
    """
    Try the plugins named by the given XML nodes, in order, and return the
    first plugin object that recognises the file.

    The text of each node is used as the plugin id passed to ``cls.plugins``.
    Ids for which no plugin object is produced are skipped. A plugin whose
    classification raises is logged (with the error) and skipped as well, so
    one faulty plugin does not stop the remaining ones from being tried.

    :param file_info: description of the file or directory being classified
    :param plugin_node_list: XML nodes whose text is a plugin id
    :return: the first plugin object whose ``classified()`` result is not
        ``Object_Confirm_IUnKnown``, or ``None`` when no plugin matches
    """
    for plugin_node in plugin_node_list:
        plugin_id = CXml.get_element_text(plugin_node)
        class_classified_obj = cls.plugins(file_info, plugin_id)
        if class_classified_obj is None:
            continue

        try:
            # Only the confirmation flag matters here; the name is unused.
            object_confirm, _ = class_classified_obj.classified()
            if object_confirm != cls.Object_Confirm_IUnKnown:
                CLogger().debug('{0} is classified as {1}.{2}'.format(
                    file_info.file_main_name,
                    class_classified_obj.get_information(),
                    class_classified_obj.get_id()))
                return class_classified_obj
        except Exception as error:
            # Keep going with the next plugin, but record why this one failed.
            CLogger().debug('插件[{0}]解析出现异常, 请检查! 错误原因为: {1}'.format(
                plugin_id, error))
            continue

    return None
def get_information(self) -> dict:
    """
    Extend the parent's plugin information with the values of this plugin.

    The title is filled from the product-name node only when a metadata XML
    object is currently held; every other entry is a fixed value.

    :return: the information dictionary obtained from the parent, updated
    """
    info = super().get_information()

    xml_obj = self.__metadata_xml_obj__
    if xml_obj is not None:
        name_node = xml_obj.xpath_one(self.Path_21AT_MD_Content_ProductName)
        info[self.Plugins_Info_Title] = CXml.get_element_text(name_node)

    info[self.Plugins_Info_Type_Code] = None  # '110001'

    group = self.DataGroup_Industry_Land_DataSet
    info[self.Plugins_Info_Group] = group
    info[self.Plugins_Info_Group_Title] = self.data_group_title(group)

    catalog = self.DataCatalog_Land  # 'land'
    info[self.Plugins_Info_Catalog] = catalog
    # NOTE(review): the original note suggests this resolves to the
    # "land industry" title - confirm against data_catalog_title.
    info[self.Plugins_Info_Catalog_Title] = self.data_catalog_title(catalog)

    # Engines and capability flags, all constant for this plugin.
    fixed_entries = (
        (self.Plugins_Info_MetaDataEngine, None),
        (self.Plugins_Info_BusMetaDataEngine, self.Engine_Custom),
        (self.Plugins_Info_DetailEngine, self.DetailEngine_Busdataset),
        (self.Plugins_Info_HasChildObj, self.DB_True),
        (self.Plugins_Info_Is_Spatial, self.DB_False),
        (self.Plugins_Info_Is_Dataset, self.DB_True),
        (self.Plugins_Info_Spatial_Qa, self.DB_False),
        (self.Plugins_Info_Time_Qa, self.DB_True),
        (self.Plugins_Info_Visual_Qa, self.DB_False),
    )
    for key, value in fixed_entries:
        info[key] = value

    return info
def a_xml_element(cls, audit_id, audit_title, audit_group, audit_result,
                  xml_obj: CXml, xpath: str, qa_items: dict) -> list:
    """
    Audit the text of the element found at ``xpath`` in ``xml_obj`` against
    the checks listed in ``qa_items``.

    :param audit_id: forwarded to ``__init_audit_dict__``
    :param audit_title: forwarded to ``__init_audit_dict__``; also used in the
        label handed to the value check
    :param audit_group: forwarded to ``__init_audit_dict__``
    :param audit_result: forwarded to ``__init_audit_dict__``
    :param xml_obj: XML document to search, may be ``None``
    :param xpath: location of the element to audit
    :param qa_items: checks forwarded to ``__a_check_value__``
    :return: a one-item list holding the initial audit dictionary with a
        message when the XML object or the element is missing; otherwise
        whatever ``__a_check_value__`` returns
    """
    audit_item = cls.__init_audit_dict__(
        audit_id, audit_title, audit_group, audit_result)

    # No document at all: report it and stop.
    if xml_obj is None:
        audit_item[cls.Name_Message] = 'XML对象不合法, 节点[{0}]不存在'.format(
            xpath)
        return [audit_item]

    # Document present but the requested node is not.
    node = xml_obj.xpath_one(xpath)
    if node is None:
        audit_item[cls.Name_Message] = 'XML对象的节点[{0}]不存在, 请检查修正!'.format(
            xpath)
        return [audit_item]

    return cls.__a_check_value__(
        audit_item,
        CXml.get_element_text(node),
        '属性[{0}]'.format(audit_title),
        qa_items)
def metadata_bus_2_params(self, metadata_xml: CXml, params: dict):
    """
    Copy every direct child of the business metadata root into ``params``.

    Element names become the keys and element texts the values; both are
    lower-cased and stripped of surrounding whitespace. An existing key with
    the same name is overwritten.

    :param metadata_xml: business metadata document to read
    :param params: dictionary updated in place
    """
    children_xpath = '{0}/*'.format(self.Path_MD_Bus_Root)
    for node in metadata_xml.xpath(children_xpath):
        key = CXml.get_element_name(node).lower().strip()
        params[key] = CXml.get_element_text(node).lower().strip()
def rule_id(self, default_value):
    """
    Return the product type stored in the rule content.

    :param default_value: value returned when the rule content is an empty
        string, or when it cannot be parsed or read
    :return: text of the node at ``Path_21AT_MD_Content_ProductType``, or
        ``default_value``
    """
    if self.__rule_content == '':
        return default_value

    xml_obj = CXml()
    try:
        xml_obj.load_xml(self.__rule_content)
        return CXml.get_element_text(
            xml_obj.xpath_one(self.Path_21AT_MD_Content_ProductType))
    except Exception:
        # Best effort: malformed rule content falls back to the default, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return default_value
def test_get_element_text(self):
    """
    Read back the text of a single element.

    :return:
    """
    document = CXml()
    document.load_xml(
        '''<root name="hello world"><element>world</element></root>''')

    matched_nodes = document.xpath('/root/element')
    first_node = matched_nodes[0]

    assert CXml.get_element_text(first_node) == 'world'
def plugins_classified(cls, file_info: CDMFilePathInfoEx) -> CPlugins:
    """
    Find the plugin that recognises ``file_info``.

    Lookup order:

    1. If the rule content of ``file_info`` names a plugin list (a separate
       xpath is used for directories and for files), those plugins are tried
       in order.
    2. Otherwise, if the rule content declares a rule type, the plugin lists
       configured in the application settings are matched by that type.
    3. If no rule type is declared, the configured plugin lists are matched
       against the relative path of ``file_info``; an empty keyword matches
       every path.
    4. Finally the result of ``__plugins_classified_of_directory__`` is used.

    :param file_info: description of the file or directory being classified
    :return: the matching plugin object, or whatever
        ``__plugins_classified_of_directory__`` returns when nothing matched
    """
    class_classified_obj = None
    # Read the plugin list that the inbound rule content names explicitly
    if file_info.file_type == cls.FileType_Dir:
        plugin_node_list = CXml.xml_xpath(file_info.rule_content,
                                          cls.Path_MD_Rule_Plugins_Dir)
    else:
        plugin_node_list = CXml.xml_xpath(file_info.rule_content,
                                          cls.Path_MD_Rule_Plugins_File)

    if len(plugin_node_list) > 0:
        # Try the plugins of the explicit list, in order
        class_classified_obj = cls.__plugins_classified_by_plugin_node_list__(
            file_info, plugin_node_list)
        if class_classified_obj is not None:
            return class_classified_obj

    # No usable explicit plugin list: read the rule type of the inbound rule
    # (per the original note it equals the dataset type)
    rule_type = CXml.get_element_text(
        CXml.xml_xpath_one(file_info.rule_content, cls.Path_MD_Rule_Type))

    if not CUtils.equal_ignore_case(rule_type, ''):
        # A rule type is declared: match it against the keywords in settings
        plugins_json_array = settings.application.xpath_one(
            cls.Path_Setting_MetaData_Plugins_Dir, None)
        if plugins_json_array is not None:
            for plugins_define in plugins_json_array:
                key_word = CUtils.any_2_str(
                    CUtils.dict_value_by_name(plugins_define,
                                              cls.Name_Keyword, None))
                plugin_list = CUtils.dict_value_by_name(
                    plugins_define, cls.Name_Plugin, None)
                if plugin_list is None:
                    continue

                # NOTE(review): unlike the directory branch below, this loop
                # has no early exit, so a later definition with the same
                # keyword overwrites an earlier result (presumably also with
                # None on failure) - confirm this is intended.
                if CUtils.equal_ignore_case(key_word, rule_type):
                    class_classified_obj = cls.__plugins_classified_by_plugin_list__(
                        file_info, plugin_list)
    else:
        # No rule type: match the settings one by one against the directory
        file_path = file_info.file_path_with_rel_path
        plugins_json_array = settings.application.xpath_one(
            cls.Path_Setting_MetaData_Plugins_Dir, None)
        if plugins_json_array is not None:
            for plugins_define in plugins_json_array:
                key_word = CUtils.any_2_str(
                    CUtils.dict_value_by_name(plugins_define,
                                              cls.Name_Keyword, None))
                plugin_list = CUtils.dict_value_by_name(
                    plugins_define, cls.Name_Plugin, None)
                if plugin_list is None:
                    continue

                # todo(note) An empty keyword means that every sub-directory
                # tries the configured plugin list first!!!
                if CUtils.equal_ignore_case(key_word, ''):
                    class_classified_obj = cls.__plugins_classified_by_plugin_list__(
                        file_info, plugin_list)
                else:
                    if CFile.subpath_in_path(CUtils.any_2_str(key_word),
                                             file_path):
                        class_classified_obj = cls.__plugins_classified_by_plugin_list__(
                            file_info, plugin_list)

                # First definition that yields a plugin wins
                if class_classified_obj is not None:
                    return class_classified_obj
                else:
                    continue

    # Fall back to the directory-based lookup when nothing matched so far
    if class_classified_obj is not None:
        return class_classified_obj
    else:
        return cls.__plugins_classified_of_directory__(file_info)
def process_mission(self, dataset) -> str:
    """
    Run one inbound (data warehousing) mission for the directory described by
    the first row of ``dataset``.

    The algorithm is involved; see the "数据入库调度" section of readme.md.
    In outline:

    1. Validate the inbound files and write the inbound log.
    2. For core or mixed source storage, only update the data status.
    3. Otherwise determine the dataset type, the inbound schema and the
       destination storage, optionally check that no source file is locked,
       move the files, then move the metadata.
    4. If the file move or the metadata move fails, move the files back; when
       that rollback fails too, both messages are reported.

    Every exit path stores its result through ``update_ib_result`` before
    returning it. ``get_storage_size`` runs before the ``try`` block, so an
    error raised there propagates to the caller.

    :param dataset: query result; row 0 supplies the ``query_*`` fields
    :return: the result built by ``CResult.merge_result`` or returned by the
        helper that ended the mission
    """
    ds_src_storage_id = dataset.value_by_name(0, 'query_storage_id', '')
    ds_src_storage_type = dataset.value_by_name(
        0, 'query_storage_type', self.Storage_Type_Mix)
    ds_src_root_path = dataset.value_by_name(0, 'query_rootpath', '')
    ds_src_dir_id = dataset.value_by_name(0, 'query_ib_dir_id', '')

    ds_ib_id = dataset.value_by_name(0, 'query_ib_id', '')
    ds_ib_directory_name = dataset.value_by_name(
        0, 'query_ib_relation_dir', '')
    ds_ib_batch_no = dataset.value_by_name(0, 'query_ib_batchno', '')
    ds_ib_option = dataset.value_by_name(0, 'query_ib_option', '')

    src_need_storage_size = self.get_storage_size(
        ds_ib_id, ds_src_storage_id, ds_ib_directory_name, ds_ib_option)

    src_path = ds_src_root_path
    if not CUtils.equal_ignore_case(ds_ib_directory_name, ''):
        src_path = CFile.join_file(src_path, ds_ib_directory_name)
    src_dataset_metadata_filename = CFile.join_file(
        src_path, self.FileName_MetaData_Bus_21AT)

    CLogger().debug(
        '入库的目录为: {0}.{1}'.format(ds_ib_id, ds_ib_directory_name))
    try:
        # Check that all files agree with the metadata
        all_ib_file_or_path_existed = self.check_all_ib_file_or_path_existed(
            ds_ib_id)
        if not CResult.result_success(all_ib_file_or_path_existed):
            self.update_ib_result(ds_ib_id, all_ib_file_or_path_existed)
            return all_ib_file_or_path_existed

        # Save the inbound record to the log
        result = self.ib_log(
            ds_ib_id, ds_src_storage_id, ds_ib_directory_name)
        if not CResult.result_success(result):
            self.update_ib_result(ds_ib_id, result)
            return result

        # Inbound directly inside core or mixed storage only changes the
        # metadata status
        if CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Mix) \
                or CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Core):
            result_ib_in_core_or_mix_storage = self.update_ib_data_status_in_core_or_mix_storage(
                ds_ib_id, ds_src_storage_id, ds_ib_directory_name,
                ds_src_dir_id)
            self.update_ib_result(ds_ib_id, result_ib_in_core_or_mix_storage)
            return result_ib_in_core_or_mix_storage

        # Load the metadata file of the dataset to be warehoused, if present
        src_dataset_xml = CXml()
        src_dataset_type = self.Name_Default
        if CFile.file_or_path_exist(src_dataset_metadata_filename):
            src_dataset_xml.load_file(src_dataset_metadata_filename)
            src_dataset_type = CXml.get_element_text(
                src_dataset_xml.xpath_one(self.Path_MD_Bus_ProductType))

        if CUtils.equal_ignore_case(src_dataset_type, ''):
            src_dataset_type = self.Name_Default

        # Get the matching inbound schema
        src_ib_schema = self.get_ib_schema(src_dataset_type, ds_ib_option)
        if src_ib_schema is None:
            result = CResult.merge_result(
                self.Failure,
                '目录为[{0}.{1}]的数据集类型为[{2}], 未找到匹配的入库模式, 请检查修正后重试!'.format(
                    ds_ib_id, ds_ib_directory_name, src_dataset_type))
            self.update_ib_result(ds_ib_id, result)
            return result

        # Compute the destination storage, its root path, the id of the
        # destination sub-directory in that storage, the destination
        # sub-path, and a feedback message
        dest_ib_storage_id, dest_ib_root_path, desc_ib_dir_id, dest_ib_subpath, message = self.get_dest_storage(
            ds_ib_batch_no, src_need_storage_size, ds_ib_option,
            src_ib_schema, src_dataset_xml)
        if dest_ib_storage_id is None or dest_ib_subpath is None:
            result = CResult.merge_result(self.Failure, message)
            self.update_ib_result(ds_ib_id, result)
            return result

        dest_ib_subpath = CFile.unify(dest_ib_subpath)

        # Optional check, switched on through the inbound option
        if CJson.json_attr_value(ds_ib_option,
                                 self.Path_IB_Switch_CheckFileLocked,
                                 self.DB_False) == self.DB_True:
            src_ib_files_not_locked, message = self.check_src_ib_files_not_locked(
                ds_src_root_path, src_path)
            if not src_ib_files_not_locked:
                result = CResult.merge_result(self.Failure, message)
                self.update_ib_result(ds_ib_id, result)
                return result

        proc_ib_src_path = ds_src_root_path
        proc_ib_dest_path = dest_ib_root_path
        if not CUtils.equal_ignore_case(dest_ib_subpath, ''):
            proc_ib_dest_path = CFile.join_file(
                dest_ib_root_path, dest_ib_subpath)

        if not CUtils.equal_ignore_case(ds_ib_directory_name, ''):
            proc_ib_src_path = CFile.join_file(
                proc_ib_src_path, ds_ib_directory_name)
            proc_ib_dest_path = CFile.join_file(
                proc_ib_dest_path, ds_ib_directory_name)

        # ---------------------------------- pre-inbound checks end here
        # Move the source directory to the destination; for the root
        # directory only the files are moved
        result = self.ib_files_move(
            proc_ib_src_path, proc_ib_dest_path,
            CUtils.equal_ignore_case(ds_ib_directory_name, ''))
        if not CResult.result_success(result):
            # Move the data back with the same method; in theory this should
            # always succeed
            sub_result = self.ib_files_move(
                proc_ib_dest_path, proc_ib_src_path,
                CUtils.equal_ignore_case(ds_ib_directory_name, ''))
            if not CResult.result_success(sub_result):
                sub_result_message = CResult.result_message(sub_result)
                result_message = CResult.result_message(result)
                result = CResult.merge_result(
                    self.Failure,
                    '{0}\n{1}'.format(result_message, sub_result_message))

            self.update_ib_result(ds_ib_id, result)
            return result

        # Move the metadata of the source files to the destination storage;
        # per the original note, failures are rolled back inside the method
        result = self.src_ib_metadata_move_to_storage(
            ds_ib_id, ds_src_storage_id, ds_src_dir_id, ds_ib_directory_name,
            dest_ib_storage_id, desc_ib_dir_id, dest_ib_subpath)
        if not CResult.result_success(result):
            # Move the data back with the same method; in theory this should
            # always succeed
            sub_result = self.ib_files_move(
                proc_ib_dest_path, proc_ib_src_path,
                CUtils.equal_ignore_case(ds_ib_directory_name, ''))
            if not CResult.result_success(sub_result):
                sub_result_message = CResult.result_message(sub_result)
                result_message = CResult.result_message(result)
                # Separator is a newline, as in the file-move branch above
                # (the original had the typo '/n' here).
                result = CResult.merge_result(
                    self.Failure,
                    '{0}\n{1}'.format(result_message, sub_result_message))

            self.update_ib_result(ds_ib_id, result)
            return result

        result = CResult.merge_result(
            self.Success,
            '目录为[{0}.{1}]入库成功!'.format(ds_ib_id, ds_ib_directory_name))
        self.update_ib_result(ds_ib_id, result)
        return result
    except Exception as error:
        # Boundary handler: any unexpected error becomes a failed result
        result = CResult.merge_result(
            self.Failure,
            '目录为[{0}.{1}]入库出现异常! 错误原因为: {2}'.format(
                ds_ib_id, ds_ib_directory_name, str(error)))
        self.update_ib_result(ds_ib_id, result)
        return result