def a_xml_attribute(cls, audit_id, audit_title, audit_group, audit_result,
                    xml_obj: CXml, xpath: str, attr_name: str,
                    qa_items: dict) -> list:
    """
    Audit the value of one attribute on the XML element found at ``xpath``.

    :param audit_id: passed through to ``__init_audit_dict__``
    :param audit_title: passed through to ``__init_audit_dict__`` and used in
        the generated messages
    :param audit_group: passed through to ``__init_audit_dict__``
    :param audit_result: passed through to ``__init_audit_dict__``
    :param xml_obj: XML document to query; may be None
    :param xpath: XPath of the element that should carry the attribute
    :param attr_name: name of the attribute to audit
    :param qa_items: check items handed to ``__a_check_value__``
    :return: the result of ``__a_check_value__`` when the attribute exists,
        otherwise a one-element list holding the audit dict with a message
        describing what was missing
    """
    audit = cls.__init_audit_dict__(audit_id, audit_title, audit_group,
                                    audit_result)

    # No document at all: nothing can be looked up.
    if xml_obj is None:
        audit[cls.Name_Message] = 'XML对象不合法, 节点[{0}]不存在'.format(xpath)
        return [audit]

    node = xml_obj.xpath_one(xpath)
    if node is None:
        audit[cls.Name_Message] = '属性[{0}]在XML节点[{1}]未找到, 请检查修正!'.format(
            audit_title, xpath)
        return [audit]

    if not CXml.attr_exist(node, attr_name):
        audit[cls.Name_Message] = '属性[{0}]在XML节点[{1}.{2}]未找到, 请检查修正!'.format(
            audit_title, xpath, attr_name)
        return [audit]

    attr_value = CXml.get_attr(node, attr_name, '')
    return cls.__a_check_value__(
        audit, attr_value,
        '属性[{0}]在XML节点[{1}.{2}]'.format(audit_title, xpath, attr_name),
        qa_items)
def a_xml_element(cls, audit_id, audit_title, audit_group, audit_result,
                  xml_obj: CXml, xpath: str, qa_items: dict) -> list:
    """
    Audit the text of the XML element found at ``xpath`` against ``qa_items``.

    :param audit_id: passed through to ``__init_audit_dict__``
    :param audit_title: passed through to ``__init_audit_dict__`` and used in
        the caption given to ``__a_check_value__``
    :param audit_group: passed through to ``__init_audit_dict__``
    :param audit_result: passed through to ``__init_audit_dict__``
    :param xml_obj: XML document to query; may be None
    :param xpath: XPath of the element to audit
    :param qa_items: check items handed to ``__a_check_value__``
    :return: the result of ``__a_check_value__`` when the element exists,
        otherwise a one-element list holding the audit dict with a message
        describing what was missing
    """
    audit = cls.__init_audit_dict__(audit_id, audit_title, audit_group,
                                    audit_result)

    # No document at all: nothing can be looked up.
    if xml_obj is None:
        audit[cls.Name_Message] = 'XML对象不合法, 节点[{0}]不存在'.format(xpath)
        return [audit]

    node = xml_obj.xpath_one(xpath)
    if node is None:
        audit[cls.Name_Message] = 'XML对象的节点[{0}]不存在, 请检查修正!'.format(xpath)
        return [audit]

    node_text = CXml.get_element_text(node)
    return cls.__a_check_value__(audit, node_text,
                                 '属性[{0}]'.format(audit_title), qa_items)
def rule_id(self, default_value):
    """
    Read the product type from the rule content, falling back to a default.

    :param default_value: value returned when the rule content is the empty
        string, or when loading / querying the rule content raises
    :return: text of the element at ``Path_21AT_MD_Content_ProductType`` in
        the rule content, or ``default_value``
    """
    if self.__rule_content == '':
        return default_value

    xml_obj = CXml()
    try:
        xml_obj.load_xml(self.__rule_content)
        return CXml.get_element_text(
            xml_obj.xpath_one(self.Path_21AT_MD_Content_ProductType))
    except Exception:
        # Best-effort lookup: any parsing/query error yields the default.
        # Deliberately not a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        return default_value
def test_xpath(self):
    """
    Query a node with a given XPath expression and check its XML form.

    :return:
    """
    source_text = u'<root name="hello world"><element name="zg">hello</element></root>'
    document = CXml()
    document.load_xml(source_text)
    # The serialized document is not inspected; the call is kept because the
    # original test performs it as well.
    serialized = document.to_xml()
    matched = document.xpath_one('./element')
    expected = '<element name="zg">hello</element>'
    assert CXml.get_element_xml(matched) == expected
def test_set_attr(self):
    """
    Set an attribute on a node and check the resulting XML.

    :return:
    """
    document = CXml()
    document.load_xml('''<root name="hello world"></root>''')
    root_node = document.xpath_one('/root')
    CXml.set_attr(root_node, 'name', 'championing')
    expected = '<root name="championing"/>'
    assert CXml.get_element_xml(root_node) == expected
def access_check_dict(self) -> dict:
    """
    Dispatch to the check-dict builder matching the business metadata type.

    Reserved method; per the original note it is to be wired up once the
    sync part is finished.

    :return: result of the type-specific builder (mdb / mat / xls / xlsx)
    :raises Exception: when the object name is None or the type read from
        ``/root/@type`` is not one of the recognised types
    """
    obj_name = self._obj_name
    bus_xml_text = self._dataset.value_by_name(0, 'dsometadataxml_bus', '')
    bus_xml = CXml()
    bus_xml.load_xml(bus_xml_text)  # initialise the xml document
    # Business metadata category. Formatting always yields a str, so no
    # separate None check on the formatted value is needed.
    bus_type = '{0}'.format(bus_xml.xpath_one("/root/@type"))

    def type_is(*candidates):
        return any(
            CUtils.equal_ignore_case(bus_type, candidate)
            for candidate in candidates)

    if obj_name is not None:
        if type_is('mdb'):
            return self.access_check_dict_mdb()
        if type_is('mat'):
            return self.access_mdb_check_dict_mat()
        if type_is('xls', 'xlsx'):
            return self.access_mdb_check_dict_xls()

    raise Exception("数据{0}业务元数据类型为'{1}'出现错误,没有可识别的类型".format(
        obj_name, bus_type))
def get_sync_dict_list(self, insert_or_updata) -> list:
    """
    Dispatch to the sync-dict-list builder matching the business metadata type.

    Per the original note, builders follow a strict convention and are
    configured through ``add_value_to_sync_dict_list`` (list, field name,
    field value, special configuration).

    :param insert_or_updata: ``self.DB_True`` means insert, ``DB_False``
        means update; forwarded unchanged to the type-specific builder
    :return: result of the type-specific builder (mdb / mat / xls / xlsx)
    :raises Exception: when the object name is None or the type read from
        ``/root/@type`` is not one of the recognised types
    """
    obj_name = self._obj_name
    bus_xml_text = self._dataset.value_by_name(0, 'dsometadataxml_bus', '')
    bus_xml = CXml()
    bus_xml.load_xml(bus_xml_text)  # initialise the xml document
    # Business metadata category. Formatting always yields a str, so no
    # separate None check on the formatted value is needed.
    bus_type = '{0}'.format(bus_xml.xpath_one("/root/@type"))

    def type_is(*candidates):
        return any(
            CUtils.equal_ignore_case(bus_type, candidate)
            for candidate in candidates)

    if obj_name is not None:
        if type_is('mdb'):
            return self.get_sync_mdb_dict_list(insert_or_updata)
        if type_is('mat'):
            return self.get_sync_mat_dict_list(insert_or_updata)
        if type_is('xls', 'xlsx'):
            return self.get_sync_xls_dict_list(insert_or_updata)

    raise Exception("数据{0}业务元数据类型为'{1}'出现错误,没有可识别的类型".format(
        obj_name, bus_type))
class C21ATBusDataSetPlugins(CDirPlugins):
    """
    Directory plugin that recognises a dataset by the business metadata XML
    file (``FileName_MetaData_Bus_21AT``) found inside the directory.
    """
    # Product type read from the metadata file by classified().
    __classified_object_type = None
    # Loaded metadata document; reset to None when loading fails.
    __metadata_xml_obj__ = None
    # Full path of the business metadata file found by classified().
    __bus_metadata_xml_file_name__ = None

    def get_information(self) -> dict:
        """
        Extend the parent's plugin information with this plugin's settings.

        The title is only filled in once the metadata document is loaded.

        :return: the information dict
        """
        information = super().get_information()
        if self.__metadata_xml_obj__ is not None:
            information[self.Plugins_Info_Title] = CXml.get_element_text(
                self.__metadata_xml_obj__.xpath_one(
                    self.Path_21AT_MD_Content_ProductName))
        information[self.Plugins_Info_Type_Code] = None  # '110001'
        information[self.Plugins_Info_Group] = \
            self.DataGroup_Industry_Land_DataSet
        information[self.Plugins_Info_Group_Title] = self.data_group_title(
            information[self.Plugins_Info_Group])
        information[self.Plugins_Info_Catalog] = self.DataCatalog_Land  # 'land'
        # Original note: the resolved title is the land-industry catalog title.
        information[self.Plugins_Info_Catalog_Title] = self.data_catalog_title(
            information[self.Plugins_Info_Catalog])
        information[self.Plugins_Info_MetaDataEngine] = None
        information[self.Plugins_Info_BusMetaDataEngine] = self.Engine_Custom
        information[self.Plugins_Info_DetailEngine] = \
            self.DetailEngine_Busdataset
        information[self.Plugins_Info_HasChildObj] = self.DB_True
        information[self.Plugins_Info_Is_Spatial] = self.DB_False
        information[self.Plugins_Info_Is_Dataset] = self.DB_True
        information[self.Plugins_Info_Spatial_Qa] = self.DB_False
        information[self.Plugins_Info_Time_Qa] = self.DB_True
        information[self.Plugins_Info_Visual_Qa] = self.DB_False

        return information

    def classified(self):
        """
        Decide whether the current directory is a dataset of this plugin.

        The directory is recognised when it contains the business metadata
        file and the product type stored in it equals (ignoring case) the
        ``Plugins_Info_Type`` entry of get_information().

        :return: tuple ``(object_confirm, object_name)``
        """
        self._object_confirm = self.Object_Confirm_IUnKnown
        self._object_name = None

        current_path = self.file_info.file_name_with_full_path
        metadata_file_name = CFile.join_file(current_path,
                                             self.FileName_MetaData_Bus_21AT)
        if CFile.file_or_path_exist(metadata_file_name):
            self.__bus_metadata_xml_file_name__ = metadata_file_name
            self.__metadata_xml_obj__ = CXml()
            try:
                self.__metadata_xml_obj__.load_file(metadata_file_name)
                self.__classified_object_type = CXml.get_element_text(
                    self.__metadata_xml_obj__.xpath_one(
                        self.Path_21AT_MD_Content_ProductType))

                expected_type = CUtils.dict_value_by_name(
                    self.get_information(), self.Plugins_Info_Type, None)
                if CUtils.equal_ignore_case(self.__classified_object_type,
                                            expected_type):
                    self._object_confirm = self.Object_Confirm_IKnown
                    self._object_name = CXml.get_element_text(
                        self.__metadata_xml_obj__.xpath_one(
                            self.Path_21AT_MD_Content_ProductName))
            except Exception:
                # A broken metadata file leaves the object unclassified;
                # the failure is logged with the offending file name.
                self.__metadata_xml_obj__ = None
                CLogger().warning(
                    '发现文件{0}符合二十一世纪业务数据集标准, 但该文件格式有误, 无法打开! '.format(
                        metadata_file_name))

        return self._object_confirm, self._object_name

    def init_metadata_bus(self, parser: CMetaDataParser) -> str:
        """
        Load the XML business metadata file into the parser's metadata object.

        :param parser: parser whose ``metadata`` receives the file
        :return: a ``CResult.merge_result`` value: Failure when the file does
            not exist, Success when it was registered, Exception when
            registering it raised
        """
        if not CFile.file_or_path_exist(self.__bus_metadata_xml_file_name__):
            return CResult.merge_result(
                self.Failure,
                '元数据文件[{0}]不存在, 无法解析! '.format(
                    self.__bus_metadata_xml_file_name__))

        try:
            parser.metadata.set_metadata_bus_file(
                self.Success,
                '元数据文件[{0}]成功加载! '.format(
                    self.__bus_metadata_xml_file_name__),
                self.MetaDataFormat_XML,
                self.__bus_metadata_xml_file_name__)
            return CResult.merge_result(
                self.Success,
                '元数据文件[{0}]成功加载! '.format(
                    self.__bus_metadata_xml_file_name__))
        except Exception:
            # Record the failure on the parser as empty text metadata.
            parser.metadata.set_metadata_bus(
                self.Failure,
                '元数据文件[{0}]格式不合法, 无法处理! '.format(
                    self.__bus_metadata_xml_file_name__),
                self.MetaDataFormat_Text,
                '')
            return CResult.merge_result(
                self.Exception,
                '元数据文件[{0}]格式不合法, 无法处理! '.format(
                    self.__bus_metadata_xml_file_name__))

    def parser_metadata_time_list(self, parser: CMetaDataParser) -> list:
        """
        Describe where the time values are found in the business metadata.

        :param parser: not used by this implementation
        :return: three descriptors (time, start time, end time), each naming
            the business source, an XPath and the XML format
        """
        return [
            {
                self.Name_Source: self.Name_Business,
                self.Name_ID: self.Name_Time,
                self.Name_XPath: '//Date',
                self.Name_Format: self.MetaDataFormat_XML
            },
            {
                self.Name_Source: self.Name_Business,
                self.Name_ID: self.Name_Start_Time,
                self.Name_XPath: '//BeginDate',
                self.Name_Format: self.MetaDataFormat_XML
            },
            {
                self.Name_Source: self.Name_Business,
                self.Name_ID: self.Name_End_Time,
                self.Name_XPath: '//EndDate',
                self.Name_Format: self.MetaDataFormat_XML
            }
        ]

    def init_qa_metadata_bus_xml_list(self, parser: CMetaDataParser) -> list:
        """
        Initialise the default audit list for the business metadata XML file.

        Original note: done; owner Li Xian.

        NOTE(review): the body is empty, so this returns None although the
        annotation says list. Confirm what callers expect before changing it.

        :param parser:
        :return:
        """
        pass
def process_mission(self, dataset) -> str:
    """
    Run one inbound (storage) mission for the directory described by dataset.

    The detailed algorithm is complex; see the data inbound scheduling
    section ("### 数据入库调度") of readme.md.

    Steps visible in this method: verify the files, log the inbound record,
    short-circuit for core/mix storage, pick the inbound schema and the
    destination storage, optionally check file locks, move the files, then
    move the metadata. If either move step fails, the files are moved back.
    Every outcome is stored through ``update_ib_result`` before returning.

    :param dataset: query result; row 0 supplies the ``query_*`` fields
    :return: a ``CResult.merge_result`` value describing success or failure
    """
    ds_src_storage_id = dataset.value_by_name(0, 'query_storage_id', '')
    ds_src_storage_type = dataset.value_by_name(0, 'query_storage_type',
                                                self.Storage_Type_Mix)
    ds_src_root_path = dataset.value_by_name(0, 'query_rootpath', '')
    ds_src_dir_id = dataset.value_by_name(0, 'query_ib_dir_id', '')

    ds_ib_id = dataset.value_by_name(0, 'query_ib_id', '')
    ds_ib_directory_name = dataset.value_by_name(0, 'query_ib_relation_dir', '')
    ds_ib_batch_no = dataset.value_by_name(0, 'query_ib_batchno', '')
    ds_ib_option = dataset.value_by_name(0, 'query_ib_option', '')

    src_need_storage_size = self.get_storage_size(
        ds_ib_id, ds_src_storage_id, ds_ib_directory_name, ds_ib_option)
    src_path = ds_src_root_path
    if not CUtils.equal_ignore_case(ds_ib_directory_name, ''):
        src_path = CFile.join_file(src_path, ds_ib_directory_name)
    src_dataset_metadata_filename = CFile.join_file(
        src_path, self.FileName_MetaData_Bus_21AT)

    CLogger().debug('入库的目录为: {0}.{1}'.format(ds_ib_id, ds_ib_directory_name))
    try:
        # Check that all files agree with the metadata.
        all_ib_file_or_path_existed = self.check_all_ib_file_or_path_existed(
            ds_ib_id)
        if not CResult.result_success(all_ib_file_or_path_existed):
            self.update_ib_result(ds_ib_id, all_ib_file_or_path_existed)
            return all_ib_file_or_path_existed

        # Save the inbound record to the log.
        result = self.ib_log(ds_ib_id, ds_src_storage_id, ds_ib_directory_name)
        if not CResult.result_success(result):
            self.update_ib_result(ds_ib_id, result)
            return result

        # Inbound directly inside core or mix storage only changes the
        # metadata status; no files are moved.
        if CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Mix) \
                or CUtils.equal_ignore_case(ds_src_storage_type,
                                            self.Storage_Type_Core):
            result_ib_in_core_or_mix_storage = \
                self.update_ib_data_status_in_core_or_mix_storage(
                    ds_ib_id, ds_src_storage_id, ds_ib_directory_name,
                    ds_src_dir_id)
            self.update_ib_result(ds_ib_id, result_ib_in_core_or_mix_storage)
            return result_ib_in_core_or_mix_storage

        # Load the metadata file of the dataset waiting in the directory.
        src_dataset_xml = CXml()
        src_dataset_type = self.Name_Default
        if CFile.file_or_path_exist(src_dataset_metadata_filename):
            src_dataset_xml.load_file(src_dataset_metadata_filename)
            src_dataset_type = CXml.get_element_text(
                src_dataset_xml.xpath_one(self.Path_MD_Bus_ProductType))
        if CUtils.equal_ignore_case(src_dataset_type, ''):
            src_dataset_type = self.Name_Default

        # Find the matching inbound schema.
        src_ib_schema = self.get_ib_schema(src_dataset_type, ds_ib_option)
        if src_ib_schema is None:
            result = CResult.merge_result(
                self.Failure,
                '目录为[{0}.{1}]的数据集类型为[{2}], 未找到匹配的入库模式, 请检查修正后重试!'.format(
                    ds_ib_id, ds_ib_directory_name, src_dataset_type))
            self.update_ib_result(ds_ib_id, result)
            return result

        # Work out the destination storage, its root path, the id of the
        # destination sub directory within that storage, the destination
        # sub path, and a feedback message.
        dest_ib_storage_id, dest_ib_root_path, desc_ib_dir_id, dest_ib_subpath, message = \
            self.get_dest_storage(
                ds_ib_batch_no, src_need_storage_size, ds_ib_option,
                src_ib_schema, src_dataset_xml)
        if dest_ib_storage_id is None or dest_ib_subpath is None:
            result = CResult.merge_result(self.Failure, message)
            self.update_ib_result(ds_ib_id, result)
            return result

        dest_ib_subpath = CFile.unify(dest_ib_subpath)
        if CJson.json_attr_value(ds_ib_option,
                                 self.Path_IB_Switch_CheckFileLocked,
                                 self.DB_False) == self.DB_True:
            src_ib_files_not_locked, message = \
                self.check_src_ib_files_not_locked(ds_src_root_path, src_path)
            if not src_ib_files_not_locked:
                result = CResult.merge_result(self.Failure, message)
                self.update_ib_result(ds_ib_id, result)
                return result

        proc_ib_src_path = ds_src_root_path
        proc_ib_dest_path = dest_ib_root_path
        if not CUtils.equal_ignore_case(dest_ib_subpath, ''):
            proc_ib_dest_path = CFile.join_file(dest_ib_root_path,
                                                dest_ib_subpath)

        if not CUtils.equal_ignore_case(ds_ib_directory_name, ''):
            proc_ib_src_path = CFile.join_file(proc_ib_src_path,
                                               ds_ib_directory_name)
            proc_ib_dest_path = CFile.join_file(proc_ib_dest_path,
                                                ds_ib_directory_name)

        def rollback_file_move(failure):
            # Move the data back to the source directory with the same
            # method; in theory this always succeeds. If it does not, both
            # messages are combined, one per line. (The second rollback
            # path used to join them with a literal "/n".)
            sub_result = self.ib_files_move(
                proc_ib_dest_path, proc_ib_src_path,
                CUtils.equal_ignore_case(ds_ib_directory_name, ''))
            if CResult.result_success(sub_result):
                return failure
            sub_result_message = CResult.result_message(sub_result)
            result_message = CResult.result_message(failure)
            return CResult.merge_result(
                self.Failure,
                '{0}\n{1}'.format(result_message, sub_result_message))

        # ---------------- pre-inbound checks are complete at this point
        # Move the source directory to the destination; for the root
        # directory only the files are moved.
        result = self.ib_files_move(
            proc_ib_src_path, proc_ib_dest_path,
            CUtils.equal_ignore_case(ds_ib_directory_name, ''))
        if not CResult.result_success(result):
            result = rollback_file_move(result)
            self.update_ib_result(ds_ib_id, result)
            return result

        # Move the source metadata to the destination storage; on error the
        # called method rolls its own changes back.
        result = self.src_ib_metadata_move_to_storage(
            ds_ib_id, ds_src_storage_id, ds_src_dir_id, ds_ib_directory_name,
            dest_ib_storage_id, desc_ib_dir_id, dest_ib_subpath)
        if not CResult.result_success(result):
            result = rollback_file_move(result)
            self.update_ib_result(ds_ib_id, result)
            return result

        result = CResult.merge_result(
            self.Success,
            '目录为[{0}.{1}]入库成功!'.format(ds_ib_id, ds_ib_directory_name))
        self.update_ib_result(ds_ib_id, result)
        return result
    except Exception as error:
        result = CResult.merge_result(
            self.Failure,
            '目录为[{0}.{1}]入库出现异常! 错误原因为: {2}'.format(
                ds_ib_id, ds_ib_directory_name, str(error)))
        self.update_ib_result(ds_ib_id, result)
        return result
class CQuality(CResource):
    """
    Collects audit results into an XML document and summarises them.

    The document is built in ``__init__`` as a root node with three children:
    total, data (items and records) and metadata (business and data).
    """
    __xml_obj: CXml
    # Root node.
    __xml_root_node = None
    # Overall quality node.
    __node_total = None

    # Data quality node.
    __node_data = None
    # Node for the overall quality of the data.
    __node_data_items = None
    # Node for the quality of each record of the data; used when auditing
    # vector data.
    __node_data_records = None

    # Metadata quality node.
    __node_metadata = None
    # Business metadata quality node.
    __node_metadata_bus = None
    # Quality node for the metadata of the data itself.
    __node_metadata_data = None

    # XPaths used by summary() to locate each level.
    __XPath_Root = '/root'
    __XPath_Total = '{0}/total'.format(__XPath_Root)
    __XPath_MetaData = '{0}/metadata'.format(__XPath_Root)
    __XPath_MetaData_Data = '{0}/data'.format(__XPath_MetaData)
    __XPath_MetaData_Bus = '{0}/business'.format(__XPath_MetaData)
    __XPath_Data = '{0}/data'.format(__XPath_Root)
    __XPath_Data_Items = '{0}/items'.format(__XPath_Data)

    def __init__(self):
        """Create the empty quality document with all container nodes."""
        self.__xml_obj = CXml()
        self.__xml_root_node = self.__xml_obj.new_xml(self.Name_Root)

        self.__node_total = CXml.create_element(self.__xml_root_node,
                                                self.Name_Total)

        self.__node_data = CXml.create_element(self.__xml_root_node,
                                               self.Name_Data)
        self.__node_data_items = CXml.create_element(self.__node_data,
                                                     self.Name_Items)
        self.__node_data_records = CXml.create_element(self.__node_data,
                                                       self.Name_Records)

        self.__node_metadata = CXml.create_element(self.__xml_root_node,
                                                   self.Name_MetaData)
        self.__node_metadata_bus = CXml.create_element(self.__node_metadata,
                                                       self.Name_Business)
        self.__node_metadata_data = CXml.create_element(self.__node_metadata,
                                                        self.Name_Data)

    def __append_quality_info(self, xml_node, audit_result: dict):
        """
        Write one audit result as an item element under ``xml_node``.

        An existing item with the same id is removed first, so only one item
        per id is kept.

        :param xml_node: container node receiving the item
        :param audit_result: dict read through the Name_ID, Name_Title,
            Name_Group, Name_Result and Name_Message keys
        """
        quality_id = CUtils.dict_value_by_name(audit_result, self.Name_ID, '')
        quality_title = CUtils.dict_value_by_name(audit_result,
                                                  self.Name_Title, '')
        quality_group = CUtils.dict_value_by_name(
            audit_result, self.Name_Group, self.QA_Group_Data_Integrity)
        quality_result = CUtils.dict_value_by_name(
            audit_result, self.Name_Result, self.QA_Result_Pass)
        quality_memo = CUtils.dict_value_by_name(audit_result,
                                                 self.Name_Message, '')

        temp_node = CXml.node_xpath_one(
            xml_node, './{0}[@id="{1}"]'.format(self.Name_Item, quality_id))
        if temp_node is not None:
            CXml.remove(temp_node)

        temp_node = CXml.create_element(xml_node, self.Name_Item)
        CXml.set_attr(temp_node, self.Name_ID, quality_id)
        CXml.set_attr(temp_node, self.Name_Group, quality_group)
        CXml.set_attr(temp_node, self.Name_Title, quality_title)
        CXml.set_attr(temp_node, self.Name_Result, quality_result)
        CXml.set_element_text(temp_node, quality_memo)

    def append_total_quality(self, audit_result: dict):
        """
        Record an overall quality result.

        Per the original notes, an audit result carries:
        . quality id: for duplicated ids only one item is kept
        . quality title: short description
        . quality type: info (hint), warn (warning), error (cannot continue)
        . quality description: detailed description

        :param audit_result:
        :return:
        """
        self.__append_quality_info(self.__node_total, audit_result)

    def append_data_quality(self, audit_result: dict):
        """
        Record a quality result for the data as a whole.

        :param audit_result:
        :return:
        """
        self.__append_quality_info(self.__node_data_items, audit_result)

    def append_data_records_quality(self, record_index, audit_result: dict):
        """
        Record a quality result for one record of the data.

        All results for the same ``record_index`` go under one record node.

        :param record_index: index identifying the record
        :param audit_result:
        :return:
        """
        temp_node = CXml.node_xpath_one(
            self.__node_data_records,
            './{0}[@index="{1}"]'.format(self.Name_Record, record_index))
        if temp_node is None:
            temp_node = CXml.create_element(self.__node_data_records,
                                            self.Name_Record)
            # Tag the new record with its index so the lookup above finds it
            # on the next call instead of creating another record node.
            CXml.set_attr(temp_node, 'index', '{0}'.format(record_index))

        self.__append_quality_info(temp_node, audit_result)

    def append_metadata_data_quality(self, audit_result: dict):
        """
        Record a quality result for the entity's own metadata.

        :param audit_result:
        :return:
        """
        self.__append_quality_info(self.__node_metadata_data, audit_result)

    def append_metadata_bus_quality(self, audit_result: dict):
        """
        Record a quality result for the business metadata.

        :param audit_result:
        :return:
        """
        self.__append_quality_info(self.__node_metadata_bus, audit_result)

    def save_as(self, file_name_with_path):
        """
        Save the quality document to a file.

        :param file_name_with_path:
        :return:
        """
        self.__xml_obj.save_file(file_name_with_path)

    def to_xml(self) -> str:
        """
        Export the quality document as XML text.

        :return:
        """
        return self.__xml_obj.to_xml()

    def summary(self) -> str:
        """
        Summarise the worst result per level as JSON text.

        Levels covered: total, metadata (data and business) and data items.
        Per-record results are not part of the summary.

        :return: JSON text produced by ``CJson.to_json``
        """
        json_obj = CJson()
        json_obj.set_value_of_name(
            self.Name_Total,
            self.__quality_result_of_level(self.__XPath_Total))

        metadata_qa_s = {
            self.Name_Data:
                self.__quality_result_of_level(self.__XPath_MetaData_Data),
            self.Name_Business:
                self.__quality_result_of_level(self.__XPath_MetaData_Bus)
        }
        json_obj.set_value_of_name(self.Name_MetaData, metadata_qa_s)

        json_obj.set_value_of_name(
            self.Name_Data,
            {
                self.Name_Items:
                    self.__quality_result_of_level(self.__XPath_Data_Items)
            })
        return json_obj.to_json()

    def __quality_result_of_level(self, xpath: str):
        """
        Return the worst result among the direct children of ``xpath``.

        :param xpath: XPath of the container node
        :return: QA_Result_Error if any child has that result, else
            QA_Result_Warn if any child has that result, else QA_Result_Pass
        """
        if self.__xml_obj.xpath_one('{0}/*[@{1}="{2}"]'.format(
                xpath, self.Name_Result, self.QA_Result_Error)) is not None:
            return self.QA_Result_Error
        elif self.__xml_obj.xpath_one('{0}/*[@{1}="{2}"]'.format(
                xpath, self.Name_Result, self.QA_Result_Warn)) is not None:
            return self.QA_Result_Warn
        else:
            return self.QA_Result_Pass