def test_load_file(self):
    """
    Initialise the xml object from the given xml file name.

    The test passes when the test file exists and loading it raises nothing;
    it fails when the file is missing.
    """
    xml = CXml()
    file_present = CFile.file_or_path_exist(self.test_filename)
    if file_present:
        xml.load_file(self.test_filename)
    # A missing file is the only failure condition besides load_file raising.
    assert file_present
class C21ATBusDataSetPlugins(CDirPlugins):
    """
    Directory plugin for datasets described by a 21AT business metadata file.

    classified() looks for the file named by FileName_MetaData_Bus_21AT inside
    the current directory, loads it as XML and compares its product type with
    the plugin type reported by get_information().
    """

    # Product type text read from the metadata XML during classified().
    __classified_object_type = None
    # Loaded metadata document; None until classified() finds the file, and
    # reset to None when loading or reading it fails.
    __metadata_xml_obj__ = None
    # Full name of the business metadata file found by classified().
    __bus_metadata_xml_file_name__ = None

    def get_information(self) -> dict:
        """
        Extend the parent plugin information with the 21AT dataset settings.

        :return: the information dict returned by the parent class, updated
            in place with the entries set below
        """
        information = super().get_information()
        if self.__metadata_xml_obj__ is not None:
            # The title is only available once the metadata XML is loaded.
            information[self.Plugins_Info_Title] = CXml.get_element_text(
                self.__metadata_xml_obj__.xpath_one(
                    self.Path_21AT_MD_Content_ProductName))
        information[self.Plugins_Info_Type_Code] = None  # '110001'
        information[
            self.Plugins_Info_Group] = self.DataGroup_Industry_Land_DataSet
        information[self.Plugins_Info_Group_Title] = self.data_group_title(
            information[self.Plugins_Info_Group])
        information[
            self.Plugins_Info_Catalog] = self.DataCatalog_Land  # 'land'
        information[self.Plugins_Info_Catalog_Title] = self.data_catalog_title(
            information[self.Plugins_Info_Catalog])  # 'land industry'
        information[self.Plugins_Info_MetaDataEngine] = None
        information[self.Plugins_Info_BusMetaDataEngine] = self.Engine_Custom
        information[
            self.Plugins_Info_DetailEngine] = self.DetailEngine_Busdataset
        information[self.Plugins_Info_HasChildObj] = self.DB_True
        information[self.Plugins_Info_Is_Spatial] = self.DB_False
        information[self.Plugins_Info_Is_Dataset] = self.DB_True
        information[self.Plugins_Info_Spatial_Qa] = self.DB_False
        information[self.Plugins_Info_Time_Qa] = self.DB_True
        information[self.Plugins_Info_Visual_Qa] = self.DB_False
        return information

    def classified(self):
        """
        Decide whether the current directory is a dataset of this plugin type.

        :return: tuple (object_confirm, object_name); the defaults are
            Object_Confirm_IUnKnown and None, replaced by Object_Confirm_IKnown
            and the product name when the product type in the metadata file
            matches the plugin type
        """
        self._object_confirm = self.Object_Confirm_IUnKnown
        self._object_name = None

        current_path = self.file_info.file_name_with_full_path
        metadata_file_name = CFile.join_file(current_path,
                                             self.FileName_MetaData_Bus_21AT)
        if CFile.file_or_path_exist(metadata_file_name):
            self.__bus_metadata_xml_file_name__ = metadata_file_name
            self.__metadata_xml_obj__ = CXml()
            try:
                self.__metadata_xml_obj__.load_file(metadata_file_name)
                self.__classified_object_type = CXml.get_element_text(
                    self.__metadata_xml_obj__.xpath_one(
                        self.Path_21AT_MD_Content_ProductType))
                if CUtils.equal_ignore_case(
                        self.__classified_object_type,
                        CUtils.dict_value_by_name(self.get_information(),
                                                  self.Plugins_Info_Type,
                                                  None)):
                    self._object_confirm = self.Object_Confirm_IKnown
                    self._object_name = CXml.get_element_text(
                        self.__metadata_xml_obj__.xpath_one(
                            self.Path_21AT_MD_Content_ProductName))
            except Exception:
                # Best effort: an unreadable metadata file leaves the object
                # unclassified. Only Exception is caught so KeyboardInterrupt
                # and SystemExit still propagate.
                self.__metadata_xml_obj__ = None
                # The message has a {0} placeholder, so it must be formatted
                # with the offending file name before being logged.
                CLogger().warning(
                    '发现文件{0}符合二十一世纪业务数据集标准, 但该文件格式有误, 无法打开! '.format(
                        metadata_file_name))

        return self._object_confirm, self._object_name

    def init_metadata_bus(self, parser: CMetaDataParser) -> str:
        """
        Load the xml business metadata file into the metadata object of parser.

        :param parser: parser whose ``metadata`` object receives the file
        :return: the value built by CResult.merge_result: Failure when the
            metadata file does not exist, Success when it was loaded,
            Exception when loading raised
        """
        if not CFile.file_or_path_exist(self.__bus_metadata_xml_file_name__):
            return CResult.merge_result(
                self.Failure, '元数据文件[{0}]不存在, 无法解析! '.format(
                    self.__bus_metadata_xml_file_name__))

        try:
            parser.metadata.set_metadata_bus_file(
                self.Success,
                '元数据文件[{0}]成功加载! '.format(self.__bus_metadata_xml_file_name__),
                self.MetaDataFormat_XML, self.__bus_metadata_xml_file_name__)
            return CResult.merge_result(
                self.Success,
                '元数据文件[{0}]成功加载! '.format(self.__bus_metadata_xml_file_name__))
        except Exception:
            # Record the failure on the metadata object as empty text, then
            # report it to the caller.
            parser.metadata.set_metadata_bus(
                self.Failure, '元数据文件[{0}]格式不合法, 无法处理! '.format(
                    self.__bus_metadata_xml_file_name__),
                self.MetaDataFormat_Text, '')
            return CResult.merge_result(
                self.Exception, '元数据文件[{0}]格式不合法, 无法处理! '.format(
                    self.__bus_metadata_xml_file_name__))

    def parser_metadata_time_list(self, parser: CMetaDataParser) -> list:
        """
        Return the standard-mode list describing where time values are read.

        Each entry names the business metadata as the source, the time field
        being filled, the XPath to read and the XML metadata format.

        :param parser: not used by this implementation
        :return: list of three dicts (time, start time, end time)
        """
        return [{
            self.Name_Source: self.Name_Business,
            self.Name_ID: self.Name_Time,
            self.Name_XPath: '//Date',
            self.Name_Format: self.MetaDataFormat_XML
        }, {
            self.Name_Source: self.Name_Business,
            self.Name_ID: self.Name_Start_Time,
            self.Name_XPath: '//BeginDate',
            self.Name_Format: self.MetaDataFormat_XML
        }, {
            self.Name_Source: self.Name_Business,
            self.Name_ID: self.Name_End_Time,
            self.Name_XPath: '//EndDate',
            self.Name_Format: self.MetaDataFormat_XML
        }]

    def init_qa_metadata_bus_xml_list(self, parser: CMetaDataParser) -> list:
        """
        Initialise the default check list for the business metadata xml file.

        Status: done. Owner: Li Xian.

        :param parser: not used by this implementation
        :return: nothing is returned explicitly
        """
        # NOTE(review): the body is empty, so this returns None although the
        # annotation says list - confirm callers accept None.
        pass
def process_mission(self, dataset) -> str:
    """
    Run the inbound-storage mission for the directory described by dataset.

    The detailed algorithm is involved; see the "data inbound scheduling"
    section of readme.md.

    Every exit path stores its result through update_ib_result before
    returning it. Any exception raised inside the processing steps is caught
    and reported as a Failure result.

    :param dataset: query result; row 0 supplies the source storage, the
        inbound record id, the relative directory, the batch number and the
        inbound options
    :return: the result value produced by CResult.merge_result or by one of
        the processing steps
    """
    ds_src_storage_id = dataset.value_by_name(0, 'query_storage_id', '')
    ds_src_storage_type = dataset.value_by_name(0, 'query_storage_type',
                                                self.Storage_Type_Mix)
    ds_src_root_path = dataset.value_by_name(0, 'query_rootpath', '')
    ds_src_dir_id = dataset.value_by_name(0, 'query_ib_dir_id', '')
    ds_ib_id = dataset.value_by_name(0, 'query_ib_id', '')
    ds_ib_directory_name = dataset.value_by_name(0, 'query_ib_relation_dir', '')
    ds_ib_batch_no = dataset.value_by_name(0, 'query_ib_batchno', '')
    ds_ib_option = dataset.value_by_name(0, 'query_ib_option', '')

    src_need_storage_size = self.get_storage_size(ds_ib_id, ds_src_storage_id,
                                                  ds_ib_directory_name,
                                                  ds_ib_option)

    # An empty relative directory means the storage root itself is inbound;
    # in that case only the files are moved, not the directory.
    ib_from_root = CUtils.equal_ignore_case(ds_ib_directory_name, '')

    src_path = ds_src_root_path
    if not ib_from_root:
        src_path = CFile.join_file(src_path, ds_ib_directory_name)
    src_dataset_metadata_filename = CFile.join_file(
        src_path, self.FileName_MetaData_Bus_21AT)

    def report(outcome):
        # Store the mission result, then hand it back to be returned.
        self.update_ib_result(ds_ib_id, outcome)
        return outcome

    CLogger().debug('入库的目录为: {0}.{1}'.format(ds_ib_id, ds_ib_directory_name))
    try:
        # Check that every file or path recorded for this inbound exists.
        all_ib_file_or_path_existed = self.check_all_ib_file_or_path_existed(
            ds_ib_id)
        if not CResult.result_success(all_ib_file_or_path_existed):
            return report(all_ib_file_or_path_existed)

        # Save the inbound records to the log.
        result = self.ib_log(ds_ib_id, ds_src_storage_id, ds_ib_directory_name)
        if not CResult.result_success(result):
            return report(result)

        # Inbound directly inside a core or mixed storage only changes the
        # metadata status; no files are moved.
        if CUtils.equal_ignore_case(ds_src_storage_type, self.Storage_Type_Mix) \
                or CUtils.equal_ignore_case(ds_src_storage_type,
                                            self.Storage_Type_Core):
            return report(
                self.update_ib_data_status_in_core_or_mix_storage(
                    ds_ib_id, ds_src_storage_id, ds_ib_directory_name,
                    ds_src_dir_id))

        # Load the metadata file of the dataset waiting in the directory.
        src_dataset_xml = CXml()
        src_dataset_type = self.Name_Default
        if CFile.file_or_path_exist(src_dataset_metadata_filename):
            src_dataset_xml.load_file(src_dataset_metadata_filename)
            src_dataset_type = CXml.get_element_text(
                src_dataset_xml.xpath_one(self.Path_MD_Bus_ProductType))
        if CUtils.equal_ignore_case(src_dataset_type, ''):
            src_dataset_type = self.Name_Default

        # Find the inbound schema matching the dataset type.
        src_ib_schema = self.get_ib_schema(src_dataset_type, ds_ib_option)
        if src_ib_schema is None:
            return report(
                CResult.merge_result(
                    self.Failure,
                    '目录为[{0}.{1}]的数据集类型为[{2}], 未找到匹配的入库模式, 请检查修正后重试!'.format(
                        ds_ib_id, ds_ib_directory_name, src_dataset_type)))

        # Work out the target storage, its root path, the id of the parent
        # directory inside the target storage, the target sub path and a
        # feedback message.
        dest_ib_storage_id, dest_ib_root_path, desc_ib_dir_id, dest_ib_subpath, message = \
            self.get_dest_storage(ds_ib_batch_no, src_need_storage_size,
                                  ds_ib_option, src_ib_schema, src_dataset_xml)
        if dest_ib_storage_id is None or dest_ib_subpath is None:
            return report(CResult.merge_result(self.Failure, message))

        dest_ib_subpath = CFile.unify(dest_ib_subpath)

        # Optional check, switched on through the inbound options, that no
        # source file is locked.
        if CJson.json_attr_value(ds_ib_option,
                                 self.Path_IB_Switch_CheckFileLocked,
                                 self.DB_False) == self.DB_True:
            src_ib_files_not_locked, message = self.check_src_ib_files_not_locked(
                ds_src_root_path, src_path)
            if not src_ib_files_not_locked:
                return report(CResult.merge_result(self.Failure, message))

        proc_ib_src_path = ds_src_root_path
        proc_ib_dest_path = dest_ib_root_path
        if not CUtils.equal_ignore_case(dest_ib_subpath, ''):
            proc_ib_dest_path = CFile.join_file(dest_ib_root_path,
                                                dest_ib_subpath)
        if not ib_from_root:
            proc_ib_src_path = CFile.join_file(proc_ib_src_path,
                                               ds_ib_directory_name)
            proc_ib_dest_path = CFile.join_file(proc_ib_dest_path,
                                                ds_ib_directory_name)

        def roll_back_move(failure):
            # Move the data back to the source directory with the same
            # method; in theory this always succeeds. If it does not, both
            # messages are reported, separated by a newline.
            undo = self.ib_files_move(proc_ib_dest_path, proc_ib_src_path,
                                      ib_from_root)
            if CResult.result_success(undo):
                return failure
            return CResult.merge_result(
                self.Failure,
                '{0}\n{1}'.format(CResult.result_message(failure),
                                  CResult.result_message(undo)))

        # ---------------- all checks before the inbound are finished here
        # Move the source directory to the target directory; for the root
        # directory only the files are moved.
        result = self.ib_files_move(proc_ib_src_path, proc_ib_dest_path,
                                    ib_from_root)
        if not CResult.result_success(result):
            return report(roll_back_move(result))

        # Move the metadata of the source files to the target storage; per
        # the original comment, that method rolls back internally on error.
        result = self.src_ib_metadata_move_to_storage(
            ds_ib_id, ds_src_storage_id, ds_src_dir_id, ds_ib_directory_name,
            dest_ib_storage_id, desc_ib_dir_id, dest_ib_subpath)
        if not CResult.result_success(result):
            return report(roll_back_move(result))

        return report(
            CResult.merge_result(
                self.Success,
                '目录为[{0}.{1}]入库成功!'.format(ds_ib_id, ds_ib_directory_name)))
    except Exception as error:
        return report(
            CResult.merge_result(
                self.Failure, '目录为[{0}.{1}]入库出现异常! 错误原因为: {2}'.format(
                    ds_ib_id, ds_ib_directory_name, str(error))))
class CMetaData(CResource):
    """
    Holder for the metadata extracted for one object.

    It keeps five groups of state, each with an extraction result code and a
    memo: entity metadata, business metadata, view (thumbnail and browse
    image), time information and spatial metadata, plus a CQuality object.
    Entity and business metadata are stored as text, XML or JSON according to
    their format code.
    """

    def __init__(self):
        self.__quality__ = CQuality()

        # Entity metadata.
        self.__metadata_extract_result__ = self.Not_Support
        self.__metadata_extract_memo__ = ''
        self.__metadata_text__ = None
        self.__metadata_xml__ = CXml()
        self.__metadata_json__ = CJson()
        self.__metadata_type__ = self.MetaDataFormat_Text

        # Business metadata.
        self.__metadata_bus_extract_result__ = self.Not_Support
        self.__metadata_bus_extract_memo__ = ''
        self.__metadata_bus_text__ = None
        self.__metadata_bus_xml__ = CXml()
        self.__metadata_bus_json__ = CJson()
        self.__metadata_bus_type__ = self.MetaDataFormat_Text

        # View metadata: thumbnail and browse image file names.
        self.__thumb_img_file_name__ = ''
        self.__browse_img_file_name__ = ''
        self.__metadata_view_extract_result__ = self.Not_Support
        self.__metadata_view_extract_memo__ = ''

        # Time metadata.
        self.__time_information__ = CJson()
        self.__metadata_time_extract_result__ = self.Not_Support
        self.__metadata_time_extract_memo__ = ''

        # Spatial metadata.
        self.__metadata_spatial_extract_result__ = self.Not_Support
        self.__metadata_spatial_extract_memo__ = ''
        self.__metadata_spatial__ = CMDSpatial()

    def metadata_time(self):
        """
        Return (result, memo, time_json).

        time_json is the serialised time information when the result equals
        DB_True, otherwise an empty string.
        """
        if self.__metadata_time_extract_result__ == self.DB_True:
            time_json = self.__time_information__.to_json()
        else:
            time_json = ''
        return (self.__metadata_time_extract_result__,
                self.__metadata_time_extract_memo__, time_json)

    def metadata_view(self):
        """
        Return (result, memo, thumb_file_name, browse_file_name).

        Both file names are empty strings unless the result equals DB_True.
        """
        thumb_name, browse_name = '', ''
        if self.__metadata_view_extract_result__ == self.DB_True:
            thumb_name = self.__thumb_img_file_name__
            browse_name = self.__browse_img_file_name__
        return (self.__metadata_view_extract_result__,
                self.__metadata_view_extract_memo__, thumb_name, browse_name)

    def metadata_spatial(self):
        """
        Return (result, memo, spatial_object).

        The spatial object is returned whatever the result code is.
        """
        return (self.__metadata_spatial_extract_result__,
                self.__metadata_spatial_extract_memo__,
                self.__metadata_spatial__)

    @property
    def quality(self):
        """The CQuality object created for this instance."""
        return self.__quality__

    @property
    def thumb_img_file_name(self):
        """File name of the thumbnail image."""
        return self.__thumb_img_file_name__

    @thumb_img_file_name.setter
    def thumb_img_file_name(self, value):
        self.__thumb_img_file_name__ = value

    @property
    def browse_img_file_name(self):
        """File name of the browse image."""
        return self.__browse_img_file_name__

    @browse_img_file_name.setter
    def browse_img_file_name(self, value):
        self.__browse_img_file_name__ = value

    @property
    def time_information(self) -> CJson:
        """The CJson object holding the time attributes."""
        return self.__time_information__

    @property
    def metadata_extract_result(self):
        """Result code of the entity metadata extraction."""
        return self.__metadata_extract_result__

    @property
    def metadata_bus_extract_result(self):
        """Result code of the business metadata extraction."""
        return self.__metadata_bus_extract_result__

    @property
    def metadata_view_extract_result(self):
        """Result code of the view metadata extraction."""
        return self.__metadata_view_extract_result__

    @property
    def metadata_time_extract_result(self):
        """Result code of the time metadata extraction."""
        return self.__metadata_time_extract_result__

    @property
    def metadata_spatial_extract_result(self):
        """Result code of the spatial metadata extraction."""
        return self.__metadata_spatial_extract_result__

    def metadata(self):
        """
        Return (result, memo, format, content) for the entity metadata.

        content is None unless the result equals DB_True; otherwise it is the
        serialised JSON, the serialised XML or the plain text, chosen by the
        stored format code.
        """
        outcome = self.__metadata_extract_result__
        kind = self.__metadata_type__
        if outcome != self.DB_True:
            content = None
        elif kind == self.MetaDataFormat_Json:
            content = self.__metadata_json__.to_json()
        elif kind == self.MetaDataFormat_XML:
            content = self.__metadata_xml__.to_xml()
        else:
            content = self.__metadata_text__
        return outcome, self.__metadata_extract_memo__, kind, content

    @property
    def metadata_type(self):
        """Format code of the entity metadata."""
        return self.__metadata_type__

    @property
    def metadata_bus_type(self):
        """Format code of the business metadata."""
        return self.__metadata_bus_type__

    def metadata_xml(self) -> CXml:
        """Return the XML object of the entity metadata."""
        return self.__metadata_xml__

    def metadata_json(self) -> CJson:
        """Return the JSON object of the entity metadata."""
        return self.__metadata_json__

    def metadata_bus_xml(self) -> CXml:
        """Return the XML object of the business metadata."""
        return self.__metadata_bus_xml__

    def metadata_bus_json(self) -> CJson:
        """Return the JSON object of the business metadata."""
        return self.__metadata_bus_json__

    def metadata_spatial_obj(self) -> CMDSpatial:
        """Return the spatial metadata object."""
        return self.__metadata_spatial__

    def set_metadata_spatial(self,
                             result: int,
                             memo: str,
                             spatial_metadata_type=None,
                             spatial_metadata=None):
        """
        Store the spatial extraction result and, optionally, one spatial value.

        :param result: extraction result code
        :param memo: extraction memo
        :param spatial_metadata_type: one of the CResource
            Spatial_MetaData_Type_* codes; None stores only result and memo,
            and an unrecognised code stores no value
        :param spatial_metadata: value assigned to the matching attribute of
            the spatial object
        """
        self.__metadata_spatial_extract_result__ = result
        self.__metadata_spatial_extract_memo__ = memo
        if spatial_metadata_type is None:
            return

        # (CResource constant name, spatial object attribute), checked in
        # this order; the first equal code wins. Constants are resolved one
        # at a time, only as far as the matching entry.
        slots = (
            ('Spatial_MetaData_Type_Native_Center', 'native_center'),
            ('Spatial_MetaData_Type_Native_BBox', 'native_box'),
            ('Spatial_MetaData_Type_Native_Geom', 'native_geom'),
            ('Spatial_MetaData_Type_Wgs84_Center', 'wgs84_center'),
            ('Spatial_MetaData_Type_Wgs84_BBox', 'wgs84_bbox'),
            ('Spatial_MetaData_Type_Wgs84_Geom', 'wgs84_geom'),
            ('Spatial_MetaData_Type_Prj_Wkt', 'prj_wkt'),
            ('Spatial_MetaData_Type_Prj_Proj4', 'prj_proj4'),
            ('Spatial_MetaData_Type_Prj_Project', 'prj_project'),
            ('Spatial_MetaData_Type_Prj_Coordinate', 'prj_coordinate'),
            ('Spatial_MetaData_Type_Prj_Degree', 'prj_degree'),
            ('Spatial_MetaData_Type_Prj_Zone', 'prj_zone'),
            ('Spatial_MetaData_Type_Prj_Source', 'prj_source'),
        )
        for constant_name, attribute_name in slots:
            if spatial_metadata_type == getattr(CResource, constant_name):
                setattr(self.__metadata_spatial__, attribute_name,
                        spatial_metadata)
                break

    def set_metadata_view(self,
                          result: int,
                          memo: str,
                          view_metadata_type=None,
                          view_metadata=None):
        """
        Store the view extraction result and, optionally, one image file name.

        :param result: extraction result code
        :param memo: extraction memo
        :param view_metadata_type: View_MetaData_Type_Browse or
            View_MetaData_Type_Thumb; None resets both file names to None,
            and any other code leaves them untouched
        :param view_metadata: file name stored for the given type
        """
        self.__metadata_view_extract_result__ = result
        self.__metadata_view_extract_memo__ = memo
        if view_metadata_type is None:
            self.__browse_img_file_name__ = None
            self.__thumb_img_file_name__ = None
        elif view_metadata_type == CResource.View_MetaData_Type_Browse:
            self.__browse_img_file_name__ = view_metadata
        elif view_metadata_type == CResource.View_MetaData_Type_Thumb:
            self.__thumb_img_file_name__ = view_metadata

    def set_metadata_time(self,
                          result: int,
                          memo: str,
                          time_attr_name=None,
                          time_attr_value=None):
        """
        Store the time extraction result and, optionally, one time attribute.

        :param result: extraction result code
        :param memo: extraction memo
        :param time_attr_name: attribute name, converted with
            CUtils.any_2_str; None stores only result and memo
        :param time_attr_value: value stored under that name
        """
        self.__metadata_time_extract_result__ = result
        self.__metadata_time_extract_memo__ = memo
        if time_attr_name is None:
            return
        self.__time_information__.set_value_of_name(
            CUtils.any_2_str(time_attr_name), time_attr_value)

    def set_metadata(self, metadata_extract_result: int,
                     metadata_extract_memo: str, metadata_type: int,
                     metadata_text):
        """
        Store the entity metadata from text in the given format.

        JSON and XML text is loaded into the matching object and the plain
        text becomes ''; any other format keeps the text as-is. The object of
        the other structured format is replaced by a fresh one.

        :param metadata_extract_result: extraction result code
        :param metadata_extract_memo: extraction memo
        :param metadata_type: format code (MetaDataFormat_*)
        :param metadata_text: metadata content
        """
        self.__metadata_extract_result__ = metadata_extract_result
        self.__metadata_extract_memo__ = metadata_extract_memo
        self.__metadata_type__ = metadata_type

        if metadata_type == self.MetaDataFormat_Json:
            self.__metadata_text__ = ''
            self.__metadata_xml__ = CXml()
            self.__metadata_json__.load_json_text(metadata_text)
            return
        if metadata_type == self.MetaDataFormat_XML:
            self.__metadata_text__ = ''
            self.__metadata_xml__.load_xml(metadata_text)
            self.__metadata_json__ = CJson()
            return
        self.__metadata_text__ = metadata_text
        self.__metadata_xml__ = CXml()
        self.__metadata_json__ = CJson()

    def set_metadata_file(self, metadata_extract_result: int,
                          metadata_extract_memo: str, metadata_type: int,
                          file_name):
        """
        Store the entity metadata read from a file in the given format.

        Works like set_metadata, except that the content comes from
        file_name; for the text format it is read with CFile.file_2_str.

        :param metadata_extract_result: extraction result code
        :param metadata_extract_memo: extraction memo
        :param metadata_type: format code (MetaDataFormat_*)
        :param file_name: metadata file to load
        """
        self.__metadata_extract_result__ = metadata_extract_result
        self.__metadata_extract_memo__ = metadata_extract_memo
        self.__metadata_type__ = metadata_type

        if metadata_type == self.MetaDataFormat_Json:
            self.__metadata_text__ = ''
            self.__metadata_xml__ = CXml()
            self.__metadata_json__.load_file(file_name)
            return
        if metadata_type == self.MetaDataFormat_XML:
            self.__metadata_text__ = ''
            self.__metadata_xml__.load_file(file_name)
            self.__metadata_json__ = CJson()
            return
        self.__metadata_text__ = CFile.file_2_str(file_name)
        self.__metadata_xml__ = CXml()
        self.__metadata_json__ = CJson()

    def metadata_bus(self):
        """
        Return (result, memo, format, content) for the business metadata.

        content is None unless the result equals DB_True; otherwise it is the
        serialised JSON, the serialised XML or the plain text, chosen by the
        stored format code.
        """
        outcome = self.__metadata_bus_extract_result__
        kind = self.__metadata_bus_type__
        if outcome != self.DB_True:
            content = None
        elif kind == self.MetaDataFormat_Json:
            content = self.__metadata_bus_json__.to_json()
        elif kind == self.MetaDataFormat_XML:
            content = self.__metadata_bus_xml__.to_xml()
        else:
            content = self.__metadata_bus_text__
        return outcome, self.__metadata_bus_extract_memo__, kind, content

    def set_metadata_bus(self, metadata_bus_extract_result: int,
                         metadata_bus_extract_memo: str,
                         metadata_bus_type: int, metadata_bus_text):
        """
        Store the business metadata from text in the given format.

        JSON and XML text is loaded into the matching object and the plain
        text becomes ''; any other format keeps the text as-is. The object of
        the other structured format is replaced by a fresh one.

        :param metadata_bus_extract_result: extraction result code
        :param metadata_bus_extract_memo: extraction memo
        :param metadata_bus_type: format code (MetaDataFormat_*)
        :param metadata_bus_text: metadata content
        """
        self.__metadata_bus_extract_result__ = metadata_bus_extract_result
        self.__metadata_bus_extract_memo__ = metadata_bus_extract_memo
        self.__metadata_bus_type__ = metadata_bus_type

        if metadata_bus_type == self.MetaDataFormat_Json:
            self.__metadata_bus_text__ = ''
            self.__metadata_bus_xml__ = CXml()
            self.__metadata_bus_json__.load_json_text(metadata_bus_text)
            return
        if metadata_bus_type == self.MetaDataFormat_XML:
            self.__metadata_bus_text__ = ''
            self.__metadata_bus_xml__.load_xml(metadata_bus_text)
            self.__metadata_bus_json__ = CJson()
            return
        self.__metadata_bus_text__ = metadata_bus_text
        self.__metadata_bus_xml__ = CXml()
        self.__metadata_bus_json__ = CJson()

    def set_metadata_bus_file(self, metadata_bus_extract_result: int,
                              metadata_bus_extract_memo: str,
                              metadata_type: int, file_name):
        """
        Store the business metadata read from a file in the given format.

        Works like set_metadata_bus, except that the content comes from
        file_name; for the text format it is read with CFile.file_2_str.

        :param metadata_bus_extract_result: extraction result code
        :param metadata_bus_extract_memo: extraction memo
        :param metadata_type: format code (MetaDataFormat_*)
        :param file_name: metadata file to load
        """
        self.__metadata_bus_extract_result__ = metadata_bus_extract_result
        self.__metadata_bus_extract_memo__ = metadata_bus_extract_memo
        self.__metadata_bus_type__ = metadata_type

        if metadata_type == self.MetaDataFormat_Json:
            self.__metadata_bus_text__ = ''
            self.__metadata_bus_xml__ = CXml()
            self.__metadata_bus_json__.load_file(file_name)
            return
        if metadata_type == self.MetaDataFormat_XML:
            self.__metadata_bus_text__ = ''
            self.__metadata_bus_xml__.load_file(file_name)
            self.__metadata_bus_json__ = CJson()
            return
        self.__metadata_bus_text__ = CFile.file_2_str(file_name)
        self.__metadata_bus_xml__ = CXml()
        self.__metadata_bus_json__ = CJson()