def txt_to_xml(self, file_metadata_name_with_path: str):
    """
    Convert a whitespace-delimited txt metadata file into an xml object.

    Each line that is not equal to the empty string becomes an <item> node
    under <root>: the first whitespace-separated token of the stripped line is
    stored in the item's name attribute and every remaining token becomes a
    <value> child node of that item.

    :param file_metadata_name_with_path: full name (with path) of the txt file
    :return xml_obj: the CXml object holding the converted content
    :raises Exception: if the file content is None or empty
    """
    lines = CFile.file_2_list(file_metadata_name_with_path)
    if lines is None or len(lines) == 0:
        # Nothing could be read from the metadata file.
        raise Exception('元数据文件无法读取,请检查')

    xml_obj = CXml()
    root = xml_obj.new_xml('root')
    xml_obj.set_attr(root, self.Name_Type, self.Transformer_TXT)

    for line in lines:
        if CUtils.equal_ignore_case(line, ''):
            continue
        # Split on runs of whitespace; the head is the item name, the rest are values.
        name, *values = re.split(r'\s+', line.strip())
        item = xml_obj.create_element(root, 'item')
        xml_obj.set_attr(item, self.Name_Name, CUtils.any_2_str(name))
        for value in values:
            value_node = xml_obj.create_element(item, 'value')
            xml_obj.set_element_text(value_node, CUtils.any_2_str(value.strip()))
    return xml_obj
def test_creat_element(self):
    """
    Create a new child node under an existing node and check the serialized xml.

    :return:
    """
    source_xml = ''' <root name="hello world"><element name="hello"></element></root> '''
    document = CXml()
    document.load_xml(source_xml)
    root = document.root_element()
    CXml.create_element(root, "element1")
    expected = '<root name="hello world"><element name="hello"/><element1/></root>'
    assert CXml.get_element_xml(root) == expected
def mat_to_xml(self, file_metadata_name_with_path: str):
    """
    Convert a mat metadata file into an xml object.

    Lines are ignored until the first one that starts with ``1<TAB>``. From
    that line on, every line is split on runs of whitespace and becomes an
    <item> node under <root>: the lower-cased second field is stored in the
    name attribute and the stripped third field becomes the node text.

    :param file_metadata_name_with_path: full name (with path) of the mat file
    :return xml_obj: the CXml object holding the converted content
    :raises Exception: if the file content is None or empty, or if no line
        starting with ``1<TAB>`` is found
    """
    text_list = CFile.file_2_list(file_metadata_name_with_path)
    if (text_list is None) or len(text_list) == 0 \
            or CUtils.equal_ignore_case(CUtils.any_2_str(text_list), ''):
        # A bare `raise` has no active exception to re-raise here and would only
        # surface as an unrelated RuntimeError, so raise an explicit error.
        raise Exception('元数据文件无法读取,请检查')

    started = False  # becomes True once the first "1<TAB>" line has been seen
    xml_obj = CXml()
    node_root = xml_obj.new_xml('root')
    xml_obj.set_attr(node_root, self.Name_Type, self.transformer_type)

    for row_text in text_list:
        if row_text.startswith('1\t'):
            started = True
        if not started:
            continue
        # Split the row on one or more whitespace characters.
        row_list = re.split(r'\s+', row_text)
        node_item = xml_obj.create_element(node_root, 'item')
        xml_obj.set_attr(node_item, self.Name_Name, CUtils.any_2_str(row_list[1]).lower())
        xml_obj.set_element_text(node_item, CUtils.any_2_str(row_list[2].strip()))

    if not started:
        # No line starting with "1<TAB>" was found, so the layout is not recognised.
        raise Exception('文件内容异常,无法正常识别文件开头')
    return xml_obj
def xls_to_xml(self, file_metadata_name_with_path: str):
    """
    Convert the first sheet of an xls/xlsx metadata workbook into an xml object.

    The sheet must have exactly two or three columns. With two columns the
    name is read from the first column; with three columns the first column is
    skipped (presumably a serial-number column) and the name is read from the
    second. The value is always read from the column right after the name.
    Each row becomes an <item> node under <root> whose name attribute is the
    lower-cased name cell and whose text is the value cell.

    :param file_metadata_name_with_path: full name (with path) of the xls/xlsx file
    :return xml_obj: the CXml object holding the converted content
    :raises Exception: if the sheet has neither two nor three columns
    """
    workbook = xlrd.open_workbook(file_metadata_name_with_path)
    sheet = workbook.sheets()[0]  # only the first sheet is used
    column_count = sheet.ncols
    row_count = sheet.nrows

    if CUtils.equal_ignore_case(CUtils.any_2_str(column_count), CUtils.any_2_str(2)):
        name_column = 0
    elif CUtils.equal_ignore_case(CUtils.any_2_str(column_count), CUtils.any_2_str(3)):
        name_column = 1
    else:
        raise Exception('xls格式异常,无法正常解析')
    value_column = name_column + 1

    xml_obj = CXml()
    root = xml_obj.new_xml('root')
    xml_obj.set_attr(root, self.Name_Type, self.transformer_type)

    for row_index in range(row_count):
        item = xml_obj.create_element(root, 'item')
        name_cell = sheet.cell(row_index, name_column)
        xml_obj.set_attr(item, self.Name_Name, CUtils.any_2_str(name_cell.value).lower())
        value_cell = sheet.cell(row_index, value_column)
        xml_obj.set_element_text(item, value_cell.value)
    return xml_obj
def mdb_to_xml(self, file_metadata_name_with_path: str):
    """
    Convert an mdb metadata database into an xml object.

    For each of the tables mbii, mpid, mppi, mqc1, mqc2 and mdac a <property>
    node (attribute ``tablename``) is created under <root>. Every column of
    the table, except columns whose type is ``bytearray``, becomes an <item>
    child whose name attribute is the lower-cased column name and whose text
    is that column's value in the FIRST row of the table.

    Reading is best effort per table: if a table cannot be queried or
    converted, processing continues with the next table.

    :param file_metadata_name_with_path: full name (with path) of the mdb file
    :return xml_obj: the CXml object holding the converted content
    :raises Exception: if connecting or building the xml document fails
    """
    conn = None  # predefined so that the finally block can release them
    cur = None
    try:
        conn = self.get_mdb_connect(file_metadata_name_with_path)
        cur = conn.cursor()

        xml_obj = CXml()
        node_root = xml_obj.new_xml('root')
        xml_obj.set_attr(node_root, self.Name_Type, self.transformer_type)

        for table_name in ('mbii', 'mpid', 'mppi', 'mqc1', 'mqc2', 'mdac'):
            try:
                # Table names come from the fixed tuple above, not from user input.
                cur.execute("SELECT * FROM " + table_name)
                table_data = cur.fetchall()

                node_property = xml_obj.create_element(node_root, 'property')
                xml_obj.set_attr(node_property, 'tablename', table_name)

                for field_index, column in enumerate(cur.description):
                    column_name = column[0]
                    column_type = column[1]
                    if column_type is bytearray:
                        # Skip long binary columns.
                        continue
                    node_item = xml_obj.create_element(node_property, 'item')
                    xml_obj.set_attr(node_item, self.Name_Name, CUtils.any_2_str(column_name).lower())
                    xml_obj.set_element_text(node_item, table_data[0][field_index])
            except Exception:
                # Deliberate best effort: skip a table that is missing, empty or
                # unreadable. `Exception` (not a bare except) keeps
                # KeyboardInterrupt / SystemExit propagating.
                continue
    except Exception as error:
        # Keep the original exception chained so the root cause stays visible.
        raise Exception(str(error)) from error
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
    return xml_obj
def txt_to_xml(self, file_metadata_name_with_path: str):
    """
    Convert a ``KEY = VALUE`` style txt metadata file into an xml object.

    The file is read line by line until a line equal to ``END`` (ignoring
    case) is reached. Lines without ``=`` are ignored. For the others:

    * ``GROUP = name`` creates an <item> node (name attribute = value) under
      the current node and makes it the current node;
    * ``END_GROUP = ...`` moves the current node back to its parent;
    * any other ``key = value`` creates an <item> node under the current node
      with the key as name attribute and the value as text.

    Values wrapped in double quotes have the quotes removed.

    :param file_metadata_name_with_path: full name (with path) of the txt file
    :return xml_obj: the CXml object holding the converted content
    :raises Exception: if the file content is None or empty
    """
    text_list = CFile.file_2_list(file_metadata_name_with_path)
    if (text_list is None) or len(text_list) == 0:
        raise Exception('元数据文件无法读取,请检查')

    xml_obj = CXml()
    node_root = xml_obj.new_xml('root')
    xml_obj.set_attr(node_root, self.Name_Type, self.Transformer_TXT)

    # Node that new items are currently attached to.
    current_node = node_root
    for row_text in text_list:
        row_text = row_text.strip()
        # Compare the line itself (the original compared the literal 'row_text',
        # which could never equal 'END').
        if CUtils.equal_ignore_case(row_text, 'END'):
            break

        # Split on the first '=' only, so values containing '=' stay intact.
        row_list = row_text.split('=', 1)
        if len(row_list) < 2:
            continue

        # Strip the key so that "GROUP = X" is recognised regardless of spacing.
        key = row_list[0].strip()
        if CUtils.equal_ignore_case(key, 'END_GROUP'):
            current_node = xml_obj.node_xpath_one(current_node, '..')
            continue

        item_value = CUtils.any_2_str(row_list[1].strip())
        if item_value.startswith('"') and item_value.endswith('"'):
            item_value = item_value[1:-1]

        node_item = xml_obj.create_element(current_node, 'item')
        if CUtils.equal_ignore_case(key, 'GROUP'):
            # The group name is stored as attribute; following items nest below it.
            xml_obj.set_attr(node_item, self.Name_Name, item_value)
            current_node = node_item
        else:
            xml_obj.set_attr(node_item, self.Name_Name, key)
            xml_obj.set_element_text(node_item, item_value)
    return xml_obj
def append_data_records_quality(self, record_index, audit_result: dict):
    """
    Store the quality information of one data record.

    The record node is looked up under the data-records node by its ``index``
    attribute; if it does not exist yet it is created and tagged with that
    index, so later calls for the same record reuse the node instead of
    appending a duplicate.

    :param record_index: index identifying the record
    :param audit_result: audit result dictionary to store on the record
    :return:
    """
    temp_node = CXml.node_xpath_one(
        self.__node_data_records,
        './{0}[@index="{1}"]'.format(self.Name_Record, record_index))
    if temp_node is None:
        temp_node = CXml.create_element(self.__node_data_records, self.Name_Record)
        # Without this attribute the xpath lookup above could never find the
        # node again.
        CXml.set_attr(temp_node, 'index', str(record_index))
    self.__append_quality_info(temp_node, audit_result)
def __init__(self):
    """
    Build the empty xml skeleton: a root node with total, data (containing
    items and records) and metadata (containing business and data) children.
    """
    document = CXml()
    root = document.new_xml(self.Name_Root)
    self.__xml_obj = document
    self.__xml_root_node = root

    # <total>
    self.__node_total = CXml.create_element(root, self.Name_Total)

    # <data> with its <items> and <records> children
    data_node = CXml.create_element(root, self.Name_Data)
    self.__node_data = data_node
    self.__node_data_items = CXml.create_element(data_node, self.Name_Items)
    self.__node_data_records = CXml.create_element(data_node, self.Name_Records)

    # <metadata> with its business and data children
    metadata_node = CXml.create_element(root, self.Name_MetaData)
    self.__node_metadata = metadata_node
    self.__node_metadata_bus = CXml.create_element(metadata_node, self.Name_Business)
    self.__node_metadata_data = CXml.create_element(metadata_node, self.Name_Data)
def __append_quality_info(self, xml_node, audit_result: dict):
    """
    Write one audit result as an item node below ``xml_node``.

    An existing item with the same id is removed first, then a new item is
    created carrying the id, group, title and result as attributes and the
    message as text. Missing values default to '' (id, title, message),
    ``QA_Group_Data_Integrity`` (group) and ``QA_Result_Pass`` (result).

    :param xml_node: parent node receiving the item
    :param audit_result: audit result dictionary
    """
    read = CUtils.dict_value_by_name
    quality_id = read(audit_result, self.Name_ID, '')
    quality_title = read(audit_result, self.Name_Title, '')
    quality_group = read(audit_result, self.Name_Group, self.QA_Group_Data_Integrity)
    quality_result = read(audit_result, self.Name_Result, self.QA_Result_Pass)
    quality_memo = read(audit_result, self.Name_Message, '')

    # Replace, rather than duplicate, an item that already carries this id.
    existing_node = CXml.node_xpath_one(
        xml_node, './{0}[@id="{1}"]'.format(self.Name_Item, quality_id))
    if existing_node is not None:
        CXml.remove(existing_node)

    item_node = CXml.create_element(xml_node, self.Name_Item)
    attributes = (
        (self.Name_ID, quality_id),
        (self.Name_Group, quality_group),
        (self.Name_Title, quality_title),
        (self.Name_Result, quality_result),
    )
    for attr_name, attr_value in attributes:
        CXml.set_attr(item_node, attr_name, attr_value)
    CXml.set_element_text(item_node, quality_memo)
def init_metadata_bus(self, parser: CMetaDataParser) -> str:
    """
    Build the business metadata xml from the file information and store it
    on the parser's metadata.

    Items written under <root>, in order: ProductName (file main name),
    DataDate (taken from the path segment in front of ``FenFu``), Resolution
    (derived from the ``pixelsize.width`` metadata value), SatelliteID,
    GeographicName and Description (all three empty).

    :param parser: metadata parser providing file info and receiving the result
    :return: the merged result produced by CResult.merge_result
    """
    xml_obj = CXml()
    root = xml_obj.new_xml('root')

    def append_item(item_name, item_text):
        # Add <item name="item_name">item_text</item> below the root node.
        node = xml_obj.create_element(root, 'item')
        xml_obj.set_attr(node, self.Name_Name, item_name)
        xml_obj.set_element_text(node, item_text)

    main_name = parser.file_info.file_main_name
    path = parser.file_info.file_path
    append_item('ProductName', main_name)

    # Four characters matching \d plus two more characters directly before "/FenFu".
    date_matches = re.findall(
        r'(?i)(\d{4}.{2})[\\\\/]FenFu', path
    )
    append_item('DataDate', CUtils.any_2_str(date_matches[0]) if date_matches else '')

    # Width below 0.01 is multiplied by 110000 (presumably degrees -> metres,
    # confirm); otherwise the width is used as is; unusable values give ''.
    resolution = parser.metadata.metadata_json().xpath_one('pixelsize.width', None)
    if resolution is None:
        resolution = ''
    else:
        width = CUtils.to_decimal(resolution, None)
        if width is None:
            resolution = ''
        elif CUtils.to_decimal(resolution, 0) < 0.01:
            resolution = width * 110000
    resolution = CUtils.any_2_str(resolution)[:10]  # keep at most 10 characters
    append_item('Resolution', resolution)

    append_item('SatelliteID', '')
    append_item('GeographicName', '')
    append_item('Description', '')

    try:
        if xml_obj is None:
            raise
        success_message = '元数据文件成功构建! '
        parser.metadata.set_metadata_bus(
            self.Success, success_message, self.MetaDataFormat_XML, xml_obj.to_xml()
        )
        return CResult.merge_result(self.Success, success_message)
    except Exception as error:
        failure_message = '构建元数据文件失败, 无法处理! 错误原因为{0}'.format(error.__str__())
        parser.metadata.set_metadata_bus(
            self.Exception, failure_message, self.MetaDataFormat_Text, ''
        )
        return CResult.merge_result(self.Exception, failure_message)
def init_metadata_bus(self, parser: CMetaDataParser) -> str:
    """
    Build the business metadata xml from the file name / path and store it
    on the parser's metadata.

    Two file-name layouts are recognised (``<cs>`` is the value returned by
    ``self.get_coordinate_system()``):

    * dated:   ``<geo>_<6-8 digits>_<3 chars>_<resolution>_<cs>``
    * undated: ``<geo>_<3 chars>_<resolution>_<cs>``

    Items written under <root>, in order: ProductName (file main name),
    DataDate (from the dated name, otherwise from the path segment in front
    of ``镶嵌影像成果``), Resolution, SatelliteID, GeographicName (all from the
    file name, '' when the name matches neither layout) and Description ('').

    :param parser: metadata parser providing file info and receiving the result
    :return: the merged result produced by CResult.merge_result
    """
    file_main_name = parser.file_info.file_main_name
    file_path = parser.file_info.file_path
    # Evaluated once and reused by every pattern below.
    coordinate_system = self.get_coordinate_system()

    def first_match(pattern, text):
        # First captured value of `pattern` in `text`, or '' when nothing matches.
        # Each field checks ITS OWN match list; the original guarded every
        # lookup with the DataDate list, which gave wrong results.
        found = re.findall(pattern, text)
        return CUtils.any_2_str(found[0]) if found else ''

    is_dated = CUtils.text_match_re(
        file_main_name, r'.*_\d{6,8}_.{3}_.*_' + coordinate_system)
    is_undated = False
    if not is_dated:
        is_undated = CUtils.text_match_re(
            file_main_name, r'.*_.{3}_.*_' + coordinate_system)

    xml_obj = CXml()
    node_root = xml_obj.new_xml('root')

    def append_item(item_name, item_text):
        # Add <item name="item_name">item_text</item> below the root node.
        node_item = xml_obj.create_element(node_root, 'item')
        xml_obj.set_attr(node_item, self.Name_Name, item_name)
        xml_obj.set_element_text(node_item, item_text)

    append_item('ProductName', file_main_name)

    # --- DataDate ---
    if is_dated:
        pathdata = first_match(r'.*_(\d{6,8})_.{3}_.*_' + coordinate_system, file_main_name)
    else:
        pathdata = first_match(r'(?i)(\d{4}.{2})[\\\\/]镶嵌影像成果', file_path)
    append_item('DataDate', pathdata)

    # --- Resolution ---
    if is_dated:
        pixelsize = first_match(r'.*_\d{6,8}_.{3}_(.*)_' + coordinate_system, file_main_name)
    elif is_undated:
        pixelsize = first_match(r'.*_.{3}_(.*)_' + coordinate_system, file_main_name)
    else:
        pixelsize = ''

    # Drop a trailing unit such as "m" ("05m" -> "05").
    # NOTE(review): `A-z` also matches the characters [ \ ] ^ _ ` that sit between
    # 'Z' and 'a'; probably `A-Z` was intended - left unchanged, confirm.
    if CUtils.text_match_re(pixelsize, r'^\d+[a-zA-z]+$'):
        digits = re.findall(r'(\d+)[a-zA-z]+', pixelsize)
        if digits:
            pixelsize = CUtils.any_2_str(digits[0])

    # A two-character value gets a decimal point in the middle ("05" -> "0.5").
    if len(pixelsize) == 2:
        pixelsize_value = '{0}.{1}'.format(pixelsize[:1], pixelsize[-1:])
    else:
        pixelsize_value = pixelsize
    append_item('Resolution', pixelsize_value)

    # --- SatelliteID ---
    if is_dated:
        satellite_id = first_match(r'.*_\d{6,8}_(.{3})_.*_' + coordinate_system, file_main_name)
    elif is_undated:
        satellite_id = first_match(r'.*_(.{3})_.*_' + coordinate_system, file_main_name)
    else:
        satellite_id = ''
    append_item('SatelliteID', satellite_id)

    # --- GeographicName ---
    if is_dated:
        geographic_name = first_match(r'(.*)_\d{6,8}_.{3}_.*_' + coordinate_system, file_main_name)
    elif is_undated:
        geographic_name = first_match(r'(.*)_.{3}_.*_' + coordinate_system, file_main_name)
    else:
        geographic_name = ''
    append_item('GeographicName', geographic_name)

    append_item('Description', '')

    try:
        parser.metadata.set_metadata_bus(
            self.Success,
            '元数据文件成功构建! ',
            self.MetaDataFormat_XML,
            xml_obj.to_xml()
        )
        return CResult.merge_result(
            self.Success,
            '元数据文件成功构建! '
        )
    except Exception as error:
        message = '构建元数据文件失败, 无法处理! 错误原因为{0}'.format(error.__str__())
        parser.metadata.set_metadata_bus(
            self.Exception,
            message,
            self.MetaDataFormat_Text,
            ''
        )
        return CResult.merge_result(self.Exception, message)
def init_metadata_bus(self, parser: CMetaDataParser) -> str:
    """
    Build the business metadata xml from the file path and store it on the
    parser's metadata.

    The path is expected to contain ``<geo><date><satellite>/影像`` where
    ``<date>`` is a yyyymmdd value optionally surrounded by ``-``, ``_`` or
    ``/``. Items written under <root>, in order: ProductName (file main name),
    DataDate, Resolution (derived from the ``pixelsize.width`` metadata
    value), SatelliteID, GeographicName and Description ('').

    :param parser: metadata parser providing file info and receiving the result
    :return: the merged result produced by CResult.merge_result
    """
    file_path = parser.file_info.file_path

    xml_obj = CXml()
    node_root = xml_obj.new_xml('root')

    def append_item(item_name, item_text):
        # Add <item name="item_name">item_text</item> below the root node.
        node_item = xml_obj.create_element(node_root, 'item')
        xml_obj.set_attr(node_item, self.Name_Name, item_name)
        xml_obj.set_element_text(node_item, item_text)

    append_item('ProductName', parser.file_info.file_main_name)

    # --- DataDate ---
    pathdata_list = re.findall(
        r'(?i)^.+'
        r'[-_/]?([1-9]\d{3}(0[1-9]|1[0-2])(0[1-9]|[1-2][0-9]|3[0-1]))[-_/]?'
        r'.+'
        r'[\\\\/]影像',
        file_path
    )
    if len(pathdata_list) > 0:
        # The pattern has three groups, so findall yields tuples; the full
        # yyyymmdd value is the first group (the other two are month and day).
        pathdata = CUtils.any_2_str(pathdata_list[0][0])
    else:
        pathdata = ''
    append_item('DataDate', pathdata)

    # --- Resolution ---
    # Taken from the image metadata node pixelsize.width:
    #   width < 0.01  -> resolution = width * 110000
    #   otherwise     -> resolution = width
    # (presumably a degrees -> metres conversion - confirm)
    pixelsize_value = parser.metadata.metadata_json().xpath_one('pixelsize.width', None)
    if pixelsize_value is not None:
        pixelsize_num = CUtils.to_decimal(pixelsize_value, None)
        if pixelsize_num is not None:
            if CUtils.to_decimal(pixelsize_value, 0) < 0.01:
                pixelsize_value = pixelsize_num * 110000
        else:
            pixelsize_value = ''
    else:
        pixelsize_value = ''
    pixelsize_value = CUtils.any_2_str(pixelsize_value)
    if len(pixelsize_value) > 10:
        pixelsize_value = pixelsize_value[:10]
    append_item('Resolution', pixelsize_value)

    # --- SatelliteID ---
    satellite_list = re.findall(
        r'(?i)^.+'
        r'[-_/]?[1-9]\d{3}(0[1-9]|1[0-2])(0[1-9]|[1-2][0-9]|3[0-1])[-_/]?'
        r'(.+)'
        r'[\\\\/]影像',
        file_path
    )
    if len(satellite_list) > 0:
        # Groups are (month, day, satellite) -> the satellite id is the third.
        satellite_id = CUtils.any_2_str(satellite_list[0][2])
    else:
        satellite_id = ''
    append_item('SatelliteID', satellite_id)

    # --- GeographicName ---
    geographic_list = re.findall(
        r'(?i)^(.+)'
        r'[-_/]?[1-9]\d{3}(0[1-9]|1[0-2])(0[1-9]|[1-2][0-9]|3[0-1])[-_/]?'
        r'.+'
        r'[\\\\/]影像',
        file_path
    )
    if len(geographic_list) > 0:
        # Groups are (prefix, month, day) -> the path prefix is the first.
        geographic_name = CUtils.any_2_str(geographic_list[0][0])
        # Drop a separator left at the end of the prefix, then keep only the
        # main name of the remaining path.
        if CUtils.text_match_re(geographic_name, '[-_/]$'):
            geographic_name = geographic_name[:-1]
        geographic_name = CFile.file_main_name(geographic_name)
    else:
        geographic_name = ''
    append_item('GeographicName', geographic_name)

    append_item('Description', '')

    try:
        parser.metadata.set_metadata_bus(
            self.Success,
            '元数据文件成功构建! ',
            self.MetaDataFormat_XML,
            xml_obj.to_xml()
        )
        return CResult.merge_result(
            self.Success,
            '元数据文件成功构建! '
        )
    except Exception as error:
        message = '构建元数据文件失败, 无法处理! 错误原因为{0}'.format(error.__str__())
        parser.metadata.set_metadata_bus(
            self.Exception,
            message,
            self.MetaDataFormat_Text,
            ''
        )
        return CResult.merge_result(self.Exception, message)