def __call__(self): try: with open(self.filename, encoding="utf8") as opened_file: self.xml_nested_dict = xmltodict.parse(opened_file.read()) except: print("Ошибка чтения файла: " + self.filename) print(f"код {sys.exc_info()[0].__dict__}") traceback.print_exc() return lambda x: None # PHASE 1 - extract xml Data into xml Table! Keep track of to which excel_column The Data will go ;) self.xml_value_table = [] anchor0 = len(self.xml_value_table) # Oh, I'm appending xml table iteratively # then I'm appending excel iteratively ( but just for #THIS_CASE) # to better find my special place, I will mark this place with ANCHOR! simple_xml_values = [ [ 'extract_base_params_construction', 'details_statement', 'group_top_requisites', 'registration_number' ], [ 'extract_base_params_construction', 'details_statement', 'group_top_requisites', 'date_formation' ], [ 'extract_base_params_construction', 'construction_record', 'object', 'common_data', 'cad_number' ], [ 'extract_base_params_construction', 'construction_record', 'object', 'common_data', 'quarter_cad_number' ], [ 'extract_base_params_construction', 'construction_record', 'address_location', 'address', 'readable_address' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'base_parameters', 'base_parameter', 'area' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'base_parameters', 'base_parameter', 'built_up_area' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'base_parameters', 'base_parameter', 'extension' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'base_parameters', 'base_parameter', 'depth' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'base_parameters', 'base_parameter', 'occurence_depth' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'base_parameters', 'base_parameter', 'volume' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'base_parameters', 'base_parameter', 'height' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'name' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'purpose' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'floors' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'year_commisioning' ], [ 'extract_base_params_construction', 'construction_record', 'params', 'year_built' ], [ 'extract_base_params_construction', 'construction_record', 'cost', 'value' ], ['extract_base_params_construction', 'status'], [ 'extract_base_params_construction', 'construction_record', 'special_notes' ], ] for value_id, value in enumerate(simple_xml_values): self.xml_value_table.append( tm._try_get(self.xml_nested_dict, value)) # --- 6 anchor1 = len(self.xml_value_table) self.xml_value_table.append( tm._try_get(self.xml_nested_dict, [ 'extract_base_params_construction', 'construction_record', 'object', 'common_data', 'type', 'value' ])) # --- 7 anchor2 = len(self.xml_value_table) dt = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_construction', 'construction_record', 'record_info', 'registration_date' ]) if dt: dt = datetime.datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S%z").date() self.xml_value_table.append(dt) # --- 8 anchor3 = len(self.xml_value_table) old_numbers = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_construction', 'construction_record', 'cad_links', 'old_numbers', "old_number" ]) s8 = lambda s: " ; ".join([ str(tm._try_get(s, ["number_type", "value"])), str(tm._try_get(s, [ "number", ])), ]) old_gos_num = " ;; \n".join( [str(i) for i in tm.iflist(old_numbers, s8)]) if old_gos_num: self.xml_value_table.append(old_gos_num) else: self.xml_value_table.append("-") # --- 23 anchor4 = len(self.xml_value_table) land_cad_numbers = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_construction', 'construction_record', 'cad_links', 'land_cad_numbers', 'land_cad_number' ]) s23 = lambda s: "".join([str(tm._try_get(s, ["cad_number"])), " ; "]) self.xml_value_table.append("".join( [str(i) for i in tm.iflist(land_cad_numbers, s23)])) # --- 24 room_cad_numbers = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_construction', 'construction_record', 'cad_links', 'room_cad_numbers', 'room_cad_number' ]) s24 = lambda s: "".join([str(tm._try_get(s, ["cad_number"])), " ; "]) self.xml_value_table.append("".join( [str(i) for i in tm.iflist(room_cad_numbers, s24)])) # --- 24 (APPEND TO PREVIOUS) car_parking_cad_numbers = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_construction', 'construction_record', 'cad_links', 'car_parking_space_cad_numbers', 'car_parking_space_cad_number' ]) self.xml_value_table.append("".join( [str(i) for i in tm.iflist(car_parking_cad_numbers, s24)])) # --- 25 anchor5 = len(self.xml_value_table) permitted_uses = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_construction', 'construction_record', 'params', 'permitted_uses' ]) s25 = lambda s: "".join( [str(tm._try_get(s, ['permitted_use', 'name'])), " ; "]) self.xml_value_table.append("".join( [str(i) for i in tm.iflist(permitted_uses, s25)])) # TODO remake into more objects # --- 28 anchor6 = len(self.xml_value_table) rights = tm._try_get( self.xml_nested_dict, ['extract_base_params_construction', 'right_records']) self.xml_value_table.append(str(dict(rights))) # --- 29 anchor7 = len(self.xml_value_table) owner_rights = tm._try_get( self.xml_nested_dict, ['extract_base_params_construction', 'ownerless_right_records']) self.xml_value_table.append(str(dict(owner_rights))) # PHASE 2 - now, we have all The Data we need! Now it's time to find our data in Xml_table and push it # to Excel_table. # Simple case: we pushing one xml_value to one excel_column # Complex case: we pushing multiple xml_values to one excel_column. # * And some other columns can push the same data too! self.excel_table_range = range(1, self.COL_MAX_NUM + 1) self.excel_table = [None for i in self.excel_table_range] # xml -> excel relations # one to one OR one two many self.excel_table[0] = os.path.split(self.filename)[-1] # simple excel fields simple_excel__column_destination = [ 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 26, 27 ] for i, excel_id in enumerate(simple_excel__column_destination): self.excel_table[excel_id - 1] = self.xml_value_table[i + anchor0] # anchor1 - field #6 if self.xml_value_table[anchor1] == "construction_record": self.excel_table[6 - 1] = "Сооружение" else: self.excel_table[6 - 1] = self.xml_value_table[anchor1] self.excel_table[7 - 1] = self.xml_value_table[anchor2] self.excel_table[8 - 1] = self.xml_value_table[anchor3] self.excel_table[23 - 1] = self.xml_value_table[anchor4] self.excel_table[24 - 1] = self.xml_value_table[ anchor4 + 1] + self.xml_value_table[anchor4 + 2] self.excel_table[25 - 1] = "".join(self.xml_value_table[anchor5]) self.excel_table[28 - 1] = self.xml_value_table[anchor6] self.excel_table[29 - 1] = self.xml_value_table[anchor7] return self.excel_table
def __call__(self): try: with open(self.filename, encoding="utf8") as opened_file: self.xml_nested_dict = xmltodict.parse(opened_file.read()) except: print("Ошибка чтения файла: " + self.filename) print(f"код {sys.exc_info()[0].__dict__}") traceback.print_exc() return lambda x: None # PHASE 0 - parse etree. enjoy XPath! tree = et.parse(self.filename) root = tree.getroot() root.findall("") # PHASE 1 - extract xml Data into xml Table! Keep track of to which excel_column The Data will go ;) self.xml_value_table = [] ANCHOR0 = len(self.xml_value_table) # Oh, I'm appending xml table iteratively # then I'm appending excel iteratively ( but just for #THIS_CASE) # to better find my special place, I will mark this place with ANCHOR! simple_xml_values = [ [ 'extract_base_params_land', 'details_statement', 'group_top_requisites', 'registration_number' ], [ 'extract_base_params_land', 'details_statement', 'group_top_requisites', 'date_formation' ], [ 'extract_base_params_land', 'land_record', 'object', 'common_data', 'cad_number' ], [ 'extract_base_params_land', 'land_record', 'object', 'common_data', 'quarter_cad_number' ], [ 'extract_base_params_land', 'land_record', 'address_location', 'address', 'readable_address' ], [ 'extract_base_params_land', 'land_record', 'params', 'area', 'value' ], ['extract_base_params_land', 'land_record', 'cost', 'value'], [ 'extract_base_params_land', 'land_record', 'params', 'category', 'type', 'value' ], [ 'extract_base_params_land', 'land_record', 'params', 'permitted_use', 'permitted_use_established', 'land_use', 'value' ], ['extract_base_params_land', 'status'], ['extract_base_params_land', 'land_record', 'special_notes'], ] for value_id, value in enumerate(simple_xml_values): self.xml_value_table.append( tm._try_get(self.xml_nested_dict, value)) # --- 4 == 6 soor anchor1 = len(self.xml_value_table) self.xml_value_table.append( tm._try_get(self.xml_nested_dict, [ 'extract_base_params_land', 'land_record', 'object', 'common_data', 'type', 'value' ])) # --- 5 == 7 soor anchor2 = len(self.xml_value_table) dt = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_land', 'land_record', 'record_info', 'registration_date' ]) if dt: dt = datetime.datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S%z").date() self.xml_value_table.append(dt) # --- 8 anchor3 = len(self.xml_value_table) old_numbers = "; \n\n".join([ ", ".join([str(i).strip() for i in j.itertext() if str(i).strip()]) for j in root.findall('cad_links/old_numbers') ]) if old_numbers: self.xml_value_table.append(old_numbers) else: self.xml_value_table.append("-") # --- 12 == 23 soor anchor4 = len(self.xml_value_table) land_cad_numbers = tm._try_get(self.xml_nested_dict, [ 'extract_base_params_land', 'land_record', 'cad_links', 'included_objects', 'included_object' ]) s12 = lambda s: "".join([str(tm._try_get(s, ["cad_number"])), " ; "]) self.xml_value_table.append("".join( [str(i) for i in tm.iflist(land_cad_numbers, s12)])) # --- 17 == 28 soor anchor5 = len(self.xml_value_table) right_holders_name = [[ i for i in j.itertext() ][0] for j in root.findall( 'right_records/right_record/right_holders//value')] self.xml_value_table.append("; \n".join(right_holders_name)) # --- 18 _type = [[i for i in j.itertext()][0] for j in root.findall( 'right_records/right_record/right_data/right_type/value')] right_number = [[i for i in j.itertext()][0] for j in root.findall( 'right_records/right_record/right_data/right_number')] date = [ str( datetime.datetime.strptime([i for i in j.itertext()][0], "%Y-%m-%dT%H:%M:%S%z").date()) for j in root.findall( 'right_records/right_record/record_info/registration_date') ] self.xml_value_table.append("; \n".join([ ", ".join(el) for i, el in enumerate(zip(_type, right_number, date)) ])) # --- 19 ud = "; \n".join([ ", ".join([i for i in j.itertext()]) for j in root.findall( 'right_records/right_record/underlying_documents/underlying_document' ) ]) if ud: self.xml_value_table.append(ud) else: self.xml_value_table.append("-") # --- 20 rr = "; \n\n".join([ ", ".join([str(i).strip() for i in j.itertext() if str(i).strip()]) for j in root.findall('restrict_records/restrict_record') ]) # rights = tm._try_get(self.xml_nested_dict, ['extract_base_params_land', 'restrict_records']) self.xml_value_table.append(rr) # # PHASE 2 - now, we have all The Data we need! Now it's time to find our data in Xml_table and push it # to Excel_table. # Simple case: we pushing one xml_value to one excel_column # Complex case: we pushing multiple xml_values to one excel_column. # * And some other columns can push the same data too! self.excel_table_range = range(1, self.COL_MAX_NUM + 1) self.excel_table = [None for i in self.excel_table_range] # xml -> excel relations # one to one OR one two many self.excel_table[0] = os.path.split(self.filename)[-1] # simple excel fields simple_excel__column_destination = [ 2, 3, 6, 7, 9, 10, 11, 13, 14, 15, 16 ] for i, excel_id in enumerate(simple_excel__column_destination): self.excel_table[excel_id - 1] = self.xml_value_table[i + ANCHOR0] # anchor1 - field #4 if self.xml_value_table[anchor1] == "land_record": self.excel_table[4 - 1] = "Земельный участок" else: self.excel_table[4 - 1] = self.xml_value_table[anchor1] self.excel_table[5 - 1] = self.xml_value_table[anchor2] self.excel_table[8 - 1] = self.xml_value_table[anchor3] self.excel_table[12 - 1] = self.xml_value_table[anchor4] self.excel_table[17 - 1] = self.xml_value_table[anchor5] self.excel_table[18 - 1] = self.xml_value_table[anchor5 + 1] self.excel_table[19 - 1] = self.xml_value_table[anchor5 + 2] self.excel_table[20 - 1] = self.xml_value_table[anchor5 + 3] return self.excel_table
def __call__(self): try: with open(self.filename, encoding="utf8") as opened_file: self.xml_nested_dict = xmltodict.parse(opened_file.read()) except: print("Ошибка чтения файла: " + self.filename) print(f"код {sys.exc_info()[0].__dict__}") traceback.print_exc() return lambda x: None tree = et.parse(self.filename) root = tree.getroot() root.findall("") # PHASE 1 - extract xml Data into xml Table! Keep track of to which excel_column The Data will go ;) self.xml_value_table = [] ANCHOR0 = len(self.xml_value_table) # Oh, I'm appending xml table iteratively # then I'm appending excel iteratively ( but just for #THIS_CASE) # to better find my special place, I will mark this place with ANCHOR! simple_xml_values = [ [ 'extract_base_params_under_construction', 'details_statement', 'group_top_requisites', 'registration_number' ], [ 'extract_base_params_under_construction', 'details_statement', 'group_top_requisites', 'date_formation' ], [ 'extract_base_params_under_construction', 'object_under_construction_record', 'object', 'common_data', 'cad_number' ], [ 'extract_base_params_under_construction', 'object_under_construction_record', 'object', 'common_data', 'quarter_cad_number' ], [ 'extract_base_params_under_construction', 'object_under_construction_record', 'address_location', 'address', 'readable_address' ], [ 'extract_base_params_under_construction', 'object_under_construction_record', 'params', 'degree_readiness' ], [ 'extract_base_params_under_construction', 'object_under_construction_record', 'params', 'purpose' ], [ 'extract_base_params_under_construction', 'object_under_construction_record', 'cost', 'value' ], ['extract_base_params_under_construction', 'status'], [ 'extract_base_params_under_construction', 'object_under_construction_record', 'special_notes' ], # ['extract_base_params_room', 'room_record', 'special_notes'], ] for value_id, value in enumerate(simple_xml_values): self.xml_value_table.append( tm._try_get(self.xml_nested_dict, value)) # anchor1 - 4 anchor1 = len(self.xml_value_table) _type = [[i for i in j.itertext()][0] for j in root.findall( 'object_under_construction_record/object/common_data/type/value') ][0] if _type == "object_under_construction_record": self.xml_value_table.append("ОНС") elif not _type: self.xml_value_table.append("-") else: self.xml_value_table.append(_type) # 5 cad_num = [[i for i in j.itertext()][0] for j in root.findall( 'object_under_construction_record/object/common_data/cad_number') ][0] self.xml_value_table.append(cad_num) # 8 old_numbers = " , ".join([ " ".join(([i for i in j.itertext()])) for j in root.findall("cad_links/old_numbers/old_number") ]) if old_numbers != "": self.xml_value_table.append(old_numbers) else: self.xml_value_table.append("-") # 10=12 anchor2 = len(self.xml_value_table) base_params = "params/base_parameters/base_parameter/" def get_param(param): p = [[i for i in j.itertext()][0] for j in root.findall(f"{base_params}{param}")] if p: return p[0] else: return "" area = "Площадь в кв. метрах " + get_param("area") + " ; \n" built_up_area = "Площадь застройки в квадратных метрах с округлением до 0,1 квадратного метра " \ + get_param("built_up_area") + " ; \n" extension = "Протяженность в метрах с округлением до 1 метра " + get_param( "extension") + " ; \n" depth = "Глубина в метрах с округлением до 0,1 метра" + get_param( "depth") + " ; \n" occurence_depth = "Глубина залегания в метрах с округлением до 0,1 метра " + get_param( "occurence_depth") + " ; \n" volume = "Объем в кубических метрах с округлением до 1 кубического метра " + get_param( "volume") + " ; \n" height = "Высота в метрах с округлением до 0,1 метра " + get_param( "height") + " ; \n" _final = "".join([ area, built_up_area, extension, depth, occurence_depth, volume, height ]) if _final != "": self.xml_value_table.append(_final) else: self.xml_value_table.append("-") # 11 degree_readiness = [[i for i in j.itertext()][0] for j in root.findall("params/degree_readiness")] if degree_readiness: degree_readiness = degree_readiness[0] self.xml_value_table.append(str(degree_readiness)) # 15 anchor3 = len(self.xml_value_table) land_cad_numbers = " , ".join([ " ".join([i for i in j.itertext()]) for j in root.findall( "cad_links/land_cad_numbers/land_cad_number/cad_number") ]) if land_cad_numbers != "": self.xml_value_table.append(land_cad_numbers) else: self.xml_value_table.append("-") # 17 anchor4 = len(self.xml_value_table) base_params = "params/base_parameters/base_parameter/" p = [[i for i in j.itertext()][0] for j in root.findall( "object_under_construction_record/special_notes")] if p: p = p[0] else: p = "-" self.xml_value_table.append(p) # 18 right_holders_name = [[ i for i in j.itertext() ][0] for j in root.findall( 'right_records/right_record/right_holders//value')] def get_right_holder(root: root): e = root.findall( 'right_records/right_record/right_holders/right_holder/*') # for .. print(e[0].tag) if e[0].tag == "public_formation": print() elif e[0].tag == "individual": ... elif e[0].tag == "legal_entity": ... elif e[0].tag == "another": ... self.xml_value_table.append("; \n".join(right_holders_name)) # 19 _type = [[i for i in j.itertext()][0] for j in root.findall( 'right_records/right_record/right_data/right_type/value')] right_number = [[i for i in j.itertext()][0] for j in root.findall( 'right_records/right_record/right_data/right_number')] date = [ str( datetime.datetime.strptime([i for i in j.itertext()][0], "%Y-%m-%dT%H:%M:%S%z").date()) for j in root.findall( 'right_records/right_record/record_info/registration_date') ] self.xml_value_table.append("; \n".join([ ", ".join(el) for i, el in enumerate(zip(_type, right_number, date)) ])) # --- 20 ud = "; \n".join([ ", ".join([i for i in j.itertext()]) for j in root.findall( 'right_records/right_record/underlying_documents/underlying_document' ) ]) if ud: self.xml_value_table.append(ud) else: self.xml_value_table.append("-") # --- 21 rr = "; \n\n".join([ ", ".join([str(i).strip() for i in j.itertext() if str(i).strip()]) for j in root.findall('restrict_records/restrict_record') ]) # rights = tm._try_get(self.xml_nested_dict, ['extract_base_params_under_construction', 'restrict_records']) self.xml_value_table.append(rr) # --- 22 _expropriation_info_type = [ [i for i in j.itertext()][0] for j in root.findall('expropriation_info/expropriation_info_type') ] _origin_content = [[ i for i in j.itertext() ][0] for j in root.findall('expropriation_info/origin_content')] self.xml_value_table.append("; \n".join([ ", ".join(el) for i, el in enumerate( zip(_expropriation_info_type, _origin_content)) ])) # --- 23 rr = "; \n\n".join([ ", ".join([str(i).strip() for i in j.itertext() if str(i).strip()]) for j in root.findall( 'extract_base_params_under_construction/deal_records/deal_record' ) ]) # rights = tm._try_get(self.xml_nested_dict, ['extract_base_params_room', 'restrict_records']) if rr != "": self.xml_value_table.append(rr) else: self.xml_value_table.append("-") # PHASE 2 - now, we have all The Data we need! Now it's time to find our data in Xml_table and push it # to Excel_table. # Simple case: we pushing one xml_value to one excel_column # Complex case: we pushing multiple xml_values to one excel_column. # * And some other columns can push the same data too! self.excel_table_range = range(1, self.COL_MAX_NUM + 1) self.excel_table = [None for i in self.excel_table_range] # xml -> excel relations # one to one OR one two many self.excel_table[0] = os.path.split(self.filename)[-1] # simple excel fields simple_excel__column_destination = [2, 3, 6, 7, 9, 11, 13, 14, 16, 17] for i, excel_id in enumerate(simple_excel__column_destination): self.excel_table[excel_id - 1] = self.xml_value_table[i + ANCHOR0] self.excel_table[4 - 1] = self.xml_value_table[anchor1] self.excel_table[5 - 1] = self.xml_value_table[anchor1 + 1] self.excel_table[8 - 1] = self.xml_value_table[anchor1 + 2] self.excel_table[12 - 1] = self.xml_value_table[anchor2] # 10=12 self.excel_table[11 - 1] = self.xml_value_table[anchor2 + 1] self.excel_table[15 - 1] = self.xml_value_table[anchor3] self.excel_table[17 - 1] = self.xml_value_table[anchor4] self.excel_table[18 - 1] = self.xml_value_table[anchor4 + 1] self.excel_table[19 - 1] = self.xml_value_table[anchor4 + 2] self.excel_table[20 - 1] = self.xml_value_table[anchor4 + 3] self.excel_table[21 - 1] = self.xml_value_table[anchor4 + 4] self.excel_table[22 - 1] = self.xml_value_table[anchor4 + 5] self.excel_table[23 - 1] = self.xml_value_table[anchor4 + 6] return self.excel_table