def _parse_symbol(self, element, prop):
    """
    Looks up the text of the binary symbol element nested below a property element.

    The children of `element` are scanned for entries whose tag equals `prop`. Inside each of
    those, the first child tagged `self.TAG_SYMBOL_BIN` provides the result.

    Args:
        element: The parent element to search; iterating it must yield its children
            (presumably an lxml element - confirm against the caller).
        prop (str): The tag name of the child element wrapping the symbol.

    Returns:
        The `text` of the first matching symbol element or None if nothing matches.
    """
    for wrapper in element:
        if get_tag(wrapper) != prop:
            continue
        for candidate in wrapper:
            if get_tag(candidate) == self.TAG_SYMBOL_BIN:
                return candidate.text
    return None
def parse(self, view_service):  # pragma: no cover
    """
    Adds one model instance to the session for every legend entry of the view service.

    Entries are numbered starting at 1 within each legend element; the number is combined
    with the view service's `TID` attribute to build the instance ID.

    Args:
        view_service: The view service element. Its `attrib['TID']` is read for each
            legend entry that is found.
    """
    default_language = Config.get('default_language')
    for legend in view_service:
        if get_tag(legend) != self.TAG_LEGEND:
            continue

        # Lazily filter the legend's children so tags are still checked one at a time.
        entries = (
            child for child in legend
            if get_tag(child) == self.TAG_LEGEND_ENTRY
        )
        for number, entry in enumerate(entries, start=1):
            sub_theme_text = parse_string(entry, self.TAG_SUB_THEME)
            # The sub theme is stored as a dictionary keyed by the default language.
            sub_theme = (
                None if sub_theme_text is None
                else {default_language: sub_theme_text}
            )
            tid = view_service.attrib['TID']
            record = self._model(
                id='{0}.legende.{1}'.format(tid, number),
                symbol=self._parse_symbol(entry, self.TAG_SYMBOL),
                legend_text=parse_multilingual_text(entry, self.TAG_LEGEND_TEXT),
                type_code=parse_string(entry, self.TAG_TYPE_CODE),
                type_code_list=parse_string(entry, self.TAG_TYPE_CODE_LIST),
                topic=self._topic_code,
                sub_theme=sub_theme,
                other_theme=parse_string(entry, self.TAG_OTHER_THEME),
                view_service_id=tid
            )
            self._session.add(record)
def _parse_coord(self, coord, srs):
    """
    Reads a coordinate element and returns it in the target reference system.

    The child tagged `C1` supplies the x value and the child tagged `C2` the y value.

    Args:
        coord: The coordinate element; iterating it must yield its children.
        srs: The reference system of the coordinate. It is compared with `self._to_srs`
            to decide whether a reprojection is needed.

    Returns:
        The tuple `(x, y)` if `srs` equals `self._to_srs`, otherwise the result of
        `self._reprojector.transform` for that tuple.

    Raises:
        KeyError: If the element has no `C1` or no `C2` child.
    """
    values = {}
    for component in coord:
        tag = get_tag(component)
        if tag == 'C1':
            values['x'] = float(component.text)
        elif tag == 'C2':
            values['y'] = float(component.text)

    already_in_target = srs == self._to_srs
    point = (values['x'], values['y'])
    if already_in_target:
        return point
    return self._reprojector.transform(point, from_srs=srs, to_srs=self._to_srs)
def _parse_arc(self, arc, start_point, srs):
    """
    Reads an arc element and converts it into stroked coordinates.

    The children tagged `C1`/`C2` give the end point and those tagged `A1`/`A2` the arc
    point. Both points are reprojected unless `srs` equals `self._to_srs`.

    Args:
        arc: The arc element; iterating it must yield its children.
        start_point: The point the arc starts from, passed on to `stroke_arc` unchanged.
        srs: The reference system of the arc's coordinates.

    Returns:
        Whatever `stroke_arc` returns for the start, arc and end point using
        `self._arc_max_diff` and `self._arc_precision`.

    Raises:
        KeyError: If one of the `C1`, `C2`, `A1` or `A2` children is missing.
    """
    end = {}
    mid = {}
    # Maps each supported tag to the dictionary and axis key it fills.
    slots = {
        'C1': (end, 'x'),
        'C2': (end, 'y'),
        'A1': (mid, 'x'),
        'A2': (mid, 'y'),
    }
    for component in arc:
        slot = slots.get(get_tag(component))
        if slot is not None:
            target, axis = slot
            target[axis] = float(component.text)

    already_in_target = srs == self._to_srs

    def to_target(point):
        # Reproject only when the source differs from the target reference system.
        if already_in_target:
            return point
        return self._reprojector.transform(point, from_srs=srs, to_srs=self._to_srs)

    arc_point = to_target((mid['x'], mid['y']))
    end_point = to_target((end['x'], end['y']))
    return stroke_arc(start_point, arc_point, end_point, self._arc_max_diff,
                      self._arc_precision)
def _parse_geom(self, geometry):
    """
    Returns the first geometry that can be parsed from the children of a geometry element.

    Each child is matched against the LV03 and LV95 point, line and area tags. The first
    child that yields a geometry is wrapped into a collection type when
    `self._geometry_type` asks for one and is then converted with `from_shape`.

    Args:
        geometry: The geometry element; iterating it must yield its children.

    Returns:
        The result of `from_shape(..., srid=2056)` for the first parsed geometry or None
        if no child produced one.
    """
    geom_type = self._geometry_type.upper()

    # (tag, parser, source reference system), checked in this order for every child.
    parsers = (
        (self.TAG_POINT_LV03, self._parse_point, 21781),
        (self.TAG_POINT_LV95, self._parse_point, 2056),
        (self.TAG_LINE_LV03, self._parse_line, 21781),
        (self.TAG_LINE_LV95, self._parse_line, 2056),
        (self.TAG_AREA_LV03, self._parse_area, 21781),
        (self.TAG_AREA_LV95, self._parse_area, 2056),
    )
    wrappers = {
        'MULTIPOINT': MultiPoint,
        'MULTILINESTRING': MultiLineString,
        'MULTIPOLYGON': MultiPolygon,
        'GEOMETRYCOLLECTION': GeometryCollection,
    }

    for element in geometry:
        tag = get_tag(element)
        shape = None
        for known_tag, parser, srs in parsers:
            if tag == known_tag:
                shape = parser(element, srs)
                break
        if shape is None:
            # Unknown tag or nothing parsed: try the next child.
            continue
        wrapper = wrappers.get(geom_type)
        if wrapper is not None:
            shape = wrapper([shape])
        return from_shape(shape, srid=2056)
    return None
def load(self, xtf_files, force=False):
    """
    Updates the data for a certain federal topic.

    The import runs in a single session: the schema is truncated, every data section of
    every passed file is parsed, the data integration is updated and the session is
    committed. Nothing is done if the checksum comparison fails and `force` is not set.

    Args:
        xtf_files (list of str): Files to be parsed. The first one should be the XML file
            containing the federal laws, the second one the XTF file with the PLR data.
        force (bool): Set to `True` to ignore the result of the checksum comparison and
            update the data anyway.

    Raises:
        SystemExit: With exit code 1 if any exception occurs during the import. Before
            exiting, the session is rolled back, the error is logged and
            `cleanup_files()` is called.
    """
    # Imported locally so this method does not depend on the module's import block.
    import sys

    engine = create_engine(self._connection)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        if force or self._compare_checksum(session):
            self._log.info('Starting import of topic {0}'.format(
                self._topic_settings.get('code')))
            self._log.info('Using SRID: {0}'.format(self._srid))
            self._log.info('Maximum difference for arc points: {0}'.format(
                self._arc_max_diff))
            self._log.info('Arc point coordinate precision: {0}'.format(
                self._arc_precision))
            self._truncate_schema(session)

            # The base name of the last file ending in '.xtf' is used as topic source.
            topic_source = None
            for xtf_file in xtf_files:
                if xtf_file.endswith('.xtf'):
                    topic_source = os.path.basename(xtf_file)
                self._log.info('Parsing {0}'.format(xtf_file))
                content = parse(xtf_file)
                for element in content.getroot():
                    if get_tag(element) == self.TAG_DATASECTION:
                        self._parse_datasection(session, element)

            self._update_data_integration(session, topic_source)
            self._log.info('Committing import')
            session.commit()
            self._log.info('Finished import of topic {0}'.format(
                self._topic_settings.get('code')))
    except Exception:
        session.rollback()
        self._log.exception('An error occurred during the import:')
        self.cleanup_files()
        # Use sys.exit() instead of the builtin exit(): the latter is injected by the
        # `site` module for interactive use and is not available in every interpreter.
        sys.exit(1)
    finally:
        session.close()
def _parse_datasection(self, session, datasection):
    """
    Hands the relevant children of the data section over to the transfer structure parser.

    Children tagged as referenced laws are parsed with `laws=True`, children tagged as
    transfer structure with `laws=False`. All other children are ignored.

    Args:
        session (sqlalchemy.orm.session.Session): The SQLAlchemy session for database
            interaction.
        datasection (lxml.etree.Element): The data section element.
    """
    for child in datasection:
        child_tag = get_tag(child)
        if child_tag == self.TAG_REFERENCED_LAWS:
            self._parse_transfer_structure(session, child, laws=True)
        elif child_tag == self.TAG_TRANSFER_STRUCTURE:
            self._parse_transfer_structure(session, child, laws=False)
def _parse_geom(self, geometry):
    """
    Parses the geometry of a geometry element, preferring LV95 over LV03.

    All children are first checked for LV95 point, line and area tags. Only if that
    yields no geometry are the children checked again for the LV03 tags. Within one pass
    the last matching child determines the result. The geometry is wrapped into a
    collection type when `self._geometry_type` asks for one.

    Args:
        geometry: The geometry element; iterating it must yield its children.

    Returns:
        The result of `from_shape(..., srid=2056)` for the parsed geometry or None if no
        geometry was found.
    """
    geom_type = self._geometry_type.upper()

    def scan(candidates, srs):
        # Runs over all children; a later match replaces an earlier one.
        found = None
        for element in geometry:
            tag = get_tag(element)
            for known_tag, parser in candidates:
                if tag == known_tag:
                    found = parser(element, srs)
                    break
        return found

    # Check for LV95 geometry
    shape = scan((
        (self.TAG_POINT_LV95, self._parse_point),
        (self.TAG_LINE_LV95, self._parse_line),
        (self.TAG_AREA_LV95, self._parse_area),
    ), 2056)

    # Check for LV03 geometry as fallback
    if shape is None:
        shape = scan((
            (self.TAG_POINT_LV03, self._parse_point),
            (self.TAG_LINE_LV03, self._parse_line),
            (self.TAG_AREA_LV03, self._parse_area),
        ), 21781)

    if shape is None:
        return None

    # Wrap in collection if necessary
    wrappers = {
        'MULTIPOINT': MultiPoint,
        'MULTILINESTRING': MultiLineString,
        'MULTIPOLYGON': MultiPolygon,
        'GEOMETRYCOLLECTION': GeometryCollection,
    }
    wrapper = wrappers.get(geom_type)
    if wrapper is not None:
        shape = wrapper([shape])
    return from_shape(shape, srid=2056)
def _parse_line(self, line, srs):
    """
    Builds a line string from the first polyline found in a line element.

    Only the first child of `line` is evaluated. Its coordinate children are appended as
    single points and its arc children as the stroked points returned by `_parse_arc`.
    Any other child is reported with a warning and skipped.

    Args:
        line: The line element; iterating it must yield the polyline elements.
        srs: The reference system of the coordinates, passed on to the coordinate and arc
            parsers.

    Returns:
        A `LineString` of the collected coordinates or None if `line` has no children.

    Raises:
        IndexError: If an arc appears before any coordinate, because the arc needs the
            previously collected point as its start.
    """
    polyline = next(iter(line), None)
    if polyline is None:
        return None

    points = []
    for part in polyline:
        part_tag = get_tag(part)
        if part_tag == self.TAG_COORD:
            points.append(self._parse_coord(part, srs))
        elif part_tag == self.TAG_ARC:
            # The arc starts at the most recently collected point.
            points.extend(self._parse_arc(part, points[-1], srs))
        else:
            self._log.warning(
                'Found unsupported geometry element: {0}'.format(part_tag))
    return LineString(points)
def _parse_transfer_structure(self, session, transfer_structure, laws=False):
    """
    Dispatches every child of the transfer structure to the parser for its tag.

    One parser object per supported element type is created up front. Each child is then
    handed to the first parser whose tag matches; children with an unsupported tag are
    reported as an error in the log.

    Args:
        session (sqlalchemy.orm.session.Session): The SQLAlchemy session for database
            interaction.
        transfer_structure (lxml.etree.Element): The transfer structure element.
        laws (bool): True if the parsed file is the XML containing the federal laws.
            Document elements are then parsed as 'Law' instead of 'Hint'.
    """
    models = self._models
    topic_code = self._topic_settings.get('code')

    office = Office(session, models.Office)
    document = Document(session, models.Document)
    article = Article(session, models.Article)
    legend_entry = LegendEntry(session, models.LegendEntry, topic_code)
    view_service = ViewService(session, models.ViewService, legend_entry)
    public_law_restriction = PublicLawRestriction(
        session, models.PublicLawRestriction, topic_code)
    geometry = Geometry(
        session,
        models.Geometry,
        self._topic_settings.get('geometry_type'),
        self._srid,
        arc_max_diff=self._arc_max_diff,
        arc_precision=self._arc_precision
    )
    document_reference = DocumentReference(session, models.DocumentReference)
    public_law_restriction_document = PublicLawRestrictionDocument(
        session, models.PublicLawRestrictionDocument)
    reference_definition = ReferenceDefinition(
        session, models.ReferenceDefinition, topic_code)
    document_reference_definition = DocumentReferenceDefinition(
        session, models.DocumentReferenceDefinition)
    base_refinement = BaseRefinement(
        session,
        models.PublicLawRestrictionBase,
        models.PublicLawRestrictionRefinement
    )

    def handle_office(node):
        # Use the last office ID for data integration
        self._data_integration_office_id = node.attrib['TID']
        office.parse(node)

    def handle_document(node):
        document.parse(node, 'Law' if laws else 'Hint')

    def handle_legal_provision(node):
        document.parse(node, 'LegalProvision')

    # Checked in order; the first matching tag decides which handler runs.
    handlers = (
        (self.TAG_OFFICE, handle_office),
        (self.TAG_DOCUMENT, handle_document),
        (self.TAG_LEGAL_PROVISION, handle_legal_provision),
        (self.TAG_ARTICLE, article.parse),
        (self.TAG_VIEW_SERVICE, view_service.parse),
        (self.TAG_PLR, public_law_restriction.parse),
        (self.TAG_GEOMETRY, geometry.parse),
        (self.TAG_DOCUMENT_REFERENCE, document_reference.parse),
        (self.TAG_PUBLIC_LAW_RESTRICTION_DOCUMENT, public_law_restriction_document.parse),
        (self.TAG_REFERENCE_DEFINITION, reference_definition.parse),
        (self.TAG_DOCUMENT_REFERENCE_DEFINITION, document_reference_definition.parse),
        (self.TAG_BASE_REFINEMENT, base_refinement.parse),
    )

    for element in transfer_structure:
        tag = get_tag(element)
        for known_tag, handler in handlers:
            if tag == known_tag:
                handler(element)
                break
        else:
            self._log.error('NOT IMPLEMENTED: {0}'.format(tag))
def test_get_tag():
    """Checks that get_tag returns 'bar' for a parsed `<bar>` element."""
    assert get_tag(XML(""" <bar></bar> """)) == 'bar'