def _parse_xml_legal_reports(self, xml_obj: Element):
    """ Parses existing report elements of the document into LegalReport objects

    For each <report> element a LegalReport is created, which holds the title, the explanation and a
    LegalDate (date, date type code and code list url). Each report is appended to self.legal_reports.

    Args:
        xml_obj (Element): The document xml element
    Returns:
         nothing
    """
    # All <report> elements are needed, so the list returning getter has to be used. Iterating over the
    # result of the single element getter would walk the children of one report element, or raise a
    # TypeError if no report exists at all.
    report_elems = xml_helper.try_get_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("report"),
        xml_obj
    )

    # The xpaths are identical for every report, so they are built only once
    character_string = GENERIC_NAMESPACE_TEMPLATE.format("CharacterString")
    title_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("title") + "/" + character_string
    explanation_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("explanation") + "/" + character_string
    date_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Date")
    date_type_code_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("CI_DateTypeCode")

    for report_elem in report_elems or []:
        report = LegalReport()
        report.title = xml_helper.try_get_text_from_xml_element(report_elem, title_xpath)
        report.explanation = xml_helper.try_get_text_from_xml_element(report_elem, explanation_xpath)

        legal_date = LegalDate()
        legal_date.date = xml_helper.try_get_text_from_xml_element(report_elem, date_xpath)
        legal_date.date_type_code = xml_helper.try_get_attribute_from_xml_element(
            report_elem,
            "codeListValue",
            date_type_code_xpath
        )
        legal_date.date_type_code_list_url = xml_helper.try_get_attribute_from_xml_element(
            report_elem,
            "codeList",
            date_type_code_xpath
        )
        report.date = legal_date

        self.legal_reports.append(report)
def parse_style(self, layer, layer_obj):
    """ Parses the <Style> element of a layer and attaches the result to the layer object

    Reads name, title, legend uri, legend width/height and the legend mime type.
    If no <Style> element exists below the layer, the layer object stays untouched.

    Args:
        layer: The xml element which is searched for the <Style> element
        layer_obj: The object which receives the parsed Style as `style` attribute
    Returns:
         nothing
    """
    legend_url_xpath = "./" + GENERIC_NAMESPACE_TEMPLATE.format("LegendURL")

    style_elem = xml_helper.try_get_single_element_from_xml(
        "./" + GENERIC_NAMESPACE_TEMPLATE.format("Style"),
        layer
    )
    if style_elem is None:
        # no <Style> element found
        return

    style = Style()
    style.name = xml_helper.try_get_text_from_xml_element(
        style_elem,
        "./" + GENERIC_NAMESPACE_TEMPLATE.format("Name")
    )
    style.title = xml_helper.try_get_text_from_xml_element(
        style_elem,
        "./" + GENERIC_NAMESPACE_TEMPLATE.format("Title")
    )

    online_resource_elem = xml_helper.try_get_single_element_from_xml(
        elem=legend_url_xpath + "/" + GENERIC_NAMESPACE_TEMPLATE.format("OnlineResource"),
        xml_elem=style_elem
    )
    style.legend_uri = xml_helper.get_href_attribute(online_resource_elem)

    # A missing width or height attribute falls back to 0
    raw_width = xml_helper.try_get_attribute_from_xml_element(style_elem, "width", legend_url_xpath)
    style.width = int(raw_width or 0)
    raw_height = xml_helper.try_get_attribute_from_xml_element(style_elem, "height", legend_url_xpath)
    style.height = int(raw_height or 0)

    legend_format = xml_helper.try_get_text_from_xml_element(
        style_elem,
        legend_url_xpath + "/ " + GENERIC_NAMESPACE_TEMPLATE.format("Format")
    )
    style.mime_type = MimeType.objects.filter(mime_type=legend_format).first()

    layer_obj.style = style
def get_version_specific_service_metadata(self, xml_obj):
    """ The version specific implementation of service metadata parsing

    There are elements in the <Service> part of the GetCapabilities document which are not covered by the
    regular service metadata parsing, since they are only used in the newest version of WMS, which is
    rarely in use. These are LayerLimit, MaxWidth and MaxHeight. Afterwards the layers are parsed.

    Args:
        xml_obj: The xml element
    Returns:
         nothing
    """
    def _read_text(tag_name):
        # Looks up the element anywhere in the document and returns its text
        return xml_helper.try_get_text_from_xml_element(
            elem="//" + GENERIC_NAMESPACE_TEMPLATE.format(tag_name),
            xml_elem=xml_obj
        )

    # layer limit is new
    self.layer_limit = _read_text("LayerLimit")

    # max height and width is new
    self.max_width = _read_text("MaxWidth")
    self.max_height = _read_text("MaxHeight")

    self._parse_layers(xml_obj=xml_obj)
def get_version_specific_metadata(self, xml_obj):
    """ Parses the version specific parts of the <Service> element

    Reads the keywords and the online resource link and stores them in
    self.service_identification_keywords and self.service_provider_onlineresource_linkage.

    Args:
        xml_obj: The xml element of the capabilities document
    Returns:
         nothing
    """
    service_xml = xml_helper.try_get_single_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("Service"),
        xml_obj
    )

    # Keywords
    # Keywords are not separated in single <Keyword> elements.
    # There is a single <Keywords> element, containing a continuous string, where keywords are space separated
    keywords_text = xml_helper.try_get_text_from_xml_element(
        service_xml,
        "./" + GENERIC_NAMESPACE_TEMPLATE.format("Keywords")
    )
    # A missing or empty <Keywords> element provides no text. Fall back to an empty string, so simply no
    # keywords are registered instead of failing on None.
    stripped_keywords = (keyword.strip() for keyword in (keywords_text or "").split(" "))
    self.service_identification_keywords = [keyword for keyword in stripped_keywords if keyword]

    # Online Resource
    # The online resource is not found as an attribute of an element.
    # It is the text of the <OnlineResource> element
    self.service_provider_onlineresource_linkage = xml_helper.try_get_text_from_xml_element(
        service_xml,
        "./" + GENERIC_NAMESPACE_TEMPLATE.format("OnlineResource")
    )
def _parse_xml_dataset_id(self, xml_obj: _Element, xpath_type: str):
    """ Parse the dataset id and its code space from the metadata xml

    The MD_Identifier is evaluated first. If it holds no code, the RS_Identifier (code and codeSpace) is
    used. If neither provides usable values, the object is marked as broken (self.is_broken = True) and
    self.dataset_id stays 'undefined'.

    Args:
        xml_obj (_Element): The xml element
        xpath_type (str): The element identificator which is determined by SV_ServiceIdentification or MD_DataIdentification
    Returns:
         nothing
    """
    identifier_xpath = "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:citation/gmd:CI_Citation/gmd:identifier/".format(
        xpath_type
    )

    # First check if MD_Identifier is set, then check if RS_Identifier is used!
    # Initialize datasetid
    self.dataset_id = 'undefined'
    code = xml_helper.try_get_text_from_xml_element(
        elem=identifier_xpath + "gmd:MD_Identifier/gmd:code/gco:CharacterString",
        xml_elem=xml_obj
    )

    if code:
        # new implementation:
        # http://inspire.ec.europa.eu/file/1705/download?token=iSTwpRWd&usg=AOvVaw18y1aTdkoMCBxpIz7tOOgu
        # from 2017-03-02 - the MD_Identifier - see C.2.5 Unique resource identifier - it is separated with
        # a slash - the dataset id is everything after the last slash, the code space everything before
        parsed_url = urllib.parse.urlsplit(code)

        # The scheme check has to be evaluated on its own. Written as `a or b and c`, the `and` binds
        # stronger than the `or`, so every http url would end up in the first branch, no matter whether
        # its path contains a slash or not.
        is_http_url = parsed_url.scheme in ("http", "https")

        if is_http_url and "/" in parsed_url.path:
            # Split only at the last slash. Removing the dataset id using str.replace would also remove
            # identical substrings from the code space.
            code_space, _, dataset_id = code.rpartition("/")
            self.dataset_id = dataset_id
            self.dataset_id_code_space = code_space + "/"
        elif is_http_url and "#" in code:
            tmp = code.split("#")
            self.dataset_id = tmp[1]
            self.dataset_id_code_space = tmp[0]
        else:
            self.dataset_id = code
            self.dataset_id_code_space = ""
    else:
        # try to read code from RS_Identifier
        code = xml_helper.try_get_text_from_xml_element(
            elem=identifier_xpath + "gmd:RS_Identifier/gmd:code/gco:CharacterString",
            xml_elem=xml_obj
        )
        code_space = xml_helper.try_get_text_from_xml_element(
            elem=identifier_xpath + "gmd:RS_Identifier/gmd:codeSpace/gco:CharacterString",
            xml_elem=xml_obj
        )
        if code_space and code:
            self.dataset_id = code
            self.dataset_id_code_space = code_space
        else:
            # Neither MD_Identifier nor RS_Identifier provide a usable identifier
            self.is_broken = True
def test_get_records_sort(self):
    """ Test whether the sorting parameter is working properly

    Requests the records sorted descending by dc:title and compares the order of the returned titles
    with a descending sorted version of the same titles.

    Returns:

    """
    request_params = {
        "service": "CSW",
        "version": "2.0.2",
        "request": "GetRecords",
        "elementsetname": "brief",
        "resulttype": "results",
        "sortby": "dc:title:D",
    }

    response = self.client.get(reverse(CSW_PATH), data=request_params)
    parsed_response = xml_helper.parse_xml(response.content)

    self.assertEqual(response.status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
    self.assertIsNotNone(parsed_response, INVALID_XML_MSG)

    # Collect the title texts in the order they have been returned
    title_elems = xml_helper.try_get_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("title"),
        parsed_response
    )
    returned_titles = [xml_helper.try_get_text_from_xml_element(title_elem) for title_elem in title_elems]

    # The returned order must already be the descending sorted one
    expected_titles = sorted(returned_titles, reverse=True)
    self.assertEqual(expected_titles, returned_titles)
def _parse_parameter_metadata(self, upper_elem):
    """ Parses the <Parameter> elements which are direct children of the given element

    The `name` attribute of each <Parameter> is mapped to the text of the first <Value> element found
    below it. If a name occurs more than once, the last occurrence wins.

    Args:
        upper_elem (Element): The upper xml element
    Returns:
        parameter_map (dict): Mapped parameters and values
    """
    parameter_xpath = "./" + GENERIC_NAMESPACE_TEMPLATE.format("Parameter")
    value_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Value")

    return {
        xml_helper.try_get_attribute_from_xml_element(parameter_elem, "name"):
            xml_helper.try_get_text_from_xml_element(parameter_elem, value_xpath)
        for parameter_elem in xml_helper.try_get_element_from_xml(parameter_xpath, upper_elem)
    }
def parse_lat_lon_bounding_box(self, layer, layer_obj):
    """ Version specific implementation of the bounding box parsing

    Reads the four bounds of the first <EX_GeographicBoundingBox> child of the layer and writes them to
    layer_obj.capability_bbox_lat_lon. A bound without a value is stored as 0. If the layer has no such
    bounding box element, nothing is written.

    Args:
        layer: The xml element which holds the layer info (parsing from)
        layer_obj: The backend model which holds the layer data (parsing to)
    Returns:
         nothing
    """
    # (xml element name, key inside of capability_bbox_lat_lon)
    bound_mapping = (
        ("westBoundLongitude", "minx"),
        ("eastBoundLongitude", "maxx"),
        ("southBoundLatitude", "miny"),
        ("northBoundLatitude", "maxy"),
    )
    try:
        bbox_elem = xml_helper.try_get_element_from_xml(
            "./" + GENERIC_NAMESPACE_TEMPLATE.format("EX_GeographicBoundingBox"),
            layer
        )[0]
        for xml_tag, bbox_key in bound_mapping:
            bound = xml_helper.try_get_text_from_xml_element(
                xml_elem=bbox_elem,
                elem="./" + GENERIC_NAMESPACE_TEMPLATE.format(xml_tag)
            )
            layer_obj.capability_bbox_lat_lon[bbox_key] = 0 if bound is None else bound
    except IndexError:
        # No bounding box element found - nothing to parse
        pass
def _parse_xml_legal_dates(self, xml_obj: Element):
    """ Parses existing CI_Date elements from the MD_DataIdentification element

    Each CI_Date element results in a LegalDate (date, date type code and code list url), which is
    appended to self.legal_dates.

    Args:
        xml_obj (Element): The document xml element
    Returns:
         nothing
    """
    data_identification_elem = xml_helper.try_get_single_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_DataIdentification"),
        xml_obj
    )
    ci_date_elems = xml_helper.try_get_element_from_xml(
        ".//" + GENERIC_NAMESPACE_TEMPLATE.format("CI_Date"),
        data_identification_elem
    )
    if not ci_date_elems:
        # No dates available
        return

    date_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Date")
    date_type_code_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("CI_DateTypeCode")

    for ci_date_elem in ci_date_elems:
        legal_date = LegalDate()
        legal_date.date = xml_helper.try_get_text_from_xml_element(ci_date_elem, date_xpath)
        legal_date.date_type_code = xml_helper.try_get_attribute_from_xml_element(
            ci_date_elem,
            "codeListValue",
            date_type_code_xpath
        )
        legal_date.date_type_code_list_url = xml_helper.try_get_attribute_from_xml_element(
            ci_date_elem,
            "codeList",
            date_type_code_xpath
        )
        self.legal_dates.append(legal_date)
def _create_formats_from_md_metadata(self, md_metadata: Element) -> list:
    """ Collects the format names from a MD_Metadata element

    If the element holds no <distributionFormat>, an empty list is returned. Otherwise the name of every
    <MD_Format> element below md_metadata is collected. MD_Format elements without a name are skipped.

    Args:
        md_metadata (Element): The xml element
    Returns:
         formats (list): The format names as found in the xml
    """
    distribution_format_elem = xml_helper.try_get_single_element_from_xml(
        ".//" + GENERIC_NAMESPACE_TEMPLATE.format("distributionFormat"),
        md_metadata
    )
    if distribution_format_elem is None:
        return []

    name_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("name") + "/" + GENERIC_NAMESPACE_TEMPLATE.format("CharacterString")
    md_format_elems = xml_helper.try_get_element_from_xml(
        ".//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_Format"),
        md_metadata
    )
    format_names = [
        xml_helper.try_get_text_from_xml_element(md_format_elem, name_xpath)
        for md_format_elem in md_format_elems
    ]
    return [format_name for format_name in format_names if format_name is not None]
def create_from_capabilities(self, metadata_only: bool = False, async_task: Task = None, external_auth: ExternalAuthentication = None):
    """ Fills the object with data from the capabilities document

    Runs the parsing steps one after another: service metadata, service operations and formats, linked
    dataset metadata, version specific metadata and - unless metadata_only is set - the layers.
    The execution time of the steps is written to the debug log.

    Args:
        metadata_only (bool): Whether the layer parsing shall be skipped
        async_task (Task): Passed through to the metadata and layer parsing methods
        external_auth (ExternalAuthentication): Not used inside of this method
    Returns:
         nothing
    """
    inspire_metadata_xpath = "//VendorSpecificCapabilities/inspire_vs:ExtendedCapabilities/inspire_common:MetadataUrl/inspire_common:URL"

    # get xml as iterable object
    capabilities_xml = xml_helper.parse_xml(xml=self.service_capabilities_xml)

    # service metadata from the capabilities document itself
    step_started = time.time()
    self.get_service_metadata_from_capabilities(xml_obj=capabilities_xml, async_task=async_task)

    # check if 'real' service metadata exist
    linked_metadata_uri = xml_helper.try_get_text_from_xml_element(
        xml_elem=capabilities_xml,
        elem=inspire_metadata_xpath
    )
    if linked_metadata_uri is not None:
        self.get_service_metadata(uri=linked_metadata_uri, async_task=async_task)
    service_logger.debug(EXEC_TIME_PRINT % ("service metadata", time.time() - step_started))

    # check possible operations on this service
    step_started = time.time()
    self.get_service_operations_and_formats(capabilities_xml)
    service_logger.debug(EXEC_TIME_PRINT % ("service operation checking", time.time() - step_started))

    # parse possible linked dataset metadata
    step_started = time.time()
    self.get_service_dataset_metadata(xml_obj=capabilities_xml)
    service_logger.debug(EXEC_TIME_PRINT % ("service iso metadata", time.time() - step_started))

    self.get_version_specific_metadata(xml_obj=capabilities_xml)

    if metadata_only:
        # Layers are not requested
        return

    step_started = time.time()
    self._parse_layers(xml_obj=capabilities_xml, async_task=async_task)
    service_logger.debug(EXEC_TIME_PRINT % ("layer metadata", time.time() - step_started))
def test_new_service_check_describing_attributes(self):
    """ Tests whether the describing attributes, such as title or abstract, are correct.

    Checks for the service.
    Checks for each layer.

    The test is currently disabled and reported as skipped.

    Returns:

    """
    # The test used to be switched off by a bare `return` in front of the docstring, which made it pass
    # silently. Skipping it explicitly keeps it disabled, but visible in the test results.
    # NOTE(review): The reason for disabling is not documented - confirm before re-enabling.
    self.skipTest("Disabled test: describing attributes of new service are not checked")

    service = self.service_wms
    layers = service.get_subelements()

    cap_xml = xml_helper.parse_xml(self.cap_doc_wms.content)

    xml_title = xml_helper.try_get_text_from_xml_element(cap_xml, "//Service/Title")
    xml_abstract = xml_helper.try_get_text_from_xml_element(cap_xml, "//Service/Abstract")

    self.assertEqual(service.metadata.title, xml_title)
    self.assertEqual(service.metadata.abstract, xml_abstract)

    # run for layers
    for layer in layers:
        xml_layer = xml_helper.try_get_single_element_from_xml(
            "//Name[text()='{}']/parent::Layer".format(layer.identifier),
            cap_xml
        )
        if xml_layer is None:
            # this might happen for layers which do not provide a unique identifier. We generate an identifier automatically in this case.
            # this generated identifier - of course - can not be found in the xml document.
            continue

        xml_title = xml_helper.try_get_text_from_xml_element(xml_layer, "./Title")
        xml_abstract = xml_helper.try_get_text_from_xml_element(xml_layer, "./Abstract")

        failure_msg = "Failed for layer with identifier '{}' and title '{}'".format(
            layer.identifier,
            layer.metadata.title
        )
        self.assertEqual(layer.metadata.title, xml_title, msg=failure_msg)
        self.assertEqual(layer.metadata.abstract, xml_abstract, msg=failure_msg)
def _transform_constraint_to_cql_recursive(upper_elem: Element):
    """ Transforms the children of a filter element recursively into a CQL like constraint string

    Connector elements (And, Or, Not) are resolved recursively. Comparison elements are transformed into
    '<PropertyName> <operator> <Literal>'. All constraints of one level are joined by the tag of the
    upper element, if it is a connector.

    Args:
        upper_elem (Element): The xml element whose children shall be transformed
    Returns:
         constraint (str): The joined constraints
    Raises:
        ValueError: If a child is neither a connector nor a supported comparison element
    """
    connector_tags = ["and", "or", "not"]
    comparison_operators = {
        "PropertyIsLike": "like",
        "PropertyIsEqualTo": "=",
        "PropertyIsNotEqualTo": "!=",
        "PropertyIsGreaterThanOrEqualTo": ">=",
        "PropertyIsGreaterThan": ">",
        "PropertyIsLessThanOrEqualTo": "<=",
        "PropertyIsLessThan": "<",
    }

    # Prevent <ogc:Filter> from being used as upper_tag joiner in the end
    upper_tag = QName(upper_elem).localname.lower()
    upper_tag = upper_tag if upper_tag in connector_tags else ""

    # NOTE(review): Nested constraints are joined without parentheses, and a connector with only one
    # child (typically <Not>) never appears in the result, since the joiner is only placed between
    # elements. Confirm against the consumer of this string before changing the output format.
    constraints = []

    # Iterating the element itself yields its direct children. getchildren() is deprecated.
    for child in upper_elem:
        child_tag = QName(child).localname

        if child_tag.lower() in connector_tags:
            constraints.append(_transform_constraint_to_cql_recursive(child))
            continue

        expr = comparison_operators.get(child_tag)
        if expr is None:
            raise ValueError("Unsupported {} found!".format(child_tag), "Filter")

        property_name = xml_helper.try_get_text_from_xml_element(
            elem="./" + GENERIC_NAMESPACE_TEMPLATE.format("PropertyName"),
            xml_elem=child
        )
        literal = xml_helper.try_get_text_from_xml_element(
            elem="./" + GENERIC_NAMESPACE_TEMPLATE.format("Literal"),
            xml_elem=child
        )

        if child_tag == "PropertyIsLike":
            wild_card = xml_helper.try_get_attribute_from_xml_element(child, "wildCard")
            # Only replace if both values exist. A missing literal or wildCard attribute would raise an
            # error, an empty wildCard would put a '%' between every single character.
            if literal is not None and wild_card:
                literal = literal.replace(wild_card, "%")

        constraints.append("{} {} {}".format(property_name, expr, literal))

    return " {} ".format(upper_tag).join(constraints)
def test_get_records_by_id(self):
    """ Test for checking if the GetRecordById is working fine or not.

    Requests exactly one record by its identifier and checks, that
        * the announced amount of matched records is 1
        * the amount of returned records matches the announced amount
        * every returned record holds the requested identifier

    Returns:

    """
    get_records_param = {
        "service": "CSW",
        "version": "2.0.2",
        "request": "GetRecordById",
        "id": self.test_id,
        "elementsetname": "full",
    }

    response = self.client.get(reverse(CSW_PATH), data=get_records_param)
    status_code = response.status_code
    content_xml = xml_helper.parse_xml(response.content)

    self.assertEqual(status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(status_code))
    self.assertIsNotNone(content_xml, INVALID_XML_MSG)

    # Check that the results are correct in amount and quality
    records_matched = xml_helper.try_get_attribute_from_xml_element(
        xml_elem=content_xml,
        attribute="numberOfRecordsMatched",
        elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults")
    )
    # Fail with a readable message instead of a TypeError from int(None)
    self.assertIsNotNone(
        records_matched,
        "csw:SearchResults or its numberOfRecordsMatched attribute is missing!"
    )
    num_returned_elems = int(records_matched)
    self.assertEqual(
        num_returned_elems,
        1,
        "More than one element returned on GetRecordsById with only one used identifier!"
    )

    real_returned_elems = xml_helper.try_get_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("Record"),
        content_xml
    )
    num_real_returned_elems = len(real_returned_elems)
    self.assertEqual(
        num_real_returned_elems,
        num_returned_elems,
        "csw:SearchResults contains wrong numberOfRecordsMatched! {} stated but {} returned!".format(
            num_returned_elems,
            num_real_returned_elems
        )
    )

    # The identifier has to be resolved relative to each record ('.//'). An absolute path ('//') starts
    # at the document root and would return the first identifier of the document for every record.
    identifier_xpath = ".//" + GENERIC_NAMESPACE_TEMPLATE.format("identifier")
    identifiers = [
        xml_helper.try_get_text_from_xml_element(real_returned_elem, identifier_xpath)
        for real_returned_elem in real_returned_elems
    ]
    identifiers_identical = [identifier == self.test_id for identifier in identifiers]

    # The message is built before the assertion runs, so missing identifiers (None) must not break the join
    self.assertTrue(
        all(identifiers_identical),
        "Elements with not matching identifier has been returned: {}".format(
            ", ".join(str(identifier) for identifier in identifiers)
        )
    )
def _get_axis_order(self, identifier: str):
    """ Returns the axis order for a given spatial reference system

    The order is read from the cache, if available. Otherwise the registry is requested twice: First for
    the reference system itself, then for the coordinate system which is linked in there. The axis
    directions of the coordinate system are cached as json and returned.

    Args:
        identifier (str): The identifier which is resolved using self.get_real_identifier
    Returns:
         order (dict): Contains the keys 'first_axis' and 'second_axis'
    """
    srs_id = self.get_real_identifier(identifier)
    cache_key = str(srs_id)

    cached_order = self.cacher.get(cache_key)
    if cached_order is not None:
        return json.loads(cached_order)

    XML_NAMESPACES["gml"] = "http://www.opengis.net/gml/3.2"

    def _fetch_xml(uri):
        # Requests the uri and returns the parsed response body
        registry_response = requests.request("Get", url=uri, proxies=PROXIES)
        return xml_helper.parse_xml(registry_response.content.decode())

    srs_xml = _fetch_xml(self.registry_uri + self.id_prefix + cache_key)
    srs_type = xml_helper.try_get_text_from_xml_element(xml_elem=srs_xml, elem="//epsg:type")

    # The element which links the coordinate system depends on the type of the reference system
    coordinate_system_xpaths = {
        "projected": "//gml:cartesianCS",
        "geographic 2D": "//gml:ellipsoidalCS",
    }
    coordinate_system_xpath = coordinate_system_xpaths.get(srs_type)
    if coordinate_system_xpath is None:
        # NOTE(review): For any other type the registry base uri itself is requested below - confirm intent
        second_level_srs_uri = ""
    else:
        coordinate_system_elem = xml_helper.try_get_single_element_from_xml(coordinate_system_xpath, srs_xml)
        second_level_srs_uri = xml_helper.get_href_attribute(xml_elem=coordinate_system_elem)

    coordinate_system_xml = _fetch_xml(self.registry_uri + second_level_srs_uri)
    axis_elems = xml_helper.try_get_element_from_xml("//gml:axisDirection", coordinate_system_xml)
    directions = [axis_elem.text for axis_elem in axis_elems]
    order = {
        "first_axis": directions[0],
        "second_axis": directions[1],
    }

    # Write this to cache, so it can be used on another request!
    self.cacher.set(cache_key, json.dumps(order))

    return order
def _get_axis_order(self, identifier: str):
    """ Returns the axis order for a given spatial reference system

    A cached order is returned directly. Otherwise the registry uri is requested (as xml) for the
    reference system, the linked coordinate system is requested afterwards and its axis directions are
    cached as json and returned.

    Args:
        identifier (str): The identifier which is resolved using self.get_real_identifier
    Returns:
         order (dict): Contains the keys 'first_axis' and 'second_axis'
    """
    srs_id = self.get_real_identifier(identifier)
    cache_key = str(srs_id)

    cached_order = self.cacher.get(cache_key)
    if cached_order is not None:
        return json.loads(cached_order)

    XML_NAMESPACES["gml"] = "http://www.opengis.net/gml/3.2"
    XML_NAMESPACES["epsg"] = "urn:x-ogp:spec:schema-xsd:EPSG:2.2:dataset"

    def _request_xml(uri):
        # change header, so the registry answers with xml
        headers = {'Accept': 'application/xml'}
        registry_response = requests.request("Get", url=uri, proxies=PROXIES, headers=headers)
        return xml_helper.parse_xml(registry_response.content.decode())

    srs_xml = _request_xml(self.registry_uri.replace("{CRS_IDENTIFIER}", cache_key))
    srs_type = xml_helper.try_get_text_from_xml_element(xml_elem=srs_xml, elem="//epsg:type")

    # Decide which element links to the coordinate system
    if srs_type == "projected":
        coordinate_system_xpath = "//gml:cartesianCS"
    elif srs_type in ("geographic 2D", "geographic 2d"):
        coordinate_system_xpath = "//gml:ellipsoidalCS"
    else:
        coordinate_system_xpath = None

    if coordinate_system_xpath is not None:
        coordinate_system_elem = xml_helper.try_get_single_element_from_xml(coordinate_system_xpath, srs_xml)
        second_level_srs_uri = xml_helper.get_href_attribute(xml_elem=coordinate_system_elem)
    else:
        # NOTE(review): An empty uri is requested below for any other type - confirm intent
        second_level_srs_uri = ""

    coordinate_system_xml = _request_xml(second_level_srs_uri)
    directions = [
        axis_elem.text
        for axis_elem in xml_helper.try_get_element_from_xml("//gml:axisDirection", coordinate_system_xml)
    ]
    order = {
        "first_axis": directions[0],
        "second_axis": directions[1],
    }

    # Write this to cache, so it can be used on another request!
    self.cacher.set(cache_key, json.dumps(order))

    return order
def _create_dataset_from_md_metadata(self, md_metadata: Element, metadata: Metadata) -> Dataset:
    """ Creates a Dataset record from xml data

    The language code is taken from the given metadata record. All other attributes are read from the
    xml element. The dataset is only created and filled here, not saved.

    Args:
        md_metadata (Element): The xml element which holds the data
        metadata (Metadata): The related metadata element
    Returns:
        dataset (Dataset): The dataset record
    """
    def _xpath(*tag_names):
        # Builds './/<tag>/<tag>/...' using the generic namespace template for each tag
        return ".//" + "/".join(GENERIC_NAMESPACE_TEMPLATE.format(tag_name) for tag_name in tag_names)

    def _text(*tag_names):
        return xml_helper.try_get_text_from_xml_element(md_metadata, _xpath(*tag_names))

    def _code_list(*tag_names):
        return xml_helper.try_get_attribute_from_xml_element(md_metadata, "codeList", _xpath(*tag_names))

    dataset = Dataset()

    # language
    dataset.language_code = metadata.language_code
    dataset.language_code_list_url = _code_list("language", "LanguageCode")

    # character set
    dataset.character_set_code = _text("characterSet", "MD_CharacterSetCode")
    dataset.character_set_code_list_url = _code_list("characterSet", "MD_CharacterSetCode")

    # metadata record information
    dataset.date_stamp = _text("dateStamp", "Date")
    dataset.metadata_standard_name = _text("metadataStandardName", "CharacterString")
    dataset.metadata_standard_version = _text("metadataStandardVersion", "CharacterString")

    # maintenance
    dataset.update_frequency_code = _text("MD_MaintenanceFrequencyCode")
    dataset.update_frequency_code_list_url = _code_list("MD_MaintenanceFrequencyCode")

    # constraints and lineage
    dataset.use_limitation = _text("useLimitation", "CharacterString")
    dataset.lineage_statement = _text("statement", "CharacterString")

    return dataset
def get_service_dataset_metadata(self, xml_obj):
    """ Merges the linked iso metadata into this object

    The link to the iso metadata is read from the capabilities document. If there is none, nothing
    happens. Otherwise keywords, dates, uri, file identifier, title, abstract and the bounding box of
    the iso metadata are written to this object.

    Args:
        xml_obj: The xml etree object which is used for parsing
    Returns:
         nothing
    """
    # Must parse metadata document and merge metadata into this metadata object
    # The wms element is the default
    md_link_xpath = "//wfs:MetadataURL" if self.service_type is OGCServiceEnum.WFS else "//inspire_common:URL"
    service_md_link = xml_helper.try_get_text_from_xml_element(elem=md_link_xpath, xml_elem=xml_obj)

    if service_md_link is None:
        # no iso metadata provided
        return

    # get iso metadata xml object
    iso_metadata = ISOMetadata(uri=service_md_link)

    # add keywords
    self.service_identification_keywords.extend(iso_metadata.keywords)

    # add multiple other data that can not be found in the capabilities document
    self.service_create_date = iso_metadata.date_stamp
    self.service_last_change = iso_metadata.last_change_date
    self.service_iso_md_uri = iso_metadata.uri
    self.service_file_iso_identifier = iso_metadata.file_identifier
    self.service_identification_title = iso_metadata.title
    self.service_identification_abstract = iso_metadata.abstract

    # Build a closed ring from the four corners of the bounding box
    extent = iso_metadata.bounding_box
    min_x = float(extent["min_x"])
    min_y = float(extent["min_y"])
    max_y = float(extent["max_y"])
    max_x = float(extent["max_x"])
    bounding_points = (
        (min_x, min_y),
        (min_x, max_y),
        (max_x, max_y),
        (max_x, min_y),
        (min_x, min_y),
    )
    self.service_bounding_box = Polygon(bounding_points)
def test_new_service_check_reference_systems(self):
    """ Tests whether the layers have all their reference systems, which are provided by the capabilities document.

    Checks for each layer.

    The test is currently disabled and reported as skipped.

    Returns:

    """
    # The test used to be switched off by a bare `return` in front of the docstring, which made it pass
    # silently. Skipping it explicitly keeps it disabled, but visible in the test results.
    # NOTE(review): The reason for disabling is not documented - confirm before re-enabling.
    self.skipTest("Disabled test: reference systems of new service are not checked")

    layers = self.service_wms.get_subelements().select_related(
        'metadata'
    ).prefetch_related(
        'metadata__reference_system'
    )

    # The xpath helpers work on a parsed document. Using the raw content would never match any layer,
    # so every layer would be skipped and nothing would be checked at all.
    cap_xml = xml_helper.parse_xml(self.cap_doc_wms.content)

    for layer in layers:
        xml_layer_obj = xml_helper.try_get_single_element_from_xml(
            "//Name[text()='{}']/parent::Layer".format(layer.identifier),
            cap_xml
        )
        if xml_layer_obj is None:
            # it is possible, that there are layers without a real identifier -> this is generally bad.
            # we have to ignore these and concentrate on those, which are identifiable
            continue

        xml_ref_systems = xml_helper.try_get_element_from_xml(
            "./" + GENERIC_NAMESPACE_TEMPLATE.format("SRS"),
            xml_layer_obj
        )
        xml_ref_systems_strings = [
            xml_helper.try_get_text_from_xml_element(xml_ref_system)
            for xml_ref_system in xml_ref_systems
        ]

        for ref_system in layer.metadata.reference_system.all():
            self.assertTrue(
                ref_system.code in ALLOWED_SRS,
                msg="Unallowed reference system registered: {}".format(ref_system.code)
            )
            self.assertTrue(
                ref_system.code in xml_ref_systems_strings,
                msg="Reference system registered, which was not in the service: {}".format(ref_system.code)
            )
def _parse_xml_polygons(self, xml_obj: _Element, xpath_type: str):
    """ Parse the polygon information from the xml document

    The result is appended to self.polygonal_extent_exterior, using the first matching source:
        1. every gml:surfaceMember of a gml:MultiSurface
        2. a single gml:Polygon
        3. the bounding box of this object (self.bounding_box), if no polygon is provided

    Args:
        xml_obj (_Element): The xml element
        xpath_type (str): The element identificator which is determined by SV_ServiceIdentification or MD_DataIdentification
    Returns:
         nothing
    """
    polygon_xpath = "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_BoundingPolygon/gmd:polygon".format(
        xpath_type
    )

    multi_surfaces = xml_helper.try_get_element_from_xml(
        xml_elem=xml_obj,
        elem=polygon_xpath + "/gml:MultiSurface"
    )
    if multi_surfaces:
        surface_elements = xml_helper.try_get_element_from_xml(
            xml_elem=xml_obj,
            elem=polygon_xpath + "/gml:MultiSurface/gml:surfaceMember"
        )
        for element in surface_elements:
            self.polygonal_extent_exterior.append(self.parse_polygon(element))
        return

    # Check for the element itself, not for its text. A <gml:Polygon> only holds child elements, so its
    # text is empty if the document contains no whitespace between the tags. The polygon would be
    # ignored in this case and the bounding box would be used instead.
    gml_polygon = xml_helper.try_get_single_element_from_xml(
        xml_elem=xml_obj,
        elem=polygon_xpath + "/gml:Polygon"
    )
    if gml_polygon is not None:
        # parse_polygon receives the surrounding <gmd:polygon> element
        polygon = xml_helper.try_get_single_element_from_xml(
            xml_elem=xml_obj,
            elem=polygon_xpath
        )
        self.polygonal_extent_exterior.append(self.parse_polygon(polygon))
    else:
        # No polygon provided - fall back to the bounding box
        self.polygonal_extent_exterior.append(self.parse_bbox(self.bounding_box))
def test_get_records_constraint(self):
    """ Test whether the constraint parameter is working properly

    Requests all records whose dc:identifier is like the test identifier and checks, that every returned
    identifier contains the test identifier.

    Returns:

    """
    request_params = {
        "service": "CSW",
        "version": "2.0.2",
        "request": "GetRecords",
        "elementsetname": "brief",
        "resulttype": "results",
        "constraint": "dc:identifier like %{}%".format(self.test_id),
        "constraintlanguage": "CQL_TEXT",
    }

    response = self.client.get(reverse(CSW_PATH), data=request_params)
    parsed_response = xml_helper.parse_xml(response.content)

    self.assertEqual(response.status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
    self.assertIsNotNone(parsed_response, INVALID_XML_MSG)

    # Collect all returned identifiers and check whether each one fits to the constraint
    identifier_elems = xml_helper.try_get_element_from_xml(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("identifier"),
        parsed_response
    )
    returned_identifiers = [
        xml_helper.try_get_text_from_xml_element(identifier_elem)
        for identifier_elem in identifier_elems
    ]
    constraint_matches = [
        self.test_id in returned_identifier
        for returned_identifier in returned_identifiers
    ]
    self.assertTrue(
        all(constraint_matches),
        "A result was returned, which does not fit to the given constraint parameter!"
    )
def parse_xml(self):
    """ Reads the needed data from the xml and writes to an ISOMetadata instance (self)

    The raw document is taken from self.raw_metadata. All values are looked up using xpath
    expressions and are written to attributes of self. Missing values generally stay None,
    except where an explicit fallback is set below (file identifier, last change date,
    temporal extent).

    Notes on robustness:
        * Keywords are de-duplicated by their text value.
        * The language element is checked using `is not None`, since an xml element without
          children evaluates to False in lxml.
        * A bounding box with missing or non-numeric values results in self.bounding_box = None.
        * otherConstraints entries which are a json object are stored as license information,
          every other non-empty entry is collected as plain text in self.fees.

    Returns:
        nothing
    """
    get_text = xml_helper.try_get_text_from_xml_element
    get_attribute = xml_helper.try_get_attribute_from_xml_element
    get_elements = xml_helper.try_get_element_from_xml
    get_single_element = xml_helper.try_get_single_element_from_xml

    xml_obj = xml_helper.parse_xml(self.raw_metadata)

    self.file_identifier = get_text(
        xml_obj,
        "//gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString")
    self.character_set_code = get_attribute(
        xml_elem=xml_obj,
        attribute="codeListValue",
        elem="//gmd:MD_Metadata/gmd:characterSet/gmd:MD_CharacterSetCode")
    if self.file_identifier is None:
        self.file_identifier = uuid.uuid4()
    self.date_stamp = get_text(
        xml_obj, "//gmd:MD_Metadata/gmd:dateStamp/gco:Date")
    self.last_change_date = get_text(
        xml_obj, "//gmd:MD_Metadata/gmd:dateStamp/gco:Date")
    self.md_standard_name = get_text(
        xml_obj, "//gmd:metadataStandardName/gco:CharacterString")
    self.md_standard_version = get_text(
        xml_obj, "//gmd:metadataStandardVersion/gco:CharacterString")

    self._parse_xml_legal_dates(xml_obj)
    self._parse_xml_legal_reports(xml_obj)

    # try to transform the last_change_date into a datetime object
    try:
        self.last_change_date = parse(self.last_change_date,
                                      tzinfo=timezone.utc)
    except (ValueError, OverflowError, TypeError):
        # if this is not possible due to wrong input, just use the current time...
        self.last_change_date = timezone.now()

    self.hierarchy_level = get_attribute(
        xml_obj, "codeListValue",
        "//gmd:MD_Metadata/gmd:hierarchyLevel/gmd:MD_ScopeCode")
    if self.hierarchy_level == "service":
        xpath_type = "srv:SV_ServiceIdentification"
    else:
        xpath_type = "gmd:MD_DataIdentification"

    # Common prefix of all xpaths below the identification element
    ident = "//gmd:MD_Metadata/gmd:identificationInfo/{}".format(xpath_type)

    self.title = get_text(
        xml_obj,
        ident + "/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString")
    self._parse_xml_dataset_id(xml_obj, xpath_type)
    self.abstract = get_text(
        xml_obj, ident + "/gmd:abstract/gco:CharacterString")

    keywords = get_elements(
        xml_elem=xml_obj,
        elem=ident +
        "/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString")
    for keyword in keywords:
        if keyword.text is None:
            continue
        # Compare the text (not the element) with the already known keywords
        keyword_text = get_text(keyword)
        if keyword_text not in self.keywords:
            self.keywords.append(keyword_text)

    language = get_single_element(
        xml_elem=xml_obj,
        elem=ident + "/gmd:language/gmd:LanguageCode")
    # Do not use the truth value of the element: it is False for elements without children
    if language is not None and language.text is not None:
        self.language = get_text(language)

    iso_categories = get_elements(
        xml_elem=xml_obj,
        elem=ident + "/gmd:topicCategory/gmd:MD_TopicCategoryCode")
    if iso_categories:
        for iso_category in iso_categories:
            self.iso_categories.append(get_text(iso_category))

    # Get all values from <gmd:distributionInfo> which declares the distributionFormat
    formats = get_elements(
        xml_elem=xml_obj,
        elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("distributionFormat"))
    if formats:
        for format_elem in formats:
            # get the character value per format
            name_elem = get_single_element(
                xml_elem=format_elem,
                elem=".//" + GENERIC_NAMESPACE_TEMPLATE.format("name"))
            if name_elem is None:
                continue
            val = get_text(
                xml_elem=name_elem,
                elem=".//" +
                GENERIC_NAMESPACE_TEMPLATE.format("CharacterString"))
            self.formats.append(val)

    self.download_link = get_text(
        xml_obj,
        '//gmd:MD_Metadata/gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[gmd:function/gmd:CI_OnLineFunctionCode/@codeListValue="download"]/gmd:linkage/gmd:URL'
    )
    self.transfer_size = get_text(
        xml_obj,
        '//gmd:MD_Metadata/gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:transferSize/gco:Real'
    )
    self.preview_image = get_text(
        xml_obj,
        ident +
        "/gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileName/gco:CharacterString")

    bbox_xpaths = (
        ("min_x", "//gmd:westBoundLongitude/gco:Decimal"),
        ("min_y", "//gmd:southBoundLatitude/gco:Decimal"),
        ("max_x", "//gmd:eastBoundLongitude/gco:Decimal"),
        ("max_y", "//gmd:northBoundLatitude/gco:Decimal"),
    )
    try:
        for key, bbox_xpath in bbox_xpaths:
            self.bounding_box[key] = float(get_text(xml_obj, bbox_xpath))
    except (TypeError, ValueError):
        # TypeError: value is missing (None), ValueError: value is not a number
        self.bounding_box = None

    self._parse_xml_polygons(xml_obj, xpath_type)

    temporal_xpath = ident + "/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod"
    self.tmp_extent_begin = get_text(
        xml_obj, temporal_xpath + "/gml:beginPosition")
    if self.tmp_extent_begin is None:
        self.tmp_extent_begin = "1900-01-01"
    self.tmp_extent_end = get_text(
        xml_obj, temporal_xpath + "/gml:endPosition")
    if self.tmp_extent_end is None:
        self.tmp_extent_end = "1900-01-01"

    equivalent_scale = get_text(
        xml_obj,
        ident +
        "/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer")
    ground_res = get_text(
        xml_obj,
        ident +
        "/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gco:Distance")
    # The scale denominator is preferred over the ground distance
    if equivalent_scale is not None and int(equivalent_scale) > 0:
        self.spatial_res_val = equivalent_scale
        self.spatial_res_type = "scaleDenominator"
    elif ground_res is not None and len(ground_res) > 0:
        self.spatial_res_val = ground_res
        self.spatial_res_type = "groundDistance"

    rs_identifier_xpath = "//gmd:MD_Metadata/gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/gmd:RS_Identifier"
    self.ref_system = get_text(
        xml_obj, rs_identifier_xpath + "/gmd:code/gco:CharacterString")
    self.ref_system_version = get_text(
        xml_obj, rs_identifier_xpath + "/gmd:version/gco:CharacterString")
    self.ref_system_authority = get_text(
        xml_obj,
        rs_identifier_xpath +
        "/gmd:authority/gmd:CI_Citation/gmd:title/gco:CharacterString")
    epsg_api = EpsgApi()
    if self.ref_system is not None:
        self.ref_system = "EPSG:{}".format(
            epsg_api.get_subelements(self.ref_system).get("code"))

    # gmd:CI_OnLineFunctionCode
    dist_func_elem = get_single_element(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("CI_OnLineFunctionCode"),
        xml_obj)
    self.distribution_function = get_attribute(
        dist_func_elem,
        "codeListValue",
    )

    # gmd:MD_RepresentativeFraction
    fraction_elem = get_single_element(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_RepresentativeFraction"),
        xml_obj)
    self.fraction_denominator = get_text(
        fraction_elem, ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Integer"))

    # gmd:useLimitation
    limit_elem = get_single_element(
        "//" + GENERIC_NAMESPACE_TEMPLATE.format("useLimitation"), xml_obj)
    self.use_limitation = get_text(
        limit_elem,
        ".//" + GENERIC_NAMESPACE_TEMPLATE.format("CharacterString"))

    self.lineage = get_text(
        xml_obj,
        "//gmd:MD_Metadata/gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:statement/gco:CharacterString"
    )

    restriction_code_attr_val = get_elements(
        xml_elem=xml_obj,
        elem=ident +
        '/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:useConstraints/gmd:MD_RestrictionCode/@codeListValue')
    if len(restriction_code_attr_val) >= 2:
        legal_constraints = ""
        if restriction_code_attr_val[
                0] == 'license' and restriction_code_attr_val[
                    1] == 'otherRestrictions':
            other_constraints = get_elements(
                xml_elem=xml_obj,
                elem=ident +
                '/gmd:resourceConstraints/gmd:MD_LegalConstraints[gmd:useConstraints/gmd:MD_RestrictionCode/@codeListValue="otherRestrictions"]/gmd:otherConstraints/gco:CharacterString')
            for constraint in other_constraints:
                tmp_constraint = get_text(xml_elem=constraint)
                if tmp_constraint is None:
                    # Empty element: neither json nor text to collect
                    continue
                try:
                    constraint_json = json.loads(tmp_constraint)
                except ValueError:
                    # no, this is not a json!
                    constraint_json = None
                if isinstance(constraint_json, dict):
                    self.license_source_note = constraint_json.get(
                        "quelle", None)
                    self.license_json = constraint_json
                else:
                    # handle it as a normal text
                    legal_constraints += tmp_constraint + ";"
        self.fees = legal_constraints

    self.access_constraints = get_text(
        xml_obj,
        ident +
        '/gmd:resourceConstraints/gmd:MD_LegalConstraints[gmd:accessConstraints/gmd:MD_RestrictionCode/@codeListValue="otherRestrictions"]/gmd:otherConstraints/gco:CharacterString')

    responsible_party_xpath = ident + '/gmd:pointOfContact/gmd:CI_ResponsibleParty'
    self.responsible_party = get_text(
        xml_obj,
        responsible_party_xpath + '/gmd:organisationName/gco:CharacterString')
    self.contact_person = get_text(
        xml_obj,
        responsible_party_xpath + '/gmd:individualName/gco:CharacterString')
    self.contact_phone = get_text(
        xml_obj,
        responsible_party_xpath +
        '/gmd:contactInfo/gmd:CI_Contact/gmd:phone/gmd:CI_Telephone/gmd:voice/gco:CharacterString')
    self.contact_email = get_text(
        xml_obj,
        responsible_party_xpath +
        '/gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString')

    update_frequency = get_attribute(
        xml_elem=xml_obj,
        attribute="codeListValue",
        elem=ident +
        '/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode')
    if update_frequency in self.valid_update_frequencies:
        self.update_frequency = update_frequency

    # inspire regulations
    with open(INSPIRE_LEGISLATION_FILE, "r", encoding="utf-8") as _file:
        legislations = json.load(_file)
    for legislation in legislations["inspire_rules"]:
        reg = {
            "name": legislation.get("name", None),
            "date": legislation.get("date", "1900-01-01"),
            "pass": None,
        }
        statement = get_text(
            xml_obj,
            '//gmd:MD_Metadata/gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/gmd:result/gmd:DQ_ConformanceResult[gmd:specification/gmd:CI_Citation/gmd:title/gco:CharacterString="{}" and gmd:specification/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date="{}"]/gmd:pass/gco:Boolean'
            .format(reg["name"], reg["date"]))
        statement_val = utils.resolve_boolean_attribute_val(statement)
        if statement_val is None:
            reg["pass"] = "******"
            self.inspire_interoperability = False
        else:
            reg["pass"] = statement_val
            # if only one legislation is not fulfilled, we do not have interoperability
            if not statement_val:
                self.inspire_interoperability = False
        self.interoperability_list.append(reg)
def parse_polygon(self, polygon_elem):
    """ Creates a polygon from the exterior ring of a gml:Polygon

    The ring is read from gml:posList (whitespace separated values, x and y alternating).
    If there is no gml:posList, gml:coordinates is used (points are whitespace separated,
    the values of one point are comma separated).

    The resulting Polygon is constructed from two rings, in this order:
        1. the axis-aligned bounding rectangle of all ring points
        2. the ring points themselves, closed if the source ring was not closed

    Args:
        polygon_elem: The etree xml element which is the direct parent of the gml:Polygon
    Returns:
         polygon (Polygon): The polygon object created from the data
    Raises:
        ValueError: If no ring point could be read or a value is not a number
    """
    relative_ring_xpath = "./gml:Polygon/gml:exterior/gml:LinearRing/gml:posList"
    relative_coordinate_xpath = "./gml:Polygon/gml:exterior/gml:LinearRing/gml:coordinates"

    pos_list = xml_helper.try_get_element_from_xml(
        xml_elem=polygon_elem, elem=relative_ring_xpath)
    if len(pos_list) > 0:
        ring_text = xml_helper.try_get_text_from_xml_element(
            xml_elem=polygon_elem, elem=relative_ring_xpath) or ""
        # posList is only whitespace separated: x1 y1 x2 y2 ...
        # A trailing value without partner is ignored
        values = ring_text.split()
        raw_points = zip(values[0::2], values[1::2])
    else:
        # try to read coordinates
        ring_text = xml_helper.try_get_text_from_xml_element(
            xml_elem=polygon_elem, elem=relative_coordinate_xpath) or ""
        # two coordinates of one point are comma separated
        # problems with ', ' or ' ,' -> must be deleted before
        ring_text = ring_text.replace(', ', ',').replace(' ,', ',')
        raw_points = (point.split(",") for point in ring_text.split())

    inner_points = tuple(
        (float(point[0]), float(point[1])) for point in raw_points)
    if not inner_points:
        raise ValueError(
            "No exterior ring coordinates found in polygon element")

    # Derive the bounds from the points themselves, so negative and large values work as well
    x_values = [x for x, _ in inner_points]
    y_values = [y for _, y in inner_points]
    min_x, max_x = min(x_values), max(x_values)
    min_y, max_y = min(y_values), max(y_values)
    bounding_points = ((min_x, min_y), (min_x, max_y), (max_x, max_y),
                       (max_x, min_y), (min_x, min_y))

    if inner_points[0] != inner_points[-1]:
        # polygon is not closed!
        inner_points = inner_points + (inner_points[0], )

    polygon = Polygon(bounding_points, inner_points)
    return polygon
def _create_contact_from_md_metadata(self, md_metadata: Element) -> Organization:
    """ Builds the Organization (contact) record which belongs to an MD_Metadata element

    The values are read from the first CI_ResponsibleParty element. Every missing value is
    stored as empty string. If creating the record raises an IntegrityError, the already
    existing record with the same values is fetched instead.

    Args:
        md_metadata (Element): The xml element
    Returns:
         org (Organization): The organization element, None if there is no CI_ResponsibleParty
    """
    ns = GENERIC_NAMESPACE_TEMPLATE.format

    def text_at(node, prefix, *tags):
        # Text of the element found by the '/'-joined tags below node, "" if there is none
        xpath = prefix + "/".join(ns(tag) for tag in tags)
        return xml_helper.try_get_text_from_xml_element(node, xpath) or ""

    resp_party_elem = xml_helper.try_get_single_element_from_xml(
        ".//" + ns("CI_ResponsibleParty"), md_metadata)
    if resp_party_elem is None:
        return None

    # NOTE(review): the address is searched in the whole md_metadata element, not only
    # inside of the responsible party found above - confirm that this is intended
    address_elem = xml_helper.try_get_single_element_from_xml(
        ".//" + ns("CI_Address"), md_metadata)
    if address_elem is None:
        # Fallback values
        address = city = postal_code = country = email = state = ""
    else:
        address = text_at(address_elem, ".//", "deliveryPoint", "CharacterString")
        city = text_at(address_elem, ".//", "city", "CharacterString")
        postal_code = text_at(address_elem, ".//", "postalCode", "CharacterString")
        country = text_at(address_elem, ".//", "country", "CharacterString")
        email = text_at(address_elem, ".//", "electronicMailAddress", "CharacterString")
        state = text_at(address_elem, ".//", "administrativeArea", "CharacterString")

    organization_values = {
        "person_name": text_at(
            resp_party_elem, "./", "individualName", "CharacterString"),
        "organization_name": text_at(
            resp_party_elem, "./", "organisationName", "CharacterString"),
        "phone": text_at(
            resp_party_elem, ".//", "CI_Telephone", "voice", "CharacterString"),
        "facsimile": text_at(
            resp_party_elem, ".//", "CI_Telephone", "facsimile", "CharacterString"),
        "address": address,
        "city": city,
        "postal_code": postal_code,
        "country": country,
        "email": email,
        "state_or_province": state,
        "is_auto_generated": True,
        "description": text_at(
            resp_party_elem, ".//", "CI_OnlineResource", "linkage", "URL"),
    }

    try:
        org = Organization.objects.create(**organization_values)
    except IntegrityError:
        # The very same record exists already
        org = Organization.objects.get(**organization_values)
    return org
def _md_metadata_parse_to_dict(self, md_metadata_entries: list) -> list:
    """ Read most important data from MD_Metadata xml element

    Entries whose hierarchy level is not enabled in HARVEST_METADATA_TYPES are skipped.
    Each resulting dict holds the keys: metadata_type, id, parent_id, title, language_code,
    date_stamp, abstract, links, keywords, access_constraints, categories, bounding_geometry,
    contact and formats. If one of the links is a GetCapabilities request, the keys
    capabilities_original_url, service_type and version are set as well.

    A date stamp which is missing or can not be parsed results in date_stamp = None, so a
    single broken record does not abort the whole batch.

    Args:
        md_metadata_entries (list): The xml MD_Metadata elements
    Returns:
         ret_list (list): The list containing dicts
    """
    ns = GENERIC_NAMESPACE_TEMPLATE.format
    get_text = xml_helper.try_get_text_from_xml_element

    def xpath(prefix, *tags):
        # Joins the namespaced tags with '/' and prepends the axis prefix ('./' or './/')
        return prefix + "/".join(ns(tag) for tag in tags)

    ret_list = []
    for md_metadata in md_metadata_entries:
        md_data_entry = {}

        # Check before anything else, whether this metadata type can be skipped!
        hierarchy_level = xml_helper.try_get_attribute_from_xml_element(
            md_metadata, "codeListValue",
            xpath(".//", "hierarchyLevel", "MD_ScopeCode"))
        metadata_type = hierarchy_level
        md_data_entry["metadata_type"] = metadata_type
        if not HARVEST_METADATA_TYPES.get(metadata_type, False):
            continue

        _id = get_text(
            md_metadata, xpath(".//", "fileIdentifier", "CharacterString"))
        md_data_entry["id"] = _id

        parent_id = get_text(
            md_metadata, xpath(".//", "parentIdentifier", "CharacterString"))
        md_data_entry["parent_id"] = parent_id

        # A workaround, so we do not need to check whether SV_ServiceIdentification or MD_DataIdentification is present
        # in this metadata: Simply take the direct parent and perform a deeper nested search on the inside of this element.
        # Yes, we could simply decide based on the hierarchyLevel attribute whether to search for SV_xxx or MD_yyy.
        # No, there are metadata entries which do not follow these guidelines and have "service" with MD_yyy
        # Yes, they are important since they can be found in the INSPIRE catalogue (07/2020)
        identification_elem = xml_helper.try_get_single_element_from_xml(
            xml_elem=md_metadata, elem=xpath(".//", "identificationInfo"))
        title = get_text(
            identification_elem,
            xpath(".//", "citation", "CI_Citation", "title", "CharacterString"))
        md_data_entry["title"] = title

        language_code = xml_helper.try_get_attribute_from_xml_element(
            md_metadata, "codeListValue",
            xpath(".//", "language", "LanguageCode"))
        md_data_entry["language_code"] = language_code

        # The date stamp is given either as Date or as DateTime
        date_stamp = get_text(
            md_metadata, xpath("./", "dateStamp", "Date")
        ) or get_text(
            md_metadata, xpath("./", "dateStamp", "DateTime"))
        try:
            md_data_entry["date_stamp"] = parse(date_stamp).replace(
                tzinfo=utc)
        except (TypeError, ValueError, OverflowError):
            # TypeError: no date stamp found, ValueError/OverflowError: not parsable
            md_data_entry["date_stamp"] = None

        abstract = get_text(
            md_metadata, xpath(".//", "abstract", "CharacterString"))
        md_data_entry["abstract"] = abstract

        digital_transfer_elements = xml_helper.try_get_element_from_xml(
            xml_elem=md_metadata,
            elem=xpath(".//", "MD_DigitalTransferOptions"))
        links = []
        for elem in digital_transfer_elements:
            links_entry = {}
            resource_link = get_text(
                elem,
                xpath(".//", "onLine", "CI_OnlineResource", "linkage", "URL"),
            )
            descr = get_text(
                elem,
                xpath(".//", "onLine", "CI_OnlineResource", "description",
                      "CharacterString"))
            links_entry["link"] = resource_link
            links_entry["description"] = descr

            if resource_link is not None:
                # Check on the type of online_resource we found -> could be GetCapabilities
                query_params = parse_qs(
                    urlparse(resource_link.lower()).query)
                if OGCOperationEnum.GET_CAPABILITIES.value.lower(
                ) in query_params.get("request", []):
                    # Parse all possibly relevant data from the dict
                    version = query_params.get("version", [None])
                    service_type = query_params.get("service", [None])
                    md_data_entry[
                        "capabilities_original_url"] = resource_link
                    md_data_entry["service_type"] = service_type[0]
                    md_data_entry["version"] = version[0]
            links.append(links_entry)
        md_data_entry["links"] = links

        keywords = xml_helper.try_get_element_from_xml(
            xpath(".//", "keyword", "CharacterString"),
            md_metadata,
        ) or []
        keywords = [get_text(kw) for kw in keywords]
        md_data_entry["keywords"] = keywords

        access_constraints = get_text(
            md_metadata, xpath(".//", "otherConstraints", "CharacterString"))
        md_data_entry["access_constraints"] = access_constraints

        categories = xml_helper.try_get_element_from_xml(
            xpath(".//", "MD_TopicCategoryCode"),
            md_metadata,
        ) or []
        categories = [get_text(cat) for cat in categories]
        md_data_entry["categories"] = categories

        bbox_elem = xml_helper.try_get_single_element_from_xml(
            xpath(".//", "EX_GeographicBoundingBox"), md_metadata)
        if bbox_elem is not None:
            # Order: west, south, east, north - missing values are replaced by "0.0"
            extent = [
                get_text(bbox_elem, xpath(".//", bound, "Decimal")) or "0.0"
                for bound in ("westBoundLongitude", "southBoundLatitude",
                              "eastBoundLongitude", "northBoundLatitude")
            ]
            # There are metadata with wrong vertex notations like 50,3 instead of 50.3
            # We should just drop them, since they are not compatible with the specifications but in here, we make an
            # exception and replace , since it's quite easy
            extent = [vertex.replace(",", ".") for vertex in extent]
            try:
                bounding_geometry = GEOSGeometry(
                    Polygon.from_bbox(bbox=extent), srid=DEFAULT_SRS)
            except Exception:
                # Log malformed extent and continue with an empty geometry
                csw_logger.warning(
                    CSW_EXTENT_WARNING_LOG_TEMPLATE.format(
                        _id, self.metadata.title, extent))
                bounding_geometry = DEFAULT_SERVICE_BOUNDING_BOX_EMPTY
        else:
            bounding_geometry = DEFAULT_SERVICE_BOUNDING_BOX_EMPTY
        md_data_entry["bounding_geometry"] = bounding_geometry

        md_data_entry["contact"] = self._create_contact_from_md_metadata(
            md_metadata)
        md_data_entry["formats"] = self._create_formats_from_md_metadata(
            md_metadata)

        # Load non-metadata data
        # ToDo: Should harvesting persist non-metadata data?!
        #described_resource = None
        #metadata = None
        #if hierarchy_level == MetadataEnum.DATASET.value:
        #    described_resource = self._create_dataset_from_md_metadata(md_metadata, metadata)
        #    described_resource.metadata = metadata
        #    described_resource.is_active = True
        #    described_resource.save()

        ret_list.append(md_data_entry)
    return ret_list
def get_service_metadata_from_capabilities(self, xml_obj):
    """ Copies the service identification and service provider metadata of a capabilities document to self

    The identification values (title, abstract, fees, access constraints, keywords) are read
    below the ServiceIdentification element, the provider values are searched in the whole
    document.

    Args:
        xml_obj: The parsed capabilities document, as accepted by the xml_helper functions
    Returns:
         Nothing
    """
    ns = GENERIC_NAMESPACE_TEMPLATE.format
    service_xml = xml_helper.try_get_single_element_from_xml(
        "//" + ns("ServiceIdentification"),
        xml_obj
    )

    def identification_text(tag):
        # Text of a direct child of the ServiceIdentification element
        return xml_helper.try_get_text_from_xml_element(
            xml_elem=service_xml,
            elem="./" + ns(tag)
        )

    def document_text(tag):
        # Text of the first matching element anywhere in the document
        return xml_helper.try_get_text_from_xml_element(
            xml_elem=xml_obj,
            elem="//" + ns(tag)
        )

    self.service_identification_title = identification_text("Title")

    if current_task:
        # Report the progress, now that the title of the service is known
        task_meta = {
            'service': self.service_identification_title,
            'phase': 'Parsing main capabilities',
        }
        current_task.update_state(state=states.STARTED, meta=task_meta)

    self.service_identification_abstract = identification_text("Abstract")
    self.service_identification_fees = identification_text("Fees")
    self.service_identification_accessconstraints = identification_text("AccessConstraints")

    keyword_elems = xml_helper.try_get_element_from_xml(
        xml_elem=service_xml,
        elem="./" + ns("Keywords") + "/" + ns("Keyword")
    )
    # Keywords without text are dropped
    self.service_identification_keywords = [
        keyword_text
        for keyword_text in (keyword_elem.text for keyword_elem in keyword_elems)
        if keyword_text is not None
    ]

    self.service_provider_providername = document_text("ProviderName")

    provider_site_elem = xml_helper.try_get_single_element_from_xml(
        "//" + ns("ProviderSite"),
        xml_obj
    )
    self.service_provider_url = xml_helper.get_href_attribute(xml_elem=provider_site_elem)

    # attribute of self -> element name
    provider_fields = (
        ("service_provider_responsibleparty_individualname", "IndividualName"),
        ("service_provider_responsibleparty_positionname", "PositionName"),
        ("service_provider_telephone_voice", "Voice"),
        ("service_provider_telephone_facsimile", "Facsimile"),
        ("service_provider_address", "DeliveryPoint"),
        ("service_provider_address_city", "City"),
        ("service_provider_address_state_or_province", "AdministrativeArea"),
        ("service_provider_address_postalcode", "PostalCode"),
        ("service_provider_address_country", "Country"),
        ("service_provider_address_electronicmailaddress", "ElectronicMailAddress"),
    )
    for attribute_name, tag in provider_fields:
        setattr(self, attribute_name, document_text(tag))

    online_resource_elem = xml_helper.try_get_single_element_from_xml(
        xml_elem=xml_obj,
        elem="//" + ns("OnlineResource")
    )
    self.service_provider_onlineresource_linkage = xml_helper.get_href_attribute(online_resource_elem)
    if self.service_provider_onlineresource_linkage in (None, ""):
        # There are metadatas where no online resource link is given. We need to generate it manually therefore...
        self.service_provider_onlineresource_linkage = service_helper.split_service_uri(
            self.service_connect_url).get("base_uri")
        self.service_provider_onlineresource_linkage = service_helper.prepare_original_uri_stump(
            self.service_provider_onlineresource_linkage)

    for attribute_name, tag in (
            ("service_provider_contact_hoursofservice", "HoursOfService"),
            ("service_provider_contact_contactinstructions", "ContactInstructions"),
    ):
        setattr(self, attribute_name, document_text(tag))
def parse_identifier(self, layer, layer_obj):
    """ Stores the text of the layer's direct <Name> child as identifier of the layer object

    Args:
        layer: The layer xml element
        layer_obj: The object which receives the identifier attribute
    Returns:
         nothing
    """
    name_xpath = "./" + GENERIC_NAMESPACE_TEMPLATE.format("Name")
    identifier = xml_helper.try_get_text_from_xml_element(
        xml_elem=layer,
        elem=name_xpath,
    )
    layer_obj.identifier = identifier
def parse_abstract(self, layer, layer_obj):
    """ Stores the text of the layer's direct <Abstract> child as abstract of the layer object

    Args:
        layer: The layer xml element
        layer_obj: The object which receives the abstract attribute
    Returns:
         nothing
    """
    abstract_xpath = "./" + GENERIC_NAMESPACE_TEMPLATE.format("Abstract")
    abstract = xml_helper.try_get_text_from_xml_element(
        xml_elem=layer,
        elem=abstract_xpath,
    )
    layer_obj.abstract = abstract
def get_service_metadata_from_capabilities(self, xml_obj):
    """ Reads the <Service> section of a capabilities document into this object

    Covers the <Service> element information which can be found in every wms
    specification since 1.0.0: identification data, provider contact data,
    keywords and the online resource link. Afterwards the request uris are
    parsed from the same document.

    Args:
        xml_obj: The iterable xml object tree
    Returns:
        Nothing
    """
    ns_tag = GENERIC_NAMESPACE_TEMPLATE.format

    def xpath(prefix, *tags):
        # Joins the namespaced tag names with "/" below the given prefix
        return prefix + "/".join(ns_tag(tag) for tag in tags)

    service_xml = xml_helper.try_get_single_element_from_xml(
        xpath("//", "Service"),
        xml_obj
    )

    def service_text(*tags):
        # Text lookup relative to the <Service> element
        return xml_helper.try_get_text_from_xml_element(
            service_xml,
            xpath("./", *tags)
        )

    # --- identification ---
    self.service_file_identifier = service_text("Name")
    self.service_identification_abstract = service_text("Abstract")
    self.service_identification_title = service_text("Title")

    # Report progress only if we are running inside a task
    if current_task:
        current_task.update_state(
            state=states.STARTED,
            meta={
                'service': self.service_identification_title,
                'phase': "Parsing main capabilities",
            }
        )

    self.service_identification_fees = service_text("Fees")
    self.service_identification_accessconstraints = service_text("AccessConstraints")

    # --- provider / contact ---
    self.service_provider_providername = service_text(
        "ContactInformation", "ContactPersonPrimary", "ContactOrganization"
    )

    # The authority url is searched in the whole document, not only below <Service>
    authority_elem = xml_helper.try_get_single_element_from_xml(
        elem=xpath("//", "AuthorityURL"),
        xml_elem=xml_obj
    )
    self.service_provider_url = xml_helper.get_href_attribute(authority_elem)

    # NOTE(review): this reads the text of the <ContactInformation> element itself,
    # not of a dedicated child element - confirm that this is intended
    self.service_provider_contact_contactinstructions = service_text("ContactInformation")

    self.service_provider_responsibleparty_individualname = service_text(
        "ContactInformation", "ContactPersonPrimary", "ContactPerson"
    )
    self.service_provider_responsibleparty_positionname = service_text(
        "ContactInformation", "ContactPersonPrimary", "ContactPosition"
    )
    self.service_provider_telephone_voice = service_text(
        "ContactInformation", "ContactVoiceTelephone"
    )
    self.service_provider_telephone_facsimile = service_text(
        "ContactInformation", "ContactFacsimileTelephone"
    )
    self.service_provider_address_electronicmailaddress = service_text(
        "ContactInformation", "ContactElectronicMailAddress"
    )

    # --- keywords ---
    keyword_elems = xml_helper.try_get_element_from_xml(
        xpath("./", "KeywordList", "Keyword"),
        service_xml
    )
    self.service_identification_keywords = [
        keyword_elem.text
        for keyword_elem in keyword_elems
        if keyword_elem is not None
    ]

    # --- online resource ---
    online_res_elem = xml_helper.try_get_single_element_from_xml(
        xpath("//", "Service", "OnlineResource"),
        xml_obj
    )
    self.service_provider_onlineresource_linkage = xml_helper.get_href_attribute(online_res_elem)

    # --- address ---
    self.service_provider_address_country = service_text(
        "ContactInformation", "ContactAddress", "Country"
    )
    self.service_provider_address_postalcode = service_text(
        "ContactInformation", "ContactAddress", "PostCode"
    )
    self.service_provider_address_city = service_text(
        "ContactInformation", "ContactAddress", "City"
    )
    self.service_provider_address_state_or_province = service_text(
        "ContactInformation", "ContactAddress", "StateOrProvince"
    )
    self.service_provider_address = service_text(
        "ContactInformation", "ContactAddress", "Address"
    )

    # parse request uris from capabilities document
    self.parse_request_uris(xml_obj, self)
def parse_title(self, layer, layer_obj):
    """ Stores the text of the layer's direct <Title> child as title

    Args:
        layer: The layer xml element
        layer_obj: The object which receives the title
    Returns:
        Nothing
    """
    title_xpath = "./" + GENERIC_NAMESPACE_TEMPLATE.format("Title")
    title = xml_helper.try_get_text_from_xml_element(
        xml_elem=layer,
        elem=title_xpath
    )
    layer_obj.title = title