Beispiel #1
0
    def _get_axis_order(self, identifier: str):
        """ Returns the axis order for a given spatial result system

        Args:
            identifier:
        Returns:

        """
        id = self.get_real_identifier(identifier)

        axis_order = self.cacher.get(str(id))
        if axis_order is not None:
            axis_order = json.loads(axis_order)
            return axis_order

        XML_NAMESPACES["gml"] = "http://www.opengis.net/gml/3.2"

        uri = self.registry_uri + self.id_prefix + str(id)
        response = requests.request("Get", url=uri, proxies=PROXIES)
        response = xml_helper.parse_xml(str(response.content.decode()))
        type = xml_helper.try_get_text_from_xml_element(xml_elem=response,
                                                        elem="//epsg:type")
        if type == "projected":
            cartes_elem = xml_helper.try_get_single_element_from_xml(
                "//gml:cartesianCS", response)
            second_level_srs_uri = xml_helper.get_href_attribute(
                xml_elem=cartes_elem)
        elif type == "geographic 2D":
            geogr_elem = xml_helper.try_get_single_element_from_xml(
                "//gml:ellipsoidalCS", response)
            second_level_srs_uri = xml_helper.get_href_attribute(
                xml_elem=geogr_elem)
        else:
            second_level_srs_uri = ""

        uri = self.registry_uri + second_level_srs_uri
        response = requests.request("Get", url=uri, proxies=PROXIES)
        response = xml_helper.parse_xml(str(response.content.decode()))
        axis = xml_helper.try_get_element_from_xml("//gml:axisDirection",
                                                   response)
        order = []
        for a in axis:
            order.append(a.text)
        order = {
            "first_axis": order[0],
            "second_axis": order[1],
        }

        # Write this to cache, so it can be used on another request!
        self.cacher.set(str(id), json.dumps(order))

        return order
Beispiel #2
0
    def _get_axis_order(self, identifier: str):
        """ Returns the axis order for a given spatial result system

        Args:
            identifier:
        Returns:

        """
        id = self.get_real_identifier(identifier)

        axis_order = self.cacher.get(str(id))
        if axis_order is not None:
            axis_order = json.loads(axis_order)
            return axis_order

        XML_NAMESPACES["gml"] = "http://www.opengis.net/gml/3.2"
        XML_NAMESPACES["epsg"] = "urn:x-ogp:spec:schema-xsd:EPSG:2.2:dataset"
        uri = self.registry_uri.replace("{CRS_IDENTIFIER}", str(id))
        # change header
        headers = {'Accept': 'application/xml'}
        response = requests.request("Get", url=uri, proxies=PROXIES, headers=headers)
        response = xml_helper.parse_xml(str(response.content.decode()))
        type = xml_helper.try_get_text_from_xml_element(xml_elem=response, elem="//epsg:type")
        if type == "projected":
            cartes_elem = xml_helper.try_get_single_element_from_xml("//gml:cartesianCS", response)
            second_level_srs_uri = xml_helper.get_href_attribute(xml_elem=cartes_elem)
        elif type in ["geographic 2D", "geographic 2d"]:
            geogr_elem = xml_helper.try_get_single_element_from_xml("//gml:ellipsoidalCS", response)
            second_level_srs_uri = xml_helper.get_href_attribute(xml_elem=geogr_elem)
        else:
            second_level_srs_uri = ""

        uri = second_level_srs_uri
        headers = {'Accept': 'application/xml'}
        response = requests.request("Get", url=uri, proxies=PROXIES, headers=headers)
        response = xml_helper.parse_xml(str(response.content.decode()))
        axis = xml_helper.try_get_element_from_xml("//gml:axisDirection", response)
        order = []
        for a in axis:
            order.append(a.text)
        order = {
            "first_axis": order[0],
            "second_axis": order[1],
        }

        # Write this to cache, so it can be used on another request!
        self.cacher.set(str(id), json.dumps(order))

        return order
Beispiel #3
0
    def create_from_capabilities(self, metadata_only: bool = False, async_task: Task = None, external_auth: ExternalAuthentication = None):
        """ Fills the object with data from the capabilities document

        Runs the parsing steps one after another on the parsed capabilities xml and
        writes the duration of the timed steps to the debug log.

        Args:
            metadata_only (bool): If True, the layers are not parsed
            async_task (Task): Handed over to the metadata and layer parsing steps
            external_auth (ExternalAuthentication): Not used inside this method
        Returns:
             nothing
        """
        capabilities = xml_helper.parse_xml(xml=self.service_capabilities_xml)

        def log_elapsed(label, since):
            # One debug line per step, using the shared message template
            service_logger.debug(EXEC_TIME_PRINT % (label, time.time() - since))

        # Service metadata: from the capabilities itself and, if linked, from the 'real' metadata document
        started = time.time()
        self.get_service_metadata_from_capabilities(xml_obj=capabilities, async_task=async_task)
        linked_metadata_uri = xml_helper.try_get_text_from_xml_element(
            xml_elem=capabilities,
            elem="//VendorSpecificCapabilities/inspire_vs:ExtendedCapabilities/inspire_common:MetadataUrl/inspire_common:URL",
        )
        if linked_metadata_uri is not None:
            self.get_service_metadata(uri=linked_metadata_uri, async_task=async_task)
        log_elapsed("service metadata", started)

        # Operations, which are possible on this service
        started = time.time()
        self.get_service_operations_and_formats(capabilities)
        log_elapsed("service operation checking", started)

        # Linked dataset metadata
        started = time.time()
        self.get_service_dataset_metadata(xml_obj=capabilities)
        log_elapsed("service iso metadata", started)

        self.get_version_specific_metadata(xml_obj=capabilities)

        if metadata_only:
            return

        started = time.time()
        self._parse_layers(xml_obj=capabilities, async_task=async_task)
        log_elapsed("layer metadata", started)
Beispiel #4
0
def transform_constraint_to_cql(constraint: str, constraint_language: str):
    """ Converts a constraint given as xml filter into its CQL representation

    Args:
        constraint (str): The constraint parameter (xml)
        constraint_language (str): The constraintlanguage parameter, has to be `FILTER` (case-insensitive)
    Returns:
         constraint (str): The transformed constraint
    Raises:
         ValueError: If the constraint language is not `FILTER` or the constraint can not be parsed as xml
    """
    language_is_filter = constraint_language.upper() == "FILTER"
    if not language_is_filter:
        message = "{} is no valid CSW conform value. Choices are `CQL_TEXT, FILTER`".format(constraint_language)
        raise ValueError(message, "constraintlanguage")

    parsed_constraint = xml_helper.parse_xml(constraint)
    if parsed_constraint is None:
        raise ValueError(
            "Constraint value is no valid xml! Did you set the correct value for 'constraintlanguage'?",
            CONSTRAINT_LOCATOR,
        )

    # The transformation starts at the Filter element
    filter_xpath = "//" + GENERIC_NAMESPACE_TEMPLATE.format("Filter")
    filter_elem = xml_helper.try_get_single_element_from_xml(filter_xpath, parsed_constraint.getroot())

    return _transform_constraint_to_cql_recursive(filter_elem)
Beispiel #5
0
    def test_get_records_sort(self):
        """ Checks that GetRecords respects the sortby parameter

        Records are requested with `dc:title:D`. The titles of the response have to
        arrive in the same order as a descending sort of these titles produces.

        Returns:

        """
        request_params = {
            "service": "CSW",
            "version": "2.0.2",
            "request": "GetRecords",
            "elementsetname": "brief",
            "resulttype": "results",
            "sortby": "dc:title:D",
        }

        response = self.client.get(reverse(CSW_PATH), data=request_params)
        content_xml = xml_helper.parse_xml(response.content)

        self.assertEqual(response.status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
        self.assertIsNotNone(content_xml, INVALID_XML_MSG)

        # Collect the titles in the order in which the response delivers them
        title_xpath = "//" + GENERIC_NAMESPACE_TEMPLATE.format("title")
        received_titles = [
            xml_helper.try_get_text_from_xml_element(title_elem)
            for title_elem in xml_helper.try_get_element_from_xml(title_xpath, content_xml)
        ]

        # A descending sort must not change the order
        expected_titles = sorted(received_titles, reverse=True)
        self.assertEqual(expected_titles, received_titles)
Beispiel #6
0
    def test_exception_report(self):
        """ Checks that an unknown operation is answered with an ows:ExceptionReport

        The request uses an operation name which does not exist. The response still
        has to arrive with status 200, has to be parsable xml and has to contain an
        ExceptionReport element.

        Returns:

        """
        request_params = {
            "service": "CSW",
            "version": "2.0.2",
            "request": "WRONG_OPERATION",
            "id": self.test_id,
            "elementsetname": "brief",
            "typenames": "gmd:MD_Metadata",
            "outputschema": "http://www.isotc211.org/2005/gmd",
        }

        response = self.client.get(reverse(CSW_PATH), data=request_params)
        content_xml = xml_helper.parse_xml(response.content)

        self.assertEqual(response.status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
        self.assertIsNotNone(content_xml, INVALID_XML_MSG)

        report_xpath = "//" + GENERIC_NAMESPACE_TEMPLATE.format("ExceptionReport")
        report_elem = xml_helper.try_get_single_element_from_xml(report_xpath, content_xml)
        self.assertIsNotNone(report_elem, "No ows:ExceptionReport was generated!")
Beispiel #7
0
    def create_metadata_elem(self, returned_md: Metadata):
        """ Provides the MD_Metadata element of an existing service/dataset metadata

        For dataset metadata the stored metadata document is used, otherwise the
        service metadata xml of the metadata object. The element is reduced by
        `reduce_information` before it is returned.

        Args:
            returned_md (Metadata): The processing metadata
        Returns:
             xml (Element): The xml element
        """
        if returned_md.is_dataset_metadata:
            raw_xml = Document.objects.get(
                metadata=returned_md,
                document_type=DocumentEnum.METADATA.value,
            ).content
        else:
            raw_xml = returned_md.get_service_metadata_xml()

        parsed_xml = xml_helper.parse_xml(raw_xml)
        md_metadata_elem = xml_helper.try_get_single_element_from_xml(
            xml_elem=parsed_xml,
            elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_Metadata"),
        )

        # Only return as much information as the requested elementSetName parameter allows
        return self.reduce_information(md_metadata_elem)
Beispiel #8
0
    def _parse_parameters(self, params_dict: dict):
        """ Parses a parameter dictionary into the object

        Args:
            params_dict (dict): The parameter key-value dict
        Returns:

        """
        # Parse all parameters automatically by resolving the parameter_map
        for key, val in params_dict.items():
            key_lower = key.lower()
            param = self.parameter_map.get(key_lower, None)
            if not param:
                continue

            # Make sure no negative integers are passed
            try:
                val = int(val)
                if val < 0:
                    raise AssertionError("No negative values allowed!")
            except ValueError:
                pass
            setattr(self, param, val)

        # Transform listable parameters into lists
        listable_elements = ["element_name", "namespace"]
        for elem in listable_elements:
            attribute = getattr(self, elem)
            if isinstance(attribute, str):
                attribute = attribute.split(",")
                setattr(self, elem, attribute)

        # Check if range of values is acceptable
        if self.result_type not in RESULT_TYPE_CHOICES:
            raise ValueError(INVALID_PARAMETER_TEMPLATE.format(self.result_type, ", ".join(RESULT_TYPE_CHOICES)), "resultType")

        if self.element_set_name is not None and len(self.element_name) > 0:
            raise ValueError("Parameter 'ElementSetName' and 'ElementName' are mutually exclusive. You can only provide one!", "elementSetName")
        elif self.element_set_name and self.element_set_name not in ELEMENT_SET_CHOICES:
            raise ValueError(INVALID_PARAMETER_TEMPLATE.format(self.element_set_name, ", ".join(ELEMENT_SET_CHOICES)), "elementSetName")
        elif self.element_set_name is None and len(self.element_name) == 0:
            self.element_set_name = "full"  # default

        if self.version not in VERSION_CHOICES:
            raise ValueError(INVALID_PARAMETER_TEMPLATE.format(self.version, ", ".join(VERSION_CHOICES)), "version")

        # Check if constraint has to be transformed first!
        if self.constraint_language is not None and self.constraint_language.upper() != "CQL_TEXT":
            try:
                self.constraint = transform_constraint_to_cql(self.constraint, self.constraint_language)
                self.constraint_language = "CQL_TEXT"
            except TypeError:
                raise ValueError("XML does not seem to be valid. Please check the CSW specification.", CONSTRAINT_LOCATOR)
        elif self.constraint is not None:
            xml_elem = xml_helper.parse_xml(self.constraint)
            if xml_elem is not None:
                raise ValueError("XML found for constraint parameter but CQL_TEXT found for constraintlanguage. Please set your parameters correctly.", CONSTRAINT_LOCATOR)
Beispiel #9
0
    def test_get_records_by_id(self):
        """ Checks that GetRecordById returns exactly the requested record

        The stated number of matched records has to be 1, has to match the number
        of returned Record elements and every returned identifier has to be the
        requested one.

        Returns:

        """
        request_params = {
            "service": "CSW",
            "version": "2.0.2",
            "request": "GetRecordById",
            "id": self.test_id,
            "elementsetname": "full",
        }

        response = self.client.get(reverse(CSW_PATH), data=request_params)
        content_xml = xml_helper.parse_xml(response.content)

        self.assertEqual(response.status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
        self.assertIsNotNone(content_xml, INVALID_XML_MSG)

        # The amount which is stated by the response itself
        matched_attribute = xml_helper.try_get_attribute_from_xml_element(
            xml_elem=content_xml,
            attribute="numberOfRecordsMatched",
            elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults"),
        )
        num_stated = int(matched_attribute)
        self.assertEqual(
            num_stated,
            1,
            "More than one element returned on GetRecordsById with only one used identifier!",
        )

        # The amount which is really contained in the response
        record_elems = xml_helper.try_get_element_from_xml(
            "//" + GENERIC_NAMESPACE_TEMPLATE.format("Record"),
            content_xml,
        )
        num_contained = len(record_elems)
        self.assertEqual(
            num_contained,
            num_stated,
            "csw:SearchResults contains wrong numberOfRecordsMatched! {} stated but {} returned!".format(num_stated, num_contained),
        )

        # Every returned record has to carry the requested identifier
        identifier_xpath = "//" + GENERIC_NAMESPACE_TEMPLATE.format("identifier")
        identifiers = []
        for record_elem in record_elems:
            identifiers.append(xml_helper.try_get_text_from_xml_element(record_elem, identifier_xpath))
        all_identical = all([identifier == self.test_id for identifier in identifiers])
        self.assertTrue(
            all_identical,
            "Elements with not matching identifier has been returned: {}".format(", ".join(identifiers)),
        )
Beispiel #10
0
    def check_status(self,
                     url: str,
                     check_wfs_member: bool = False,
                     check_image: bool = False) -> ServiceStatus:
        """ Requests the given url and evaluates the response of the ogc service.

        A response counts as successful if it has status code 200, its xml root tag
        does not contain 'Exception' and the optional checks pass. If the image
        check is requested, its result decides over the success.

        Args:
            url (str): URL to the service that should be checked.
            check_wfs_member (bool): True, if a returned xml should check for a 'member' tag.
            check_image (bool): True, if the returned content should be checked as image.
        Returns:
            ServiceStatus: Status info of service.
        """
        if self.monitoring_settings is not None:
            request_timeout = self.monitoring_settings.timeout
        else:
            request_timeout = MONITORING_REQUEST_TIMEOUT
        connector = CommonConnector(url=url, timeout=request_timeout)
        if self.metadata.has_external_authentication:
            connector.external_auth = self.metadata.external_authentication

        try:
            connector.load()
        except Exception as load_error:
            # Loading failed (e.g. outdated uri): report the error text, there is no duration
            return Monitoring.ServiceStatus(url, False, str(load_error), connector.status_code, None)

        duration = timezone.timedelta(seconds=connector.run_time)
        response_text = connector.content
        success = connector.status_code == 200

        if success:
            try:
                xml = parse_xml(response_text)
                if 'Exception' in xml.getroot().tag:
                    success = False
                if check_wfs_member and not self.has_wfs_member(xml):
                    success = False
            except AttributeError:
                # A successful response which is no xml, so there is no text to report
                response_text = None

            if check_image:
                try:
                    Image.open(BytesIO(connector.content))
                except UnidentifiedImageError:
                    success = False
                else:
                    success = True

        return Monitoring.ServiceStatus(url, success, response_text, connector.status_code, duration)
Beispiel #11
0
    def test_new_service_check_layer_num(self):
        """ Tests whether all layer objects from the xml have been stored inside the service object

        NOTE: The test is currently disabled. It is reported as skipped instead of
        silently passing, so the missing coverage stays visible in the test report.
        Remove the skipTest call to enable the test again.

        Returns:

        """
        self.skipTest("test_new_service_check_layer_num is currently disabled")

        service = self.service_wms
        layers = service.get_subelements()
        cap_xml = xml_helper.parse_xml(self.cap_doc_wms.content)

        # The amount of layers in the capabilities has to match the amount of persisted layers
        num_layers_xml = self._get_num_of_layers(cap_xml)
        num_layers_service = len(layers)

        self.assertEqual(num_layers_service, num_layers_xml)
Beispiel #12
0
    def create_from_capabilities(self, metadata_only: bool = False, external_auth: ExternalAuthentication = None):
        """ Fills the object with data from the capabilities document

        Args:
            metadata_only (bool): Not evaluated inside this method
            external_auth (ExternalAuthentication): Not evaluated inside this method
        Returns:

        """
        # The capabilities document is parsed once and shared by both steps
        capabilities = xml_helper.parse_xml(xml=self.service_capabilities_xml)

        # Step 1: service metadata
        self.get_service_metadata_from_capabilities(capabilities)

        # Step 2: <OperationsMetadata>
        self.get_service_operations_and_formats(capabilities)
Beispiel #13
0
    def _build_lock_feature_xml(self, service_param: str, version_param: str,
                                request_param: str):
        """ Returns the POST request XML for a Lock request

        One Query element is created per requested type name. Each Query gets its
        own copy of the filter which has been sent with the request.

        Args:
            service_param (str): The service param
            version_param (str): The version param
            request_param (str): The request param, used as tag of the root element
        Returns:
             xml (str): The xml document
        """
        from copy import deepcopy

        lock_action_param = self._get_POST_val("lockAction") or ""
        type_name_param = self._get_POST_val("typename")
        filter_param = self._get_POST_val("filter")

        reduced_ns_map = self._get_version_specific_namespaces(
            version_param, service_param)

        root_attributes = {
            "service": service_param,
            "version": version_param,
            "lockAction": lock_action_param
        }
        root = etree.Element(_tag=request_param,
                             nsmap=reduced_ns_map,
                             attrib=root_attributes)

        # create the xml filter object from the filter string parameter
        filter_xml = xml_helper.parse_xml(filter_param)
        filter_xml_root = filter_xml.getroot()

        for t_n_param in type_name_param.split(","):
            query_attributes = {"typeName": t_n_param}
            query_elem = xml_helper.create_subelement(root,
                                                      "Query",
                                                      attrib=query_attributes)

            # add the filter xml object as subobject to the query to use e.g. the spatial restriction.
            # An lxml element can only have one parent: attaching the same element to
            # several queries would move it, so only the last query would keep the filter.
            # NOTE(review): assumes add_subelement appends the given element itself - the copy is harmless otherwise
            xml_helper.add_subelement(query_elem, deepcopy(filter_xml_root))

        return xml_helper.xml_to_string(root)
Beispiel #14
0
    def test_new_service_check_describing_attributes(self):
        """ Tests whether the describing attributes, such as title or abstract, are correct.

        Checks for the service.
        Checks for each layer.

        NOTE: The test is currently disabled. It is reported as skipped instead of
        silently passing, so the missing coverage stays visible in the test report.
        Remove the skipTest call to enable the test again.

        Returns:

        """
        self.skipTest("test_new_service_check_describing_attributes is currently disabled")

        service = self.service_wms
        layers = service.get_subelements()
        cap_xml = xml_helper.parse_xml(self.cap_doc_wms.content)

        xml_title = xml_helper.try_get_text_from_xml_element(
            cap_xml, "//Service/Title")
        xml_abstract = xml_helper.try_get_text_from_xml_element(
            cap_xml, "//Service/Abstract")

        self.assertEqual(service.metadata.title, xml_title)
        self.assertEqual(service.metadata.abstract, xml_abstract)

        # run for layers
        for layer in layers:
            xml_layer = xml_helper.try_get_single_element_from_xml(
                "//Name[text()='{}']/parent::Layer".format(layer.identifier),
                cap_xml)
            if xml_layer is None:
                # this might happen for layers which do not provide a unique identifier. We generate an identifier automatically in this case.
                # this generated identifier - of course - can not be found in the xml document.
                continue
            xml_title = xml_helper.try_get_text_from_xml_element(
                xml_layer, "./Title")
            xml_abstract = xml_helper.try_get_text_from_xml_element(
                xml_layer, "./Abstract")
            self.assertEqual(
                layer.metadata.title,
                xml_title,
                msg="Failed for layer with identifier '{}' and title '{}'".
                format(layer.identifier, layer.metadata.title))
            self.assertEqual(
                layer.metadata.abstract,
                xml_abstract,
                msg="Failed for layer with identifier '{}' and title '{}'".
                format(layer.identifier, layer.metadata.title))
Beispiel #15
0
    def get_layer_by_identifier(self, identifier: str):
        """ Searches the capabilities for the layer with the given name and parses it

        If no capabilities document has been loaded so far, it is fetched first.

        Args:
            identifier (str): The identifier as string
        Returns:
             The result of the single layer parsing for the first matching layer,
             None if no layer matches
        """
        if self.service_capabilities_xml is None:
            # The document might not have been loaded yet
            self.get_capabilities()

        capabilities = xml_helper.parse_xml(xml=self.service_capabilities_xml)
        layer_xpath = "//Layer/Name[text()='{}']/parent::Layer".format(identifier)
        matching_layers = xml_helper.try_get_element_from_xml(xml_elem=capabilities, elem=layer_xpath)

        if len(matching_layers) > 0:
            return self._start_single_layer_parsing(matching_layers[0])
        return None
Beispiel #16
0
    def get_capabilities(self):
        """ Start a network call to retrieve the original capabilities xml document.

        Using the connector class, this function will GET the capabilities xml document as string.
        No file will be downloaded and stored on the storage. The string will be stored in the OGCWebService instance.

        The GetCapabilities parameters are appended to `service_connect_url`, which is
        overwritten with the resulting url. The response content, the run time and the
        encoding reported by the connector are stored on the instance.

        Returns:
             nothing
        Raises:
            ConnectionError: If the status code is not 200 or the request runs into a read timeout
            Exception: If the response can not be parsed as xml (holds the response text)
        """
        params = {
            "request": OGCOperationEnum.GET_CAPABILITIES.value,
            "version": self.service_version.value if self.service_version is not None else "",
            "service": (self.service_type.value if self.service_type is not None else "").upper(),
        }

        # Choose the separator depending on the given url: a url without any query
        # needs '?', a url with an open query ('?' or '&' at the end) needs nothing,
        # everything else is continued with '&'
        base_url = self.service_connect_url
        if "?" not in base_url:
            separator = "?"
        elif base_url.endswith(("?", "&")):
            separator = ""
        else:
            separator = "&"
        self.service_connect_url = "{}{}{}".format(base_url, separator, urlencode(params))

        ows_connector = CommonConnector(
            url=self.service_connect_url,
            external_auth=self.external_authentification,
            connection_type=ConnectionEnum.REQUESTS)
        ows_connector.http_method = 'GET'
        try:
            ows_connector.load()
            if ows_connector.status_code != 200:
                raise ConnectionError(ows_connector.status_code)
        except ReadTimeout as timeout_error:
            raise ConnectionError(
                CONNECTION_TIMEOUT.format(self.service_connect_url)) from timeout_error

        tmp = ows_connector.content.decode("UTF-8")
        # check if tmp really contains an xml file
        xml = xml_helper.parse_xml(tmp)

        if xml is None:
            raise Exception(tmp)

        self.service_capabilities_xml = tmp
        self.connect_duration = ows_connector.run_time
        self.descriptive_document_encoding = ows_connector.encoding
Beispiel #17
0
def _remove_iso_metadata(metadata: Metadata, md_links: list,
                         existing_iso_links: list):
    """ Drops iso metadata whose link is persisted but not part of the new links anymore

    For every outdated link the related metadata is deleted and all elements
    referencing the link via xlink:href are removed from the current (not original)
    capabilities document, which is saved afterwards.

    Args:
        metadata (Metadata): The edited metadata
        md_links (list): The new iso metadata links
        existing_iso_links (list): The existing metadata links, related to the metadata object
    Returns:
         nothing
    """
    # The capabilities document belongs to the root metadata of the service
    document_owner = metadata
    service_type = metadata.service_type
    if not metadata.is_root():
        if service_type == OGCServiceEnum.WMS:
            document_owner = metadata.service.parent_service.metadata
        elif service_type == OGCServiceEnum.WFS:
            document_owner = metadata.featuretype.parent_service.metadata

    cap_doc = Document.objects.get(
        metadata=document_owner,
        is_original=False,
        document_type=DocumentEnum.CAPABILITY.value,
    )
    cap_root = xml_helper.parse_xml(cap_doc.content).getroot()

    for link in existing_iso_links:
        if link in md_links:
            # Still wanted, nothing to do
            continue

        outdated_md = metadata.get_related_metadatas(
            filters={'to_metadatas__to_metadata__metadata_url': link})
        outdated_md.delete()

        # Remove every reference to the link from the capabilities
        for referencing_elem in xml_helper.find_element_where_attr(cap_root, "xlink:href", link):
            xml_helper.remove_element(referencing_elem)

    cap_doc.content = xml_helper.xml_to_string(cap_root)
    cap_doc.save()
Beispiel #18
0
    def test_get_records(self):
        """ Checks that a GetRecords request is answered with status 200 and parsable xml

        Returns:

        """
        request_params = {
            "service": "CSW",
            "version": "2.0.2",
            "request": "GetRecords",
            "elementsetname": "brief",
            "resulttype": "results",
        }

        response = self.client.get(reverse(CSW_PATH), data=request_params)
        content_xml = xml_helper.parse_xml(response.content)

        self.assertEqual(response.status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
        self.assertIsNotNone(content_xml, INVALID_XML_MSG)
Beispiel #19
0
    def test_get_records_constraint(self):
        """ Checks that GetRecords respects the constraint parameter

        Records are requested with a CQL constraint on dc:identifier. Every
        identifier of the response has to contain the requested id.

        Returns:

        """
        request_params = {
            "service": "CSW",
            "version": "2.0.2",
            "request": "GetRecords",
            "elementsetname": "brief",
            "resulttype": "results",
            "constraint": "dc:identifier like %{}%".format(self.test_id),
            "constraintlanguage": "CQL_TEXT",
        }

        response = self.client.get(reverse(CSW_PATH), data=request_params)
        content_xml = xml_helper.parse_xml(response.content)

        self.assertEqual(response.status_code, 200, WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
        self.assertIsNotNone(content_xml, INVALID_XML_MSG)

        # Collect the text of all identifier elements and check each one against the constraint
        identifier_xpath = "//" + GENERIC_NAMESPACE_TEMPLATE.format("identifier")
        identifier_texts = [
            xml_helper.try_get_text_from_xml_element(identifier_elem)
            for identifier_elem in xml_helper.try_get_element_from_xml(identifier_xpath, content_xml)
        ]
        all_fit = all([self.test_id in identifier_text for identifier_text in identifier_texts])
        self.assertTrue(
            all_fit,
            "A result was returned, which does not fit to the given constraint parameter!",
        )
Beispiel #20
0
def check_uri_provides_ogc_capabilities(value) -> ValidationError:
    """ Validates that the given url delivers an XML document whose root tag contains 'Capabilities'.

    Args:
        value: The url parameter
    Returns:
         None|ValidationError: None if the check passes or has to be skipped (HTTP 401), ValidationError else
    """
    connector = CommonConnector(url=value)
    connector.load()

    # 401 means the resource needs authentication. Whether a proper capabilities document is served
    # can not be decided in that case, so the check is skipped.
    if connector.status_code == 401:
        return None

    try:
        root_tag = xml_helper.parse_xml(connector.content).getroot().tag
    except AttributeError:
        # Nothing parsable has been returned, so there is no root element to inspect
        return ValidationError(_("No XML found."))

    if "Capabilities" not in root_tag:
        return ValidationError(_("This is no capabilities document."))
    return None
Beispiel #21
0
    def test_get_records_md_metadata(self):
        """ Checks that a by-id request with gmd:MD_Metadata as type name answers with status 200 and parsable xml.

        Returns:

        """
        response = self.client.get(
            reverse(CSW_PATH),
            data={
                "service": "CSW",
                "version": "2.0.2",
                "request": "GetRecordsById",
                "id": self.test_id,
                "elementsetname": "brief",
                "typenames": "gmd:MD_Metadata",
                "outputschema": "http://www.isotc211.org/2005/gmd",
            },
        )
        parsed_content = xml_helper.parse_xml(response.content)

        self.assertEqual(
            response.status_code, 200,
            WRONG_STATUS_CODE_TEMPLATE.format(response.status_code))
        self.assertIsNotNone(parsed_content, INVALID_XML_MSG)
Beispiel #22
0
    def harvest(self):
        """ Starts harvesting procedure

        Performs an initial "hits" request to learn how many records match, then requests and processes result
        pages until the next start position is 0 or has already been processed. Afterwards all metadata whose
        identifiers are still contained in self.deleted_metadata are deleted and the cached API and CSW pages
        are removed.

        Returns:
             nothing
        Raises:
            ConnectionError: If the hits request does not answer with status 200 or with parsable xml
            AttributeError: If numberOfRecordsMatched can not be read as a number from the hits response
        """
        absolute_url = f'<a href="{self.metadata.get_absolute_url()}">{self.metadata.title}</a>'
        # NOTE(review): the trailing comma makes service_json a one-element tuple wrapping the dict. It is kept
        # since consumers of the task meta may rely on this shape - confirm and drop the comma if they do not.
        service_json = {'id': self.metadata.pk, 'absolute_url': absolute_url},
        if current_task:
            current_task.update_state(state=states.STARTED,
                                      meta={
                                          'service': service_json,
                                          'phase':
                                          f"Connecting to {absolute_url}",
                                      })

        # Fill the deleted_metadata with all persisted metadata, so we can eliminate each entry if it is still provided by
        # the catalogue. In the end we will have a list, which contains metadata IDs that are not found in the catalogue anymore.

        all_persisted_metadata_identifiers = self.metadata.get_related_metadatas(
            filters={
                'to_metadatas__relation_type':
                MetadataRelationEnum.HARVESTED_THROUGH.value
            }).values_list("identifier", flat=True)
        # Use a set instead of list to increase lookup afterwards
        self.deleted_metadata.update(all_persisted_metadata_identifiers)

        # Perform the initial "hits" request to get an overview of how many data will be fetched
        hits_response, status_code = self._get_harvest_response(
            result_type="hits")

        if status_code != 200:
            raise ConnectionError(
                _("Harvest failed: Code {}\n{}").format(
                    status_code, hits_response))
        xml_response = xml_helper.parse_xml(hits_response)
        if xml_response is None:
            # Format after the translation lookup, so the untranslated template is used as message id
            raise ConnectionError(
                _("Response is not a valid xml: \n{}").format(hits_response))

        try:
            if current_task:
                current_task.update_state(state=states.STARTED,
                                          meta={
                                              'phase':
                                              "calculating harvesting time",
                                          })
            total_number_to_harvest = int(
                xml_helper.try_get_attribute_from_xml_element(
                    xml_response,
                    "numberOfRecordsMatched",
                    "//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults"),
                ))
        except (TypeError, ValueError):
            # TypeError: attribute is missing (None), ValueError: attribute is not a number
            csw_logger.error(
                "Malicious Harvest response: {}".format(hits_response))
            raise AttributeError(
                _("Harvest response is missing important data!"))
        if current_task:
            current_task.update_state(state=states.STARTED,
                                      meta={
                                          'service': service_json,
                                          'phase': "Start harvesting..."
                                      })

        # NOTE(review): raises ZeroDivisionError if the catalogue reports 0 matching records
        self.progress_step_per_result = float(
            1 / total_number_to_harvest) * 100

        # There are wrongly configured CSW, which do not return nextRecord=0 on the last page but instead continue on
        # nextRecord=1. We need to prevent endless loops by checking whether we already worked on these positions and
        # simply end it there!
        processed_start_positions = set()

        t_start = time()
        number_rest_to_harvest = total_number_to_harvest
        number_of_harvested = 0
        self.harvest_result.timestamp_start = timezone.now()
        self.harvest_result.save()

        page_cacher = PageCacher()

        # Initialized once in front of the loop, so the estimation calculated at the end of one pass is
        # shown in the status messages of the next pass
        estimated_time_for_all = 'unknown'

        # Run as long as we can fetch data
        while True:
            if current_task:
                current_task.update_state(
                    state=states.STARTED,
                    meta={
                        'phase':
                        _("Harvesting first {} of {}. Time remaining: {}").
                        format(self.max_records_per_request,
                               total_number_to_harvest,
                               estimated_time_for_all),
                    })
            processed_start_positions.add(self.start_position)
            # Get response
            next_response, status_code = self._get_harvest_response(
                result_type="results")

            if current_task:
                current_task.update_state(
                    state=states.STARTED,
                    meta={
                        'phase':
                        _("Processing harvested results for the first {} of {}. Time remaining: {}"
                          ).format(self.max_records_per_request,
                                   total_number_to_harvest,
                                   estimated_time_for_all),
                    })
            found_entries = self._process_harvest_response(next_response)

            # Calculate time since loop started
            duration = time() - t_start
            number_rest_to_harvest -= self.max_records_per_request
            # NOTE(review): _process_harvest_response returns None for an unparsable page. The TypeError raised
            # here in that case also keeps the deletion step below from running on an incomplete harvest.
            number_of_harvested += found_entries
            self.harvest_result.number_results = number_of_harvested
            self.harvest_result.save()

            # Remove cached pages of API and CSW
            page_cacher.remove_pages(API_CACHE_KEY_PREFIX)
            page_cacher.remove_pages(CSW_CACHE_PREFIX)
            if self.start_position == 0 or self.start_position in processed_start_positions:
                # We are done!
                break

            # Without any harvested entry there is no rate to extrapolate from, keep the last estimation
            if number_of_harvested > 0:
                seconds_for_rest = (max(number_rest_to_harvest, 0) *
                                    (duration / number_of_harvested))
                estimated_time_for_all = timezone.timedelta(
                    seconds=seconds_for_rest)

        # Add HarvestResult infos
        self.harvest_result.timestamp_end = timezone.now()
        self.harvest_result.number_results = number_of_harvested
        self.harvest_result.save()

        # Delete Metadata records which could not be found in the catalogue anymore
        # This has to be done if the harvesting run completely.
        deleted_metadatas = Metadata.objects.filter(
            identifier__in=self.deleted_metadata)
        deleted_metadatas.delete()

        # Remove cached pages of API and CSW
        page_cacher.remove_pages(API_CACHE_KEY_PREFIX)
        page_cacher.remove_pages(CSW_CACHE_PREFIX)
Beispiel #23
0
def overwrite_capabilities_document(metadata: Metadata):
    """ Writes the edited data of the given metadata into the related, non-original capabilities document.

    If a subelement of a service has been edited, the capabilities of the service root are changed, since
    this is the most requested document of the service.
    The capabilities documents of all subelements above the edited element are reset and cached documents
    are cleared, so that correct capabilities are created on the next request for these elements.

    Args:
        metadata (Metadata): The edited metadata
    Returns:
         nothing
    """
    metadata_is_root = metadata.is_root()
    if metadata_is_root:
        capability_owner = metadata
    elif metadata.is_metadata_type(MetadataEnum.LAYER):
        capability_owner = metadata.service.parent_service.metadata
    elif metadata.is_metadata_type(MetadataEnum.FEATURETYPE):
        capability_owner = metadata.featuretype.parent_service.metadata

    # Requesting the current capability xml first makes sure the Document record exists,
    # since this call auto-generates records which did not exist before.
    owner_version = capability_owner.get_service_version().value
    capability_owner.get_current_capability_xml(owner_version)
    capability_document = Document.objects.get(
        metadata=capability_owner,
        document_type=DocumentEnum.CAPABILITY.value,
        is_original=False,
    )

    capabilities_tree = xml_helper.parse_xml(capability_document.content)

    service_type = metadata.service_type.value
    service_version = metadata.get_service_version()

    # The element to edit is looked up by the identifier, for a WFS root service by the title instead
    search_text = metadata.identifier
    if metadata_is_root and metadata.is_service_type(OGCServiceEnum.WFS):
        if service_version is OGCServiceVersionEnum.V_2_0_0 or service_version is OGCServiceVersionEnum.V_2_0_2:
            XML_NAMESPACES["wfs"] = "http://www.opengis.net/wfs/2.0"
            XML_NAMESPACES["ows"] = "http://www.opengis.net/ows/1.1"
            XML_NAMESPACES["fes"] = "http://www.opengis.net/fes/2.0"
            XML_NAMESPACES["default"] = XML_NAMESPACES["wfs"]
        search_text = metadata.title

    target_elem = xml_helper.find_element_where_text(capabilities_tree,
                                                     txt=search_text)
    if len(target_elem) > 0:
        # Only the first match is edited
        target_elem = target_elem[0]

    # Keywords, iso metadata links and the remaining editable data are written one after another
    _overwrite_capabilities_keywords(target_elem, metadata, service_type)
    _overwrite_capabilities_iso_metadata_links(target_elem, metadata)
    _overwrite_capabilities_data(target_elem, metadata)

    # Persist the changed xml in the Document record
    capability_document.content = xml_helper.xml_to_string(capabilities_tree)
    capability_document.save()

    # Drop the service metadata document as well, so it has to be generated again
    Document.objects.filter(
        metadata=metadata,
        document_type=DocumentEnum.METADATA.value,
    ).delete()

    # Cached documents of the edited element and of the capabilities owner hold the old state
    metadata.clear_cached_documents()
    capability_owner.clear_cached_documents()

    # Document contents of the upper elements (children of the root element) hold the old state, too
    metadata.clear_upper_element_capabilities(clear_self_too=True)
Beispiel #24
0
    def _process_harvest_response(self, next_response: bytes) -> int:
        """ Evaluates one page of a harvest response

        Stores the nextRecord attribute of the response as new start position, removes the file identifiers
        found in the response from self.deleted_metadata and hands the MD_Metadata entries over to multiple
        processes which run self._create_metadata_from_md_metadata.

        Args:
            next_response (bytes): The response as bytes
        Returns:
             number_found_entries (int): The amount of found MD_Metadata entries in this response. If the
             response is no valid xml, the start position is set to 0 and nothing (None) is returned.
        """
        parsed_response = xml_helper.parse_xml(next_response)
        if parsed_response is None:
            csw_logger.error(
                "Response is no valid xml. catalogue: {}, startPosition: {}, maxRecords: {}"
                .format(self.metadata.title, self.start_position,
                        self.max_records_per_request))
            # Abort by resetting the start position
            self.start_position = 0
            return

        entries_xpath = "//" + GENERIC_NAMESPACE_TEMPLATE.format("MD_Metadata")
        md_metadata_entries = xml_helper.try_get_element_from_xml(
            entries_xpath, parsed_response) or []

        search_results_xpath = "//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults")
        self.start_position = int(
            xml_helper.try_get_attribute_from_xml_element(
                parsed_response, "nextRecord", search_results_xpath))

        # Every identifier found in this response is still provided by the catalogue. This is done in the
        # parent process, so self.deleted_metadata can be edited easily.
        file_identifier_xpath = "{}/{}".format(
            ".//" + GENERIC_NAMESPACE_TEMPLATE.format("fileIdentifier"),
            GENERIC_NAMESPACE_TEMPLATE.format("CharacterString"))
        for md_entry in md_metadata_entries:
            file_identifier = xml_helper.try_get_text_from_xml_element(
                md_entry, file_identifier_xpath)
            # Unknown identifiers are ignored (assumes self.deleted_metadata is a set)
            self.deleted_metadata.discard(file_identifier)

        # Delete response to free memory
        del parsed_response

        # Hand the entries over to multiple processes, using half of the cpus but at least one
        t_start = time()
        worker_count = max(int(cpu_count() / 2), 1)
        chunk_size = int(len(md_metadata_entries) / worker_count)
        self.resource_list = md_metadata_entries
        chunk_start = 0
        chunk_end = 0
        workers = []
        for _ in range(worker_count):
            # Fewer entries than processes: every process is started with -1 as end index
            chunk_end = chunk_end + chunk_size if chunk_size >= 1 else -1
            workers.append(
                Process(target=self._create_metadata_from_md_metadata,
                        args=(chunk_start, chunk_end)))
            chunk_start += chunk_size

        # Close all connections to force each process to create a new one for itself
        connections.close_all()
        execute_threads(workers)

        csw_logger.debug(
            "Harvesting '{}': runtime for {} metadata parsing: {}s ####".
            format(self.metadata.title, self.max_records_per_request,
                   time() - t_start))
        return len(md_metadata_entries)
Beispiel #25
0
    def _build_get_feature_xml(self, service_param: str, version_param: str,
                               request_param: str):
        """ Returns the POST request XML for a GetFeature request

        The root element carries service and version plus the optional resultType, outputFormat and
        count/maxFeatures attributes. One Query element is created per comma separated type name and the
        filter is added to it as subelement.

        Args:
            service_param (str): The service param
            version_param (str): The version param
            request_param (str): The request param, used as tag name of the root element
        Returns:
             xml (str): The xml document, an empty string if the filter parameter could not be parsed
        """
        xml = ""

        format_param = self._get_POST_val("format")
        type_name_param = self._get_POST_val("typename") or self._get_POST_val(
            "typenames")
        filter_param = self._get_POST_val("filter")
        count_param = self._get_POST_val("count") or self._get_POST_val(
            "maxFeatures")
        # The result type has to be read from its own parameter. Reading 'count' here would write the
        # count value into the resultType attribute.
        resulttype_param = self._get_POST_val("resultType")

        # Version 2.0.0 and 2.0.2 use 'typeNames' and 'count', older versions 'typeName' and 'maxFeatures'
        is_version_2 = version_param in (
            OGCServiceVersionEnum.V_2_0_0.value,
            OGCServiceVersionEnum.V_2_0_2.value,
        )
        type_name_identifier = "typeNames" if is_version_2 else "typeName"

        reduced_ns_map = self._get_version_specific_namespaces(
            version_param, service_param)
        wfs_ns = reduced_ns_map["wfs"]

        root_attributes = {
            "service": service_param,
            "version": version_param,
        }

        if resulttype_param is not None:
            root_attributes["resultType"] = resulttype_param
        if format_param is not None:
            root_attributes["outputFormat"] = format_param
        if count_param is not None:
            param_tag = "count" if is_version_2 else "maxFeatures"
            root_attributes[param_tag] = count_param

        root = etree.Element(_tag="{" + wfs_ns + "}" + request_param,
                             nsmap=reduced_ns_map,
                             attrib=root_attributes)

        # create the xml filter object from the filter string parameter
        filter_xml = xml_helper.parse_xml(filter_param)
        if filter_xml is not None:
            filter_xml_root = filter_xml.getroot()

            for t_n_param in type_name_param.split(","):
                query_attributes = {type_name_identifier: t_n_param}
                query_elem = xml_helper.create_subelement(
                    root,
                    "{" + wfs_ns + "}" + "Query",
                    attrib=query_attributes)

                # add the filter xml object as subobject to the query to use e.g. the spatial restriction
                # NOTE(review): the same element object is added to every query. If add_subelement does not
                # copy it, only the last query may keep the filter - verify for multiple type names.
                xml_helper.add_subelement(query_elem, filter_xml_root)

            xml = xml_helper.xml_to_string(root)

        return xml
    def parse_xml(self):
        """ Reads the needed data from the xml and writes to an ISOMetadata instance (self)

        Parses self.raw_metadata and fills the attributes of this instance (identifiers, dates, title, abstract,
        keywords, language, categories, formats, links, bounding box, temporal extent, spatial resolution,
        reference system, constraints, contact data, update frequency and inspire conformance results) with
        the values found in the document.

        Returns:
             nothing
        """
        xml = self.raw_metadata
        xml_obj = xml_helper.parse_xml(xml)
        self.file_identifier = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString")
        self.character_set_code = xml_helper.try_get_attribute_from_xml_element(
            xml_elem=xml_obj,
            attribute="codeListValue",
            elem="//gmd:MD_Metadata/gmd:characterSet/gmd:MD_CharacterSetCode")
        if self.file_identifier is None:
            # No identifier provided by the document, so a new one is generated
            self.file_identifier = uuid.uuid4()
        self.date_stamp = xml_helper.try_get_text_from_xml_element(
            xml_obj, "//gmd:MD_Metadata/gmd:dateStamp/gco:Date")
        self.last_change_date = xml_helper.try_get_text_from_xml_element(
            xml_obj, "//gmd:MD_Metadata/gmd:dateStamp/gco:Date")

        self.md_standard_name = xml_helper.try_get_text_from_xml_element(
            xml_obj, "//gmd:metadataStandardName/gco:CharacterString")
        self.md_standard_version = xml_helper.try_get_text_from_xml_element(
            xml_obj, "//gmd:metadataStandardVersion/gco:CharacterString")

        self._parse_xml_legal_dates(xml_obj)
        self._parse_xml_legal_reports(xml_obj)

        # try to transform the last_change_date into a datetime object
        try:
            self.last_change_date = parse(self.last_change_date,
                                          tzinfo=timezone.utc)
        except (ValueError, OverflowError, TypeError):
            # if this is not possible due to wrong input, just use the current time...
            self.last_change_date = timezone.now()

        # The hierarchy level decides which identification element holds the following values
        self.hierarchy_level = xml_helper.try_get_attribute_from_xml_element(
            xml_obj, "codeListValue",
            "//gmd:MD_Metadata/gmd:hierarchyLevel/gmd:MD_ScopeCode")
        if self.hierarchy_level == "service":
            xpath_type = "srv:SV_ServiceIdentification"
        else:
            xpath_type = "gmd:MD_DataIdentification"
        self.title = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString"
            .format(xpath_type))
        self._parse_xml_dataset_id(xml_obj, xpath_type)
        self.abstract = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:abstract/gco:CharacterString"
            .format(xpath_type))
        keywords = xml_helper.try_get_element_from_xml(
            xml_elem=xml_obj,
            elem=
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString"
            .format(xpath_type))
        for keyword in keywords:
            if keyword.text is None:
                continue
            # Compare the text which gets stored, not the element itself, otherwise duplicates are never detected
            keyword_text = xml_helper.try_get_text_from_xml_element(keyword)
            if keyword_text not in self.keywords:
                self.keywords.append(keyword_text)

        language = xml_helper.try_get_single_element_from_xml(
            xml_elem=xml_obj,
            elem=
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:language/gmd:LanguageCode"
            .format(xpath_type))
        # Explicit None check: the truth value of an lxml element is False if it has no child elements,
        # which would skip a language code that only holds text
        if language is not None and language.text is not None:
            self.language = xml_helper.try_get_text_from_xml_element(language)

        iso_categories = xml_helper.try_get_element_from_xml(
            xml_elem=xml_obj,
            elem=
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:topicCategory/gmd:MD_TopicCategoryCode"
            .format(xpath_type))
        if iso_categories:
            for iso_category in iso_categories:
                self.iso_categories.append(
                    xml_helper.try_get_text_from_xml_element(iso_category))

        # Get all values from <gmd:distributionInfo> which declares the distributionFormat
        formats = xml_helper.try_get_element_from_xml(
            xml_elem=xml_obj,
            elem="//" +
            GENERIC_NAMESPACE_TEMPLATE.format("distributionFormat"))
        if formats:
            for format_elem in formats:
                # get the character value per format
                name_elem = xml_helper.try_get_single_element_from_xml(
                    xml_elem=format_elem,
                    elem=".//" + GENERIC_NAMESPACE_TEMPLATE.format("name"))
                if name_elem is None:
                    continue
                val = xml_helper.try_get_text_from_xml_element(
                    xml_elem=name_elem,
                    elem=".//" +
                    GENERIC_NAMESPACE_TEMPLATE.format("CharacterString"))
                self.formats.append(val)

        self.download_link = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            '//gmd:MD_Metadata/gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource[gmd:function/gmd:CI_OnLineFunctionCode/@codeListValue="download"]/gmd:linkage/gmd:URL'
        )
        self.transfer_size = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            '//gmd:MD_Metadata/gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:transferSize/gco:Real'
        )
        self.preview_image = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileName/gco:CharacterString"
            .format(xpath_type))
        try:
            self.bounding_box["min_x"] = float(
                xml_helper.try_get_text_from_xml_element(
                    xml_obj, "//gmd:westBoundLongitude/gco:Decimal"))
            self.bounding_box["min_y"] = float(
                xml_helper.try_get_text_from_xml_element(
                    xml_obj, "//gmd:southBoundLatitude/gco:Decimal"))
            self.bounding_box["max_x"] = float(
                xml_helper.try_get_text_from_xml_element(
                    xml_obj, "//gmd:eastBoundLongitude/gco:Decimal"))
            self.bounding_box["max_y"] = float(
                xml_helper.try_get_text_from_xml_element(
                    xml_obj, "//gmd:northBoundLatitude/gco:Decimal"))
        except (TypeError, ValueError):
            # TypeError: a bound is missing (None), ValueError: a bound is not a number.
            # An incomplete bounding box is not usable at all.
            self.bounding_box = None

        self._parse_xml_polygons(xml_obj, xpath_type)

        self.tmp_extent_begin = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition"
            .format(xpath_type))
        if self.tmp_extent_begin is None:
            self.tmp_extent_begin = "1900-01-01"

        self.tmp_extent_end = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition"
            .format(xpath_type))
        if self.tmp_extent_end is None:
            self.tmp_extent_end = "1900-01-01"

        # A positive scale denominator is preferred over the ground distance
        equivalent_scale = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer"
            .format(xpath_type))
        ground_res = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gco:Distance"
            .format(xpath_type))
        if equivalent_scale is not None and int(equivalent_scale) > 0:
            self.spatial_res_val = equivalent_scale
            self.spatial_res_type = "scaleDenominator"
        elif ground_res is not None and len(ground_res) > 0:
            self.spatial_res_val = ground_res
            self.spatial_res_type = "groundDistance"

        self.ref_system = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/gmd:RS_Identifier/gmd:code/gco:CharacterString"
        )
        self.ref_system_version = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/gmd:RS_Identifier/gmd:version/gco:CharacterString"
        )
        self.ref_system_authority = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/gmd:RS_Identifier/gmd:authority/gmd:CI_Citation/gmd:title/gco:CharacterString"
        )
        epsg_api = EpsgApi()
        if self.ref_system is not None:
            self.ref_system = "EPSG:{}".format(
                epsg_api.get_subelements(self.ref_system).get("code"))

        # gmd:CI_OnLineFunctionCode
        dist_func_elem = xml_helper.try_get_single_element_from_xml(
            "//" + GENERIC_NAMESPACE_TEMPLATE.format("CI_OnLineFunctionCode"),
            xml_obj)
        self.distribution_function = xml_helper.try_get_attribute_from_xml_element(
            dist_func_elem,
            "codeListValue",
        )
        del dist_func_elem

        # gmd:MD_RepresentativeFraction
        fraction_elem = xml_helper.try_get_single_element_from_xml(
            "//" +
            GENERIC_NAMESPACE_TEMPLATE.format("MD_RepresentativeFraction"),
            xml_obj)
        self.fraction_denominator = xml_helper.try_get_text_from_xml_element(
            fraction_elem,
            ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Integer"))
        del fraction_elem

        # gmd:useLimitation
        limit_elem = xml_helper.try_get_single_element_from_xml(
            "//" + GENERIC_NAMESPACE_TEMPLATE.format("useLimitation"), xml_obj)
        self.use_limitation = xml_helper.try_get_text_from_xml_element(
            limit_elem,
            ".//" + GENERIC_NAMESPACE_TEMPLATE.format("CharacterString"))
        del limit_elem

        self.lineage = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            "//gmd:MD_Metadata/gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:statement/gco:CharacterString"
        )

        restriction_code_attr_val = xml_helper.try_get_element_from_xml(
            xml_elem=xml_obj,
            elem=
            '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:useConstraints/gmd:MD_RestrictionCode/@codeListValue'
            .format(xpath_type))
        if len(restriction_code_attr_val) >= 2:
            legal_constraints = ""
            if restriction_code_attr_val[
                    0] == 'license' and restriction_code_attr_val[
                        1] == 'otherRestrictions':
                other_constraints = xml_helper.try_get_element_from_xml(
                    xml_elem=xml_obj,
                    elem=
                    '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:resourceConstraints/gmd:MD_LegalConstraints[gmd:useConstraints/gmd:MD_RestrictionCode/@codeListValue="otherRestrictions"]/gmd:otherConstraints/gco:CharacterString'
                    .format(xpath_type))
                for constraint in other_constraints:
                    tmp_constraint = xml_helper.try_get_text_from_xml_element(
                        xml_elem=constraint)
                    if tmp_constraint is None:
                        # An empty element can neither be parsed as json nor be appended as text
                        continue
                    try:
                        constraint_json = json.loads(tmp_constraint)
                    except ValueError:
                        # no, this is not a json!
                        # handle it is a normal text
                        legal_constraints += tmp_constraint + ";"
                    else:
                        self.license_source_note = constraint_json.get(
                            "quelle", None)
                        self.license_json = constraint_json
            self.fees = legal_constraints

        self.access_constraints = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:resourceConstraints/gmd:MD_LegalConstraints[gmd:accessConstraints/gmd:MD_RestrictionCode/@codeListValue="otherRestrictions"]/gmd:otherConstraints/gco:CharacterString'
            .format(xpath_type))
        self.responsible_party = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:pointOfContact/gmd:CI_ResponsibleParty/gmd:organisationName/gco:CharacterString'
            .format(xpath_type))
        self.contact_person = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:pointOfContact/gmd:CI_ResponsibleParty/gmd:individualName/gco:CharacterString'
            .format(xpath_type))
        self.contact_phone = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:pointOfContact/gmd:CI_ResponsibleParty/gmd:contactInfo/gmd:CI_Contact/gmd:phone/gmd:CI_Telephone/gmd:voice/gco:CharacterString'
            .format(xpath_type))
        self.contact_email = xml_helper.try_get_text_from_xml_element(
            xml_obj,
            '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:pointOfContact/gmd:CI_ResponsibleParty/gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString'
            .format(xpath_type))
        update_frequency = xml_helper.try_get_attribute_from_xml_element(
            xml_elem=xml_obj,
            attribute="codeListValue",
            elem=
            '//gmd:MD_Metadata/gmd:identificationInfo/{}/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode'
            .format(xpath_type))
        if update_frequency in self.valid_update_frequencies:
            self.update_frequency = update_frequency

        # inspire regulations
        with open(INSPIRE_LEGISLATION_FILE, "r", encoding="utf-8") as _file:
            regislations = json.load(_file)
        for regislation in regislations["inspire_rules"]:
            reg = {
                "name": regislation.get("name", None),
                "date": regislation.get("date", "1900-01-01"),
                "pass": None,
            }
            statement = xml_helper.try_get_text_from_xml_element(
                xml_obj,
                '//gmd:MD_Metadata/gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/gmd:result/gmd:DQ_ConformanceResult[gmd:specification/gmd:CI_Citation/gmd:title/gco:CharacterString="{}" and gmd:specification/gmd:CI_Citation/gmd:date/gmd:CI_Date/gmd:date/gco:Date="{}"]/gmd:pass/gco:Boolean'
                .format(reg["name"], reg["date"]))
            statement_val = utils.resolve_boolean_attribute_val(statement)
            if statement_val is None:
                # No conformance statement found for this regulation
                reg["pass"] = "******"
                self.inspire_interoperability = False
            else:
                reg["pass"] = statement_val
                # if only one regislation is not fullfilled, we do not have interoperability
                if not statement_val:
                    self.inspire_interoperability = False
            self.interoperability_list.append(reg)
Beispiel #27
0
    def harvest(self, task_id: str = None):
        """ Starts harvesting procedure

        Creates a PendingTask record, performs an initial "hits" request to learn how many records the
        catalogue provides and then fetches the result pages one after another until the catalogue signals
        the end (start position 0), an already processed start position shows up again or the pending task
        is gone. Afterwards the HarvestResult is finalized and - only if the pending task still exists - all
        persisted metadata which has not been seen in the catalogue anymore is deleted.

        Args:
            task_id (str): Optional id for the pending task record. Falls back to the metadata id.
        Returns:
            nothing
        Raises:
            ProcessLookupError: If a pending task for this metadata title already exists
            ConnectionError: If the "hits" request is not answered with HTTP 200 or its body can not be parsed as xml
            AttributeError: If the "hits" response does not provide a usable numberOfRecordsMatched value
        """
        # Create a pending task record for the database first!
        task_exists = PendingTask.objects.filter(
            description__icontains=self.metadata.title).exists()
        if task_exists:
            raise ProcessLookupError(_("Harvesting is currently performed"))

        async_task_id = task_id or self.metadata.id
        self.pending_task = PendingTask.objects.create(
            task_id=async_task_id,
            description=json.dumps({
                "service": self.metadata.title,
                "phase": "Connecting...",
            }),
            progress=0,
            remaining_time=None,
            created_by=self.harvesting_group)

        # Fill the deleted_metadata with all persisted metadata, so we can eliminate each entry if it is still provided by
        # the catalogue. In the end we will have a list, which contains metadata IDs that are not found in the catalogue anymore.
        all_persisted_metadata_identifiers = self.metadata.get_related_metadatas(
            filters={
                'to_metadatas__relation_type':
                MetadataRelationEnum.HARVESTED_THROUGH.value
            }).values_list("identifier", flat=True)
        # Use a set instead of list to increase lookup afterwards
        self.deleted_metadata.update(all_persisted_metadata_identifiers)

        # Perform the initial "hits" request to get an overview of how many data will be fetched
        hits_response, status_code = self._get_harvest_response(
            result_type="hits")
        descr = json.loads(self.pending_task.description)
        if status_code != 200:
            # The status code has to be rendered into the phase text, otherwise the placeholder is shown to the user
            descr["phase"] = "Harvest failed: HTTP Code {}".format(status_code)
            self.pending_task.description = json.dumps(descr)
            self.pending_task.save()
            raise ConnectionError(
                _("Harvest failed: Code {}\n{}").format(
                    status_code, hits_response))
        xml_response = xml_helper.parse_xml(hits_response)
        if xml_response is None:
            descr["phase"] = "Response is not a valid xml"
            self.pending_task.description = json.dumps(descr)
            self.pending_task.save()
            # Translate the template first and format afterwards, so the message id can be found by gettext
            raise ConnectionError(
                _("Response is not a valid xml: \n{}").format(hits_response))

        try:
            total_number_to_harvest = int(
                xml_helper.try_get_attribute_from_xml_element(
                    xml_response,
                    "numberOfRecordsMatched",
                    "//" + GENERIC_NAMESPACE_TEMPLATE.format("SearchResults"),
                ))
        except (TypeError, ValueError):
            # TypeError: attribute is missing (None), ValueError: attribute is present but not an integer
            csw_logger.error(
                "Malicious Harvest response: {}".format(hits_response))
            descr[
                "phase"] = "Harvest response incorrect. Inform an administrator!"
            self.pending_task.description = json.dumps(descr)
            self.pending_task.save()
            raise AttributeError(
                _("Harvest response is missing important data!"))

        descr["phase"] = "Start harvesting..."
        self.pending_task.description = json.dumps(descr)
        self.pending_task.save()
        if total_number_to_harvest > 0:
            progress_step_per_request = float(
                self.max_records_per_request / total_number_to_harvest) * 100
        else:
            # Nothing matched in the catalogue: avoid a division by zero, a single request finishes the run
            progress_step_per_request = 100.0

        # There are wongly configured CSW, which do not return nextRecord=0 on the last page but instead continue on
        # nextRecord=1. We need to prevent endless loops by checking whether, we already worked on these positions and
        # simply end it there!
        processed_start_positions = set()

        t_start = time()
        number_rest_to_harvest = total_number_to_harvest
        number_of_harvested = 0
        self.harvest_result.timestamp_start = timezone.now()
        self.harvest_result.save()

        page_cacher = PageCacher()

        # Run as long as we can fetch data and as long as the user does not abort the pending task!
        while self.pending_task is not None:
            processed_start_positions.add(self.start_position)
            # Get response
            next_response, status_code = self._get_harvest_response(
                result_type="results")

            found_entries = self._process_harvest_response(next_response)

            # Calculate time since loop started
            duration = time() - t_start
            number_rest_to_harvest -= self.max_records_per_request
            number_of_harvested += found_entries
            self.harvest_result.number_results = number_of_harvested
            self.harvest_result.save()

            # Remove cached pages of API and CSW
            page_cacher.remove_pages(API_CACHE_KEY_PREFIX)
            page_cacher.remove_pages(CSW_CACHE_PREFIX)
            if self.start_position == 0 or self.start_position in processed_start_positions:
                # We are done!
                estimated_time_for_all = timezone.timedelta(seconds=0)
                break

            if number_of_harvested > 0:
                seconds_for_rest = (number_rest_to_harvest *
                                    (duration / number_of_harvested))
            else:
                # No entry has been processed so far, so there is no per-record duration to extrapolate from
                seconds_for_rest = 0
            estimated_time_for_all = timezone.timedelta(
                seconds=seconds_for_rest)

            self._update_pending_task(self.start_position,
                                      total_number_to_harvest,
                                      progress_step_per_request,
                                      estimated_time_for_all)

        # Add HarvestResult infos
        self.harvest_result.timestamp_end = timezone.now()
        self.harvest_result.number_results = number_of_harvested
        self.harvest_result.save()

        # Delete Metadata records which could not be found in the catalogue anymore
        # This has to be done if the harvesting run completely. Skip this part if the user aborted the harvest!
        if self.pending_task is not None:
            deleted_metadatas = Metadata.objects.filter(
                identifier__in=self.deleted_metadata)
            deleted_metadatas.delete()
            self.pending_task.delete()

        # Remove cached pages of API and CSW
        page_cacher.remove_pages(API_CACHE_KEY_PREFIX)
        page_cacher.remove_pages(CSW_CACHE_PREFIX)
Beispiel #28
0
    def test_proxy_setting(self):
        # NOTE(review): This test is currently disabled. The unconditional `return` below leaves the method
        # immediately, so the string literal that follows is NOT a docstring (it is an unreachable expression)
        # and none of the assertions further down are executed. The test therefore always passes.
        # Remove the `return` to re-enable it - and read the review notes below before doing so.
        return
        """ Tests whether the proxy can be set properly.

        Returns:
        """
        metadata = self.service_wms.metadata

        # To avoid running celery in a separate test instance, we do not call the route. Instead we call the logic, which
        # is used to process access settings directly.
        async_process_securing_access(
            metadata.id,
            use_proxy=True,
            log_proxy=True,
            restrict_access=False,
        )

        # Compare the stored capabilities document against the non-original one for the same metadata
        self.cap_doc_wms.refresh_from_db()
        doc_unsecured = self.cap_doc_wms.content
        doc_secured = Document.objects.get(
            metadata=metadata,
            document_type=DocumentEnum.CAPABILITY.value,
            is_original=False,
        ).content

        # Check for all operations if the uris has been changed!
        # Do not check for GetCapabilities, since we always change this uri during registration!
        # Make sure all versions can be matched by the code - the xml structure differs a lot from version to version
        service_version = metadata.get_service_version()

        if metadata.is_service_type(OGCServiceEnum.WMS):
            operations = [
                OGCOperationEnum.GET_MAP.value,
                OGCOperationEnum.GET_FEATURE_INFO.value,
                OGCOperationEnum.DESCRIBE_LAYER.value,
                OGCOperationEnum.GET_LEGEND_GRAPHIC.value,
                OGCOperationEnum.GET_STYLES.value,
                OGCOperationEnum.PUT_STYLES.value,
            ]
        elif metadata.is_service_type(OGCServiceEnum.WFS):
            operations = [
                OGCOperationEnum.GET_FEATURE.value,
                OGCOperationEnum.TRANSACTION.value,
                OGCOperationEnum.LOCK_FEATURE.value,
                OGCOperationEnum.DESCRIBE_FEATURE_TYPE.value,
            ]
        else:
            operations = []

        # create xml documents from string documents and fetch only the relevant <Request> element for each
        xml_unsecured = xml_helper.parse_xml(doc_unsecured)
        request_unsecured = xml_helper.try_get_single_element_from_xml(elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("Request"), xml_elem=xml_unsecured)
        xml_secured = xml_helper.parse_xml(doc_secured)
        request_secured = xml_helper.try_get_single_element_from_xml(elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("Request"), xml_elem=xml_secured)

        for operation in operations:
            # Get <OPERATION> element
            operation_unsecured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format(operation), request_unsecured)
            operation_secured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format(operation), request_secured)

            if service_version == OGCServiceVersionEnum.V_1_0_0:
                if metadata.is_service_type(OGCServiceEnum.WMS):
                    # The WMS 1.0.0 specification uses <OPERATION> instead of <GetOPERATION> for any operation element.
                    operation = operation.replace("Get", "")

                    # Get <OPERATION> element again
                    operation_unsecured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format(operation), request_unsecured)
                    operation_secured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format(operation), request_secured)

                # Version 1.0.0 holds the uris in the "onlineResource" attribute of <Get> and <Post>
                get_unsecured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Get"), operation_unsecured)
                get_secured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Get"), operation_secured)
                post_unsecured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Post"), operation_unsecured)
                post_secured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Post"), operation_secured)

                # From here on the get_*/post_* names hold the uri strings instead of the xml elements
                online_res = "onlineResource"
                get_unsecured = xml_helper.try_get_attribute_from_xml_element(get_unsecured, online_res)
                get_secured = xml_helper.try_get_attribute_from_xml_element(get_secured, online_res)
                post_unsecured = xml_helper.try_get_attribute_from_xml_element(post_unsecured, online_res)
                post_secured = xml_helper.try_get_attribute_from_xml_element(post_secured, online_res)

                # Assert that all get/post elements are not None
                self.assertIsNotNone(get_secured, msg="The secured uri of '{}' is None!".format(operation))
                self.assertIsNotNone(post_secured, msg="The secured uri of '{}' is None!".format(operation))

                # Assert that the secured version is different from the unsecured one
                self.assertNotEqual(get_unsecured, get_secured, msg="The uri of '{}' has not been secured!".format(operation))
                self.assertNotEqual(post_unsecured, post_secured, msg="The uri of '{}' has not been secured!".format(operation))

                # Assert that the HOST_NAME constant appears in the secured uri
                # NOTE(review): assertContains receives the uri value here. If this is Django's
                # SimpleTestCase.assertContains, it expects a response object as first argument - verify before
                # re-enabling. The 1.1.1/1.3.0 branch below uses assertTrue(HOST_NAME in ...) for the same check.
                self.assertContains(get_secured, HOST_NAME)
                self.assertContains(post_secured, HOST_NAME)

            elif service_version == OGCServiceVersionEnum.V_1_1_0 \
                    or service_version == OGCServiceVersionEnum.V_2_0_0 \
                    or service_version == OGCServiceVersionEnum.V_2_0_2:
                # Only WFS
                # Get <OPERATION> element again, since the operation is now identified using an attribute, not an element tag
                operation_unsecured = xml_helper.try_get_single_element_from_xml(
                    ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Operation") + "[@name='" + operation + "']",
                    request_unsecured
                )
                operation_secured = xml_helper.try_get_single_element_from_xml(
                    ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Operation") + "[@name='" + operation + "']",
                    request_secured
                )

                # Version 1.1.0 holds the uris in the href attribute of <Get> and <Post>
                get_unsecured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Get"), operation_unsecured)
                get_secured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Get"), operation_secured)
                post_unsecured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Post"), operation_unsecured)
                post_secured = xml_helper.try_get_single_element_from_xml(".//" + GENERIC_NAMESPACE_TEMPLATE.format("Post"), operation_secured)

                get_unsecured = xml_helper.get_href_attribute(get_unsecured)
                get_secured = xml_helper.get_href_attribute(get_secured)
                post_unsecured = xml_helper.get_href_attribute(post_unsecured)
                post_secured = xml_helper.get_href_attribute(post_secured)

                # Assert that all get/post elements are not None
                self.assertIsNotNone(get_secured, msg="The secured uri of '{}' is None!".format(operation))
                self.assertIsNotNone(post_secured, msg="The secured uri of '{}' is None!".format(operation))

                # Assert that the secured version is different from the unsecured one
                self.assertNotEqual(get_unsecured, get_secured, msg="The uri of '{}' has not been secured!".format(operation))
                self.assertNotEqual(post_unsecured, post_secured, msg="The uri of '{}' has not been secured!".format(operation))

                # Assert that the HOST_NAME constant appears in the secured uri
                # NOTE(review): same assertContains concern as in the 1.0.0 branch above - verify before re-enabling.
                self.assertContains(get_secured, HOST_NAME)
                self.assertContains(post_secured, HOST_NAME)

            elif service_version == OGCServiceVersionEnum.V_1_1_1 or service_version == OGCServiceVersionEnum.V_1_3_0:
                # Version 1.1.1 holds the uris in the <OnlineResource> element inside <Get> and <Post>
                get_unsecured = xml_helper.try_get_single_element_from_xml(
                    ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Get")
                    + "/" + GENERIC_NAMESPACE_TEMPLATE.format("OnlineResource"),
                    operation_unsecured
                )
                get_secured = xml_helper.try_get_single_element_from_xml(
                    ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Get")
                    + "/" + GENERIC_NAMESPACE_TEMPLATE.format("OnlineResource"),
                    operation_secured
                )
                post_unsecured = xml_helper.try_get_single_element_from_xml(
                    ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Post")
                    + "/" + GENERIC_NAMESPACE_TEMPLATE.format("OnlineResource"),
                    operation_unsecured
                )
                post_secured = xml_helper.try_get_single_element_from_xml(
                    ".//" + GENERIC_NAMESPACE_TEMPLATE.format("Post")
                    + "/" + GENERIC_NAMESPACE_TEMPLATE.format("OnlineResource"),
                    operation_secured
                )

                get_unsecured = xml_helper.get_href_attribute(get_unsecured)
                get_secured = xml_helper.get_href_attribute(get_secured)
                post_unsecured = xml_helper.get_href_attribute(post_unsecured)
                post_secured = xml_helper.get_href_attribute(post_secured)

                # Assert that both (secure/unsecure) uris are None or none of them
                # This is possible for operations that are not supported by the service
                # NOTE(review): the guards below only skip the checks unless BOTH uris are present; a case where
                # exactly one of them is None is skipped as well and is not reported as a failure.
                if get_secured is not None and get_unsecured is not None:
                    self.assertIsNotNone(get_secured, msg="The secured uri of '{}' is None!".format(operation))

                    # Assert that the secured version is different from the unsecured one
                    self.assertNotEqual(get_unsecured, get_secured, msg="The uri of '{}' has not been secured!".format(operation))

                    # Assert that the HOST_NAME constant appears in the secured uri
                    self.assertTrue(HOST_NAME in get_secured)

                if post_secured is not None and post_unsecured is not None:
                    self.assertIsNotNone(post_secured, msg="The secured uri of '{}' is None!".format(operation))
                    self.assertNotEqual(post_unsecured, post_secured, msg="The uri of '{}' has not been secured!".format(operation))
                    self.assertTrue(HOST_NAME in post_secured)
            else:
                # Any other service version is not checked
                pass
Beispiel #29
0
    def post(self, data):
        """ Sends the given body as POST request using the configured connection implementation (CURL, Requests).

        For the Requests implementation the status code, the content and the headers of the response are written to
        self.status_code, self.content and self.http_external_headers. The CURL branch performs no request.
        The elapsed time is always written to self.run_time.

        Args:
            data (dict|byte): The post data body
        Returns:
             nothing
        """
        try:
            # If the body is proper xml and no Content-Type has been chosen yet, default the header to xml.
            if xml_helper.parse_xml(data) is not None:
                if self.additional_headers.get("Content-Type") is None:
                    self.additional_headers["Content-Type"] = "application/xml"
        except ValueError:
            # Data which is not xml leads to a ValueError - in that case the header simply stays untouched
            pass

        self.init_time = time.time()

        if self.connection_type is ConnectionEnum.CURL:
            # perform curl post
            pass
        elif self.connection_type is ConnectionEnum.REQUESTS:
            # Arguments which are shared by every variant of the requests post
            shared_kwargs = {
                "timeout": REQUEST_TIMEOUT,
                "proxies": PROXIES,
                "headers": self.additional_headers,
                "verify": VERIFY_SSL_CERTIFICATES,
            }
            auth_classes = {
                "http_basic": HTTPBasicAuth,
                "http_digest": HTTPDigestAuth,
            }

            credentials = self.external_auth
            if credentials is None:
                # No external authentication configured - plain post
                answer = requests.post(self._url, data, **shared_kwargs)
            else:
                auth_class = auth_classes.get(credentials.auth_type)
                if auth_class is None:
                    # Unknown auth type: no request is sent, an empty response is evaluated instead
                    answer = HttpResponse()
                else:
                    answer = requests.post(
                        self._url,
                        data,
                        auth=auth_class(credentials.username, credentials.password),
                        **shared_kwargs
                    )

            self.status_code = answer.status_code
            self.content = answer.content
            self.http_external_headers = answer.headers._store
        else:
            # Should not happen - we only accept REQUEST or CURL
            pass

        self.run_time = time.time() - self.init_time
Beispiel #30
0
def get_resource_capabilities(request: HttpRequest, md: Metadata):
    """ Resolves the capabilities document which has to be delivered for a metadata record.

    The GET parameters VERSION, REQUEST and FALLBACK are evaluated case-insensitively. Depending on them the
    document is taken from the remote service, from the current (persisted or generated) capabilities or it
    is fetched from remote for the requested version and secured/proxied on the fly.

    Args:
        request: The incoming request, only the GET parameters are read
        md: The metadata record of the requested resource
    Returns:
        An HttpResponse with status 423 if the resource is not active, an HttpResponse with status 404 if the
        VERSION or REQUEST parameter is not valid, otherwise the capabilities document
    Raises:
        ValueError: If the remotely fetched capabilities document can not be parsed as xml
    """
    from service.tasks import async_increase_hits
    stored_version = md.get_service_version().value
    # move increasing hits to background process to speed up response time!
    # todo: after refactoring of md.increase_hits() maybe we don't need to start async tasks... test it!!!
    async_increase_hits.delay(md.id)

    if not md.is_active:
        return HttpResponse(content=SERVICE_DISABLED, status=423)

    # Collect the relevant parameters together with the key spelling which was used by the client
    version_param = version_tag = None
    request_param = request_tag = None
    use_fallback = None

    for param_key, param_value in request.GET.dict().items():
        upper_key = param_key.upper()
        if upper_key == "VERSION":
            version_param, version_tag = param_value, param_key
        elif upper_key == "REQUEST":
            request_param, request_tag = param_value, param_key
        elif upper_key == "FALLBACK":
            use_fallback = resolve_boolean_attribute_val(param_value)

    # Without a (non empty) version parameter we simply use the version we have.
    if not version_param:
        version_param = stored_version

    known_versions = [member.value for member in OGCServiceVersionEnum]
    if version_param not in known_versions:
        # version number not valid
        return HttpResponse(content=PARAMETER_ERROR.format(version_tag), status=404)

    if request_param is not None and request_param != OGCOperationEnum.GET_CAPABILITIES.value:
        # request not valid
        return HttpResponse(content=PARAMETER_ERROR.format(request_tag), status=404)

    if md.is_catalogue_metadata:
        return md.get_remote_original_capabilities_document(version_param)

    if stored_version == version_param or use_fallback is True or not md.is_root():
        # This is the case if
        # 1) a version is requested, which we have in our database
        # 2) the fallback parameter is set explicitly
        # 3) a subelement is requested, which normally do not have capability documents
        # We can check the cache for this document or we need to generate it!
        return md.get_current_capability_xml(version_param)

    # The requested version differs from the registered one: fetch the matching document from remote,
    # we do not provide this as our default (registered) one
    remote_xml = md.get_remote_original_capabilities_document(version_param)
    if xml_helper.parse_xml(remote_xml) is None:
        raise ValueError("No xml document was retrieved. Content was :'{}'".format(remote_xml))

    # The document is only built in memory (auto_save=False) to secure it and to set the proxy uris
    remote_doc = Document(
        content=remote_xml,
        metadata=md,
        document_type=DocumentEnum.CAPABILITY.value,
        is_original=True
    )
    remote_doc.set_capabilities_secured(auto_save=False)

    if md.use_proxy_uri:
        remote_doc.set_proxy(True, auto_save=False, force_version=version_param)

    return remote_doc.content