Ejemplo n.º 1
0
def _add_iso_metadata(metadata: Metadata, md_links: list,
                      existing_iso_links: list):
    """ Adds iso metadata that is found in the newer md_links list but not in the persisted existing_iso_links list

    Args:
        metadata (Metadata): The edited metadata
        md_links (list): The new iso metadata links
        existing_iso_links (list): The existing metadata links, related to the metadata object
    Returns:
         nothing
    """
    # iterate over all links from the form and check if we need to persist them
    for link in md_links:
        if len(link) == 0:
            continue
        # check if this is already an existing uri and skip if so
        if link in existing_iso_links:
            continue
        # ... otherwise create a new iso metadata object
        iso_md = ISOMetadata(link, ResourceOriginEnum.EDITOR.value)
        iso_md = iso_md.to_db_model(created_by=metadata.created_by)
        iso_md.save()
        metadata.add_metadata_relation(
            to_metadata=iso_md,
            origin=iso_md.origin,
            relation_type=MetadataRelationEnum.DESCRIBES.value)
Ejemplo n.º 2
0
    def _create_additional_records(self, service: Service, metadata: Metadata,
                                   group: MrMapGroup):
        """ Creates additional records like linked service metadata, keywords or MimeTypes/Formats

        Args:
            service (Service): The service record
            metadata (Metadata): THe metadata record
        Returns:

        """
        # Keywords
        for kw in self.service_identification_keywords:
            if kw is None:
                continue
            keyword = Keyword.objects.get_or_create(keyword=kw)[0]
            metadata.keywords.add(keyword)

        # MimeTypes / Formats
        for operation, formats in self.operation_format_map.items():
            for format in formats:
                mime_type = MimeType.objects.get_or_create(operation=operation,
                                                           mime_type=format,
                                                           created_by=group)[0]
                metadata.formats.add(mime_type)

        # Check for linked service metadata that might be found during parsing
        if self.linked_service_metadata is not None:
            service.linked_service_metadata = self.linked_service_metadata.to_db_model(
                MetadataEnum.SERVICE.value, created_by=metadata.created_by)
            metadata.add_metadata_relation(
                to_metadata=service.linked_service_metadata,
                relation_type=MetadataRelationEnum.VISUALIZES.value,
                origin=ResourceOriginEnum.CAPABILITIES.value)
Ejemplo n.º 3
0
    def _create_additional_records(self, metadata: Metadata, layer: Layer,
                                   group: MrMapGroup, epsg_api: EpsgApi):
        """ Creates additional records such as Keywords, ReferenceSystems, Dimensions, ...

        Args:
            metadata (Metadata): The layer's metadata object
            layer (Layer): The Layer record object
            group (MrMapGroup): The owner/creator group
            epsg_api (EpsgApi): A epsg_api object
        Returns:

        """
        # Keywords
        for kw in self.capability_keywords:
            keyword = Keyword.objects.get_or_create(keyword=kw)[0]
            metadata.keywords.add(keyword)

        # handle reference systems
        for sys in self.capability_projection_system:
            parts = epsg_api.get_subelements(sys)
            # check if this srs is allowed for us. If not, skip it!
            if parts.get("code") not in ALLOWED_SRS:
                continue
            ref_sys = ReferenceSystem.objects.get_or_create(
                code=parts.get("code"), prefix=parts.get("prefix"))[0]
            metadata.reference_system.add(ref_sys)

        for iso_md in self.iso_metadata:
            iso_md = iso_md.to_db_model(created_by=group)
            metadata.add_metadata_relation(
                to_metadata=iso_md,
                relation_type=MetadataRelationEnum.DESCRIBES.value,
                origin=iso_md.origin)

        # Dimensions
        for dimension in self.dimension_list:
            dim = Dimension.objects.get_or_create(
                type=dimension.get("type"),
                units=dimension.get("units"),
                extent=dimension.get("extent"),
            )[0]
            layer.metadata.dimensions.add(dim)
Ejemplo n.º 4
0
    def _persist_metadata(self, md_data_entry: dict):
        """ Creates real Metadata model records from the parsed data

        Args:
            md_data_entry (dict):
        Returns:
             metadata (Metadata): The persisted metadata object
        """
        _id = md_data_entry["id"]
        # Remove this id from the set of metadata which shall be deleted in the end.
        try:
            self.deleted_metadata.remove(_id)
        except KeyError:
            pass

        try:
            md = Metadata.objects.get(identifier=_id, )
            is_new = False
            if md.last_remote_change == md_data_entry["date_stamp"]:
                # Nothing to do here!
                return
        except ObjectDoesNotExist:
            md = Metadata(identifier=_id)
            is_new = True
        md.access_constraints = md_data_entry.get("access_constraints", None)
        md.created_by = self.harvesting_group
        md.origin = ResourceOriginEnum.CATALOGUE.value
        md.last_remote_change = md_data_entry.get("date_stamp", None)
        md.title = md_data_entry.get("title", None)
        md.contact = md_data_entry.get("contact", None)
        md.language_code = md_data_entry.get("language_code", None)
        md.metadata_type = md_data_entry.get("metadata_type", None)
        md.abstract = md_data_entry.get("abstract", None)
        md.bounding_geometry = md_data_entry.get("bounding_geometry", None)
        formats = md_data_entry.get("formats", [])
        md.is_active = True
        md.capabilities_original_uri = md_data_entry.get(
            "capabilities_original_url", None)
        try:
            # Improve speed for keyword get-create by fetching (filter) all existing ones and only perform
            # get_or_create on the ones that do not exist yet. Speed up by ~50% for large amount of data
            existing_kws = Keyword.objects.filter(
                keyword__in=md_data_entry["keywords"])
            existing_kws = [kw.keyword for kw in existing_kws]
            new_kws = [
                kw for kw in md_data_entry["keywords"]
                if kw not in existing_kws
            ]
            [Keyword.objects.get_or_create(keyword=kw)[0] for kw in new_kws]
            kws = Keyword.objects.filter(keyword__in=md_data_entry["keywords"])

            # Same for MimeTypes
            existing_formats = MimeType.objects.filter(
                mime_type__in=md_data_entry["formats"])
            existing_formats = [
                _format.mime_type for _format in existing_formats
            ]
            new_formats = [
                _format for _format in md_data_entry["formats"]
                if _format not in existing_formats
            ]
            [
                MimeType.objects.get_or_create(mime_type=_format)[0]
                for _format in new_formats
            ]
            formats = MimeType.objects.filter(
                mime_type__in=md_data_entry["formats"])

            with transaction.atomic():
                if len(md_data_entry["categories"]) > 0:
                    q = Q()
                    for cat in md_data_entry["categories"]:
                        q |= Q(title_EN__iexact=cat)
                    categories = Category.objects.filter(q)
                else:
                    categories = []

                for link in md_data_entry.get("links", []):
                    url = link.get("link", None)
                    if url is None:
                        continue
                    generic_url = GenericUrl()
                    generic_url.description = "[HARVESTED URL] \n{}".format(
                        link.get("description", ""))
                    generic_url.method = "Get"
                    generic_url.url = url
                    generic_url.save()
                    md.additional_urls.add(generic_url)

                md.save(add_monitoring=False)
                md.keywords.add(*kws)
                md.categories.add(*categories)
                md.formats.add(*formats)

                # To reduce runtime, we only create a new MetadataRelation if we are sure there hasn't already been one.
                # Using get_or_create increases runtime on existing metadata too much!
                if is_new:
                    md.add_metadata_relation(
                        to_metadata=self.metadata,
                        relation_type=MetadataRelationEnum.HARVESTED_THROUGH.
                        value,
                        origin=ResourceOriginEnum.CATALOGUE.value)

            parent_id = md_data_entry["parent_id"]
            # Add the found parent_id to the parent_child map!
            if parent_id is not None:
                if self.parent_child_map.get(parent_id, None) is None:
                    self.parent_child_map[parent_id] = [md]
                else:
                    self.parent_child_map[parent_id].append(md)

        except (IntegrityError, DataError) as e:
            csw_logger.error(
                CSW_ERROR_LOG_TEMPLATE.format(md.identifier,
                                              self.metadata.title, e))