def _create_metadata_record(self, contact: Organization, group: MrMapGroup):
    """ Builds and persists the service level Metadata record of this OGCWebMapService

    The parsed service attributes held on this object are copied onto a fresh
    Metadata instance. The record is created inactive and is saved before it is
    returned, so that M2M entities can be attached afterwards.

    Args:
        contact (Organization): The contact organization for this metadata record
        group (MrMapGroup): The owner/creator group
    Returns:
         metadata (Metadata): The persisted metadata record
    """
    record = Metadata()
    record.metadata_type = MetadataEnum.SERVICE.value

    # No file identifier was found in the document -> generate one
    if self.service_file_iso_identifier is None:
        self.service_file_iso_identifier = uuid.uuid4()

    # Descriptive information taken from the parsed capabilities
    record.title = self.service_identification_title
    record.abstract = self.service_identification_abstract
    record.online_resource = self.service_provider_onlineresource_linkage
    record.capabilities_original_uri = self.service_connect_url
    record.access_constraints = self.service_identification_accessconstraints
    record.fees = self.service_identification_fees

    # The bounding geometry is only overwritten if the service provides one
    service_bbox = self.service_bounding_box
    if service_bbox is not None:
        record.bounding_geometry = service_bbox

    # NOTE(review): the generated fallback above is stored in
    # 'service_file_iso_identifier', while the identifier is read from
    # 'service_file_identifier' - confirm that both attributes are intended.
    record.identifier = self.service_file_identifier

    # Ownership and state
    record.is_active = False
    record.created_by = group
    record.contact = contact

    # The instance must be saved before M2M entities can be added
    record.save()
    return record
def _create_metadata_record(self, parent_service: Service, group: MrMapGroup):
    """ Builds and persists the layer level Metadata record of this OGCLayer

    Title, abstract and identifier come from the layer itself. Online resource,
    capabilities uri, contact and access constraints are inherited from the
    metadata of the parent service. The record is saved once without geometry
    and a second time after the bounding polygon has been attached.

    Args:
        self (OGCLayer): The OGCLayer object (result of parsing)
        parent_service (Service): The parent Service object
        group (MrMapGroup): The creator/owner group
    Returns:
         metadata (Metadata): The persisted metadata object
    """
    record = Metadata()
    record.metadata_type = MetadataEnum.LAYER.value

    # Layer specific information
    record.title = self.title
    record.abstract = self.abstract

    # Information inherited from the parent service metadata
    parent_md = parent_service.metadata
    record.online_resource = parent_md.online_resource
    record.capabilities_original_uri = parent_md.capabilities_original_uri
    record.identifier = self.identifier
    record.contact = parent_md.contact
    record.access_constraints = parent_md.access_constraints

    record.is_active = False
    record.created_by = group

    # First save, so the record has an id
    record.save()

    # Build the closed ring of the bounding box: the first corner is repeated
    # as last point
    bbox = self.capability_bbox_lat_lon
    min_x = float(bbox["minx"])
    min_y = float(bbox["miny"])
    max_y = float(bbox["maxy"])
    max_x = float(bbox["maxx"])
    ring = (
        (min_x, min_y),
        (min_x, max_y),
        (max_x, max_y),
        (max_x, min_y),
        (min_x, min_y),
    )
    record.bounding_geometry = Polygon(ring)
    record.save()

    return record
def _persist_metadata(self, md_data_entry: dict):
    """ Creates or updates a real Metadata model record from one parsed data entry

    The record is looked up by its identifier. If it already exists and its
    remote date stamp is unchanged, nothing is written. Otherwise the plain
    fields are overwritten and, inside one transaction, the record is saved and
    its harvested urls, keywords, categories and formats are attached. Newly
    created records additionally get a HARVESTED_THROUGH relation to the
    metadata of this harvester, and records with a parent id are registered in
    the parent-child map.

    IntegrityError and DataError raised while persisting are logged and
    swallowed, so a single broken entry does not stop the caller.

    Args:
        md_data_entry (dict): The parsed data of a single record. The keys "id", "keywords", "formats",
                              "categories" and "parent_id" are required ("date_stamp" as well, if the record
                              already exists), all other keys are optional.
    Returns:
         None: The record is persisted as a side effect, nothing is returned
    """
    _id = md_data_entry["id"]

    # Remove this id from the set of metadata which shall be deleted in the end.
    try:
        self.deleted_metadata.remove(_id)
    except KeyError:
        pass

    try:
        md = Metadata.objects.get(identifier=_id)
        is_new = False
        if md.last_remote_change == md_data_entry["date_stamp"]:
            # Nothing to do here!
            return
    except ObjectDoesNotExist:
        md = Metadata(identifier=_id)
        is_new = True

    md.access_constraints = md_data_entry.get("access_constraints", None)
    md.created_by = self.harvesting_group
    md.origin = ResourceOriginEnum.CATALOGUE.value
    md.last_remote_change = md_data_entry.get("date_stamp", None)
    md.title = md_data_entry.get("title", None)
    md.contact = md_data_entry.get("contact", None)
    md.language_code = md_data_entry.get("language_code", None)
    md.metadata_type = md_data_entry.get("metadata_type", None)
    md.abstract = md_data_entry.get("abstract", None)
    md.bounding_geometry = md_data_entry.get("bounding_geometry", None)
    md.is_active = True
    md.capabilities_original_uri = md_data_entry.get("capabilities_original_url", None)

    try:
        # Improve speed for keyword get-create by fetching (filter) all existing ones and only perform
        # get_or_create on the ones that do not exist yet (noted by the original author as a
        # considerable speed up for large amounts of data).
        keyword_names = md_data_entry["keywords"]
        known_keywords = {
            kw.keyword
            for kw in Keyword.objects.filter(keyword__in=keyword_names)
        }
        for keyword_name in keyword_names:
            if keyword_name not in known_keywords:
                Keyword.objects.get_or_create(keyword=keyword_name)
        kws = Keyword.objects.filter(keyword__in=keyword_names)

        # Same for MimeTypes
        format_names = md_data_entry["formats"]
        known_formats = {
            _format.mime_type
            for _format in MimeType.objects.filter(mime_type__in=format_names)
        }
        for format_name in format_names:
            if format_name not in known_formats:
                MimeType.objects.get_or_create(mime_type=format_name)
        formats = MimeType.objects.filter(mime_type__in=format_names)

        with transaction.atomic():
            category_names = md_data_entry["categories"]
            if category_names:
                # Match any of the given category titles, ignoring the case
                q = Q()
                for cat in category_names:
                    q |= Q(title_EN__iexact=cat)
                categories = Category.objects.filter(q)
            else:
                categories = []

            # Persist the record first: a many-to-many relation can only be populated for an
            # instance which already exists in the database.
            md.save(add_monitoring=False)

            for link in md_data_entry.get("links", []):
                url = link.get("link", None)
                if url is None:
                    # A link without a target is of no use
                    continue
                generic_url = GenericUrl()
                generic_url.description = "[HARVESTED URL] \n{}".format(link.get("description", ""))
                generic_url.method = "Get"
                generic_url.url = url
                generic_url.save()
                md.additional_urls.add(generic_url)

            md.keywords.add(*kws)
            md.categories.add(*categories)
            md.formats.add(*formats)

            # To reduce runtime, we only create a new MetadataRelation if we are sure there hasn't already been one.
            # Using get_or_create increases runtime on existing metadata too much!
            if is_new:
                md.add_metadata_relation(
                    to_metadata=self.metadata,
                    relation_type=MetadataRelationEnum.HARVESTED_THROUGH.value,
                    origin=ResourceOriginEnum.CATALOGUE.value,
                )

        parent_id = md_data_entry["parent_id"]
        # Add the found parent_id to the parent_child map!
        if parent_id is not None:
            if self.parent_child_map.get(parent_id, None) is None:
                self.parent_child_map[parent_id] = [md]
            else:
                self.parent_child_map[parent_id].append(md)

    except (IntegrityError, DataError) as e:
        # Only log the problem, so the remaining entries can still be processed
        csw_logger.error(
            CSW_ERROR_LOG_TEMPLATE.format(md.identifier, self.metadata.title, e)
        )
def update_metadata(old: Metadata, new: Metadata, keep_custom_md: bool):
    """ Replaces the content of an existing metadata (old) with the content of a newer one (new).

    The returned object is a deep copy of the new metadata, which gets the id,
    creation information, activation state and metadata type of the old
    metadata written back. Reference systems, dimensions and formats are
    always taken over from the new metadata. The function does not call save()
    on the returned object.

    Args:
        old (Metadata): The existing metadata, that shall be overwritten
        new (Metadata): The new metadata that is used for overwriting
        keep_custom_md (bool): Whether the values of the MetadataEditorForm fields of the old metadata shall be
                               restored after overwriting. If not, the keywords are replaced by the new ones.
    Returns:
         old (Metadata): The overwritten metadata
    """
    # The new metadata must not be marked as update candidate anymore
    new.is_update_candidate_for = None

    # Remember the persistence related values, which have to survive the overwriting
    preserved = {
        "id": old.id,
        "created": old.created,
        "created_by": old.created_by,
        "is_active": old.is_active,
        "metadata_type": old.metadata_type,
    }
    was_custom = old.is_custom

    # Cache the custom (editor form) values of the old metadata, if they shall be kept
    if keep_custom_md:
        custom_values = {
            name: getattr(old, name)
            for name in MetadataEditorForm._meta.fields
        }
    else:
        custom_values = {}

    # Take over everything from the new metadata, then write the preserved values back
    old = deepcopy(new)
    for attr_name, attr_value in preserved.items():
        setattr(old, attr_name, attr_value)

    # Related elements which always follow the new metadata
    old.reference_system.clear()
    old.reference_system.add(*new.reference_system.all())

    old.dimensions.clear()
    old.dimensions.add(*new.dimensions.all())

    old.formats.clear()
    old.formats.add(*new.formats_list)

    if not keep_custom_md:
        # Without custom metadata the keywords simply follow the new metadata
        old.keywords.clear()
        old.keywords.add(*new.keywords.all())
    else:
        old.is_custom = was_custom
        for name, value in custom_values.items():
            try:
                setattr(old, name, value)
            except TypeError:
                # Plain assignment failed, so the cached value is treated as a related manager
                # (per the original author: a ManyRelatedManager). Add only the elements which
                # are not yet present on the overwritten metadata.
                relation_name = value.prefetch_cache_name
                target_manager = getattr(old, relation_name)
                present_elements = target_manager.all()
                for element in value.all():
                    if element not in present_elements:
                        target_manager.add(element)

    old.last_modified = timezone.now()
    return old