def _create_metadata_record(self, contact: Organization, group: MrMapGroup):
    """ Builds and persists the service-level Metadata record from the parsed service attributes

    The record is created inactive (``is_active = False``) and saved once, so that
    many-to-many relations can be attached by the caller afterwards.

    Args:
        contact (Organization): The organization stored as contact of the record
        group (MrMapGroup): The group stored as creator of the record
    Returns:
         metadata (Metadata): The persisted metadata record
    """
    if self.service_file_iso_identifier is None:
        # No file identifier was found in the document -> generate one
        self.service_file_iso_identifier = uuid.uuid4()

    # NOTE(review): the generated value lands in `service_file_iso_identifier`, while the
    # record below is filled from `service_file_identifier` - confirm this is intended.
    field_values = {
        "metadata_type": MetadataEnum.SERVICE.value,
        "title": self.service_identification_title,
        "abstract": self.service_identification_abstract,
        "online_resource": self.service_provider_onlineresource_linkage,
        "capabilities_original_uri": self.service_connect_url,
        "access_constraints": self.service_identification_accessconstraints,
        "fees": self.service_identification_fees,
        "identifier": self.service_file_identifier,
        "is_active": False,
        "created_by": group,
        "contact": contact,
    }
    if self.service_bounding_box is not None:
        # Only overwrite the model's bounding geometry if the service provides one
        field_values["bounding_geometry"] = self.service_bounding_box

    metadata = Metadata()
    for field_name, field_value in field_values.items():
        setattr(metadata, field_name, field_value)

    # Save metadata instance to be able to add M2M entities
    metadata.save()
    return metadata
def _fill_metadata_db_model(self, metadata: Metadata):
    """ Copies the parsed ISO metadata attributes of this object onto a Metadata db record

    The record is saved, the legal reports and legal dates are persisted and attached,
    and the record is saved once more before it is returned.

    Side effect: if no title was parsed, ``self.title`` is set to "BROKEN".

    Args:
        metadata (Metadata): The existing or empty record to be filled
    Returns:
         metadata (Metadata): The same record, filled and persisted
    """
    metadata.identifier = self.file_identifier
    metadata.abstract = self.abstract
    metadata.access_constraints = self.access_constraints

    # Take the polygon with the largest area as bounding geometry.
    # max() keeps the first polygon on ties.
    exterior_polygons = self.polygonal_extent_exterior
    if len(exterior_polygons) > 0:
        metadata.bounding_geometry = max(
            exterior_polygons,
            key=lambda polygon: polygon.area,
        )

    try:
        contact, _created = Organization.objects.get_or_create(
            organization_name=self.responsible_party,
            email=self.contact_email,
        )
    except MultipleObjectsReturned:
        # Several organizations match name and email, so fall back to a dedicated one.
        # "Unique" here only means: identified by organization_name and email.
        contact, _created = Organization.objects.get_or_create(
            organization_name="{}#1".format(self.responsible_party),
            email=self.contact_email,
        )
    metadata.contact = contact

    metadata.is_inspire_conform = self.inspire_interoperability
    metadata.metadata_url = self.uri
    metadata.last_remote_change = self.last_change_date
    metadata.spatial_res_type = self.spatial_res_type
    metadata.spatial_res_value = self.spatial_res_val

    if self.title is None:
        self.title = "BROKEN"
    metadata.title = self.title

    metadata.origin = self.origin
    metadata.is_broken = self.is_broken
    # The record must be saved before M2M relations can be attached
    metadata.save()

    # Persist legal reports (their date first) and legal dates, then attach them
    for legal_report in self.legal_reports:
        legal_report.date.save()
        legal_report.save()
        metadata.legal_reports.add(legal_report)

    for legal_date in self.legal_dates:
        legal_date.save()
        metadata.legal_dates.add(legal_date)

    metadata.save()
    return metadata
def to_db_model(self, type=MetadataEnum.DATASET.value, created_by: MrMapGroup = None):
    """ Get corresponding metadata object from database or create it if not found!

    The record is looked up by ``file_identifier`` and ``uri``. If it exists and its
    persisted ``last_remote_change`` is newer than the parsed ``last_change_date``,
    it is returned untouched. Otherwise the record (and, for dataset metadata, its
    Dataset) is (re)filled from this object, the original metadata document is
    stored and the keywords are attached.

    Args:
        type: The metadata type used when a new record has to be created
        created_by (MrMapGroup): The group set as creator on newly created records
    Returns:
         metadata (Metadata): A db model Metadata object
    """
    is_update = False

    # Try to find the object by uuid and uri. If not existing yet, create a new record
    try:
        metadata = Metadata.objects.get(identifier=self.file_identifier, metadata_url=self.uri)
    except ObjectDoesNotExist:
        # Object does not seem to exist -> create it!
        metadata = Metadata()
        metadata.metadata_type = type
        if metadata.is_dataset_metadata:
            metadata.dataset = Dataset()
            metadata.dataset.created_by = created_by
        metadata.created_by = created_by
    else:
        persisted_change = metadata.last_remote_change
        new_change = self.last_change_date
        # Without both timestamps the versions cannot be compared, so the record is
        # refreshed instead of failing on `None.replace(...)`.
        if persisted_change is not None and new_change is not None:
            # Make sure both date time objects will be comparable
            if persisted_change.replace(tzinfo=utc) > new_change.replace(tzinfo=utc):
                # The persisted record is newer -> nothing to do here
                return metadata
        is_update = True

    # From here on the record is either new or needs to be updated

    # In case of a dataset, we need to fill the information into the dataset object
    if metadata.is_dataset_metadata:
        metadata.dataset = self._fill_dataset_db_model(metadata.dataset)

    metadata = self._fill_metadata_db_model(metadata)
    metadata.save()
    # Only dataset metadata carries a Dataset record, other types have nothing to save here
    if metadata.is_dataset_metadata:
        metadata.dataset.save()

    orig_document = Document.objects.get_or_create(
        metadata=metadata,
        document_type=DocumentEnum.METADATA.value,
        is_original=True,
    )[0]
    orig_document.content = self.raw_metadata
    orig_document.save()

    if is_update:
        # Drop outdated keywords before the parsed ones are attached
        metadata.keywords.clear()
    for kw in self.keywords:
        keyword = Keyword.objects.get_or_create(keyword=kw)[0]
        metadata.keywords.add(keyword)

    return metadata
def create_service_model_instance(self, user: MrMapUser, register_group, register_for_organization, external_auth, is_update_candidate_for):
    """ Map all data from the OGCCatalogueService class to their database models

    Persists a catalogue Metadata record including keywords and formats, then
    delegates the creation of the Service record to ``_create_service_record``.

    Args:
        user (MrMapUser): The user which performs the action (not used in this method)
        register_group (Group): The group which is used to register this service
        register_for_organization (Organization): The organization for which this service is being registered
        external_auth (ExternalAuthentication): The external authentication object (not used in this method)
        is_update_candidate_for: Passed through unchanged to ``_create_service_record``
    Returns:
         service (Service): The result of ``_create_service_record``
    """
    if current_task:
        # Report the progress to the task this method is running in
        progress_meta = {
            'current': PROGRESS_STATUS_AFTER_PARSING,
            'phase': 'Persisting...',
        }
        current_task.update_state(state=states.STARTED, meta=progress_meta)

    md = Metadata()
    md.metadata_type = MetadataEnum.CATALOGUE.value
    md.title = self.service_identification_title
    md.identifier = self.service_file_identifier
    md.abstract = self.service_identification_abstract
    md.online_resource = self.service_provider_onlineresource_linkage
    md.contact = self._create_contact_organization_record()
    md.authority_url = self.service_provider_url
    md.access_constraints = self.service_identification_accessconstraints
    md.fees = self.service_identification_fees
    md.created_by = register_group
    md.capabilities_original_uri = self.service_connect_url
    if self.service_bounding_box is not None:
        md.bounding_geometry = self.service_bounding_box

    # Save metadata record so we can use M2M or id of record later
    md.save()
    if md.identifier is None:
        # No identifier was parsed -> fall back to the database id
        md.identifier = str(md.id)

    # Keywords (missing entries are skipped)
    for raw_keyword in self.service_identification_keywords:
        if raw_keyword is not None:
            keyword, _created = Keyword.objects.get_or_create(keyword=raw_keyword)
            md.keywords.add(keyword)

    md.formats.add(*self.formats_list)
    md.save()

    return self._create_service_record(
        register_group,
        register_for_organization,
        md,
        is_update_candidate_for,
    )
def _fill_form_list(form_list, metadata: Metadata, dataset: Dataset, user: MrMapUser):
    """ Applies the cleaned data of every wizard form to the metadata and dataset records

    Each form is dispatched by its class name to the matching DatasetWizard fill
    function. Afterwards both records are saved, the metadata is flagged as custom
    and the non-original metadata document is overwritten, or created if missing.

    Args:
        form_list: The list of forms
        metadata: The metadata record
        dataset: The dataset record
        user: The performing user
    Returns:

    """
    # Form class name -> function which writes the form data into the records
    fill_functions = {
        "DatasetIdentificationForm": DatasetWizard._fill_metadata_dataset_identification_form,
        "DatasetResponsiblePartyForm": DatasetWizard._fill_metadata_dataset_responsible_party_form,
        "DatasetClassificationForm": DatasetWizard._fill_metadata_dataset_classification_form,
        "DatasetSpatialExtentForm": DatasetWizard._fill_metadata_dataset_spatial_extent_form,
        "DatasetLicenseConstraintsForm": DatasetWizard._fill_metadata_dataset_licence_form,
        "DatasetQualityForm": DatasetWizard._fill_metadata_dataset_quality_form,
    }

    for wizard_form in form_list:
        # A form class without an entry in the map raises a KeyError
        fill_function = fill_functions[type(wizard_form).__name__]
        fill_function(wizard_form.cleaned_data, metadata, dataset, user)

    dataset.save()
    metadata.is_custom = True
    metadata.save()

    try:
        current_document = Document.objects.get(
            metadata__id=metadata.id,
            document_type=DocumentEnum.METADATA.value,
            is_original=False,
        )
        current_document.is_active = metadata.is_active
        DatasetWizard._overwrite_dataset_document(metadata, current_document)
    except ObjectDoesNotExist:
        # No custom document exists yet -> create it
        DatasetWizard._create_dataset_document(metadata)
class NewDatasetWizard(PermissionRequiredMixin, DatasetWizard):
    """ Wizard for adding a new dataset metadata record

    Access requires the CAN_ADD_DATASET_METADATA permission; a missing permission
    raises an exception (``raise_exception = True``) instead of redirecting.
    """
    permission_required = PermissionEnum.CAN_ADD_DATASET_METADATA.value
    raise_exception = True
    permission_denied_message = NO_PERMISSION

    def __init__(self, *args, **kwargs):
        """ Initializes the wizard with the action url and title of the 'new dataset' dialog """
        super().__init__(
            action_url=reverse('editor:dataset-metadata-wizard-new', ),
            title=_(format_html('<b>Add New Dataset</b>')),
            *args,
            **kwargs)

    def get_form_kwargs(self, step=None):
        """ Provides the current request to every form, regardless of the step

        Args:
            step: The wizard step (not evaluated)
        Returns:
             dict: The keyword arguments for the form construction
        """
        return {'request': self.request}

    def done(self, form_list, **kwargs):
        """ Iterates over all forms and fills the Metadata/Dataset records accordingly

        Fresh Metadata and Dataset records are created and pre-saved before the
        parent implementation processes the forms.

        Args:
            form_list (FormList): An iterable list of forms
            kwargs:
        Returns:
             The result of the parent ``done`` implementation
        """
        # Create instances
        self.metadata = Metadata()
        self.metadata.metadata_type = MetadataEnum.DATASET.value
        self.metadata.is_active = True

        self.dataset = Dataset()
        self.dataset.is_active = True
        self.dataset.metadata_standard_name = "ISO 19115 Geographic information - Metadata"
        self.dataset.metadata_standard_version = "ISO 19115:2003(E)"

        # Pre-save objects to be able to add M2M relations
        self.metadata.save()
        # The identifier of a new record is derived from its database id
        self.metadata.identifier = self.metadata.id

        self.dataset.metadata = self.metadata
        # Copy the identifier only now: before the save above it has not been assigned yet,
        # so the dataset would keep a stale value.
        self.dataset.md_identifier_code = self.metadata.identifier
        self.dataset.save()

        self.metadata.metadata_url = reverse("resource:get-dataset-metadata", args=(self.dataset.id, ))

        return super().done(form_list=form_list, **kwargs)
def _create_metadata_record(self, parent_service: Service, group: MrMapGroup):
    """ Builds and persists the Metadata record of this layer

    Online resource, capabilities uri, contact and access constraints are taken over
    from the metadata of the parent service. The record is saved once before the
    bounding geometry is built from ``capability_bbox_lat_lon`` and once afterwards.

    Args:
        self (OGCLayer): The OGCLayer object (result of parsing)
        parent_service (Service): The parent Service object
        group (MrMapGroup): The creator/owner group
    Returns:
         metadata (Metadata): The persisted metadata object
    """
    service_metadata = parent_service.metadata

    metadata = Metadata()
    metadata.metadata_type = MetadataEnum.LAYER.value
    metadata.title = self.title
    metadata.abstract = self.abstract
    metadata.identifier = self.identifier
    metadata.is_active = False
    metadata.created_by = group

    # Inherited from the parent service
    metadata.online_resource = service_metadata.online_resource
    metadata.capabilities_original_uri = service_metadata.capabilities_original_uri
    metadata.contact = service_metadata.contact
    metadata.access_constraints = service_metadata.access_constraints

    # Save metadata to use id afterwards
    metadata.save()

    # Create bounding box polygon: a closed ring, so the first point is repeated at the end
    bbox = self.capability_bbox_lat_lon
    min_x = float(bbox["minx"])
    min_y = float(bbox["miny"])
    max_y = float(bbox["maxy"])
    max_x = float(bbox["maxx"])
    ring = (
        (min_x, min_y),
        (min_x, max_y),
        (max_x, max_y),
        (max_x, min_y),
        (min_x, min_y),
    )
    metadata.bounding_geometry = Polygon(ring)

    metadata.save()
    return metadata
def overwrite_metadata(original_md: Metadata, custom_md: Metadata, editor_form):
    """ Overwrites the original data with the custom data

    Title, abstract, access constraints and licence are taken from ``custom_md``;
    keywords, language code and (optionally) categories are taken from the cleaned
    data of ``editor_form``. Categories are propagated to the metadata of all
    subelements. Finally the record is flagged as custom, saved, and the matching
    document (dataset metadata or capabilities) is overwritten.

    Args:
        original_md (Metadata): The original Metadata object
        custom_md (Metadata): The custom Metadata object
        editor_form: The editor form which holds additional data
    Returns:
        nothing
    """
    cleaned_data = editor_form.cleaned_data

    original_md.title = custom_md.title
    original_md.abstract = custom_md.abstract
    original_md.access_constraints = custom_md.access_constraints
    # metadata_url is deliberately not overwritten: it is needed to reset dataset metadatas
    original_md.licence = custom_md.licence

    # Keyword updating
    keywords = cleaned_data["keywords"]
    original_md.keywords.clear()
    for kw in keywords:
        keyword = Keyword.objects.get_or_create(keyword=kw)[0]
        original_md.keywords.add(keyword)

    # Language updating
    original_md.language_code = cleaned_data["language_code"]

    # Categories updating
    # Categories are provided as id's to prevent language related conflicts.
    # A form without a categories field leaves all categories untouched. Previously the
    # subelement loop below failed on the unbound `categories` name in that case.
    categories = cleaned_data.get("categories")
    if categories is not None:
        categories = list(categories)
        original_md.categories.clear()
        original_md.categories.add(*categories)

        # Categories are inherited by subelements
        subelements = original_md.get_described_element().get_subelements().select_related('metadata')
        for subelement in subelements:
            subelement.metadata.categories.clear()
            subelement.metadata.categories.add(*categories)

    # Change capabilities document so that all sensitive elements (links) are proxied
    if original_md.use_proxy_uri != custom_md.use_proxy_uri:
        original_md.set_proxy(custom_md.use_proxy_uri == 'on')

    # Save metadata
    original_md.is_custom = True
    original_md.save()

    if original_md.is_dataset_metadata:
        overwrite_dataset_metadata_document(original_md)
    else:
        overwrite_capabilities_document(original_md)
def _persist_metadata(self, md_data_entry: dict):
    """ Creates or updates a real Metadata model record from one parsed data entry

    The entry's id is removed from ``self.deleted_metadata``. If a record with this
    identifier exists and its ``last_remote_change`` equals the entry's date stamp,
    nothing else happens. Otherwise the record is filled and saved together with its
    keywords, categories, formats and harvested links. New records are related to
    ``self.metadata`` and entries with a parent id are registered in
    ``self.parent_child_map``.

    IntegrityError and DataError raised while persisting are logged, not raised.

    Args:
        md_data_entry (dict): The parsed data of a single metadata record
    Returns:
         None
    """
    _id = md_data_entry["id"]
    # Remove this id from the set of metadata which shall be deleted in the end.
    try:
        self.deleted_metadata.remove(_id)
    except KeyError:
        pass

    try:
        md = Metadata.objects.get(identifier=_id, )
        is_new = False
        if md.last_remote_change == md_data_entry["date_stamp"]:
            # Nothing to do here!
            return
    except ObjectDoesNotExist:
        md = Metadata(identifier=_id)
        is_new = True

    md.access_constraints = md_data_entry.get("access_constraints", None)
    md.created_by = self.harvesting_group
    md.origin = ResourceOriginEnum.CATALOGUE.value
    md.last_remote_change = md_data_entry.get("date_stamp", None)
    md.title = md_data_entry.get("title", None)
    md.contact = md_data_entry.get("contact", None)
    md.language_code = md_data_entry.get("language_code", None)
    md.metadata_type = md_data_entry.get("metadata_type", None)
    md.abstract = md_data_entry.get("abstract", None)
    md.bounding_geometry = md_data_entry.get("bounding_geometry", None)
    md.is_active = True
    md.capabilities_original_uri = md_data_entry.get("capabilities_original_url", None)

    keyword_names = md_data_entry["keywords"]
    # A missing "formats" entry is treated as "no formats"
    format_names = md_data_entry.get("formats", [])

    try:
        # Improve speed for keyword get-create by fetching (filter) all existing ones and only perform
        # get_or_create on the ones that do not exist yet. Speed up by ~50% for large amount of data
        known_keywords = {
            kw.keyword for kw in Keyword.objects.filter(keyword__in=keyword_names)
        }
        for keyword_name in keyword_names:
            if keyword_name not in known_keywords:
                Keyword.objects.get_or_create(keyword=keyword_name)
        kws = Keyword.objects.filter(keyword__in=keyword_names)

        # Same for MimeTypes
        known_formats = {
            _format.mime_type for _format in MimeType.objects.filter(mime_type__in=format_names)
        }
        for format_name in format_names:
            if format_name not in known_formats:
                MimeType.objects.get_or_create(mime_type=format_name)
        formats = MimeType.objects.filter(mime_type__in=format_names)

        with transaction.atomic():
            if len(md_data_entry["categories"]) > 0:
                q = Q()
                for cat in md_data_entry["categories"]:
                    q |= Q(title_EN__iexact=cat)
                categories = Category.objects.filter(q)
            else:
                categories = []

            # The record has to be persisted before any M2M relation can be attached,
            # so it is saved before the harvested links are added.
            md.save(add_monitoring=False)

            for link in md_data_entry.get("links", []):
                url = link.get("link", None)
                if url is None:
                    continue
                generic_url = GenericUrl()
                generic_url.description = "[HARVESTED URL] \n{}".format(
                    link.get("description", ""))
                generic_url.method = "Get"
                generic_url.url = url
                generic_url.save()
                md.additional_urls.add(generic_url)

            md.keywords.add(*kws)
            md.categories.add(*categories)
            md.formats.add(*formats)

            # To reduce runtime, we only create a new MetadataRelation if we are sure there hasn't already been one.
            # Using get_or_create increases runtime on existing metadata too much!
            if is_new:
                md.add_metadata_relation(
                    to_metadata=self.metadata,
                    relation_type=MetadataRelationEnum.HARVESTED_THROUGH.value,
                    origin=ResourceOriginEnum.CATALOGUE.value)

        parent_id = md_data_entry["parent_id"]
        # Add the found parent_id to the parent_child map!
        if parent_id is not None:
            children = self.parent_child_map.get(parent_id, None)
            if children is None:
                self.parent_child_map[parent_id] = [md]
            else:
                children.append(md)

    except (IntegrityError, DataError) as e:
        csw_logger.error(
            CSW_ERROR_LOG_TEMPLATE.format(md.identifier, self.metadata.title, e))