def delete(self, *args, current_user=None, **kwargs):
    """Delete the dataset, its attached resources and its CKAN package.

    Args:
        *args: Positional arguments forwarded to the parent ``delete()``.
        current_user: Optional user on whose behalf the CKAN package is
            deleted. When truthy, the package is deleted through a
            ``CkanUserHandler`` opened with that user's API key; otherwise
            ``CkanHandler.delete_dataset`` is called directly.
        **kwargs: Keyword arguments forwarded to the parent ``delete()``.
    """
    # Delete every resource attached to this dataset first.
    resource_model = apps.get_model(
        app_label='idgo_admin', model_name='Resource')
    for attached in resource_model.objects.filter(dataset=self):
        attached.delete(current_user=current_user, synchronize_dataset=False)

    # > > > > > > BETA < < < < < < #
    if BETA:
        beta_model = apps.get_model(
            app_label='idgo_resource', model_name='Resource')
        for beta in beta_model.objects.filter(dataset=self):
            beta._related.delete()
            beta.delete()
    # > > > > > > BETA < < < < < < #

    # Delete the CKAN package, then purge it.
    package_id = str(self.ckan_id)
    if not current_user:
        CkanHandler.delete_dataset(package_id)
    else:
        apikey = CkanHandler.get_user(current_user.username)['apikey']
        with CkanUserHandler(apikey=apikey) as ckan_user:
            ckan_user.delete_dataset(package_id)
    CkanHandler.purge_dataset(package_id)

    # Finally delete the instance itself.
    super().delete(*args, **kwargs)
def save(self, *args, harvest=True, **kwargs):
    """Save the remote CSW instance and, on update, harvest its catalogue.

    The method runs in three steps:

    1. On update (the instance already has a primary key), delete every
       dataset returned by ``Dataset.harvested_csw`` for the stored
       instance. On creation, check that a ``CswBaseHandler`` can be
       opened on ``self.url``.
    2. Save the instance itself.
    3. On update, and only when ``harvest`` is true, fetch the remote
       packages and create/update one dataset (with its resources) per
       package whose type is ``dataset``.

    Args:
        *args: Positional arguments forwarded to the parent ``save()``.
        harvest: When false, step 3 is skipped.
        **kwargs: Keyword arguments forwarded to the parent ``save()``.

    Raises:
        ValidationError: On creation, when opening the CSW handler raises
            ``CswBaseError`` (code ``'url'``).
        CriticalError: When anything fails during harvesting. Before it is
            raised, the CKAN packages created by this run are purged and
            the metadata records created by this run are deleted.
    """
    Category = apps.get_model(app_label='idgo_admin', model_name='Category')
    Dataset = apps.get_model(app_label='idgo_admin', model_name='Dataset')
    License = apps.get_model(app_label='idgo_admin', model_name='License')
    Resource = apps.get_model(app_label='idgo_admin', model_name='Resource')
    ResourceFormats = apps.get_model(
        app_label='idgo_admin', model_name='ResourceFormats')

    def _parse_date(raw):
        """Parse `raw` with ISOFORMAT_DATE; None when empty or malformed."""
        if not raw:
            return None
        try:
            return datetime.strptime(raw, ISOFORMAT_DATE)
        except ValueError as e:
            logger.warning(e)
            return None

    # (1) Delete the datasets that are no longer synchronised.
    previous = self.pk and RemoteCsw.objects.get(pk=self.pk)
    if previous:
        for dataset in Dataset.harvested_csw.filter(remote_instance=previous):
            dataset.delete()
    else:
        # On creation, check that the CSW URL is valid.
        try:
            with CswBaseHandler(self.url):
                pass
        except CswBaseError as e:
            raise ValidationError(str(e), code='url') from e

    # (2) Save the instance.
    super().save(*args, **kwargs)

    # (3) Create/update the synchronised datasets (updates only).
    if not previous or not harvest:
        return

    # Walk the call stack for a local named `request` and use its user as
    # the editor; stays None when no frame provides one.
    editor = None
    for entry in inspect.stack():
        try:
            editor = entry[0].f_locals['request'].user._wrapped
        except (KeyError, AttributeError):
            continue
        break

    # Identifiers created by this run, kept so they can be rolled back.
    ckan_ids = []
    geonet_ids = []
    try:
        with transaction.atomic():
            with CswBaseHandler(self.url) as csw:
                packages = csw.get_packages(xml=self.getrecords or None)

                # Loop invariants.
                known_frequencies = dict(Dataset.FREQUENCY_CHOICES)
                # NOTE(review): relies on the 6th model field of Category
                # carrying the ISO topic choices — confirm.
                iso_topic_reverse = {
                    v: k for k, v in Category._meta.fields[5].choices}

                for package in packages:
                    if not package['type'] == 'dataset':
                        continue

                    geonet_id = package['id']

                    update_frequency = package.get('frequency')
                    if not (update_frequency
                            and update_frequency in known_frequencies):
                        update_frequency = 'unknown'

                    date_creation = _parse_date(
                        package.get('dataset_creation_date'))
                    date_modification = _parse_date(
                        package.get('dataset_modification_date'))
                    date_publication = _parse_date(
                        package.get('dataset_publication_date'))

                    # Licence: match on slug, title or alternate titles,
                    # else the configured default, else the first licence.
                    license_titles = package.get('license_titles') or []
                    license_filter = reduce(ior, [
                        Q(slug__in=license_titles),
                        Q(title__in=license_titles),
                        Q(alternate_titles__overlap=license_titles),
                    ])
                    dataset_license = License.objects.filter(
                        license_filter).distinct().first()
                    if not dataset_license:
                        try:
                            dataset_license = License.objects.get(
                                slug=settings.DEFAULTS_VALUES.get('LICENSE'))
                        except License.DoesNotExist:
                            dataset_license = License.objects.first()

                    # Push the metadata record to Geonet. Failures here are
                    # logged and do not abort the harvest.
                    if not geonet.get_record(geonet_id):
                        try:
                            geonet.create_record(geonet_id, package['xml'])
                        except Exception as e:
                            logger.warning(
                                'La création de la fiche de métadonnées a échoué.'
                            )
                            logger.error(e)
                        else:
                            geonet_ids.append(geonet_id)
                            # Always publish the record.
                            geonet.publish(geonet_id)
                    else:
                        try:
                            geonet.update_record(geonet_id, package['xml'])
                        except Exception as e:
                            logger.warning(
                                'La mise à jour de la fiche de métadonnées a échoué.'
                            )
                            logger.error(e)

                    slug = 'sync{}-{}'.format(
                        str(uuid.uuid4())[:7].lower(),
                        slugify(geonet_id))[:100]

                    # Not populated from CSW: broadcaster_email,
                    # broadcaster_name, data_type, geocover, granularity,
                    # thumbnail, support.
                    kvp = {
                        'slug': slug,
                        'title': package.get('title'),
                        'description': package.get('notes'),
                        'date_creation':
                            date_creation.date() if date_creation else None,
                        'date_modification':
                            date_modification.date()
                            if date_modification else None,
                        'date_publication':
                            date_publication.date()
                            if date_publication else None,
                        'editor': editor,
                        'license': dataset_license,
                        'owner_email':
                            self.organisation.email or DEFAULT_CONTACT_EMAIL,
                        'owner_name':
                            self.organisation.legal_name
                            or DEFAULT_PLATFORM_NAME,
                        'organisation': self.organisation,
                        'published': not package.get('private'),
                        'remote_instance': self,
                        'remote_dataset': geonet_id,
                        'update_frequency': update_frequency,
                        'bbox': package.get('bbox'),
                        'geonet_id': geonet_id,
                    }

                    dataset, created = Dataset.harvested_csw.update_or_create(
                        **kvp)
                    if created:
                        ckan_ids.append(dataset.ckan_id)

                    # Categories: match the remote group names against
                    # slug, name, ISO topic (raw or reverse-mapped from its
                    # label) and alternate titles.
                    categories_name = [
                        m['name'] for m in package.get('groups', [])]
                    category_filter = reduce(ior, [
                        Q(slug__in=categories_name),
                        Q(name__in=categories_name),
                        Q(iso_topic__in=categories_name),
                        Q(iso_topic__in=[
                            iso_topic_reverse.get(name)
                            for name in categories_name
                        ]),
                        Q(alternate_titles__overlap=categories_name),
                    ])
                    categories = Category.objects.filter(
                        category_filter).distinct()
                    if categories:
                        dataset.categories.set(categories, clear=True)

                    if not created:
                        dataset.keywords.clear()
                    # A package without tags yields no keywords.
                    keywords = [
                        tag['display_name']
                        for tag in package.get('tags') or []
                    ]
                    dataset.keywords.add(*keywords)

                    dataset.save(
                        current_user=None, synchronize=True, activate=False)

                    for remote_resource in package.get('resources', []):
                        resource_ckan_id = uuid.uuid4()

                        # Resolve the format from protocol and/or mimetype;
                        # with neither, reduce() raises TypeError and the
                        # format is left unset.
                        filters = []
                        protocol = remote_resource.get('protocol')
                        if protocol:
                            filters.append(Q(protocol=protocol))
                        mimetype = remote_resource.get('mimetype')
                        if mimetype:
                            filters.append(Q(mimetype__overlap=[mimetype]))
                        try:
                            format_type = ResourceFormats.objects.get(
                                reduce(iand, filters))
                        except (ResourceFormats.MultipleObjectsReturned,
                                ResourceFormats.DoesNotExist,
                                TypeError):
                            format_type = None

                        resource_kvp = {
                            'ckan_id': resource_ckan_id,
                            'dataset': dataset,
                            'format_type': format_type,
                            'title':
                                remote_resource['name']
                                or remote_resource['url'],
                            'referenced_url': remote_resource['url'],
                        }

                        # NOTE(review): the id is freshly generated above,
                        # so this lookup is not expected to match.
                        try:
                            resource_obj = Resource.objects.get(
                                ckan_id=resource_ckan_id)
                        except Resource.DoesNotExist:
                            Resource.default.create(
                                save_opts={
                                    'current_user': editor,
                                    'synchronize': True,
                                },
                                **resource_kvp)
                        else:
                            for k, v in resource_kvp.items():
                                setattr(resource_obj, k, v)
                            resource_obj.save(
                                current_user=editor, synchronize=True)
    except Exception as e:
        # Roll back what was created remotely during this run.
        for created_id in ckan_ids:
            logger.warning(
                'Delete CKAN package : {id}.'.format(id=str(created_id)))
            CkanHandler.purge_dataset(str(created_id))
        for record_id in geonet_ids:
            logger.warning('Delete MD : {id}.'.format(id=str(record_id)))
            geonet.delete_record(record_id)
        logger.error(e)
        raise CriticalError() from e
    else:
        for created_id in ckan_ids:
            CkanHandler.publish_dataset(id=str(created_id), state='active')
def save(self, *args, **kwargs):
    """Save the remote CKAN instance and harvest the organisations it follows.

    The method runs in three steps:

    1. On update (the instance already has a primary key), delete the
       harvested datasets whose remote organisation was listed in the
       stored ``sync_with`` but is no longer in the current one. On
       creation, check that a ``CkanBaseHandler`` can be opened on
       ``self.url``.
    2. Save the instance itself.
    3. When ``self.sync_with`` is not empty, fetch every listed remote
       organisation and create/update one dataset (with its resources) per
       active package of type ``dataset``.

    Args:
        *args: Positional arguments forwarded to the parent ``save()``.
        **kwargs: Keyword arguments forwarded to the parent ``save()``.

    Raises:
        ValidationError: On creation, when opening the CKAN handler raises
            ``CkanBaseError`` (code ``'url'``).
        CriticalError: When anything fails during harvesting. Before it is
            raised, the CKAN packages of the datasets handled by this run
            are purged.
    """
    Dataset = apps.get_model(app_label='idgo_admin', model_name='Dataset')
    License = apps.get_model(app_label='idgo_admin', model_name='License')
    Resource = apps.get_model(app_label='idgo_admin', model_name='Resource')
    ResourceFormats = apps.get_model(
        app_label='idgo_admin', model_name='ResourceFormats')

    # (1) Delete the datasets that are no longer synchronised.
    previous = self.pk and RemoteCkan.objects.get(pk=self.pk)
    if previous:
        current_sync = self.sync_with or []
        dropped_organisations = [
            x for x in (previous.sync_with or []) if x not in current_sync]
        stale_lookup = {
            'remote_instance': previous,
            'remote_organisation__in': dropped_organisations,
        }
        # TODO: 'Dataset.harvested_ckan.filter(**filter).delete()' does not work
        for dataset in Dataset.harvested_ckan.filter(**stale_lookup):
            dataset.delete()
    else:
        # On creation, check that the CKAN URL is valid.
        try:
            with CkanBaseHandler(self.url):
                pass
        except CkanBaseError as e:
            raise ValidationError(str(e), code='url') from e

    # (2) Save the instance.
    super().save(*args, **kwargs)

    # (3) Create/update the synchronised datasets.

    # Default editor, overridden by the user of the first `request` local
    # found while walking the call stack.
    editor = User.objects.get(pk=DEFAULT_USER_ID)
    for entry in inspect.stack():
        try:
            editor = entry[0].f_locals['request'].user._wrapped
        except (KeyError, AttributeError):
            continue
        break

    if not self.sync_with:
        return

    # CKAN ids of the datasets handled by this run, kept for rollback.
    ckan_ids = []
    try:
        with transaction.atomic():
            known_frequencies = dict(Dataset.FREQUENCY_CHOICES)

            # TODO: Factorise
            for value in self.sync_with:
                with CkanBaseHandler(self.url) as ckan:
                    ckan_organisation = ckan.get_organisation(
                        value, include_datasets=True,
                        include_groups=True, include_tags=True)

                if not ckan_organisation.get('package_count', 0):
                    continue

                for package in ckan_organisation.get('packages'):
                    if package['state'] != 'active' \
                            or package['type'] != 'dataset':
                        continue

                    # Re-fetch the package on its own.
                    with CkanBaseHandler(self.url) as ckan:
                        package = ckan.get_package(package['id'])

                    ckan_id = uuid.UUID(package['id'])

                    update_frequency = package.get('frequency')
                    if not (update_frequency
                            and update_frequency in known_frequencies):
                        update_frequency = 'unknown'

                    metadata_created = package.get('metadata_created', None)
                    if metadata_created:
                        metadata_created = datetime.strptime(
                            metadata_created, ISOFORMAT_DATETIME)

                    metadata_modified = package.get('metadata_modified', None)
                    if metadata_modified:
                        metadata_modified = datetime.strptime(
                            metadata_modified, ISOFORMAT_DATETIME)

                    # Licence: use the mapping declared for this remote
                    # instance; otherwise fall back to 'other-at', or to no
                    # licence when that one does not exist either.
                    license_id = package.get('license_id')
                    try:
                        mapping_licence = MappingLicence.objects.get(
                            remote_ckan=self, slug=license_id)
                    except MappingLicence.DoesNotExist:
                        logger.warning(
                            "'{}' non trouvé".format(license_id))
                        try:
                            dataset_license = License.objects.get(
                                slug='other-at')
                        except License.DoesNotExist:
                            dataset_license = None
                    else:
                        dataset_license = mapping_licence.licence

                    slug = 'sync{}-{}'.format(
                        str(uuid.uuid4())[:7].lower(),
                        package.get('name'))[:100]

                    # Not populated from CKAN: date_publication, bbox,
                    # broadcaster_email, broadcaster_name, data_type,
                    # geocover, geonet_id, granularity, thumbnail, support.
                    kvp = {
                        'slug': slug,
                        'title': package.get('title'),
                        'description': package.get('notes'),
                        'date_creation':
                            metadata_created and metadata_created.date(),
                        'date_modification':
                            metadata_modified and metadata_modified.date(),
                        'editor': editor,
                        'license': dataset_license,
                        'owner_email':
                            self.organisation.email or DEFAULT_CONTACT_EMAIL,
                        'owner_name':
                            self.organisation.legal_name
                            or DEFAULT_PLATFORM_NAME,
                        'organisation': self.organisation,
                        'published': not package.get('private'),
                        'remote_instance': self,
                        'remote_dataset': ckan_id,
                        'remote_organisation': value,
                        'update_frequency': update_frequency,
                    }

                    dataset, created = Dataset.harvested_ckan.update_or_create(
                        **kvp)

                    mapping_categories = MappingCategory.objects.filter(
                        remote_ckan=self,
                        slug__in=[
                            m['name'] for m in package.get('groups', [])])
                    if mapping_categories:
                        # Use the related manager rather than assigning to
                        # the many-to-many attribute directly.
                        dataset.categories.set(
                            {mc.category for mc in mapping_categories})

                    if not created:
                        dataset.keywords.clear()
                    # A package without tags yields no keywords.
                    keywords = [
                        tag['display_name']
                        for tag in package.get('tags') or []
                    ]
                    dataset.keywords.add(*keywords)

                    dataset.save(
                        current_user=None, synchronize=True, activate=False)

                    ckan_ids.append(dataset.ckan_id)

                    for remote_resource in package.get('resources', []):
                        try:
                            resource_ckan_id = uuid.UUID(
                                remote_resource['id'])
                        except ValueError as e:
                            logger.exception(e)
                            logger.error(
                                "I can't crash here, so I do not pay any attention to this error."
                            )
                            continue

                        # A missing, null or unknown format must not abort
                        # the harvest: the resource is kept without format.
                        try:
                            ckan_format = remote_resource['format'].upper()
                            format_type = ResourceFormats.objects.get(
                                ckan_format=ckan_format)
                        except (ResourceFormats.MultipleObjectsReturned,
                                ResourceFormats.DoesNotExist,
                                TypeError, KeyError, AttributeError) as e:
                            logger.exception(e)
                            logger.error(
                                "I can't crash here, so I do not pay any attention to this error."
                            )
                            format_type = None

                        resource_kvp = {
                            'ckan_id': resource_ckan_id,
                            'dataset': dataset,
                            'format_type': format_type,
                            'title': remote_resource['name'],
                            'referenced_url': remote_resource['url'],
                        }

                        try:
                            resource_obj = Resource.objects.get(
                                ckan_id=resource_ckan_id)
                        except Resource.DoesNotExist:
                            Resource.default.create(
                                save_opts={
                                    'current_user': None,
                                    'synchronize': True,
                                },
                                **resource_kvp)
                        else:
                            for k, v in resource_kvp.items():
                                setattr(resource_obj, k, v)
                            resource_obj.save(
                                current_user=None, synchronize=True)
    except Exception as e:
        # Roll back the CKAN packages touched by this run.
        for handled_id in ckan_ids:
            CkanHandler.purge_dataset(str(handled_id))
        logger.error(e)
        raise CriticalError() from e
    else:
        for handled_id in ckan_ids:
            CkanHandler.publish_dataset(id=str(handled_id), state='active')