예제 #1
0
    def merge_name(self, request, queryset_tags):
        """Merge the selected tags into one keyword and push the result to CKAN.

        Without ``apply`` in the POST data, the confirmation page is rendered
        with an empty ``NewKeywordForm``. With ``apply`` and a valid form, the
        target tag is fetched or created, attached to every dataset carrying
        one of the selected tags, and each dataset is published to CKAN with
        its remaining keywords plus the target tag. If every dataset is
        published, the selected tags (except the target one) are deleted.

        The loop stops at the first ``CkanBaseError``: the target tag is
        removed from the failing dataset only, and an error message is shown.
        NOTE(review): datasets processed before the failure keep the new tag.

        Args:
            request: the current HTTP request.
            queryset_tags: queryset of the tags to merge.

        Returns:
            A redirect to the current path once a valid form has been
            processed, otherwise the rendered confirmation page (including
            a bound form with its errors when validation failed).
        """
        datasets = Dataset.objects.filter(
            keywords__in=queryset_tags).distinct()

        if 'apply' in request.POST:
            form = NewKeywordForm(request.POST)
            if form.is_valid():
                name = form.cleaned_data.get('new_name')
                tag, _created = Tag.objects.get_or_create(name=name)

                for dataset in datasets:
                    dataset.keywords.add(tag)
                    ckan_id = str(dataset.ckan_id)

                    # Keep the keywords that are not being merged. The target
                    # tag was just added above, so it is excluded here and
                    # appended exactly once below; otherwise it would be
                    # listed twice whenever it is not among the selected tags.
                    kept_keywords = dataset.keywords.all().exclude(
                        id__in=queryset_tags).exclude(pk=tag.pk)
                    tags = [{'name': k.name} for k in kept_keywords]
                    tags.append({'name': tag.name})

                    logger.info('Update dataset %d with tags: %s',
                                dataset.pk, tags)
                    try:
                        CkanHandler.publish_dataset(id=ckan_id, tags=tags)
                    except CkanBaseError as e:
                        logger.exception(e)
                        dataset.keywords.remove(tag)
                        messages.error(request, (
                            "Une erreur est survenue. "
                            "Veuillez contacter l'administrateur de la plateforme."
                        ))
                        break
                else:
                    # Reached only when no dataset failed (or there was none).
                    queryset_tags.exclude(pk=tag.pk).delete()
                    messages.info(
                        request, ("La mise à jour est effectuée avec succès."))
                return HttpResponseRedirect(request.get_full_path())

        else:  # request.GET
            form = NewKeywordForm()

        template_html = 'admin/idgo_admin/taggit_merge_name.html'
        context = {'form': form, 'tags': queryset_tags, 'datasets': datasets}
        return render(request, template_html, context=context)
예제 #2
0
    def handle(self, *args, **options):
        """Push the keywords of every tagged dataset to CKAN.

        Iterates over the datasets that have at least one keyword, ordered
        by ``id``, and publishes each one's tag list through
        ``CkanHandler.publish_dataset``. Any exception raised for a dataset
        is logged and the loop moves on to the next dataset.

        Args:
            *args: unused.
            **options: unused.
        """
        dataset_qs = Dataset.default.filter(
            keywords__isnull=False).distinct().order_by('id')
        total = dataset_qs.count()

        for count, dataset in enumerate(dataset_qs, start=1):
            # Read the keyword names once so that the log line and the
            # payload sent to CKAN are guaranteed to match.
            names = [k.name for k in dataset.keywords.all()]
            ckan_id = str(dataset.ckan_id)

            logger.info(
                "[%d/%d] - Synchronize Dataset %d (%s) with tags: '%s'.",
                count, total, dataset.pk, ckan_id, "', '".join(names))

            try:
                CkanHandler.publish_dataset(
                    id=ckan_id, tags=[{'name': name} for name in names])
            except Exception as e:
                # Deliberate best effort: one failing dataset must not stop
                # the whole synchronisation.
                logger.exception(e)
                logger.warning("Error was ignored.")
예제 #3
0
파일: organisation.py 프로젝트: jerbou/idgo
    def save(self, *args, harvest=True, **kwargs):
        """Save the remote CSW instance and, on update, harvest its catalogue.

        Steps:
          1. If the instance already exists, delete every dataset previously
             harvested from it. Otherwise (creation), check that the CSW URL
             can be opened.
          2. Save the instance itself.
          3. On update only, and when ``harvest`` is true, fetch the packages
             from the CSW, push each metadata record to ``geonet`` and create
             the matching datasets and resources.

        Args:
            *args: forwarded to the parent ``save``.
            harvest: when false, step 3 is skipped.
            **kwargs: forwarded to the parent ``save``.

        Raises:
            ValidationError: on creation, when opening the CSW URL raises
                ``CswBaseError``.
            CriticalError: when anything fails while harvesting. The CKAN
                packages and metadata records created so far are removed
                before raising.
        """
        Category = apps.get_model(app_label='idgo_admin',
                                  model_name='Category')
        Dataset = apps.get_model(app_label='idgo_admin', model_name='Dataset')
        License = apps.get_model(app_label='idgo_admin', model_name='License')
        Resource = apps.get_model(app_label='idgo_admin',
                                  model_name='Resource')
        ResourceFormats = apps.get_model(app_label='idgo_admin',
                                         model_name='ResourceFormats')

        def parse_date(value):
            """Parse `value` with ISOFORMAT_DATE; falsy values pass through."""
            if not value:
                return value
            try:
                return datetime.strptime(value, ISOFORMAT_DATE)
            except ValueError as e:
                logger.warning(e)
                return None

        # (1) Delete the datasets that are no longer synchronised
        previous = self.pk and RemoteCsw.objects.get(pk=self.pk)
        if previous:
            for dataset in Dataset.harvested_csw.filter(
                    remote_instance=previous):
                dataset.delete()
        else:
            # On creation, check that the CSW URL is valid
            try:
                with CswBaseHandler(self.url):
                    pass
            except CswBaseError as e:
                raise ValidationError(str(e), code='url') from e

        # (2) Save the instance
        super().save(*args, **kwargs)

        # (3) Create/update the synchronised datasets

        # Nothing is harvested on creation, nor when the caller opted out.
        if not previous or not harvest:
            return

        # Look up the call stack for a local named `request` in order to find
        # the user performing the operation.
        editor = None
        for entry in inspect.stack():
            try:
                editor = entry[0].f_locals['request'].user._wrapped
            except (KeyError, AttributeError):
                continue
            break

        # Then harvest the catalogue
        ckan_ids = []
        geonet_ids = []
        try:
            with transaction.atomic():

                with CswBaseHandler(self.url) as csw:
                    packages = csw.get_packages(
                        xml=self.getrecords or None)

                frequencies = dict(Dataset.FREQUENCY_CHOICES)
                # NOTE(review): relies on the field at position 5 of
                # `Category` being the one queried as `iso_topic` below —
                # confirm against the model definition.
                iso_topic_reverse = dict(
                    (v, k)
                    for k, v in Category._meta.fields[5].choices)

                for package in packages:
                    if package['type'] != 'dataset':
                        continue

                    geonet_id = package['id']

                    update_frequency = package.get('frequency')
                    if not update_frequency \
                            or update_frequency not in frequencies:
                        update_frequency = 'unknown'

                    date_creation = parse_date(
                        package.get('dataset_creation_date', None))
                    date_modification = parse_date(
                        package.get('dataset_modification_date', None))
                    date_publication = parse_date(
                        package.get('dataset_publication_date', None))

                    # Licence: only query when the package provides titles,
                    # a missing key would otherwise break the `__in` lookups.
                    license_titles = package.get('license_titles')
                    dataset_license = None
                    if license_titles:
                        license_filters = [
                            Q(slug__in=license_titles),
                            Q(title__in=license_titles),
                            Q(alternate_titles__overlap=license_titles),
                        ]
                        dataset_license = License.objects.filter(
                            reduce(ior, license_filters)).distinct().first()
                    if not dataset_license:
                        try:
                            dataset_license = License.objects.get(
                                slug=settings.DEFAULTS_VALUES.get(
                                    'LICENSE'))
                        except License.DoesNotExist:
                            dataset_license = License.objects.first()

                    # Push the metadata record to Geonet
                    if not geonet.get_record(geonet_id):
                        try:
                            geonet.create_record(geonet_id, package['xml'])
                        except Exception as e:
                            logger.warning(
                                'La création de la fiche de métadonnées a échoué.'
                            )
                            logger.error(e)
                        else:
                            geonet_ids.append(geonet_id)
                            # Always publish the record
                            geonet.publish(geonet_id)
                    else:
                        try:
                            geonet.update_record(geonet_id, package['xml'])
                        except Exception as e:
                            logger.warning(
                                'La mise à jour de la fiche de métadonnées a échoué.'
                            )
                            logger.error(e)

                    slug = 'sync{}-{}'.format(
                        str(uuid.uuid4())[:7].lower(),
                        slugify(geonet_id))[:100]
                    kvp = {
                        'slug': slug,
                        'title': package.get('title'),
                        'description': package.get('notes'),
                        'date_creation':
                        date_creation and date_creation.date(),
                        'date_modification':
                        date_modification and date_modification.date(),
                        'date_publication':
                        date_publication and date_publication.date(),
                        'editor': editor,
                        'license': dataset_license,
                        'owner_email':
                        self.organisation.email or DEFAULT_CONTACT_EMAIL,
                        'owner_name':
                        self.organisation.legal_name
                        or DEFAULT_PLATFORM_NAME,
                        'organisation': self.organisation,
                        'published': not package.get('private'),
                        'remote_instance': self,
                        'remote_dataset': geonet_id,
                        'update_frequency': update_frequency,
                        'bbox': package.get('bbox'),
                        # broadcaster_email
                        # broadcaster_name
                        # data_type
                        # geocover
                        'geonet_id': geonet_id,
                        # granularity
                        # thumbnail
                        # support
                    }

                    dataset, created = Dataset.harvested_csw.update_or_create(
                        **kvp)
                    if created:
                        ckan_ids.append(dataset.ckan_id)

                    categories_name = [
                        m['name'] for m in package.get('groups', [])
                    ]
                    category_filters = [
                        Q(slug__in=categories_name),
                        Q(name__in=categories_name),
                        Q(iso_topic__in=categories_name),
                        Q(iso_topic__in=[
                            iso_topic_reverse.get(name)
                            for name in categories_name
                        ]),
                        Q(alternate_titles__overlap=categories_name),
                    ]

                    categories = Category.objects.filter(
                        reduce(ior, category_filters)).distinct()
                    if categories:
                        dataset.categories.set(categories, clear=True)

                    if not created:
                        dataset.keywords.clear()
                    # `tags` may be missing from the package.
                    keywords = [
                        tag['display_name']
                        for tag in package.get('tags') or []
                    ]
                    dataset.keywords.add(*keywords)

                    dataset.save(current_user=None,
                                 synchronize=True,
                                 activate=False)

                    for resource in package.get('resources', []):
                        # NOTE(review): a freshly generated id cannot match
                        # an existing resource, so the lookup below is
                        # expected to always end in the creation branch.
                        ckan_id = uuid.uuid4()

                        format_filters = []
                        protocol = resource.get('protocol')
                        if protocol:
                            format_filters.append(Q(protocol=protocol))
                        mimetype = resource.get('mimetype')
                        if mimetype:
                            format_filters.append(
                                Q(mimetype__overlap=[mimetype]))
                        try:
                            # `reduce` raises TypeError on an empty list,
                            # i.e. when neither protocol nor mimetype is set.
                            format_type = ResourceFormats.objects.get(
                                reduce(iand, format_filters))
                        except (ResourceFormats.MultipleObjectsReturned,
                                ResourceFormats.DoesNotExist, TypeError):
                            format_type = None

                        resource_kvp = {
                            'ckan_id': ckan_id,
                            'dataset': dataset,
                            'format_type': format_type,
                            'title': resource['name'] or resource['url'],
                            'referenced_url': resource['url'],
                        }

                        try:
                            instance = Resource.objects.get(ckan_id=ckan_id)
                        except Resource.DoesNotExist:
                            instance = Resource.default.create(
                                save_opts={
                                    'current_user': editor,
                                    'synchronize': True,
                                },
                                **resource_kvp)
                        else:
                            for k, v in resource_kvp.items():
                                setattr(instance, k, v)
                        instance.save(current_user=editor,
                                      synchronize=True)

        except Exception as e:
            # Log first: the clean-up below talks to remote services and may
            # fail too, which would otherwise hide the root cause.
            logger.exception(e)
            for ckan_id in ckan_ids:
                logger.warning(
                    'Delete CKAN package : {id}.'.format(id=str(ckan_id)))
                CkanHandler.purge_dataset(str(ckan_id))
            for geonet_id in geonet_ids:
                logger.warning(
                    'Delete MD : {id}.'.format(id=str(geonet_id)))
                geonet.delete_record(geonet_id)
            raise CriticalError() from e
        else:
            for ckan_id in ckan_ids:
                CkanHandler.publish_dataset(id=str(ckan_id), state='active')
예제 #4
0
파일: organisation.py 프로젝트: jerbou/idgo
    def save(self, *args, **kwargs):
        """Save the remote CKAN instance and harvest the organisations it follows.

        Steps:
          1. If the instance already exists, delete the datasets harvested
             from remote organisations that were in the previous
             ``sync_with`` but are no longer in the current one. Otherwise
             (creation), check that the CKAN URL can be opened.
          2. Save the instance itself.
          3. For every remote organisation in ``sync_with``, fetch its active
             packages of type ``dataset`` and create the matching datasets
             and resources.

        Args:
            *args: forwarded to the parent ``save``.
            **kwargs: forwarded to the parent ``save``.

        Raises:
            ValidationError: on creation, when opening the CKAN URL raises
                ``CkanBaseError``.
            CriticalError: when anything fails while harvesting. The CKAN
                packages collected so far are purged before raising.
        """
        Category = apps.get_model(app_label='idgo_admin',
                                  model_name='Category')
        Dataset = apps.get_model(app_label='idgo_admin', model_name='Dataset')
        License = apps.get_model(app_label='idgo_admin', model_name='License')
        Resource = apps.get_model(app_label='idgo_admin',
                                  model_name='Resource')
        ResourceFormats = apps.get_model(app_label='idgo_admin',
                                         model_name='ResourceFormats')

        # (1) Delete the datasets that are no longer synchronised
        previous = self.pk and RemoteCkan.objects.get(pk=self.pk)

        if previous:
            current_sync = self.sync_with or []
            dropped_organisations = [
                x for x in (previous.sync_with or [])
                if x not in current_sync
            ]
            stale_filter = {
                'remote_instance': previous,
                'remote_organisation__in': dropped_organisations,
            }

            # TODO: 'Dataset.harvested_ckan.filter(**filter).delete()' does not work
            for dataset in Dataset.harvested_ckan.filter(**stale_filter):
                dataset.delete()
        else:
            # On creation, check that the CKAN URL is valid
            try:
                with CkanBaseHandler(self.url):
                    pass
            except CkanBaseError as e:
                raise ValidationError(str(e), code='url') from e

        # (2) Save the instance
        super().save(*args, **kwargs)

        # (3) Create/update the synchronised datasets

        # Look up the call stack for a local named `request` in order to find
        # the user performing the operation; fall back to the default user.
        editor = User.objects.get(pk=DEFAULT_USER_ID)
        for entry in inspect.stack():
            try:
                editor = entry[0].f_locals['request'].user._wrapped
            except (KeyError, AttributeError):
                continue
            break

        if not self.sync_with:
            return

        # Then harvest the catalogue
        ckan_ids = []
        try:
            with transaction.atomic():
                frequencies = dict(Dataset.FREQUENCY_CHOICES)

                # TODO: factorise
                for value in self.sync_with:
                    with CkanBaseHandler(self.url) as ckan:
                        ckan_organisation = ckan.get_organisation(
                            value,
                            include_datasets=True,
                            include_groups=True,
                            include_tags=True)

                    if not ckan_organisation.get('package_count', 0):
                        continue
                    for summary in ckan_organisation.get('packages'):
                        if summary['state'] != 'active' \
                                or summary['type'] != 'dataset':
                            continue
                        # Fetch the package again by id; the entry listed
                        # under the organisation is only used for filtering.
                        with CkanBaseHandler(self.url) as ckan:
                            package = ckan.get_package(summary['id'])

                        ckan_id = uuid.UUID(package['id'])

                        update_frequency = package.get('frequency')
                        if not update_frequency \
                                or update_frequency not in frequencies:
                            update_frequency = 'unknown'

                        metadata_created = package.get(
                            'metadata_created', None)
                        if metadata_created:
                            metadata_created = datetime.strptime(
                                metadata_created, ISOFORMAT_DATETIME)
                        metadata_modified = package.get(
                            'metadata_modified', None)
                        if metadata_modified:
                            metadata_modified = datetime.strptime(
                                metadata_modified, ISOFORMAT_DATETIME)

                        # Licence: use the mapping declared for this remote
                        # CKAN, else fall back to the 'other-at' licence,
                        # else no licence at all.
                        remote_license_id = package.get('license_id')
                        try:
                            mapping_licence = MappingLicence.objects.get(
                                remote_ckan=self,
                                slug=remote_license_id)
                        except MappingLicence.DoesNotExist:
                            logger.warning("'{}' non trouvé".format(
                                remote_license_id))
                            try:
                                dataset_license = License.objects.get(
                                    slug='other-at')
                            except License.DoesNotExist:
                                dataset_license = None
                        else:
                            dataset_license = mapping_licence.licence

                        slug = 'sync{}-{}'.format(
                            str(uuid.uuid4())[:7].lower(),
                            package.get('name'))[:100]
                        kvp = {
                            'slug': slug,
                            'title': package.get('title'),
                            'description': package.get('notes'),
                            'date_creation':
                            metadata_created and metadata_created.date(),
                            'date_modification':
                            metadata_modified and metadata_modified.date(),
                            # date_publication
                            'editor': editor,
                            'license': dataset_license,
                            'owner_email':
                            self.organisation.email
                            or DEFAULT_CONTACT_EMAIL,
                            'owner_name':
                            self.organisation.legal_name
                            or DEFAULT_PLATFORM_NAME,
                            'organisation': self.organisation,
                            'published': not package.get('private'),
                            'remote_instance': self,
                            'remote_dataset': ckan_id,
                            'remote_organisation': value,
                            'update_frequency': update_frequency,
                            # bbox
                            # broadcaster_email
                            # broadcaster_name
                            # data_type
                            # geocover
                            # geonet_id
                            # granularity
                            # thumbnail
                            # support
                        }

                        dataset, created = Dataset.harvested_ckan.update_or_create(
                            **kvp)

                        mapping_categories = MappingCategory.objects.filter(
                            remote_ckan=self,
                            slug__in=[
                                m['name']
                                for m in package.get('groups', [])
                            ])
                        if mapping_categories:
                            # Go through the related manager: direct
                            # assignment to a many-to-many is not allowed.
                            dataset.categories.set(
                                {mc.category for mc in mapping_categories})

                        if not created:
                            dataset.keywords.clear()
                        # `tags` may be missing from the package.
                        keywords = [
                            tag['display_name']
                            for tag in package.get('tags') or []
                        ]
                        dataset.keywords.add(*keywords)
                        dataset.save(current_user=None,
                                     synchronize=True,
                                     activate=False)

                        # NOTE(review): every dataset is recorded here, not
                        # only the created ones, so a failure purges them
                        # all from CKAN — confirm this is intended.
                        ckan_ids.append(dataset.ckan_id)

                        for resource in package.get('resources', []):
                            try:
                                resource_ckan_id = uuid.UUID(resource['id'])
                            except ValueError as e:
                                logger.exception(e)
                                logger.error(
                                    "I can't crash here, so I do not pay any attention to this error."
                                )
                                continue

                            try:
                                ckan_format = resource['format'].upper()
                                format_type = ResourceFormats.objects.get(
                                    ckan_format=ckan_format)
                            except (ResourceFormats.
                                    MultipleObjectsReturned,
                                    ResourceFormats.DoesNotExist,
                                    TypeError) as e:
                                logger.exception(e)
                                logger.error(
                                    "I can't crash here, so I do not pay any attention to this error."
                                )
                                format_type = None

                            resource_kvp = {
                                'ckan_id': resource_ckan_id,
                                'dataset': dataset,
                                'format_type': format_type,
                                'title': resource['name'],
                                'referenced_url': resource['url'],
                            }

                            try:
                                instance = Resource.objects.get(
                                    ckan_id=resource_ckan_id)
                            except Resource.DoesNotExist:
                                instance = Resource.default.create(
                                    save_opts={
                                        'current_user': None,
                                        'synchronize': True
                                    },
                                    **resource_kvp)
                            else:
                                for k, v in resource_kvp.items():
                                    setattr(instance, k, v)
                            instance.save(current_user=None,
                                          synchronize=True)

        except Exception as e:
            # Log first: the purge below talks to CKAN and may fail too,
            # which would otherwise hide the root cause.
            logger.exception(e)
            for ckan_id in ckan_ids:
                CkanHandler.purge_dataset(str(ckan_id))
            raise CriticalError() from e
        else:
            for ckan_id in ckan_ids:
                CkanHandler.publish_dataset(id=str(ckan_id), state='active')
예제 #5
0
    def synchronize(self, with_user=None, activate=None):
        """Synchronise the dataset with the CKAN instance.

        Builds the CKAN package description from the dataset's fields, makes
        sure the owning organisation exists and is active in CKAN, then
        publishes the package.

        Args:
            with_user: when given, this user is added to the organisation
                and to every category group in CKAN, and the package is
                published with the user's own API key. Otherwise the
                package is published through ``CkanHandler``.
            activate: when not ``None``, the package state is set to
                ``'active'`` (truthy) or ``'deleted'`` (falsy). When
                ``None``, no state is sent.

        Returns:
            Whatever ``publish_dataset`` returns.
        """
        # CKAN package identifier; `None` means the package is being created.
        package_id = str(self.ckan_id) if self.ckan_id else None

        # Package properties
        # ==================

        datatype = [item.slug for item in self.data_type.all()]

        date_creation = str(self.date_creation) if self.date_creation else ''
        date_modification = \
            str(self.date_modification) if self.date_modification else ''
        date_publication = \
            str(self.date_publication) if self.date_publication else ''

        broadcaster_name = self.broadcaster_name \
            or (self.support and self.support.name) \
            or DEFAULT_PLATFORM_NAME
        broadcaster_email = self.broadcaster_email \
            or (self.support and self.support.email) \
            or DEFAULT_CONTACT_EMAIL

        geocover = self.geocover or ''

        granularity = (self.granularity and self.granularity.slug) or ''

        # Only send a licence that CKAN knows about; the remote list is not
        # needed when the dataset has no licence at all.
        license_id = ''
        if self.license:
            known_licenses = {
                item['id'] for item in CkanHandler.get_licenses()}
            if self.license.ckan_id in known_licenses:
                license_id = self.license.ckan_id

        # The flag is set as soon as one resource of the dataset has OGC
        # services, independently of the order the resources are returned in.
        Resource = apps.get_model(app_label='idgo_admin',
                                  model_name='Resource')
        ows = any(
            resource.ogc_services
            for resource in Resource.objects.filter(dataset=self))

        spatial = (self.bbox and self.bbox.geojson) or ''
        support = (self.support and self.support.slug) or ''
        tags = [{'name': keyword.name} for keyword in self.keywords.all()]
        try:
            thumbnail = urljoin(DOMAIN_NAME, self.thumbnail.url)
        except ValueError:
            thumbnail = ''

        # Check whether the dataset was harvested from a remote
        # CKAN/CSW/DCAT catalogue. The harvesters are tried in this order
        # and the last match wins.
        remote_url = None
        harvesters = (
            (ENABLE_CKAN_HARVESTER, 'RemoteCkanDataset'),  # (1) CKAN
            (ENABLE_CSW_HARVESTER, 'RemoteCswDataset'),  # (2) CSW
            (ENABLE_DCAT_HARVESTER, 'RemoteDcatDataset'),  # (3) DCAT
        )
        for enabled, model_name in harvesters:
            if not enabled:
                continue
            RemoteDataset = apps.get_model(app_label='idgo_admin',
                                           model_name=model_name)
            try:
                remote_dataset = RemoteDataset.objects.get(dataset=self)
            except RemoteDataset.DoesNotExist:
                continue
            remote_url = remote_dataset.url

        organisation_id = str(self.organisation.ckan_id)
        categories = list(self.categories.all())

        data = {
            'author': self.owner_name,
            'author_email': self.owner_email,
            'datatype': datatype,
            'dataset_creation_date': date_creation,
            'dataset_modification_date': date_modification,
            'dataset_publication_date': date_publication,
            'frequency': self.update_frequency or 'unknown',
            'geocover': geocover,
            'granularity': granularity,
            # Categories are sent as CKAN groups
            'groups': [{'name': category.slug} for category in categories],
            'inspire_url': self.geonet_url,
            'license_id': license_id,
            'maintainer': broadcaster_name,
            'maintainer_email': broadcaster_email,
            'name': self.slug,
            'notes': self.description,
            'owner_org': organisation_id,
            'ows': str(ows),  # IMPORTANT: sent as a string
            'private': self.private,
            'remote_url': remote_url or '',
            'spatial': spatial,
            'support': support,
            'tags': tags,
            'title': self.title,
            'thumbnail': thumbnail,
            'url': '',  # IMPORTANT
        }

        if activate is not None:
            data['state'] = 'active' if activate else 'deleted'

        # Organisation: create it in CKAN when it does not exist yet...
        ckan_organisation = CkanHandler.get_organisation(organisation_id)
        if not ckan_organisation:
            CkanHandler.add_organisation(self.organisation)
        # ... and re-activate it when it is disabled.
        elif ckan_organisation.get('state') == 'deleted':
            CkanHandler.activate_organisation(organisation_id)

        if not with_user:
            return CkanHandler.publish_dataset(id=package_id, **data)

        username = with_user.username

        # TODO: doing this on every call is heavy -> look for an improvement
        CkanHandler.add_user_to_organisation(username, organisation_id)
        for category in categories:
            CkanHandler.add_user_to_group(username, str(category.ckan_id))

        apikey = CkanHandler.get_user(username)['apikey']
        with CkanUserHandler(apikey=apikey) as ckan_user:
            return ckan_user.publish_dataset(id=package_id, **data)