Beispiel #1
0
    def serialize(cls, reuse):
        """Build the search document describing ``reuse``.

        Linked datasets are re-queried with only their ``id`` and ``title``
        loaded. The organization is looked up when the reuse has one,
        otherwise the owner; whichever is found is indexed by its string id.
        """
        linked_ids = [linked.id for linked in reuse.datasets]
        dataset_query = Dataset.objects(id__in=linked_ids)
        linked_datasets = list(
            dataset_query.only('id', 'title').no_dereference())

        org_id = None
        owner_id = None
        if reuse.organization:
            found_org = Organization.objects(
                id=reuse.organization.id).first()
            if found_org:
                org_id = str(found_org.id)
        elif reuse.owner:
            found_owner = User.objects(id=reuse.owner.id).first()
            if found_owner:
                owner_id = str(found_owner.id)

        document = {
            'title': reuse.title,
            'description': reuse.description,
            'url': reuse.url,
            'organization': org_id,
            'owner': owner_id,
            'type': reuse.type,
            'topic': reuse.topic,
            'tags': reuse.tags,
            'tag_suggest': reuse.tags,
            'badges': [badge.kind for badge in reuse.badges],
            'created': to_iso_datetime(reuse.created_at),
            'last_modified': to_iso_datetime(reuse.last_modified),
        }
        document['dataset'] = [
            {'id': str(entry.id), 'title': entry.title}
            for entry in linked_datasets
        ]
        document['metrics'] = reuse.metrics
        document['featured'] = reuse.featured
        document['extras'] = reuse.extras
        # Completion entry: title tokens plus the raw identifier as inputs.
        document['reuse_suggest'] = {
            'input': cls.completer_tokenize(reuse.title) + [reuse.id],
            'output': str(reuse.id),
            'payload': {
                'title': reuse.title,
                'slug': reuse.slug,
                'image_url': reuse.image(500, external=True),
            },
        }
        return document
Beispiel #2
0
 def serialize(cls, organization):
     """Build the flat search document for ``organization``.

     Datetime values found in ``extras`` are converted with
     ``to_iso_datetime``; every other extra is copied as is. Counters are
     read from ``metrics`` and default to 0 when missing.
     """
     extras = {
         key: (to_iso_datetime(value)
               if isinstance(value, datetime.datetime) else value)
         for key, value in organization.extras.items()
     }
     metrics = organization.metrics
     document = {
         'id': str(organization.id),
         'name': organization.name,
         # Falsy acronyms (e.g. empty string) are indexed as None.
         'acronym': organization.acronym or None,
         'description': organization.description,
         'url': organization.url,
         'badges': [badge.kind for badge in organization.badges],
         'created_at': to_iso_datetime(organization.created_at),
         'orga_sp': int(bool(organization.public_service)),
     }
     for counter in ('followers', 'datasets', 'reuses', 'views'):
         document[counter] = metrics.get(counter, 0)
     document['extras'] = extras
     return document
Beispiel #3
0
def serialize_resource_for_event(resource):
    """Return a plain dict describing ``resource``.

    Date fields go through ``to_iso_datetime``. The checksum type and value
    are None when the resource has no checksum. Datetime values inside
    ``extras`` are converted as well; other extras are copied unchanged.
    """
    checksum = resource.checksum
    payload = {
        'id': str(resource.id),
        'url': resource.url,
        'format': resource.format,
        'title': resource.title,
        'schema': resource.schema,
        'description': resource.description,
        'filetype': resource.filetype,
        'type': resource.type,
        'mime': resource.mime,
        'filesize': resource.filesize,
        'checksum_type': checksum.type if checksum else None,
        'checksum_value': checksum.value if checksum else None,
        'created_at': to_iso_datetime(resource.created_at),
        'modified': to_iso_datetime(resource.modified),
        'published': to_iso_datetime(resource.published)
    }
    payload['extras'] = {
        name: (to_iso_datetime(item)
               if isinstance(item, datetime.datetime) else item)
        for name, item in resource.extras.items()
    }
    return payload
Beispiel #4
0
 def serialize(cls, user):
     """Build the search document for ``user``, including its suggester.

     The completion inputs are the tokens of the user's full name plus the
     user's identifier; the completion output is the identifier as a string.
     """
     suggest_inputs = cls.completer_tokenize(user.fullname) + [user.id]
     suggest_payload = {
         'avatar_url': user.avatar(40, external=True),
         'first_name': user.first_name,
         'last_name': user.last_name,
         'slug': user.slug,
     }
     organization_ids = [str(org.id) for org in user.organizations]
     return {
         'first_name': user.first_name,
         'last_name': user.last_name,
         'about': user.about,
         'organizations': organization_ids,
         'metrics': user.metrics,
         'created': to_iso_datetime(user.created_at),
         'user_suggest': {
             'input': suggest_inputs,
             'output': str(user.id),
             'payload': suggest_payload,
         },
         'visible': user.visible,
     }
Beispiel #5
0
 def serialize(cls, organization):
     """Build the search document for ``organization`` with its suggester.

     Completion inputs are the tokens of the name, the identifier and, when
     one is set, the acronym.
     """
     acronym = organization.acronym
     suggest_inputs = cls.completer_tokenize(organization.name)
     suggest_inputs.append(organization.id)
     if acronym:
         suggest_inputs.append(acronym)

     suggest = {
         'input': suggest_inputs,
         'output': str(organization.id),
         'payload': {
             'name': organization.name,
             'acronym': organization.acronym,
             'image_url': organization.logo(40, external=True),
             'slug': organization.slug,
         },
     }
     document = {
         'name': organization.name,
         'acronym': organization.acronym,
         'description': organization.description,
         'url': organization.url,
         'metrics': organization.metrics,
         'badges': [badge.kind for badge in organization.badges],
         'created': to_iso_datetime(organization.created_at),
     }
     document['org_suggest'] = suggest
     return document
Beispiel #6
0
 def test_to_iso_datetime_with_datetime(self):
     """A datetime keeps its time component in the resulting string."""
     moment = datetime(1984, 2, 29, 1, 2, 3)
     self.assertEqual(to_iso_datetime(moment), '1984-02-29T01:02:03')
Beispiel #7
0
 def test_to_iso_datetime_emtpy(self):
     """Converting ``None`` yields ``None``."""
     converted = to_iso_datetime(None)
     self.assertEqual(converted, None)
Beispiel #8
0
    def serialize(cls, dataset):
        """Build the flat search document for ``dataset``.

        The document carries the dataset's own fields, a small summary of
        its organization (or the owner's id when there is no organization),
        the resource formats and schema names, and the extras with datetime
        values converted by ``to_iso_datetime``.

        ``temporal_coverage_start`` / ``temporal_coverage_end`` are added
        only when both bounds of the temporal coverage are set. ``geozones``
        and ``granularity`` are added only when the dataset has spatial
        information.

        :param dataset: the dataset to serialize.
        :return: a dict ready to be indexed.
        """
        organization = None
        owner = None

        if dataset.organization:
            org = Organization.objects(id=dataset.organization.id).first()
            # ``first()`` returns None when no document matches (e.g. the
            # referenced organization was removed); index the dataset
            # without organization details instead of failing on ``org.id``.
            if org is not None:
                organization = {
                    'id': str(org.id),
                    'name': org.name,
                    'public_service': 1 if org.public_service else 0,
                    'followers': org.metrics.get('followers', 0)
                }
        elif dataset.owner:
            owner = User.objects(id=dataset.owner.id).first()

        document = {
            'id': str(dataset.id),
            'title': dataset.title,
            'description': dataset.description,
            'acronym': dataset.acronym or None,
            'url': dataset.display_url,
            'tags': dataset.tags,
            'license': getattr(dataset.license, 'id', None),
            'badges': [badge.kind for badge in dataset.badges],
            'frequency': dataset.frequency,
            'created_at': to_iso_datetime(dataset.created_at),
            'views': dataset.metrics.get('views', 0),
            'followers': dataset.metrics.get('followers', 0),
            'reuses': dataset.metrics.get('reuses', 0),
            'featured': 1 if dataset.featured else 0,
            'resources_count': len(dataset.resources),
            'organization': organization,
            'owner': str(owner.id) if owner else None,
            'format':
            [r.format.lower() for r in dataset.resources if r.format],
            'schema':
            [r.schema.get('name') for r in dataset.resources if r.schema]
        }
        # Datetime extras are not directly serializable; convert them and
        # copy everything else untouched.
        document['extras'] = {
            key: (to_iso_datetime(value)
                  if isinstance(value, datetime.datetime) else value)
            for key, value in dataset.extras.items()
        }

        coverage = dataset.temporal_coverage
        if coverage is not None and coverage.start and coverage.end:
            document.update({
                'temporal_coverage_start': to_iso_datetime(coverage.start),
                'temporal_coverage_end': to_iso_datetime(coverage.end),
            })

        if dataset.spatial is not None:
            # Index precise zone labels and parents zone identifiers
            # to allow fast filtering.
            zone_ids = [z.id for z in dataset.spatial.zones]
            zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
            parents = set()
            geozones = []
            for zone in zones:
                geozones.append({
                    'id': zone.id,
                    'name': zone.name,
                    'keys': zone.keys_values
                })
                parents.update(zone.parents)

            # Parent zones are indexed by identifier only.
            geozones.extend({'id': p} for p in parents)
            document.update({
                'geozones': geozones,
                'granularity': dataset.spatial.granularity,
            })
        return document
Beispiel #9
0
 def test_to_iso_datetime_with_date(self):
     """A plain date is rendered with a midnight time component."""
     day = date(1984, 2, 29)
     assert to_iso_datetime(day) == '1984-02-29T00:00:00'
Beispiel #10
0
 def test_to_iso_datetime_emtpy(self):
     """Converting ``None`` returns ``None`` itself."""
     outcome = to_iso_datetime(None)
     assert outcome is None
Beispiel #11
0
 def test_to_iso_datetime_emtpy(self):
     """A missing value (``None``) converts to ``None``."""
     empty_result = to_iso_datetime(None)
     self.assertEqual(empty_result, None)
Beispiel #12
0
 def test_to_iso_datetime_before_1900(self):
     """Dates earlier than 1900 are formatted like any other date."""
     old_day = date(1884, 2, 29)
     expected = '1884-02-29T00:00:00'
     assert to_iso_datetime(old_day) == expected
Beispiel #13
0
 def test_to_iso_datetime_with_date(self):
     """A date without time information is rendered at 00:00:00."""
     rendered = to_iso_datetime(date(1984, 2, 29))
     assert rendered == '1984-02-29T00:00:00'
Beispiel #14
0
 def test_to_iso_datetime_with_datetime(self):
     """Hours, minutes and seconds of a datetime appear in the output."""
     moment = datetime(1984, 2, 29, 1, 2, 3)
     assert to_iso_datetime(moment) == '1984-02-29T01:02:03'
Beispiel #15
0
 def test_to_iso_datetime_emtpy(self):
     """``None`` in, ``None`` out."""
     value = to_iso_datetime(None)
     assert value is None
Beispiel #16
0
 def test_to_iso_datetime_with_date(self):
     """A plain date is serialized with a zeroed time part."""
     leap_day = date(1984, 2, 29)
     self.assertEqual(to_iso_datetime(leap_day), '1984-02-29T00:00:00')
Beispiel #17
0
 def test_to_iso_datetime_before_1900(self):
     """A 19th-century date is serialized with its four-digit year."""
     old_day = date(1884, 2, 29)
     self.assertEqual(to_iso_datetime(old_day), '1884-02-29T00:00:00')
Beispiel #18
0
 def test_to_iso_datetime_with_datetime(self):
     """A datetime is serialized down to the second."""
     serialized = to_iso_datetime(datetime(1984, 2, 29, 1, 2, 3))
     self.assertEqual(serialized, '1984-02-29T01:02:03')
Beispiel #19
0
 def test_to_iso_datetime_with_datetime(self):
     """The time of day of a datetime is kept in the output string."""
     expected = '1984-02-29T01:02:03'
     assert to_iso_datetime(datetime(1984, 2, 29, 1, 2, 3)) == expected
Beispiel #20
0
 def test_to_iso_datetime_with_date(self):
     """A date-only value gets ``T00:00:00`` appended."""
     serialized = to_iso_datetime(date(1984, 2, 29))
     self.assertEqual(serialized, '1984-02-29T00:00:00')
Beispiel #21
0
 def test_to_iso_datetime_before_1900(self):
     """Years before 1900 are supported."""
     rendered = to_iso_datetime(date(1884, 2, 29))
     assert rendered == '1884-02-29T00:00:00'
Beispiel #22
0
 def test_to_iso_datetime_before_1900(self):
     """A date from 1884 converts without error to the expected string."""
     expected = '1884-02-29T00:00:00'
     actual = to_iso_datetime(date(1884, 2, 29))
     self.assertEqual(actual, expected)
Beispiel #23
0
    def serialize(cls, dataset):
        """Build the search document for ``dataset``, suggesters included.

        The organization is looked up when the dataset has one, otherwise
        the owner; the one that is found provides the image used in the
        completion payload. Temporal and spatial weights start from their
        module defaults and are replaced when the dataset has a complete
        temporal coverage or spatial information; both feed the completion
        weight computed by ``cls.get_suggest_weight``.

        :param dataset: the dataset to serialize.
        :return: a dict ready to be indexed.
        """
        organization = None
        owner = None
        image_url = None
        spatial_weight = DEFAULT_SPATIAL_WEIGHT
        temporal_weight = DEFAULT_TEMPORAL_WEIGHT

        if dataset.organization:
            organization = Organization.objects(
                id=dataset.organization.id).first()
            # ``first()`` returns None when no document matches; the rest of
            # this method already tolerates a missing organization, so only
            # ask for the logo when one was actually found.
            if organization is not None:
                image_url = organization.logo(40, external=True)
        elif dataset.owner:
            owner = User.objects(id=dataset.owner.id).first()
            # Same guard as above for a missing owner.
            if owner is not None:
                image_url = owner.avatar(40, external=True)

        certified = organization and organization.certified

        document = {
            'title': dataset.title,
            'description': dataset.description,
            'license': getattr(dataset.license, 'id', None),
            'tags': dataset.tags,
            'badges': [badge.kind for badge in dataset.badges],
            'tag_suggest': dataset.tags,
            'resources': [{
                'title': r.title,
                'description': r.description,
                'format': r.format,
                'type': r.type,
                'schema': r.schema,
            } for r in dataset.resources],
            'format_suggest':
            [r.format.lower() for r in dataset.resources if r.format],
            'mime_suggest': [],  # Filled in at the end of this method
            'frequency': dataset.frequency,
            'organization': str(organization.id) if organization else None,
            'owner': str(owner.id) if owner else None,
            'dataset_suggest': {
                'input':
                cls.completer_tokenize(dataset.title) + [str(dataset.id)],
                'output': dataset.title,
                'payload': {
                    'id': str(dataset.id),
                    'slug': dataset.slug,
                    'acronym': dataset.acronym,
                    'image_url': image_url,
                },
            },
            'created': to_iso_datetime(dataset.created_at),
            'last_modified': to_iso_datetime(dataset.last_modified),
            'metrics': dataset.metrics,
            'featured': dataset.featured,
            'from_certified': certified,
        }
        if (dataset.temporal_coverage is not None
                and dataset.temporal_coverage.start
                and dataset.temporal_coverage.end):
            # Bounds are indexed as ordinals; the weight is the covered span
            # in units of 365 days, capped by the configured maximum.
            start = dataset.temporal_coverage.start.toordinal()
            end = dataset.temporal_coverage.end.toordinal()
            temporal_weight = min(
                abs(end - start) / 365, cls.from_config('MAX_TEMPORAL_WEIGHT'))
            document.update({
                'temporal_coverage': {
                    'start': start,
                    'end': end
                },
                'temporal_weight': temporal_weight,
            })

        if dataset.spatial is not None:
            # Index precise zone labels and parents zone identifiers
            # to allow fast filtering.
            zone_ids = [z.id for z in dataset.spatial.zones]
            zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
            parents = set()
            geozones = []
            coverage_level = ADMIN_LEVEL_MAX
            for zone in zones:
                geozones.append({
                    'id': zone.id,
                    'name': zone.name,
                    'keys': zone.keys_values
                })
                parents |= set(zone.parents)
                # Keep the smallest admin level value among the zones.
                coverage_level = min(coverage_level, admin_levels[zone.level])

            # Parent zones are indexed by identifier only.
            geozones.extend([{'id': p} for p in parents])

            spatial_weight = ADMIN_LEVEL_MAX / coverage_level
            document.update({
                'geozones': geozones,
                'granularity': dataset.spatial.granularity,
                'spatial_weight': spatial_weight,
            })

        document['dataset_suggest']['weight'] = cls.get_suggest_weight(
            temporal_weight, spatial_weight, dataset.featured)

        if dataset.acronym:
            document['dataset_suggest']['input'].append(dataset.acronym)

        # mime completion: one entry per distinct lower-cased mime type,
        # matched on each of its ``/`` or ``+`` separated parts and on the
        # full value.
        mimes = {r.mime.lower() for r in dataset.resources if r.mime}
        for mime in mimes:
            document['mime_suggest'].append({
                'input': mime.replace('+', '/').split('/') + [mime],
                'output': mime,
            })

        return document
Beispiel #24
0
 def test_to_iso_datetime_before_1000(self):
     """A three-digit year is zero-padded to four digits."""
     ancient_day = date(908, 2, 29)
     assert to_iso_datetime(ancient_day) == '0908-02-29T00:00:00'