Exemple #1
0
class DatasetSearch(ModelSearchAdapter):
    '''Search adapter for ``Dataset`` documents.

    Declares the indexed fields (class-level field declarations), the
    fields queried by full-text search, the sorts, facets and boosters,
    and builds the indexed document in ``serialize``.
    '''
    model = Dataset
    fuzzy = True
    exclude_fields = ['spatial.geom', 'spatial.zones.geom']

    class Meta:
        doc_type = 'Dataset'

    title = String(analyzer=i18n_analyzer,
                   fields={'raw': String(index='not_analyzed')})
    description = String(analyzer=i18n_analyzer)
    license = String(index='not_analyzed')
    frequency = String(index='not_analyzed')
    organization = String(index='not_analyzed')
    owner = String(index='not_analyzed')
    tags = String(index='not_analyzed',
                  fields={'i18n': String(index='not_analyzed')})
    badges = String(index='not_analyzed')
    tag_suggest = Completion(analyzer=simple,
                             search_analyzer=simple,
                             payloads=False)
    resources = Object(
        properties={
            'title': String(),
            'description': String(),
            'format': String(index='not_analyzed')
        })
    format_suggest = Completion(analyzer=simple,
                                search_analyzer=simple,
                                payloads=False)
    dataset_suggest = Completion(analyzer=simple,
                                 search_analyzer=simple,
                                 payloads=True)
    created = Date(format='date_hour_minute_second')
    last_modified = Date(format='date_hour_minute_second')
    metrics = metrics_mapping_for(Dataset)
    featured = Boolean()
    # Start and end are indexed as longs (date ordinals, see serialize()).
    temporal_coverage = Nested(multi=False,
                               properties={
                                   'start': Long(),
                                   'end': Long()
                               })
    # Must be a bare field: a trailing comma here would bind a 1-tuple
    # instead of a ``Long`` field declaration.
    temporal_weight = Long()
    geozones = Object(
        properties={
            'id': String(index='not_analyzed'),
            'name': String(index='not_analyzed'),
            'keys': String(index='not_analyzed')
        })
    granularity = String(index='not_analyzed')
    spatial_weight = Long()
    from_certified = Boolean()

    # Fields queried by full-text search, with their boost (``^n``).
    # NOTE(review): 'acronym' is neither declared as a field above nor
    # emitted at the top level by serialize() (it only appears in the
    # suggester payload and inputs) -- confirm whether it should be indexed.
    fields = (
        'geozones.keys^9',
        'geozones.name^9',
        'acronym^7',
        'title^6',
        'tags.i18n^3',
        'description',
    )
    # Public sort name -> indexed field used for sorting.
    sorts = {
        'title': 'title.raw',
        'created': 'created',
        'last_modified': 'last_modified',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
    }

    facets = {
        'tag':
        TermsFacet(field='tags'),
        'badge':
        TermsFacet(field='badges', labelizer=dataset_badge_labelizer),
        'organization':
        ModelTermsFacet(field='organization', model=Organization),
        'owner':
        ModelTermsFacet(field='owner', model=User),
        'license':
        ModelTermsFacet(field='license', model=License),
        'geozone':
        ModelTermsFacet(field='geozones.id',
                        model=GeoZone,
                        labelizer=zone_labelizer),
        'granularity':
        TermsFacet(field='granularity', labelizer=granularity_labelizer),
        'format':
        TermsFacet(field='resources.format'),
        'reuses':
        RangeFacet(field='metrics.reuses',
                   ranges=[('none', (None, 1)), ('few', (1, 5)),
                           ('quite', (5, 10)), ('many', (10, None))],
                   labels={
                       'none': _('Never reused'),
                       'few': _('Little reused'),
                       'quite': _('Quite reused'),
                       'many': _('Heavily reused'),
                   }),
        'temporal_coverage':
        TemporalCoverageFacet(field='temporal_coverage'),
        'featured':
        BoolFacet(field='featured'),
    }
    boosters = [
        BoolBooster('featured', 1.5),
        BoolBooster('from_certified', 1.2),
        ValueFactor('spatial_weight', missing=1),
        ValueFactor('temporal_weight', missing=1),
        GaussDecay('metrics.reuses', max_reuses, decay=0.1),
        GaussDecay('metrics.followers',
                   max_followers,
                   max_followers,
                   decay=0.1),
    ]

    @classmethod
    def is_indexable(cls, dataset):
        '''Tell whether ``dataset`` should be indexed.

        A dataset is indexable when it is not deleted, has at least one
        resource and is not private.
        '''
        return (dataset.deleted is None and len(dataset.resources) > 0
                and not dataset.private)

    @classmethod
    def get_suggest_weight(cls, temporal_weight, spatial_weight, featured):
        '''Compute the suggest part of the indexation payload.

        The weight is the product of the temporal weight, the spatial
        weight and ``FEATURED_WEIGHT`` (replaced by 1 when the dataset is
        not featured).
        '''
        featured_weight = 1 if not featured else FEATURED_WEIGHT
        return temporal_weight * spatial_weight * featured_weight

    @classmethod
    def serialize(cls, dataset):
        '''Build the document indexed for ``dataset``.

        Returns a dict holding the dataset fields, the suggester entry
        (inputs, output, payload and weight) and, when available, the
        temporal coverage and geozones with their derived weights.
        '''
        organization = None
        owner = None
        image_url = None
        # Defaults used for the suggest weight when the dataset has no
        # usable temporal coverage or no spatial coverage.
        spatial_weight = DEFAULT_SPATIAL_WEIGHT
        temporal_weight = DEFAULT_TEMPORAL_WEIGHT

        # The organization takes precedence over the owner, both for the
        # indexed identifier and for the suggester image.
        if dataset.organization:
            organization = Organization.objects(
                id=dataset.organization.id).first()
            image_url = organization.logo(40, external=True)
        elif dataset.owner:
            owner = User.objects(id=dataset.owner.id).first()
            image_url = owner.avatar(40, external=True)

        certified = organization and organization.certified

        document = {
            'title': dataset.title,
            'description': dataset.description,
            'license': getattr(dataset.license, 'id', None),
            'tags': dataset.tags,
            'badges': [badge.kind for badge in dataset.badges],
            'tag_suggest': dataset.tags,
            'resources': [{
                'title': r.title,
                'description': r.description,
                'format': r.format,
            } for r in dataset.resources],
            'format_suggest':
            [r.format.lower() for r in dataset.resources if r.format],
            'frequency': dataset.frequency,
            'organization': str(organization.id) if organization else None,
            'owner': str(owner.id) if owner else None,
            'dataset_suggest': {
                'input': cls.completer_tokenize(dataset.title) + [dataset.id],
                'output': dataset.title,
                'payload': {
                    'id': str(dataset.id),
                    'slug': dataset.slug,
                    'acronym': dataset.acronym,
                    'image_url': image_url,
                },
            },
            'created': dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
            'last_modified':
            dataset.last_modified.strftime('%Y-%m-%dT%H:%M:%S'),
            'metrics': dataset.metrics,
            'featured': dataset.featured,
            'from_certified': certified,
        }
        if (dataset.temporal_coverage is not None
                and dataset.temporal_coverage.start
                and dataset.temporal_coverage.end):
            # Bounds are indexed as ordinals; the weight is the covered
            # span divided by 365, capped at MAX_TEMPORAL_WEIGHT.
            start = dataset.temporal_coverage.start.toordinal()
            end = dataset.temporal_coverage.end.toordinal()
            temporal_weight = min((end - start) / 365, MAX_TEMPORAL_WEIGHT)
            document.update({
                'temporal_coverage': {
                    'start': start,
                    'end': end
                },
                'temporal_weight': temporal_weight,
            })

        if dataset.spatial is not None:
            # Index precise zone labels and parents zone identifiers
            # to allow fast filtering.
            zone_ids = [z.id for z in dataset.spatial.zones]
            zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
            parents = set()
            geozones = []
            coverage_level = ADMIN_LEVEL_MAX
            for zone in zones:
                geozones.append({
                    'id': zone.id,
                    'name': zone.name,
                    'keys': zone.keys_values
                })
                parents |= set(zone.parents)
                # Keep the smallest admin level among the covered zones.
                coverage_level = min(coverage_level, admin_levels[zone.level])

            # Parent zones are indexed by identifier only.
            geozones.extend([{'id': p} for p in parents])

            spatial_weight = ADMIN_LEVEL_MAX / coverage_level
            document.update({
                'geozones': geozones,
                'granularity': dataset.spatial.granularity,
                'spatial_weight': spatial_weight,
            })

        # The suggest weight uses the final weights (defaults or computed).
        document['dataset_suggest']['weight'] = cls.get_suggest_weight(
            temporal_weight, spatial_weight, dataset.featured)

        if dataset.acronym:
            document['dataset_suggest']['input'].append(dataset.acronym)

        return document
Exemple #2
0
class UserSearch(ModelSearchAdapter):
    '''Search adapter for ``User`` documents.

    Declares the indexed fields, sorts, facets and boosters, and builds
    the indexed document in ``serialize``.
    '''
    model = User
    fuzzy = True

    class Meta:
        doc_type = 'User'

    first_name = String()
    last_name = String()
    about = String(analyzer=i18n_analyzer)
    organizations = String(index='not_analyzed')
    visible = Boolean()
    metrics = metrics_mapping_for(User)
    created = Date(format='date_hour_minute_second')
    user_suggest = Completion(
        analyzer=simple,
        search_analyzer=simple,
        payloads=True,
    )

    # Public sort name -> indexed field used for sorting.
    sorts = {
        'last_name': 'last_name',
        'first_name': 'first_name',
        'datasets': 'metrics.datasets',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
        'created': 'created',
    }
    facets = {
        'organization': ModelTermsFacet(
            field='organizations',
            model=Organization,
        ),
        'datasets': RangeFacet(
            field='metrics.datasets',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No datasets'),
                'few': _('Few datasets'),
                'many': _('Many datasets'),
            },
        ),
        'followers': RangeFacet(
            field='metrics.followers',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No followers'),
                'few': _('Few followers'),
                'many': _('Many followers'),
            },
        ),
    }
    boosters = [
        GaussDecay('metrics.reuses', 50, decay=0.8),
        GaussDecay('metrics.datasets', 50, decay=0.8),
        GaussDecay('metrics.followers', 200, 200, decay=0.8),
    ]

    @classmethod
    def serialize(cls, user):
        '''Build the document indexed for ``user``.

        The suggester entry takes the tokenized full name plus the user
        identifier as inputs and outputs the identifier as a string.
        '''
        document = {
            'first_name': user.first_name,
            'last_name': user.last_name,
            'about': user.about,
            'organizations': [str(org.id) for org in user.organizations],
            'metrics': user.metrics,
            'created': to_iso_datetime(user.created_at),
        }
        document['user_suggest'] = {
            'input': cls.completer_tokenize(user.fullname) + [user.id],
            'output': str(user.id),
            'payload': {
                'avatar_url': user.avatar(40, external=True),
                'first_name': user.first_name,
                'last_name': user.last_name,
                'slug': user.slug,
            },
        }
        document['visible'] = user.visible
        return document
Exemple #3
0
class DatasetSearch(ModelSearchAdapter):
    '''Search adapter for ``Dataset`` documents (raw mapping flavour).

    Holds the index mapping, the searched fields, sorts, facets and
    boosters, and builds the indexed document in ``serialize``.
    '''
    model = Dataset
    fuzzy = True
    mapping = {
        'properties': {
            'title': {
                'type': 'string',
                'analyzer': i18n_analyzer,
                'fields': {
                    'raw': {'type': 'string', 'index': 'not_analyzed'},
                },
            },
            'description': {'type': 'string', 'analyzer': i18n_analyzer},
            'license': {'type': 'string', 'index': 'not_analyzed'},
            'frequency': {'type': 'string'},
            'organization': {'type': 'string'},
            'owner': {'type': 'string'},
            'supplier': {'type': 'string'},
            'tags': {
                'type': 'string',
                'index_name': 'tag',
                'index': 'not_analyzed',
            },
            'tag_suggest': {
                'type': 'completion',
                'index_analyzer': 'simple',
                'search_analyzer': 'simple',
                'payloads': False,
            },
            'resources': {
                'type': 'object',
                'index_name': 'resource',
                'properties': {
                    'title': {'type': 'string'},
                    'description': {'type': 'string'},
                    'license': {'type': 'string'},
                },
            },
            'format_suggest': {
                'type': 'completion',
                'index_analyzer': 'simple',
                'search_analyzer': 'simple',
                'payloads': False,
            },
            'dataset_suggest': {
                'type': 'completion',
                'index_analyzer': 'simple',
                'search_analyzer': 'simple',
                'payloads': True,
            },
            'created': {'type': 'date', 'format': 'date_hour_minute_second'},
            'last_modified': {
                'type': 'date',
                'format': 'date_hour_minute_second',
            },
            'metrics': metrics_mapping(Dataset),
            'featured': {'type': 'boolean'},
            # Store dates as ordinals to handle pre-1900 dates
            'temporal_coverage': {
                'type': 'object',
                'properties': {
                    'start': {'type': 'long'},
                    'end': {'type': 'long'},
                },
            },
            'territories': {
                'type': 'object',
                'index_name': 'territories',
                'properties': {
                    'id': {'type': 'string'},
                    'name': {'type': 'string'},
                    'code': {'type': 'string'},
                },
            },
            'granularity': {'type': 'string', 'index': 'not_analyzed'},
            # Geometry indexing is currently disabled:
            # 'geom': {'type': 'geo_shape', 'precision': '100m'},
            'extras': {'type': 'object', 'index_name': 'extra'},
        },
    }
    # Fields queried by full-text search, with their boost (``^n``).
    fields = (
        'title^6',
        'tags^3',
        'territories.name^3',
        'description',
        'code',
    )
    # Public sort name -> sort on the matching indexed field.
    sorts = {
        'title': Sort('title.raw'),
        'created': Sort('created'),
        'last_modified': Sort('last_modified'),
        'reuses': Sort('metrics.reuses'),
        'followers': Sort('metrics.followers'),
        'views': Sort('metrics.views'),
    }
    facets = {
        'tag': TermFacet('tags'),
        'organization': ModelTermFacet('organization', Organization),
        'owner': ModelTermFacet('owner', User),
        'supplier': ModelTermFacet('supplier', Organization),
        'license': ModelTermFacet('license', License),
        'territory': ModelTermFacet('territories.id', Territory),
        'granularity': TermFacet(
            'granularity',
            lambda l, v: SPATIAL_GRANULARITIES[v],
        ),
        'format': TermFacet('resources.format'),
        'reuses': RangeFacet('metrics.reuses'),
        'temporal_coverage': TemporalCoverageFacet('temporal_coverage'),
        'featured': BoolFacet('featured'),
        'extra': ExtrasFacet('extras'),
    }
    boosters = [
        BoolBooster('featured', 1.1),
        BoolBooster('from_public_service', 1.3),
        GaussDecay('metrics.reuses', max_reuses, decay=0.8),
        GaussDecay(
            'metrics.followers',
            max_followers,
            max_followers,
            decay=0.8,
        ),
    ]

    @classmethod
    def is_indexable(cls, dataset):
        '''Tell whether ``dataset`` should be indexed.

        True only for a dataset that is not deleted, has at least one
        resource and is not private.
        '''
        if dataset.deleted is not None:
            return False
        if len(dataset.resources) == 0:
            return False
        return not dataset.private

    @classmethod
    def serialize(cls, dataset):
        '''Build the document indexed for ``dataset``.'''
        org_id = None
        if dataset.organization is not None:
            org_id = str(dataset.organization.id)

        # The supplier is only indexed when it differs from the organization.
        supplier_id = None
        if dataset.supplier is not None:
            supplier_id = str(dataset.supplier.id)
        if supplier_id == org_id:
            supplier_id = None

        # Suggester image: organization logo first, owner avatar otherwise.
        image_url = None
        if dataset.organization:
            image_url = dataset.organization.logo(40)
        elif dataset.owner:
            image_url = dataset.owner.avatar(40)

        document = {
            'title': dataset.title,
            'description': dataset.description,
            'license': (
                dataset.license.id if dataset.license is not None else None
            ),
            'tags': dataset.tags,
            'tag_suggest': dataset.tags,
            'resources': [
                {
                    'title': resource.title,
                    'description': resource.description,
                    'format': resource.format,
                }
                for resource in dataset.resources
            ],
            'format_suggest': [
                resource.format.lower()
                for resource in dataset.resources
                if resource.format
            ],
            'frequency': dataset.frequency,
            'organization': org_id,
            'owner': str(dataset.owner.id) if dataset.owner else None,
            'supplier': supplier_id,
            'dataset_suggest': {
                'input': cls.completer_tokenize(dataset.title),
                'output': dataset.title,
                'payload': {
                    'id': str(dataset.id),
                    'slug': dataset.slug,
                    'image_url': image_url,
                },
            },
            'created': dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
            'last_modified': dataset.last_modified.strftime(
                '%Y-%m-%dT%H:%M:%S'),
            'metrics': dataset.metrics,
            'extras': dataset.extras,
            'featured': dataset.featured,
            # TODO: extract this into plugin
            'from_public_service': (
                dataset.organization.public_service
                if dataset.organization else False
            ),
        }

        # Temporal coverage is indexed only when both bounds are set.
        coverage = dataset.temporal_coverage
        if coverage is not None and coverage.start and coverage.end:
            document['temporal_coverage'] = {
                'start': coverage.start.toordinal(),
                'end': coverage.end.toordinal(),
            }

        if dataset.spatial is not None:
            document['territories'] = [
                {
                    'id': str(territory.id),
                    'name': territory.name,
                    'code': territory.code,
                }
                for territory in dataset.spatial.territories
            ]
            document['granularity'] = dataset.spatial.granularity

        return document
Exemple #4
0
class UserSearch(ModelSearchAdapter):
    '''Search adapter for ``User`` documents (raw mapping flavour).

    Holds the index mapping, the searched fields, sorts, facets and
    boosters, and builds the indexed document in ``serialize``.
    '''
    model = User
    fuzzy = True
    # analyzer = 'not_analyzed'

    mapping = {
        'properties': {
            'first_name': {
                'type': 'string',
            },
            'last_name': {
                'type': 'string',
            },
            'about': {
                'type': 'string',
                'analyzer': i18n_analyzer,
            },
            'organizations': {
                'type': 'string',
                'index_name': 'organization',
            },
            'visible': {
                'type': 'boolean',
            },
            'metrics': metrics_mapping(User),
            'created': {
                'type': 'date',
                'format': 'date_hour_minute_second',
            },
            'user_suggest': {
                'type': 'completion',
                'index_analyzer': 'simple',
                'search_analyzer': 'simple',
                'payloads': True,
            },
        },
    }

    # Fields queried by full-text search, with their boost (``^n``).
    fields = ('last_name^6', 'first_name^5', 'about')
    # Public sort name -> sort on the matching indexed field.
    sorts = {
        'last_name': Sort('last_name'),
        'first_name': Sort('first_name'),
        'datasets': Sort('metrics.datasets'),
        'reuses': Sort('metrics.reuses'),
        'followers': Sort('metrics.followers'),
        'views': Sort('metrics.views'),
        'created': Sort('created'),
    }
    facets = {
        'organization':
        ModelTermFacet('organizations', Organization),
        'reuses':
        RangeFacet('metrics.reuses'),
        'datasets':
        RangeFacet('metrics.datasets'),
    }
    boosters = [
        GaussDecay('metrics.reuses', 50, decay=0.8),
        GaussDecay('metrics.datasets', 50, decay=0.8),
        GaussDecay('metrics.followers', 200, 200, decay=0.8),
    ]

    @classmethod
    def serialize(cls, user):
        '''Build the document indexed for ``user``.

        The suggester entry takes the tokenized full name plus the user
        identifier as inputs and outputs the identifier as a string.
        '''
        document = {
            'first_name': user.first_name,
            'last_name': user.last_name,
            'about': user.about,
            'organizations': [str(org.id) for org in user.organizations],
            'metrics': user.metrics,
            'created': user.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
        }
        document['user_suggest'] = {
            'input': cls.completer_tokenize(user.fullname) + [user.id],
            'output': str(user.id),
            'payload': {
                'avatar_url': user.avatar(40),
                'first_name': user.first_name,
                'last_name': user.last_name,
                'slug': user.slug,
            },
        }
        document['visible'] = user.visible
        return document
Exemple #5
0
class DatasetSearch(ModelSearchAdapter):
    '''Search adapter for ``Dataset`` documents (raw mapping, geozones).

    Holds the index mapping, the searched fields, sorts, facets and
    boosters, and builds the indexed document in ``serialize``.
    '''
    model = Dataset
    fuzzy = True
    mapping = {
        'properties': {
            'title': {
                'type': 'string',
                'analyzer': i18n_analyzer,
                'fields': {
                    'raw': {'type': 'string', 'index': 'not_analyzed'},
                },
            },
            'description': {'type': 'string', 'analyzer': i18n_analyzer},
            'license': {'type': 'string', 'index': 'not_analyzed'},
            'frequency': {'type': 'string'},
            'organization': {'type': 'string'},
            'owner': {'type': 'string'},
            'tags': {
                'type': 'string',
                'index_name': 'tag',
                'index': 'not_analyzed',
                'fields': {
                    'i18n': {'type': 'string', 'analyzer': i18n_analyzer},
                },
            },
            'badges': {
                'type': 'string',
                'index_name': 'badges',
                'index': 'not_analyzed',
            },
            'tag_suggest': {
                'type': 'completion',
                'index_analyzer': 'simple',
                'search_analyzer': 'simple',
                'payloads': False,
            },
            'resources': {
                'type': 'object',
                'index_name': 'resource',
                'properties': {
                    'title': {'type': 'string'},
                    'description': {'type': 'string'},
                    'license': {'type': 'string'},
                },
            },
            'format_suggest': {
                'type': 'completion',
                'index_analyzer': 'simple',
                'search_analyzer': 'simple',
                'payloads': False,
            },
            'dataset_suggest': {
                'type': 'completion',
                'index_analyzer': 'simple',
                'search_analyzer': 'simple',
                'payloads': True,
            },
            'created': {'type': 'date', 'format': 'date_hour_minute_second'},
            'last_modified': {
                'type': 'date',
                'format': 'date_hour_minute_second',
            },
            'metrics': metrics_mapping(Dataset),
            'featured': {'type': 'boolean'},
            # Store dates as ordinals to handle pre-1900 dates.
            'temporal_coverage': {
                'type': 'object',
                'properties': {
                    'start': {'type': 'long'},
                    'end': {'type': 'long'},
                },
            },
            'geozones': {
                'type': 'object',
                'index_name': 'geozones',
                'properties': {
                    'id': {'type': 'string', 'index': 'not_analyzed'},
                    'name': {'type': 'string', 'index': 'not_analyzed'},
                    'keys': {'type': 'string', 'index': 'not_analyzed'},
                },
            },
            'granularity': {'type': 'string', 'index': 'not_analyzed'},
            # Geometry indexing is currently disabled:
            # 'geom': {'type': 'geo_shape', 'precision': '100m'},
            'extras': {'type': 'object', 'index_name': 'extra'},
        },
    }
    # Fields queried by full-text search, with their boost (``^n``).
    fields = (
        'geozones.keys^9',
        'geozones.name^9',
        'title^6',
        'tags.i18n^3',
        'description',
    )
    # Public sort name -> sort on the matching indexed field.
    sorts = {
        'title': Sort('title.raw'),
        'created': Sort('created'),
        'last_modified': Sort('last_modified'),
        'reuses': Sort('metrics.reuses'),
        'followers': Sort('metrics.followers'),
        'views': Sort('metrics.views'),
    }
    facets = {
        'tag':
        TermFacet('tags'),
        'badge':
        TermFacet('badges', labelizer=dataset_badge_labelizer),
        'organization':
        ModelTermFacet('organization', Organization),
        'owner':
        ModelTermFacet('owner', User),
        'license':
        ModelTermFacet('license', License),
        'geozone':
        ModelTermFacet('geozones.id', GeoZone, zone_labelizer),
        'granularity':
        TermFacet('granularity', granularity_labelizer),
        'format':
        TermFacet('resources.format'),
        'reuses':
        RangeFacet('metrics.reuses'),
        'temporal_coverage':
        TemporalCoverageFacet('temporal_coverage'),
        'featured':
        BoolFacet('featured'),
        'extra':
        ExtrasFacet('extras'),
    }
    boosters = [
        BoolBooster('featured', 1.1),
        GaussDecay('metrics.reuses', max_reuses, decay=0.1),
        GaussDecay(
            'metrics.followers',
            max_followers,
            max_followers,
            decay=0.1,
        ),
    ]

    @classmethod
    def is_indexable(cls, dataset):
        '''Tell whether ``dataset`` should be indexed.

        True only for a dataset that is not deleted, has at least one
        resource and is not private.
        '''
        if dataset.deleted is not None:
            return False
        if len(dataset.resources) == 0:
            return False
        return not dataset.private

    @classmethod
    def serialize(cls, dataset):
        '''Build the document indexed for ``dataset``.'''
        org_id = None
        if dataset.organization is not None:
            org_id = str(dataset.organization.id)

        # Suggester image: organization logo first, owner avatar otherwise.
        image_url = None
        if dataset.organization:
            image_url = dataset.organization.logo(40)
        elif dataset.owner:
            image_url = dataset.owner.avatar(40)

        document = {
            'title': dataset.title,
            'description': dataset.description,
            'license': (
                dataset.license.id if dataset.license is not None else None
            ),
            'tags': dataset.tags,
            'badges': [badge.kind for badge in dataset.badges],
            'tag_suggest': dataset.tags,
            'resources': [
                {
                    'title': resource.title,
                    'description': resource.description,
                    'format': resource.format,
                }
                for resource in dataset.resources
            ],
            'format_suggest': [
                resource.format.lower()
                for resource in dataset.resources
                if resource.format
            ],
            'frequency': dataset.frequency,
            'organization': org_id,
            'owner': str(dataset.owner.id) if dataset.owner else None,
            'dataset_suggest': {
                'input': cls.completer_tokenize(dataset.title) + [dataset.id],
                'output': dataset.title,
                'payload': {
                    'id': str(dataset.id),
                    'slug': dataset.slug,
                    'image_url': image_url,
                },
            },
            'created': dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
            'last_modified': dataset.last_modified.strftime(
                '%Y-%m-%dT%H:%M:%S'),
            'metrics': dataset.metrics,
            'extras': dataset.extras,
            'featured': dataset.featured,
        }

        # Temporal coverage is indexed only when both bounds are set.
        coverage = dataset.temporal_coverage
        if coverage is not None and coverage.start and coverage.end:
            document['temporal_coverage'] = {
                'start': coverage.start.toordinal(),
                'end': coverage.end.toordinal(),
            }

        if dataset.spatial is not None:
            # Index precise zone labels and parents zone identifiers
            # to allow fast filtering.
            zone_ids = [z.id for z in dataset.spatial.zones]
            parent_ids = set()
            geozones = []
            for zone in GeoZone.objects(id__in=zone_ids):
                geozones.append({
                    'id': zone.id,
                    'name': zone.name,
                    'keys': zone.keys_values,
                })
                parent_ids.update(zone.parents)

            # Parent zones are indexed by identifier only.
            for parent_id in parent_ids:
                geozones.append({'id': parent_id})

            document['geozones'] = geozones
            document['granularity'] = dataset.spatial.granularity

        return document
Exemple #6
0
class OrganizationSearch(search.ModelSearchAdapter):
    '''Search adapter for ``Organization`` documents.

    Declares the indexed fields, sorts, facets and boosters, and builds
    the indexed document in ``serialize``.
    '''
    model = Organization
    fuzzy = True

    class Meta:
        doc_type = 'Organization'

    name = String(
        analyzer=search.i18n_analyzer,
        fields={'raw': String(index='not_analyzed')},
    )
    acronym = String(index='not_analyzed')
    description = String(analyzer=search.i18n_analyzer)
    badges = String(index='not_analyzed')
    url = String(index='not_analyzed')
    created = Date(format='date_hour_minute_second')
    metrics = search.metrics_mapping_for(Organization)
    org_suggest = Completion(
        analyzer=simple,
        search_analyzer=simple,
        payloads=True,
    )

    # Public sort name -> indexed field used for sorting.
    # NOTE(review): 'last_modified' is neither declared as a field above
    # nor emitted by serialize() -- confirm this sort is usable.
    sorts = {
        'name': 'name.raw',
        'reuses': 'metrics.reuses',
        'datasets': 'metrics.datasets',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
        'created': 'created',
        'last_modified': 'last_modified',
    }
    facets = {
        'reuses': RangeFacet(
            field='metrics.reuses',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No reuses'),
                'few': _('Few reuses'),
                'many': _('Many reuses'),
            },
        ),
        'badge': TermsFacet(
            field='badges',
            labelizer=organization_badge_labelizer,
        ),
        'datasets': RangeFacet(
            field='metrics.datasets',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No datasets'),
                'few': _('Few datasets'),
                'many': _('Many datasets'),
            },
        ),
        'followers': RangeFacet(
            field='metrics.followers',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No followers'),
                'few': _('Few followers'),
                'many': _('Many followers'),
            },
        ),
    }
    boosters = [
        search.GaussDecay(
            'metrics.followers',
            max_followers,
            decay=lazy('followers_decay'),
        ),
        search.GaussDecay(
            'metrics.reuses',
            max_reuses,
            decay=lazy('reuses_decay'),
        ),
        search.GaussDecay(
            'metrics.datasets',
            max_datasets,
            decay=lazy('datasets_decay'),
        ),
    ]

    @classmethod
    def is_indexable(cls, org):
        '''Tell whether ``org`` should be indexed (it must not be deleted).'''
        return org.deleted is None

    @classmethod
    def serialize(cls, organization):
        '''Build the document indexed for ``organization``.

        Suggester inputs are the tokenized name, the identifier and,
        when set, the acronym.
        '''
        suggest_inputs = cls.completer_tokenize(organization.name)
        suggest_inputs.append(organization.id)
        if organization.acronym:
            suggest_inputs.append(organization.acronym)

        document = {
            'name': organization.name,
            'acronym': organization.acronym,
            'description': organization.description,
            'url': organization.url,
            'metrics': organization.metrics,
            'badges': [badge.kind for badge in organization.badges],
            'created': to_iso_datetime(organization.created_at),
        }
        document['org_suggest'] = {
            'input': suggest_inputs,
            'output': str(organization.id),
            'payload': {
                'name': organization.name,
                'acronym': organization.acronym,
                'image_url': organization.logo(40, external=True),
                'slug': organization.slug,
            },
        }
        return document