def test_map_metrics(self):
    """Check the mapping generated by ``search.metrics_mapping_for(Fake)``.

    The result is expected to be an ``object`` mapping exposing one
    ``integer`` property and one ``float`` property.
    """
    expected = {
        'type': 'object',
        'properties': {
            'fake-metric-int': {'type': 'integer'},
            'fake-metric-float': {'type': 'float'},
        },
    }
    self.assert_dict_equal(search.metrics_mapping_for(Fake), expected)
def test_map_metrics(self):
    """Check the mapping generated by ``search.metrics_mapping_for(Fake)``.

    The result is compared with ``assert_json_equal`` against an ``object``
    mapping exposing one ``integer`` property and one ``float`` property.
    """
    expected = {
        'type': 'object',
        'properties': {
            'fake-metric-int': {'type': 'integer'},
            'fake-metric-float': {'type': 'float'},
        },
    }
    assert_json_equal(search.metrics_mapping_for(Fake), expected)
class DatasetSearch(ModelSearchAdapter):
    """Search adapter for ``Dataset``.

    Declares the indexed fields (mapping), the fields queried with their
    boosts, the available sorts, facets and score boosters, and provides
    ``serialize`` to turn a dataset into the document that gets indexed.
    """
    model = Dataset
    fuzzy = True
    exclude_fields = ['spatial.geom', 'spatial.zones.geom']

    class Meta:
        doc_type = 'Dataset'

    # --- Mapping ---------------------------------------------------------
    title = String(analyzer=i18n_analyzer, fields={
        'raw': String(index='not_analyzed')
    })
    description = String(analyzer=i18n_analyzer)
    license = String(index='not_analyzed')
    frequency = String(index='not_analyzed')
    organization = String(index='not_analyzed')
    owner = String(index='not_analyzed')
    tags = String(index='not_analyzed', fields={
        'i18n': String(index='not_analyzed')
    })
    badges = String(index='not_analyzed')
    tag_suggest = Completion(analyzer=simple,
                             search_analyzer=simple,
                             payloads=False)
    resources = Object(
        properties={
            'title': String(),
            'description': String(),
            'format': String(index='not_analyzed')
        })
    format_suggest = Completion(analyzer=simple,
                                search_analyzer=simple,
                                payloads=False)
    dataset_suggest = Completion(analyzer=simple,
                                 search_analyzer=simple,
                                 payloads=True)
    created = Date(format='date_hour_minute_second')
    last_modified = Date(format='date_hour_minute_second')
    metrics = metrics_mapping_for(Dataset)
    featured = Boolean()
    temporal_coverage = Nested(multi=False, properties={
        'start': Long(),
        'end': Long()
    })
    # Must be a bare ``Long()``: a trailing comma here would turn the
    # attribute into a 1-tuple instead of a field declaration.
    temporal_weight = Long()
    geozones = Object(
        properties={
            'id': String(index='not_analyzed'),
            'name': String(index='not_analyzed'),
            'keys': String(index='not_analyzed')
        })
    granularity = String(index='not_analyzed')
    spatial_weight = Long()
    from_certified = Boolean()

    # --- Query configuration ---------------------------------------------
    # Fields searched, with their boost factor after ``^``.
    # NOTE(review): ``acronym`` is neither declared in the mapping above nor
    # emitted at the top level by ``serialize`` (only inside the suggest
    # payload) -- confirm whether this boosted field can ever match.
    fields = (
        'geozones.keys^9',
        'geozones.name^9',
        'acronym^7',
        'title^6',
        'tags.i18n^3',
        'description',
    )
    # Public sort name -> indexed field used for sorting.
    sorts = {
        'title': 'title.raw',
        'created': 'created',
        'last_modified': 'last_modified',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
    }
    facets = {
        'tag': TermsFacet(field='tags'),
        'badge': TermsFacet(field='badges',
                            labelizer=dataset_badge_labelizer),
        'organization': ModelTermsFacet(field='organization',
                                        model=Organization),
        'owner': ModelTermsFacet(field='owner', model=User),
        'license': ModelTermsFacet(field='license', model=License),
        'geozone': ModelTermsFacet(field='geozones.id', model=GeoZone,
                                   labelizer=zone_labelizer),
        'granularity': TermsFacet(field='granularity',
                                  labelizer=granularity_labelizer),
        'format': TermsFacet(field='resources.format'),
        'reuses': RangeFacet(field='metrics.reuses',
                             ranges=[('none', (None, 1)),
                                     ('few', (1, 5)),
                                     ('quite', (5, 10)),
                                     ('many', (10, None))],
                             labels={
                                 'none': _('Never reused'),
                                 'few': _('Little reused'),
                                 'quite': _('Quite reused'),
                                 'many': _('Heavily reused'),
                             }),
        'temporal_coverage': TemporalCoverageFacet(field='temporal_coverage'),
        'featured': BoolFacet(field='featured'),
    }
    boosters = [
        BoolBooster('featured', 1.5),
        BoolBooster('from_certified', 1.2),
        ValueFactor('spatial_weight', missing=1),
        ValueFactor('temporal_weight', missing=1),
        GaussDecay('metrics.reuses', max_reuses, decay=0.1),
        GaussDecay(
            'metrics.followers', max_followers, max_followers, decay=0.1),
    ]

    @classmethod
    def is_indexable(cls, dataset):
        """Tell whether ``dataset`` should be indexed.

        A dataset is indexable when it is not deleted, has at least one
        resource and is not private.
        """
        return (dataset.deleted is None and
                len(dataset.resources) > 0 and
                not dataset.private)

    @classmethod
    def get_suggest_weight(cls, temporal_weight, spatial_weight, featured):
        """Compute the weight stored in the suggest part of the document.

        Returns the product of ``temporal_weight``, ``spatial_weight`` and
        ``FEATURED_WEIGHT`` (the latter replaced by 1 when ``featured`` is
        falsy).
        """
        featured_weight = FEATURED_WEIGHT if featured else 1
        return temporal_weight * spatial_weight * featured_weight

    @classmethod
    def serialize(cls, dataset):
        """Build the document to index for ``dataset`` and return it as a dict.

        The base document is always produced. ``temporal_coverage`` and
        ``temporal_weight`` are only added when the dataset has a temporal
        coverage with both bounds set; ``geozones``, ``granularity`` and
        ``spatial_weight`` are only added when ``dataset.spatial`` is set.
        The suggest weight is always computed, falling back to the default
        temporal and spatial weights.
        """
        organization = None
        owner = None
        image_url = None
        spatial_weight = DEFAULT_SPATIAL_WEIGHT
        temporal_weight = DEFAULT_TEMPORAL_WEIGHT

        # The organization takes precedence over the owner, both for the
        # indexed identifier and for the image shown in suggestions.
        if dataset.organization:
            organization = Organization.objects(
                id=dataset.organization.id).first()
            image_url = organization.logo(40, external=True)
        elif dataset.owner:
            owner = User.objects(id=dataset.owner.id).first()
            image_url = owner.avatar(40, external=True)

        # Evaluates to ``None`` (not ``False``) when there is no organization.
        certified = organization and organization.certified

        document = {
            'title': dataset.title,
            'description': dataset.description,
            'license': getattr(dataset.license, 'id', None),
            'tags': dataset.tags,
            'badges': [badge.kind for badge in dataset.badges],
            'tag_suggest': dataset.tags,
            'resources': [
                {
                    'title': r.title,
                    'description': r.description,
                    'format': r.format,
                }
                for r in dataset.resources],
            'format_suggest': [r.format.lower()
                               for r in dataset.resources
                               if r.format],
            'frequency': dataset.frequency,
            'organization': str(organization.id) if organization else None,
            'owner': str(owner.id) if owner else None,
            'dataset_suggest': {
                'input': cls.completer_tokenize(dataset.title) + [dataset.id],
                'output': dataset.title,
                'payload': {
                    'id': str(dataset.id),
                    'slug': dataset.slug,
                    'acronym': dataset.acronym,
                    'image_url': image_url,
                },
            },
            'created': dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
            'last_modified': dataset.last_modified.strftime(
                '%Y-%m-%dT%H:%M:%S'),
            'metrics': dataset.metrics,
            'featured': dataset.featured,
            'from_certified': certified,
        }

        if (dataset.temporal_coverage is not None and
                dataset.temporal_coverage.start and
                dataset.temporal_coverage.end):
            # Bounds are indexed as proleptic ordinals (day numbers).
            start = dataset.temporal_coverage.start.toordinal()
            end = dataset.temporal_coverage.end.toordinal()
            # Covered span in days divided by 365, capped.
            temporal_weight = min((end - start) / 365, MAX_TEMPORAL_WEIGHT)
            document.update({
                'temporal_coverage': {'start': start, 'end': end},
                'temporal_weight': temporal_weight,
            })

        if dataset.spatial is not None:
            # Index precise zone labels and parents zone identifiers
            # to allow fast filtering.
            zone_ids = [z.id for z in dataset.spatial.zones]
            zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
            parents = set()
            geozones = []
            coverage_level = ADMIN_LEVEL_MAX
            for zone in zones:
                geozones.append({
                    'id': zone.id,
                    'name': zone.name,
                    'keys': zone.keys_values
                })
                parents |= set(zone.parents)
                # Keep the smallest admin level found among the zones.
                coverage_level = min(coverage_level, admin_levels[zone.level])

            # Parent zones are indexed by identifier only.
            geozones.extend([{'id': p} for p in parents])

            spatial_weight = ADMIN_LEVEL_MAX / coverage_level
            document.update({
                'geozones': geozones,
                'granularity': dataset.spatial.granularity,
                'spatial_weight': spatial_weight,
            })

        document['dataset_suggest']['weight'] = cls.get_suggest_weight(
            temporal_weight, spatial_weight, dataset.featured)

        # The acronym is an additional completion input when present.
        if dataset.acronym:
            document['dataset_suggest']['input'].append(dataset.acronym)

        return document
class UserSearch(ModelSearchAdapter):
    """Search adapter for ``User``.

    Declares the indexed fields, the available sorts, facets and score
    boosters, and provides ``serialize`` to build the indexed document.
    """
    model = User
    fuzzy = True

    class Meta:
        doc_type = 'User'

    # --- Mapping ---------------------------------------------------------
    first_name = String()
    last_name = String()
    about = String(analyzer=i18n_analyzer)
    organizations = String(index='not_analyzed')
    visible = Boolean()
    metrics = metrics_mapping_for(User)
    created = Date(format='date_hour_minute_second')
    user_suggest = Completion(
        analyzer=simple,
        search_analyzer=simple,
        payloads=True,
    )

    # Public sort name -> indexed field used for sorting.
    sorts = {
        'last_name': 'last_name',
        'first_name': 'first_name',
        'datasets': 'metrics.datasets',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
        'created': 'created',
    }

    facets = {
        'organization': ModelTermsFacet(
            field='organizations',
            model=Organization,
        ),
        'datasets': RangeFacet(
            field='metrics.datasets',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No datasets'),
                'few': _('Few datasets'),
                'many': _('Many datasets'),
            },
        ),
        'followers': RangeFacet(
            field='metrics.followers',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No followers'),
                'few': _('Few followers'),
                'many': _('Many followers'),
            },
        ),
    }

    boosters = [
        GaussDecay('metrics.reuses', 50, decay=0.8),
        GaussDecay('metrics.datasets', 50, decay=0.8),
        GaussDecay('metrics.followers', 200, 200, decay=0.8),
    ]

    @classmethod
    def serialize(cls, user):
        """Build the document to index for ``user`` and return it as a dict.

        The completion inputs are the tokens of the user's full name plus
        the user identifier; the suggestion output is the identifier as a
        string.
        """
        document = {
            'first_name': user.first_name,
            'last_name': user.last_name,
            'about': user.about,
            'organizations': [str(org.id) for org in user.organizations],
            'metrics': user.metrics,
            'created': to_iso_datetime(user.created_at),
        }
        document['user_suggest'] = {
            'input': cls.completer_tokenize(user.fullname) + [user.id],
            'output': str(user.id),
            'payload': {
                'avatar_url': user.avatar(40, external=True),
                'first_name': user.first_name,
                'last_name': user.last_name,
                'slug': user.slug,
            },
        }
        document['visible'] = user.visible
        return document
class ReuseSearch(ModelSearchAdapter):
    """Search adapter for ``Reuse``.

    Declares the indexed fields (mapping), the fields queried with their
    boosts, the available facets, sorts and score boosters, and provides
    ``serialize`` to turn a reuse into the document that gets indexed.
    """
    model = Reuse
    fuzzy = True

    class Meta:
        doc_type = 'Reuse'

    # --- Mapping ---------------------------------------------------------
    title = String(analyzer=i18n_analyzer, fields={
        'raw': String(index='not_analyzed')
    })
    description = String(analyzer=i18n_analyzer)
    url = String(index='not_analyzed')
    organization = String(index='not_analyzed')
    owner = String(index='not_analyzed')
    type = String(index='not_analyzed')
    tags = String(index='not_analyzed', fields={
        'i18n': String(index='not_analyzed')
    })
    badges = String(index='not_analyzed')
    tag_suggest = Completion(analyzer=simple,
                             search_analyzer=simple,
                             payloads=False)
    # ``serialize`` and the ``dataset`` facet below must use this same
    # ``datasets`` name so that this explicit mapping is the one applied.
    datasets = Object(
        properties={
            'id': String(index='not_analyzed'),
            'title': String(),
        }
    )
    created = Date(format='date_hour_minute_second')
    last_modified = Date(format='date_hour_minute_second')
    metrics = metrics_mapping_for(Reuse)
    featured = Boolean()
    reuse_suggest = Completion(analyzer=simple,
                               search_analyzer=simple,
                               payloads=True)
    extras = Object()

    # --- Query configuration ---------------------------------------------
    # Fields searched, with their boost factor after ``^``.
    fields = (
        'title^4',
        'description^2',
        'datasets.title',
    )
    facets = {
        'tag': TermsFacet(field='tags'),
        'organization': ModelTermsFacet(field='organization',
                                        model=Organization),
        'owner': ModelTermsFacet(field='owner', model=User),
        # The facet keeps its public name ``dataset`` but targets the
        # declared ``datasets.id`` field (previously the undeclared
        # ``dataset.id``).
        'dataset': ModelTermsFacet(field='datasets.id', model=Dataset),
        'type': TermsFacet(field='type', labelizer=reuse_type_labelizer),
        'datasets': RangeFacet(field='metrics.datasets',
                               ranges=[('none', (None, 1)),
                                       ('few', (1, 5)),
                                       ('many', (5, None))],
                               labels={
                                   'none': _('No datasets'),
                                   'few': _('Few datasets'),
                                   'many': _('Many datasets'),
                               }),
        'followers': RangeFacet(field='metrics.followers',
                                ranges=[('none', (None, 1)),
                                        ('few', (1, 5)),
                                        ('many', (5, None))],
                                labels={
                                    'none': _('No followers'),
                                    'few': _('Few followers'),
                                    'many': _('Many followers'),
                                }),
        'badge': TermsFacet(field='badges', labelizer=reuse_badge_labelizer),
        'featured': BoolFacet(field='featured'),
    }
    # Public sort name -> indexed field used for sorting.
    sorts = {
        'title': 'title.raw',
        'created': 'created',
        'last_modified': 'last_modified',
        'datasets': 'metrics.datasets',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
    }
    boosters = [
        BoolBooster('featured', 1.1),
        GaussDecay('metrics.datasets', max_datasets, decay=0.8),
        GaussDecay('metrics.followers', max_followers, decay=0.8),
    ]

    @classmethod
    def is_indexable(cls, reuse):
        """Tell whether ``reuse`` should be indexed.

        A reuse is indexable when it is not deleted, references at least
        one dataset and is not private.
        """
        return (reuse.deleted is None and
                len(reuse.datasets) > 0 and
                not reuse.private)

    @classmethod
    def serialize(cls, reuse):
        """Build the document to index for ``reuse`` and return it as a dict.

        Referenced datasets are emitted under the ``datasets`` key, matching
        the declared mapping and the ``datasets.title`` query field; entries
        of ``reuse.datasets`` that are not ``Dataset`` instances are skipped.
        """
        return {
            'title': reuse.title,
            'description': reuse.description,
            'url': reuse.url,
            'organization': (str(reuse.organization.id)
                             if reuse.organization else None),
            'owner': str(reuse.owner.id) if reuse.owner else None,
            'type': reuse.type,
            'tags': reuse.tags,
            'tag_suggest': reuse.tags,
            'badges': [badge.kind for badge in reuse.badges],
            'created': reuse.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
            'last_modified': reuse.last_modified.strftime(
                '%Y-%m-%dT%H:%M:%S'),
            # NOTE(review): key renamed from ``dataset`` to ``datasets``;
            # documents indexed before this change presumably need a reindex
            # to expose the new key.
            'datasets': [{
                'id': str(d.id),
                'title': d.title
            } for d in reuse.datasets if isinstance(d, Dataset)],
            'metrics': reuse.metrics,
            'featured': reuse.featured,
            'extras': reuse.extras,
            'reuse_suggest': {
                'input': cls.completer_tokenize(reuse.title) + [reuse.id],
                'output': str(reuse.id),
                'payload': {
                    'title': reuse.title,
                    'slug': reuse.slug,
                    'image_url': reuse.image(40, external=True),
                },
            },
        }
class OrganizationSearch(search.ModelSearchAdapter):
    """Search adapter for ``Organization``.

    Declares the indexed fields, the available sorts, facets and score
    boosters, and provides ``serialize`` to build the indexed document.
    """
    model = Organization
    fuzzy = True

    class Meta:
        doc_type = 'Organization'

    # --- Mapping ---------------------------------------------------------
    name = String(
        analyzer=search.i18n_analyzer,
        fields={'raw': String(index='not_analyzed')},
    )
    acronym = String(index='not_analyzed')
    description = String(analyzer=search.i18n_analyzer)
    badges = String(index='not_analyzed')
    url = String(index='not_analyzed')
    created = Date(format='date_hour_minute_second')
    metrics = search.metrics_mapping_for(Organization)
    org_suggest = Completion(
        analyzer=simple,
        search_analyzer=simple,
        payloads=True,
    )

    # Public sort name -> indexed field used for sorting.
    # NOTE(review): ``last_modified`` is neither declared in the mapping
    # above nor emitted by ``serialize`` -- confirm this sort target exists.
    sorts = {
        'name': 'name.raw',
        'reuses': 'metrics.reuses',
        'datasets': 'metrics.datasets',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
        'created': 'created',
        'last_modified': 'last_modified',
    }

    facets = {
        'reuses': RangeFacet(
            field='metrics.reuses',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No reuses'),
                'few': _('Few reuses'),
                'many': _('Many reuses'),
            },
        ),
        'badge': TermsFacet(
            field='badges',
            labelizer=organization_badge_labelizer,
        ),
        'datasets': RangeFacet(
            field='metrics.datasets',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No datasets'),
                'few': _('Few datasets'),
                'many': _('Many datasets'),
            },
        ),
        'followers': RangeFacet(
            field='metrics.followers',
            ranges=[
                ('none', (None, 1)),
                ('few', (1, 5)),
                ('many', (5, None)),
            ],
            labels={
                'none': _('No followers'),
                'few': _('Few followers'),
                'many': _('Many followers'),
            },
        ),
    }

    boosters = [
        search.GaussDecay(
            'metrics.followers', max_followers,
            decay=lazy('followers_decay')),
        search.GaussDecay(
            'metrics.reuses', max_reuses,
            decay=lazy('reuses_decay')),
        search.GaussDecay(
            'metrics.datasets', max_datasets,
            decay=lazy('datasets_decay')),
    ]

    @classmethod
    def is_indexable(cls, org):
        """Tell whether ``org`` should be indexed: only when not deleted."""
        return org.deleted is None

    @classmethod
    def serialize(cls, organization):
        """Build the document to index for ``organization`` as a dict.

        The completion inputs are the tokens of the organization name,
        followed by its identifier and, when set, its acronym.
        """
        suggest_inputs = cls.completer_tokenize(organization.name)
        # Extend the tokenized list in place, identifier first.
        extra_inputs = [organization.id]
        if organization.acronym:
            extra_inputs.append(organization.acronym)
        suggest_inputs.extend(extra_inputs)

        document = {
            'name': organization.name,
            'acronym': organization.acronym,
            'description': organization.description,
            'url': organization.url,
            'metrics': organization.metrics,
            'badges': [badge.kind for badge in organization.badges],
            'created': to_iso_datetime(organization.created_at),
        }
        document['org_suggest'] = {
            'input': suggest_inputs,
            'output': str(organization.id),
            'payload': {
                'name': organization.name,
                'acronym': organization.acronym,
                'image_url': organization.logo(40, external=True),
                'slug': organization.slug,
            },
        }
        return document