def preview_from_config(
    name,
    url,
    backend,
    description=None,
    frequency=DEFAULT_HARVEST_FREQUENCY,
    owner=None,
    organization=None,
    config=None,
):
    '''
    Run a dry-run harvest on a source built from the given parameters.

    The ``HarvestSource`` is only instantiated here, not created through
    ``HarvestSource.objects``. ``owner`` and ``organization`` may be given
    either as model instances or as values accepted by ``User.get`` /
    ``Organization.get``. A falsy ``frequency`` is replaced by
    ``DEFAULT_HARVEST_FREQUENCY``.

    Returns the result of the backend's ``harvest()`` call.
    '''
    if owner and not isinstance(owner, User):
        owner = User.get(owner)
    if organization and not isinstance(organization, Organization):
        organization = Organization.get(organization)

    source_fields = {
        'name': name,
        'url': url,
        'backend': backend,
        'description': description,
        'frequency': frequency or DEFAULT_HARVEST_FREQUENCY,
        'owner': owner,
        'organization': organization,
        'config': config,
    }
    source = HarvestSource(**source_fields)

    backend_cls = backends.get(current_app, source.backend)
    harvester = backend_cls(
        source,
        dryrun=True,
        max_items=current_app.config['HARVEST_PREVIEW_MAX_ITEMS'],
    )
    return harvester.harvest()
def create_source(
    name,
    url,
    backend,
    description=None,
    frequency=DEFAULT_HARVEST_FREQUENCY,
    owner=None,
    organization=None,
    config=None,
):
    '''
    Create a harvest source and emit ``harvest_source_created``.

    ``owner`` and ``organization`` may be given either as model instances
    or as values accepted by ``User.get`` / ``Organization.get``. A falsy
    ``frequency`` is replaced by ``DEFAULT_HARVEST_FREQUENCY``.

    Returns the object returned by ``HarvestSource.objects.create``.
    '''
    if owner and not isinstance(owner, User):
        owner = User.get(owner)
    if organization and not isinstance(organization, Organization):
        organization = Organization.get(organization)

    source_fields = {
        'name': name,
        'url': url,
        'backend': backend,
        'description': description,
        'frequency': frequency or DEFAULT_HARVEST_FREQUENCY,
        'owner': owner,
        'organization': organization,
        'config': config,
    }
    created = HarvestSource.objects.create(**source_fields)
    # Let signal listeners know about the new source before handing it back.
    signals.harvest_source_created.send(created)
    return created
def preview_from_config(name, url, backend, description=None,
                        frequency=DEFAULT_HARVEST_FREQUENCY, owner=None,
                        organization=None, config=None):
    '''
    Preview a harvest for a source described by the given parameters.

    The source is instantiated directly (not through
    ``HarvestSource.objects``) and the backend is run with ``dryrun=True``,
    capped at the ``HARVEST_PREVIEW_MAX_ITEMS`` application setting.

    ``owner`` / ``organization`` are looked up with ``User.get`` /
    ``Organization.get`` when they are truthy and not already instances.
    '''
    needs_owner_lookup = owner and not isinstance(owner, User)
    if needs_owner_lookup:
        owner = User.get(owner)

    needs_org_lookup = (
        organization and not isinstance(organization, Organization)
    )
    if needs_org_lookup:
        organization = Organization.get(organization)

    effective_frequency = frequency or DEFAULT_HARVEST_FREQUENCY
    source = HarvestSource(
        name=name,
        url=url,
        backend=backend,
        description=description,
        frequency=effective_frequency,
        owner=owner,
        organization=organization,
        config=config,
    )

    backend_cls = backends.get(current_app, source.backend)
    item_cap = current_app.config['HARVEST_PREVIEW_MAX_ITEMS']
    return backend_cls(source, dryrun=True, max_items=item_cap).harvest()
def create_source(name, url, backend, description=None,
                  frequency=DEFAULT_HARVEST_FREQUENCY, owner=None,
                  organization=None, config=None):
    '''
    Create a new harvest source and send ``harvest_source_created``.

    ``owner`` / ``organization`` are looked up with ``User.get`` /
    ``Organization.get`` when they are truthy and not already instances.
    A falsy ``frequency`` falls back to ``DEFAULT_HARVEST_FREQUENCY``.
    '''
    needs_owner_lookup = owner and not isinstance(owner, User)
    if needs_owner_lookup:
        owner = User.get(owner)

    needs_org_lookup = (
        organization and not isinstance(organization, Organization)
    )
    if needs_org_lookup:
        organization = Organization.get(organization)

    effective_frequency = frequency or DEFAULT_HARVEST_FREQUENCY
    new_source = HarvestSource.objects.create(
        name=name,
        url=url,
        backend=backend,
        description=description,
        frequency=effective_frequency,
        owner=owner,
        organization=organization,
        config=config,
    )
    signals.harvest_source_created.send(new_source)
    return new_source
def delete():
    '''
    Delete an existing user.

    Prompts for an email address, looks up the first user with that email
    and deletes it. Reports ``Invalid user`` through ``exit_with_error``
    when the lookup finds nothing.
    '''
    address = click.prompt('Email')
    account = User.objects(email=address).first()
    if not account:
        # NOTE(review): relies on exit_with_error not returning - confirm.
        exit_with_error('Invalid user')
    account.delete()
    success('User deleted successfully')
def run(self):
    '''
    Prompt for an email address and delete the matching user.

    Prints ``Invalid user`` and returns without deleting anything when no
    user has that email; prints a confirmation after a successful delete.
    '''
    email = prompt('Email')
    user = User.objects(email=email).first()
    if not user:
        # Single-argument call form: valid on Python 3, and under the
        # Python 2 print statement it outputs the same text.
        print('Invalid user')
        return
    user.delete()
    print('User deleted successfully')
def delete():
    '''
    Delete an existing user.

    Prompts for an email address and deletes the first user found with it.
    Prints ``Invalid user`` and returns without deleting anything when no
    user matches.
    '''
    email = prompt('Email')
    user = User.objects(email=email).first()
    if not user:
        # Single-argument call form: valid on Python 3, and under the
        # Python 2 print statement it outputs the same text.
        print('Invalid user')
        return
    user.delete()
    print('User deleted successfully')
def delete():
    """
    Delete an existing user.

    Prompts for an email address and deletes the first user found with it.
    Prints ``Invalid user`` and returns without deleting anything when no
    user matches.
    """
    email = prompt("Email")
    user = User.objects(email=email).first()
    if not user:
        # Single-argument call form: valid on Python 3, and under the
        # Python 2 print statement it outputs the same text.
        print("Invalid user")
        return
    user.delete()
    print("User deleted successfully")
def activate():
    '''
    Activate an existing user (validate their email confirmation).

    Prompts for an email address, then stamps ``confirmed_at`` with the
    current UTC time on the matching user and saves it. An unknown email
    or an already confirmed user is reported through ``exit_with_error``.
    '''
    address = click.prompt('Email')
    account = User.objects(email=address).first()
    if not account:
        # NOTE(review): relies on exit_with_error not returning - confirm.
        exit_with_error('Invalid user')

    already_confirmed = account.confirmed_at is not None
    if already_confirmed:
        exit_with_error('User email address already confirmed')
        return

    account.confirmed_at = datetime.utcnow()
    account.save()
    success('User activated successfully')
def activate():
    '''
    Activate an existing user (validate their email confirmation).

    Prompts for an email address, then stamps ``confirmed_at`` with the
    current UTC time on the matching user and saves it. Prints a message
    and returns without saving when the email is unknown or the user is
    already confirmed.
    '''
    email = prompt('Email')
    user = User.objects(email=email).first()
    # print(...) with a single argument is valid on Python 3, and under
    # the Python 2 print statement it outputs the same text.
    if not user:
        print('Invalid user')
        return
    if user.confirmed_at is not None:
        print('User email address already confirmed')
        return
    user.confirmed_at = datetime.utcnow()
    user.save()
    print('User activated successfully')
def get(self):
    '''
    Suggest users.

    Slugifies the ``q`` argument and returns non-deleted users whose slug
    contains it (case-insensitive), ordered by ``DEFAULT_SORTING`` and
    limited to ``size`` entries, as a list of plain dicts.
    '''
    args = suggest_parser.parse_args()
    slug_fragment = slugify(args['q'], separator='-', to_lower=True)
    matches = User.objects(deleted=None, slug__icontains=slug_fragment)
    matches = matches.order_by(DEFAULT_SORTING).limit(args['size'])

    suggestions = []
    for match in matches:
        suggestions.append({
            'id': match.id,
            'first_name': match.first_name,
            'last_name': match.last_name,
            'avatar_url': match.avatar_url,
            'slug': match.slug,
        })
    return suggestions
def get(self):
    '''
    List all users.

    Returns a page of non-deleted users. When ``q`` is given the users are
    filtered with a text search. The explicit ``sort`` argument wins when
    set; otherwise text-search results are ordered by ``$text_score`` and
    plain listings by ``DEFAULT_SORTING``.
    '''
    args = user_parser.parse()
    queryset = User.objects(deleted=None)

    if args['q']:
        queryset = queryset.search_text(args['q'])
        fallback_sort = '$text_score'
    else:
        fallback_sort = DEFAULT_SORTING

    ordering = args['sort'] or fallback_sort
    return queryset.order_by(ordering).paginate(
        args['page'], args['page_size'])
def c3_badges(filename):
    '''
    Add a C3 badge to every dataset listed in ``filename``.

    The file is read line by line, one title per line; lines starting with
    ``*`` are skipped. Each title is matched against a dataset by exact
    title first, then by the slug of the lowercased title. Titles without
    a match are logged and skipped.
    '''
    with open(filename, 'r') as lines:
        bot = User.objects(first_name='Etalab', last_name='Bot').first()
        c3_badge = DatasetBadge(kind=C3, created_by=bot)
        for raw_line in lines:
            # NOTE(review): .decode() implies lines are byte strings
            # (Python 2 text-mode read) - confirm the target interpreter.
            title = raw_line.decode('utf-8').strip(u'\n')
            if title.startswith(u'*'):
                continue

            slug = slugify.slugify(title.lower())
            match = Dataset.objects(title=title).first()
            if not match:
                match = Dataset.objects(slug=slug).first()

            if match is None:
                log.info(u'{title} not found'.format(title=title))
                continue

            match.badges.append(c3_badge)
            match.save()
    log.info('Done')
def serialize(cls, reuse):
    """
    Build the search document for ``reuse``.

    The linked datasets are re-fetched with only ``id`` and ``title``
    loaded. Either the organization or, failing that, the owner is
    re-fetched and exposed by its stringified id; the other one is
    ``None``.
    """
    dataset_ids = [linked.id for linked in reuse.datasets]
    dataset_query = Dataset.objects(id__in=dataset_ids)
    linked_datasets = list(
        dataset_query.only('id', 'title').no_dereference())

    organization = None
    owner = None
    if reuse.organization:
        organization = Organization.objects(
            id=reuse.organization.id).first()
    elif reuse.owner:
        owner = User.objects(id=reuse.owner.id).first()

    organization_id = str(organization.id) if organization else None
    owner_id = str(owner.id) if owner else None

    return {
        'title': reuse.title,
        'description': reuse.description,
        'url': reuse.url,
        'organization': organization_id,
        'owner': owner_id,
        'type': reuse.type,
        'topic': reuse.topic,
        'tags': reuse.tags,
        'tag_suggest': reuse.tags,
        'badges': [badge.kind for badge in reuse.badges],
        'created': to_iso_datetime(reuse.created_at),
        'last_modified': to_iso_datetime(reuse.last_modified),
        'dataset': [
            {'id': str(entry.id), 'title': entry.title}
            for entry in linked_datasets
        ],
        'metrics': reuse.metrics,
        'featured': reuse.featured,
        'extras': reuse.extras,
        'reuse_suggest': {
            # The reuse id is appended to the tokenized title.
            'input': cls.completer_tokenize(reuse.title) + [reuse.id],
            'output': str(reuse.id),
            'payload': {
                'title': reuse.title,
                'slug': reuse.slug,
                'image_url': reuse.image(500, external=True),
            },
        },
    }
def serialize(cls, reuse):
    """
    Build the search document for ``reuse``.

    The linked datasets are re-fetched with only ``id`` and ``title``
    loaded. Either the organization or, failing that, the owner is
    re-fetched and exposed by its stringified id; the other one is
    ``None``. Timestamps are formatted as ``%Y-%m-%dT%H:%M:%S``.
    """
    dataset_ids = [linked.id for linked in reuse.datasets]
    dataset_query = Dataset.objects(id__in=dataset_ids)
    linked_datasets = list(
        dataset_query.only('id', 'title').no_dereference())

    organization = None
    owner = None
    if reuse.organization:
        organization = Organization.objects(
            id=reuse.organization.id).first()
    elif reuse.owner:
        owner = User.objects(id=reuse.owner.id).first()

    organization_id = str(organization.id) if organization else None
    owner_id = str(owner.id) if owner else None

    return {
        'title': reuse.title,
        'description': reuse.description,
        'url': reuse.url,
        'organization': organization_id,
        'owner': owner_id,
        'type': reuse.type,
        'tags': reuse.tags,
        'tag_suggest': reuse.tags,
        'badges': [badge.kind for badge in reuse.badges],
        'created': reuse.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
        'last_modified': reuse.last_modified.strftime('%Y-%m-%dT%H:%M:%S'),
        'dataset': [
            {'id': str(entry.id), 'title': entry.title}
            for entry in linked_datasets
        ],
        'metrics': reuse.metrics,
        'featured': reuse.featured,
        'extras': reuse.extras,
        'reuse_suggest': {
            # The reuse id is appended to the tokenized title.
            'input': cls.completer_tokenize(reuse.title) + [reuse.id],
            'output': str(reuse.id),
            'payload': {
                'title': reuse.title,
                'slug': reuse.slug,
                'image_url': reuse.image(40, external=True),
            },
        },
    }
def create_oauth_client(client_name, user_email, uri, grant_types, scope,
                        response_types):
    '''
    Create an OAuth2Client instance in DB and echo its details.

    The client is owned by the first user whose email is ``user_email``;
    ``exit_with_error`` is called when no such user exists. ``uri`` is
    stored as the client's ``redirect_uris``.
    '''
    owner = User.objects(email=user_email).first()
    if owner is None:
        # NOTE(review): relies on exit_with_error not returning - confirm.
        exit_with_error('No matching user to email')

    client = OAuth2Client.objects.create(
        name=client_name,
        owner=owner,
        grant_types=grant_types,
        scope=scope,
        response_types=response_types,
        redirect_uris=uri,
    )

    summary = (
        f"New OAuth client: {client.name}",
        f"Client's ID {client.id}",
        f"Client's secret {client.secret}",
        f"Client's grant_types {client.grant_types}",
        f"Client's response_types {client.response_types}",
        f"Client's URI {client.redirect_uris}",
    )
    for line in summary:
        click.echo(line)
def serialize(cls, dataset):
    '''
    Build the search document for ``dataset``.

    Besides the plain dataset fields, the document carries the suggester
    entry (``dataset_suggest``) and, when available, the temporal coverage
    and the spatial zones. The temporal and spatial weights start from
    their module defaults and are passed to ``cls.get_suggest_weight``.
    '''
    organization = None
    owner = None
    image_url = None
    if dataset.organization:
        organization = Organization.objects(
            id=dataset.organization.id).first()
        image_url = organization.logo(40, external=True)
    elif dataset.owner:
        owner = User.objects(id=dataset.owner.id).first()
        image_url = owner.avatar(40, external=True)
    certified = organization and organization.certified

    suggest_entry = {
        'input': cls.completer_tokenize(dataset.title) + [dataset.id],
        'output': dataset.title,
        'payload': {
            'id': str(dataset.id),
            'slug': dataset.slug,
            'acronym': dataset.acronym,
            'image_url': image_url,
        },
    }

    document = {
        'title': dataset.title,
        'description': dataset.description,
        'license': getattr(dataset.license, 'id', None),
        'tags': dataset.tags,
        'badges': [badge.kind for badge in dataset.badges],
        'tag_suggest': dataset.tags,
        'resources': [
            {
                'title': resource.title,
                'description': resource.description,
                'format': resource.format,
            }
            for resource in dataset.resources
        ],
        'format_suggest': [
            resource.format.lower()
            for resource in dataset.resources
            if resource.format
        ],
        'frequency': dataset.frequency,
        'organization': str(organization.id) if organization else None,
        'owner': str(owner.id) if owner else None,
        'dataset_suggest': suggest_entry,
        'created': dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
        'last_modified': dataset.last_modified.strftime('%Y-%m-%dT%H:%M:%S'),
        'metrics': dataset.metrics,
        'featured': dataset.featured,
        'from_certified': certified,
    }

    temporal_weight = DEFAULT_TEMPORAL_WEIGHT
    coverage = dataset.temporal_coverage
    if coverage is not None and coverage.start and coverage.end:
        start = coverage.start.toordinal()
        end = coverage.end.toordinal()
        # Covered span in years, capped at MAX_TEMPORAL_WEIGHT.
        temporal_weight = min((end - start) / 365, MAX_TEMPORAL_WEIGHT)
        document['temporal_coverage'] = {'start': start, 'end': end}
        document['temporal_weight'] = temporal_weight

    spatial_weight = DEFAULT_SPATIAL_WEIGHT
    if dataset.spatial is not None:
        # Index precise zone labels and parents zone identifiers
        # to allow fast filtering.
        zone_ids = [z.id for z in dataset.spatial.zones]
        parent_ids = set()
        geozones = []
        coverage_level = ADMIN_LEVEL_MAX
        for zone in GeoZone.objects(id__in=zone_ids).exclude('geom'):
            geozones.append({
                'id': zone.id,
                'name': zone.name,
                'keys': zone.keys_values,
            })
            parent_ids |= set(zone.parents)
            coverage_level = min(coverage_level, admin_levels[zone.level])
        geozones.extend([{'id': parent_id} for parent_id in parent_ids])
        spatial_weight = ADMIN_LEVEL_MAX / coverage_level
        document['geozones'] = geozones
        document['granularity'] = dataset.spatial.granularity
        document['spatial_weight'] = spatial_weight

    suggest_entry['weight'] = cls.get_suggest_weight(
        temporal_weight, spatial_weight, dataset.featured)
    if dataset.acronym:
        suggest_entry['input'].append(dataset.acronym)
    return document
def serialize(cls, dataset):
    '''
    Build the search document for ``dataset``.

    Besides the plain dataset fields, the document carries the suggester
    entry (``dataset_suggest``) and, when available, the temporal coverage
    and the spatial zones. The temporal and spatial weights start from
    their module defaults and are passed to ``cls.get_suggest_weight``.
    '''
    organization = None
    owner = None
    image_url = None
    if dataset.organization:
        organization = Organization.objects(
            id=dataset.organization.id).first()
        image_url = organization.logo(40, external=True)
    elif dataset.owner:
        owner = User.objects(id=dataset.owner.id).first()
        image_url = owner.avatar(40, external=True)
    certified = organization and organization.certified

    suggest_entry = {
        'input': cls.completer_tokenize(dataset.title) + [str(dataset.id)],
        'output': dataset.title,
        'payload': {
            'id': str(dataset.id),
            'slug': dataset.slug,
            'acronym': dataset.acronym,
            'image_url': image_url,
        },
    }

    document = {
        'title': dataset.title,
        'description': dataset.description,
        'license': getattr(dataset.license, 'id', None),
        'tags': dataset.tags,
        'badges': [badge.kind for badge in dataset.badges],
        'tag_suggest': dataset.tags,
        'resources': [
            {
                'title': resource.title,
                'description': resource.description,
                'format': resource.format,
                'type': resource.type,
            }
            for resource in dataset.resources
        ],
        'format_suggest': [
            resource.format.lower()
            for resource in dataset.resources
            if resource.format
        ],
        'frequency': dataset.frequency,
        'organization': str(organization.id) if organization else None,
        'owner': str(owner.id) if owner else None,
        'dataset_suggest': suggest_entry,
        'created': dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
        'last_modified': dataset.last_modified.strftime('%Y-%m-%dT%H:%M:%S'),
        'metrics': dataset.metrics,
        'featured': dataset.featured,
        'from_certified': certified,
    }

    temporal_weight = DEFAULT_TEMPORAL_WEIGHT
    coverage = dataset.temporal_coverage
    if coverage is not None and coverage.start and coverage.end:
        start = coverage.start.toordinal()
        end = coverage.end.toordinal()
        # Covered span in years, capped at MAX_TEMPORAL_WEIGHT.
        temporal_weight = min((end - start) / 365, MAX_TEMPORAL_WEIGHT)
        document['temporal_coverage'] = {'start': start, 'end': end}
        document['temporal_weight'] = temporal_weight

    spatial_weight = DEFAULT_SPATIAL_WEIGHT
    if dataset.spatial is not None:
        # Index precise zone labels and parents zone identifiers
        # to allow fast filtering.
        zone_ids = [z.id for z in dataset.spatial.zones]
        parent_ids = set()
        geozones = []
        coverage_level = ADMIN_LEVEL_MAX
        for zone in GeoZone.objects(id__in=zone_ids).exclude('geom'):
            geozones.append({
                'id': zone.id,
                'name': zone.name,
                'keys': zone.keys_values,
            })
            parent_ids |= set(zone.parents)
            coverage_level = min(coverage_level, admin_levels[zone.level])
        geozones.extend([{'id': parent_id} for parent_id in parent_ids])
        spatial_weight = ADMIN_LEVEL_MAX / coverage_level
        document['geozones'] = geozones
        document['granularity'] = dataset.spatial.granularity
        document['spatial_weight'] = spatial_weight

    suggest_entry['weight'] = cls.get_suggest_weight(
        temporal_weight, spatial_weight, dataset.featured)
    if dataset.acronym:
        suggest_entry['input'].append(dataset.acronym)
    return document
def get_value(self):
    '''Count the users whose ``starred_reuses`` matches ``self.target``.'''
    starring_users = User.objects(starred_reuses=self.target)
    return starring_users.count()
def get_value(self):
    '''Count the users whose ``starred_datasets`` matches ``self.target``.'''
    starring_users = User.objects(starred_datasets=self.target)
    return starring_users.count()
def serialize(cls, dataset):
    '''
    Build the search document for ``dataset``.

    When the dataset has an organization, it is embedded as a small dict
    (id, name, ``public_service`` flag, followers count); otherwise the
    owner, if any, is exposed by its stringified id. Datetime values found
    in ``dataset.extras`` are converted with ``to_iso_datetime``. Temporal
    coverage and spatial zones are added only when present.

    Returns the document as a plain dict.
    '''
    organization = None
    owner = None
    if dataset.organization:
        org = Organization.objects(id=dataset.organization.id).first()
        organization = {
            'id': str(org.id),
            'name': org.name,
            'public_service': 1 if org.public_service else 0,
            'followers': org.metrics.get('followers', 0)
        }
    elif dataset.owner:
        owner = User.objects(id=dataset.owner.id).first()

    document = {
        'id': str(dataset.id),
        'title': dataset.title,
        'description': dataset.description,
        'acronym': dataset.acronym or None,
        'url': dataset.display_url,
        'tags': dataset.tags,
        'license': getattr(dataset.license, 'id', None),
        'badges': [badge.kind for badge in dataset.badges],
        'frequency': dataset.frequency,
        'created_at': to_iso_datetime(dataset.created_at),
        'views': dataset.metrics.get('views', 0),
        'followers': dataset.metrics.get('followers', 0),
        'reuses': dataset.metrics.get('reuses', 0),
        'featured': 1 if dataset.featured else 0,
        'resources_count': len(dataset.resources),
        'organization': organization,
        'owner': str(owner.id) if owner else None,
        'format': [r.format.lower() for r in dataset.resources if r.format],
        'schema': [r.schema.get('name') for r in dataset.resources if r.schema]
    }

    # Only datetime values are converted; everything else passes through.
    extras = {
        key: (to_iso_datetime(value)
              if isinstance(value, datetime.datetime) else value)
        for key, value in dataset.extras.items()
    }
    document.update({'extras': extras})

    if (dataset.temporal_coverage is not None and
            dataset.temporal_coverage.start and
            dataset.temporal_coverage.end):
        start = to_iso_datetime(dataset.temporal_coverage.start)
        end = to_iso_datetime(dataset.temporal_coverage.end)
        document.update({
            'temporal_coverage_start': start,
            'temporal_coverage_end': end,
        })

    if dataset.spatial is not None:
        # Index precise zone labels and parents zone identifiers
        # to allow fast filtering.
        zone_ids = [z.id for z in dataset.spatial.zones]
        zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
        parents = set()
        geozones = []
        # The per-zone coverage level was computed here but never read, so
        # it is no longer tracked: this document has no spatial weight.
        for zone in zones:
            geozones.append({
                'id': zone.id,
                'name': zone.name,
                'keys': zone.keys_values
            })
            parents |= set(zone.parents)

        geozones.extend([{'id': p} for p in parents])
        document.update({
            'geozones': geozones,
            'granularity': dataset.spatial.granularity,
        })
    return document
def get_value(self):
    '''Count the users whose ``starred_organizations`` matches ``self.target``.'''
    starring_users = User.objects(starred_organizations=self.target)
    return starring_users.count()
def count_users(self):
    '''
    Store the user count in ``self.metrics['users']`` and save.

    Only users with a non-null ``confirmed_at`` and a null ``deleted``
    field are counted.
    '''
    from udata.models import User

    active_users = User.objects(confirmed_at__ne=None, deleted=None)
    self.metrics['users'] = active_users.count()
    self.save()