def init(ctx):
    '''Initialize your udata instance (search index, user, sample data...)'''
    # Always bring the database and the search index up to date first.
    log.info('Apply DB migrations if needed')
    ctx.invoke(migrate, record=True)
    ctx.invoke(index)

    if IS_TTY:
        # Interactive session: offer each optional bootstrap step in turn.
        if click.confirm(_('Do you want to create a superadmin user?'),
                         default=True):
            new_user = ctx.invoke(user_commands.create)
            ctx.invoke(user_commands.set_admin, email=new_user.email)
        if click.confirm(_('Do you want to import some data-related license?'),
                         default=True):
            ctx.invoke(licenses)
        if click.confirm(_('Do you want to import some spatial zones (countries)?'),
                         default=True):
            ctx.invoke(spatial_load)
        if click.confirm(_('Do you want to create some sample data?'),
                         default=True):
            ctx.invoke(generate_fixtures)

    success(_('Your udata instance is ready!'))
def apigouvfr_load_apis(self):
    '''Load dataset-related APIs from api.gouv.fr'''
    response = requests.get(current_app.config['APIGOUVFR_URL'], timeout=10)
    response.raise_for_status()
    # cleanup existing mappings
    Dataset.objects.filter(**{
        f'extras__{APIGOUVFR_EXTRAS_KEY}__exists': True,
    }).update(**{
        f'unset__extras__{APIGOUVFR_EXTRAS_KEY}': True,
    })
    datasets_apis = defaultdict(list)
    for payload in response.json():
        # `pop` also strips the uuid list from the payload we will store.
        dataset_ids = payload.pop('datagouv_uuid', [])
        if not dataset_ids:
            continue
        if not all(k in payload for k in APIGOUVFR_EXPECTED_FIELDS):
            error(f'Missing field in payload: {payload}')
            continue
        allowed = current_app.config.get('APIGOUVFR_ALLOW_OPENNESS', [])
        if payload['openness'] not in allowed:
            continue
        # Deduplicate identical API payloads per dataset.
        for dataset_id in dataset_ids:
            if payload not in datasets_apis[dataset_id]:
                datasets_apis[dataset_id].append(payload)
    for dataset_id, dataset_apis in datasets_apis.items():
        process_dataset(dataset_id, dataset_apis)
    success('Done.')
def licenses(source=DEFAULT_LICENSE_FILE):
    '''Feed the licenses from a JSON file'''
    if source.startswith('http'):
        # Fix: fail fast on network stalls and HTTP errors instead of
        # hanging forever or parsing an error page as JSON (matches the
        # `timeout=10` convention used by the other HTTP commands).
        response = requests.get(source, timeout=10)
        response.raise_for_status()
        json_licenses = response.json()
    else:
        with open(source) as fp:
            json_licenses = json.load(fp)
    if len(json_licenses):
        log.info('Dropping existing licenses')
        License.drop_collection()
    for json_license in json_licenses:
        # Map legacy boolean fields to the flags list.
        flags = [flag for field, flag in FLAGS_MAP.items()
                 if json_license.get(field, False)]
        license = License.objects.create(
            id=json_license['id'],
            title=json_license['title'],
            url=json_license['url'] or None,
            maintainer=json_license['maintainer'] or None,
            flags=flags,
            active=json_license.get('active', False),
            alternate_urls=json_license.get('alternate_urls', []),
            alternate_titles=json_license.get('alternate_titles', []),
        )
        log.info('Added license "%s"', license.title)
    # Make sure the default license always exists, even with an empty feed.
    try:
        License.objects.get(id=DEFAULT_LICENSE['id'])
    except License.DoesNotExist:
        License.objects.create(**DEFAULT_LICENSE)
        log.info('Added license "%s"', DEFAULT_LICENSE['title'])
    success('Done')
def set_admin(email):
    '''Set an user as administrator'''
    # Resolve the account first so the log line can name who is promoted.
    user = datastore.get_user(email)
    log.info('Adding admin role to user %s (%s)', user.fullname, user.email)
    admin_role = datastore.find_or_create_role('admin')
    datastore.add_role_to_user(user, admin_role)
    success('User %s (%s) is now administrator' % (user.fullname, user.email))
def update(site=False, organizations=False, users=False, datasets=False,
           reuses=False):
    '''Update all metrics for the current date'''
    # Without any explicit flag, refresh every collection.
    do_all = not any((site, organizations, users, datasets, reuses))
    if do_all or site:
        log.info('Update site metrics')
        update_site_metrics()
    # (log message, selected flag, model) in the historical processing order.
    for message, selected, model in (
        ('Update datasets metrics', datasets, Dataset),
        ('Update reuses metrics', reuses, Reuse),
        ('Update organizations metrics', organizations, Organization),
        ('Update user metrics', users, User),
    ):
        if do_all or selected:
            log.info(message)
            # timeout(False) keeps the cursor alive across a long run.
            for document in model.objects.timeout(False):
                update_metrics_for(document)
    success('All metrics have been updated')
def delete():
    '''Delete an existing user'''
    email = click.prompt('Email')
    # .first() returns None when no account matches the address.
    account = User.objects(email=email).first()
    if account is None:
        exit_with_error('Invalid user')
    account.delete()
    success('User deleted successfully')
def process_dataset(d_id, apis):
    '''Store the given API descriptions on the matching dataset's extras.'''
    try:
        dataset = get_dataset(d_id)
    except (Dataset.DoesNotExist, mongoengine.errors.ValidationError):
        # Unknown or malformed id: report and bail out, forwarding
        # error()'s return value as before.
        return error(f'Dataset {d_id} not found')
    dataset.extras[APIGOUVFR_EXTRAS_KEY] = apis
    dataset.save()
    success(f'Imported {len(apis)} API(s) for {str(dataset)}')
def validate():
    '''Validate the Swagger/OpenAPI specification with your config'''
    with current_app.test_request_context():
        # Round-trip through JSON to reduce the schema to plain dicts/lists.
        schema = json.loads(json.dumps(api.__schema__))
        try:
            schemas.validate(schema)
        except schemas.SchemaValidationError as e:
            exit_with_error('API specifications are not valid', e)
        else:
            success('API specifications are valid')
def recommendations_clean():
    '''Remove every stored recommendation from dataset extras.'''
    # Fix: the query keys were written as f-strings with no placeholders
    # (ruff F541); plain string literals express exactly the same keys.
    nb_datasets = Dataset.objects.filter(
        **{
            'extras__recommendations:sources__exists': True,
        }).update(
        **{
            'unset__extras__recommendations': True,
            'unset__extras__recommendations:sources': True,
        })
    success(f"Removed recommendations from {nb_datasets} dataset(s)")
def activate():
    '''Activate an existing user (validate their email confirmation)'''
    email = click.prompt('Email')
    user = User.objects(email=email).first()
    if not user:
        exit_with_error('Invalid user')
    if user.confirmed_at is not None:
        # exit_with_error() terminates the command (it is used bare in the
        # other user commands); the `return` that followed it here was
        # unreachable and has been removed.
        exit_with_error('User email address already confirmed')
    # NOTE(review): utcnow() yields a naive datetime — kept for consistency
    # with the rest of the file, but deprecated since Python 3.12.
    user.confirmed_at = datetime.utcnow()
    user.save()
    success('User activated successfully')
def fill(start, end):
    '''Fill the piwik metrics'''
    # With no explicit start, only the end date is processed.
    start = start or end
    log.info('Loading metrics from {start} to {end}'.format(start=start,
                                                            end=end))
    one_day = timedelta(days=1)
    day = start
    # Walk the period one day at a time, inclusive of both bounds.
    while day <= end:
        log.info('Processing %s', day)
        counter.count_for(day)
        day += one_day
    success('Loaded all metrics for the period')
def render():
    '''Force (re)rendering stored images'''
    from udata.core.organization.models import Organization
    from udata.core.post.models import Post
    from udata.core.reuse.models import Reuse
    from udata.core.user.models import User
    header('Rendering images')
    count = Counter()
    total = Counter()
    # Fix: four copy-pasted stanzas collapsed into one data-driven loop.
    # (counter key, queryset, image attribute, log label), original order.
    jobs = (
        ('orgs', Organization.objects(logo__exists=True), 'logo',
         'organizations logos'),
        ('users', User.objects(avatar__exists=True), 'avatar',
         'user avatars'),
        ('posts', Post.objects(image__exists=True), 'image',
         'post images'),
        ('reuses', Reuse.objects(image__exists=True), 'image',
         'reuse images'),
    )
    for key, queryset, field, label in jobs:
        total[key] = queryset.count()
        log.info('Processing {0} {1}'.format(total[key], label))
        for document in queryset:
            # render_or_skip returns 1 when the image was re-rendered.
            count[key] += render_or_skip(document, field)
    log.info('''Summary:
    Organization logos: {count[orgs]}/{total[orgs]}
    User avatars: {count[users]}/{total[users]}
    Post images: {count[posts]}/{total[posts]}
    Reuse images: {count[reuses]}/{total[reuses]}
    '''.format(count=count, total=total))
    success('Images rendered')
def process_dataset(source, dataset):
    '''Merge recommendations coming from `source` into a dataset's extras.'''
    dataset_id = dataset['id']
    try:
        target_dataset = get_dataset(dataset_id)
    except (Dataset.DoesNotExist, mongoengine.errors.ValidationError):
        error(f"Dataset {dataset_id} not found")
        return
    log.info(f"Processing recommendations for dataset {dataset_id}")
    # Keep only recommendations that point at an existing dataset.
    valid_recos = []
    for reco in dataset['recommendations']:
        try:
            recommended = get_dataset(reco['id'])
        except (Dataset.DoesNotExist, mongoengine.errors.ValidationError):
            error(f"Recommended dataset {reco['id']} not found")
            continue
        valid_recos.append({
            'id': str(recommended.id),
            'score': reco['score'],
            'source': source,
        })
    if not valid_recos:
        error(f"No recommendations found for dataset {dataset_id}")
        return
    success(
        f"Found {len(valid_recos)} new recommendations for dataset {dataset_id}"
    )
    # Record this source, then merge and re-rank all recommendations.
    sources = set(target_dataset.extras.get('recommendations:sources', []))
    sources.add(source)
    merged = target_dataset.extras.get('recommendations', [])
    merged.extend(valid_recos)
    target_dataset.extras['recommendations:sources'] = list(sources)
    target_dataset.extras['recommendations'] = sorted(
        merged, key=lambda reco: reco['score'], reverse=True)
    target_dataset.save()
def purge(datasets, reuses, organizations):
    '''
    Permanently remove data flagged as deleted.

    If no model flag is given, all models are purged.
    '''
    purge_all = not any((datasets, reuses, organizations))
    # Run each purge routine whose flag is set (or all when none is).
    for enabled, message, handler in (
        (datasets, 'Purging datasets', purge_datasets),
        (reuses, 'Purging reuses', purge_reuses),
        (organizations, 'Purging organizations', purge_organizations),
    ):
        if purge_all or enabled:
            log.info(message)
            handler()
    success('Done')
def create():
    '''Create a new user'''
    data = {
        'first_name': click.prompt('First name'),
        'last_name': click.prompt('Last name'),
        'email': click.prompt('Email'),
        'password': click.prompt('Password', hide_input=True),
        'password_confirm': click.prompt('Confirm Password', hide_input=True),
    }
    # Until https://github.com/mattupstate/flask-security/issues/672 is fixed
    with current_app.test_request_context():
        form = RegisterForm(MultiDict(data), meta={'csrf': False})
        if not form.validate():
            messages = '\n'.join('\n'.join(e) for e in form.errors.values())
            exit_with_error('Error creating user', messages)
        # Validation passed: hash the password and persist the account.
        data['password'] = encrypt_password(data['password'])
        del data['password_confirm']
        data['confirmed_at'] = datetime.utcnow()
        user = datastore.create_user(**data)
        success('User(id={u.id} email={u.email}) created'.format(u=user))
        return user
def check():
    '''Check the LDAP configuration'''
    bind_dn = manager.config.get('LDAP_BIND_USER_DN', None)
    if not bind_dn:
        exit_with_error('Missing LDAP_BIND_USER_DN setting')
    header('Trying to connect with bind user')
    try:
        who_am_i = manager.connection.extend.standard.who_am_i()
        success('Bind DN successfully connected')
        echo('Bind DN user is "{}"'.format(white(safe_unicode(who_am_i))))
    except Exception as e:
        exit_with_error('Unable to connect', e)
    header('Trying to authenticate an user')
    email = prompt(white('User email'))
    password = prompt(white('User password'), hide_input=True)
    result = manager.authenticate(email, password)
    if result.status == AuthenticationResponseStatus.success:
        success('User successfully connected')
        echo('Authenticated user is "{email} ({dn})"'.format(
            email=white(safe_unicode(result.user_id)),
            dn=white(safe_unicode(result.user_dn))
        ))
        echo('User has the following remote attributes:')
        for key, value in result.user_info.items():
            # Fix: these were b'{key}: {value}'.format(...) — bytes objects
            # have no .format() in Python 3, so every attribute line raised
            # AttributeError instead of printing.
            echo('{key}: {value}'.format(key=white(safe_unicode(key)),
                                         value=safe_unicode(value)))
        echo('Local user will be created with the following values:')
        for key, value in manager.extract_user_infos(result.user_info).items():
            echo('{key}: {value}'.format(key=white(safe_unicode(key)),
                                         value=safe_unicode(value)))
    else:
        exit_with_error('Unable to authenticate user "{0}"'.format(safe_unicode(email)))
    success('LDAP configuration is working')
def init(ctx):
    '''Initialize your udata instance (search index, user, sample data...)'''
    log.info('Apply DB migrations if needed')
    ctx.invoke(migrate, record=True)
    ctx.invoke(index)
    if IS_TTY:
        # Superadmin creation needs two chained invocations.
        if click.confirm(_('Do you want to create a superadmin user?'),
                         default=True):
            admin = ctx.invoke(user_commands.create)
            ctx.invoke(user_commands.set_admin, email=admin.email)
        # The remaining optional steps are plain "confirm then invoke" pairs.
        optional_steps = (
            (_('Do you want to import some data-related license?'), licenses),
            (_('Do you want to create some sample data?'), generate_fixtures),
        )
        for question, command in optional_steps:
            if click.confirm(question, default=True):
                ctx.invoke(command)
    success(_('Your udata instance is ready!'))
def _merge_scored_recos(extras, key, new_recos):
    '''Merge new_recos into extras[key], highest score first.'''
    merged = extras.get(key, [])
    merged.extend(new_recos)
    extras[key] = sorted(merged, key=lambda reco: reco['score'], reverse=True)


def process_dataset(source, dataset):
    '''Attach dataset and reuse recommendations from `source` to a dataset.

    `dataset` is a payload dict with an `id` and a `recommendations` list;
    each recommendation carries an `id`, a `score` and an optional `type`
    (`dataset` or `reuse`).
    '''
    try:
        target_dataset = get_dataset(dataset['id'])
    except (Dataset.DoesNotExist, mongoengine.errors.ValidationError):
        error(f"Dataset {dataset['id']} not found")
        return
    log.info(f"Processing recommendations for dataset {dataset['id']}")
    valid_recos_datasets = []
    valid_recos_reuses = []
    for reco in dataset['recommendations']:
        # default type is `dataset` for retrocompat
        reco_type = reco.get('type', 'dataset')
        if reco_type == 'dataset':
            try:
                reco_dataset_obj = get_dataset(reco['id'])
                # Never recommend a dataset to itself.
                if reco_dataset_obj.id == target_dataset.id:
                    continue
                valid_recos_datasets.append({
                    'id': str(reco_dataset_obj.id),
                    'score': reco['score'],
                    'source': source,
                })
            except (Dataset.DoesNotExist, mongoengine.errors.ValidationError):
                error(f"Recommended dataset {reco['id']} not found")
                continue
        elif reco_type == 'reuse':
            try:
                reuse = get_reuse(reco['id'])
                valid_recos_reuses.append({
                    'id': str(reuse.id),
                    'score': reco['score'],
                    'source': source,
                })
            except (Reuse.DoesNotExist, mongoengine.errors.ValidationError):
                error(f"Recommended reuse {reco['id']} not found")
                continue
        else:
            error(f'Unknown recommendation type {reco_type}')
            continue
    if not valid_recos_datasets and not valid_recos_reuses:
        error(f"No recommendations found for dataset {dataset['id']}")
        return
    # Record this source, then merge each recommendation list.
    # (Fix: the duplicated merge/sort logic now lives in _merge_scored_recos.)
    new_sources = set(target_dataset.extras.get('recommendations:sources', []))
    new_sources.add(source)
    target_dataset.extras['recommendations:sources'] = list(new_sources)
    if valid_recos_datasets:
        success(
            f"Found {len(valid_recos_datasets)} new dataset recommendations for dataset {dataset['id']}"
        )
        _merge_scored_recos(target_dataset.extras, 'recommendations',
                            valid_recos_datasets)
    if valid_recos_reuses:
        success(
            f"Found {len(valid_recos_reuses)} new reuse recommendations for dataset {dataset['id']}"
        )
        _merge_scored_recos(target_dataset.extras, 'recommendations-reuses',
                            valid_recos_reuses)
    target_dataset.save()
def flush():
    '''Flush the cache'''
    # Drops every entry in the configured cache backend, not a subset.
    log.info('Flushing cache')
    cache.clear()
    success('Cache flushed')
def update(site=False, organizations=False, users=False, datasets=False,
           reuses=False, drop=False):
    '''Update all metrics for the current date'''
    # With no explicit flag, every collection is refreshed.
    # `drop` clears the stored metrics before recounting.
    do_all = not any((site, organizations, users, datasets, reuses))
    if do_all or site:
        log.info('Update site metrics')
        try:
            # NOTE(review): rebinding `site` shadows the boolean parameter;
            # harmless here because the flag is not read again afterwards.
            site = Site.objects(id=current_app.config['SITE_ID']).first()
            if drop:
                site.metrics.clear()
            site.count_users()
            site.count_org()
            site.count_datasets()
            site.count_resources()
            site.count_reuses()
            site.count_followers()
            site.count_discussions()
            site.count_max_dataset_followers()
            site.count_max_dataset_reuses()
            site.count_max_reuse_datasets()
            site.count_max_reuse_followers()
            site.count_max_org_followers()
            site.count_max_org_reuses()
            site.count_max_org_datasets()
        # Broad catch: one failing counter must not abort the whole command.
        except Exception as e:
            log.info(f'Error during update: {e}')
    if do_all or datasets:
        log.info('Update datasets metrics')
        # timeout(False) keeps the Mongo cursor alive for long runs.
        all_datasets = Dataset.objects.visible().timeout(False)
        with click.progressbar(all_datasets,
                               length=Dataset.objects.count()) as dataset_bar:
            for dataset in dataset_bar:
                try:
                    if drop:
                        dataset.metrics.clear()
                    dataset.count_discussions()
                    dataset.count_issues()
                    dataset.count_reuses()
                    dataset.count_followers()
                except Exception as e:
                    # Skip the faulty document and keep going.
                    log.info(f'Error during update: {e}')
                    continue
    if do_all or reuses:
        log.info('Update reuses metrics')
        all_reuses = Reuse.objects.visible().timeout(False)
        with click.progressbar(
                all_reuses,
                length=Reuse.objects.visible().count()) as reuses_bar:
            for reuse in reuses_bar:
                try:
                    if drop:
                        reuse.metrics.clear()
                    reuse.count_discussions()
                    reuse.count_issues()
                    reuse.count_followers()
                except Exception as e:
                    log.info(f'Error during update: {e}')
                    continue
    if do_all or organizations:
        log.info('Update organizations metrics')
        all_org = Organization.objects.visible().timeout(False)
        with click.progressbar(
                all_org,
                length=Organization.objects.visible().count()) as org_bar:
            for organization in org_bar:
                try:
                    if drop:
                        organization.metrics.clear()
                    organization.count_datasets()
                    organization.count_reuses()
                    organization.count_followers()
                except Exception as e:
                    log.info(f'Error during update: {e}')
                    continue
    if do_all or users:
        log.info('Update user metrics')
        all_users = User.objects.timeout(False)
        with click.progressbar(all_users,
                               length=User.objects.count()) as users_bar:
            for user in users_bar:
                try:
                    if drop:
                        user.metrics.clear()
                    user.count_datasets()
                    user.count_reuses()
                    user.count_followers()
                    user.count_following()
                except Exception as e:
                    log.info(f'Error during update: {e}')
                    continue
    success('All metrics have been updated')