def xml_export(short_name):
    """Export the results of a project as a prettified XML attachment.

    :param short_name: The short name of the project.
    """
    project = project_repo.get_by_shortname(short_name)
    if project is None:  # pragma: no cover
        abort(404)
    # Only dict-shaped result payloads can be serialised as records.
    records = [res.info
               for res in result_repo.filter_by(project_id=project.id)
               if isinstance(res.info, dict)]
    raw_xml = dicttoxml.dicttoxml(records,
                                  custom_root='record-group',
                                  item_func=lambda x: 'record',
                                  attr_type=False)
    latin_name = Exporter()._project_name_latin_encoded(project)
    attachment = secure_filename('{0}_results.xml'.format(latin_name))
    # Pretty-print the XML so the downloaded file is human readable.
    resp = make_response(parseString(raw_xml).toprettyxml())
    resp.headers["Content-Disposition"] = "attachment; {0}".format(
        "filename={0}".format(attachment))
    resp.headers["Content-type"] = "text/xml"
    resp.headers['Cache-Control'] = "no-store, no-cache, must-revalidate, \
post-check=0, pre-check=0, max-age=0"
    return resp
def csv_export(short_name):
    """Export project results as a CSV file.

    :param short_name: The short name of the project.
    """
    project = project_repo.get_by_shortname(short_name)
    if project is None:  # pragma: no cover
        abort(404)
    si = StringIO.StringIO()
    writer = UnicodeWriter(si)
    exporter = Exporter()
    name = exporter._project_name_latin_encoded(project)
    secure_name = secure_filename('{0}_{1}.csv'.format(name, 'results'))
    results = result_repo.filter_by(project_id=project.id)
    data = []
    for r in results:
        # Shallow copy so the exploded info_* keys don't mutate the domain
        # object's own dict.
        row = dict(r.dictize())
        if isinstance(row['info'], dict):
            # Explode info: promote each info key to a top-level column.
            for k in row['info'].keys():
                row['info_{0}'.format(k)] = row['info'][k]
        data.append(row)
    # Sort the union of keys so the column order is deterministic across
    # requests; a plain set made it depend on hash seeding.
    headers = sorted(set(itertools.chain(*[row.keys() for row in data])))
    writer.writerow(headers)
    for row in data:
        writer.writerow([row.get(h, '') for h in headers])
    fn = "filename={0}".format(secure_name)
    resp = make_response(si.getvalue())
    resp.headers["Content-Disposition"] = "attachment; {0}".format(fn)
    resp.headers["Content-type"] = "text/csv"
    resp.headers['Cache-Control'] = "no-store, no-cache, must-revalidate, \
post-check=0, pre-check=0, max-age=0"
    return resp
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with two fields regarding the tasks for the user:
        { 'done': 10, 'total: 100 }
    This will mean that the user has done a 10% of the available tasks for him
    """
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)
    project = (project_repo.get_by_shortname(short_name) if short_name
               else project_repo.get(project_id))
    if not project:
        return abort(404)
    # For now, keep this version, but wait until redis cache is used here
    # for task_runs too
    query_attrs = dict(project_id=project.id)
    if current_user.is_anonymous():
        # Anonymous contributions are keyed by IP address.
        query_attrs['user_ip'] = request.remote_addr or '127.0.0.1'
    else:
        query_attrs['user_id'] = current_user.id
    done = task_repo.count_task_runs_with(**query_attrs)
    payload = dict(done=done, total=n_tasks(project.id))
    return Response(json.dumps(payload), mimetype="application/json")
def export_results(short_name):
    """Export project results as an XML or CSV file.

    :param short_name: The short name of the project.
    """
    project = project_repo.get_by_shortname(short_name)
    if project is None:  # pragma: no cover
        abort(404)
    fmt = request.args.get('format')
    export_formats = ["xml", "csv"]
    if not fmt:
        # Any other query argument without a format is a bad request.
        if len(request.args) >= 1:
            abort(404)
        return redirect(url_for('.index'))
    if fmt not in export_formats:
        abort(415)
    # Only hit the database once the requested format is known to be valid;
    # previously the query ran even for 415 responses.
    results = result_repo.filter_by(project_id=project.id)
    if fmt == "xml":
        resp = get_xml_response(results)
    else:  # fmt == "csv" (only remaining valid format)
        resp = get_csv_response(results)
    exporter = Exporter()
    name = exporter._project_name_latin_encoded(project)
    secure_name = secure_filename('{0}_results.{1}'.format(name, fmt))
    fn = "filename={0}".format(secure_name)
    resp.headers["Content-Disposition"] = "attachment; {0}".format(fn)
    resp.headers["Content-type"] = "text/{0}".format(fmt)
    resp.headers['Cache-Control'] = "no-store, no-cache, must-revalidate, \
post-check=0, pre-check=0, max-age=0"
    return resp
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with three fields regarding the tasks for the user:
        { 'done': 10, 'total: 100, 'remaining': 90 }
    This will mean that the user has done a 10% of the available tasks for him
    and 90 tasks are yet to be submitted
    """
    if current_user.is_anonymous():
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)
    project = (project_repo.get_by_shortname(short_name) if short_name
               else project_repo.get(project_id))
    if not project:
        return abort(404)
    # For now, keep this version, but wait until redis cache is used here
    # for task_runs too
    done = task_repo.count_task_runs_with(project_id=project.id,
                                          user_id=current_user.id)
    remaining = n_available_tasks(project.id, current_user.id)
    payload = dict(done=done,
                   total=n_tasks(project.id),
                   remaining=remaining)
    return Response(json.dumps(payload), mimetype="application/json")
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with two fields regarding the tasks for the user:
        { 'done': 10, 'total: 100 }
    This will mean that the user has done a 10% of the available tasks for him
    """
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)
    project = (project_repo.get_by_shortname(short_name) if short_name
               else project_repo.get(project_id))
    if not project:
        return abort(404)
    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    query_attrs = dict(project_id=project.id)
    if current_user.is_anonymous:
        # Anonymous contributions are keyed by (anonymised) IP address.
        query_attrs['user_ip'] = anonymizer.ip(request.remote_addr or
                                               '127.0.0.1')
    else:
        query_attrs['user_id'] = current_user.id
    done = task_repo.count_task_runs_with(**query_attrs)
    payload = dict(done=done, total=n_tasks(project.id))
    return Response(json.dumps(payload), mimetype="application/json")
def analyse():
    """Trigger analysis for a result or set of results."""
    if request.method == 'GET':
        return respond('The analysis endpoint is listening...')
    payload = request.json or {}
    project = project_repo.get_by_shortname(payload.get('project_short_name'))
    if not project:  # pragma: no cover
        abort(404)
    category = project_repo.get_category(project.category_id)
    presenter = category.info.get('presenter')
    # Reject unknown presenters (also covers a missing/None presenter).
    if presenter not in ('z3950', 'iiif-annotation'):
        abort(400, 'Invalid task presenter')
    # Bulk analysis: all results, or only those with no analysis yet.
    if payload.get('all') or payload.get('empty'):
        ensure_authorized_to('update', project)
        if payload.get('all'):
            analyse_all(project.id, presenter)
        elif payload.get('empty'):
            analyse_empty(project.id, presenter)
        return respond('OK')
    # Single-result analysis, triggered by a task_completed webhook event.
    if payload.get('event') != 'task_completed':
        abort(400)
    analyse_single(payload['result_id'], presenter)
    return respond('OK')
def cancel_task(task_id=None):
    """Unlock task upon cancel so that same task can be presented again."""
    # Only authenticated users can hold (and therefore release) task locks.
    if not current_user.is_authenticated:
        return abort(401)
    data = request.json
    projectname = data.get('projectname', None)
    project = project_repo.get_by_shortname(projectname)
    if not project:
        return abort(400)
    user_id = current_user.id
    scheduler, timeout = get_scheduler_and_timeout(project)
    # Only lock-based schedulers hold per-task locks that need releasing.
    if scheduler in (Schedulers.locked, Schedulers.user_pref,
                     Schedulers.task_queue):
        task_locked_by_user = has_lock(task_id, user_id, timeout)
        if task_locked_by_user:
            release_lock(task_id, user_id, timeout)
            current_app.logger.info(
                'Project {} - user {} cancelled task {}'.format(
                    project.id, current_user.id, task_id))
            # NOTE(review): grouped with the held-lock branch per the original
            # statement order; confirm the reserve-task lock should not also
            # be released when the user did not hold the task lock.
            release_reserve_task_lock_by_id(project.id, task_id,
                                            current_user.id, timeout,
                                            expiry=EXPIRE_LOCK_DELAY)
    # Always report success to the client, even if no lock was held.
    return Response(json.dumps({'success': True}), 200,
                    mimetype="application/json")
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with four fields regarding the tasks for the user:
        { 'done': 10, 'total: 100, 'remaining': 90, 'remaining_for_user': 45 }
    This will mean that the user has done 10% of the available tasks for
    the project, 90 tasks are yet to be submitted and the user can access 45
    of them based on user preferences.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)
    project = (project_repo.get_by_shortname(short_name) if short_name
               else project_repo.get(project_id))
    if not project:
        return abort(404)
    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    guidelines_updated = _guidelines_updated(project.id, current_user.id)
    done = task_repo.count_task_runs_with(project_id=project.id,
                                          user_id=current_user.id)
    remaining = n_available_tasks(project.id, include_gold_task=True)
    remaining_for_user = n_available_tasks_for_user(project, current_user.id)
    locked_ids = {task["task_id"] for task in get_locked_tasks(project)}
    response = dict(done=done,
                    total=n_tasks(project.id),
                    completed=n_completed_tasks(project.id),
                    remaining=remaining,
                    locked=len(locked_ids),
                    remaining_for_user=remaining_for_user,
                    quiz=current_user.get_quiz_for_project(project),
                    guidelines_updated=guidelines_updated)
    # Admins and co-owning subadmins also get the gold-task count.
    is_coowner_subadmin = (current_user.subadmin and
                           current_user.id in project.owners_ids)
    if current_user.admin or is_coowner_subadmin:
        response['available_gold_tasks'] = n_unexpired_gold_tasks(project.id)
    return Response(json.dumps(response), mimetype="application/json")
def sync(short_name):
    """Sync a project with a GitHub repo."""
    project = project_repo.get_by_shortname(short_name)
    if not project:  # pragma: no cover
        abort(404)
    ensure_authorized_to('update', project)
    form = GitHubURLForm(request.form)
    is_post = request.method == 'POST'
    if is_post and form.validate():
        # Hand the validated URL over to the import view.
        return redirect(url_for('.import_repo',
                                github_url=form.github_url.data,
                                short_name=project.short_name))
    if is_post:  # pragma: no cover
        flash(gettext('Please correct the errors'), 'error')
    return render_template('projects/github/sync.html',
                           form=form,
                           project=project)
def auth_jwt_project(short_name):
    """Create a JWT for a project via its secret KEY."""
    # Missing or empty Authorization header -> forbidden.
    secret = request.headers.get('Authorization')
    if not secret:
        return abort(403)
    project = project_repo.get_by_shortname(short_name)
    # Unknown project or wrong key both answer 404 so the key can't be probed.
    if not (project and project.secret_key == secret):
        return abort(404)
    return jwt.encode({'short_name': short_name, 'project_id': project.id},
                      project.secret_key, algorithm='HS256')
def get_projects_report(self, base_url):
    """Build a pandas DataFrame summarising every project.

    :param base_url: URL prefix prepended to each project short name.
    """
    report_rows = project_repo.get_projects_report()
    records = []
    for row in report_rows:
        owners_ids = project_repo.get_by_shortname(
            row.short_name).owners_ids
        num_available_tasks = n_available_tasks(row.id)
        # Co-owners exclude the primary owner; empty join falls back to 'None'.
        coowner_names = '|'.join(
            '{};{}'.format(co.name, co.email_addr)
            for co in user_repo.get_users(owners_ids)
            if co.name != row.owner_name) or 'None'
        has_completed = str(num_available_tasks == 0)
        records.append(OrderedDict([
            ('id', row.id),
            ('name', row.name),
            ('short_name', row.short_name),
            ('url', base_url + row.short_name),
            ('description', row.description),
            ('long_description', row.long_description),
            ('created', row.created),
            ('owner_name', row.owner_name),
            ('owner_email', row.owner_email),
            ('coowners', coowner_names),
            ('category_name', row.category_name),
            ('allow_anonymous_contributors', row.allow_anonymous_contributors),
            ('password_protected', row.password_protected),
            ('webhook', row.webhook),
            ('scheduler', row.scheduler),
            ('has_completed', has_completed),
            ('finish_time', row.ft),
            ('percent_complete', row.percent_complete),
            ('n_tasks', row.n_tasks),
            ('pending_tasks', row.pending_tasks),
            ('n_workers', row.n_workers),
            ('n_answers', row.n_answers),
            ('workers', row.workers),
            ('updated', row.updated),
            ('oldest_available', row.oldest_available),
            ('last_submission', row.last_submission),
            ('n_taskruns', row.n_taskruns),
            ('pending_taskruns', row.pending_taskruns)]))
    return pd.DataFrame(records)
def task_progress(project_id=None, short_name=None):
    """API endpoint for task progress.

    Returns a JSON object containing the number of tasks which meet the
    user defined filter constraints.

    :raises Exception: when a filter key is neither a task column nor a
        searchable task.info key.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):
        return abort(404)
    if short_name:
        project = project_repo.get_by_shortname(short_name)
    elif project_id:
        project = project_repo.get(project_id)
    filter_fields = request.args
    if not project:
        return abort(404)
    # project.id is an integer primary key, so direct interpolation is safe.
    sql_text = "SELECT COUNT(*) FROM task WHERE project_id=" + str(project.id)
    task_info_fields = get_searchable_columns(project.id)
    # Build the WHERE clause from the request's filter fields.  Keys are only
    # ever interpolated after passing the task_fields / task_info_fields
    # whitelists; the user-supplied values are bound as query parameters.
    for key in filter_fields.keys():
        if key in task_fields:
            sql_text += " AND {0}=:{1}".format(key, key)
        elif key in task_info_fields:
            # include support for empty string and null in URL
            if filter_fields[key].lower() in ["null", ""]:
                sql_text += " AND info ->> '{0}' is Null".format(key)
            else:
                sql_text += " AND info ->> '{0}'=:{1}".format(key, key)
        else:
            raise Exception(
                "invalid key: the field that you are filtering by does not exist"
            )
    sql_text += ';'
    sql_query = text(sql_text)
    results = db.slave_session.execute(sql_query, filter_fields)
    # results is a sqlalchemy ResultProxy; COUNT(*) is row 0, column 0.
    num_tasks = results.first()[0]
    task_count_dict = dict(task_count=num_tasks)
    return Response(json.dumps(task_count_dict), mimetype="application/json")
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with four fields regarding the tasks for the user:
        { 'done': 10, 'total: 100, 'remaining': 90, 'remaining_for_user': 45 }
    This will mean that the user has done 10% of the available tasks for
    the project, 90 tasks are yet to be submitted and the user can access 45
    of them based on user preferences.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)
    project = (project_repo.get_by_shortname(short_name) if short_name
               else project_repo.get(project_id))
    if not project:
        return abort(404)
    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    done = task_repo.count_task_runs_with(project_id=project.id,
                                          user_id=current_user.id)
    remaining = n_available_tasks(project.id, current_user.id)
    remaining_for_user = n_available_tasks_for_user(project, current_user.id)
    response = dict(done=done,
                    total=n_tasks(project.id),
                    remaining=remaining,
                    remaining_for_user=remaining_for_user,
                    quiz=current_user.get_quiz_for_project(project))
    return Response(json.dumps(response), mimetype="application/json")
def import_repo(short_name):
    """Import a project from a GitHub repo."""
    project = project_repo.get_by_shortname(short_name)
    if not project:  # pragma: no cover
        abort(404)
    ensure_authorized_to('update', project)
    github_url = request.args.get('github_url')
    # Any URL or content problem bounces the user back to the sync form.
    try:
        gh_repo = GitHubRepo(github_url)
    except GitHubURLError as e:
        flash(str(e), 'error')
        return redirect(url_for('.sync', short_name=project.short_name))
    gh_repo.load_contents()
    try:
        gh_repo.validate()
    except InvalidPybossaProjectError as e:
        flash(str(e), 'error')
        return redirect(url_for('.sync', short_name=project.short_name))
    form = GitHubProjectForm(request.form)
    project_json = gh_repo.get_project_json()
    _populate_form(form, gh_repo.contents, project_json)
    # NOTE(review): `categories` appears unused below — confirm before removal.
    categories = project_repo.get_all_categories()
    if request.method == 'POST' and form.validate():
        info = json.loads(form.additional_properties.data)
        original_short_name = project_json['short_name']
        # Fetch each selected asset from GitHub, rewriting the imported
        # project's short name to this project's short name.
        if form.tutorial.data:
            resp = github.get(form.tutorial.data)
            info['tutorial'] = resp.content.replace(original_short_name,
                                                    project.short_name)
        if form.task_presenter.data:
            resp = github.get(form.task_presenter.data)
            info['task_presenter'] = resp.content.replace(original_short_name,
                                                          project.short_name)
        if form.results.data:
            resp = github.get(form.results.data)
            info['results'] = resp.content.replace(original_short_name,
                                                   project.short_name)
        long_description = None
        if form.long_description.data:
            resp = github.get(form.long_description.data)
            long_description = resp.content
        # Snapshot the pre-update state for the audit log.
        old_project = Project(**project.dictize())
        project.description = form.description.data
        project.long_description = long_description
        project.category_id = form.category_id.data
        project.webhook = form.webhook.data
        project.info = info
        if form.thumbnail.data:
            data = github.get(form.thumbnail.data).content
            prefix = time.time()
            filename = "project_%s_thumbnail_%i.png" % (project.id, prefix)
            container = "user_%s" % current_user.id
            _download(filename, container, data)
            # Drop the previous thumbnail before recording the new one.
            if project.info.get("thumbnail"):
                uploader.delete_file(project.info["thumbnail"], container)
            # NOTE(review): structure reconstructed from a collapsed source
            # line — confirm this bookkeeping belongs inside the thumbnail
            # branch.
            project.info['container'] = container
            project.info['thumbnail'] = filename
        try:
            project_repo.update(project)
        except sqlalchemy.exc.DataError as e:  # pragma: no cover
            flash('''DataError: {0} <br><br>Please check the files being imported from GitHub'''.format(e.orig), 'danger')
            return redirect(url_for('.sync', short_name=project.short_name))
        auditlogger.add_log_entry(old_project, project, current_user)
        cached_cat.reset()
        cached_projects.get_project(project.short_name)
        flash(gettext('Project updated!'), 'success')
        return redirect(url_for('project.tasks',
                                short_name=project.short_name))
    elif request.method == 'POST':  # pragma: no cover
        flash(gettext('Please correct the errors'), 'error')
    else:
        # GET: pre-populate the form from the repo's project.json, keeping
        # reserved keys out of the free-form additional properties.
        form.process()
        form.description.data = project_json.get('description', '')
        form.webhook.data = project_json.get('webhook', '')
        reserved_keys = ['name', 'short_name', 'description', 'webhook',
                         'category_id']
        for k in reserved_keys:
            project_json.pop(k, None)
        form.additional_properties.data = json.dumps(project_json)
    return render_template('projects/github/import.html', form=form,
                           github_url=github_url, project=project)
def project_name_to_oid(shortname):
    """Return the id of the project with this short name, or None."""
    found = project_repo.get_by_shortname(shortname)
    if not found:
        return None
    return found.id
def export_tasks(current_user_email_addr, short_name, ty, expanded, filetype,
                 filters=None):
    """Export tasks/taskruns from a project.

    Builds the export for *ty* (e.g. task, task_run, consensus) in *filetype*
    (json/csv), emails it to the user as a .zip attachment and returns a
    status string.  When email delivery fails, a notification email is sent
    and the original exception is re-raised.
    """
    from pybossa.core import (task_csv_exporter, task_json_exporter,
                              project_repo)
    import pybossa.exporter.consensus_exporter as export_consensus

    project = project_repo.get_by_shortname(short_name)
    try:
        # Export data and upload .zip file locally
        if ty == 'consensus':
            export_fn = getattr(export_consensus,
                                'export_consensus_{}'.format(filetype))
        elif filetype == 'json':
            export_fn = task_json_exporter.make_zip
        elif filetype == 'csv':
            export_fn = task_csv_exporter.make_zip
        else:
            # Unsupported filetype: fall through to the failure email.
            export_fn = None

        # Construct message
        if export_fn is not None:
            # Success email
            subject = u'Data exported for your project: {0}'.format(project.name)
            msg = u'Your exported data is attached.'
        else:
            # Failure email
            subject = u'Data export failed for your project: {0}'.format(project.name)
            msg = u'There was an issue with your export. ' + \
                  u'Please try again or report this issue ' + \
                  u'to a {0} administrator.'
            msg = msg.format(current_app.config.get('BRAND'))

        body = u'Hello,\n\n' + msg + '\n\nThe {0} team.'
        body = body.format(current_app.config.get('BRAND'))
        mail_dict = dict(recipients=[current_user_email_addr],
                         subject=subject, body=body)
        message = Message(**mail_dict)

        # Attach export file to message
        if export_fn is not None:
            with export_fn(project, ty, expanded, filters) as fp:
                message.attach(fp.filename, "application/zip", fp.read())

        mail.send(message)
        job_response = u'{0} {1} file was successfully exported for: {2}'
        return job_response.format(
            ty.capitalize(), filetype.upper(), project.name)
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt are
        # not intercepted; the exception is re-raised after notifying.
        current_app.logger.exception(
            u'Export email failed - Project: {0}'
            .format(project.name))
        subject = u'Email delivery failed for your project: {0}'.format(project.name)
        msg = u'There was an error when attempting to deliver your data export via email.'
        body = u'Hello,\n\n' + msg + u'\n\nThe {0} team.'
        body = body.format(current_app.config.get('BRAND'))
        mail_dict = dict(recipients=[current_user_email_addr],
                         subject=subject, body=body)
        message = Message(**mail_dict)
        mail.send(message)
        raise