Esempio n. 1
0
def xml_export(short_name):
    """Export project results as an XML file.

    Looks up the project, serialises the ``info`` dict of each of its results
    into a ``<record-group>`` document (one ``<record>`` element per result)
    and returns it pretty-printed as a non-cacheable file attachment.

    :param short_name: The short name of the project.
    :returns: A response carrying the XML document as a download.
    :raises: A 404 HTTP error (via ``abort``) if no project has that name.
    """
    project = project_repo.get_by_shortname(short_name)
    if project is None:  # pragma: no cover
        abort(404)

    results = result_repo.filter_by(project_id=project.id)
    # Results whose info is not a dict are left out of the export.
    data = [r.info for r in results if isinstance(r.info, dict)]
    xml = dicttoxml.dicttoxml(data, custom_root='record-group',
                              item_func=lambda x: 'record', attr_type=False)
    pretty_xml = parseString(xml).toprettyxml()

    name = Exporter()._project_name_latin_encoded(project)
    secure_name = secure_filename('{0}_results.xml'.format(name))

    resp = make_response(pretty_xml)
    resp.headers["Content-Disposition"] = (
        "attachment; filename={0}".format(secure_name))
    resp.headers["Content-type"] = "text/xml"
    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation embedded the source indentation in the header value.
    resp.headers['Cache-Control'] = ("no-store, no-cache, must-revalidate, "
                                     "post-check=0, pre-check=0, max-age=0")
    return resp
Esempio n. 2
0
def csv_export(short_name):
    """Export project results as a CSV file.

    Every result becomes one row. When a result's ``info`` is a dict, each of
    its entries is additionally exposed as its own ``info_<key>`` column.
    Columns are the union of all row keys, written in sorted order so the
    layout of the file is the same on every export.

    :param short_name: The short name of the project.
    :returns: A response carrying the CSV document as a download.
    :raises: A 404 HTTP error (via ``abort``) if no project has that name.
    """
    project = project_repo.get_by_shortname(short_name)
    if project is None:  # pragma: no cover
        abort(404)

    name = Exporter()._project_name_latin_encoded(project)
    secure_name = secure_filename('{0}_{1}.csv'.format(name, 'results'))

    data = []
    for r in result_repo.filter_by(project_id=project.id):
        row = dict(r.dictize())
        # .get() so a result without an 'info' entry does not abort the export.
        info = row.get('info')
        if isinstance(info, dict):  # Explode info
            for k, v in info.items():
                row['info_{0}'.format(k)] = v
        data.append(row)

    # Iterating a dict yields its keys, so this is the union of all columns.
    headers = sorted(set(itertools.chain.from_iterable(data)))

    si = StringIO.StringIO()
    writer = UnicodeWriter(si)
    writer.writerow(headers)
    for row in data:
        # Rows lacking a column get an empty cell.
        writer.writerow([row.get(h, '') for h in headers])

    resp = make_response(si.getvalue())
    resp.headers["Content-Disposition"] = (
        "attachment; filename={0}".format(secure_name))
    resp.headers["Content-type"] = "text/csv"
    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation embedded the source indentation in the header value.
    resp.headers['Cache-Control'] = ("no-store, no-cache, must-revalidate, "
                                     "post-check=0, pre-check=0, max-age=0")
    return resp
Esempio n. 3
0
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with two fields regarding the tasks for the user:
        { 'done': 10,
          'total': 100
        }
    meaning the user has submitted 10 task runs for a project of 100 tasks.

    Authenticated users are matched by id, anonymous ones by remote address.
    The project is resolved from ``short_name`` when given, otherwise from
    ``project_id``; a 404 is raised when neither is given or nothing matches.
    """
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is used here
    # for task_runs too
    filters = {'project_id': project.id}
    if current_user.is_anonymous():
        filters['user_ip'] = request.remote_addr or '127.0.0.1'
    else:
        filters['user_id'] = current_user.id
    runs_done = task_repo.count_task_runs_with(**filters)
    progress = dict(done=runs_done, total=n_tasks(project.id))
    return Response(json.dumps(progress), mimetype="application/json")
Esempio n. 4
0
def export_results(short_name):
    """Export project results as an XML or CSV file.

    The format is taken from the ``format`` query argument. Without it the
    request is redirected to the index, or rejected with 404 if other query
    arguments were supplied. An unsupported format yields 415.

    :param short_name: The short name of the project.
    :returns: A non-cacheable attachment response in the requested format, or
        a redirect when no format was requested.
    :raises: 404 / 415 HTTP errors (via ``abort``) as described above.
    """
    project = project_repo.get_by_shortname(short_name)
    if project is None:  # pragma: no cover
        abort(404)

    fmt = request.args.get('format')
    if not fmt:
        if request.args:
            abort(404)
        return redirect(url_for('.index'))

    # Validate the format before touching the result repository so that an
    # unsupported request is rejected without querying for results.
    if fmt == "xml":
        build_response = get_xml_response
    elif fmt == "csv":
        build_response = get_csv_response
    else:
        abort(415)

    resp = build_response(result_repo.filter_by(project_id=project.id))

    name = Exporter()._project_name_latin_encoded(project)
    secure_name = secure_filename('{0}_results.{1}'.format(name, fmt))
    resp.headers["Content-Disposition"] = (
        "attachment; filename={0}".format(secure_name))
    resp.headers["Content-type"] = "text/{0}".format(fmt)
    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation embedded the source indentation in the header value.
    resp.headers['Cache-Control'] = ("no-store, no-cache, must-revalidate, "
                                     "post-check=0, pre-check=0, max-age=0")
    return resp
Esempio n. 5
0
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with three fields regarding the tasks for the user:
        { 'done': 10,
          'total': 100,
          'remaining': 90
        }
    meaning the user has submitted 10 task runs for a project of 100 tasks
    and 90 tasks are still available.

    Anonymous users are rejected with 401. The project is resolved from
    ``short_name`` when given, otherwise from ``project_id``; a 404 is raised
    when neither is given or nothing matches.
    """
    if current_user.is_anonymous():
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is used here
    # for task_runs too
    runs_done = task_repo.count_task_runs_with(project_id=project.id,
                                               user_id=current_user.id)
    still_available = n_available_tasks(project.id, current_user.id)
    progress = dict(done=runs_done,
                    total=n_tasks(project.id),
                    remaining=still_available)
    return Response(json.dumps(progress), mimetype="application/json")
Esempio n. 6
0
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object with two fields regarding the tasks for the user:
        { 'done': 10,
          'total': 100
        }
    meaning the user has submitted 10 task runs for a project of 100 tasks.

    Authenticated users are matched by id; anonymous ones by their remote
    address after it has been passed through ``anonymizer.ip``. The project
    is resolved from ``short_name`` when given, otherwise from ``project_id``;
    a 404 is raised when neither is given or nothing matches.
    """
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    project = (project_repo.get_by_shortname(short_name) if short_name
               else project_repo.get(project_id))
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    if current_user.is_anonymous:
        remote_ip = request.remote_addr or '127.0.0.1'
        who = {'user_ip': anonymizer.ip(remote_ip)}
    else:
        who = {'user_id': current_user.id}
    runs_done = task_repo.count_task_runs_with(project_id=project.id, **who)
    payload = dict(done=runs_done, total=n_tasks(project.id))
    return Response(json.dumps(payload), mimetype="application/json")
Esempio n. 7
0
def analyse():
    """Trigger analysis for a result or set of results.

    ``GET`` only confirms that the endpoint is listening. Any other method
    expects a JSON object with ``project_short_name`` and then either:

    * ``all`` or ``empty`` set to a truthy value, to analyse all results or
      the empty ones of the project (requires ``update`` authorisation); or
    * ``event`` equal to ``'task_completed'`` together with ``result_id``, to
      analyse a single result.

    :returns: The ``respond`` payload for a successful request.
    :raises: 404 if the project is unknown; 400 if the payload is not a JSON
        object, the project's category has no supported presenter, the event
        is not ``task_completed`` or ``result_id`` is missing.
    """
    if request.method == 'GET':
        return respond('The analysis endpoint is listening...')

    payload = request.json or {}
    if not isinstance(payload, dict):
        # A JSON array or scalar has no .get(); reject it as a bad request.
        abort(400)

    short_name = payload.get('project_short_name')
    project = project_repo.get_by_shortname(short_name)
    if not project:  # pragma: no cover
        abort(404)

    category = project_repo.get_category(project.category_id)
    # Without a category there is no presenter configuration to read.
    presenter = category.info.get('presenter') if category else None
    valid_presenters = ['z3950', 'iiif-annotation']
    if presenter not in valid_presenters:
        abort(400, 'Invalid task presenter')

    # Analyse all or empty
    if payload.get('all') or payload.get('empty'):
        ensure_authorized_to('update', project)

        if payload.get('all'):
            analyse_all(project.id, presenter)
        else:
            analyse_empty(project.id, presenter)

        return respond('OK')

    # Analyse single
    if payload.get('event') != 'task_completed':
        abort(400)

    result_id = payload.get('result_id')
    if result_id is None:
        # Previously a KeyError (HTTP 500); a missing field is a client error.
        abort(400)
    analyse_single(result_id, presenter)
    return respond('OK')
Esempio n. 8
0
def cancel_task(task_id=None):
    """Unlock task upon cancel so that same task can be presented again.

    Expects a JSON object carrying ``projectname`` (the project's short
    name). When the project uses one of the locking schedulers and the
    current user holds the lock on ``task_id``, the lock and the task
    reservation are released.

    :param task_id: Identifier of the task being cancelled.
    :returns: A JSON response ``{"success": true}`` with status 200, whether
        or not a lock was actually released.
    :raises: 401 if the user is not authenticated; 400 if the payload is not
        a JSON object or the project cannot be found.
    """
    if not current_user.is_authenticated:
        return abort(401)

    # request.json may be None (no JSON body) or a non-object; both used to
    # fail with AttributeError on .get() instead of a clean 400.
    data = request.json or {}
    if not isinstance(data, dict):
        return abort(400)

    projectname = data.get('projectname', None)
    project = project_repo.get_by_shortname(projectname)
    if not project:
        return abort(400)

    user_id = current_user.id
    scheduler, timeout = get_scheduler_and_timeout(project)
    if scheduler in (Schedulers.locked, Schedulers.user_pref,
                     Schedulers.task_queue):
        if has_lock(task_id, user_id, timeout):
            release_lock(task_id, user_id, timeout)
            current_app.logger.info(
                'Project {} - user {} cancelled task {}'.format(
                    project.id, user_id, task_id))
            release_reserve_task_lock_by_id(project.id,
                                            task_id,
                                            user_id,
                                            timeout,
                                            expiry=EXPIRE_LOCK_DELAY)

    return Response(json.dumps({'success': True}),
                    200,
                    mimetype="application/json")
Esempio n. 9
0
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object describing the tasks of a project for the user:
        { 'done': 10,
          'total': 100,
          'completed': 5,
          'remaining': 90,
          'locked': 2,
          'remaining_for_user': 45,
          'quiz': ...,
          'guidelines_updated': ...
        }
    ``done`` counts the user's task runs, ``locked`` the distinct task ids
    currently locked in the project. ``available_gold_tasks`` is added for
    admins and for subadmins listed among the project's owners.

    Anonymous users are rejected with 401. The project is resolved from
    ``short_name`` when given, otherwise from ``project_id``; a 404 is raised
    when neither is given or nothing matches.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    user_id = current_user.id
    guidelines_updated = _guidelines_updated(project.id, user_id)
    runs_done = task_repo.count_task_runs_with(project_id=project.id,
                                               user_id=user_id)
    remaining = n_available_tasks(project.id, include_gold_task=True)
    remaining_for_user = n_available_tasks_for_user(project, user_id)

    response = {}
    response['done'] = runs_done
    response['total'] = n_tasks(project.id)
    response['completed'] = n_completed_tasks(project.id)
    response['remaining'] = remaining
    # A set, so a task id reported more than once is counted a single time.
    locked_ids = set(task["task_id"] for task in get_locked_tasks(project))
    response['locked'] = len(locked_ids)
    response['remaining_for_user'] = remaining_for_user
    response['quiz'] = current_user.get_quiz_for_project(project)
    response['guidelines_updated'] = guidelines_updated

    if current_user.admin or (current_user.subadmin
                              and current_user.id in project.owners_ids):
        response['available_gold_tasks'] = n_unexpired_gold_tasks(project.id)
    return Response(json.dumps(response), mimetype="application/json")
def sync(short_name):
    """Sync a project with a GitHub repo.

    Renders the form asking for a GitHub URL. A valid POST redirects to the
    import view with that URL; an invalid POST re-renders the form after
    flashing an error. Requires ``update`` authorisation on the project and
    raises 404 when the project does not exist.
    """
    project = project_repo.get_by_shortname(short_name)
    if not project:  # pragma: no cover
        abort(404)
    ensure_authorized_to('update', project)

    form = GitHubURLForm(request.form)
    if request.method == 'POST':
        if form.validate():
            target = url_for('.import_repo',
                             github_url=form.github_url.data,
                             short_name=project.short_name)
            return redirect(target)
        flash(gettext('Please correct the errors'), 'error')  # pragma: no cover

    return render_template('projects/github/sync.html', form=form,
                           project=project)
Esempio n. 11
0
def auth_jwt_project(short_name):
    """Create a JWT for a project via its secret KEY.

    The caller presents the project's secret key in the ``Authorization``
    request header. When it matches, a token signed with that key (HS256)
    and carrying the project's short name and id is returned.

    :param short_name: The short name of the project.
    :returns: The encoded JWT.
    :raises: 403 if no ``Authorization`` header value was sent; 404 if the
        project does not exist or the key does not match.
    """
    import hmac

    project_secret_key = request.headers.get('Authorization')
    if not project_secret_key:
        return abort(403)

    project = project_repo.get_by_shortname(short_name)
    # A project without a secret key can never match a non-empty header.
    if not project or not project.secret_key:
        return abort(404)

    # Compare in constant time so response timing does not leak how much of
    # the key was guessed correctly. Both sides are encoded to bytes because
    # compare_digest rejects non-ASCII text strings.
    supplied = project_secret_key.encode('utf-8')
    expected = project.secret_key.encode('utf-8')
    if not hmac.compare_digest(supplied, expected):
        return abort(404)

    token = jwt.encode({'short_name': short_name,
                        'project_id': project.id},
                       project.secret_key, algorithm='HS256')
    return token
Esempio n. 12
0
def auth_jwt_project(short_name):
    """Create a JWT for a project via its secret KEY.

    The secret key is read from the ``Authorization`` request header and
    checked against the key stored on the project. On success the returned
    token is signed with that key (HS256) and contains the project's short
    name and id.

    :param short_name: The short name of the project.
    :returns: The encoded JWT.
    :raises: 403 if the header is missing or empty; 404 if the project is
        unknown or the key is wrong.
    """
    import hmac

    project_secret_key = request.headers.get('Authorization')
    if not project_secret_key:
        return abort(403)

    project = project_repo.get_by_shortname(short_name)
    stored_key = project.secret_key if project else None
    # Constant-time comparison (on bytes, since compare_digest refuses
    # non-ASCII text) avoids leaking key contents through response timing.
    key_matches = bool(stored_key) and hmac.compare_digest(
        stored_key.encode('utf-8'), project_secret_key.encode('utf-8'))
    if not key_matches:
        return abort(404)

    payload = {'short_name': short_name, 'project_id': project.id}
    return jwt.encode(payload, project.secret_key, algorithm='HS256')
    def get_projects_report(self, base_url):
        """Build the projects report as a pandas DataFrame.

        Each row returned by ``project_repo.get_projects_report()`` becomes
        one record. On top of the row's own columns it carries the project
        URL (``base_url`` + short name), the co-owners as a ``|``-separated
        list of ``name;email`` pairs (the string ``'None'`` when there are
        none) and ``has_completed``, the string form of whether no tasks are
        available any more.

        :param base_url: Prefix prepended to each short name to form the URL.
        :returns: A ``pd.DataFrame`` with one row per project.
        """
        projects = []

        for row in project_repo.get_projects_report():
            owners_ids = project_repo.get_by_shortname(
                row.short_name).owners_ids
            owners = user_repo.get_users(owners_ids)
            num_available_tasks = n_available_tasks(row.id)

            # The main owner already has dedicated columns; list the others.
            coowner_names = '|'.join(
                '{};{}'.format(user.name, user.email_addr)
                for user in owners if user.name != row.owner_name)
            coowner_names = coowner_names or 'None'

            project = OrderedDict()
            project['id'] = row.id
            project['name'] = row.name
            project['short_name'] = row.short_name
            project['url'] = base_url + row.short_name
            project['description'] = row.description
            project['long_description'] = row.long_description
            project['created'] = row.created
            project['owner_name'] = row.owner_name
            project['owner_email'] = row.owner_email
            project['coowners'] = coowner_names
            project['category_name'] = row.category_name
            project['allow_anonymous_contributors'] = (
                row.allow_anonymous_contributors)
            project['password_protected'] = row.password_protected
            project['webhook'] = row.webhook
            project['scheduler'] = row.scheduler
            project['has_completed'] = str(num_available_tasks == 0)
            project['finish_time'] = row.ft
            project['percent_complete'] = row.percent_complete
            project['n_tasks'] = row.n_tasks
            project['pending_tasks'] = row.pending_tasks
            project['n_workers'] = row.n_workers
            project['n_answers'] = row.n_answers
            project['workers'] = row.workers
            project['updated'] = row.updated
            project['oldest_available'] = row.oldest_available
            project['last_submission'] = row.last_submission
            project['n_taskruns'] = row.n_taskruns
            project['pending_taskruns'] = row.pending_taskruns

            projects.append(project)
        return pd.DataFrame(projects)
Esempio n. 14
0
def task_progress(project_id=None, short_name=None):
    """API endpoint for task progress.

    Returns a JSON object containing the number of tasks of the project that
    meet the filter constraints given as query arguments:
        { 'task_count': 42 }

    Each query argument must name either a task column (``task_fields``) or
    one of the project's searchable ``info`` fields. For ``info`` fields the
    values ``null`` (any case) and the empty string match tasks where the
    field is SQL NULL.

    :raises: 401 for anonymous users; 404 if no project is identified or
        found; 400 if a filter names an unknown field.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):
        return abort(404)

    if short_name:
        project = project_repo.get_by_shortname(short_name)
    else:
        project = project_repo.get(project_id)
    if not project:
        return abort(404)

    filter_fields = request.args
    task_info_fields = get_searchable_columns(project.id)

    # Build the query from the filters received on request.args. Field names
    # reach the SQL text only after matching one of the two allow-lists;
    # filter values are always passed as bound parameters.
    clauses = ["SELECT COUNT(*) FROM task WHERE project_id=" + str(project.id)]
    for key in filter_fields.keys():
        if key in task_fields:
            clauses.append(" AND {0}=:{1}".format(key, key))
        elif key in task_info_fields:
            # include support for empty string and null in URL
            if filter_fields[key].lower() in ["null", ""]:
                clauses.append(" AND info ->> '{0}' is Null".format(key))
            else:
                clauses.append(" AND info ->> '{0}'=:{1}".format(key, key))
        else:
            # An unknown field is a client mistake: answer 400 rather than
            # letting a bare Exception surface as a server error.
            return abort(
                400,
                "invalid key: the field that you are filtering by does not exist"
            )
    sql_query = text("".join(clauses) + ';')
    results = db.slave_session.execute(sql_query, filter_fields)

    # The first column of the single result row holds the count.
    num_tasks = results.first()[0]
    task_count_dict = dict(task_count=num_tasks)
    return Response(json.dumps(task_count_dict), mimetype="application/json")
Esempio n. 15
0
def user_progress(project_id=None, short_name=None):
    """API endpoint for user progress.

    Return a JSON object regarding the tasks for the user:
        { 'done': 10,
          'total': 100,
          'remaining': 90,
          'remaining_for_user': 45,
          'quiz': ...
        }
    ``done`` is the number of task runs the user has submitted for the
    project and ``quiz`` is whatever ``get_quiz_for_project`` reports for
    the current user.

    Anonymous users are rejected with 401. The project is resolved from
    ``short_name`` when given, otherwise from ``project_id``; a 404 is raised
    when neither is given or nothing matches.
    """
    if current_user.is_anonymous:
        return abort(401)
    if not (project_id or short_name):  # pragma: no cover
        return abort(404)

    project = (project_repo.get_by_shortname(short_name) if short_name
               else project_repo.get(project_id))
    if not project:
        return abort(404)

    # For now, keep this version, but wait until redis cache is
    # used here for task_runs too
    uid = current_user.id
    runs_done = task_repo.count_task_runs_with(project_id=project.id,
                                               user_id=uid)
    remaining = n_available_tasks(project.id, uid)
    remaining_for_user = n_available_tasks_for_user(project, uid)
    payload = dict(done=runs_done,
                   total=n_tasks(project.id),
                   remaining=remaining,
                   remaining_for_user=remaining_for_user,
                   quiz=current_user.get_quiz_for_project(project))
    return Response(json.dumps(payload), mimetype="application/json")
def import_repo(short_name):
    """Import a project from a GitHub repo.

    The repository URL is read from the ``github_url`` query argument. If the
    URL is rejected or the repository fails validation, the error is flashed
    and the user is redirected back to the ``.sync`` view.

    * ``GET`` renders the import form pre-filled from the repository's
      project JSON.
    * A valid ``POST`` downloads the selected files, updates the project,
      writes an audit log entry and redirects to the project's tasks view.
    * An invalid ``POST`` flashes an error and re-renders the form.

    :param short_name: The short name of the project to update.
    :raises: 404 (via ``abort``) if the project does not exist; whatever
        ``ensure_authorized_to`` raises when 'update' is not permitted.
    """
    project = project_repo.get_by_shortname(short_name)
    if not project:  # pragma: no cover
        abort(404)
    ensure_authorized_to('update', project)

    github_url = request.args.get('github_url')
    try:
        gh_repo = GitHubRepo(github_url)
    except GitHubURLError as e:
        flash(str(e), 'error')
        return redirect(url_for('.sync', short_name=project.short_name))

    # Contents are loaded before validate() is called on the repository.
    gh_repo.load_contents()
    try:
        gh_repo.validate()
    except InvalidPybossaProjectError as e:
        flash(str(e), 'error')
        return redirect(url_for('.sync', short_name=project.short_name))

    form = GitHubProjectForm(request.form)
    project_json = gh_repo.get_project_json()
    _populate_form(form, gh_repo.contents, project_json)
    # NOTE(review): ``categories`` is not referenced again in this function.
    categories = project_repo.get_all_categories()

    if request.method == 'POST' and form.validate():
        # The additional properties field holds JSON; it becomes the new info.
        info = json.loads(form.additional_properties.data)
        original_short_name = project_json['short_name']
        # For each selected file, fetch it and swap the repository's short
        # name for this project's short name in the downloaded content.
        if form.tutorial.data:
            resp = github.get(form.tutorial.data)
            info['tutorial'] = resp.content.replace(original_short_name,
                                                    project.short_name)
        if form.task_presenter.data:
            resp = github.get(form.task_presenter.data)
            info['task_presenter'] = resp.content.replace(original_short_name,
                                                          project.short_name)
        if form.results.data:
            resp = github.get(form.results.data)
            info['results'] = resp.content.replace(original_short_name,
                                                   project.short_name)
        # The long description is stored as downloaded, without replacement;
        # it is reset to None when no file was selected.
        long_description = None
        if form.long_description.data:
            resp = github.get(form.long_description.data)
            long_description = resp.content

        # Copy of the project taken before it is modified, for the audit log.
        old_project = Project(**project.dictize())
        project.description = form.description.data
        project.long_description = long_description
        project.category_id = form.category_id.data
        project.webhook = form.webhook.data
        project.info = info

        if form.thumbnail.data:
            data = github.get(form.thumbnail.data).content
            # The current time makes the stored file name differ per import.
            prefix = time.time()
            filename = "project_%s_thumbnail_%i.png" % (project.id, prefix)
            container = "user_%s" % current_user.id
            _download(filename, container, data)
            # NOTE(review): project.info was replaced by ``info`` above, so
            # this only sees a thumbnail named in the submitted properties,
            # not one the project had before -- confirm this is intended.
            if project.info.get("thumbnail"):
                uploader.delete_file(project.info["thumbnail"], container)
            project.info['container'] = container
            project.info['thumbnail'] = filename
        try:
            project_repo.update(project)
        except sqlalchemy.exc.DataError as e:  # pragma: no cover
            flash('''DataError: {0} <br><br>Please check the files being
                  imported from GitHub'''.format(e.orig), 'danger')
            return redirect(url_for('.sync', short_name=project.short_name))

        auditlogger.add_log_entry(old_project, project, current_user)
        cached_cat.reset()
        cached_projects.get_project(project.short_name)
        flash(gettext('Project updated!'), 'success')
        return redirect(url_for('project.tasks',
                                short_name=project.short_name))

    elif request.method == 'POST':  # pragma: no cover
        flash(gettext('Please correct the errors'), 'error')

    else:
        # Not a POST: pre-fill the form from the repository's project JSON.
        form.process()
        form.description.data = project_json.get('description', '')
        form.webhook.data = project_json.get('webhook', '')

        # These keys are dropped from the JSON; whatever remains is offered
        # as editable "additional properties".
        reserved_keys = ['name', 'short_name', 'description', 'webhook',
                         'category_id']
        for k in reserved_keys:
            project_json.pop(k, None)
        form.additional_properties.data = json.dumps(project_json)

    return render_template('projects/github/import.html', form=form,
                           github_url=github_url, project=project)
Esempio n. 17
0
def project_name_to_oid(shortname):
    """Return the id of the project with this short name, or None.

    ``None`` is returned when the repository lookup yields nothing.
    """
    found = project_repo.get_by_shortname(shortname)
    if not found:
        return None
    return found.id
Esempio n. 18
0
def export_tasks(current_user_email_addr, short_name,
                 ty, expanded, filetype, filters=None):
    """Export tasks/taskruns from a project and email them to the user.

    The exporter is chosen from ``ty`` and ``filetype``: consensus exports
    use ``export_consensus_<filetype>``, otherwise the JSON or CSV task
    exporter. An unsupported ``filetype`` produces an "export failed" email
    without an attachment.

    :param current_user_email_addr: Recipient of the resulting email.
    :param short_name: The short name of the project to export.
    :param ty: What to export, e.g. ``'consensus'``; passed to the exporter.
    :param expanded: Passed through to the exporter.
    :param filetype: ``'json'`` or ``'csv'``.
    :param filters: Optional filters passed through to the exporter.
    :returns: A human-readable status line describing the export.
    :raises: Re-raises any error hit while exporting or sending, after
        logging it and trying to email a failure notice to the user.
    """
    from pybossa.core import (task_csv_exporter, task_json_exporter,
                              project_repo)
    import pybossa.exporter.consensus_exporter as export_consensus

    project = project_repo.get_by_shortname(short_name)
    # Fall back to the requested short name so the failure path can still
    # build its log line and email when the project lookup returned nothing
    # (previously the handler itself crashed on ``project.name``).
    project_name = project.name if project is not None else short_name

    try:
        if project is None:
            raise ValueError(u'Project not found: {0}'.format(short_name))

        # Export data and upload .zip file locally
        if ty == 'consensus':
            export_fn = getattr(export_consensus,
                                'export_consensus_{}'.format(filetype))
        elif filetype == 'json':
            export_fn = task_json_exporter.make_zip
        elif filetype == 'csv':
            export_fn = task_csv_exporter.make_zip
        else:
            export_fn = None

        # Construct message
        if export_fn is not None:
            # Success email
            subject = u'Data exported for your project: {0}'.format(project_name)
            msg = u'Your exported data is attached.'
        else:
            # Failure email
            subject = u'Data export failed for your project: {0}'.format(project_name)
            msg = u'There was an issue with your export. ' + \
                  u'Please try again or report this issue ' + \
                  u'to a {0} administrator.'
            msg = msg.format(current_app.config.get('BRAND'))

        body = u'Hello,\n\n' + msg + '\n\nThe {0} team.'
        body = body.format(current_app.config.get('BRAND'))
        message = Message(recipients=[current_user_email_addr],
                          subject=subject,
                          body=body)

        # Attach export file to message
        if export_fn is not None:
            with export_fn(project, ty, expanded, filters) as fp:
                message.attach(fp.filename, "application/zip", fp.read())

        mail.send(message)
        job_response = u'{0} {1} file was successfully exported for: {2}'
        return job_response.format(
                ty.capitalize(), filetype.upper(), project_name)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that interpreter
        # exits and keyboard interrupts are not intercepted here.
        current_app.logger.exception(
                u'Export email failed - Project: {0}'
                .format(project_name))
        subject = u'Email delivery failed for your project: {0}'.format(project_name)
        msg = u'There was an error when attempting to deliver your data export via email.'
        body = u'Hello,\n\n' + msg + u'\n\nThe {0} team.'
        body = body.format(current_app.config.get('BRAND'))
        message = Message(recipients=[current_user_email_addr],
                          subject=subject,
                          body=body)
        mail.send(message)
        raise