Ejemplo n.º 1
0
    def test_task_browse_gold_task_filters(self):
        """gold_task filter maps onto the calibration SQL param; 'All'
        drops the filter and any other value raises ValueError."""
        expected_filter_query = " AND task.id = :task_id AND task.state='ongoing' AND task.calibration = :calibration"
        for gold in ('1', '0'):
            raw_filters = dict(task_id=1,
                               hide_completed=True,
                               gold_task=gold,
                               order_by='task_id')
            query, params = get_task_filters(raw_filters)
            assert query == expected_filter_query, query
            assert params == {'task_id': 1, 'calibration': gold}, params

        default_columns = [
            u'task_id', u'priority', u'pcomplete',
            u'created', u'finish_time', u'gold_task',
            u'actions'
        ]
        for gold in ('1', '0'):
            args = dict(task_id=12345, gold_task=gold)
            expected_args = dict(task_id=12345,
                                 gold_task=gold,
                                 order_by_dict={},
                                 display_columns=list(default_columns))
            pargs = parse_tasks_browse_args(args)
            assert pargs == expected_args, pargs

        # gold_task='All' means "no gold filtering": the key is dropped
        # from the parsed args entirely.
        pargs = parse_tasks_browse_args(dict(task_id=12345, gold_task='All'))
        expected_args = dict(task_id=12345,
                             order_by_dict={},
                             display_columns=list(default_columns))
        assert pargs == expected_args, pargs

        # Any value other than '0', '1' or 'All' is rejected.
        assert_raises(ValueError, parse_tasks_browse_args,
                      dict(task_id=12345, gold_task='7'))
Ejemplo n.º 2
0
 def update_priority(self, project_id, priority, filters):
     """Set priority_0 on every task of the project that matches *filters*.

     :param project_id: id of the project whose tasks are updated
     :param priority: desired priority, clamped into [0.0, 1.0]
     :param filters: task browse filters, passed to get_task_filters
     """
     # Clamp the requested priority into the valid [0.0, 1.0] range.
     priority = min(1.0, priority)
     priority = max(0.0, priority)
     conditions, params = get_task_filters(filters)
     # to_update mirrors the browse-tasks join so filter conditions that
     # reference run counts (ct) or finish time (ft) can be applied.
     sql = text('''
                WITH to_update AS (
                     SELECT task.id as id,
                     coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                     priority_0, task.created
                     FROM task LEFT OUTER JOIN
                     (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                     MAX(finish_time) as ft FROM task_run
                     WHERE project_id=:project_id GROUP BY task_id) AS log_counts
                     ON task.id=log_counts.task_id
                     WHERE task.project_id=:project_id {}
                )
                UPDATE task
                SET priority_0=:priority
                WHERE project_id=:project_id AND task.id in (
                     SELECT id FROM to_update);
                '''.format(conditions))
     self.db.session.execute(sql, dict(priority=priority,
                                       project_id=project_id,
                                       **params))
     self.db.session.commit()
     # Invalidate cached project data after the bulk update.
     cached_projects.clean_project(project_id)
Ejemplo n.º 3
0
    def test_task_browse_get_task_filters(self):
        """get_task_filters turns browse filters into SQL conditions + params."""
        # One filter of each supported kind: id, hide-completed, completion
        # range, priority range, created/finish-time ranges, ordering, a
        # task.info field match, and user-preference (language/location)
        # filters plus an explicit state.
        filters = dict(task_id=1,
                       hide_completed=True,
                       pcomplete_from='0.5',
                       pcomplete_to='0.7',
                       priority_from=0.0,
                       priority_to=0.5,
                       created_from='2018-01-01T00:00:00.0001',
                       created_to='2018-12-12T00:00:00.0001',
                       ftime_from='2018-01-01T00:00:00.0001',
                       ftime_to='2018-12-12T00:00:00.0001',
                       order_by='task_id',
                       filter_by_field=[(u'CompanyName', u'starts with',
                                         u'abc')],
                       filter_by_upref=dict(languages=['en'],
                                            locations=['us']),
                       state='ongoing')
        expected_filter_query = ''' AND task.id = :task_id AND task.state=\'ongoing\' AND (coalesce(ct, 0)/task.n_answers) >= :pcomplete_from AND LEAST(coalesce(ct, 0)/task.n_answers, 1.0) <= :pcomplete_to AND priority_0 >= :priority_from AND priority_0 <= :priority_to AND task.created >= :created_from AND task.created <= :created_to AND ft >= :ftime_from AND ft <= :ftime_to AND state = :state AND (COALESCE(task.info->>\'CompanyName\', \'\') ilike :filter_by_field_0 escape \'\\\') AND ( ( (task.user_pref-> \'locations\' IS NULL AND task.user_pref-> \'languages\' IS NULL) OR (task.user_pref @> \'{"languages": ["en"]}\' OR task.user_pref @> \'{"locations": ["us"]}\') ) )'''
        # NOTE(review): date params come back shifted by +5h with a UTC
        # offset attached (apparently a local/EST-to-UTC conversion inside
        # get_task_filters — confirm), and the 'starts with' field filter
        # becomes an ilike pattern 'abc%'.
        expected_params = {
            'task_id': 1,
            'pcomplete_from': '0.5',
            'pcomplete_to': '0.7',
            'ftime_to': '2018-12-12T05:00:00.000100+00:00',
            'created_from': '2018-01-01T05:00:00.000100+00:00',
            'ftime_from': '2018-01-01T05:00:00.000100+00:00',
            'state': 'ongoing',
            'priority_to': 0.5,
            'priority_from': 0.0,
            'filter_by_field_0': 'abc%',
            'created_to': '2018-12-12T05:00:00.000100+00:00'
        }

        filters, params = get_task_filters(filters)
        assert filters == expected_filter_query, filters
        assert params == expected_params, params
Ejemplo n.º 4
0
    def update_tasks_redundancy(self, project, n_answers, filters=None):
        """
        Update the n_answer of every task from a project and their state.
        Use raw SQL for performance. Mark tasks as exported = False for
        tasks with curr redundancy < new redundancy, with state as completed
        and were marked as exported = True

        :param project: project whose tasks are updated
        :param n_answers: new redundancy, within [MIN_REDUNDANCY, MAX_REDUNDANCY]
        :param filters: optional task browse filters limiting the update
        :raises ValueError: if n_answers is out of the allowed range
        :returns: message describing tasks that were NOT updated
        """
        from pybossa.jobs import check_and_send_task_notifications

        # Reject out-of-range redundancy values before doing any work.
        if n_answers < self.MIN_REDUNDANCY or n_answers > self.MAX_REDUNDANCY:
            raise ValueError("Invalid redundancy value: {}".format(n_answers))

        filters = filters or {}
        # Window (in days) within which file-upload tasks may still be updated.
        task_expiration = '{} day'.format(self.rdancy_upd_exp)
        conditions, params = get_task_filters(filters)
        # Build the caller-facing message about excluded tasks up front.
        tasks_not_updated = self._get_redundancy_update_msg(
            project, n_answers, conditions, params, task_expiration)

        self.update_task_exported_status(project.id, n_answers, conditions, params, task_expiration)
        # all_tasks_with_orig_filter mirrors the browse-tasks join so filter
        # conditions on ct/ft apply.  tasks_with_file_urls are tasks whose
        # info JSON contains a key matching '%__upload_url%'; those are only
        # updated while still 'ongoing' and newer than task_expiration.
        sql = text('''
                   WITH all_tasks_with_orig_filter AS (
                        SELECT task.id as id,
                        coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                        priority_0, task.created
                        FROM task LEFT OUTER JOIN
                        (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                        MAX(finish_time) as ft FROM task_run
                        WHERE project_id=:project_id GROUP BY task_id) AS log_counts
                        ON task.id=log_counts.task_id
                        WHERE task.project_id=:project_id {}
                   ),

                   tasks_with_file_urls AS (
                        SELECT t.id as id FROM task t
                        WHERE t.id IN (SELECT id from all_tasks_with_orig_filter)
                        AND jsonb_typeof(t.info) = 'object'
                        AND EXISTS(SELECT TRUE FROM jsonb_object_keys(t.info) AS key
                        WHERE key ILIKE '%\_\_upload\_url%')
                   ),

                   tasks_excl_file_urls AS (
                        SELECT id FROM all_tasks_with_orig_filter
                        WHERE id NOT IN (SELECT id FROM tasks_with_file_urls)
                   )

                   UPDATE task SET n_answers=:n_answers,
                   state='ongoing' WHERE project_id=:project_id AND
                   ((id IN (SELECT id from tasks_excl_file_urls)) OR
                   (id IN (SELECT id from tasks_with_file_urls) AND state='ongoing'
                   AND TO_DATE(created, 'YYYY-MM-DD\THH24:MI:SS.US') >= NOW() - :task_expiration ::INTERVAL));'''
                   .format(conditions))
        self.db.session.execute(sql, dict(n_answers=n_answers,
                                          project_id=project.id,
                                          task_expiration=task_expiration,
                                          **params))
        # Recompute task states, refresh caches and notify as needed.
        self.update_task_state(project.id)
        self.db.session.commit()
        cached_projects.clean_project(project.id)
        check_and_send_task_notifications(project.id)
        return tasks_not_updated
Ejemplo n.º 5
0
    def delete_valid_from_project(self,
                                  project,
                                  force_reset=False,
                                  filters=None):
        """Delete tasks from a project via the bulkdel session.

        With force_reset=False only tasks that have no associated result
        are deleted.  With force_reset=True every task matching *filters*
        is deleted together with its results and task runs.

        :param project: project whose tasks are deleted
        :param force_reset: when True, also remove results and task runs
        :param filters: task browse filters (used only with force_reset)
        """
        if not force_reset:
            # Delete only tasks that have no results associated.
            params = {}
            sql = text('''
                DELETE FROM task WHERE task.project_id=:project_id
                AND task.id NOT IN
                (SELECT task_id FROM result
                WHERE result.project_id=:project_id GROUP BY result.task_id);
                ''')
        else:
            # Force reset: remove the matching tasks plus all their results
            # and task runs.
            filters = filters or {}
            conditions, params = get_task_filters(filters)

            # bulkdel db conn is with db user having session_replication_role;
            # when bulkdel is not configured, make an explicit sql query to
            # set session replication role to replica.
            # Guard against SQLALCHEMY_BINDS being unset (None) so the
            # membership test cannot raise TypeError.
            sql_session_repl = ''
            binds = current_app.config.get('SQLALCHEMY_BINDS') or {}
            if 'bulkdel' not in binds:
                sql_session_repl = 'SET session_replication_role TO replica;'
            sql = text('''
                BEGIN;

                {}

                CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                    SELECT task.id as id,
                    coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                    priority_0, task.created
                    FROM task LEFT OUTER JOIN
                    (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                    MAX(finish_time) as ft FROM task_run
                    WHERE project_id=:project_id GROUP BY task_id) AS log_counts
                    ON task.id=log_counts.task_id
                    WHERE task.project_id=:project_id {}
                );
                DELETE FROM result WHERE project_id=:project_id
                       AND task_id in (SELECT id FROM to_delete);
                DELETE FROM task_run WHERE project_id=:project_id
                       AND task_id in (SELECT id FROM to_delete);
                DELETE FROM task WHERE task.project_id=:project_id
                       AND id in (SELECT id FROM to_delete);
                COMMIT;
                '''.format(sql_session_repl, conditions))
        self.db.bulkdel_session.execute(sql,
                                        dict(project_id=project.id, **params))
        self.db.bulkdel_session.commit()
        # Invalidate caches and remove stale export archives.
        cached_projects.clean_project(project.id)
        self._delete_zip_files_from_store(project)
Ejemplo n.º 6
0
def browse_tasks(project_id, args):
    """Cache browse tasks view for a project.

    :param project_id: id of the project
    :param args: browse arguments (filters, order_by, paging)
    :returns: (total_count, tasks) where tasks is a list of dicts with run
        counts, priority, formatted dates and completion status
    """
    # TODO: use Jinja filters to format date
    # Hoisted out of the result loop — the original re-defined this closure
    # on every iteration.
    def _format_date(date):
        # None (no finish time yet) stays None.
        if date is not None:
            return convert_utc_to_est(date).strftime('%m-%d-%y %H:%M')

    tasks = []
    total_count = task_count(project_id, args)
    if not total_count:
        return total_count, tasks

    filters, filter_params = get_task_filters(args)
    # NOTE(review): order_by is interpolated directly into the SQL string;
    # presumably it is validated upstream (e.g. parse_tasks_browse_args) —
    # confirm before exposing to untrusted input.
    sql = text('''
               SELECT task.id,
               coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
               priority_0, task.created, task.calibration
               FROM task LEFT OUTER JOIN
               (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
               MAX(finish_time) as ft FROM task_run
               WHERE project_id=:project_id GROUP BY task_id) AS log_counts
               ON task.id=log_counts.task_id
               WHERE task.project_id=:project_id''' + filters +
               " ORDER BY %s" % (args.get('order_by') or 'id ASC') +
               " LIMIT :limit OFFSET :offset")

    limit = args.get('records_per_page') or 10
    offset = args.get('offset') or 0

    results = session.execute(
        sql,
        dict(project_id=project_id,
             limit=limit,
             offset=offset,
             **filter_params))

    for row in results:
        task = dict(id=row.id,
                    n_task_runs=row.n_task_runs,
                    n_answers=row.n_answers,
                    priority_0=row.priority_0,
                    finish_time=_format_date(row.ft),
                    created=_format_date(row.created),
                    calibration=row.calibration)
        task['pct_status'] = _pct_status(row.n_task_runs, row.n_answers)
        tasks.append(task)
    return total_count, tasks
Ejemplo n.º 7
0
def get_consensus_data_metadata(project_id, filters):
    """Collect consensus metadata for a project's completed (or gold) tasks.

    Joins each task with its aggregated task runs (ids, timestamps, per-user
    info and contributor details) and with its latest result row, then
    delegates formatting to format_consensus.

    :param project_id: id of the project
    :param filters: task browse filters, passed to get_task_filters
    """
    conditions, filter_params = get_task_filters(filters)
    query = text('''
        SELECT
            task.id as task_id,
            task.project_id as project_id,
            task.calibration as gold,
            task.info as task_info,
            task.user_pref as user_pref,
            r.info as consensus,
            taskruns.task_run__id as task_run__id,
            taskruns.task_run__created as task_run__created,
            taskruns.task_run__finish_time as task_run__finish_time,
            taskruns.task_run__user_id as task_run__user_id,
            taskruns.task_run__info as task_run__info,
            taskruns.email_addr as email_addr,
            taskruns.fullname as fullname

        FROM task LEFT JOIN (
            SELECT task_id,
                CAST(COUNT(tr.id) AS FLOAT) AS ct,
                MAX(tr.finish_time) as ft,
                array_agg(tr.id) as task_run__id,
                array_agg(tr.created) as task_run__created,
                array_agg(tr.finish_time) as task_run__finish_time,
                json_object_agg(u.name, tr.info) as task_run__info,
                array_agg(tr.user_id) as task_run__user_id,
                array_agg(u.email_addr) as email_addr,
                array_agg(u.fullname) as fullname
            FROM task_run tr
            JOIN "user" u
            ON tr.user_id = u.id
            WHERE project_id = :project_id
            GROUP BY task_id
            ) AS taskruns
        ON task.id = taskruns.task_id
        LEFT JOIN result r
            ON task.id = r.task_id
            AND r.last_version = True
        WHERE (task.state = 'completed' OR (task.calibration = 1 AND taskruns.ct > 0))
        AND task.project_id=:project_id
        {};
    '''.format(conditions))
    params = dict(project_id=project_id, **filter_params)
    # Read-only reporting query: run against the slave (read) session.
    rows = db.slave_session.execute(query, params).fetchall()
    return format_consensus(rows)
Ejemplo n.º 8
0
def browse_tasks_export_count(obj, project_id, expanded, filters):
    """Returns the count of the tasks from the browse tasks view
    for a project using the same filters that are selected by
    the user in the UI.

    :param obj: 'task' or 'task_run'; any other value returns None
    :param project_id: id of the project
    :param expanded: unused here; kept for signature parity with
        browse_tasks_export
    :param filters: task browse filters, passed to get_task_filters
    """
    conditions, filter_params = get_task_filters(filters)
    if obj == 'task':
        # Count tasks; the log_counts join exposes ct/ft so that
        # completion/finish-time filter conditions can be applied.
        sql = text('''
                   SELECT COUNT(task.id)
                     FROM task
                     LEFT OUTER JOIN (
                       SELECT task_id
                            , CAST(COUNT(id) AS FLOAT) AS ct
                            , MAX(finish_time) as ft
                         FROM task_run
                           WHERE project_id = :project_id
                           GROUP BY task_id
                       ) AS log_counts
                       ON task.id = log_counts.task_id
                     WHERE project_id = :project_id
                     {0}
                   '''.format(conditions))
    elif obj == 'task_run':
        # Count task runs joined to their task, since the filter
        # conditions reference task columns.
        sql = text('''
                  SELECT COUNT(task_run.id)
                    FROM task_run
                    LEFT JOIN task
                      ON task_run.task_id = task.id
                    LEFT OUTER JOIN (
                      SELECT task_id
                           , CAST(COUNT(id) AS FLOAT) AS ct
                           , MAX(finish_time) as ft
                        FROM task_run
                          WHERE project_id = :project_id
                          GROUP BY task_id
                      ) AS log_counts
                      ON task.id = log_counts.task_id
                    WHERE task_run.project_id = :project_id
                    {0}
                  '''.format(conditions))
    else:
        # Unsupported object type: nothing to count.
        return

    return session.execute(sql, dict(project_id=project_id,
                                     **filter_params)).scalar()
Ejemplo n.º 9
0
def task_count(project_id, filters):
    """Count the tasks of a project that match the given browse filters."""
    conditions, filter_params = get_task_filters(filters)
    # Window-function count over the same join used by the browse view,
    # limited to a single row.
    query_template = '''
               SELECT COUNT(*) OVER() as total_count, task.id,
               coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
               priority_0, task.created
               FROM task LEFT OUTER JOIN
               (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
               MAX(finish_time) as ft FROM task_run
               WHERE project_id=:project_id GROUP BY task_id) AS log_counts
               ON task.id=log_counts.task_id
               WHERE task.project_id=:project_id {} LIMIT 1'''

    first_row = session.execute(text(query_template.format(conditions)),
                                dict(project_id=project_id,
                                     **filter_params)).first()

    if first_row is None:
        # No task matched the filters at all.
        return 0
    return first_row.total_count
Ejemplo n.º 10
0
    def delete_valid_from_project(self, project, force_reset=False, filters=None):
        """Delete tasks from a project; with force_reset also drop results.

        :param project: project whose tasks are deleted
        :param force_reset: when True, remove results and task runs as well
        :param filters: task browse filters (used only with force_reset)
        """
        if not force_reset:
            """Delete only tasks that have no results associated."""
            params = {}
            sql = text('''
                DELETE FROM task WHERE task.project_id=:project_id
                AND task.id NOT IN
                (SELECT task_id FROM result
                WHERE result.project_id=:project_id GROUP BY result.task_id);
                ''')
        else:
            """force reset, remove all results."""
            filters = filters or {}
            conditions, params = get_task_filters(filters)
            # to_delete snapshots matching task ids inside the transaction;
            # child rows (result, task_run) are removed before the tasks.
            sql = text('''
                BEGIN;
                CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                    SELECT task.id as id,
                    coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                    priority_0, task.created
                    FROM task LEFT OUTER JOIN
                    (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                    MAX(finish_time) as ft FROM task_run
                    WHERE project_id=:project_id GROUP BY task_id) AS log_counts
                    ON task.id=log_counts.task_id
                    WHERE task.project_id=:project_id {}
                );
                DELETE FROM result WHERE project_id=:project_id
                       AND task_id in (SELECT id FROM to_delete);
                DELETE FROM task_run WHERE project_id=:project_id
                       AND task_id in (SELECT id FROM to_delete);
                DELETE FROM task WHERE task.project_id=:project_id
                       AND id in (SELECT id FROM to_delete);

                COMMIT;
                '''.format(conditions))
        self.db.session.execute(sql, dict(project_id=project.id, **params))
        self.db.session.commit()
        # Invalidate caches and remove stale export archives.
        cached_projects.clean_project(project.id)
        self._delete_zip_files_from_store(project)
Ejemplo n.º 11
0
def task_count(project_id, args):
    """Count how many of a project's tasks match the given browse filters."""
    conditions, filter_params = get_task_filters(args)
    # Count the ids selected by the same filtered join the browse view uses.
    count_query = '''
                SELECT COUNT(*) AS total_count
                FROM task WHERE task.id IN
                (
                SELECT task.id FROM task LEFT OUTER JOIN
                    (
                    SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                    MAX(finish_time) as ft FROM task_run
                    WHERE project_id=:project_id GROUP BY task_id
                    ) AS log_counts
                    ON task.id=log_counts.task_id
                    WHERE task.project_id=:project_id {}
                )
               '''.format(conditions)

    first_row = session.execute(text(count_query),
                                dict(project_id=project_id,
                                     **filter_params)).first()

    if first_row is None:
        return 0
    return first_row.total_count
Ejemplo n.º 12
0
    def update_tasks_redundancy(self, project, n_answers, filters=None):
        """
        Update the n_answer of every task from a project and their state.
        Use raw SQL for performance. Mark tasks as exported = False for
        tasks with curr redundancy < new redundancy, with state as completed
        and were marked as exported = True

        :param project: project whose tasks are updated
        :param n_answers: new redundancy, within [MIN_REDUNDANCY, MAX_REDUNDANCY]
        :param filters: optional task browse filters limiting the update
        :raises ValueError: if n_answers is out of the allowed range
        """
        # Validate before doing any work (consistent with the other
        # implementation, which checks the range first).
        if n_answers < self.MIN_REDUNDANCY or n_answers > self.MAX_REDUNDANCY:
            raise ValueError("Invalid redundancy value: {}".format(n_answers))

        filters = filters or {}
        conditions, params = get_task_filters(filters)

        self.update_task_exported_status(project.id, n_answers, conditions, params)

        # to_update mirrors the browse-tasks join so filter conditions on
        # run counts (ct) and finish time (ft) can be applied.
        sql = text('''
                   WITH to_update AS (
                        SELECT task.id as id,
                        coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                        priority_0, task.created
                        FROM task LEFT OUTER JOIN
                        (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                        MAX(finish_time) as ft FROM task_run
                        WHERE project_id=:project_id GROUP BY task_id) AS log_counts
                        ON task.id=log_counts.task_id
                        WHERE task.project_id=:project_id {}
                   )
                   UPDATE task SET n_answers=:n_answers,
                   state='ongoing' WHERE project_id=:project_id
                   AND task.id in (SELECT id from to_update);'''
                   .format(conditions))
        self.db.session.execute(sql, dict(n_answers=n_answers,
                                          project_id=project.id,
                                          **params))
        # Recompute task states now that the redundancy changed.
        self.update_task_state(project.id, n_answers)
        self.db.session.commit()
        cached_projects.clean_project(project.id)
Ejemplo n.º 13
0
def delete_bulk_tasks(data):
    """Delete tasks in bulk from project.

    *data* carries the project id/name, the requesting user and coowners,
    the force_reset flag and optional browse filters.  A notification email
    is sent to the requester and all coowners afterwards.
    """
    from sqlalchemy.sql import text
    from pybossa.core import db
    import pybossa.cache.projects as cached_projects
    from pybossa.cache.task_browse_helpers import get_task_filters

    project_id = data['project_id']
    project_name = data['project_name']
    curr_user = data['curr_user']
    coowners = data['coowners']
    current_user_fullname = data['current_user_fullname']
    force_reset = data['force_reset']
    params = {}

    # lock tasks for given project with SELECT FOR UPDATE
    # create temp table with all tasks to be deleted
    # during transaction, disable constraints check with session_replication_role
    # delete rows from child tables first and then from parent
    if not force_reset:
        """Delete only tasks that have no results associated."""
        sql = text('''
                BEGIN;
                SELECT task_id FROM counter WHERE project_id=:project_id FOR UPDATE;
                SELECT task_id FROM task_run WHERE project_id=:project_id FOR UPDATE;
                SELECT id FROM task WHERE project_id=:project_id FOR UPDATE;

                SET session_replication_role TO replica;

                CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                    SELECT task.id as id FROM task WHERE project_id=:project_id
                    AND task.id NOT IN
                    (SELECT task_id FROM result
                    WHERE result.project_id=:project_id
                    GROUP BY result.task_id)
                );

                DELETE FROM counter WHERE project_id=:project_id
                        AND task_id IN (SELECT id FROM to_delete);
                DELETE FROM task_run WHERE project_id=:project_id
                        AND task_id IN (SELECT id FROM to_delete);
                DELETE FROM task WHERE project_id=:project_id
                        AND id IN (SELECT id FROM to_delete);

                COMMIT;
                ''')
        msg = ("Tasks and taskruns with no associated results have been "
               "deleted from project {0} by {1}"
               .format(project_name, current_user_fullname))
    else:
        # Force reset: delete every task matching the browse filters,
        # together with its results and task runs.
        args = data.get('filters', {})
        conditions, params = get_task_filters(args)
        sql = text('''
                BEGIN;
                SELECT task_id FROM counter WHERE project_id=:project_id FOR UPDATE;
                SELECT task_id FROM result WHERE project_id=:project_id FOR UPDATE;
                SELECT task_id FROM task_run WHERE project_id=:project_id FOR UPDATE;
                SELECT id FROM task WHERE project_id=:project_id FOR UPDATE;

                SET session_replication_role TO replica;

                CREATE TEMP TABLE to_delete ON COMMIT DROP AS (
                    SELECT task.id as id,
                    coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
                    priority_0, task.created
                    FROM task LEFT OUTER JOIN
                    (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
                    MAX(finish_time) as ft FROM task_run
                    WHERE project_id=:project_id GROUP BY task_id) AS log_counts
                    ON task.id=log_counts.task_id
                    WHERE task.project_id=:project_id {}
                );

                DELETE FROM counter WHERE project_id=:project_id
                        AND task_id IN (SELECT id FROM to_delete);
                DELETE FROM result WHERE project_id=:project_id
                       AND task_id in (SELECT id FROM to_delete);
                DELETE FROM task_run WHERE project_id=:project_id
                       AND task_id in (SELECT id FROM to_delete);
                DELETE FROM task WHERE task.project_id=:project_id
                       AND id in (SELECT id FROM to_delete);

                COMMIT;
                '''.format(conditions))
        msg = ("Tasks, taskruns and results associated have been "
               "deleted from project {0} as requested by {1}"
               .format(project_name, current_user_fullname))
    db.session.execute(sql, dict(project_id=project_id, **params))
    cached_projects.clean_project(project_id)
    # Notify the requesting user and all coowners about the deletion.
    subject = 'Tasks deletion from %s' % project_name
    body = 'Hello,\n\n' + msg + '\n\nThe %s team.'\
        % current_app.config.get('BRAND')

    recipients = [curr_user]
    for user in coowners:
        recipients.append(user.email_addr)

    mail_dict = dict(recipients=recipients, subject=subject, body=body)
    send_mail(mail_dict)
Ejemplo n.º 14
0
def browse_tasks_export(obj, project_id, expanded, filters, disclose_gold):
    """Export tasks from the browse tasks view for a project
    using the same filters that are selected by the user
    in the UI.

    :param obj: 'task' or 'task_run'; any other value returns None
    :param expanded: for task runs, also join task and user details per run
    :param disclose_gold: include gold-answer fields in the selected columns
    """
    # Column sets depend on whether gold answers may be disclosed.
    TASK_FIELDS, TASK_GOLD_FIELD, TASKRUN_FIELDS = (
        (TASK_FIELDS_WITH_GOLD, TASK_GOLD_FIELD_WITH_GOLD,
         TASKRUN_FIELDS_WITH_GOLD) if disclose_gold else
        (TASK_FIELDS_WITHOUT_GOLD, TASK_GOLD_FIELD_WITHOUT_GOLD,
         TASKRUN_FIELDS_WITHOUT_GOLD))
    conditions, filter_params = get_task_filters(filters)
    if obj == 'task':
        # Export tasks; log_counts exposes ct/ft for the filter conditions.
        sql = text('''
                   SELECT {0}
                     FROM task
                     LEFT OUTER JOIN (
                       SELECT task_id
                            , CAST(COUNT(id) AS FLOAT) AS ct
                            , MAX(finish_time) as ft
                         FROM task_run
                           WHERE project_id = :project_id
                           GROUP BY task_id
                       ) AS log_counts
                       ON task.id = log_counts.task_id
                     WHERE project_id = :project_id
                     {1}
                   '''.format(
            _field_mapreducer((TASK_FIELDS, ), (TASK_GOLD_FIELD, )),
            conditions))
    elif obj == 'task_run':
        if expanded:
            # Expanded export: each run row also carries its task's and
            # contributing user's columns (prefixed task__ / user__).
            sql = text('''
                      SELECT {0}
                        FROM task_run
                        LEFT JOIN task
                          ON task_run.task_id = task.id
                        LEFT OUTER JOIN (
                          SELECT task_id
                               , CAST(COUNT(id) AS FLOAT) AS ct
                               , MAX(finish_time) as ft
                            FROM task_run
                              WHERE project_id = :project_id
                              GROUP BY task_id
                          ) AS log_counts
                          ON task.id = log_counts.task_id
                        LEFT JOIN "user"
                          ON task_run.user_id = "user".id
                        WHERE task_run.project_id = :project_id
                        {1}
                      '''.format(
                _field_mapreducer(
                    (TASKRUN_FIELDS, ''), (TASK_FIELDS, 'task__'),
                    (USER_FIELDS, 'user__'), (TASK_GOLD_FIELD, 'task__')),
                conditions))
        else:
            # Compact export: run columns plus the task's gold field only.
            sql = text('''
                      SELECT {0}
                        FROM task_run
                        LEFT JOIN task
                          ON task_run.task_id = task.id
                        LEFT OUTER JOIN (
                          SELECT task_id
                               , CAST(COUNT(id) AS FLOAT) AS ct
                               , MAX(finish_time) as ft
                            FROM task_run
                              WHERE project_id = :project_id
                              GROUP BY task_id
                          ) AS log_counts
                          ON task_run.task_id = log_counts.task_id
                        WHERE task_run.project_id = :project_id
                        {1}
                      '''.format(
                _field_mapreducer((TASKRUN_FIELDS, ''),
                                  (TASK_GOLD_FIELD, 'task__')), conditions))
    else:
        # Unsupported object type: nothing to export.
        return
    return session.execute(sql, dict(project_id=project_id, **filter_params))
Ejemplo n.º 15
0
def browse_tasks(project_id,
                 args,
                 filter_user_prefs=False,
                 user_id=None,
                 **kwargs):
    """Cache browse tasks view for a project.

    :param project_id: id of the project whose tasks are listed.
    :param args: dict of browse options (``order_by``, ``records_per_page``,
        ``offset``, plus the filter keys consumed by ``get_task_filters``).
    :param filter_user_prefs: when True, build the worker-facing task list
        (filtered by each task's ``worker_filter`` and ranked by preference
        score); otherwise build the owner/admin browse page.
    :param user_id: id of the requesting user; injected into ``args`` so the
        filter query can reference it.
    :returns: tuple ``(total_count, tasks)`` where ``tasks`` is a list of
        task dicts produced by ``format_task``.
    """

    # TODO: use Jinja filters to format date
    def format_date(date):
        # Returns None when date is None (e.g. task has no finish time yet).
        if date is not None:
            return convert_utc_to_est(date).strftime('%m-%d-%y %H:%M')

    def format_task(row, lock_users=None):
        """Convert a database record to a task dictionary and format data."""
        # Default changed from a mutable [] literal to None to avoid the
        # shared-mutable-default pitfall; behavior is unchanged.
        finish_time = format_date(row.ft)
        created = format_date(row.created)
        user_pref = row.user_pref or {}
        task = dict(id=row.id,
                    n_task_runs=row.n_task_runs,
                    n_answers=row.n_answers,
                    priority_0=row.priority_0,
                    finish_time=finish_time,
                    created=created,
                    calibration=row.calibration,
                    userPrefLang=", ".join(user_pref.get("languages", [])),
                    userPrefLoc=", ".join(user_pref.get("locations", [])),
                    lock_users=lock_users if lock_users is not None else [])
        task['pct_status'] = _pct_status(row.n_task_runs, row.n_answers)
        return task

    order_by = args.get('order_by') or ""
    tasks = []
    total_count = task_count(project_id, args)
    if not total_count:
        # Nothing matches the filters; skip the expensive queries below.
        return total_count, tasks

    # Map task_id -> list of user_ids currently holding a lock on it.
    locked_tasks_in_project = {}
    for lock in get_locked_tasks_project(project_id):
        locked_tasks_in_project.setdefault(int(lock["task_id"]),
                                           []).append(lock["user_id"])

    args['user_id'] = user_id
    filters, filter_params = get_task_filters(args)

    # `filters` is parameterized, but the ORDER BY clause is interpolated
    # directly into the SQL string. It is presumably sanitized upstream
    # (e.g. by parse_tasks_browse_args) -- TODO(review): confirm order_by
    # can never carry untrusted SQL.
    sql = """
            SELECT task.id,
            coalesce(ct, 0) as n_task_runs, task.n_answers, ft,
            priority_0, task.created, task.calibration,
            task.user_pref, task.worker_filter, task.worker_pref
            FROM task LEFT OUTER JOIN
            (SELECT task_id, CAST(COUNT(id) AS FLOAT) AS ct,
            MAX(finish_time) as ft FROM task_run
            WHERE project_id=:project_id GROUP BY task_id) AS log_counts
            ON task.id=log_counts.task_id
            WHERE task.project_id=:project_id""" + filters + \
            " ORDER BY %s" % (order_by or 'id ASC')

    params = dict(project_id=project_id, **filter_params)
    limit = args.get('records_per_page') or 10
    offset = args.get('offset') or 0

    if filter_user_prefs:
        # Construct the task list for the worker view.
        params["assign_user"] = args["sql_params"]["assign_user"]
        results = session.execute(text(sql), params)
        task_rank_info = []

        user_profile = args.get("filter_by_wfilter_upref",
                                {}).get("current_user_profile", {})
        for row in results:
            score = 0
            w_pref = row.worker_pref or {}
            w_filter = row.worker_filter or {}
            user_pref = row.user_pref or {}
            # Skip tasks whose worker_filter the current user does not meet.
            if not user_meet_task_requirement(row.id, w_filter, user_profile):
                continue
            if not order_by:
                # No explicit sort requested: rank tasks by preference score.
                score = get_task_preference_score(w_pref, user_profile)

            task = format_task(row)
            task_rank_info.append((task, score))

        # Get the available tasks for the current worker, then page.
        total_count = len(task_rank_info)
        tasks = select_available_tasks(task_rank_info, locked_tasks_in_project,
                                       project_id, user_id, offset + limit,
                                       args.get("order_by"))
        tasks = tasks[offset:offset + limit]

    else:
        # Construct the task browse page for owners/admins.
        if "lock_status" not in order_by:
            # lock_status ordering is ranked in Python below over the full
            # result set, so only page in SQL for the other sort orders.
            sql += " LIMIT :limit OFFSET :offset"
            params["limit"] = limit
            params["offset"] = offset

        results = session.execute(text(sql), params)
        task_rank_info = []

        for row in results:
            task = format_task(row, locked_tasks_in_project.get(row.id, []))
            # Locked tasks get sentinel rank -1; unlocked tasks rank by
            # their completion percentage.
            if row.id in locked_tasks_in_project:
                lock_score = -1
            else:
                lock_score = task['pct_status']

            task_rank_info.append((task, lock_score))

        if order_by == "lock_status asc":
            # Only the top offset+limit entries are needed for this page.
            task_rank_info = heapq.nlargest(offset + limit,
                                            task_rank_info,
                                            key=lambda tup: tup[1])
            tasks = task_rank_info[offset:offset + limit]
        elif order_by == "lock_status desc":
            task_rank_info = heapq.nsmallest(offset + limit,
                                             task_rank_info,
                                             key=lambda tup: tup[1])
            tasks = task_rank_info[offset:offset + limit]
        else:
            tasks = task_rank_info

    # Each entry is a (task, score) tuple -- presumably also in the
    # select_available_tasks result, since both branches are unpacked here;
    # return only the task dicts.
    return total_count, [t[0] for t in tasks]