Beispiel #1
0
    def template_get_locked_task(project_id,
                                 user_id=None,
                                 user_ip=None,
                                 external_uid=None,
                                 limit=1,
                                 offset=0,
                                 orderby='priority_0',
                                 desc=True,
                                 rand_within_priority=False,
                                 task_type='gold_last'):
        """Return a task of ``project_id`` locked for ``user_id``.

        Steps, in order:

        * ``offset`` above 2 raises ``BadRequest``; any other positive
          offset returns ``None`` without touching the database.
        * When ``get_task_id_and_duration_for_project_user`` reports more
          than 10 lock seconds for a task and that task can be loaded, the
          same task is returned again.
        * Otherwise the SQL produced by ``query_factory`` is executed and
          the candidate rows are tried in order; the first one for which
          ``acquire_lock`` succeeds is recorded and returned.

        ``user_ip``, ``external_uid``, ``orderby`` and ``desc`` are accepted
        but not read by this function.

        :returns: a one-element list holding the task, ``[]`` when no
            candidate could be locked, or ``None`` for a positive offset
        :raises BadRequest: when ``offset`` is greater than 2
        """
        if offset > 2:
            raise BadRequest('')
        if offset > 0:
            return None

        held_task_id, lock_seconds = get_task_id_and_duration_for_project_user(
            project_id, user_id)
        if lock_seconds > 10:
            held_task = session.query(Task).get(held_task_id)
            if held_task:
                return [held_task]

        active_users = get_active_user_count(project_id, sentinel.master)
        if user_id:
            assign_user = json.dumps(
                {'assign_user': [cached_users.get_user_email(user_id)]})
        else:
            assign_user = None
        current_app.logger.info(
            "Project {} - number of current users: {}".format(
                project_id, active_users))

        sql = query_factory(project_id,
                            user_id=user_id,
                            limit=limit,
                            rand_within_priority=rand_within_priority,
                            task_type=task_type)
        # The bound row limit is derived from the active user count, not
        # from the ``limit`` argument.
        query_params = {
            'project_id': project_id,
            'user_id': user_id,
            'assign_user': assign_user,
            'limit': active_users + 5,
        }
        rows = session.execute(sql, query_params)

        for candidate_id, taskcount, n_answers, calibration, row_timeout in rows:
            timeout = row_timeout or TIMEOUT
            if calibration:
                remaining = float('inf')
            else:
                remaining = n_answers - taskcount
            if not acquire_lock(candidate_id, user_id, remaining, timeout):
                continue

            # Lock obtained: stop reading the result set and record the
            # assignment before handing the task back.
            rows.close()
            save_task_id_project_id(candidate_id, project_id, 2 * timeout)
            register_active_user(project_id,
                                 user_id,
                                 sentinel.master,
                                 ttl=timeout)

            obtained_kind = 'gold task' if calibration else 'task'
            current_app.logger.info(
                'Project {} - user {} obtained {} {}, timeout: {}'.format(
                    project_id, user_id, obtained_kind, candidate_id,
                    timeout))
            return [session.query(Task).get(candidate_id)]

        return []
Beispiel #2
0
def n_available_tasks_for_user(project, user_id=None, user_ip=None):
    """Return the number of tasks for a given project a user can contribute to.

    The count is based on the completion of the project tasks, previous
    task_runs submitted by the user and, for the ``user_pref`` and
    ``task_queue`` schedulers, the user preference/filter SQL clauses plus
    a per-task ``worker_filter`` check against the user profile.

    :param project: the project, either as a dict (``project['id']`` and
        ``project['info']`` are read) or as an object (``project.id`` and
        ``project.info`` are read)
    :param user_id: id of the user; ``None`` or a non-positive id yields 0
    :param user_ip: not used by this function
    :return: the number of available tasks, or ``None`` when executing the
        query or evaluating its rows raised an exception (which is logged)
    """
    # Nothing to count for a missing/invalid user: return before importing
    # or querying anything.
    if user_id is None or user_id <= 0:
        return 0

    from pybossa.sched import Schedulers

    # user_id passed the guard above, so no None fallback is needed here.
    assign_user = json.dumps(
        {'assign_user': [cached_users.get_user_email(user_id)]})
    if isinstance(project, dict):
        scheduler = project["info"].get('sched', 'default')
        project_id = project['id']
    else:
        scheduler = project.info.get('sched', 'default')
        project_id = project.id
    filtered_by_profile = scheduler in (Schedulers.user_pref,
                                        Schedulers.task_queue)

    if not filtered_by_profile:
        sql = '''
               SELECT COUNT(*) AS n_tasks FROM task
               WHERE project_id=:project_id AND state !='completed'
               AND state !='enrich'
               AND id NOT IN
               (SELECT task_id FROM task_run WHERE
               project_id=:project_id AND user_id=:user_id)
               ; '''
    else:
        # These clauses are SQL fragments produced by cached_users, not raw
        # request input; they are spliced into the statement as-is.
        user_pref_list = cached_users.get_user_preferences(user_id)
        user_filter_list = cached_users.get_user_filters(user_id)
        sql = '''
               SELECT task.id, worker_filter FROM task
               WHERE project_id=:project_id AND state !='completed'
               AND state !='enrich'
               AND id NOT IN
               (SELECT task_id FROM task_run WHERE
               project_id=:project_id AND user_id=:user_id)
               AND ({})
               AND ({})
               ;'''.format(user_pref_list, user_filter_list)
    sqltext = text(sql)
    try:
        result = session.execute(
            sqltext,
            dict(project_id=project_id,
                 user_id=user_id,
                 assign_user=assign_user))
        if not filtered_by_profile:
            for row in result:
                return row.n_tasks
            # An empty result used to fall through and return None
            # implicitly; report "no tasks" instead.
            return 0

        user_profile = cached_users.get_user_profile_metadata(user_id)
        user_profile = json.loads(user_profile) if user_profile else {}
        # Count only the tasks whose worker filter the user satisfies.
        return sum(
            int(cached_task_browse_helpers.user_meet_task_requirement(
                task_id, w_filter or {}, user_profile))
            for task_id, w_filter in result)
    except Exception as e:
        # Best-effort boundary: log and signal failure with None, which is
        # what callers of the original implementation received.
        current_app.logger.exception(
            'Exception in get_user_pref_task {0}, sql: {1}'.format(
                str(e), str(sqltext)))
        return None
Beispiel #3
0
def n_available_tasks_for_user(project, user_id=None, user_ip=None):
    """Return the number of tasks for a given project a user can contribute to.

    The count is based on the completion of the project tasks, previous
    task_runs submitted by the user, the data-access clause returned by
    ``get_data_access_db_clause_for_task_assignment`` and, for the
    ``user_pref`` scheduler, the user preference clause.

    :param project: the project, either as a dict (``project['id']`` is
        read) or as an object (``project.id`` and ``project.info`` are read)
    :param user_id: id of the user; ``None`` or a non-positive id yields 0
    :param user_ip: not used by this function
    :return: the number of available tasks, or ``None`` when executing the
        query raised an exception (which is logged)
    """
    # Check the user first so that no helper is called with a missing or
    # invalid id and no import/query work is done for nothing.
    if user_id is None or user_id <= 0:
        return 0

    from pybossa.sched import Schedulers

    allowed_task_levels_clause = get_data_access_db_clause_for_task_assignment(
        user_id)
    # user_id passed the guard above, so no None fallback is needed here.
    assign_user = json.dumps(
        {'assign_user': [cached_users.get_user_email(user_id)]})

    if isinstance(project, dict):
        if 'sched' in project:
            # A top-level key keeps its original precedence.
            scheduler = project['sched']
        else:
            # NOTE(review): the object branch reads the scheduler from
            # ``info``; dicts without a top-level 'sched' presumably carry
            # it there as well - confirm against the callers.
            scheduler = (project.get('info') or {}).get('sched', 'default')
        project_id = project['id']
    else:
        scheduler = project.info.get('sched', 'default')
        project_id = project.id

    # The clauses below are SQL fragments produced by project helpers, not
    # raw request input; they are spliced into the statement as-is.
    if scheduler != Schedulers.user_pref:
        sql = '''
               SELECT COUNT(*) AS n_tasks FROM task
               WHERE project_id=:project_id AND state !='completed'
               AND state !='enrich'
               AND id NOT IN
               (SELECT task_id FROM task_run WHERE
               project_id=:project_id AND user_id=:user_id) {}
               ; '''.format(allowed_task_levels_clause)
    else:
        user_pref_list = cached_users.get_user_preferences(user_id)
        sql = '''
               SELECT COUNT(*) AS n_tasks FROM task
               WHERE project_id=:project_id AND state !='completed'
               AND state !='enrich'
               AND id NOT IN
               (SELECT task_id FROM task_run WHERE
               project_id=:project_id AND user_id=:user_id)
               AND ({}) {} ;
               '''.format(user_pref_list, allowed_task_levels_clause)
    sqltext = text(sql)
    try:
        result = session.execute(
            sqltext,
            dict(project_id=project_id,
                 user_id=user_id,
                 assign_user=assign_user))
    except Exception as e:
        # Best-effort boundary: log and signal failure with None.
        current_app.logger.exception(
            'Exception in get_user_pref_task {0}, sql: {1}'.format(
                str(e), str(sqltext)))
        return None

    n_tasks = 0
    for row in result:
        n_tasks = row.n_tasks
    return n_tasks
Beispiel #4
0
    def template_get_locked_task(project_id, user_id=None, user_ip=None,
                                 external_uid=None, limit=1, offset=0,
                                 orderby='priority_0', desc=True,
                                 rand_within_priority=False, task_type='gold_last',
                                 filter_user_prefs=False):
        """Return a task of ``project_id`` locked for ``user_id``.

        Steps, in order:

        * ``offset`` above 2 raises ``BadRequest``; any other positive
          offset returns ``None`` without touching the database.
        * When ``get_task_id_and_duration_for_project_user`` reports more
          than 10 lock seconds for a task and that task can be loaded, the
          same task is returned again.
        * Otherwise the SQL produced by ``query_factory`` is executed. With
          ``filter_user_prefs`` the rows are reduced to those passing
          ``user_meet_task_requirement`` and ordered by descending
          ``get_task_preference_score``; without it they are used as
          returned. The first candidate for which ``acquire_lock`` succeeds
          is handed to ``_lock_task_for_user`` and its result is returned.

        ``user_ip``, ``external_uid``, ``orderby`` and ``desc`` are accepted
        but not read by this function.

        :param filter_user_prefs: when true, fetch up to the configured
            ``DB_MAXIMUM_BATCH_SIZE`` rows and filter/rank them against the
            user profile; otherwise fetch active-user-count + 5 rows
        :returns: the result of ``_lock_task_for_user``, ``[]`` when no
            candidate could be locked, or ``None`` for a positive offset
        :raises BadRequest: when ``offset`` is greater than 2
        """
        if offset > 2:
            raise BadRequest('')
        if offset > 0:
            return None
        task_id, lock_seconds = get_task_id_and_duration_for_project_user(project_id, user_id)
        if lock_seconds > 10:
            task = session.query(Task).get(task_id)
            if task:
                return [task]
        user_count = get_active_user_count(project_id, sentinel.master)
        assign_user = json.dumps({'assign_user': [cached_users.get_user_email(user_id)]}) if user_id else None
        current_app.logger.info(
            "Project {} - number of current users: {}"
            .format(project_id, user_count))

        sql = query_factory(project_id, user_id=user_id, limit=limit,
                            rand_within_priority=rand_within_priority,
                            task_type=task_type)
        limit = current_app.config.get('DB_MAXIMUM_BATCH_SIZE') if filter_user_prefs else user_count + 5
        rows = session.execute(sql, dict(project_id=project_id,
                                         user_id=user_id,
                                         assign_user=assign_user,
                                         limit=limit))
        if filter_user_prefs:
            # The profile is only needed for filtering/ranking, so it is
            # looked up on this path only.
            user_profile = cached_users.get_user_profile_metadata(user_id)
            user_profile = json.loads(user_profile) if user_profile else {}
            # validate user qualification and calculate task preference score
            task_rank_info = []
            for task_id, taskcount, n_answers, calibration, w_filter, w_pref, timeout in rows:
                w_pref = w_pref or {}
                w_filter = w_filter or {}
                meet_requirement = cached_task_browse_helpers.user_meet_task_requirement(task_id, w_filter, user_profile)
                if meet_requirement:
                    score = cached_task_browse_helpers.get_task_preference_score(w_pref, user_profile)
                    # Keep the 7-column row shape the locking loop unpacks;
                    # the score takes the worker_filter slot.
                    task_rank_info.append((task_id, taskcount, n_answers, calibration, score, None, timeout))
            # Highest score first; sorted() is stable, so equally scored
            # tasks keep the order the query returned them in.
            rows = sorted(task_rank_info, key=lambda tup: tup[4], reverse=True)
        else:
            rows = list(rows)

        for task_id, taskcount, n_answers, calibration, _, _, timeout in rows:
            timeout = timeout or TIMEOUT
            remaining = float('inf') if calibration else n_answers - taskcount
            if acquire_lock(task_id, user_id, remaining, timeout):
                return _lock_task_for_user(task_id, project_id, user_id, timeout, calibration)

        return []
Beispiel #5
0
    def get_target_owners(self, project, is_new_project):
        """Get the email addresses of all owners and
        coowners for the target project.

        :param project: a project object; ``short_name`` and
            ``owners_ids`` are read from it
        :param is_new_project: when true, the emails are resolved from
            ``project.owners_ids`` through ``cached_users``; otherwise they
            are resolved from the ``'owners_ids'`` of the target returned by
            ``self.get_target`` through ``self.get_target_emails``
        :return: a list of email addresses, with ``None`` entries dropped
        """
        if is_new_project:
            owner_emails = [
                cached_users.get_user_email(owner_id)
                for owner_id in project.owners_ids
            ]
        else:
            # The target is only needed on this path, so it is looked up
            # here instead of unconditionally.
            target = self.get_target(short_name=project.short_name)
            owner_emails = self.get_target_emails(target['owners_ids'])

        return [email for email in owner_emails if email is not None]
Beispiel #6
0
    def template_get_locked_task(project_id,
                                 user_id=None,
                                 user_ip=None,
                                 external_uid=None,
                                 limit=1,
                                 offset=0,
                                 orderby='priority_0',
                                 desc=True,
                                 rand_within_priority=False,
                                 task_type='gold_last',
                                 filter_user_prefs=False,
                                 task_category_filters=""):
        """Return a task of ``project_id`` locked for ``user_id``.

        Steps, in order:

        * ``offset`` above 2 raises ``BadRequest``; any other positive
          offset returns ``None`` without touching the database.
        * When ``get_task_id_and_duration_for_project_user`` reports more
          than 10 lock seconds for a task and that task can be loaded, the
          same task is returned again.
        * For a project using the ``task_queue`` scheduler with a
          ``reserve_tasks.category`` configuration, category filters are
          obtained from ``get_reserve_task_category_info``: first for the
          categories of the current user and, if that yields no category
          keys or the query reports no rows, again with ``exclude_user``
          set.
        * The SQL produced by ``query_factory`` is executed. With
          ``filter_user_prefs`` the rows are reduced to those passing
          ``user_meet_task_requirement`` and ordered by descending
          ``get_task_preference_score``; without it they are used as
          returned. For the first candidate for which ``acquire_lock``
          succeeds, ``acquire_reserve_task_lock`` is called and the result
          of ``_lock_task_for_user`` is returned.

        ``user_ip``, ``external_uid``, ``orderby``, ``desc`` and
        ``task_category_filters`` are accepted but not read by this
        function; the category filters passed to ``query_factory`` are
        computed locally.

        :param filter_user_prefs: when true, fetch up to the configured
            ``DB_MAXIMUM_BATCH_SIZE`` rows and filter/rank them against the
            user profile; otherwise fetch active-user-count + 5 rows
        :returns: the result of ``_lock_task_for_user``, ``[]`` when no
            candidate could be locked, or ``None`` for a positive offset
        :raises BadRequest: when ``offset`` is greater than 2
        """
        if offset > 2:
            raise BadRequest('')
        if offset > 0:
            return None
        project = project_repo.get(project_id)
        timeout = project.info.get('timeout', TIMEOUT)
        task_queue_scheduler = project.info.get(
            "sched", "default") == Schedulers.task_queue
        reserve_task_config = project.info.get("reserve_tasks",
                                               {}).get("category", [])
        task_id, lock_seconds = get_task_id_and_duration_for_project_user(
            project_id, user_id)
        if lock_seconds > 10:
            task = session.query(Task).get(task_id)
            if task:
                return [task]
        user_count = get_active_user_count(project_id, sentinel.master)
        assign_user = json.dumps({
            'assign_user': [cached_users.get_user_email(user_id)]
        }) if user_id else None
        current_app.logger.info(
            "Project {} - number of current users: {}".format(
                project_id, user_count))

        sql_filters, exclude_user = "", False
        if task_queue_scheduler and reserve_task_config:
            sql_filters, category_keys = get_reserve_task_category_info(
                reserve_task_config, project_id, timeout, user_id)
            if not category_keys:
                # no category reserved by current user. search categories
                # excluding the ones reserved by other users
                current_app.logger.info(
                    "Project %s, user %s, %s", project_id, user_id,
                    "No task category reserved by user. Search tasks excuding categories reserved by other users"
                )
                exclude_user = True
                sql_filters, category_keys = get_reserve_task_category_info(
                    reserve_task_config, project_id, timeout, user_id,
                    exclude_user)
                current_app.logger.info(
                    "SQL filter excuding task categories reserved by other users. sql filter %s",
                    sql_filters)

        limit = current_app.config.get(
            'DB_MAXIMUM_BATCH_SIZE') if filter_user_prefs else user_count + 5

        def _run_query(category_filters):
            # Build and execute the candidate-task query for the given
            # category filter clause.
            sql = query_factory(project_id,
                                user_id=user_id,
                                limit=limit,
                                rand_within_priority=rand_within_priority,
                                task_type=task_type,
                                task_category_filters=category_filters)
            return session.execute(
                sql,
                dict(project_id=project_id,
                     user_id=user_id,
                     assign_user=assign_user,
                     limit=limit))

        rows = _run_query(sql_filters)

        # NOTE(review): this relies on ``rowcount`` reflecting the number of
        # rows of a SELECT, which depends on the database driver - confirm.
        if task_queue_scheduler and reserve_task_config and rows and not rows.rowcount and not exclude_user:
            # With task category reserved by user and no records returned,
            # no ongoing tasks with task category reserved by user exist.
            # Hence, query db for tasks excluding task categories reserved
            # by other users passing exclude_users = True
            current_app.logger.info(
                "Project %s, user %s, %s", project_id, user_id,
                "No task exist with task category already reserved by user. Search tasks excuding categories reserved by other users"
            )
            exclude_user = True
            release_reserve_task_lock_by_keys(category_keys, timeout)
            sql_filters, category_keys = get_reserve_task_category_info(
                reserve_task_config, project_id, timeout, user_id,
                exclude_user)
            current_app.logger.info(
                "SQL filter excuding task categories reserved by other users. sql filter %s",
                sql_filters)
            rows = _run_query(sql_filters)

        if filter_user_prefs:
            # The profile is only needed for filtering/ranking, so it is
            # looked up on this path only.
            user_profile = cached_users.get_user_profile_metadata(user_id)
            user_profile = json.loads(user_profile) if user_profile else {}
            # validate user qualification and calculate task preference score
            task_rank_info = []
            for task_id, taskcount, n_answers, calibration, w_filter, w_pref, row_timeout in rows:
                w_pref = w_pref or {}
                w_filter = w_filter or {}
                meet_requirement = cached_task_browse_helpers.user_meet_task_requirement(
                    task_id, w_filter, user_profile)
                if meet_requirement:
                    score = cached_task_browse_helpers.get_task_preference_score(
                        w_pref, user_profile)
                    # Keep the 7-column row shape the locking loop unpacks;
                    # the score takes the worker_filter slot.
                    task_rank_info.append((task_id, taskcount, n_answers,
                                           calibration, score, None,
                                           row_timeout))
            # Highest score first; sorted() is stable, so equally scored
            # tasks keep the order the query returned them in.
            rows = sorted(task_rank_info, key=lambda tup: tup[4], reverse=True)
        else:
            rows = list(rows)

        for task_id, taskcount, n_answers, calibration, _, _, row_timeout in rows:
            # Per-row timeout, falling back to the module-level default.
            lock_timeout = row_timeout or TIMEOUT
            remaining = float('inf') if calibration else n_answers - taskcount
            if acquire_lock(task_id, user_id, remaining, lock_timeout):
                # reserve tasks
                acquire_reserve_task_lock(project_id, task_id, user_id,
                                          lock_timeout)
                return _lock_task_for_user(task_id, project_id, user_id,
                                           lock_timeout, calibration)
        return []