def locked_task_sql(project_id, user_id=None, limit=1, rand_within_priority=False, task_type='gold_last', filter_user_prefs=False, priority_sort=True): ''' `task_type` will affect the type of tasks return by the query and can be one one of the following values: gold -> only gold tasks will be returned no_gold -> only non-gold tasks will be returned gold_last -> non-gold tasks will be returned before gold tasks. (Default) gold_first -> gold tasks will be returned before non-gold tasks. ''' filters = [] if filter_user_prefs: filters.append('AND ({})'.format( cached_users.get_user_preferences(user_id))) filters.append( data_access.get_data_access_db_clause_for_task_assignment(user_id) or '') if task_type == 'gold': filters.append('AND task.calibration = 1') elif task_type == 'no_gold': filters.append('AND task.calibration != 1') order_by = [] if task_type == 'gold_last': order_by.append('task.calibration') elif task_type == 'gold_first': order_by.append('task.calibration DESC NULLS LAST') if priority_sort: order_by.append('priority_0 DESC') if rand_within_priority: order_by.append('random()') else: order_by.append('id ASC') sql = ''' SELECT task.id, COUNT(task_run.task_id) AS taskcount, n_answers, task.calibration, (SELECT info->'timeout' FROM project WHERE id=:project_id) as timeout FROM task LEFT JOIN task_run ON (task.id = task_run.task_id) WHERE NOT EXISTS (SELECT 1 FROM task_run WHERE project_id=:project_id AND user_id=:user_id AND task_id=task.id) AND task.project_id=:project_id AND ((task.expiration IS NULL) OR (task.expiration > (now() at time zone 'utc')::timestamp)) AND task.state !='completed' AND task.state !='enrich' {} group by task.id ORDER BY {} LIMIT :limit; '''.format(' '.join(filters), ','.join(order_by)) return text(sql)
def n_available_tasks_for_user(project, user_id=None, user_ip=None): """Return the number of tasks for a given project a user can contribute to. based on the completion of the project tasks, previous task_runs submitted by the user and user preference set under user profile. """ from pybossa.sched import Schedulers allowed_task_levels_clause = get_data_access_db_clause_for_task_assignment( user_id) n_tasks = 0 if user_id is None or user_id <= 0: return n_tasks assign_user = json.dumps({ 'assign_user': [cached_users.get_user_email(user_id)] }) if user_id else None scheduler = project.get( 'sched', 'default') if type(project) == dict else project.info.get( 'sched', 'default') project_id = project['id'] if type(project) == dict else project.id if scheduler != Schedulers.user_pref: sql = ''' SELECT COUNT(*) AS n_tasks FROM task WHERE project_id=:project_id AND state !='completed' AND state !='enrich' AND id NOT IN (SELECT task_id FROM task_run WHERE project_id=:project_id AND user_id=:user_id) {} ; '''.format(allowed_task_levels_clause) else: user_pref_list = cached_users.get_user_preferences(user_id) sql = ''' SELECT COUNT(*) AS n_tasks FROM task WHERE project_id=:project_id AND state !='completed' AND state !='enrich' AND id NOT IN (SELECT task_id FROM task_run WHERE project_id=:project_id AND user_id=:user_id) AND ({}) {} ; '''.format(user_pref_list, allowed_task_levels_clause) sqltext = text(sql) try: result = session.execute( sqltext, dict(project_id=project_id, user_id=user_id, assign_user=assign_user)) except Exception as e: current_app.logger.exception( 'Exception in get_user_pref_task {0}, sql: {1}'.format( str(e), str(sqltext))) return None for row in result: n_tasks = row.n_tasks return n_tasks
def get_user_pref_task(project_id, user_id=None, user_ip=None, external_uid=None, limit=1, offset=0, orderby='priority_0', desc=True, rand_within_priority=False, present_gold_task=False, gold_only=False): """ Select a new task based on user preference set under user profile. For each incomplete task, check if the number of users working on the task is smaller than the number of answers still needed. In that case, acquire a lock on the task that matches user preference(if any) with users profile and return the task to the user. If offset is nonzero, skip that amount of available tasks before returning to the user. """ user_pref_list = cached_users.get_user_preferences(user_id) secondary_order = 'random()' if rand_within_priority else 'id ASC' allowed_task_levels_clause = data_access.get_data_access_db_clause_for_task_assignment( user_id) order_by_calib = 'DESC NULLS LAST' if present_gold_task else '' gold_only_clause = 'AND task.calibration = 1' if gold_only else '' sql = ''' SELECT task.id, COUNT(task_run.task_id) AS taskcount, n_answers, task.calibration, (SELECT info->'timeout' FROM project WHERE id=:project_id) as timeout FROM task LEFT JOIN task_run ON (task.id = task_run.task_id) WHERE NOT EXISTS (SELECT 1 FROM task_run WHERE project_id=:project_id AND user_id=:user_id AND task_id=task.id) AND task.project_id=:project_id AND ({}) AND ((task.expiration IS NULL) OR (task.expiration > (now() at time zone 'utc')::timestamp)) AND task.state !='completed' {} {} group by task.id ORDER BY task.calibration {}, priority_0 DESC, {} LIMIT :limit; '''.format(user_pref_list, allowed_task_levels_clause, gold_only_clause, order_by_calib, secondary_order) return text(sql)
def get_locked_task(project_id, user_id=None, user_ip=None, external_uid=None, limit=1, offset=0, orderby='priority_0', desc=True, rand_within_priority=False, present_gold_task=False): """ Select a new task to be returned to the contributor. For each incomplete task, check if the number of users working on the task is smaller than the number of answers still needed. In that case, acquire a lock on the task and return the task to the user. If offset is nonzero, skip that amount of available tasks before returning to the user. """ having_clause = 'HAVING COUNT(task_run.task_id) < n_answers' if not present_gold_task else '' allowed_task_levels_clause = data_access.get_data_access_db_clause_for_task_assignment( user_id) order_by_calib = 'DESC NULLS LAST' if present_gold_task else '' sql = text(''' SELECT task.id, COUNT(task_run.task_id) AS taskcount, n_answers, task.calibration, (SELECT info->'timeout' FROM project WHERE id=:project_id) as timeout FROM task LEFT JOIN task_run ON (task.id = task_run.task_id) WHERE NOT EXISTS (SELECT 1 FROM task_run WHERE project_id=:project_id AND user_id=:user_id AND task_id=task.id) AND task.project_id=:project_id AND ((task.expiration IS NULL) OR (task.expiration > (now() at time zone 'utc')::timestamp)) AND task.state !='completed' {} group by task.id {} ORDER BY task.calibration {}, priority_0 DESC, {} LIMIT :limit; '''.format(allowed_task_levels_clause, having_clause, order_by_calib, 'random()' if rand_within_priority else 'id ASC')) return sql
def get_locked_task(project_id, user_id=None, user_ip=None, external_uid=None, offset=0, limit=1, orderby='priority_0', desc=True, rand_within_priority=False, present_gold_task=False, gold_only=False): having_clause = 'HAVING COUNT(task_run.task_id) < n_answers' if not ( present_gold_task or gold_only) else '' allowed_task_levels_clause = data_access.get_data_access_db_clause_for_task_assignment( user_id) order_by_calib = 'DESC NULLS LAST' if present_gold_task else '' gold_only_clause = 'AND task.calibration = 1' if gold_only else '' sql = ''' SELECT task.id, COUNT(task_run.task_id) AS taskcount, n_answers, task.calibration, (SELECT info->'timeout' FROM project WHERE id=:project_id) as timeout FROM task LEFT JOIN task_run ON (task.id = task_run.task_id) WHERE NOT EXISTS (SELECT 1 FROM task_run WHERE project_id=:project_id AND user_id=:user_id AND task_id=task.id) AND task.project_id=:project_id AND ((task.expiration IS NULL) OR (task.expiration > (now() at time zone 'utc')::timestamp)) AND task.state !='completed' {} {} group by task.id {} ORDER BY task.calibration {}, priority_0 DESC, {} LIMIT :limit; '''.format(allowed_task_levels_clause, gold_only_clause, having_clause, order_by_calib, 'random()' if rand_within_priority else 'id ASC') return text(sql)
def get_locked_task(project_id, user_id=None, user_ip=None, external_uid=None, limit=1, offset=0, orderby='priority_0', desc=True, rand_within_priority=False): """ Select a new task to be returned to the contributor. For each incomplete task, check if the number of users working on the task is smaller than the number of answers still needed. In that case, acquire a lock on the task and return the task to the user. If offset is nonzero, skip that amount of available tasks before returning to the user. """ allowed_task_levels_clause = data_access.get_data_access_db_clause_for_task_assignment( user_id) sql = text(''' SELECT task.id, COUNT(task_run.task_id) AS taskcount, n_answers, (SELECT info->'timeout' FROM project WHERE id=:project_id) as timeout FROM task LEFT JOIN task_run ON (task.id = task_run.task_id) WHERE NOT EXISTS (SELECT 1 FROM task_run WHERE project_id=:project_id AND user_id=:user_id AND task_id=task.id) AND task.project_id=:project_id AND task.state !='completed' {} group by task.id HAVING COUNT(task_run.task_id) < n_answers ORDER BY priority_0 DESC, {} LIMIT :limit; '''.format(allowed_task_levels_clause, 'random()' if rand_within_priority else 'id ASC')) return sql