def labeling_page(): """ Label studio frontend: task labeling """ project = project_get_or_create() if project.no_tasks(): return redirect('/welcome') # task data: load task or task with completions if it exists task_data = None task_id = request.args.get('task_id', None) if task_id is not None: task_id = int(task_id) # Task explore mode task_data = project.get_task_with_completions( task_id) or project.source_storage.get(task_id) task_data = resolve_task_data_uri(task_data) if project.ml_backends_connected: task_data = project.make_predictions(task_data) project.analytics.send(getframeinfo(currentframe()).function) return flask.render_template('labeling.html', project=project, config=project.config, label_config_line=project.label_config_line, task_id=task_id, task_data=task_data, **find_editor_files())
def labeling_page(): """ Label stream for tasks """ if g.project.no_tasks(): return redirect(url_for('label_studio.welcome_page')) # task data: load task or task with completions if it exists task_data = None task_id = request.args.get('task_id', None) if task_id is not None: task_id = int(task_id) # Task explore mode task_data = g.project.get_task_with_completions( task_id) or g.project.source_storage.get(task_id) task_data = resolve_task_data_uri(task_data, project=g.project) if g.project.ml_backends_connected: task_data = g.project.make_predictions(task_data) return flask.render_template('labeling.html', project=g.project, config=g.project.config, label_config_line=g.project.label_config_line, task_id=task_id, task_data=task_data, **find_editor_files())
def api_task_by_id(task_id):
    """ Get task by id, this call will refresh this task predictions """
    task_id = int(task_id)

    # try to get task with completions first
    if request.method == 'GET':
        task_data = g.project.get_task_with_completions(task_id) or g.project.source_storage.get(task_id)
        task_data = resolve_task_data_uri(task_data, project=g.project)

        if g.project.ml_backends_connected:
            task_data = g.project.make_predictions(task_data)

        # change indent for pretty jsonify
        indent = 2 if request.values.get('pretty', False) else None
        response = current_app.response_class(
            json.dumps(task_data, indent=indent) + "\n",
            mimetype=current_app.config["JSONIFY_MIMETYPE"],
        )
        return make_response(response, 200)

    # delete task
    elif request.method == 'DELETE':
        g.project.remove_task(task_id)
        return make_response(jsonify('Task deleted.'), 204)
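# --- Illustrative sketch (not part of the handler above) ---
# The `pretty` flag in api_task_by_id only changes the json.dumps indentation: indent=None
# keeps the payload on one line, indent=2 pretty-prints it. The payload dict below is a
# made-up example used only to show the difference.
import json

payload = {'id': 1, 'data': {'text': 'hello'}}
assert '\n' not in json.dumps(payload, indent=None)
assert json.dumps(payload, indent=2).count('\n') > 0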
def labeling_page(): """ Label stream for tasks """ if g.project.no_tasks(): return redirect(url_for('label_studio.welcome_page')) # task data: load task or task with completions if it exists task_id = request.args.get('task_id', None) task_data = None # open separated LSF for task if task_id is not None: task_id = int(task_id) # Task explore mode task_data = g.project.get_task_with_completions( task_id) or g.project.source_storage.get(task_id) task_data = resolve_task_data_uri(task_data, project=g.project) if g.project.ml_backends_connected: task_data = g.project.make_predictions(task_data) # data manager if no task id to open elif 'label-old' not in request.url: return redirect(url_for('data_manager_blueprint.tasks_page')) return flask.render_template('labeling.html', project=g.project, config=g.project.config, label_config_line=g.project.label_config_line, task_id=task_id, task_data=task_data, version=label_studio.__version__, **find_editor_files())
def next_task(project, params, items):
    """ Generate next task for labeling stream

    :param project: project
    :param params: params.values['sampling'] = sequential | random-uniform | prediction-score-min | prediction-score-max
    :param items: task ids to sample from
    """
    # try to find a task that is not presented in completions
    sampling = None if params is None else params.values.get('sampling', None)
    completed_tasks_ids = project.get_completions_ids()
    task = project.next_task(completed_tasks_ids, task_ids=items, sampling=sampling)
    if task is None:
        # no tasks found
        return {'response_code': 404, 'id': None}

    task = resolve_task_data_uri(task, project=project)
    task = project.resolve_undefined_task_data(task)

    # collect prediction from multiple ml backends
    if project.ml_backends_connected:
        task = project.make_predictions(task)

    logger.debug('Next task:\n' + str(task.get('id', None)))
    task['response_code'] = 200
    return task
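# --- Illustrative sketch (assumption, not the project's sampler) ---
# The docstring above lists the sampling modes accepted via params.values['sampling'].
# The helper below is a hypothetical, self-contained approximation of how such modes could
# pick the next uncompleted task id; the names `candidates` and `prediction_scores` are
# assumptions made for this example only.
import random

def pick_next_task_id(candidates, completed_ids, sampling='sequential', prediction_scores=None):
    """Return one task id from `candidates` that is not yet completed, or None."""
    remaining = [i for i in candidates if i not in set(completed_ids)]
    if not remaining:
        return None
    if sampling == 'random-uniform':
        return random.choice(remaining)
    if sampling in ('prediction-score-min', 'prediction-score-max') and prediction_scores:
        scored = [i for i in remaining if i in prediction_scores]
        if scored:
            pick = min if sampling == 'prediction-score-min' else max
            return pick(scored, key=lambda i: prediction_scores[i])
    # 'sequential' and fallbacks: smallest id first
    return min(remaining)

# Example: prediction-score-min prefers the least confident remaining task (id 3 here)
assert pick_next_task_id([1, 2, 3], [1], 'prediction-score-min', {2: 0.9, 3: 0.2}) == 3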
def load_task(project, task_id, params, resolve_uri=False):
    """ Preload task: get completed_at, cancelled_completions,
        evaluate pre-signed urls for storages, aggregate over completion data, etc.
    """
    # TODO: make this clear and more flexible
    # make some tricky optimizations if filters are disabled and ordering == ['id']
    if params is not None and hasattr(params, 'fields') and params.fields == ['tasks:id']:
        if not check_filters_enabled(params):
            if not check_order_enabled(params) or params.tab.get('ordering') in [['tasks:id'], ['-tasks:id']]:
                return {'id': task_id}

    target_task = project.target_storage.get(task_id)
    source_task = project.source_storage.get(task_id, inplace=True, validate=False)

    if target_task is None:
        task = copy(source_task)
    else:
        task = copy(target_task)
        # tasks can hold the newest version of predictions, so get it from tasks
        task['predictions'] = source_task.get('predictions', [])

    # we need to modify the root of task
    task = copy(task)

    # completed_at
    completed_at = get_completed_at(task)
    if completed_at != 0 and isinstance(completed_at, int):
        completed_at = timestamp_to_local_datetime(completed_at).strftime(DATETIME_FORMAT)
    task['completed_at'] = completed_at

    # completion results aggregations over all completions
    completions = task.get('completions', [])
    if len(completions) > 0:
        task['completions_results'] = json.dumps([item.get('result', []) for item in completions])
    else:
        task['completions_results'] = ''

    # prediction score
    predictions = task.get('predictions', [])
    if len(predictions) > 0:
        task['predictions_results'] = json.dumps([item.get('result', []) for item in predictions])
        scores = [p['score'] for p in predictions if 'score' in p]
        if scores:
            task['predictions_score'] = sum(scores) / len(scores)
    else:
        task['predictions_results'] = ''

    # aggregations
    task['total_completions'] = len(task.get('completions', []))
    task['total_predictions'] = len(task.get('predictions', []))
    task['cancelled_completions'] = get_cancelled_completions(task)

    # don't resolve data (s3/gcs is slow) if it's not necessary (it's very slow)
    if resolve_uri:
        task = resolve_task_data_uri(task, project=project)

    task = project.resolve_undefined_task_data(task)
    return task
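# --- Illustrative sketch (hypothetical data, not from a real project) ---
# load_task() averages whatever prediction scores are present: with two predictions carrying
# scores 0.8 and 0.6 (and one without a score), `predictions_score` becomes 0.7, computed the
# same way as above, i.e. sum of the available scores divided by their count.
predictions = [{'result': [], 'score': 0.8}, {'result': [], 'score': 0.6}, {'result': []}]
scores = [p['score'] for p in predictions if 'score' in p]
assert abs(sum(scores) / len(scores) - 0.7) < 1e-9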
def api_all_tasks(): """ Get full tasks with pagination, completions and predictions """ project = project_get_or_create() page, page_size = int(request.args.get('page', 1)), int( request.args.get('page_size', 10)) order = request.args.get('order', 'id') if page < 1 or page_size < 1: return make_response( jsonify({'detail': 'Incorrect page or page_size'}), 422) order_inverted = order[0] == '-' order = order[1:] if order_inverted else order if order not in ['id', 'completed_at', 'has_skipped_completions']: return make_response(jsonify({'detail': 'Incorrect order'}), 422) # get task ids and sort them by completed time task_ids = project.source_storage.ids() completed_at = project.get_completed_at() skipped_status = project.get_skipped_status() # ordering pre_order = [{ 'id': i, 'completed_at': completed_at[i] if i in completed_at else None, 'has_skipped_completions': skipped_status[i] if i in completed_at else None, } for i in task_ids] # for has_skipped_completions use two keys ordering if order == 'has_skipped_completions': ordered = sorted( pre_order, key=lambda x: (DirectionSwitch(x['has_skipped_completions'], order_inverted), DirectionSwitch(x['completed_at'], False))) else: ordered = sorted(pre_order, key=lambda x: (DirectionSwitch(x[order], order_inverted))) paginated = ordered[(page - 1) * page_size:page * page_size] # get tasks with completions tasks = [] for item in paginated: i = item['id'] task = project.get_task_with_completions(i) if task is None: # no completion at task task = project.source_storage.get(i) else: task['completed_at'] = item['completed_at'] task['has_skipped_completions'] = item['has_skipped_completions'] task = resolve_task_data_uri(task) tasks.append(task) return make_response(jsonify(tasks), 200)
def api_all_tasks(): """ Get full tasks with pagination, completions and predictions """ page, page_size = int(request.args.get('page', 1)), int(request.args.get('page_size', 10)) order = request.args.get('order', 'id') if page < 1 or page_size < 1: return make_response(jsonify({'detail': 'Incorrect page or page_size'}), 422) order_inverted = order[0] == '-' order = order[1:] if order_inverted else order if order not in ['id', 'completed_at', 'has_skipped_completions']: return make_response(jsonify({'detail': 'Incorrect order'}), 422) # get task ids and sort them by completed time task_ids = g.project.source_storage.ids() completed_at = g.project.get_completed_at() skipped_status = g.project.get_skipped_status() # ordering pre_order = ({ 'id': i, 'completed_at': completed_at[i] if i in completed_at else None, 'has_skipped_completions': skipped_status[i] if i in completed_at else None, } for i in task_ids) if order == 'id': ordered = sorted(pre_order, key=lambda x: x['id'], reverse=order_inverted) else: # for has_skipped_completions use two keys ordering if order == 'has_skipped_completions': ordered = sorted(pre_order, key=lambda x: (DirectionSwitch(x['has_skipped_completions'], not order_inverted), DirectionSwitch(x['completed_at'], False))) # another orderings else: ordered = sorted(pre_order, key=lambda x: (DirectionSwitch(x[order], not order_inverted))) paginated = ordered[(page - 1) * page_size:page * page_size] # get tasks with completions tasks = [] for item in paginated: if item['completed_at'] != 'undefined' and item['completed_at'] is not None: item['completed_at'] = timestamp_to_local_datetime(item['completed_at']).strftime('%Y-%m-%d %H:%M:%S') i = item['id'] task = g.project.get_task_with_completions(i) if task is None: # no completion at task task = g.project.source_storage.get(i) else: task['completed_at'] = item['completed_at'] task['has_skipped_completions'] = item['has_skipped_completions'] task = resolve_task_data_uri(task, project=g.project) tasks.append(task) return make_response(jsonify(tasks), 200)
def prepare_tasks(project, params): """ Main function to get tasks """ import time # this option helps to avoid a total request for tasks and completions from storage (huge speed up for s3/gcs) first_page_full_render = project.config.get('first_page_full_render', True) page, page_size = params.page, params.page_size # use max count to speed up evaluation of tasks without filters and ordering full_render = check_filters_enabled(params) or check_order_enabled(params) full_render |= (page <= 1 if first_page_full_render else page < 1) or page_size <= 0 task_range = None if full_render else ((page - 1) * page_size, page * page_size) # load all tasks from db with some aggregations over completions tasks, total_tasks = load_tasks(project, params, resolve_uri=False, task_range=task_range) total_completions, total_predictions = None, None if full_render: # filter tasks = filter_tasks(tasks, params) # order tasks = order_tasks(params, tasks) total_tasks = len(tasks) # aggregations total_completions, total_predictions = 0, 0 for task in tasks: total_completions += task.get('total_completions', 0) total_predictions += task.get('total_predictions', 0) # pagination if page > 0 and page_size > 0: tasks = tasks[(page - 1) * page_size:page * page_size] # use only necessary fields to avoid storage (s3/gcs/etc) overloading need_uri_resolving = True if hasattr(params, 'fields'): # TODO: or tab.hiddenColumns need_uri_resolving = any(['data.' in field for field in params.fields]) # resolve all task fields if need_uri_resolving: for i, task in enumerate(tasks): tasks[i] = resolve_task_data_uri(task, project=project) return { 'tasks': tasks, 'total': total_tasks, 'total_completions': total_completions, 'total_predictions': total_predictions }
def api_tasks(task_id): """ Get task by id """ # try to get task with completions first task_id = int(task_id) if request.method == 'GET': task_data = g.project.get_task_with_completions(task_id) or g.project.source_storage.get(task_id) task_data = resolve_task_data_uri(task_data, project=g.project) if g.project.ml_backends_connected: task_data = g.project.make_predictions(task_data) return make_response(jsonify(task_data), 200) elif request.method == 'DELETE': g.project.remove_task(task_id) return make_response(jsonify('Task deleted.'), 204)
def api_tasks(task_id): """ Get task by id """ # try to get task with completions first task_id = int(task_id) project = project_get_or_create() if request.method == 'GET': task_data = project.get_task_with_completions( task_id) or project.source_storage.get(task_id) task_data = resolve_task_data_uri(task_data) project.analytics.send(getframeinfo(currentframe()).function) return make_response(jsonify(task_data), 200) elif request.method == 'DELETE': project.remove_task(task_id) project.analytics.send(getframeinfo(currentframe()).function) return make_response(jsonify('Task deleted.'), 204)
def api_generate_next_task(): """Generate next task for labeling page (label stream)""" # try to find task is not presented in completions completed_tasks_ids = g.project.get_completions_ids() task = g.project.next_task(completed_tasks_ids) if task is None: # no tasks found return make_response("", 404) task = resolve_task_data_uri(task, project=g.project) # collect prediction from multiple ml backends if g.project.ml_backends_connected: task = g.project.make_predictions(task) logger.debug("Next task:\n" + str(task.get("id", None))) return make_response(jsonify(task), 200)
def api_generate_next_task(): """ Generate next task to label """ project = project_get_or_create() # try to find task is not presented in completions completed_tasks_ids = project.get_completions_ids() task = project.next_task(completed_tasks_ids) if task is None: # no tasks found project.analytics.send(getframeinfo(currentframe()).function, error=404) return make_response('', 404) task = resolve_task_data_uri(task) project.analytics.send(getframeinfo(currentframe()).function) # collect prediction from multiple ml backends if project.ml_backends_connected: task = project.make_predictions(task) logger.debug('Next task:\n' + json.dumps(task, indent=2)) return make_response(jsonify(task), 200)
def prepare_tasks(project, params):
    """ Get ordered, paginated tasks with completions, keeping only the requested fields """
    order, page, page_size = params.order, params.page, params.page_size
    fields = params.fields

    ascending = order[0] == "-"
    order = order[1:] if order[0] == "-" else order
    if order not in ["id", "completed_at", "has_cancelled_completions"]:
        raise DataManagerException("Incorrect order")

    # get task ids and sort them by completed time
    task_ids = project.source_storage.ids()
    completed_at = project.get_completed_at()  # task can have multiple completions, get the last of completed
    cancelled_status = project.get_cancelled_status()

    # ordering
    pre_order = ({
        "id": i,
        "completed_at": completed_at[i] if i in completed_at else None,
        "has_cancelled_completions": cancelled_status[i] if i in completed_at else None,
    } for i in task_ids)

    if order == "id":
        ordered = sorted(pre_order, key=lambda x: x["id"], reverse=ascending)
    else:
        # for has_cancelled_completions use two keys ordering
        if order == "has_cancelled_completions":
            ordered = sorted(
                pre_order,
                key=lambda x: (
                    DirectionSwitch(x["has_cancelled_completions"], not ascending),
                    DirectionSwitch(x["completed_at"], False),
                ),
            )
        # another orderings
        else:
            ordered = sorted(pre_order, key=lambda x: (DirectionSwitch(x[order], not ascending)))

    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        i = item["id"]
        task = project.get_task_with_completions(i)

        # no completions at task, get task without completions
        if task is None:
            task = project.source_storage.get(i)
        else:
            # evaluate completed_at time
            completed_at = item["completed_at"]
            if completed_at != "undefined" and completed_at is not None:
                completed_at = timestamp_to_local_datetime(completed_at).strftime("%Y-%m-%d %H:%M:%S")
            task["completed_at"] = completed_at
            task["has_cancelled_completions"] = item["has_cancelled_completions"]

        # don't resolve data (s3/gcs is slow) if it's not in fields
        if "all" in fields or "data" in fields:
            task = resolve_task_data_uri(task, project=project)

        # leave only chosen fields
        if "all" not in fields:
            task = {field: task[field] for field in fields}

        tasks.append(task)

    return tasks
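# --- Illustrative sketch (hypothetical field list) ---
# The final field selection in prepare_tasks() keeps only the requested keys via a dict
# comprehension; the task dict and field list below are made up for this example.
task = {"id": 7, "completed_at": "2020-01-01 10:00:00", "has_cancelled_completions": 1}
fields = ["id", "completed_at"]
assert {field: task[field] for field in fields} == {"id": 7, "completed_at": "2020-01-01 10:00:00"}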