예제 #1
0
def labeling_page():
    """Render the Label Studio frontend page used for labeling a task.

    Redirects to ``/welcome`` when the project reports that it has no tasks.
    If the request carries a ``task_id`` query argument, that task is loaded
    (with completions when available, otherwise from the source storage),
    passed through ``resolve_task_data_uri`` and, when ML backends are
    connected, through ``project.make_predictions``.
    """
    project = project_get_or_create()
    if project.no_tasks():
        return redirect('/welcome')

    requested_id = request.args.get('task_id', None)
    if requested_id is None:
        # no explicit task requested: render the page without task data
        task_id, task_data = None, None
    else:
        # task explore mode: prefer the task together with its completions
        task_id = int(requested_id)
        task_data = project.get_task_with_completions(task_id)
        if not task_data:
            task_data = project.source_storage.get(task_id)
        task_data = resolve_task_data_uri(task_data)

        if project.ml_backends_connected:
            task_data = project.make_predictions(task_data)

    # report this view's own function name to analytics
    view_name = getframeinfo(currentframe()).function
    project.analytics.send(view_name)

    context = dict(
        project=project,
        config=project.config,
        label_config_line=project.label_config_line,
        task_id=task_id,
        task_data=task_data,
    )
    return flask.render_template('labeling.html', **context, **find_editor_files())
예제 #2
0
def labeling_page():
    """Render the label stream page for the project stored on ``g``.

    Redirects to the welcome page when the project has no tasks. When a
    ``task_id`` query argument is present, the matching task is loaded (with
    completions if available, otherwise from the source storage), its data
    URIs are resolved and predictions are added if ML backends are connected.
    """
    project = g.project
    if project.no_tasks():
        return redirect(url_for('label_studio.welcome_page'))

    task_data = None
    task_id = request.args.get('task_id', None)
    has_explicit_task = task_id is not None

    if has_explicit_task:
        # task explore mode
        task_id = int(task_id)
        with_completions = project.get_task_with_completions(task_id)
        task_data = with_completions or project.source_storage.get(task_id)
        task_data = resolve_task_data_uri(task_data, project=project)

        if project.ml_backends_connected:
            task_data = project.make_predictions(task_data)

    template_args = {
        'project': project,
        'config': project.config,
        'label_config_line': project.label_config_line,
        'task_id': task_id,
        'task_data': task_data,
    }
    return flask.render_template('labeling.html', **template_args, **find_editor_files())
예제 #3
0
def api_task_by_id(task_id):
    """Return (GET) or remove (DELETE) the task with the given id.

    On GET the task is loaded with completions when available, otherwise from
    the source storage; predictions are refreshed when ML backends are
    connected. Passing a truthy ``pretty`` request value indents the JSON.
    """
    task_id = int(task_id)
    method = request.method

    # delete task
    if method == 'DELETE':
        g.project.remove_task(task_id)
        return make_response(jsonify('Task deleted.'), 204)

    # any other method than GET falls through without a response
    if method != 'GET':
        return None

    # try to get task with completions first
    task_data = g.project.get_task_with_completions(task_id)
    if not task_data:
        task_data = g.project.source_storage.get(task_id)
    task_data = resolve_task_data_uri(task_data, project=g.project)

    if g.project.ml_backends_connected:
        task_data = g.project.make_predictions(task_data)

    # change indent for pretty jsonify
    if request.values.get('pretty', False):
        indent = 2
    else:
        indent = None

    response = current_app.response_class(
        json.dumps(task_data, indent=indent) + "\n",
        mimetype=current_app.config["JSONIFY_MIMETYPE"],
    )
    return make_response(response, 200)
예제 #4
0
def labeling_page():
    """Render the label stream page, or hand over to the data manager.

    Redirects to the welcome page when the project has no tasks. With a
    ``task_id`` query argument the labeling page is rendered for that task.
    Without one, the request is redirected to the data manager tasks page
    unless the request URL contains ``label-old``.
    """
    project = g.project
    if project.no_tasks():
        return redirect(url_for('label_studio.welcome_page'))

    task_id = request.args.get('task_id', None)

    if task_id is None:
        # data manager if no task id to open
        if 'label-old' not in request.url:
            return redirect(url_for('data_manager_blueprint.tasks_page'))
        task_data = None
    else:
        # open separated LSF for task (task explore mode)
        task_id = int(task_id)
        task_data = project.get_task_with_completions(task_id)
        if not task_data:
            task_data = project.source_storage.get(task_id)
        task_data = resolve_task_data_uri(task_data, project=project)

        if project.ml_backends_connected:
            task_data = project.make_predictions(task_data)

    page_context = dict(
        project=project,
        config=project.config,
        label_config_line=project.label_config_line,
        task_id=task_id,
        task_data=task_data,
        version=label_studio.__version__,
    )
    return flask.render_template('labeling.html', **page_context, **find_editor_files())
예제 #5
0
def next_task(project, params, items):
    """Pick the next task for the labeling stream.

    :param project: project to take the task from
    :param params: object whose ``values['sampling']`` selects the sampling
        mode (sequential | random-uniform | prediction-score-min |
        prediction-score-max); may be None, in which case no sampling mode
        is passed on
    :param items: task ids to sample from
    :return: the task dict with ``response_code`` set to 200, or
        ``{'response_code': 404, 'id': None}`` when no task is found
    """
    if params is None:
        sampling = None
    else:
        sampling = params.values.get('sampling', None)

    # look for a task that is not yet present in completions
    already_completed = project.get_completions_ids()
    candidate = project.next_task(already_completed, task_ids=items, sampling=sampling)
    if candidate is None:
        # no tasks found
        return {'response_code': 404, 'id': None}

    candidate = project.resolve_undefined_task_data(
        resolve_task_data_uri(candidate, project=project))

    # collect prediction from multiple ml backends
    if project.ml_backends_connected:
        candidate = project.make_predictions(candidate)

    logger.debug('Next task:\n' + str(candidate.get('id', None)))
    candidate['response_code'] = 200
    return candidate
예제 #6
0
def load_task(project, task_id, params, resolve_uri=False):
    """Load one task and attach aggregated completion/prediction fields.

    The task is taken from ``project.target_storage`` when it exists there,
    otherwise from ``project.source_storage``. The result is a copy (made
    with ``copy``) extended with ``completed_at``, ``completions_results``,
    ``predictions_results``, ``predictions_score`` (only when at least one
    prediction has a ``score``), ``total_completions``, ``total_predictions``
    and ``cancelled_completions``.

    :param project: project exposing ``target_storage``, ``source_storage``
        and ``resolve_undefined_task_data``
    :param task_id: id of the task to load
    :param params: request parameters or None; when it only asks for the
        ``tasks:id`` field and neither filters nor a non-id ordering are
        active, ``{'id': task_id}`` is returned without touching storage
    :param resolve_uri: when true, the task is additionally passed through
        ``resolve_task_data_uri`` (skipped by default because resolving
        s3/gcs data is slow)
    :return: the enriched task dict
    """
    # TODO: make this clear and more flexible
    # shortcut: only the id is requested and filters/ordering do not need more
    only_id_requested = (
        params is not None
        and hasattr(params, 'fields')
        and params.fields == ['tasks:id']
    )
    if only_id_requested and not check_filters_enabled(params):
        if not check_order_enabled(params) or params.tab.get('ordering') in [['tasks:id'], ['-tasks:id']]:
            return {'id': task_id}

    target_task = project.target_storage.get(task_id)
    source_task = project.source_storage.get(task_id, inplace=True, validate=False)

    # a single copy is enough to make the root of the task safe to modify
    if target_task is None:
        task = copy(source_task)
    else:
        task = copy(target_task)
        # tasks can hold the newest version of predictions, so get it from
        # tasks; fall back to no predictions if the source task is missing
        if source_task is not None:
            task['predictions'] = source_task.get('predictions', [])
        else:
            task['predictions'] = []

    # completed_at: integer timestamps other than 0 are formatted as local time
    completed_at = get_completed_at(task)
    if completed_at != 0 and isinstance(completed_at, int):
        completed_at = timestamp_to_local_datetime(completed_at).strftime(DATETIME_FORMAT)
    task['completed_at'] = completed_at

    # completion results aggregated over all completions
    completions = task.get('completions', [])
    if len(completions) > 0:
        task['completions_results'] = json.dumps([item.get('result', []) for item in completions])
    else:
        task['completions_results'] = ''

    # prediction results and mean prediction score
    predictions = task.get('predictions', [])
    if len(predictions) > 0:
        task['predictions_results'] = json.dumps([item.get('result', []) for item in predictions])
        scores = [p['score'] for p in predictions if 'score' in p]
        if scores:
            task['predictions_score'] = sum(scores) / len(scores)
    else:
        task['predictions_results'] = ''

    # aggregations (reuse the lists fetched above instead of looking them up again)
    task['total_completions'] = len(completions)
    task['total_predictions'] = len(predictions)
    task['cancelled_completions'] = get_cancelled_completions(task)

    # don't resolve data (s3/gcs is slow) if it's not necessary
    if resolve_uri:
        task = resolve_task_data_uri(task, project=project)

    return project.resolve_undefined_task_data(task)
예제 #7
0
def api_all_tasks():
    """Get full tasks with pagination, completions and predictions.

    Query arguments:
        page: 1-based page number (default 1)
        page_size: number of tasks per page (default 10)
        order: ``id``, ``completed_at`` or ``has_skipped_completions``,
            optionally prefixed with ``-`` to invert the direction
            (default ``id``)

    Returns a 200 JSON response with the list of tasks, or a 422 JSON
    response with a ``detail`` message when ``page``/``page_size`` are not
    positive integers or ``order`` is not one of the supported values.
    """
    project = project_get_or_create()

    # non-numeric paging arguments are a client error, not a server crash
    try:
        page = int(request.args.get('page', 1))
        page_size = int(request.args.get('page_size', 10))
    except ValueError:
        return make_response(
            jsonify({'detail': 'Incorrect page or page_size'}), 422)
    if page < 1 or page_size < 1:
        return make_response(
            jsonify({'detail': 'Incorrect page or page_size'}), 422)

    order = request.args.get('order', 'id')
    # startswith() also copes with an empty value ("?order="), which then
    # fails the whitelist check below instead of raising IndexError
    order_inverted = order.startswith('-')
    order = order[1:] if order_inverted else order
    if order not in ['id', 'completed_at', 'has_skipped_completions']:
        return make_response(jsonify({'detail': 'Incorrect order'}), 422)

    # get task ids and sort them by completed time
    task_ids = project.source_storage.ids()
    completed_at = project.get_completed_at()
    skipped_status = project.get_skipped_status()

    # ordering: tasks without completions get None for both derived keys
    pre_order = [{
        'id': i,
        'completed_at': completed_at[i] if i in completed_at else None,
        # also require the id in skipped_status so a mismatch between the two
        # lookups yields None instead of a KeyError
        'has_skipped_completions':
            skipped_status[i] if i in completed_at and i in skipped_status else None,
    } for i in task_ids]

    if order == 'has_skipped_completions':
        # for has_skipped_completions use two keys ordering
        def sort_key(x):
            return (DirectionSwitch(x['has_skipped_completions'], order_inverted),
                    DirectionSwitch(x['completed_at'], False))
    else:
        def sort_key(x):
            return DirectionSwitch(x[order], order_inverted)

    ordered = sorted(pre_order, key=sort_key)
    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        i = item['id']
        task = project.get_task_with_completions(i)
        if task is None:  # no completion at task
            task = project.source_storage.get(i)
        else:
            task['completed_at'] = item['completed_at']
            task['has_skipped_completions'] = item['has_skipped_completions']
        tasks.append(resolve_task_data_uri(task))

    return make_response(jsonify(tasks), 200)
예제 #8
0
def api_all_tasks():
    """Get full tasks with pagination, completions and predictions.

    Query arguments:
        page: 1-based page number (default 1)
        page_size: number of tasks per page (default 10)
        order: ``id``, ``completed_at`` or ``has_skipped_completions``,
            optionally prefixed with ``-`` to invert the direction
            (default ``id``)

    Returns a 200 JSON response with the list of tasks, or a 422 JSON
    response with a ``detail`` message when ``page``/``page_size`` are not
    positive integers or ``order`` is not one of the supported values.
    """
    # non-numeric paging arguments are a client error, not a server crash
    try:
        page = int(request.args.get('page', 1))
        page_size = int(request.args.get('page_size', 10))
    except ValueError:
        return make_response(jsonify({'detail': 'Incorrect page or page_size'}), 422)
    if page < 1 or page_size < 1:
        return make_response(jsonify({'detail': 'Incorrect page or page_size'}), 422)

    order = request.args.get('order', 'id')
    # startswith() also copes with an empty value ("?order="), which then
    # fails the whitelist check below instead of raising IndexError
    order_inverted = order.startswith('-')
    order = order[1:] if order_inverted else order
    if order not in ['id', 'completed_at', 'has_skipped_completions']:
        return make_response(jsonify({'detail': 'Incorrect order'}), 422)

    # get task ids and sort them by completed time
    task_ids = g.project.source_storage.ids()
    completed_at = g.project.get_completed_at()
    skipped_status = g.project.get_skipped_status()

    # ordering: lazily built, consumed exactly once by sorted() below
    pre_order = ({
        'id': i,
        'completed_at': completed_at[i] if i in completed_at else None,
        # also require the id in skipped_status so a mismatch between the two
        # lookups yields None instead of a KeyError
        'has_skipped_completions':
            skipped_status[i] if i in completed_at and i in skipped_status else None,
    } for i in task_ids)

    if order == 'id':
        ordered = sorted(pre_order, key=lambda x: x['id'], reverse=order_inverted)
    elif order == 'has_skipped_completions':
        # for has_skipped_completions use two keys ordering
        ordered = sorted(pre_order,
                         key=lambda x: (DirectionSwitch(x['has_skipped_completions'], not order_inverted),
                                        DirectionSwitch(x['completed_at'], False)))
    else:
        # another orderings
        ordered = sorted(pre_order, key=lambda x: DirectionSwitch(x[order], not order_inverted))

    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        # render the completion timestamp as a local date-time string
        item_completed_at = item['completed_at']
        if item_completed_at != 'undefined' and item_completed_at is not None:
            item_completed_at = timestamp_to_local_datetime(item_completed_at).strftime('%Y-%m-%d %H:%M:%S')

        i = item['id']
        task = g.project.get_task_with_completions(i)
        if task is None:  # no completion at task
            task = g.project.source_storage.get(i)
        else:
            task['completed_at'] = item_completed_at
            task['has_skipped_completions'] = item['has_skipped_completions']
        tasks.append(resolve_task_data_uri(task, project=g.project))

    return make_response(jsonify(tasks), 200)
예제 #9
0
def prepare_tasks(project, params):
    """Main function to get tasks for one page of the data manager.

    :param project: project whose ``config`` is read and which is passed on
        to ``load_tasks`` and ``resolve_task_data_uri``
    :param params: request parameters providing ``page`` and ``page_size``
        and, optionally, ``fields``
    :return: dict with ``tasks`` (the page of tasks), ``total`` (number of
        tasks), ``total_completions`` and ``total_predictions``; the last two
        are None unless a full render was performed
    """
    # this option helps to avoid a total request for tasks and completions from storage (huge speed up for s3/gcs)
    first_page_full_render = project.config.get('first_page_full_render', True)
    page, page_size = params.page, params.page_size

    # a full render loads everything; otherwise only the requested range is
    # loaded to speed up evaluation of tasks without filters and ordering
    full_render = check_filters_enabled(params) or check_order_enabled(params)
    full_render |= (page <= 1
                    if first_page_full_render else page < 1) or page_size <= 0
    if full_render:
        task_range = None
    else:
        task_range = ((page - 1) * page_size, page * page_size)

    # load all tasks from db with some aggregations over completions
    tasks, total_tasks = load_tasks(project,
                                    params,
                                    resolve_uri=False,
                                    task_range=task_range)
    total_completions, total_predictions = None, None

    if full_render:
        # filter, then order
        tasks = filter_tasks(tasks, params)
        tasks = order_tasks(params, tasks)
        total_tasks = len(tasks)

        # aggregations
        total_completions, total_predictions = 0, 0
        for task in tasks:
            total_completions += task.get('total_completions', 0)
            total_predictions += task.get('total_predictions', 0)

        # pagination
        if page > 0 and page_size > 0:
            tasks = tasks[(page - 1) * page_size:page * page_size]

    # use only necessary fields to avoid storage (s3/gcs/etc) overloading
    need_uri_resolving = True
    if hasattr(params, 'fields'):  # TODO: or tab.hiddenColumns
        need_uri_resolving = any('data.' in field for field in params.fields)

    # resolve all task fields
    if need_uri_resolving:
        for i, task in enumerate(tasks):
            tasks[i] = resolve_task_data_uri(task, project=project)

    return {
        'tasks': tasks,
        'total': total_tasks,
        'total_completions': total_completions,
        'total_predictions': total_predictions
    }
예제 #10
0
def api_tasks(task_id):
    """Return (GET) or remove (DELETE) the task with the given id.

    On GET the task is loaded with completions when available, otherwise from
    the source storage, and predictions are added when ML backends are
    connected.
    """
    task_id = int(task_id)
    project = g.project
    method = request.method

    if method == 'DELETE':
        project.remove_task(task_id)
        return make_response(jsonify('Task deleted.'), 204)

    # any other method than GET falls through without a response
    if method != 'GET':
        return None

    # try to get task with completions first
    task_data = project.get_task_with_completions(task_id)
    if not task_data:
        task_data = project.source_storage.get(task_id)
    task_data = resolve_task_data_uri(task_data, project=project)

    if project.ml_backends_connected:
        task_data = project.make_predictions(task_data)

    return make_response(jsonify(task_data), 200)
예제 #11
0
def api_tasks(task_id):
    """Return (GET) or remove (DELETE) the task with the given id.

    Both handled methods report this view's function name to the project
    analytics before responding.
    """
    task_id = int(task_id)
    project = project_get_or_create()
    method = request.method

    if method == 'GET':
        # try to get task with completions first
        task_data = project.get_task_with_completions(task_id)
        if not task_data:
            task_data = project.source_storage.get(task_id)
        task_data = resolve_task_data_uri(task_data)

        view_name = getframeinfo(currentframe()).function
        project.analytics.send(view_name)
        return make_response(jsonify(task_data), 200)

    if method == 'DELETE':
        project.remove_task(task_id)
        view_name = getframeinfo(currentframe()).function
        project.analytics.send(view_name)
        return make_response(jsonify('Task deleted.'), 204)

    # any other method falls through without a response
    return None
예제 #12
0
def api_generate_next_task():
    """Produce the next task for the labeling page (label stream).

    Responds with 404 and an empty body when the project has no next task,
    otherwise with 200 and the task as JSON.
    """
    project = g.project

    # look for a task that is not yet present in completions
    done_ids = project.get_completions_ids()
    candidate = project.next_task(done_ids)
    if candidate is None:
        # no tasks found
        return make_response("", 404)

    candidate = resolve_task_data_uri(candidate, project=project)

    # collect prediction from multiple ml backends
    if project.ml_backends_connected:
        candidate = project.make_predictions(candidate)

    logger.debug("Next task:\n" + str(candidate.get("id", None)))
    return make_response(jsonify(candidate), 200)
예제 #13
0
def api_generate_next_task():
    """Produce the next task to label.

    Responds with 404 and an empty body when the project has no next task
    (reporting ``error=404`` to analytics), otherwise with 200 and the task
    as JSON.
    """
    project = project_get_or_create()

    # look for a task that is not yet present in completions
    done_ids = project.get_completions_ids()
    candidate = project.next_task(done_ids)

    if candidate is None:
        # no tasks found
        view_name = getframeinfo(currentframe()).function
        project.analytics.send(view_name, error=404)
        return make_response('', 404)

    candidate = resolve_task_data_uri(candidate)

    view_name = getframeinfo(currentframe()).function
    project.analytics.send(view_name)

    # collect prediction from multiple ml backends
    if project.ml_backends_connected:
        candidate = project.make_predictions(candidate)

    logger.debug('Next task:\n' + json.dumps(candidate, indent=2))
    return make_response(jsonify(candidate), 200)
예제 #14
0
def prepare_tasks(project, params):
    """Collect one page of tasks, ordered and reduced to the requested fields.

    :param project: project providing the task storages and completion
        lookups
    :param params: request parameters with ``order`` (``id``,
        ``completed_at`` or ``has_cancelled_completions``, optionally
        prefixed with ``-``), ``page``, ``page_size`` and ``fields`` (field
        names to keep, or a collection containing ``"all"``)
    :return: list of task dicts for the requested page
    :raises DataManagerException: when ``order`` is not a supported value
    """
    order, page, page_size = params.order, params.page, params.page_size
    fields = params.fields

    # a leading "-" inverts the direction; startswith() also copes with an
    # empty order, which is then rejected below instead of raising IndexError
    inverted = order.startswith("-")
    order = order[1:] if inverted else order
    if order not in ["id", "completed_at", "has_cancelled_completions"]:
        raise DataManagerException("Incorrect order")

    # get task ids and sort them by completed time
    task_ids = project.source_storage.ids()
    # task can have multiple completions, get the last of completed
    completed_at_by_id = project.get_completed_at()
    cancelled_status = project.get_cancelled_status()

    # ordering: lazily built, consumed exactly once by sorted() below
    pre_order = ({
        "id": i,
        "completed_at":
            completed_at_by_id[i] if i in completed_at_by_id else None,
        "has_cancelled_completions":
            cancelled_status[i] if i in completed_at_by_id else None,
    } for i in task_ids)

    if order == "id":
        ordered = sorted(pre_order, key=lambda x: x["id"], reverse=inverted)
    elif order == "has_cancelled_completions":
        # for has_cancelled_completions use two keys ordering
        ordered = sorted(
            pre_order,
            key=lambda x: (
                DirectionSwitch(x["has_cancelled_completions"], not inverted),
                DirectionSwitch(x["completed_at"], False),
            ),
        )
    else:
        # another orderings
        ordered = sorted(pre_order,
                         key=lambda x: DirectionSwitch(x[order], not inverted))

    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        i = item["id"]
        task = project.get_task_with_completions(i)

        # no completions at task, get task without completions
        if task is None:
            task = project.source_storage.get(i)
        else:
            # evaluate completed_at time (separate name: do not shadow the
            # per-task lookup used for ordering above)
            task_completed_at = item["completed_at"]
            if task_completed_at != "undefined" and task_completed_at is not None:
                task_completed_at = timestamp_to_local_datetime(
                    task_completed_at).strftime("%Y-%m-%d %H:%M:%S")
            task["completed_at"] = task_completed_at
            task["has_cancelled_completions"] = item[
                "has_cancelled_completions"]

        # don't resolve data (s3/gcs is slow) if it's not in fields
        if "all" in fields or "data" in fields:
            task = resolve_task_data_uri(task, project=project)

        # leave only chosen fields
        # NOTE(review): a requested field missing from the task raises
        # KeyError here (e.g. "completed_at" is only set on tasks that have
        # completions) - confirm whether that is intended
        if "all" not in fields:
            task = {field: task[field] for field in fields}

        tasks.append(task)

    return tasks