Exemple #1
0
def prepare_annotations(tasks, params):
    """ Main function to get annotations
        TODO: it's a draft only

    Flattens the completions of all tasks into one list, links every
    completion to its task through 'task_id' and returns the requested page.
    The input tasks and their completion dicts are left unmodified.

    :param tasks: iterable of task dicts with 'id' and, optionally, 'completions'
    :param params: object with `page` (1-based) and `page_size` attributes
    :return: dict with 'annotations' (completions of the requested page) and
        'total' (number of completions before pagination)
    """
    page, page_size = params.page, params.page_size

    # unpack completions from tasks
    items = []
    for task in tasks:
        for completion in task.get('completions', []):
            # work on a shallow copy: writing into the caller's completion would
            # leak 'task_id' into it and replace its raw 'created_at' with a
            # formatted string, which breaks the conversion on a repeated call
            item = dict(completion)

            # assign task id to have link between completion and task in the data manager
            item['task_id'] = task['id']

            # convert created_at
            created_at = item.get('created_at', None)
            if created_at:
                item['created_at'] = timestamp_to_local_datetime(
                    created_at).strftime(DATETIME_FORMAT)

            items.append(item)

    total = len(items)

    # paginate only when both page and page_size are positive,
    # otherwise return all items
    if page > 0 and page_size > 0:
        items = items[(page - 1) * page_size:page * page_size]

    return {'annotations': items, 'total': total}
Exemple #2
0
def load_task(project, task_id, params, resolve_uri=False):
    """ Build the data manager view of a single task.

    The task is taken from the target storage, or from the source storage when
    the target storage has no entry for it. It is then extended with
    completed_at, serialized completion and prediction results, the mean
    prediction score and several counters.

    :param project: project object providing target_storage, source_storage
        and resolve_undefined_task_data
    :param task_id: id of the task to load
    :param params: request parameters or None; `fields` and `tab` are read
        only to decide whether the id-only shortcut applies
    :param resolve_uri: when True the task is passed through resolve_task_data_uri
    :return: the prepared task, or just {'id': task_id} on the shortcut path
    """
    # TODO: make this clear and more flexible
    # shortcut: only the task id is requested, filters are disabled and the
    # ordering is either disabled or by task id, so nothing has to be loaded
    id_only = params is not None and hasattr(params, 'fields') and params.fields == ['tasks:id']
    if id_only and not check_filters_enabled(params):
        orderings_by_id = (['tasks:id'], ['-tasks:id'])
        if not check_order_enabled(params) or params.tab.get('ordering') in orderings_by_id:
            return {'id': task_id}

    from_target = project.target_storage.get(task_id)
    from_source = project.source_storage.get(task_id, inplace=True, validate=False)

    if from_target is not None:
        task = copy(from_target)
        # tasks can hold the newest version of predictions, so get it from tasks
        task['predictions'] = from_source.get('predictions', [])
    else:
        task = copy(from_source)

    # we need to modify the root of task
    task = copy(task)

    # completed_at: non-zero integer values are formatted, anything else is stored as is
    finished = get_completed_at(task)
    if finished != 0 and isinstance(finished, int):
        finished = timestamp_to_local_datetime(finished).strftime(DATETIME_FORMAT)
    task['completed_at'] = finished

    # completion results aggregated over all completions
    completions = task.get('completions', [])
    if len(completions) == 0:
        task['completions_results'] = ''
    else:
        task['completions_results'] = json.dumps(
            [entry.get('result', []) for entry in completions])

    # prediction results and mean score over the predictions that carry a score
    predictions = task.get('predictions', [])
    if len(predictions) == 0:
        task['predictions_results'] = ''
    else:
        task['predictions_results'] = json.dumps(
            [entry.get('result', []) for entry in predictions])
        scores = [entry['score'] for entry in predictions if 'score' in entry]
        if scores:
            task['predictions_score'] = sum(scores) / len(scores)

    # counters
    task['total_completions'] = len(completions)
    task['total_predictions'] = len(predictions)
    task['cancelled_completions'] = get_cancelled_completions(task)

    # don't resolve data (s3/gcs is slow) if it's not necessary
    if resolve_uri:
        task = resolve_task_data_uri(task, project=project)

    return project.resolve_undefined_task_data(task)
def api_all_tasks():
    """ Get full tasks with pagination, completions and predictions

    Query parameters (read from ``request.args``):
        page: 1-based page number, defaults to 1
        page_size: number of tasks per page, defaults to 10
        order: 'id', 'completed_at' or 'has_skipped_completions', optionally
            prefixed with '-'; defaults to 'id'

    :return: response with a JSON list of tasks and status 200, or a JSON
        object with a 'detail' message and status 422 when page, page_size
        or order is not valid
    """
    try:
        page = int(request.args.get('page', 1))
        page_size = int(request.args.get('page_size', 10))
    except (TypeError, ValueError):
        # a non-numeric value is a client error, not a server failure
        return make_response(jsonify({'detail': 'Incorrect page or page_size'}), 422)
    if page < 1 or page_size < 1:
        return make_response(jsonify({'detail': 'Incorrect page or page_size'}), 422)

    order = request.args.get('order', 'id')
    # startswith() copes with an empty ?order= value (order[0] would raise
    # IndexError); the empty name is then rejected by the check below
    order_inverted = order.startswith('-')
    order = order[1:] if order_inverted else order
    if order not in ['id', 'completed_at', 'has_skipped_completions']:
        return make_response(jsonify({'detail': 'Incorrect order'}), 422)

    # get task ids and sort them by completed time
    task_ids = g.project.source_storage.ids()
    completed_at = g.project.get_completed_at()
    skipped_status = g.project.get_skipped_status()

    # ordering; each status is looked up in its own mapping, so an id that is
    # missing from one of them yields None instead of a KeyError
    pre_order = ({
        'id': i,
        'completed_at': completed_at.get(i),
        'has_skipped_completions': skipped_status.get(i),
    } for i in task_ids)

    if order == 'id':
        ordered = sorted(pre_order, key=lambda x: x['id'], reverse=order_inverted)
    elif order == 'has_skipped_completions':
        # for has_skipped_completions use two keys ordering
        ordered = sorted(pre_order,
                         key=lambda x: (DirectionSwitch(x['has_skipped_completions'], not order_inverted),
                                        DirectionSwitch(x['completed_at'], False)))
    else:
        # another orderings
        ordered = sorted(pre_order, key=lambda x: DirectionSwitch(x[order], not order_inverted))

    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        if item['completed_at'] != 'undefined' and item['completed_at'] is not None:
            item['completed_at'] = timestamp_to_local_datetime(item['completed_at']).strftime('%Y-%m-%d %H:%M:%S')
        i = item['id']
        task = g.project.get_task_with_completions(i)
        if task is None:  # no completion at task
            task = g.project.source_storage.get(i)
        else:
            task['completed_at'] = item['completed_at']
            task['has_skipped_completions'] = item['has_skipped_completions']
        task = resolve_task_data_uri(task, project=g.project)
        tasks.append(task)

    return make_response(jsonify(tasks), 200)
Exemple #4
0
    def get_completed_at(self):
        """ Collect the time of the latest completion for every task in the target storage

        :return: dict mapping task id to a formatted datetime string, or to
            'undefined' when the latest completion time cannot be obtained
        """
        completed = {}
        for _key, record in self.target_storage.items():
            task_id = record['id']
            try:
                newest = max(record['completions'], key=lambda item: item['created_at'])
                stamp = newest['created_at']
            except Exception:
                # best effort: missing, empty or malformed completions
                completed[task_id] = 'undefined'
                continue
            completed[task_id] = timestamp_to_local_datetime(stamp).strftime('%Y-%m-%d %H:%M:%S')
        return completed
def prepare_tasks(project, params):
    """ Get one page of tasks, ordered and reduced to the requested fields

    :param project: project object providing source_storage, get_completed_at,
        get_cancelled_status and get_task_with_completions
    :param params: object with `order`, `page`, `page_size` and `fields`
        attributes; `order` is "id", "completed_at" or
        "has_cancelled_completions", optionally prefixed with "-"; `fields`
        lists the task keys to keep, or contains "all" to keep everything
    :return: list of task dicts for the requested page
    :raises DataManagerException: when `order` is not one of the supported values
    """
    order, page, page_size = params.order, params.page, params.page_size
    fields = params.fields

    # startswith() copes with an empty order string (order[0] would raise
    # IndexError); the empty name is then rejected by the check below
    inverted = order.startswith("-")
    order = order[1:] if inverted else order
    if order not in ["id", "completed_at", "has_cancelled_completions"]:
        raise DataManagerException("Incorrect order")

    # get task ids and sort them by completed time
    task_ids = project.source_storage.ids()
    # task can have multiple completions, get the last of completed
    completed_at = project.get_completed_at()
    cancelled_status = project.get_cancelled_status()

    # ordering; each status is looked up in its own mapping, so an id that is
    # missing from one of them yields None instead of a KeyError
    pre_order = ({
        "id": i,
        "completed_at": completed_at.get(i),
        "has_cancelled_completions": cancelled_status.get(i),
    } for i in task_ids)

    if order == "id":
        ordered = sorted(pre_order, key=lambda x: x["id"], reverse=inverted)
    elif order == "has_cancelled_completions":
        # for has_cancelled_completions use two keys ordering
        ordered = sorted(
            pre_order,
            key=lambda x: (
                DirectionSwitch(x["has_cancelled_completions"], not inverted),
                DirectionSwitch(x["completed_at"], False),
            ),
        )
    else:
        # another orderings
        ordered = sorted(pre_order,
                         key=lambda x: DirectionSwitch(x[order], not inverted))

    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        task_id = item["id"]
        task = project.get_task_with_completions(task_id)

        if task is None:
            # no completions at task, get task without completions
            task = project.source_storage.get(task_id)
        else:
            # evaluate completed_at time
            task_completed_at = item["completed_at"]
            if task_completed_at != "undefined" and task_completed_at is not None:
                task_completed_at = timestamp_to_local_datetime(
                    task_completed_at).strftime("%Y-%m-%d %H:%M:%S")
            task["completed_at"] = task_completed_at
            task["has_cancelled_completions"] = item["has_cancelled_completions"]

        # don't resolve data (s3/gcs is slow) if it's not in fields
        if "all" in fields or "data" in fields:
            task = resolve_task_data_uri(task, project=project)

        # leave only chosen fields
        if "all" not in fields:
            task = {field: task[field] for field in fields}

        tasks.append(task)

    return tasks