import json
from copy import copy
from operator import itemgetter

from flask import request, jsonify, make_response, g

# Project-level helpers such as timestamp_to_local_datetime, DATETIME_FORMAT,
# DirectionSwitch, resolve_task_data_uri, check_filters_enabled, check_order_enabled,
# get_completed_at, get_cancelled_completions and DataManagerException are assumed
# to be available in module scope.


def prepare_annotations(tasks, params):
    """ Main function to get annotations
        TODO: it's a draft only
    """
    page, page_size = params.page, params.page_size

    # unpack completions from tasks
    items = []
    for task in tasks:
        completions = task.get('completions', [])

        # assign task ids to link each completion with its task in the data manager
        for completion in completions:
            completion['task_id'] = task['id']

            # convert created_at to a formatted local datetime string
            created_at = completion.get('created_at', None)
            if created_at:
                completion['created_at'] = timestamp_to_local_datetime(created_at).strftime(DATETIME_FORMAT)

        items += completions

    total = len(items)

    # apply pagination only when both page and page_size are positive
    if page > 0 and page_size > 0:
        items = items[(page - 1) * page_size:page * page_size]

    return {'annotations': items, 'total': total}
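# A minimal usage sketch (hypothetical, not part of the original module). It assumes
# `params` only needs `.page` and `.page_size` here, and uses SimpleNamespace as a
# stand-in for the real params object.
def _example_prepare_annotations():
    from types import SimpleNamespace

    tasks = [
        {'id': 1, 'completions': [{'id': 10, 'result': []}]},
        {'id': 2, 'completions': [{'id': 11, 'result': []}, {'id': 12, 'result': []}]},
    ]
    # page=1, page_size=2 -> only the first two completions are returned,
    # while 'total' still counts all three
    return prepare_annotations(tasks, SimpleNamespace(page=1, page_size=2))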
def load_task(project, task_id, params, resolve_uri=False):
    """ Preload task: get completed_at, cancelled_completions,
        evaluate pre-signed urls for storages, aggregate over completion data, etc.
    """
    # TODO: make this clear and more flexible
    # make some tricky optimizations if filters are disabled and ordering == ['id']
    if params is not None and hasattr(params, 'fields') and params.fields == ['tasks:id']:
        if not check_filters_enabled(params):
            if not check_order_enabled(params) or params.tab.get('ordering') in [['tasks:id'], ['-tasks:id']]:
                return {'id': task_id}

    target_task = project.target_storage.get(task_id)
    source_task = project.source_storage.get(task_id, inplace=True, validate=False)

    if target_task is None:
        task = copy(source_task)
    else:
        task = copy(target_task)
        # tasks can hold the newest version of predictions, so get it from tasks
        task['predictions'] = source_task.get('predictions', [])

    # we need to modify the root of the task
    task = copy(task)

    # completed_at
    completed_at = get_completed_at(task)
    if completed_at != 0 and isinstance(completed_at, int):
        completed_at = timestamp_to_local_datetime(completed_at).strftime(DATETIME_FORMAT)
    task['completed_at'] = completed_at

    # completion results aggregated over all completions
    completions = task.get('completions', [])
    if len(completions) > 0:
        task['completions_results'] = json.dumps([item.get('result', []) for item in completions])
    else:
        task['completions_results'] = ''

    # prediction results and average prediction score
    predictions = task.get('predictions', [])
    if len(predictions) > 0:
        task['predictions_results'] = json.dumps([item.get('result', []) for item in predictions])
        scores = [p['score'] for p in predictions if 'score' in p]
        if scores:
            task['predictions_score'] = sum(scores) / len(scores)
    else:
        task['predictions_results'] = ''

    # aggregations
    task['total_completions'] = len(task.get('completions', []))
    task['total_predictions'] = len(task.get('predictions', []))
    task['cancelled_completions'] = get_cancelled_completions(task)

    # don't resolve data URIs unless necessary: s3/gcs resolution is very slow
    if resolve_uri:
        task = resolve_task_data_uri(task, project=project)

    task = project.resolve_undefined_task_data(task)
    return task
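# Hedged sketch only: `get_cancelled_completions` is assumed to count completions that
# the annotator skipped/cancelled. The `was_cancelled` flag name is an assumption made
# for illustration, not something confirmed by this module.
def _get_cancelled_completions_sketch(task):
    completions = task.get('completions', [])
    # a missing flag is treated as "not cancelled"
    return sum(1 for c in completions if c.get('was_cancelled', False))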
def api_all_tasks():
    """ Get full tasks with pagination, completions and predictions
    """
    page, page_size = int(request.args.get('page', 1)), int(request.args.get('page_size', 10))
    order = request.args.get('order', 'id')
    if page < 1 or page_size < 1:
        return make_response(jsonify({'detail': 'Incorrect page or page_size'}), 422)

    order_inverted = order[0] == '-'
    order = order[1:] if order_inverted else order
    if order not in ['id', 'completed_at', 'has_skipped_completions']:
        return make_response(jsonify({'detail': 'Incorrect order'}), 422)

    # get task ids and sort them by completed time
    task_ids = g.project.source_storage.ids()
    completed_at = g.project.get_completed_at()
    skipped_status = g.project.get_skipped_status()

    # ordering
    pre_order = ({
        'id': i,
        'completed_at': completed_at[i] if i in completed_at else None,
        'has_skipped_completions': skipped_status[i] if i in skipped_status else None,
    } for i in task_ids)

    if order == 'id':
        ordered = sorted(pre_order, key=lambda x: x['id'], reverse=order_inverted)
    else:
        # for has_skipped_completions use two-key ordering
        if order == 'has_skipped_completions':
            ordered = sorted(pre_order,
                             key=lambda x: (DirectionSwitch(x['has_skipped_completions'], not order_inverted),
                                            DirectionSwitch(x['completed_at'], False)))
        # other orderings
        else:
            ordered = sorted(pre_order, key=lambda x: DirectionSwitch(x[order], not order_inverted))

    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        if item['completed_at'] != 'undefined' and item['completed_at'] is not None:
            item['completed_at'] = timestamp_to_local_datetime(item['completed_at']).strftime('%Y-%m-%d %H:%M:%S')

        i = item['id']
        task = g.project.get_task_with_completions(i)

        # no completions at task
        if task is None:
            task = g.project.source_storage.get(i)
        else:
            task['completed_at'] = item['completed_at']
            task['has_skipped_completions'] = item['has_skipped_completions']

        task = resolve_task_data_uri(task, project=g.project)
        tasks.append(task)

    return make_response(jsonify(tasks), 200)
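# The two-key ordering above relies on DirectionSwitch, a comparison wrapper that lets
# each sort key be inverted independently while tolerating None values. The class below
# is a hedged sketch of such a wrapper under those assumptions, not the project's
# actual implementation.
class _DirectionSwitchSketch(object):
    def __init__(self, value, ascending):
        self.value = value
        self.ascending = ascending

    def __eq__(self, other):
        return self.value == other.value

    def __lt__(self, other):
        # None always sorts last, regardless of direction
        if self.value is None:
            return False
        if other.value is None:
            return True
        if self.value == other.value:
            return False
        is_less = self.value < other.value
        return is_less if self.ascending else not is_less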
def get_completed_at(self):
    """ Get the latest completion time for each task

    :return: dict mapping task id to a formatted datetime string,
             or 'undefined' if the task has no valid completions
    """
    times = {}
    for _, data in self.target_storage.items():
        task_id = data['id']
        try:
            # the most recent completion defines the task's completed time
            latest_time = max(data['completions'], key=itemgetter('created_at'))['created_at']
        except Exception:
            times[task_id] = 'undefined'
        else:
            times[task_id] = timestamp_to_local_datetime(latest_time).strftime('%Y-%m-%d %H:%M:%S')
    return times
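# Hedged sketch only: `timestamp_to_local_datetime` is assumed to turn a unix timestamp
# (seconds since the epoch) into a naive datetime in the server's local timezone; the
# real helper may differ.
def _timestamp_to_local_datetime_sketch(timestamp):
    from datetime import datetime
    # fromtimestamp() applies the local timezone offset automatically
    return datetime.fromtimestamp(int(timestamp))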
def prepare_tasks(project, params):
    order, page, page_size = params.order, params.page, params.page_size
    fields = params.fields

    # a leading '-' inverts the sort direction
    order_inverted = order[0] == '-'
    order = order[1:] if order_inverted else order
    if order not in ['id', 'completed_at', 'has_cancelled_completions']:
        raise DataManagerException('Incorrect order')

    # get task ids and sort them by completed time
    task_ids = project.source_storage.ids()
    completed_at = project.get_completed_at()  # a task can have multiple completions, take the latest one
    cancelled_status = project.get_cancelled_status()

    # ordering
    pre_order = ({
        'id': i,
        'completed_at': completed_at[i] if i in completed_at else None,
        'has_cancelled_completions': cancelled_status[i] if i in cancelled_status else None,
    } for i in task_ids)

    if order == 'id':
        ordered = sorted(pre_order, key=lambda x: x['id'], reverse=order_inverted)
    else:
        # for has_cancelled_completions use two-key ordering
        if order == 'has_cancelled_completions':
            ordered = sorted(
                pre_order,
                key=lambda x: (
                    DirectionSwitch(x['has_cancelled_completions'], not order_inverted),
                    DirectionSwitch(x['completed_at'], False),
                ),
            )
        # other orderings
        else:
            ordered = sorted(pre_order, key=lambda x: DirectionSwitch(x[order], not order_inverted))

    paginated = ordered[(page - 1) * page_size:page * page_size]

    # get tasks with completions
    tasks = []
    for item in paginated:
        i = item['id']
        task = project.get_task_with_completions(i)

        # no completions at task, get task without completions
        if task is None:
            task = project.source_storage.get(i)
        else:
            # evaluate completed_at time
            completed_at = item['completed_at']
            if completed_at != 'undefined' and completed_at is not None:
                completed_at = timestamp_to_local_datetime(completed_at).strftime('%Y-%m-%d %H:%M:%S')
            task['completed_at'] = completed_at
            task['has_cancelled_completions'] = item['has_cancelled_completions']

        # don't resolve data URIs (s3/gcs is slow) if 'data' is not among the requested fields
        if 'all' in fields or 'data' in fields:
            task = resolve_task_data_uri(task, project=project)

        # leave only the chosen fields
        if 'all' not in fields:
            task = {field: task[field] for field in fields}

        tasks.append(task)

    return tasks
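# A minimal usage sketch (hypothetical, not part of the original module). It assumes
# `params` exposes `.order`, `.page`, `.page_size` and `.fields`, and uses SimpleNamespace
# as a stand-in for the real params object; `project` must provide the storage and
# aggregation methods used above.
def _example_prepare_tasks(project):
    from types import SimpleNamespace

    params = SimpleNamespace(
        order='-completed_at',  # '-' prefix inverts the sort direction
        page=1,
        page_size=10,
        fields=['id'],          # use ['all'] to get full tasks with resolved data URIs
    )
    return prepare_tasks(project, params)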