def _validate_task(self, key, parsed_data):
    """ Validate parsed data with labeling config and task structure """
    got_list = isinstance(parsed_data, list)

    # only one task per JSON file is accepted: either a bare dict
    # or a one-element list wrapping a dict
    single_item_list = got_list and len(parsed_data) == 1
    if not (single_item_list or isinstance(parsed_data, dict)):
        raise TaskValidationError(
            'Error at ' + key + ':\n'
            'Cloud storages support one task per one JSON file only. '
            'Task must be {} or [{}] with length = 1')

    # run the regular one-task validation against the project config
    task_validator = TaskValidator(self.project)
    payload = parsed_data if got_list else [parsed_data]
    try:
        tasks = task_validator.to_internal_value(payload)
    except TaskValidationError as e:
        # prefix every message with the storage key so the error is traceable
        raise TaskValidationError(
            "\n".join(key + ' :: ' + msg for msg in e.msg_to_list()))

    return tasks[0]
def api_import():
    """ Import tasks from the request payload and persist them to the project's input path.

    Returns 201 with import counters on success, 400 with validation messages
    when the uploaded tasks fail validation.
    """
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()

    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest())

    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    # save task file to input dir
    if os.path.isdir(project.config['input_path']):
        # tasks are in directory, write a new file with tasks
        task_dir = project.config['input_path']
        now = datetime.now()
        data = json.dumps(new_tasks, ensure_ascii=False)
        # hash the serialized payload directly; the original json.dumps(data)
        # double-encoded an already-serialized string
        md5 = hashlib.md5(data.encode('utf-8')).hexdigest()
        name = 'import-' + now.strftime('%Y-%m-%d-%H-%M') + '-' + str(md5[0:8])
        path = os.path.join(task_dir, name + '.json')
        tasks = new_tasks
    else:
        # tasks are all in one file, append it
        path = project.config['input_path']
        with open(path) as f:  # context manager: the bare open() leaked the handle
            old_tasks = json.load(f)
        assert isinstance(old_tasks, list), 'Tasks from input_path must be list'
        tasks = old_tasks + new_tasks
        # advisory message, not a failure: warning, not error
        logger.warning("It's recommended to use directory as input_path: " +
                       project.config['input_path'] + ' -> ' +
                       os.path.dirname(project.config['input_path']))

    with open(path, 'w') as f:
        json.dump(tasks, f, ensure_ascii=False, indent=4)

    # load new tasks
    project.reload()

    duration = time.time() - start
    return make_response(jsonify({
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration
    }), status.HTTP_201_CREATED)
def api_import():
    """ The main API for task import, supports
        * json task data
        * files (as web form, files will be hosted by this flask server)
        * url links to images, audio, csv (if you use TimeSeries in labeling config)
    """
    # make django compatibility for uploader module
    # (class attributes are enough — the original no-op __init__ was removed)
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()

    # get tasks from request
    parsed_data, formats = uploader.load_tasks(DjangoRequest(), g.project)

    # validate tasks
    validator = TaskValidator(g.project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    # get the last task id
    max_id_in_old_tasks = -1
    if not g.project.no_tasks():
        max_id_in_old_tasks = g.project.source_storage.max_id()

    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    try:
        g.project.source_storage.set_many(new_tasks.keys(), new_tasks.values())
    except NotImplementedError as e:
        # chain the original exception so the real cause stays visible
        raise NotImplementedError('Import is not supported for the current storage ' +
                                  str(g.project.source_storage)) from e

    # if tasks have completion - we need to implicitly save it to target
    # items() avoids a second dict lookup per key
    for task_id, task in new_tasks.items():
        for completion in task.get('completions', []):
            g.project.save_completion(int(task_id), completion)

    # update schemas based on newly uploaded tasks
    g.project.update_derived_input_schema()
    g.project.update_derived_output_schema()

    duration = time.time() - start
    return make_response(jsonify({
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration,
        'formats': formats,
        'new_task_ids': list(new_tasks)
    }), status.HTTP_201_CREATED)
def api_import():
    """ Import tasks from the request and merge them into the single id-keyed JSON task file.

    Returns 201 with import counters, or 400 with validation messages.
    """
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()

    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest(), project)

    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    # tasks are all in one file, append it
    path = project.config['input_path']
    with open(path) as f:  # context manager: the bare open() leaked the handle
        old_tasks = json.load(f)
    # max(..., default=-1) covers the empty-dict case without a conditional
    max_id_in_old_tasks = max(map(int, old_tasks.keys()), default=-1)
    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    old_tasks.update(new_tasks)

    with open(path, 'w') as f:
        json.dump(old_tasks, f, ensure_ascii=False, indent=4)

    # load new tasks and everything related
    project.load_tasks()
    project.load_derived_schemas()

    duration = time.time() - start
    return make_response(jsonify({
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration,
        'new_task_ids': list(new_tasks)
    }), status.HTTP_201_CREATED)
def api_import():
    """ Import tasks from the request into the project's source storage.

    Returns 201 with import counters, or 400 with validation messages.
    """
    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()

    # get tasks from request
    parsed_data, formats = uploader.load_tasks(DjangoRequest(), g.project)

    # validate tasks
    validator = TaskValidator(g.project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    # continue task ids from the current maximum in storage
    max_id_in_old_tasks = -1
    if not g.project.no_tasks():
        max_id_in_old_tasks = g.project.source_storage.max_id()

    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    g.project.source_storage.set_many(new_tasks.keys(), new_tasks.values())

    # if tasks have completion - we need to implicitly save it to target
    # items() avoids a second dict lookup per key
    for task_id, task in new_tasks.items():
        for completion in task.get('completions', []):
            g.project.save_completion(int(task_id), completion)

    # update schemas based on newly uploaded tasks
    g.project.update_derived_input_schema()
    g.project.update_derived_output_schema()

    duration = time.time() - start
    return make_response(jsonify({
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration,
        'formats': formats,
        'new_task_ids': list(new_tasks)
    }), status.HTTP_201_CREATED)
def _update(self):
    """ Re-read tasks from the file list, refresh format selections, and validate.

    No-op when the file list is empty.
    """
    if not self.filelist:
        return

    self.tasks, found_formats, self.data_keys = self._read_tasks()
    self._raise_if_inconsistent_with_current_project()

    if not self.found_formats:
        # It's a first time we get all formats
        self.found_formats = found_formats
    if self.selected_formats is None:
        # No selection yet: select every discovered format by default
        # (sorted(...) on a dict iterates its keys; `fmt` avoids shadowing builtin `format`)
        self.selected_formats = [fmt for fmt in sorted(found_formats)]
        self.selected_objects = self._get_selected_objects()
    self.show_files_as_tasks_list = self._show_files_as_tasks_list()

    # validate tasks
    self._validator = TaskValidator(self.project)
    self.tasks = self._validator.to_internal_value(self.tasks)
def _replicate_tasks(all_tasks, window, copies):
    """ Expand a task list so each task is emitted `copies` times, advancing in
    windows of `window` tasks once a full window has been emitted `copies` times.

    NOTE(review): reconstructed from whitespace-mangled source; the exact nesting of
    the window-advance check was ambiguous in the original — confirm against history.
    """
    source = copy.deepcopy(all_tasks)
    result = []
    emitted = [0] * len(source)  # how many times each source task was emitted
    window_start = 0
    for _ in range(len(source)):
        for offset in range(window):
            idx = window_start + offset
            # `copies` replaces the hard-coded 3 that silently had to match numcomps
            if idx < len(source) and emitted[idx] < copies:
                result.append(source[idx])
                emitted[idx] += 1
            if result and len(result) % (copies * window) == 0:
                window_start += window
    return result


def api_import():
    """ Import tasks, replicate them for multi-annotator assignment, and build the task queue.

    Returns 201 with import counters, or 400 with validation messages.
    """
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()

    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest())

    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    numcomps = 3  # presumably annotators-per-task — TODO confirm

    # save task file to input dir
    if os.path.isdir(project.config['input_path']):
        # tasks are in directory, write a new file with tasks
        task_dir = project.config['input_path']
        now = datetime.now()
        data = json.dumps(new_tasks, ensure_ascii=False)
        md5 = hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest()
        name = 'import-' + now.strftime('%Y-%m-%d-%H-%M') + '-' + str(md5[0:8])
        path = os.path.join(task_dir, name + '.json')
        tasks = new_tasks
    else:
        # tasks are all in one file, append it
        path = project.config['input_path']
        with open(path) as f:  # context manager: the bare open() leaked the handle
            old_tasks = json.load(f)
        assert isinstance(old_tasks, list), 'Tasks from input_path must be list'
        # `param` is expected to be a module-level window-size setting — TODO confirm
        tasks = _replicate_tasks(old_tasks + new_tasks, param, numcomps)
        logger.warning("It's recommended to use directory as input_path: " +
                       project.config['input_path'] + ' -> ' +
                       os.path.dirname(project.config['input_path']))

    with open(path, 'w') as f:
        json.dump(tasks, f, ensure_ascii=False, indent=4)

    # load new tasks
    project.reload()
    duration = time.time() - start

    # was a NameError: num_tasks existed only inside commented-out code
    num_tasks = len(new_tasks)
    task_queue = make_task_queue(num_tasks)

    return make_response(jsonify({
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration
    }), status.HTTP_201_CREATED)