def apply(self):
    """Store the tasks held in ``self.tasks`` in the project's source storage.

    New tasks receive ids that continue after the largest id already present
    in the source storage (starting from 0 when the project has no tasks).
    Completions embedded in the new tasks are saved through
    ``project.save_completion``, the derived input/output schemas are
    refreshed, and a label config is generated when the project has none.

    Returns:
        The mapping produced by ``Tasks().from_list_of_dicts`` for the newly
        stored tasks.

    Raises:
        NotImplementedError: if the source storage does not implement
            ``set_many``.
    """
    project = self.project
    storage = project.source_storage

    # get the last task id so that new ids do not collide with stored ones
    max_id_in_old_tasks = -1
    if not project.no_tasks():
        max_id_in_old_tasks = storage.max_id()

    # NOTE: self.tasks is expected to already hold every task (the original
    # author notes that self._update() reads all of them); if that changes,
    # re-read the tasks here via self._read_tasks().
    all_tasks = self.tasks
    new_tasks = Tasks().from_list_of_dicts(all_tasks, max_id_in_old_tasks + 1)

    try:
        storage.set_many(new_tasks.keys(), new_tasks.values())
    except NotImplementedError:
        # re-raise with a user-facing hint; the separator before the storage
        # description keeps the message readable
        raise NotImplementedError(
            'Import is not supported for the current storage, change storage type in project settings: '
            + str(storage))

    # if tasks have completions - we need to implicitly save them to target
    for task_id, task in new_tasks.items():
        for completion in task.get('completions', []):
            project.save_completion(int(task_id), completion)

    # update schemas based on newly uploaded tasks
    project.update_derived_input_schema()
    project.update_derived_output_schema()

    if project.label_config_is_empty:
        generated_label_config = self._generate_label_config()
        project.update_label_config(generated_label_config)

    return new_tasks
def api_import():
    """Import tasks sent with the current request into ``g.project``.

    Accepted payloads:
      * json task data
      * files (as web form, files will be hosted by this flask server)
      * url links to images, audio, csv (if you use TimeSeries in labeling config)

    Responds with HTTP 400 and the validation messages when the tasks fail
    validation, otherwise with HTTP 201 and a summary of what was imported.
    """

    # the uploader module expects a django-like request object
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json or request.form
        content_type = request.content_type

    started_at = time.time()
    project = g.project

    # parse the request, then validate what was parsed
    parsed_data, formats = uploader.load_tasks(DjangoRequest(), project)
    validator = TaskValidator(project)
    try:
        validated_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as error:
        return make_response(jsonify(error.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    # new ids continue right after the largest stored id (0 for an empty project)
    first_free_id = 0
    if not project.no_tasks():
        first_free_id = project.source_storage.max_id() + 1
    new_tasks = Tasks().from_list_of_dicts(validated_tasks, first_free_id)

    try:
        project.source_storage.set_many(new_tasks.keys(), new_tasks.values())
    except NotImplementedError:
        message = 'Import is not supported for the current storage ' + str(project.source_storage)
        raise NotImplementedError(message)

    # completions shipped inside the tasks are saved explicitly to the target
    for task_id in new_tasks.keys():
        embedded_completions = new_tasks[task_id].get('completions', [])
        for completion in embedded_completions:
            project.save_completion(int(task_id), completion)

    # refresh schemas derived from the (now larger) task set
    project.update_derived_input_schema()
    project.update_derived_output_schema()

    summary = {
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': time.time() - started_at,
        'formats': formats,
        'new_task_ids': list(new_tasks),
    }
    return make_response(jsonify(summary), status.HTTP_201_CREATED)
def _load_tasks(cls, input_path, args, label_config_file):
    """Load tasks from ``input_path`` according to ``args.input_format``.

    The label config file is always read. The "json" and "json-dir" formats
    are loaded without consulting it; every other format takes its data key
    from the ``value`` attribute (leading ``$`` stripped) of the first input
    data tag found in the config.

    Raises:
        ValueError: the format needs a data key but the config has no input
            data tags.
        RuntimeError: ``args.input_format`` is not a known format.
    """
    with io.open(label_config_file, encoding="utf8") as config_stream:
        label_config = config_stream.read()

    loader = Tasks()
    fmt = args.input_format

    if fmt == "json":
        return loader.from_json_file(input_path)
    if fmt == "json-dir":
        return loader.from_dir_with_json_files(input_path)

    input_data_tags = cls.get_input_data_tags(label_config)
    tag_count = len(input_data_tags)
    if tag_count == 0:
        raise ValueError(
            'You\'ve specified input format "{fmt}" which requires label config being explicitly defined. '
            "Please specify --label-config=path/to/config.xml or use --format=json or format=json_dir"
            .format(fmt=args.input_format))
    if tag_count > 1:
        names = ",".join(tag.attrib.get("name") for tag in input_data_tags)
        print("Warning! Multiple input data tags found: " + names + ". Only first one is used.")

    first_tag = input_data_tags[0]
    data_key = first_tag.attrib.get("value").lstrip("$")

    # format name -> loader method name; the method is looked up only on a match
    keyed_loaders = (
        ("text", "from_text_file"),
        ("text-dir", "from_dir_with_text_files"),
        ("image-dir", "from_dir_with_image_files"),
        ("audio-dir", "from_dir_with_audio_files"),
    )
    for known_format, method_name in keyed_loaders:
        if fmt == known_format:
            return getattr(loader, method_name)(input_path, data_key)

    raise RuntimeError("Can't load tasks for input format={}".format(
        args.input_format))
def _load_tasks(cls, input_path, args, label_config_file):
    """Create tasks from ``input_path`` in the format named by ``args.input_format``.

    The label config file is read first. 'json' and 'json-dir' inputs are
    loaded directly; the remaining formats need a data key, taken from the
    ``value`` attribute (without leading ``$``) of the first config input tag.

    Raises:
        ValueError: a data key is required but the config has no input tags.
        RuntimeError: the input format is not recognised.
    """
    with io.open(label_config_file, encoding='utf8') as config_file:
        label_config = config_file.read()

    task_loader = Tasks()
    requested = args.input_format

    if requested == 'json':
        return task_loader.from_json_file(input_path)
    if requested == 'json-dir':
        return task_loader.from_dir_with_json_files(input_path)

    config_input_tags = cls.get_config_input_tags(label_config)
    found = len(config_input_tags)
    if found == 0:
        raise ValueError(
            'You\'ve specified input format "{fmt}" which requires label config being explicitly defined. '
            'Please specify --label-config=path/to/config.xml or use --format=json or format=json_dir'.format(
                fmt=args.input_format)
        )
    if found > 1:
        tag_names = ",".join(tag.attrib.get("name") for tag in config_input_tags)
        print('Warning! Multiple input data tags found: ' + tag_names + '. Only first one is used.')

    data_key = config_input_tags[0].attrib.get('value').lstrip('$')

    # try each data-key based format in turn; resolve the loader lazily
    for format_name, loader_name in (
        ('text', 'from_text_file'),
        ('text-dir', 'from_dir_with_text_files'),
        ('image-dir', 'from_dir_with_image_files'),
        ('audio-dir', 'from_dir_with_audio_files'),
    ):
        if requested == format_name:
            load = getattr(task_loader, loader_name)
            return load(input_path, data_key)

    raise RuntimeError('Can\'t load tasks for input format={}'.format(args.input_format))
def api_import():
    """Validate the tasks in the current request and add them to ``g.project``.

    Returns an HTTP 400 response with the validation messages when validation
    fails; otherwise stores the tasks, saves any completions they contain,
    refreshes the derived schemas and returns an HTTP 201 summary response.
    """

    # adapter that gives the uploader module a django-style request
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json or request.form
        content_type = request.content_type

    start = time.time()
    project = g.project

    parsed_data, formats = uploader.load_tasks(DjangoRequest(), project)

    validator = TaskValidator(project)
    try:
        validated = validator.to_internal_value(parsed_data)
    except ValidationError as exc:
        error_body = jsonify(exc.msg_to_list())
        return make_response(error_body, status.HTTP_400_BAD_REQUEST)

    # continue numbering after the largest id already in the source storage
    if project.no_tasks():
        last_known_id = -1
    else:
        last_known_id = project.source_storage.max_id()
    new_tasks = Tasks().from_list_of_dicts(validated, last_known_id + 1)

    project.source_storage.set_many(new_tasks.keys(), new_tasks.values())

    # tasks may carry completions - those are saved to the target explicitly
    for task_id in new_tasks.keys():
        for completion in new_tasks[task_id].get('completions', []):
            project.save_completion(int(task_id), completion)

    # derived schemas depend on the task set, so rebuild them
    project.update_derived_input_schema()
    project.update_derived_output_schema()

    elapsed = time.time() - start
    report = jsonify({
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': elapsed,
        'formats': formats,
        'new_task_ids': list(new_tasks),
    })
    return make_response(report, status.HTTP_201_CREATED)
def api_import():
    """Validate the tasks in the current request and append them to the project's task file.

    All tasks live in the single JSON file at ``project.config['input_path']``.
    New tasks get ids that continue after the largest existing id (starting
    from 0 when the file holds no tasks). After the file is rewritten the
    project reloads its tasks and derived schemas.

    Returns an HTTP 400 response with the validation messages when validation
    fails, otherwise an HTTP 201 response summarising the import.
    """
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()

    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest(), project)

    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    # tasks are all in one file, append to it; the context manager closes the
    # handle (the bare open() used before leaked it), and the explicit encoding
    # keeps the read in step with the ensure_ascii=False write below
    path = project.config['input_path']
    with open(path, encoding='utf8') as f:
        old_tasks = json.load(f)

    # task ids are stored as JSON object keys, hence the int conversion
    max_id_in_old_tasks = max(map(int, old_tasks)) if old_tasks else -1

    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    old_tasks.update(new_tasks)

    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # written as UTF-8 regardless of the platform default encoding
    with open(path, 'w', encoding='utf8') as f:
        json.dump(old_tasks, f, ensure_ascii=False, indent=4)

    # load new tasks and everything related
    project.load_tasks()
    project.load_derived_schemas()

    duration = time.time() - start
    return make_response(
        jsonify({
            'task_count': len(new_tasks),
            'completion_count': validator.completion_count,
            'prediction_count': validator.prediction_count,
            'duration': duration,
            'new_task_ids': list(new_tasks)
        }),
        status.HTTP_201_CREATED)
def _load_tasks(cls, input_path, args, label_config_file):
    """Load tasks from ``input_path`` in the format named by ``args.input_format``.

    The label config file is always read (as UTF-8). 'json' and 'json-dir'
    inputs are loaded without consulting it; the other formats use the data
    key that ``Project._get_single_input_value`` derives from the config's
    input data tags.

    Raises:
        RuntimeError: ``args.input_format`` is not one of the known formats.
    """
    # explicit encoding: without it the platform default is used and a label
    # config containing non-ASCII text fails to decode on non-UTF-8 locales
    with io.open(label_config_file, encoding='utf8') as f:
        label_config = f.read()

    task_loader = Tasks()
    input_format = args.input_format

    # JSON formats return before the label config is consulted
    if input_format == 'json':
        return task_loader.from_json_file(input_path)
    if input_format == 'json-dir':
        return task_loader.from_dir_with_json_files(input_path)

    input_data_tags = cls.get_input_data_tags(label_config)
    data_key = Project._get_single_input_value(input_data_tags)

    if input_format == 'text':
        return task_loader.from_text_file(input_path, data_key)
    if input_format == 'text-dir':
        return task_loader.from_dir_with_text_files(input_path, data_key)
    if input_format == 'image-dir':
        return task_loader.from_dir_with_image_files(input_path, data_key)
    if input_format == 'audio-dir':
        return task_loader.from_dir_with_audio_files(input_path, data_key)

    raise RuntimeError('Can\'t load tasks for input format={}'.format(args.input_format))