Esempio n. 1
0
    def apply(self):
        # get the last task id
        max_id_in_old_tasks = -1
        if not self.project.no_tasks():
            max_id_in_old_tasks = self.project.source_storage.max_id()

        # now read all tasks
        # currently self._update() reads all tasks - uncomment this on change
        # all_tasks, _, _ = self._read_tasks()
        all_tasks = self.tasks

        new_tasks = Tasks().from_list_of_dicts(all_tasks,
                                               max_id_in_old_tasks + 1)
        try:
            self.project.source_storage.set_many(new_tasks.keys(),
                                                 new_tasks.values())
        except NotImplementedError:
            raise NotImplementedError(
                'Import is not supported for the current storage, change storage type in project settings'
                + str(self.project.source_storage))

        # if tasks have completion - we need to implicitly save it to target
        for i in new_tasks.keys():
            for completion in new_tasks[i].get('completions', []):
                self.project.save_completion(int(i), completion)

        # update schemas based on newly uploaded tasks
        self.project.update_derived_input_schema()
        self.project.update_derived_output_schema()

        if self.project.label_config_is_empty:
            generated_label_config = self._generate_label_config()
            self.project.update_label_config(generated_label_config)
        return new_tasks
Esempio n. 2
0
def api_import():
    """ The main API for task import, supports
        * json task data
        * files (as web form, files will be hosted by this flask server)
        * url links to images, audio, csv (if you use TimeSeries in labeling config)
    """
    # make django compatibility for uploader module
    class DjangoRequest:
        def __init__(self): pass
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()
    # get tasks from request
    parsed_data, formats = uploader.load_tasks(DjangoRequest(), g.project)
    # validate tasks
    validator = TaskValidator(g.project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()), status.HTTP_400_BAD_REQUEST)

    # get the last task id
    max_id_in_old_tasks = -1
    if not g.project.no_tasks():
        max_id_in_old_tasks = g.project.source_storage.max_id()

    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    try:
        g.project.source_storage.set_many(new_tasks.keys(), new_tasks.values())
    except NotImplementedError:
        raise NotImplementedError('Import is not supported for the current storage ' + str(g.project.source_storage))

    # if tasks have completion - we need to implicitly save it to target
    for i in new_tasks.keys():
        for completion in new_tasks[i].get('completions', []):
            g.project.save_completion(int(i), completion)

    # update schemas based on newly uploaded tasks
    g.project.update_derived_input_schema()
    g.project.update_derived_output_schema()

    duration = time.time() - start
    return make_response(jsonify({
        'task_count': len(new_tasks),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration,
        'formats': formats,
        'new_task_ids': [t for t in new_tasks]
    }), status.HTTP_201_CREATED)
Esempio n. 3
0
    def _load_tasks(cls, input_path, args, label_config_file):
        with io.open(label_config_file, encoding="utf8") as f:
            label_config = f.read()

        task_loader = Tasks()
        if args.input_format == "json":
            return task_loader.from_json_file(input_path)
        if args.input_format == "json-dir":
            return task_loader.from_dir_with_json_files(input_path)
        input_data_tags = cls.get_input_data_tags(label_config)

        if len(input_data_tags) > 1:
            val = ",".join(tag.attrib.get("name") for tag in input_data_tags)
            print("Warning! Multiple input data tags found: " + val +
                  ". Only first one is used.")
        elif len(input_data_tags) == 0:
            raise ValueError(
                'You\'ve specified input format "{fmt}" which requires label config being explicitly defined. '
                "Please specify --label-config=path/to/config.xml or use --format=json or format=json_dir"
                .format(fmt=args.input_format))
        input_data_tag = input_data_tags[0]
        data_key = input_data_tag.attrib.get("value").lstrip("$")

        if args.input_format == "text":
            return task_loader.from_text_file(input_path, data_key)
        if args.input_format == "text-dir":
            return task_loader.from_dir_with_text_files(input_path, data_key)
        if args.input_format == "image-dir":
            return task_loader.from_dir_with_image_files(input_path, data_key)
        if args.input_format == "audio-dir":
            return task_loader.from_dir_with_audio_files(input_path, data_key)
        raise RuntimeError("Can't load tasks for input format={}".format(
            args.input_format))
Esempio n. 4
0
    def _load_tasks(cls, input_path, args, label_config_file):
        with io.open(label_config_file, encoding='utf8') as f:
            label_config = f.read()

        task_loader = Tasks()
        if args.input_format == 'json':
            return task_loader.from_json_file(input_path)
        if args.input_format == 'json-dir':
            return task_loader.from_dir_with_json_files(input_path)
        config_input_tags = cls.get_config_input_tags(label_config)

        if len(config_input_tags) > 1:
            val = ",".join(tag.attrib.get("name") for tag in config_input_tags)
            print('Warning! Multiple input data tags found: ' +
                  val + '. Only first one is used.')
        elif len(config_input_tags) == 0:
            raise ValueError(
                'You\'ve specified input format "{fmt}" which requires label config being explicitly defined. '
                'Please specify --label-config=path/to/config.xml or use --format=json or format=json_dir'.format(
                    fmt=args.input_format)
            )
        input_data_tag = config_input_tags[0]
        data_key = input_data_tag.attrib.get('value').lstrip('$')

        if args.input_format == 'text':
            return task_loader.from_text_file(input_path, data_key)
        if args.input_format == 'text-dir':
            return task_loader.from_dir_with_text_files(input_path, data_key)
        if args.input_format == 'image-dir':
            return task_loader.from_dir_with_image_files(input_path, data_key)
        if args.input_format == 'audio-dir':
            return task_loader.from_dir_with_audio_files(input_path, data_key)
        raise RuntimeError('Can\'t load tasks for input format={}'.format(args.input_format))
Esempio n. 5
0
def api_import():
    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()
    # get tasks from request
    parsed_data, formats = uploader.load_tasks(DjangoRequest(), g.project)
    # validate tasks
    validator = TaskValidator(g.project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()),
                             status.HTTP_400_BAD_REQUEST)

    max_id_in_old_tasks = -1
    if not g.project.no_tasks():
        max_id_in_old_tasks = g.project.source_storage.max_id()

    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    g.project.source_storage.set_many(new_tasks.keys(), new_tasks.values())

    # if tasks have completion - we need to implicitly save it to target
    for i in new_tasks.keys():
        for completion in new_tasks[i].get('completions', []):
            g.project.save_completion(int(i), completion)

    # update schemas based on newly uploaded tasks
    g.project.update_derived_input_schema()
    g.project.update_derived_output_schema()

    duration = time.time() - start
    return make_response(
        jsonify({
            'task_count': len(new_tasks),
            'completion_count': validator.completion_count,
            'prediction_count': validator.prediction_count,
            'duration': duration,
            'formats': formats,
            'new_task_ids': [t for t in new_tasks]
        }), status.HTTP_201_CREATED)
Esempio n. 6
0
def api_import():
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()
    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest(), project)
    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()),
                             status.HTTP_400_BAD_REQUEST)

    # tasks are all in one file, append it
    path = project.config['input_path']
    old_tasks = json.load(open(path))
    max_id_in_old_tasks = int(max(map(int,
                                      old_tasks.keys()))) if old_tasks else -1

    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    old_tasks.update(new_tasks)

    with open(path, 'w') as f:
        json.dump(old_tasks, f, ensure_ascii=False, indent=4)

    # load new tasks and everything related
    project.load_tasks()
    project.load_derived_schemas()

    duration = time.time() - start
    return make_response(
        jsonify({
            'task_count': len(new_tasks),
            'completion_count': validator.completion_count,
            'prediction_count': validator.prediction_count,
            'duration': duration,
            'new_task_ids': [t for t in new_tasks]
        }), status.HTTP_201_CREATED)
Esempio n. 7
0
    def _load_tasks(cls, input_path, args, label_config_file):
        with io.open(label_config_file) as f:
            label_config = f.read()

        task_loader = Tasks()
        if args.input_format == 'json':
            return task_loader.from_json_file(input_path)
        if args.input_format == 'json-dir':
            return task_loader.from_dir_with_json_files(input_path)
        input_data_tags = cls.get_input_data_tags(label_config)
        data_key = Project._get_single_input_value(input_data_tags)
        if args.input_format == 'text':
            return task_loader.from_text_file(input_path, data_key)
        if args.input_format == 'text-dir':
            return task_loader.from_dir_with_text_files(input_path, data_key)
        if args.input_format == 'image-dir':
            return task_loader.from_dir_with_image_files(input_path, data_key)
        if args.input_format == 'audio-dir':
            return task_loader.from_dir_with_audio_files(input_path, data_key)
        raise RuntimeError('Can\'t load tasks for input format={}'.format(args.input_format))