コード例 #1
0
    def _validate_task(self, key, parsed_data):
        """ Check a parsed JSON payload and run it through the task validator.

        The payload has to describe exactly one task: either a dict, or a
        list that contains a single item.

        :param key: name of the source object, used as a prefix in error text
        :param parsed_data: decoded JSON content (dict or one-element list)
        :return: the first item of the validator's output
        :raises TaskValidationError: when the payload is not a single task,
            or when the validator rejects it
        """
        came_as_list = isinstance(parsed_data, list)
        holds_single_task = came_as_list and len(parsed_data) == 1

        # only one task per JSON file is accepted
        if not holds_single_task and not isinstance(parsed_data, dict):
            reason = ('Cloud storages support one task per one JSON file only. '
                      'Task must be {} or [{}] with length = 1')
            raise TaskValidationError('Error at ' + key + ':\n' + reason)

        # the validator is always fed a list, so wrap a lone dict
        batch = parsed_data if came_as_list else [parsed_data]
        validator = TaskValidator(self.project)
        try:
            validated = validator.to_internal_value(batch)
        except TaskValidationError as e:
            # re-raise with every message prefixed by the key
            report = "\n".join(key + ' :: ' + msg for msg in e.msg_to_list())
            raise TaskValidationError(report)

        return validated[0]
コード例 #2
0
ファイル: server.py プロジェクト: YoonLee-lab/label-studio
def api_import():
    """Import tasks from the current request and persist them to input_path.

    Tasks are read from the request by ``uploader.load_tasks`` and checked
    with ``TaskValidator``. When ``project.config['input_path']`` is a
    directory, the validated tasks are written to a new
    ``import-<timestamp>-<md5 prefix>.json`` file inside it; otherwise
    input_path is treated as a single JSON file holding a list, and the new
    tasks are appended to that list. The project is reloaded afterwards.

    :return: a 201 response with task/completion/prediction counts and the
        elapsed time, or a 400 response carrying the validation messages
    """
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()
    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest())
    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()),
                             status.HTTP_400_BAD_REQUEST)

    # save task file to input dir
    if os.path.isdir(project.config['input_path']):
        # tasks are in directory, write a new file with tasks
        task_dir = project.config['input_path']
        now = datetime.now()
        data = json.dumps(new_tasks, ensure_ascii=False)
        # the hash only serves to make the generated file name distinct
        md5 = hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest()
        name = 'import-' + now.strftime('%Y-%m-%d-%H-%M') + '-' + str(md5[0:8])
        path = os.path.join(task_dir, name + '.json')
        tasks = new_tasks
    else:
        # tasks are all in one file, append it
        path = project.config['input_path']
        # read through a context manager so the handle is closed before
        # the same path is reopened for writing below
        with open(path) as f:
            old_tasks = json.load(f)
        assert isinstance(old_tasks,
                          list), 'Tasks from input_path must be list'
        tasks = old_tasks + new_tasks
        logger.error("It's recommended to use directory as input_path: " +
                     project.config['input_path'] + ' -> ' +
                     os.path.dirname(project.config['input_path']))

    with open(path, 'w') as f:
        json.dump(tasks, f, ensure_ascii=False, indent=4)

    # load new tasks
    project.reload()

    duration = time.time() - start
    return make_response(
        jsonify({
            'task_count': len(new_tasks),
            'completion_count': validator.completion_count,
            'prediction_count': validator.prediction_count,
            'duration': duration
        }), status.HTTP_201_CREATED)
コード例 #3
0
ファイル: blueprint.py プロジェクト: OlegJakushkin/labeling
def api_import():
    """ Task import endpoint. Accepted inputs:
        * json task data
        * files (as web form, files will be hosted by this flask server)
        * url links to images, audio, csv (if you use TimeSeries in labeling config)

    Responds with 201 and an import summary, or with 400 and the list of
    validation messages when the tasks are rejected.
    """
    # expose the flask request under the attribute names the uploader module reads
    class DjangoRequest:
        def __init__(self): pass
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    started_at = time.time()

    # pull the raw tasks out of the request
    parsed_data, formats = uploader.load_tasks(DjangoRequest(), g.project)

    # run them through the validator
    validator = TaskValidator(g.project)
    try:
        validated = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        error_body = jsonify(e.msg_to_list())
        return make_response(error_body, status.HTTP_400_BAD_REQUEST)

    # second argument below is (largest stored id + 1), or 0 for an empty project
    if g.project.no_tasks():
        last_id = -1
    else:
        last_id = g.project.source_storage.max_id()
    imported = Tasks().from_list_of_dicts(validated, last_id + 1)

    try:
        g.project.source_storage.set_many(imported.keys(), imported.values())
    except NotImplementedError:
        raise NotImplementedError('Import is not supported for the current storage ' + str(g.project.source_storage))

    # completions shipped inside the tasks are saved explicitly
    for task_id in imported.keys():
        shipped = imported[task_id].get('completions', [])
        for completion in shipped:
            g.project.save_completion(int(task_id), completion)

    # refresh the derived schemas now that new tasks are in
    g.project.update_derived_input_schema()
    g.project.update_derived_output_schema()

    duration = time.time() - started_at
    summary = {
        'task_count': len(imported),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration,
        'formats': formats,
        'new_task_ids': list(imported)
    }
    return make_response(jsonify(summary), status.HTTP_201_CREATED)
コード例 #4
0
def api_import():
    """Import tasks from the current request into the single tasks file.

    Tasks are read from the request by ``uploader.load_tasks`` and checked
    with ``TaskValidator``. The JSON object stored at
    ``project.config['input_path']`` is loaded, the new tasks are merged into
    it under ids starting after the largest existing integer key, and the
    file is rewritten. Tasks and derived schemas are then reloaded.

    :return: a 201 response with counts, elapsed time and the new task ids,
        or a 400 response carrying the validation messages
    """
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    start = time.time()
    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest(), project)
    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()),
                             status.HTTP_400_BAD_REQUEST)

    # tasks are all in one file, append it
    path = project.config['input_path']
    # read through a context manager so the handle is closed before the
    # same path is reopened for writing below
    with open(path) as f:
        old_tasks = json.load(f)
    # keys are compared as integers; an empty file content yields -1
    max_id_in_old_tasks = max(map(int, old_tasks.keys())) if old_tasks else -1

    new_tasks = Tasks().from_list_of_dicts(new_tasks, max_id_in_old_tasks + 1)
    old_tasks.update(new_tasks)

    with open(path, 'w') as f:
        json.dump(old_tasks, f, ensure_ascii=False, indent=4)

    # load new tasks and everything related
    project.load_tasks()
    project.load_derived_schemas()

    duration = time.time() - start
    return make_response(
        jsonify({
            'task_count': len(new_tasks),
            'completion_count': validator.completion_count,
            'prediction_count': validator.prediction_count,
            'duration': duration,
            'new_task_ids': list(new_tasks)
        }), status.HTTP_201_CREATED)
コード例 #5
0
def api_import():
    """Load tasks from the current request, validate and store them.

    Validated tasks go to ``g.project.source_storage``; completions that
    arrive inside the tasks are saved through ``g.project.save_completion``
    and the derived schemas are refreshed. Responds with 201 and an import
    summary, or with 400 and the validation messages.
    """
    # expose the flask request under the attribute names the uploader module reads
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    t0 = time.time()

    # extract the raw tasks from the request
    parsed_data, formats = uploader.load_tasks(DjangoRequest(), g.project)

    # validation step
    validator = TaskValidator(g.project)
    try:
        checked = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        messages = jsonify(e.msg_to_list())
        return make_response(messages, status.HTTP_400_BAD_REQUEST)

    # second argument below is (largest stored id + 1), or 0 for an empty project
    if g.project.no_tasks():
        highest_id = -1
    else:
        highest_id = g.project.source_storage.max_id()

    stored = Tasks().from_list_of_dicts(checked, highest_id + 1)
    g.project.source_storage.set_many(stored.keys(), stored.values())

    # completions shipped inside the tasks are saved explicitly
    for task_id in stored.keys():
        bundled = stored[task_id].get('completions', [])
        for completion in bundled:
            g.project.save_completion(int(task_id), completion)

    # refresh the derived schemas now that new tasks are in
    g.project.update_derived_input_schema()
    g.project.update_derived_output_schema()

    duration = time.time() - t0
    payload = {
        'task_count': len(stored),
        'completion_count': validator.completion_count,
        'prediction_count': validator.prediction_count,
        'duration': duration,
        'formats': formats,
        'new_task_ids': list(stored)
    }
    return make_response(jsonify(payload), status.HTTP_201_CREATED)
コード例 #6
0
    def _update(self):
        """Refresh task-related state from the file list, then validate tasks.

        When ``self.filelist`` is non-empty, tasks are re-read, the
        consistency check is run, and the found/selected formats are filled
        in if they were not set before. ``self.tasks`` is passed through
        ``TaskValidator`` in every case.
        """
        if self.filelist:
            self.tasks, found_formats, self.data_keys = self._read_tasks()

            self._raise_if_inconsistent_with_current_project()

            # remember the formats only if none were recorded yet
            if not self.found_formats:
                self.found_formats = found_formats

            # nothing selected so far: start with every found format, sorted
            if self.selected_formats is None:
                self.selected_formats, self.selected_objects = [], []
                self.selected_formats.extend(sorted(found_formats.keys()))

            self.selected_objects = self._get_selected_objects()
            self.show_files_as_tasks_list = self._show_files_as_tasks_list()

        # validation runs whether or not the file list was processed
        self._validator = TaskValidator(self.project)
        self.tasks = self._validator.to_internal_value(self.tasks)
コード例 #7
0
ファイル: server.py プロジェクト: vparv/label-studio
def api_import():
    """Import tasks from the current request and persist them to input_path.

    Tasks are read from the request by ``uploader.load_tasks`` and checked
    with ``TaskValidator``. When ``project.config['input_path']`` is a
    directory, the validated tasks are written to a new
    ``import-<timestamp>-<md5 prefix>.json`` file inside it. Otherwise
    input_path is treated as a single JSON file holding a list: the new
    tasks are appended and the combined list is re-emitted in groups (see
    the loop below) before the file is rewritten. The project is reloaded
    and ``make_task_queue`` is called with the number of imported tasks.

    :return: a 201 response with task/completion/prediction counts and the
        elapsed time, or a 400 response carrying the validation messages
    """
    print("in api import")
    project = project_get_or_create()

    # make django compatibility for uploader module
    class DjangoRequest:
        POST = request.form
        GET = request.args
        FILES = request.files
        data = request.json if request.json else request.form
        content_type = request.content_type

    print("In api_import")
    start = time.time()
    # get tasks from request
    parsed_data = uploader.load_tasks(DjangoRequest())
    # validate tasks
    validator = TaskValidator(project)
    try:
        new_tasks = validator.to_internal_value(parsed_data)
    except ValidationError as e:
        return make_response(jsonify(e.msg_to_list()),
                             status.HTTP_400_BAD_REQUEST)

    # save task file to input dir
    if os.path.isdir(project.config['input_path']):
        # tasks are in directory, write a new file with tasks
        task_dir = project.config['input_path']
        now = datetime.now()
        print("In new tasks api_import")
        data = json.dumps(new_tasks, ensure_ascii=False)
        # the hash only serves to make the generated file name distinct
        md5 = hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest()
        name = 'import-' + now.strftime('%Y-%m-%d-%H-%M') + '-' + str(md5[0:8])
        path = os.path.join(task_dir, name + '.json')
        tasks = new_tasks
    else:
        # tasks are all in one file, append it
        path = project.config['input_path']
        print("in old tasks section api_import")
        # read through a context manager so the handle is closed before
        # the same path is reopened for writing below
        with open(path) as f:
            old_tasks = json.load(f)
        assert isinstance(old_tasks,
                          list), 'Tasks from input_path must be list'
        tasks = old_tasks + new_tasks

        # Rebuild `tasks` from a copy: each pass appends up to `param`
        # consecutive items starting at `startingindex`, every item at most
        # 3 times; the window advances once the output length is a multiple
        # of numcomps * param.
        # NOTE(review): `param` is not defined in this function - presumably
        # a module-level setting; confirm.
        temp = copy.deepcopy(tasks)
        tasks[:] = []
        numcomps = 3
        startingindex = 0
        count = [0] * len(temp)
        c = 0

        for i in range(0, len(temp)):
            for j in range(0, param):
                if (j + startingindex < len(temp)):
                    if (count[j + startingindex] < 3):
                        tasks.append(temp[j + startingindex])
                        count[j + startingindex] = count[j + startingindex] + 1
                        print(temp[j + startingindex])
                        c = c + 1

            if (len(tasks) % (numcomps * param) == 0):
                startingindex = startingindex + param

        print(c)
        logger.error("It's recommended to use directory as input_path: " +
                     project.config['input_path'] + ' -> ' +
                     os.path.dirname(project.config['input_path']))

    with open(path, 'w') as f:
        json.dump(tasks, f, ensure_ascii=False, indent=4)

    # load new tasks
    project.reload()

    duration = time.time() - start

    # `num_tasks` was previously read without being assigned in this
    # function; it is the number of tasks imported by this request.
    num_tasks = len(new_tasks)
    # NOTE(review): the result is bound to a local name and not used again
    # here - confirm whether a module-level queue was meant to be updated.
    task_queue = make_task_queue(num_tasks)

    return make_response(
        jsonify({
            'task_count': len(new_tasks),
            'completion_count': validator.completion_count,
            'prediction_count': validator.prediction_count,
            'duration': duration
        }), status.HTTP_201_CREATED)