Esempio n. 1
0
    def generate_export_file(project, tasks, output_format, get_args):
        """Convert ``tasks`` to ``output_format`` and return the export payload.

        The tasks are serialized to JSON and handed to
        ``DataExport.save_export_files``; its result is used as the converter
        input. The conversion itself runs inside a temporary directory.

        Args:
            project: object providing ``title`` and ``get_parsed_config()``.
            tasks: JSON-serializable tasks to export.
            output_format: target format passed to ``Converter.convert``.
            get_args: forwarded unchanged to ``DataExport.save_export_files``.

        Returns:
            A ``(stream, content_type, filename)`` tuple. When the converter
            produced exactly one file, that file is returned directly;
            otherwise the output directory is packed into a zip archive.
        """
        # prepare for saving
        now = datetime.now()
        data = json.dumps(tasks, ensure_ascii=False)
        # The digest is taken over the re-encoded JSON string; kept unchanged
        # so that generated export names stay the same.
        md5 = hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest()
        name = 'project-' + str(project.title) + '-at-' + now.strftime(
            '%Y-%m-%d-%H-%M') + f'-{md5[0:8]}'

        input_json = DataExport.save_export_files(project, now, get_args, data,
                                                  md5, name)
        converter = Converter(config=project.get_parsed_config(),
                              project_dir=None,
                              upload_dir=os.path.join(settings.MEDIA_ROOT,
                                                      settings.UPLOAD_DIR))
        with get_temp_dir() as tmp_dir:
            converter.convert(input_json, tmp_dir, output_format, is_dir=False)
            files = get_all_files_from_dir(tmp_dir)
            # if only one file is exported - no need to create archive
            if len(files) == 1:
                output_file = files[0]
                ext = os.path.splitext(output_file)[-1]
                # splitext keeps the leading dot ('.json'); drop it so the
                # media type is 'application/json', not 'application/.json'.
                subtype = ext.lstrip('.')
                content_type = f'application/{subtype}'
                out = read_bytes_stream(output_file)
                filename = name + ext
                return out, content_type, filename

            # otherwise pack output directory into archive
            shutil.make_archive(tmp_dir, 'zip', tmp_dir)
            out = read_bytes_stream(os.path.abspath(tmp_dir + '.zip'))
            content_type = 'application/zip'
            filename = name + '.zip'
            return out, content_type, filename
Esempio n. 2
0
    def convert_file(self, to):
        """Convert the attached file to the ``to`` format.

        The content of ``self.file`` is copied into a temporary directory,
        ``Converter.convert`` is run on that copy with the same directory as
        output, and the first file in the directory whose name differs from
        the input name is returned.

        Args:
            to: target format passed to ``Converter.convert``.

        Returns:
            A ``File`` wrapping the converted bytes, named with the input
            file's stem and the output file's suffix.

        Raises:
            IndexError: if the directory holds no file other than the input.
        """
        with get_temp_dir() as tmp_dir:
            converter = Converter(
                config=self.project.get_parsed_config(),
                project_dir=None,
                upload_dir=tmp_dir,
            )
            input_name = pathlib.Path(self.file.name).name
            input_file_path = pathlib.Path(tmp_dir) / input_name
            # Open the source through a context manager so its handle is
            # closed once the copy is done instead of being left dangling.
            with self.file.open() as src, \
                    open(input_file_path, 'wb') as out_file:
                out_file.write(src.read())

            converter.convert(input_file_path, tmp_dir, to, is_dir=False)

            files = get_all_files_from_dir(tmp_dir)
            # Input and output share tmp_dir, so skip the input copy itself.
            output_file = [
                file_name for file_name in files
                if pathlib.Path(file_name).name != input_name
            ][0]

            out = read_bytes_stream(output_file)
            filename = pathlib.Path(input_name).stem + pathlib.Path(
                output_file).suffix
            return File(
                out,
                name=filename,
            )
Esempio n. 3
0
 def get_export_formats(project):
     """List the converter's export formats, enabled ones first.

     Every entry of ``converter.all_formats()`` gets a ``'name'`` key set to
     the format's name; entries whose name is not among
     ``converter.supported_formats`` are additionally flagged with
     ``'disabled': True``. The entries are updated in place and returned
     sorted by that flag.
     """
     converter = Converter(config=project.get_parsed_config(),
                           project_dir=None)
     supported = set(converter.supported_formats)

     def describe(fmt, info):
         # Annotate a single format entry in place and hand it back.
         info['name'] = fmt.name
         if fmt.name not in supported:
             info['disabled'] = True
         return info

     descriptors = [
         describe(fmt, info)
         for fmt, info in converter.all_formats().items()
     ]
     # False sorts before True, so enabled formats come first (stable sort).
     descriptors.sort(key=lambda entry: entry.get('disabled', False))
     return descriptors
Esempio n. 4
0
    def reload(self):
        """Reset derived state and rebuild everything that depends on the label config.

        Clears the tasks and derived schemas, calls ``self._init()``, re-reads
        the label config file named by ``self.config['label_config']``, then
        refreshes analytics, the project object, the optional ML backend
        connection and the converter.
        """
        self.tasks = None
        self.derived_input_schema = []
        self.derived_output_schema = {
            'from_name_to_name_type': set(),
            'labels': defaultdict(set)
        }

        self._init()

        # Read the config through a context manager so the file handle is
        # closed deterministically instead of waiting for garbage collection.
        with open(self.config['label_config']) as config_file:
            raw_label_config = config_file.read()
        self.label_config_full = config_comments_free(raw_label_config)
        self.label_config_line = config_line_stripped(self.label_config_full)

        if self.analytics is None:
            self.analytics = Analytics(
                self.label_config_line,
                self.config.get('collect_analytics', True), self.name)
        else:
            self.analytics.update_info(
                self.label_config_line,
                self.config.get('collect_analytics', True), self.name)

        # configure project
        self.project_obj = ProjectObj(label_config=self.label_config_line,
                                      label_config_full=self.label_config_full)

        # configure machine learning backend
        # NOTE(review): the backend built here is only connected to
        # project_obj; self.ml_backend is not assigned in this method --
        # confirm that is intended.
        if self.ml_backend is None:
            ml_backend_params = self.config.get('ml_backend')
            if ml_backend_params:
                ml_backend = MLBackend.from_params(ml_backend_params)
                self.project_obj.connect(ml_backend)

        self.converter = Converter(self.label_config_full)
Esempio n. 5
0
def export_projects():
    """Export labelled data for all export-enabled projects via the Label Studio converter.

    A project is processed when its status is ACTIVE and its export format is
    not NONE. For each of its annotators that has a ``config.xml`` in their
    project directory, the completions directory is converted into the
    per-annotator export directory using the project's export format
    (JSON, CSV, TSV or CoNLL). Other formats are logged and skipped.
    """
    for project in Project.objects.all():
        export_enabled = (
            project.status == Project.Status.ACTIVE
            and project.export_format != Project.ExportFormat.NONE
        )
        if not export_enabled:
            continue

        logger.info(f"Exporting project {project.name}")
        output_paths = []
        for membership in ProjectAnnotators.objects.filter(project=project):
            annotator = membership.annotator
            annotator_dir = (
                LABELIT_DIRS['projects'] / annotator.username / project.name
            )
            config_path = annotator_dir / 'config.xml'
            if not config_path.exists():
                # Nothing to export for an annotator without a label config.
                continue

            converter = Converter(str(config_path))
            completions_dir = annotator_dir / 'completions/'
            output_path = (
                LABELIT_DIRS['exports'] / project.name / annotator.username
            )
            logger.debug(
                f"Exporting completions for annotator {annotator.username}, project {project.name}"
            )

            export_format = project.export_format
            if export_format == Project.ExportFormat.JSON:
                converter.convert_to_json(completions_dir, output_path)
            elif export_format == Project.ExportFormat.CSV:
                converter.convert_to_csv(completions_dir, output_path, sep=',')
            elif export_format == Project.ExportFormat.TSV:
                converter.convert_to_csv(completions_dir, output_path, sep='\t')
            elif export_format == Project.ExportFormat.CONLL:
                converter.convert_to_conll2003(completions_dir, output_path)
            else:
                logger.debug(
                    f"Export format {project.export_format} not supported for project {project.name}"
                )
                # Unsupported format: do not record an output path.
                continue
            output_paths.append(output_path)
Esempio n. 6
0
    def convert_file(self, to_format):
        """Convert the attached file to ``to_format``.

        The content of ``self.file`` is copied into a temporary directory and
        ``Converter.convert`` writes its output into an ``out`` subdirectory.

        Args:
            to_format: target format passed to ``Converter.convert``.

        Returns:
            ``None`` when the converter produced neither files nor
            directories. Otherwise a ``File``: the single output file when
            exactly one file and no directories were produced, or a zip
            archive of the whole output directory in every other case.
        """
        with get_temp_dir() as tmp_dir:
            OUT = 'out'
            out_dir = pathlib.Path(tmp_dir) / OUT
            out_dir.mkdir(mode=0o700, parents=True, exist_ok=True)

            converter = Converter(
                config=self.project.get_parsed_config(),
                project_dir=None,
                upload_dir=out_dir,
                download_resources=False,
            )
            input_name = pathlib.Path(self.file.name).name
            input_file_path = pathlib.Path(tmp_dir) / input_name

            # Open the source through a context manager so its handle is
            # closed once the copy is done instead of being left dangling.
            with self.file.open() as src, open(input_file_path, 'wb') as file_:
                file_.write(src.read())

            converter.convert(input_file_path, out_dir, to_format, is_dir=False)

            files = get_all_files_from_dir(out_dir)
            dirs = get_all_dirs_from_dir(out_dir)

            if len(files) == 0 and len(dirs) == 0:
                return None
            elif len(files) == 1 and len(dirs) == 0:
                output_file = files[0]
                filename = pathlib.Path(input_name).stem + pathlib.Path(output_file).suffix
            else:
                # Several outputs (or nested directories): ship them as one
                # archive created next to out_dir inside tmp_dir.
                shutil.make_archive(out_dir, 'zip', out_dir)
                output_file = pathlib.Path(tmp_dir) / (str(out_dir.stem) + '.zip')
                filename = pathlib.Path(input_name).stem + '.zip'

            out = read_bytes_stream(output_file)
            return File(
                out,
                name=filename,
            )
Esempio n. 7
0
 def load_converter(self):
     """Build a ``Converter`` from ``self.parsed_label_config`` and store it on ``self.converter``."""
     label_config = self.parsed_label_config
     self.converter = Converter(label_config)
Esempio n. 8
0
 def load_converter(self):
     """Build a ``Converter`` from ``self.label_config_full`` and store it on ``self.converter``."""
     label_config = self.label_config_full
     self.converter = Converter(label_config)
Esempio n. 9
0
 def load_converter(self):
     """Build a ``Converter`` for this object and store it on ``self.converter``.

     The converter receives ``self.parsed_label_config`` as its config and
     ``self.path`` as its ``project_dir``.
     """
     label_config = self.parsed_label_config
     project_dir = self.path
     self.converter = Converter(label_config, project_dir=project_dir)