def generate_export_file(project, tasks, output_format, get_args):
    """Convert ``tasks`` into ``output_format`` and return the export payload.

    The tasks are serialized to JSON, stored through
    ``DataExport.save_export_files`` and converted with ``Converter`` into a
    temporary directory. A single resulting file is returned as is; any other
    number of files is packed into a zip archive.

    Args:
        project: Project providing ``title`` and ``get_parsed_config()``.
        tasks: JSON-serializable task data to export.
        output_format: Target format handed to ``Converter.convert``.
        get_args: Passed through unchanged to
            ``DataExport.save_export_files``.

    Returns:
        tuple: ``(out, content_type, filename)`` where ``out`` is the value
        returned by ``read_bytes_stream`` for the produced file or archive.
    """
    # prepare for saving
    now = datetime.now()
    data = json.dumps(tasks, ensure_ascii=False)
    # ``data`` is already a JSON string and is dumped once more before
    # hashing; kept unchanged so the digest embedded in file names is stable.
    md5 = hashlib.md5(json.dumps(data).encode('utf-8')).hexdigest()
    name = (
        'project-' + str(project.title)
        + '-at-' + now.strftime('%Y-%m-%d-%H-%M')
        + f'-{md5[0:8]}'
    )

    input_json = DataExport.save_export_files(
        project, now, get_args, data, md5, name)

    converter = Converter(
        config=project.get_parsed_config(),
        project_dir=None,
        upload_dir=os.path.join(settings.MEDIA_ROOT, settings.UPLOAD_DIR),
    )
    with get_temp_dir() as tmp_dir:
        converter.convert(input_json, tmp_dir, output_format, is_dir=False)
        files = get_all_files_from_dir(tmp_dir)

        # if only one file is exported - no need to create archive
        if len(files) == 1:
            output_file = files[0]
            ext = os.path.splitext(output_file)[-1]
            # splitext keeps the leading dot ('.json'); drop it so the media
            # type is 'application/json' rather than 'application/.json'.
            subtype = ext.lstrip('.')
            if subtype:
                content_type = f'application/{subtype}'
            else:
                # No extension: use the generic binary media type.
                content_type = 'application/octet-stream'
            out = read_bytes_stream(output_file)
            filename = name + ext
            return out, content_type, filename

        # otherwise pack output directory into archive
        # NOTE(review): the archive is written to ``tmp_dir + '.zip'``, i.e.
        # next to the temporary directory rather than inside it; nothing in
        # this function removes it - confirm whether it is cleaned up
        # elsewhere.
        shutil.make_archive(tmp_dir, 'zip', tmp_dir)
        out = read_bytes_stream(os.path.abspath(tmp_dir + '.zip'))
        content_type = 'application/zip'
        filename = name + '.zip'
        return out, content_type, filename
def convert_file(self, to):
    """Convert this object's stored file to the ``to`` format.

    The stored file is copied into a temporary directory, converted there
    with ``Converter``, and the first file in that directory whose name
    differs from the input file name is wrapped in a ``File``.

    Args:
        to: Target format handed to ``Converter.convert``.

    Returns:
        File: Named after the input file's stem plus the output file's
        suffix.

    Raises:
        IndexError: If the directory holds no file other than the input.
    """
    with get_temp_dir() as tmp_dir:
        converter = Converter(
            config=self.project.get_parsed_config(),
            project_dir=None,
            upload_dir=tmp_dir,
        )

        source_name = pathlib.Path(self.file.name).name
        source_path = pathlib.Path(tmp_dir) / source_name

        # Materialize the stored file on disk so the converter can read it.
        with open(source_path, 'wb') as destination:
            destination.write(self.file.open().read())

        converter.convert(source_path, tmp_dir, to, is_dir=False)

        # Input and output share one directory, so filter the input out.
        produced = [
            candidate
            for candidate in get_all_files_from_dir(tmp_dir)
            if pathlib.Path(candidate).name != source_name
        ]
        output_file = produced[0]

        out = read_bytes_stream(output_file)
        source_stem = pathlib.Path(source_name).stem
        output_suffix = pathlib.Path(output_file).suffix
        filename = source_stem + output_suffix
        return File(out, name=filename)
def get_export_formats(project):
    """List export format descriptors for ``project``, enabled ones first.

    Each descriptor from ``Converter.all_formats()`` is updated in place with
    a ``'name'`` key, and with ``'disabled': True`` when the format's name is
    not in ``converter.supported_formats``.

    Args:
        project: Project providing ``get_parsed_config()``.

    Returns:
        list: The descriptors, with non-disabled formats ahead of disabled
        ones and the converter's ordering otherwise preserved.
    """
    converter = Converter(config=project.get_parsed_config(), project_dir=None)
    supported_names = set(converter.supported_formats)

    descriptors = []
    for fmt, descriptor in converter.all_formats().items():
        descriptor['name'] = fmt.name
        if fmt.name not in supported_names:
            descriptor['disabled'] = True
        descriptors.append(descriptor)

    # Stable sort: False (enabled) sorts before True (disabled).
    descriptors.sort(key=lambda entry: entry.get('disabled', False))
    return descriptors
def reload(self):
    """Reset derived state and rebuild everything tied to the label config.

    Clears the tasks and derived schemas, calls ``self._init()``, re-reads
    the label config file named by ``self.config['label_config']``, then
    creates or updates the analytics object and recreates the project
    object and the converter. A machine learning backend is connected when
    ``self.ml_backend`` is ``None`` and the config provides ``'ml_backend'``
    parameters.
    """
    self.tasks = None
    self.derived_input_schema = []
    self.derived_output_schema = {
        'from_name_to_name_type': set(),
        'labels': defaultdict(set)
    }

    self._init()

    # Use a context manager so the config file handle is closed right away
    # instead of being left for the garbage collector.
    with open(self.config['label_config']) as label_config_file:
        raw_label_config = label_config_file.read()
    self.label_config_full = config_comments_free(raw_label_config)
    self.label_config_line = config_line_stripped(self.label_config_full)

    collect_analytics = self.config.get('collect_analytics', True)
    if self.analytics is None:
        self.analytics = Analytics(
            self.label_config_line, collect_analytics, self.name)
    else:
        self.analytics.update_info(
            self.label_config_line, collect_analytics, self.name)

    # configure project
    self.project_obj = ProjectObj(
        label_config=self.label_config_line,
        label_config_full=self.label_config_full)

    # configure machine learning backend
    # The backend created here is only connected to ``project_obj``;
    # ``self.ml_backend`` itself is not assigned in this method.
    if self.ml_backend is None:
        ml_backend_params = self.config.get('ml_backend')
        if ml_backend_params:
            ml_backend = MLBackend.from_params(ml_backend_params)
            self.project_obj.connect(ml_backend)

    self.converter = Converter(self.label_config_full)
def export_projects():
    """Export labelled data for every project that has exporting enabled.

    A project is exported when its status is ACTIVE and its export format is
    not NONE. For each annotator of such a project that has a ``config.xml``
    in their project directory, the completions are converted with the Label
    Studio converter into the per-annotator export directory using the
    project's export format (JSON, CSV, TSV or CoNLL). Unsupported formats
    are logged and skipped.
    """
    # Get all projects
    for project in Project.objects.all():
        export_enabled = (
            project.status == Project.Status.ACTIVE
            and project.export_format != Project.ExportFormat.NONE
        )
        if not export_enabled:
            continue

        logger.info(f"Exporting project {project.name}")
        output_paths = []

        assignments = ProjectAnnotators.objects.filter(project=project)
        for assignment in assignments:
            annotator = assignment.annotator
            annotator_dir = (
                LABELIT_DIRS['projects'] / annotator.username / project.name
            )
            label_config_file = annotator_dir / 'config.xml'
            # Annotators without a label config have nothing to convert.
            if not label_config_file.exists():
                continue

            c = Converter(str(label_config_file))
            completions_dir = annotator_dir / 'completions/'
            output_path = (
                LABELIT_DIRS['exports'] / project.name / annotator.username
            )
            logger.debug(
                f"Exporting completions for annotator {annotator.username}, project {project.name}"
            )

            export_format = project.export_format
            if export_format == Project.ExportFormat.JSON:
                c.convert_to_json(completions_dir, output_path)
            elif export_format == Project.ExportFormat.CSV:
                c.convert_to_csv(completions_dir, output_path, sep=',')
            elif export_format == Project.ExportFormat.TSV:
                c.convert_to_csv(completions_dir, output_path, sep='\t')
            elif export_format == Project.ExportFormat.CONLL:
                c.convert_to_conll2003(completions_dir, output_path)
            else:
                logger.debug(
                    f"Export format {project.export_format} not supported for project {project.name}"
                )
                # Nothing was written, so do not record an output path.
                continue

            output_paths.append(output_path)
def convert_file(self, to_format):
    """Convert this object's stored file to ``to_format``.

    The stored file is copied into a temporary directory and converted into
    an ``out`` subdirectory. Exactly one output file with no subdirectories
    is returned directly; any other non-empty result is zipped.

    Args:
        to_format: Target format handed to ``Converter.convert``.

    Returns:
        File or None: ``None`` when the conversion produced no files and no
        directories; otherwise a ``File`` named after the input file's stem
        with the output file's suffix, or ``.zip`` for an archive.
    """
    with get_temp_dir() as tmp_dir:
        OUT = 'out'
        work_dir = pathlib.Path(tmp_dir)
        out_dir = work_dir / OUT
        out_dir.mkdir(mode=0o700, parents=True, exist_ok=True)

        converter = Converter(
            config=self.project.get_parsed_config(),
            project_dir=None,
            upload_dir=out_dir,
            download_resources=False,
        )

        input_name = pathlib.Path(self.file.name).name
        input_file_path = work_dir / input_name

        # Materialize the stored file on disk so the converter can read it.
        with open(input_file_path, 'wb') as destination:
            destination.write(self.file.open().read())

        converter.convert(input_file_path, out_dir, to_format, is_dir=False)

        files = get_all_files_from_dir(out_dir)
        dirs = get_all_dirs_from_dir(out_dir)
        file_count = len(files)
        dir_count = len(dirs)
        input_stem = pathlib.Path(input_name).stem

        if dir_count == 0 and file_count == 0:
            return None

        if dir_count == 0 and file_count == 1:
            output_file = files[0]
            filename = input_stem + pathlib.Path(output_file).suffix
        else:
            # Several files and/or nested directories: ship them as one zip,
            # created next to ``out_dir`` inside the temporary directory.
            shutil.make_archive(out_dir, 'zip', out_dir)
            output_file = work_dir / (str(out_dir.stem) + '.zip')
            filename = input_stem + '.zip'

        out = read_bytes_stream(output_file)
        return File(out, name=filename)
def load_converter(self):
    """Build a ``Converter`` from ``self.parsed_label_config``.

    The new converter is stored on ``self.converter``.
    """
    label_config = self.parsed_label_config
    self.converter = Converter(label_config)
def load_converter(self):
    """Build a ``Converter`` from ``self.label_config_full``.

    The new converter is stored on ``self.converter``.
    """
    label_config = self.label_config_full
    self.converter = Converter(label_config)
def load_converter(self):
    """Build a ``Converter`` from the parsed label config and project path.

    ``self.parsed_label_config`` is passed as the config and ``self.path``
    as ``project_dir``; the new converter is stored on ``self.converter``.
    """
    label_config = self.parsed_label_config
    project_dir = self.path
    self.converter = Converter(label_config, project_dir=project_dir)