def prepare_reference(reference_params: dict, location: str, paired: bool):
    """
    Validate reference import parameters and import the reference receptors/sequences.

    Checks that exactly the keys 'format' and 'params' are given, that the referenced
    file exists, forces is_repertoire=False and the given paired mode, merges the
    user params over the format's defaults, and delegates the import to the
    reflected {format}Import class.
    """
    ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location, "reference")

    seq_import_params = reference_params.get("params", {})

    assert os.path.isfile(seq_import_params["path"]), \
        f"{location}: the file {seq_import_params['path']} does not exist. " \
        f"Specify the correct path under reference."

    # if the user supplied these keys they must match the enforced values;
    # otherwise fill them in (setdefault + assert is equivalent to the check-or-set pair)
    seq_import_params.setdefault("is_repertoire", False)
    assert seq_import_params["is_repertoire"] == False, f"{location}: is_repertoire must be False for SequenceImport"

    seq_import_params.setdefault("paired", paired)
    assert seq_import_params["paired"] == paired, f"{location}: paired must be {paired} for SequenceImport"

    format_str = reference_params["format"]
    import_class = ReflectionHandler.get_class_by_name(f"{format_str}Import")

    # user-specified params take precedence over the format's defaults
    default_params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path / "datasets",
                                              DefaultParamsLoader.convert_to_snake_case(format_str))
    processed_params = DatasetImportParams.build_object(**{**default_params, **seq_import_params})

    # seq_import_params is the same object as reference_params["params"] here
    return ImportHelper.import_items(import_class, seq_import_params["path"], processed_params)
def import_dataset(params, dataset_name: str) -> ReceptorDataset:
    """
    Build a ReceptorDataset from single-line receptor files and export it as a pickle.

    Resolves the input filenames from the configured path, creates the result
    directory (warning if it already exists), imports the receptors, attaches the
    dataset name and labels, and pickles the dataset into the result path.
    """
    import_params = DatasetImportParams.build_object(**params)
    filenames = ImportHelper.get_sequence_filenames(import_params.path, dataset_name)

    PathBuilder.build(import_params.result_path, warn_if_exists=True)

    dataset = SingleLineReceptorImport._import_from_files(filenames, import_params)
    dataset.name = dataset_name
    dataset.labels = ImportHelper.extract_sequence_dataset_params(params=import_params)

    PickleExporter.export(dataset, import_params.result_path)
    return dataset
def import_dataset(import_class, params: dict, dataset_name: str) -> Dataset:
    """
    Import a repertoire or sequence dataset, reusing a previously imported one if present.

    If no cached dataset exists, decides between repertoire and sequence import based
    on the is_repertoire flag (inferred from the presence of a metadata file when the
    flag is unset, for backwards compatibility).
    """
    processed_params = DatasetImportParams.build_object(**params)

    dataset = ImportHelper.load_dataset_if_exists(params, processed_params, dataset_name)
    if dataset is not None:
        return dataset

    # backwards compatibility: if is_repertoire is not specified but the metadata file is
    if processed_params.is_repertoire is None and processed_params.metadata_file is not None:
        processed_params.is_repertoire = True

    if processed_params.is_repertoire:
        return ImportHelper.import_repertoire_dataset(import_class, processed_params, dataset_name)
    return ImportHelper.import_sequence_dataset(import_class, processed_params, dataset_name)
def import_dataset(params: dict, dataset_name: str) -> Dataset:
    """
    Load a pickled dataset from either a direct path or a metadata file.

    Raises ValueError when neither a path nor a metadata file is configured. After
    loading, internal file paths are refreshed: repertoire paths for a
    RepertoireDataset, receptor paths otherwise.
    """
    pickle_params = DatasetImportParams.build_object(**params)

    if pickle_params.path is not None:
        dataset = PickleImport._import_from_path(pickle_params)
    elif pickle_params.metadata_file is not None:
        dataset = PickleImport._import_from_metadata(pickle_params, dataset_name)
    else:
        raise ValueError(f"PickleImport: no path nor metadata file were defined under key {dataset_name}. At least one of these has "
                         f"to be specified to import the dataset.")

    if isinstance(dataset, RepertoireDataset):
        return PickleImport._update_repertoire_paths(pickle_params, dataset)
    return PickleImport._update_receptor_paths(pickle_params, dataset)