def import_dataset(params: dict, dataset_name: str) -> ReceptorDataset:
    # convert the raw keyword parameters into a typed params object
    generic_params = DatasetImportParams.build_object(**params)

    # resolve the files to import and make sure the result directory exists
    filenames = ImportHelper.get_sequence_filenames(generic_params.path, dataset_name)
    PathBuilder.build(generic_params.result_path, warn_if_exists=True)

    # build the receptor dataset from the discovered files and attach metadata
    dataset = SingleLineReceptorImport._import_from_files(filenames, generic_params)
    dataset.name = dataset_name
    dataset.params = ImportHelper.extract_sequence_dataset_params(params=generic_params)

    # cache the imported dataset as a pickle so later steps can reload it
    PickleExporter.export(dataset, generic_params.result_path)

    return dataset
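# A minimal usage sketch for import_dataset, assuming the immuneML-style helpers
# above (DatasetImportParams, ImportHelper, PathBuilder, PickleExporter) are
# importable in this module. The dictionary keys and paths below are hypothetical
# placeholders; the real keys are whatever DatasetImportParams.build_object accepts.
example_params = {
    "path": "data/receptors/",         # directory holding the files to import (assumed)
    "result_path": "results/import/",  # where the pickled dataset is written (assumed)
}
# dataset = import_dataset(example_params, dataset_name="my_receptor_dataset")
# print(dataset.name)  # -> "my_receptor_dataset"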
def load_sequence_dataset(params: dict, dataset_name: str) -> Dataset:
    iris_params = IRISImportParams.build_object(**params)

    filenames = ImportHelper.get_sequence_filenames(iris_params.path, dataset_name)
    file_index = 0
    dataset_filenames = []
    items = []

    for index, filename in enumerate(filenames):
        # parse one IRIS file into receptor/sequence items, appending to any
        # leftover items from the previous file so partial batches are not dropped
        items += IRISSequenceImport.import_items(filename, paired=iris_params.paired,
                                                 all_dual_chains=iris_params.import_dual_chains,
                                                 all_genes=iris_params.import_all_gene_combinations)

        # flush full batches to disk; on the last file, also flush the remainder
        while len(items) > iris_params.sequence_file_size or (index == len(filenames) - 1 and len(items) > 0):
            dataset_filenames.append(iris_params.result_path + "batch_{}.pickle".format(file_index))
            ImportHelper.store_sequence_items(dataset_filenames, items, iris_params.sequence_file_size)
            items = items[iris_params.sequence_file_size:]
            file_index += 1

    # paired chains yield a ReceptorDataset, single chains a SequenceDataset
    return ReceptorDataset(filenames=dataset_filenames, file_size=iris_params.sequence_file_size, name=dataset_name) if iris_params.paired \
        else SequenceDataset(filenames=dataset_filenames, file_size=iris_params.sequence_file_size, name=dataset_name)
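# A minimal usage sketch for load_sequence_dataset, assuming IRISImportParams
# accepts the keys below; the paths, flags, and batch size are hypothetical
# placeholders rather than documented defaults. The key point it illustrates is
# that "paired" decides the return type: ReceptorDataset vs SequenceDataset.
iris_example_params = {
    "path": "data/iris/",                    # directory with IRIS-format files (assumed)
    "result_path": "results/iris_import/",   # where batch_{i}.pickle files go (assumed)
    "paired": True,                          # True -> ReceptorDataset, False -> SequenceDataset
    "import_dual_chains": False,
    "import_all_gene_combinations": False,
    "sequence_file_size": 1000,              # items stored per pickled batch
}
# dataset = load_sequence_dataset(iris_example_params, dataset_name="my_iris_dataset")
# isinstance(dataset, ReceptorDataset)  # True, since "paired" is True above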