def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
    """Keep only the repertoires whose metadata matches params["criteria"].

    Clones the dataset, selects the repertoires at the indices returned by
    the metadata matcher, rebuilds the metadata file under
    params["result_path"], and fails if nothing survived the filter.
    """
    filtered_dataset = dataset.clone()
    all_repertoires = filtered_dataset.get_data()
    matching = MetadataRepertoireFilter.get_matching_indices(filtered_dataset, params["criteria"])
    # Subset the cloned dataset to the matching repertoires only.
    filtered_dataset.repertoires = [all_repertoires[idx] for idx in matching]
    # Metadata is rebuilt from the ORIGINAL dataset plus the kept indices.
    filtered_dataset.metadata_file = MetadataRepertoireFilter.build_new_metadata(dataset, matching, params["result_path"])
    Filter.check_dataset_not_empty(filtered_dataset, "MetadataRepertoireFilter")
    return filtered_dataset
def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
    """Keep only the repertoires whose clone count lies within the given limits.

    params may contain "lower_limit" and/or "upper_limit"; each limit is
    applied only when present, and BOTH must hold for a repertoire to be kept.
    A new metadata file is written under params["result_path"].

    Bug fix: the original combined the two limit checks with `or`, so with
    both limits set a repertoire violating one limit was still kept whenever
    it satisfied the other. The limits are now applied conjunctively.
    """
    Preprocessor.check_dataset_type(dataset, [RepertoireDataset], "ClonesPerRepertoireFilter")
    processed_dataset = dataset.clone()
    repertoires = []
    indices = []
    for index, repertoire in enumerate(dataset.get_data()):
        clone_count = len(repertoire.sequences)
        keep = True
        if "lower_limit" in params and clone_count < params["lower_limit"]:
            keep = False
        if "upper_limit" in params and clone_count > params["upper_limit"]:
            keep = False
        if keep:
            repertoires.append(dataset.repertoires[index])
            indices.append(index)
    processed_dataset.repertoires = repertoires
    processed_dataset.metadata_file = ClonesPerRepertoireFilter.build_new_metadata(dataset, indices, params["result_path"])
    Filter.check_dataset_not_empty(processed_dataset, "ClonesPerRepertoireFilter")
    return processed_dataset
def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
    """Filter sequences by count in every repertoire, in parallel.

    Deep-copies the dataset, runs process_repertoire over all repertoires
    using a process pool of size params["batch_size"], optionally drops
    repertoires left empty, and fails if the whole dataset became empty.
    """
    result = copy.deepcopy(dataset)
    # One (repertoire, params) task per repertoire, fanned out across workers.
    tasks = [(rep, params) for rep in dataset.repertoires]
    with Pool(params["batch_size"]) as workers:
        filtered = workers.starmap(CountPerSequenceFilter.process_repertoire, tasks)
    if params["remove_empty_repertoires"]:
        filtered = Filter.remove_empty_repertoires(filtered)
    result.repertoires = filtered
    Filter.check_dataset_not_empty(result, "CountPerSequenceFilter")
    return result
def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
    """Keep only the repertoires in which every sequence is of the kept chain.

    Clones the dataset, keeps a repertoire only when ALL of its sequences
    have chain params["keep_chain"] (an empty repertoire therefore passes),
    rebuilds the metadata file under params["result_path"], and fails if
    nothing survived the filter.

    Fix: hoist the loop-invariant Chain.get_chain(params["keep_chain"])
    resolution out of the per-sequence generator — it was previously
    re-evaluated for every sequence of every repertoire.
    """
    processed_dataset = dataset.clone()
    PathBuilder.build(params["result_path"])
    # Resolve the target chain once; it does not vary per sequence.
    target_chain = Chain.get_chain(params["keep_chain"])
    repertoires = []
    indices = []
    for index, repertoire in enumerate(dataset.get_data()):
        # NOTE(review): all() on an empty sequence list is True, so
        # sequence-less repertoires are kept — confirm this is intended.
        if all(sequence.metadata.chain == target_chain for sequence in repertoire.sequences):
            repertoires.append(repertoire)
            indices.append(index)
    processed_dataset.repertoires = repertoires
    processed_dataset.metadata_file = ChainRepertoireFilter.build_new_metadata(processed_dataset, indices, params["result_path"])
    Filter.check_dataset_not_empty(processed_dataset, "ChainRepertoireFilter")
    return processed_dataset