Example no. 1
0
    def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
        """Keep only the repertoires whose metadata matches params["criteria"].

        Returns a clone of the dataset restricted to the matching repertoires,
        with its metadata file rebuilt under params["result_path"]. Raises
        (via Filter.check_dataset_not_empty) if nothing matches.
        """
        filtered_dataset = dataset.clone()

        # Indices of repertoires satisfying the metadata criteria.
        matching = MetadataRepertoireFilter.get_matching_indices(
            filtered_dataset, params["criteria"])

        all_repertoires = filtered_dataset.get_data()
        filtered_dataset.repertoires = [all_repertoires[index] for index in matching]
        filtered_dataset.metadata_file = MetadataRepertoireFilter.build_new_metadata(
            dataset, matching, params["result_path"])

        # Fail loudly rather than hand an empty dataset downstream.
        Filter.check_dataset_not_empty(filtered_dataset,
                                       "MetadataRepertoireFilter")

        return filtered_dataset
    def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
        """Keep repertoires whose clone (sequence) count lies within the configured limits.

        params may contain "lower_limit" and/or "upper_limit"; a repertoire is
        kept only if it satisfies every limit that is present (a missing limit
        imposes no constraint). The rebuilt metadata file is written under
        params["result_path"].

        Raises (via Filter.check_dataset_not_empty) if no repertoire survives.
        """
        Preprocessor.check_dataset_type(dataset, [RepertoireDataset], "ClonesPerRepertoireFilter")
        processed_dataset = dataset.clone()
        repertoires = []
        indices = []
        for index, repertoire in enumerate(dataset.get_data()):
            count = len(repertoire.sequences)
            # Bug fix: the two limit conditions must be combined with AND, not OR.
            # With both limits configured, the original OR kept every repertoire
            # outside the range as well (e.g. count < lower_limit still satisfied
            # count <= upper_limit, so the repertoire passed).
            if ("lower_limit" not in params or count >= params["lower_limit"]) and \
                    ("upper_limit" not in params or count <= params["upper_limit"]):
                repertoires.append(dataset.repertoires[index])
                indices.append(index)
        processed_dataset.repertoires = repertoires
        processed_dataset.metadata_file = ClonesPerRepertoireFilter.build_new_metadata(dataset, indices, params["result_path"])

        Filter.check_dataset_not_empty(processed_dataset, "ClonesPerRepertoireFilter")

        return processed_dataset
    def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
        """Apply the per-sequence count filter to every repertoire in parallel.

        Repertoires are processed by a pool of params["batch_size"] workers;
        if params["remove_empty_repertoires"] is truthy, repertoires left
        without sequences are dropped afterwards. Raises (via
        Filter.check_dataset_not_empty) if nothing remains.
        """
        result_dataset = copy.deepcopy(dataset)

        # Fan the per-repertoire work out over a process pool.
        work_items = [(repertoire, params) for repertoire in dataset.repertoires]
        with Pool(params["batch_size"]) as workers:
            filtered_repertoires = workers.starmap(
                CountPerSequenceFilter.process_repertoire, work_items)

        if params["remove_empty_repertoires"]:
            filtered_repertoires = Filter.remove_empty_repertoires(filtered_repertoires)

        result_dataset.repertoires = filtered_repertoires

        Filter.check_dataset_not_empty(result_dataset,
                                       "CountPerSequenceFilter")

        return result_dataset
Example no. 4
0
    def process(dataset: RepertoireDataset, params: dict) -> RepertoireDataset:
        """Keep only repertoires in which every sequence has the chain params["keep_chain"].

        Writes the rebuilt metadata file under params["result_path"] and raises
        (via Filter.check_dataset_not_empty) if no repertoire remains.
        """
        processed_dataset = dataset.clone()
        PathBuilder.build(params["result_path"])

        # Hoisted out of the loop: Chain.get_chain() was previously re-evaluated
        # for every sequence of every repertoire, although params never changes.
        target_chain = Chain.get_chain(params["keep_chain"])

        repertoires = []
        indices = []
        for index, repertoire in enumerate(dataset.get_data()):
            # NOTE: a repertoire with no sequences vacuously passes all() and is
            # kept — this matches the original behavior.
            if all(sequence.metadata.chain == target_chain
                   for sequence in repertoire.sequences):
                repertoires.append(repertoire)
                indices.append(index)

        processed_dataset.repertoires = repertoires
        processed_dataset.metadata_file = ChainRepertoireFilter.build_new_metadata(
            processed_dataset, indices, params["result_path"])

        Filter.check_dataset_not_empty(processed_dataset,
                                       "ChainRepertoireFilter")

        return processed_dataset