Ejemplo n.º 1
0
    def build_receptor_from_rows(first_row, second_row, identifier, chain_pair, metadata_columns):
        """
        Import one sequence from each of the two rows and pair them into a receptor object.

        Both rows are imported through ImportHelper.import_sequence (with the given metadata_columns)
        before chain_pair is examined, so import errors surface even for unsupported chain pairs.

        Args:
            first_row: row holding the first chain (alpha, gamma or heavy, depending on chain_pair)
            second_row: row holding the second chain (beta, delta, light or kappa, depending on chain_pair)
            identifier: identifier handed to the receptor constructor
            chain_pair: ChainPair member selecting which receptor class is built
            metadata_columns: forwarded unchanged to ImportHelper.import_sequence

        Returns:
            a TCABReceptor, TCGDReceptor, BCReceptor or BCKReceptor; its metadata is a shallow copy of the
            custom_params of the second sequence (TRA_TRB, TRG_TRD) or of the first sequence (IGH_IGL, IGH_IGK)

        Raises:
            NotImplementedError: if chain_pair is none of the four supported pairs
        """
        chain_one = ImportHelper.import_sequence(first_row, metadata_columns=metadata_columns)
        chain_two = ImportHelper.import_sequence(second_row, metadata_columns=metadata_columns)

        # T-cell receptors copy their metadata from the second chain
        if chain_pair == ChainPair.TRA_TRB:
            return TCABReceptor(alpha=chain_one, beta=chain_two, identifier=identifier,
                                metadata={**chain_two.metadata.custom_params})

        if chain_pair == ChainPair.TRG_TRD:
            return TCGDReceptor(gamma=chain_one, delta=chain_two, identifier=identifier,
                                metadata={**chain_two.metadata.custom_params})

        # B-cell receptors copy their metadata from the first (heavy) chain
        if chain_pair == ChainPair.IGH_IGL:
            return BCReceptor(heavy=chain_one, light=chain_two, identifier=identifier,
                              metadata={**chain_one.metadata.custom_params})

        if chain_pair == ChainPair.IGH_IGK:
            return BCKReceptor(heavy=chain_one, kappa=chain_two, identifier=identifier,
                               metadata={**chain_one.metadata.custom_params})

        raise NotImplementedError(f"ImportHelper: {chain_pair} chain pair is not supported.")
Ejemplo n.º 2
0
    def _prepare_parameters(reference: dict,
                            max_edit_distances: dict,
                            name: str = None):
        """
        Validate the encoder parameters and return them in normalised form.

        Args:
            reference: reference specification, passed to MatchedReferenceUtil.prepare_reference with paired=True
            max_edit_distances: either a single int, which is applied to every legal chain, or a dict keyed by
                chain whose keys are checked against the legal chains (exclusive=False)
            name: stored unchanged in the result

        Returns:
            dict with the keys "reference_receptors", "max_edit_distances" and "name"

        Any other type of max_edit_distances is handed to ParameterValidator.assert_type_and_value with dict
        as the expected type (presumably raising there - confirm against ParameterValidator).
        """
        location = "MatchedReceptorsEncoder"

        # chains of every receptor type that may appear in the reference
        legal_chains = [
            chain
            for receptor in (TCABReceptor(), TCGDReceptor(), BCReceptor())
            for chain in receptor.get_chains()
        ]

        # bool is a subclass of int; it is excluded so that True/False are still rejected below,
        # exactly as with the previous exact-type check
        is_plain_int = isinstance(max_edit_distances, int) and not isinstance(max_edit_distances, bool)

        if is_plain_int:
            max_edit_distances = {
                chain: max_edit_distances
                for chain in legal_chains
            }
        elif isinstance(max_edit_distances, dict):
            # isinstance (not an exact type match) so that dict subclasses such as OrderedDict
            # also get their keys validated instead of slipping through the fallback branch
            ParameterValidator.assert_keys(max_edit_distances.keys(),
                                           legal_chains,
                                           location,
                                           "max_edit_distances",
                                           exclusive=False)
        else:
            ParameterValidator.assert_type_and_value(max_edit_distances, dict,
                                                     location,
                                                     'max_edit_distances')

        reference_receptors = MatchedReferenceUtil.prepare_reference(
            reference, location=location, paired=True)

        return {
            "reference_receptors": reference_receptors,
            "max_edit_distances": max_edit_distances,
            "name": name
        }
Ejemplo n.º 3
0
 def build_object(cls,
                  sequences: dict,
                  identifier: str = None,
                  metadata: dict = None) -> Receptor:
     """
     Build the receptor object matching the chains present in sequences.

     Args:
         sequences: dict mapping chain values (Chain.<X>.value) to the sequence object of that chain
         identifier: passed to the receptor constructor
         metadata: passed to the receptor constructor

     Returns:
         a TCABReceptor, TCGDReceptor, BCReceptor or BCKReceptor, or None (after a warning) when the
         chains in sequences do not form one of the supported pairs

     A chain pair is selected only if every key of sequences belongs to that pair AND both chains of the
     pair are present. Without the second condition an empty or incomplete dict passes the vacuous
     all(...) test and then fails with a KeyError instead of reaching the warning fallback.
     """
     # (chain pair, receptor class, constructor keyword -> chain), tried in this order
     layouts = (
         (ChainPair.TRA_TRB, TCABReceptor, {"alpha": Chain.ALPHA, "beta": Chain.BETA}),
         (ChainPair.TRG_TRD, TCGDReceptor, {"gamma": Chain.GAMMA, "delta": Chain.DELTA}),
         (ChainPair.IGH_IGL, BCReceptor, {"heavy": Chain.HEAVY, "light": Chain.LIGHT}),
         (ChainPair.IGH_IGK, BCKReceptor, {"heavy": Chain.HEAVY, "kappa": Chain.KAPPA}),
     )

     for chain_pair, receptor_class, chain_by_keyword in layouts:
         only_pair_chains = all(chain in chain_pair.value for chain in sequences.keys())
         both_chains_present = all(chain.value in sequences for chain in chain_by_keyword.values())

         if only_pair_chains and both_chains_present:
             chain_kwargs = {keyword: sequences[chain.value]
                             for keyword, chain in chain_by_keyword.items()}
             return receptor_class(**chain_kwargs, identifier=identifier, metadata=metadata)

     warnings.warn(
         f"ReceptorBuilder: attempt to build_from_objects receptor with chains {sequences.keys()}, returning None..."
     )
     return None
Ejemplo n.º 4
0
    def test_build_batch_generator(self):
        """
        Check that ElementGenerator yields all receptors stored in .npy batch files, in order, as BCReceptor.

        307 receptors are written to 4 files (100, 100, 100 and 7 records). The batch generator is built
        twice from the same ElementGenerator to check that a second pass yields the same elements.
        """
        path = EnvironmentSettings.tmp_test_path / "element_batch_generator/"
        PathBuilder.build(path)

        # try/finally so the temporary directory is removed even when an assertion fails
        try:
            receptors = [BCReceptor(identifier=str(i), heavy=ReceptorSequence('A'), light=ReceptorSequence('C'))
                         for i in range(307)]
            file_list = [path / f"batch{i}.npy" for i in range(4)]

            for i, file_path in enumerate(file_list):
                records = [r.get_record() for r in receptors[i * 100:(i + 1) * 100]]
                # np.rec is the public namespace; np.core.records is private/deprecated in NumPy 2.x
                matrix = np.rec.fromrecords(records, names=BCReceptor.get_record_names())
                np.save(str(file_path), matrix, allow_pickle=False)

            receptor_generator = ElementGenerator(file_list, element_class_name=BCReceptor.__name__)

            # two independent passes over freshly built generators
            for _ in range(2):
                counter = 0

                for batch in receptor_generator.build_batch_generator():
                    for receptor in batch:
                        # identifiers were assigned as 0..306, so order must be preserved across files
                        self.assertEqual(counter, int(receptor.identifier))
                        self.assertIsInstance(receptor, BCReceptor)
                        counter += 1

                self.assertEqual(307, counter)
        finally:
            shutil.rmtree(path)
Ejemplo n.º 5
0
    def test_build_batch_generator(self):
        """
        Check that ElementGenerator yields all receptors stored in pickled batch files, in order, as BCReceptor.

        307 receptors are pickled into 4 files (100, 100, 100 and 7 objects). The batch generator is built
        twice from the same ElementGenerator to check that a second pass yields the same elements.
        """
        path = EnvironmentSettings.tmp_test_path / "element_batch_generator/"
        PathBuilder.build(path)

        # try/finally so the temporary directory is removed even when an assertion fails
        try:
            receptors = [BCReceptor(identifier=str(i)) for i in range(307)]
            file_list = [path / f"batch{i}.pkl" for i in range(4)]

            for i, file_path in enumerate(file_list):
                with file_path.open("wb") as file:
                    pickle.dump(receptors[i * 100:(i + 1) * 100], file)

            receptor_generator = ElementGenerator(file_list)

            # two independent passes over freshly built generators
            for _ in range(2):
                counter = 0

                for batch in receptor_generator.build_batch_generator():
                    for receptor in batch:
                        # identifiers were assigned as 0..306, so order must be preserved across files
                        self.assertEqual(counter, int(receptor.identifier))
                        self.assertIsInstance(receptor, BCReceptor)
                        counter += 1

                self.assertEqual(307, counter)
        finally:
            shutil.rmtree(path)