def test(self):
    """Encode a nine-sequence dataset with KmerFreqSequenceEncoder and check the result.

    The fixture cycles through three nucleotide sequences, so the examples
    with identifiers "1" and "4" carry identical sequences. The test checks
    that one row is produced per sequence, that every identifier is present
    in the encoded data, and that rows 0 and 3 of the encoded matrix are equal.
    """
    # (identifier, sequence, value of label "l1") for every example in the dataset
    fixture = [
        ("1", "AAACCC", 1),
        ("2", "ACACAC", 2),
        ("3", "CCCAAA", 1),
        ("4", "AAACCC", 2),
        ("5", "ACACAC", 1),
        ("6", "CCCAAA", 2),
        ("7", "AAACCC", 1),
        ("8", "ACACAC", 2),
        ("9", "CCCAAA", 1),
    ]
    sequences = [
        ReceptorSequence(amino_acid_sequence=seq,
                         nucleotide_sequence=seq,
                         identifier=identifier,
                         metadata=SequenceMetadata(custom_params={"l1": label}))
        for identifier, seq, label in fixture
    ]

    path = EnvironmentSettings.tmp_test_path / "kmrefreqseqfacencoder/"
    PathBuilder.build(path)

    # Everything after the directory is created runs under try/finally so the
    # temporary files are removed even when encoding or an assertion fails.
    try:
        filename = path / "sequences.pkl"
        with open(filename, "wb") as file:
            pickle.dump(sequences, file)

        lc = LabelConfiguration()
        lc.add_label("l1", [1, 2])

        dataset = SequenceDataset(labels={"l1": [1, 2]}, filenames=[filename], identifier="d2")

        encoder = KmerFreqSequenceEncoder.build_object(
            dataset,
            normalization_type=NormalizationType.RELATIVE_FREQUENCY.name,
            reads=ReadsType.UNIQUE.name,
            sequence_encoding=SequenceEncodingType.CONTINUOUS_KMER.name,
            sequence_type=SequenceType.NUCLEOTIDE.name,
            k=3,
        )

        encoder_params = EncoderParams(result_path=path / "2/",
                                       label_config=lc,
                                       pool_size=2,
                                       learn_model=True,
                                       model={},
                                       filename="dataset.csv")
        encoded_dataset = encoder.encode(dataset, encoder_params)
        encoded_data = encoded_dataset.encoded_data

        self.assertEqual(9, encoded_data.examples.shape[0])

        # One assertion per identifier so a failure names the missing one.
        for identifier, _, _ in fixture:
            self.assertIn(identifier, encoded_data.example_ids)

        # Rows 0 and 3 are expected to belong to the identical sequences "1" and "4".
        self.assertTrue(numpy.array_equal(encoded_data.examples[0].A,
                                          encoded_data.examples[3].A))
    finally:
        shutil.rmtree(path)
def test(self):
    """Encode a four-receptor dataset with KmerFreqReceptorEncoder and check the result.

    Receptors "1" and "3" share the same alpha/beta chains, as do "2" and "4".
    The test checks that one row is produced per receptor, that every
    identifier is present, that rows 0 and 2 of the encoded matrix are equal,
    and that chain-prefixed k-mer feature names are present.
    """
    # (identifier, alpha chain, beta chain) for every receptor in the dataset
    fixture = [
        ("1", "AAACCC", "AAACCC"),
        ("2", "AAA", "CCC"),
        ("3", "AAACCC", "AAACCC"),
        ("4", "AAA", "CCC"),
    ]
    receptors = [
        TCABReceptor(alpha=ReceptorSequence(amino_acid_sequence=alpha),
                     beta=ReceptorSequence(amino_acid_sequence=beta),
                     identifier=identifier)
        for identifier, alpha, beta in fixture
    ]

    path = EnvironmentSettings.tmp_test_path / "kmer_receptor_frequency/"
    PathBuilder.build(path)

    # Everything after the directory is created runs under try/finally so the
    # temporary files are removed even when encoding or an assertion fails.
    try:
        filename = path / "receptors.pkl"
        with open(filename, "wb") as file:
            pickle.dump(receptors, file)

        lc = LabelConfiguration()
        lc.add_label("l1", [1, 2])

        dataset = ReceptorDataset(labels={"l1": [1, 2]}, filenames=[filename], identifier="d1")

        encoder = KmerFreqReceptorEncoder.build_object(
            dataset,
            normalization_type=NormalizationType.RELATIVE_FREQUENCY.name,
            reads=ReadsType.UNIQUE.name,
            sequence_encoding=SequenceEncodingType.CONTINUOUS_KMER.name,
            k=3,
        )

        encoder_params = EncoderParams(result_path=path / "2/",
                                       label_config=lc,
                                       pool_size=2,
                                       learn_model=True,
                                       model={},
                                       filename="dataset.csv",
                                       encode_labels=False)
        encoded_dataset = encoder.encode(dataset, encoder_params)
        encoded_data = encoded_dataset.encoded_data

        self.assertEqual(4, encoded_data.examples.shape[0])

        # One assertion per item so a failure names the missing one.
        for identifier, _, _ in fixture:
            self.assertIn(identifier, encoded_data.example_ids)

        # Rows 0 and 2 are expected to belong to the identical receptors "1" and "3".
        self.assertTrue(numpy.array_equal(encoded_data.examples[0].A,
                                          encoded_data.examples[2].A))

        for feature_name in ["alpha_AAA", "alpha_AAC", "beta_CCC"]:
            self.assertIn(feature_name, encoded_data.feature_names)
    finally:
        shutil.rmtree(path)