Beispiel #1
0
    def test_make_subset(self):
        sequences = []
        for i in range(100):
            sequences.append(ReceptorSequence(amino_acid_sequence="AAA", identifier=str(i)))

        path = EnvironmentSettings.tmp_test_path / "element_generator_subset/"
        PathBuilder.build(path)

        for i in range(10):
            filepath = path / f"batch{i}.npy"
            sequences_to_pickle = sequences[i * 10:(i + 1) * 10]
            sequence_matrix = np.core.records.fromrecords([seq.get_record() for seq in sequences_to_pickle], names=ReceptorSequence.get_record_names())
            np.save(str(filepath), sequence_matrix, allow_pickle=False)

        d = SequenceDataset(filenames=[path / f"batch{i}.npy" for i in range(10)], file_size=10)

        indices = [1, 20, 21, 22, 23, 24, 25, 50, 52, 60, 70, 77, 78, 90, 92]

        d2 = d.make_subset(indices, path, SequenceDataset.TRAIN)

        for batch in d2.get_batch(1000):
            for sequence in batch:
                self.assertTrue(int(sequence.identifier) in indices)

        self.assertEqual(15, d2.get_example_count())

        shutil.rmtree(path)
    def test_make_subset(self):
        sequences = []
        for i in range(100):
            sequences.append(
                ReceptorSequence(amino_acid_sequence="AAA", identifier=str(i)))

        path = EnvironmentSettings.tmp_test_path / "element_generator_subset/"
        PathBuilder.build(path)

        for i in range(10):
            filepath = path / f"batch{i}.pkl"
            with filepath.open("wb") as file:
                sequences_to_pickle = sequences[i * 10:(i + 1) * 10]
                pickle.dump(sequences_to_pickle, file)

        d = SequenceDataset(
            filenames=[path / f"batch{i}.pkl" for i in range(10)],
            file_size=10)

        indices = [1, 20, 21, 22, 23, 24, 25, 50, 52, 60, 70, 77, 78, 90, 92]

        d2 = d.make_subset(indices, path, SequenceDataset.TRAIN)

        for batch in d2.get_batch(1000):
            for sequence in batch:
                self.assertTrue(int(sequence.identifier) in indices)

        self.assertEqual(15, d2.get_example_count())

        shutil.rmtree(path)