예제 #1
0
 def test_create_all_kmers(self):
     """Check that KmerHelper.create_all_kmers enumerates every 2-mer over "ABCD"."""
     alphabet = list("ABCD")
     k = 2
     kmers = KmerHelper.create_all_kmers(k=k, alphabet=alphabet)
     # 4 symbols at each of 2 positions -> 4 ** 2 = 16 k-mers expected.
     self.assertEqual(len(kmers), 16)
     # assertIn reports the container contents on failure, whereas
     # assertTrue(x in y) only reports "False is not true".
     self.assertIn("BD", kmers)
     self.assertIn("DA", kmers)
예제 #2
0
    def create_model(self, dataset: RepertoireDataset, k: int, vector_size: int, batch_size: int, model_path: Path):
        """Build, train, save and return a Word2Vec model over k-mers.

        The vocabulary is seeded with every k-mer of length ``k`` over the
        alphabet returned by ``EnvironmentSettings.get_sequence_alphabet()``,
        each wrapped as a one-word sentence. The model is then trained once
        per item yielded by ``dataset.get_data(batch_size=batch_size)``, using
        the sentences ``KmerHelper.create_sentences_from_repertoire`` produces
        for that item.

        Args:
            dataset: source of repertoires; iterated via ``get_data``.
            k: k-mer length used for both the vocabulary and the sentences.
            vector_size: passed to ``Word2Vec`` as ``size``.
            batch_size: forwarded to ``dataset.get_data``.
            model_path: destination handed to ``model.save`` (as a string).

        Returns:
            The trained ``Word2Vec`` instance.
        """
        w2v = Word2Vec(size=vector_size, min_count=1, window=5)  # empty model, no corpus yet

        # One single-word "sentence" per k-mer so that build_vocab registers all of them.
        vocabulary = [
            [kmer]
            for kmer in KmerHelper.create_all_kmers(k=k, alphabet=EnvironmentSettings.get_sequence_alphabet())
        ]
        w2v.build_vocab(vocabulary)
        vocabulary_size = len(vocabulary)

        for repertoire in dataset.get_data(batch_size=batch_size):
            repertoire_sentences = KmerHelper.create_sentences_from_repertoire(repertoire=repertoire, k=k)
            w2v.train(sentences=repertoire_sentences, total_words=vocabulary_size, epochs=15)

        w2v.save(str(model_path))

        return w2v
예제 #3
0
    def create_model(self, dataset: RepertoireDataset, k: int,
                     vector_size: int, batch_size: int, model_path: Path):
        """Build, train, save and return a Word2Vec model from k-mer neighbourhoods.

        The vocabulary is seeded with every k-mer of length ``k`` over the
        alphabet returned by ``EnvironmentSettings.get_sequence_alphabet()``.
        For each of those k-mers, the model is trained on whatever
        ``KmerHelper.create_kmers_within_HD`` returns for ``distance=1``.

        ``dataset`` and ``batch_size`` are accepted but not used by this
        implementation.

        Args:
            dataset: unused here.
            k: k-mer length used to enumerate the vocabulary.
            vector_size: passed to ``Word2Vec`` as ``size``.
            batch_size: unused here.
            model_path: destination handed to ``model.save`` (as a string).

        Returns:
            The trained ``Word2Vec`` instance.
        """
        w2v = Word2Vec(size=vector_size, min_count=1, window=5)  # empty model, no corpus yet

        # One single-word "sentence" per k-mer so that build_vocab registers all of them.
        vocabulary = [
            [kmer]
            for kmer in KmerHelper.create_all_kmers(
                k=k, alphabet=EnvironmentSettings.get_sequence_alphabet())
        ]
        w2v.build_vocab(vocabulary)
        vocabulary_size = len(vocabulary)

        # Each vocabulary entry is a one-element list; unpack it to get the bare k-mer.
        for [kmer] in vocabulary:
            neighbours = KmerHelper.create_kmers_within_HD(
                kmer=kmer,
                alphabet=EnvironmentSettings.get_sequence_alphabet(),
                distance=1)
            w2v.train(sentences=neighbours,
                      total_words=vocabulary_size,
                      epochs=w2v.epochs)

        w2v.save(str(model_path))

        return w2v