Example 1
    def setUp(self):
        """Create a temporary SMILES file and build a dataset from it."""
        temp_file = TempSMILESFile(tempfile_kwargs={'prefix': 'dataset'})
        file_handler = temp_file.open()

        self.temp_file = temp_file
        self.fh = file_handler

        # Expected items: one SMILES string per line of the raw file text.
        self.item_list = temp_file.smiles_strings.split('\n')

        self.dataset = SMILESDataset(file_handler.name)
Example 2
    def setUp(self):
        """Prepare a dataset and a corpus-backed vocabulary for the tests."""
        self.temp_file = TempSMILESFile()
        self.fh = self.temp_file.open()

        # See `test_data.py` for data set test cases.
        dataset = SMILESDataset(self.fh.name)
        self.dataset = dataset
        self.vocab = SMILESVocabulary(dataset, need_corpus=True)
    def setUp(self):
        """Build a model and a softmax-based predictor over a tiny corpus."""
        temp_smiles = TempSMILESFile(
            tempfile_kwargs={'prefix': 'softmax_sampler'})
        self.fh = temp_smiles.open()

        # The vocabulary needs a corpus to back the sampler's token stream.
        self.vocabulary = SMILESVocabulary(
            SMILESDataset(self.fh.name), need_corpus=True)

        self.model = SMILESRNN(len(self.vocabulary))
        self.predictor = SoftmaxSearch(self.model, self.vocabulary)
Example 4
class SMILESDatasetTestCase(unittest.TestCase):
    """Tests for `SMILESDataset` reading and its lazy transformations."""

    def setUp(self):
        self.temp_file = TempSMILESFile(tempfile_kwargs={'prefix': 'dataset'})
        self.fh = self.temp_file.open()

        # Expected items: one SMILES string per line of the raw file text.
        self.item_list = self.temp_file.smiles_strings.split('\n')

        self.dataset = SMILESDataset(self.fh.name)

    def test_1_read(self):
        # Every loaded sample is framed with begin/end-of-SMILES tokens.
        all_framed = all(
            sample.startswith(Token.BOS) and sample.endswith(Token.EOS)
            for sample in self.dataset
        )
        self.assertTrue(all_framed)

        # Cropping the frame tokens recovers exactly the original lines.
        cropped = [Token.crop(sample) for sample in self.dataset]
        self.assertListEqual(self.item_list, cropped)

        self.assertEqual(len(self.item_list), len(self.dataset))

    def test_2_map_(self):
        # `map_` is lazy: it yields an iterator, not a materialized list.
        mapped = self.dataset.map_(Token.crop)

        self.assertIsInstance(mapped, Iterator)
        self.assertListEqual(self.item_list, list(mapped))

    def test_3_filter_(self):
        # `filter_` is likewise lazy; only the short strings survive here.
        short_only = self.dataset.filter_(lambda sample: len(sample) < 10)

        self.assertIsInstance(short_only, Iterator)
        self.assertListEqual(['{N#N}', '{CN=C=O}'], list(short_only))

    def tearDown(self):
        # Release the temporary file handler opened in setUp.
        self.fh.close()
Example 5
    def setUp(self):
        """Create a column-wise mini-batch sampler over a temporary corpus."""
        self.temp_file = TempSMILESFile(
            tempfile_kwargs={'prefix': 'dataloader'})
        self.fh = self.temp_file.open()

        vocabulary = SMILESVocabulary(
            dataset=SMILESDataset(self.fh.name), need_corpus=True)

        # Two samples per batch, four time steps per mini-batch column.
        self.dataloader = SMILESBatchColumnSampler(
            corpus=vocabulary.corpus,
            shuffle=True,
            batch_size=2,
            n_steps=4,
        )
Example 6
    def setUp(self):
        """Write three SMILES strings to a temp file and build a vocabulary.

        A named temporary file is used so that `SMILESDataset` can re-open
        the data by file name.
        """
        self.smiles_strings = (
            'CC(=O)NCCC1=CNc2c1cc(OC)cc2\n'
            'CCc1c[n+]2ccc3c4ccccc4[nH]c3c2cc1\n'
            'O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5'
        )

        self.fh = tempfile.NamedTemporaryFile(mode='w+', encoding='ascii')
        self.fh.write(self.smiles_strings)
        # Rewind so subsequent readers see the data from the start.
        # Bug fix: the original called `seek(os.SEEK_SET)`, passing the
        # `whence` constant (== 0) as the *offset* — it worked by accident.
        # Give the offset and the `whence` flag explicitly.
        self.fh.seek(0, os.SEEK_SET)

        # See `test_data.py` for data set test cases.
        self.dataset = SMILESDataset(self.fh.name)
        self.vocab = Vocabulary(self.dataset, need_corpus=True)
    def setUp(self):
        """Instantiate an LSTM-based SMILES model plus a batch sampler."""
        temp_file = TempSMILESFile(tempfile_kwargs={'prefix': 'model'})
        self.fh = temp_file.open()

        dataset = SMILESDataset(self.fh.name)
        self.vocabulary = SMILESVocabulary(dataset, need_corpus=True)
        self.batch_sampler = SMILESBatchColumnSampler(
            corpus=self.vocabulary.corpus,
            batch_size=3,
            n_steps=8,
        )

        # Used in output/state shape testing.
        self.n_rnn_layers = 1
        self.n_rnn_units = 32

        # Group the model hyper-parameters by sub-module for readability;
        # the values are identical to a single flat keyword-argument call.
        embedding_options = dict(
            use_one_hot=False,
            embedding_dim=4,
            embedding_dropout=0.25,
            embedding_dropout_axes=0,
            embedding_init=mx.init.Uniform(),
            embedding_prefix='embedding_',
        )
        rnn_options = dict(
            rnn='lstm',
            rnn_n_layers=self.n_rnn_layers,
            rnn_n_units=self.n_rnn_units,
            rnn_i2h_init='xavier_normal',
            rnn_h2h_init='orthogonal_normal',
            rnn_reinit_state=True,
            rnn_detach_state=False,
            rnn_state_init=mx.nd.random.uniform,
            rnn_dropout=0.0,
            rnn_prefix='encoder_',
        )
        dense_options = dict(
            dense_n_layers=2,
            dense_n_units=32,
            dense_activation='relu',
            dense_dropout=0.5,
            dense_init=mx.init.Xavier(),
            dense_prefix='decoder_',
        )

        self.model = SMILESRNN(
            len(self.vocabulary),
            dtype='float32',
            prefix='model_',
            **embedding_options,
            **rnn_options,
            **dense_options,
        )
Example 8
 def setUp(self):
     """Build a corpus-backed vocabulary from one hard-coded SMILES string."""
     self.smiles_string = 'CCc1c[n+]2ccc3c4ccccc4[nH]c3c2cc1'
     # The dataset is created inside the context so the temp file exists
     # while it is read.
     # NOTE(review): the vocabulary is built *after* the context exits
     # (file closed/removed) — presumably SMILESDataset loads eagerly;
     # confirm, otherwise move the next line inside the `with` block.
     with TempSMILESFile(smiles_strings=self.smiles_string) as temp_fh:
         dataset = SMILESDataset(temp_fh.file_handler.name)
     self.vocabulary = SMILESVocabulary(dataset, need_corpus=True)