def setUp(self):
    """Write a temporary SMILES file and load it as a dataset fixture."""
    self.temp_file = TempSMILESFile(tempfile_kwargs={'prefix': 'dataset'})
    self.fh = self.temp_file.open()
    # Keep the raw strings so tests can compare them against what the
    # dataset reads back from disk.
    raw_smiles = self.temp_file.smiles_strings
    self.item_list = raw_smiles.split('\n')
    self.dataset = SMILESDataset(self.fh.name)
def setUp(self):
    """Build a dataset and a corpus-backed vocabulary over a temp SMILES file."""
    self.temp_file = TempSMILESFile()
    self.fh = self.temp_file.open()
    # See `test_data.py` for data set test cases.
    filename = self.fh.name
    self.dataset = SMILESDataset(filename)
    self.vocab = SMILESVocabulary(self.dataset, need_corpus=True)
def setUp(self):
    """Create model, vocabulary, and predictor fixtures for softmax sampling.

    The temp-file wrapper is stored on the instance (it was previously a
    local), so it cannot be garbage-collected — and the underlying file
    removed — while the test still reads it; this also matches the sibling
    fixtures, which keep ``self.temp_file``.
    """
    self.temp_file = TempSMILESFile(
        tempfile_kwargs={'prefix': 'softmax_sampler'})
    self.fh = self.temp_file.open()
    dataset = SMILESDataset(self.fh.name)
    self.vocabulary = SMILESVocabulary(dataset, need_corpus=True)
    self.model = SMILESRNN(len(self.vocabulary))
    self.predictor = SoftmaxSearch(self.model, self.vocabulary)
class SMILESDatasetTestCase(unittest.TestCase):
    """Tests for reading, mapping, and filtering a SMILES dataset."""

    def setUp(self):
        self.temp_file = TempSMILESFile(tempfile_kwargs={'prefix': 'dataset'})
        self.fh = self.temp_file.open()
        self.item_list = self.temp_file.smiles_strings.split('\n')
        self.dataset = SMILESDataset(self.fh.name)

    def test_1_read(self):
        """Every sample is BOS/EOS-framed and round-trips to the file contents."""
        framed = [smiles.startswith(Token.BOS) and smiles.endswith(Token.EOS)
                  for smiles in self.dataset]
        self.assertTrue(all(framed))
        cropped = [Token.crop(smiles) for smiles in self.dataset]
        self.assertListEqual(self.item_list, cropped)
        self.assertEqual(len(self.item_list), len(self.dataset))

    def test_2_map_(self):
        """`map_` yields a lazy iterator applying the callable per item."""
        mapped = self.dataset.map_(Token.crop)
        self.assertIsInstance(mapped, Iterator)
        self.assertListEqual(self.item_list, list(mapped))

    def test_3_filter_(self):
        """`filter_` yields a lazy iterator keeping only matching items."""
        short_only = self.dataset.filter_(lambda smiles: len(smiles) < 10)
        self.assertIsInstance(short_only, Iterator)
        self.assertListEqual(['{N#N}', '{CN=C=O}'], list(short_only))

    def tearDown(self):
        self.fh.close()
def setUp(self):
    """Build a column-batch sampler over a freshly written SMILES corpus."""
    self.temp_file = TempSMILESFile(
        tempfile_kwargs={'prefix': 'dataloader'})
    self.fh = self.temp_file.open()
    smiles_dataset = SMILESDataset(self.fh.name)
    vocab = SMILESVocabulary(dataset=smiles_dataset, need_corpus=True)
    self.dataloader = SMILESBatchColumnSampler(
        corpus=vocab.corpus,
        batch_size=2,
        n_steps=4,
        shuffle=True,
    )
def setUp(self):
    """Write three SMILES strings to a temp file and build dataset/vocab."""
    self.smiles_strings = (
        'CC(=O)NCCC1=CNc2c1cc(OC)cc2\n'
        'CCc1c[n+]2ccc3c4ccccc4[nH]c3c2cc1\n'
        'O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5'
    )
    self.fh = tempfile.NamedTemporaryFile(mode='w+', encoding='ascii')
    self.fh.write(self.smiles_strings)
    # Rewind (and implicitly flush) so the data is readable from the start.
    # The original passed `os.SEEK_SET` (== 0) as the *offset* argument,
    # which worked only by coincidence: SEEK_SET is meant to be the
    # `whence` argument of seek(). Pass the offset 0 explicitly.
    self.fh.seek(0)
    # See `test_data.py` for data set test cases.
    self.dataset = SMILESDataset(self.fh.name)
    # NOTE(review): sibling fixtures construct `SMILESVocabulary`; confirm
    # whether `Vocabulary` here is the same class under another import.
    self.vocab = Vocabulary(self.dataset, need_corpus=True)
def setUp(self):
    """Create vocabulary, batch sampler, and a fully parameterized model.

    The temp-file wrapper is stored on the instance (it was previously a
    local), so it cannot be garbage-collected — and the underlying file
    removed — while the test still reads it; this also matches the sibling
    fixtures, which keep ``self.temp_file``.
    """
    self.temp_file = TempSMILESFile(tempfile_kwargs={'prefix': 'model'})
    self.fh = self.temp_file.open()
    dataset = SMILESDataset(self.fh.name)
    self.vocabulary = SMILESVocabulary(dataset, need_corpus=True)
    self.batch_sampler = SMILESBatchColumnSampler(
        corpus=self.vocabulary.corpus,
        batch_size=3,
        n_steps=8,
    )
    # Cached so tests can assert on output/state shapes.
    self.n_rnn_layers = 1
    self.n_rnn_units = 32
    self.model = SMILESRNN(
        len(self.vocabulary),
        use_one_hot=False,
        embedding_dim=4,
        embedding_dropout=0.25,
        embedding_dropout_axes=0,
        embedding_init=mx.init.Uniform(),
        embedding_prefix='embedding_',
        rnn='lstm',
        rnn_n_layers=self.n_rnn_layers,
        rnn_n_units=self.n_rnn_units,
        rnn_i2h_init='xavier_normal',
        rnn_h2h_init='orthogonal_normal',
        rnn_reinit_state=True,
        rnn_detach_state=False,
        rnn_state_init=mx.nd.random.uniform,
        rnn_dropout=0.0,
        rnn_prefix='encoder_',
        dense_n_layers=2,
        dense_n_units=32,
        dense_activation='relu',
        dense_dropout=0.5,
        dense_init=mx.init.Xavier(),
        dense_prefix='decoder_',
        dtype='float32',
        prefix='model_',
    )
def setUp(self):
    """Build a vocabulary from a single SMILES string via a temp file."""
    self.smiles_string = 'CCc1c[n+]2ccc3c4ccccc4[nH]c3c2cc1'
    # The context manager closes (and removes) the temp file once the
    # vocabulary has been built.
    with TempSMILESFile(smiles_strings=self.smiles_string) as tmp:
        smiles_data = SMILESDataset(tmp.file_handler.name)
        self.vocabulary = SMILESVocabulary(smiles_data, need_corpus=True)