Esempio n. 1
0
 def _initialize_dataloader(self, training_set):
     """
     Wraps the training SMILES in a shuffled, batched DataLoader.
     :param training_set: SMILES collection handed to md.Dataset as smiles_list.
     :return: A DataLoader over the dataset, batched with self.batch_size and
              collated with md.Dataset.collate_fn.
     """
     smiles_dataset = md.Dataset(
         smiles_list=training_set,
         vocabulary=self.model.vocabulary,
         tokenizer=mv.SMILESTokenizer(),
     )
     # Loader settings are gathered in one place; shuffling is always on here.
     loader_options = {
         "batch_size": self.batch_size,
         "shuffle": True,
         "collate_fn": md.Dataset.collate_fn,
     }
     return tud.DataLoader(smiles_dataset, **loader_options)
Esempio n. 2
0
 def _initialize_dataloader(self, path):
     """
     Builds the DataLoader for the SMILES file found at the given path.
     :param path: Location passed to uc.read_smi_file to load the training SMILES.
     :return: A torch DataLoader batched with self._batch_size; whether it
              shuffles is taken from self._shuffle_each_epoch.
     """
     smiles = uc.read_smi_file(path)
     smiles_dataset = md.Dataset(
         smiles_list=smiles,
         vocabulary=self._model.vocabulary,
         tokenizer=mv.SMILESTokenizer(),
     )
     return torch.utils.data.DataLoader(
         smiles_dataset,
         batch_size=self._batch_size,
         shuffle=self._shuffle_each_epoch,
         collate_fn=md.Dataset.collate_fn,
     )
Esempio n. 3
0
 def run(self, smiles_list):
     """
     Computes the NLL of every SMILES string in the input.
     :param smiles_list: List with SMILES.
     :return: An iterator yielding one NLL per SMILES, in the same order as the input list.
     """
     nll_dataset = md.Dataset(smiles_list, self.model.vocabulary, self.model.tokenizer)
     # shuffle=False keeps the output aligned with the input order.
     batches = tud.DataLoader(
         nll_dataset,
         batch_size=self.batch_size,
         collate_fn=md.Dataset.collate_fn,
         shuffle=False,
     )
     for batch in batches:
         # Move the batch likelihoods to host memory as a numpy array before iterating.
         batch_nlls = self.model.likelihood(*batch).data.cpu().numpy()
         for value in batch_nlls:
             yield value
 def run(self, scaffold_list):
     """
     Samples decorations from the model for the given scaffolds.
     :param scaffold_list: A list of scaffold SMILES.
     :return: An iterator over (scaffold, decoration, nll) triplets, produced batch by batch.
     """
     vocabulary = self.model.vocabulary
     scaffold_dataset = md.Dataset(
         scaffold_list,
         vocabulary.scaffold_vocabulary,
         vocabulary.scaffold_tokenizer,
     )
     batches = tud.DataLoader(
         scaffold_dataset,
         batch_size=self.batch_size,
         shuffle=False,
         collate_fn=md.Dataset.collate_fn,
     )
     for batch in batches:
         sampled = self.model.sample_decorations(*batch)
         # Unpack explicitly so every yielded item is a 3-tuple.
         for scaffold, decoration, nll in sampled:
             yield scaffold, decoration, nll
Esempio n. 5
0
 def initialize_dataloader(self, data_path, batch_size, vocab, data_type):
     """
     Loads '<data_type>.csv' from data_path and wraps it in a shuffled DataLoader.
     :param data_path: Directory that contains the CSV file.
     :param batch_size: Batch size forwarded to the DataLoader.
     :param vocab: Vocabulary handed to md.Dataset.
     :param data_type: CSV file name without the '.csv' extension (train or validation).
     :return: A torch DataLoader with shuffle=True, collated with md.Dataset.collate_fn.
     """
     csv_file = os.path.join(data_path, data_type + '.csv')
     frame = pd.read_csv(csv_file, sep=",")
     # prediction_mode=False: this loader is built for the train/validation data.
     csv_dataset = md.Dataset(
         data=frame,
         vocabulary=vocab,
         tokenizer=mv.SMILESTokenizer(),
         prediction_mode=False,
     )
     return torch.utils.data.DataLoader(
         csv_dataset,
         batch_size,
         shuffle=True,
         collate_fn=md.Dataset.collate_fn,
     )
Esempio n. 6
0
 def _initialize_dataloader(self, path):
     """
     Reads the SMILES file at the given path and builds its DataLoader.
     :param path: File passed to chem_smiles.read_smiles_file; the standardize and
                  randomize flags come from self._config.
     :return: A torch DataLoader whose batch size and shuffling are taken from
              self._config (batch_size, shuffle_each_epoch).
     """
     config = self._config
     smiles = chem_smiles.read_smiles_file(
         path,
         standardize=config.standardize,
         randomize=config.randomize,
     )
     smiles_dataset = reinvent_dataset.Dataset(
         smiles_list=smiles,
         vocabulary=self._model.vocabulary,
         tokenizer=reinvent_vocabulary.SMILESTokenizer(),
     )
     return torch.utils.data.DataLoader(
         smiles_dataset,
         batch_size=config.batch_size,
         shuffle=config.shuffle_each_epoch,
         collate_fn=reinvent_dataset.Dataset.collate_fn,
     )
    def initialize_dataloader(self, opt, vocab, test_file):
        """
        Build the DataLoader for a test CSV file.
        :param opt: options object; its data_path and batch_size attributes are read here
        :param vocab: vocabulary
        :param test_file: test file name without the '.csv' extension
        :return: a torch DataLoader with shuffle=False, collated with md.Dataset.collate_fn
        """
        csv_file = os.path.join(opt.data_path, test_file + '.csv')
        frame = pd.read_csv(csv_file, sep=",")
        # prediction_mode=True: the dataset is built for the test file.
        test_dataset = md.Dataset(
            data=frame,
            vocabulary=vocab,
            tokenizer=self.tokenizer,
            prediction_mode=True,
        )
        return torch.utils.data.DataLoader(
            test_dataset,
            opt.batch_size,
            shuffle=False,
            collate_fn=md.Dataset.collate_fn,
        )