def _initialize_dataloader(self, training_set):
    """Wrap a list of training SMILES in a shuffled DataLoader.

    :param training_set: List of SMILES strings to train on.
    :return: A torch DataLoader yielding collated batches.
    """
    smiles_dataset = md.Dataset(
        smiles_list=training_set,
        vocabulary=self.model.vocabulary,
        tokenizer=mv.SMILESTokenizer())
    # Shuffle each epoch; collation pads/tensorizes via the Dataset helper.
    return tud.DataLoader(
        smiles_dataset,
        batch_size=self.batch_size,
        shuffle=True,
        collate_fn=md.Dataset.collate_fn)
def _initialize_dataloader(self, path):
    """Read a SMILES file from disk and build a DataLoader over it.

    :param path: Path to the .smi file with the training set.
    :return: A torch DataLoader over the tokenized SMILES.
    """
    smiles = uc.read_smi_file(path)
    # Shuffling per epoch is a run-level setting on this instance.
    return torch.utils.data.DataLoader(
        md.Dataset(
            smiles_list=smiles,
            vocabulary=self._model.vocabulary,
            tokenizer=mv.SMILESTokenizer()),
        batch_size=self._batch_size,
        shuffle=self._shuffle_each_epoch,
        collate_fn=md.Dataset.collate_fn)
def run(self, smiles_list):
    """Compute negative log-likelihoods for a set of SMILES strings.

    :param smiles_list: List with SMILES.
    :return: A generator of NLL values, in the same order as the input list.
    """
    # shuffle=False keeps output order aligned with smiles_list.
    nll_loader = tud.DataLoader(
        md.Dataset(smiles_list, self.model.vocabulary, self.model.tokenizer),
        batch_size=self.batch_size,
        shuffle=False,
        collate_fn=md.Dataset.collate_fn)
    for batch in nll_loader:
        # Move the batch of NLLs to host memory and stream them out one by one.
        yield from self.model.likelihood(*batch).data.cpu().numpy()
def run(self, scaffold_list):
    """Sample decorations from the model for the given scaffolds.

    :param scaffold_list: A list of scaffold SMILES.
    :return: A generator of (scaffold, decoration, nll) triplets.
    """
    # shuffle=False preserves the caller's scaffold ordering.
    scaffold_loader = tud.DataLoader(
        md.Dataset(
            scaffold_list,
            self.model.vocabulary.scaffold_vocabulary,
            self.model.vocabulary.scaffold_tokenizer),
        batch_size=self.batch_size,
        shuffle=False,
        collate_fn=md.Dataset.collate_fn)
    for batch in scaffold_loader:
        for scaffold, decoration, nll in self.model.sample_decorations(*batch):
            yield scaffold, decoration, nll
def initialize_dataloader(self, data_path, batch_size, vocab, data_type):
    """Build a shuffled DataLoader for a train/validation CSV split.

    :param data_path: Directory containing the split CSV files.
    :param batch_size: Number of samples per batch.
    :param vocab: Vocabulary used for tokenization.
    :param data_type: Split name used as the CSV file stem (e.g. 'train').
    :return: A torch DataLoader over the split.
    """
    csv_path = os.path.join(data_path, data_type + '.csv')
    split_frame = pd.read_csv(csv_path, sep=",")
    split_dataset = md.Dataset(
        data=split_frame,
        vocabulary=vocab,
        tokenizer=mv.SMILESTokenizer(),
        prediction_mode=False)
    return torch.utils.data.DataLoader(
        split_dataset,
        batch_size,
        shuffle=True,
        collate_fn=md.Dataset.collate_fn)
def _initialize_dataloader(self, path):
    """Load SMILES from disk and create a DataLoader per the run config.

    :param path: Path to the SMILES file.
    :return: A torch DataLoader configured from self._config.
    """
    cfg = self._config
    # Standardization/randomization of SMILES is controlled by the config.
    smiles = chem_smiles.read_smiles_file(
        path,
        standardize=cfg.standardize,
        randomize=cfg.randomize)
    smiles_dataset = reinvent_dataset.Dataset(
        smiles_list=smiles,
        vocabulary=self._model.vocabulary,
        tokenizer=reinvent_vocabulary.SMILESTokenizer())
    return torch.utils.data.DataLoader(
        smiles_dataset,
        batch_size=cfg.batch_size,
        shuffle=cfg.shuffle_each_epoch,
        collate_fn=reinvent_dataset.Dataset.collate_fn)
def initialize_dataloader(self, opt, vocab, test_file):
    """Build a non-shuffled, prediction-mode DataLoader over a test CSV.

    :param opt: Options object; reads opt.data_path and opt.batch_size.
    :param vocab: Vocabulary used for tokenization.
    :param test_file: Test file name stem (without the '.csv' suffix).
    :return: A torch DataLoader over the test data.
    """
    test_csv = os.path.join(opt.data_path, test_file + '.csv')
    test_frame = pd.read_csv(test_csv, sep=",")
    # prediction_mode=True: dataset prepared for inference rather than training.
    test_dataset = md.Dataset(
        data=test_frame,
        vocabulary=vocab,
        tokenizer=self.tokenizer,
        prediction_mode=True)
    return torch.utils.data.DataLoader(
        test_dataset,
        opt.batch_size,
        shuffle=False,
        collate_fn=md.Dataset.collate_fn)