def main(config):
    set_seed(config.seed)

    train = read_smiles_csv(config.train_load)
    vocab = CharVocab.from_data(train)
    torch.save(vocab, config.vocab_save)
    torch.save(config, config.config_save)

    device = torch.device(config.device)

    # condition mode
    if config.conditional:
        fps = read_fps_csv(config.train_load)
        fps = fps_to_list(fps)
        fps = [torch.tensor(f, dtype=torch.float, device=device)
               for f in fps]
        # fingerprints length
        fps_len = len(fps[0])
    else:
        fps = None
        fps_len = 0

    with Pool(config.n_jobs) as pool:
        reward_func = MetricsReward(train, config.n_ref_subsample,
                                    config.rollouts, pool,
                                    config.addition_rewards)
        model = ORGAN(vocab, config, fps_len, reward_func)
        model = model.to(device)

        trainer = ORGANTrainer(config)
        trainer.fit(model, train, fps)

    torch.save(model.state_dict(), config.model_save)
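# Usage sketch (not part of the training script): main() only assumes a config
# object exposing the attributes it reads, so an argparse.Namespace with those
# fields is enough to drive it. The concrete values below are illustrative
# assumptions, not the repository's defaults, and the full config also needs
# the model/training hyperparameters that ORGAN and ORGANTrainer expect.
from argparse import Namespace

example_config = Namespace(
    seed=0, device='cuda', n_jobs=4,
    train_load='train.csv', vocab_save='vocab.pt',
    config_save='config.pt', model_save='model.pt',
    conditional=False,
    n_ref_subsample=500, rollouts=16,
    addition_rewards=['sa', 'qed'],
)
# main(example_config)  # uncomment once the remaining ORGAN hyperparameters are set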
def main(config):
    set_seed(config.seed)

    model_config = torch.load(config.config_load)
    model_vocab = torch.load(config.vocab_load)
    model_state = torch.load(config.model_load)

    device = torch.device(config.device)

    model = ORGAN(model_vocab, model_config)
    model.load_state_dict(model_state)
    model = model.to(device)
    model.eval()

    samples = []
    n = config.n_samples
    with tqdm.tqdm(total=config.n_samples, desc='Generating samples') as T:
        while n > 0:
            current_samples = model.sample(min(n, config.n_batch),
                                           config.max_len)
            samples.extend(current_samples)
            n -= len(current_samples)
            T.update(len(current_samples))

    samples = pd.DataFrame(samples, columns=['SMILES'])
    samples.to_csv(config.gen_save, index=False)
def main(config):
    set_seed(config.seed)

    train = read_smiles_csv(config.train_load)
    vocab = CharVocab.from_data(train)

    device = torch.device(config.device)

    with Pool(config.n_jobs) as pool:
        reward_func = MetricsReward(train, config.n_ref_subsample,
                                    config.rollouts, pool,
                                    config.addition_rewards)
        model = ORGAN(vocab, config, reward_func)
        model = model.to(device)

        trainer = ORGANTrainer(config)
        trainer.fit(model, train)

    torch.save(model.state_dict(), config.model_save)
    torch.save(config, config.config_save)
    torch.save(vocab, config.vocab_save)
class OrganGenerator(DistributionMatchingGenerator):
    def __init__(self, config):
        model_config = torch.load(config.config_load)
        model_vocab = torch.load(config.vocab_load)
        model_state = torch.load(config.model_load)
        self.config = config

        device = torch.device(config.device)

        self.model = ORGAN(model_vocab, model_config)
        self.model.load_state_dict(model_state)
        self.model = self.model.to(device)
        self.model.eval()

    def generate(self, number_samples: int) -> List[str]:
        samples = []
        n = number_samples
        with tqdm.tqdm(total=number_samples, desc='Generating samples') as T:
            while n > 0:
                current_samples = self.model.sample(
                    min(n, self.config.n_batch), self.config.max_len)
                samples.extend(current_samples)
                n -= len(current_samples)
                T.update(len(current_samples))
        return samples
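# Usage sketch: OrganGenerator implements guacamol's DistributionMatchingGenerator
# interface, so it can be handed to the distribution-learning benchmark driver.
# The call below assumes guacamol's assess_distribution_learning entry point;
# the config object and file paths are placeholders, not values from this repo.
from guacamol.assess_distribution_learning import assess_distribution_learning

generator = OrganGenerator(config)  # config as loaded by the sampling scripts
assess_distribution_learning(
    generator,
    chembl_training_file='chembl_training.smiles',
    json_output_file='organ_distribution_learning.json')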
def main(config):
    set_seed(config.seed)

    model_config = torch.load(config.config_load)
    model_vocab = torch.load(config.vocab_load)
    model_state = torch.load(config.model_load)

    device = torch.device(config.device)

    # condition mode
    if config.conditional:
        print('Conditional generation')
        # target fingerprints
        fps_center = read_fps_csv(config.condition_load)
        fps_center = fps_to_list(list(set(fps_center)))
        fps_center = [
            torch.tensor(f, dtype=torch.float, device=device)
            for f in fps_center
        ]
        # target fingerprints length
        fps_len = len(fps_center[0])
        # target fingerprints number
        fps_num = len(fps_center)
    else:
        fps_center = [None]
        fps_len = 0
        fps_num = 1

    model = ORGAN(model_vocab, model_config, fps_len)
    model.load_state_dict(model_state)
    model = model.to(device)
    model.eval()

    # sampling: n is the remaining budget of config.n_samples, shared
    # across all fingerprint centers rather than reset per center
    gen_samples = []
    n = config.n_samples
    with tqdm.tqdm(total=config.n_samples, desc='Generating samples') as T:
        for i in range(fps_num):
            samples = []
            while n > 0:
                fps = fps_center[i]
                if config.conditional:
                    fps = fps_center[i].unsqueeze(0)
                current_samples = model.sample(fps, config.conditional,
                                               min(n, config.n_batch),
                                               config.max_len)
                samples.extend(current_samples)
                n -= len(current_samples)
                T.update(len(current_samples))
            gen_samples.extend(samples)

    df = pd.DataFrame(gen_samples, columns=['SMILES'])
    df.to_csv(config.gen_save, index=False)

    # Tanimoto similarity score and summary
    if config.conditional:
        calculate_score(config.gen_save, config.condition_load,
                        config.n_samples)
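# Illustration only: calculate_score is defined elsewhere in the repository and
# is not shown here. The sketch below shows one way the Tanimoto comparison it
# reports could be computed with RDKit, assuming Morgan fingerprints; the
# actual fingerprint type and summary statistics may differ.
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem

def tanimoto_to_target(gen_smiles, target_smiles, radius=2, n_bits=2048):
    # fingerprint of the conditioning target
    target_fp = AllChem.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(target_smiles), radius, nBits=n_bits)
    scores = []
    for smi in gen_smiles:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:  # skip invalid SMILES
            continue
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        scores.append(DataStructs.TanimotoSimilarity(target_fp, fp))
    return scores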