def reproduce(seed, samples_path=None, metrics_path=None,
              n_jobs=1, device='cpu', verbose=False,
              samples=30000):
    """Fit the combinatorial generator, sample from it and score the samples.

    Args:
        seed: run index; generation seeds are taken from the range
            ``[(seed - 1) * samples, seed * samples)``.
        samples_path: if not None, the generated samples are written to
            this file, one per line, under a ``SMILES`` header line.
        metrics_path: if not None, the metrics are written to this file
            as ``key,value`` lines.
        n_jobs: forwarded to the generator, the mapper and the metrics.
        device: forwarded to ``moses.get_all_metrics``.
        verbose: print progress messages when true.
        samples: number of samples to generate.

    Returns:
        A ``(generated_samples, metrics)`` tuple.
    """
    def log(message):
        # Progress output is emitted only on request.
        if verbose:
            print(message)

    train = moses.get_dataset('train')
    model = CombinatorialGenerator(n_jobs=n_jobs)
    log("Training...")
    model.fit(train)

    log(f"Sampling for seed {seed}")
    # Each run index owns its own block of per-sample seeds.
    first_seed = (seed - 1) * samples
    stop_seed = seed * samples
    generation_seeds = list(range(first_seed, stop_seed))
    generated = mapper(n_jobs)(model.generate_one, generation_seeds)

    if samples_path is not None:
        with open(samples_path, 'w') as out:
            out.write('SMILES\n')
            out.writelines(smi + '\n' for smi in generated)

    log(f"Computing metrics for seed {seed}")
    metrics = moses.get_all_metrics(generated, n_jobs=n_jobs, device=device)

    if metrics_path is not None:
        with open(metrics_path, 'w') as out:
            out.writelines("%s,%f\n" % item for item in metrics.items())

    return generated, metrics
def reproduce(seed, samples_path=None, metrics_path=None,
              n_jobs=1, device='cpu', verbose=False,
              samples=30000):
    """Fit an HMM on a slice of the training set, sample and score it.

    Args:
        seed: seed passed to the HMM constructor and to ``np.random.seed``
            before sampling.
        samples_path: if not None, the generated samples are written to
            this file, one per line, under a ``SMILES`` header line.
        metrics_path: if not None, the metrics are written to this file
            as ``key,value`` lines.
        n_jobs: forwarded to the HMM and to ``moses.get_all_metrics``.
        device: forwarded to ``moses.get_all_metrics``.
        verbose: print progress messages (also forwarded to the HMM).
        samples: number of samples to generate.

    Returns:
        A ``(generated_samples, metrics)`` tuple.
    """
    # Only the first 100000 training entries are used for fitting.
    data = moses.get_dataset('train')[:100000]
    if verbose:
        print("Training...")
    model = HMM(n_jobs=n_jobs, seed=seed, verbose=verbose)
    model.fit(data)

    if verbose:
        print(f"Sampling for seed {seed}")
    # Re-seed right before sampling so generation is reproducible
    # regardless of how much random state fitting consumed.
    np.random.seed(seed)
    generated = [model.generate_one() for _ in range(samples)]

    if samples_path is not None:
        with open(samples_path, 'w') as f:
            f.write('SMILES\n')
            f.writelines(sample + '\n' for sample in generated)

    if verbose:
        print(f"Computing metrics for seed {seed}")
    metrics = moses.get_all_metrics(
        generated, n_jobs=n_jobs, device=device)

    if metrics_path is not None:
        # Metrics go to metrics_path; writing them to samples_path would
        # clobber the samples file (or fail when samples_path is None).
        with open(metrics_path, 'w') as f:
            for key, value in metrics.items():
                f.write("%s,%f\n" % (key, value))

    return generated, metrics
def reproduce(seed, samples_path=None, metrics_path=None,
              n_jobs=1, device='cpu', verbose=False,
              samples=30000):
    """Fit a 10-gram model on the training set, sample and score it.

    Args:
        seed: value passed to ``np.random.seed`` before generation.
        samples_path: if not None, the generated samples are written to
            this file, one per line, under a ``SMILES`` header line.
        metrics_path: if not None, the metrics are written to this file
            as ``key,value`` lines.
        n_jobs: forwarded to ``moses.get_all_metrics``.
        device: forwarded to ``moses.get_all_metrics``.
        verbose: forwarded to the ``NGram`` constructor.
        samples: number of samples requested from the model.

    Returns:
        A ``(generated_samples, metrics)`` tuple.
    """
    train = moses.get_dataset('train')
    ngram = NGram(10, verbose=verbose)
    ngram.fit(train)

    np.random.seed(seed)
    generated = ngram.generate(samples, l_smooth=0.01)
    # Metrics are computed before anything is written to disk.
    scores = moses.get_all_metrics(generated, n_jobs=n_jobs, device=device)

    if samples_path is not None:
        with open(samples_path, 'w') as handle:
            handle.write('SMILES\n')
            handle.writelines(smi + '\n' for smi in generated)

    if metrics_path is not None:
        with open(metrics_path, 'w') as handle:
            handle.writelines("%s,%f\n" % item for item in scores.items())

    return generated, scores
# Load the trained model onto GPU 0, sample valid SMILES until the target
# count is reached, save them, then compute and pickle the MOSES metrics.
torch.cuda.set_device(0)
moses_qed_props_model_path = "../models/moses/"
model.load(moses_qed_props_model_path)
model.cuda()

import random
from rdkit import RDLogger

# Silence RDKit log output while sampled strings are being checked.
RDLogger.DisableLog('rdApp.*')

n_target = 30000        # minimum number of valid SMILES to collect
sample_batch = 1000     # number of strings requested per model.sample call
verbose_step = 10000    # progress is reported each time this many more are collected

generated = []
verbose_lim = verbose_step
print("Sampling smiles", flush=True)
while len(generated) < n_target:
    sampled = model.sample(sample_batch)
    # Keep only strings for which get_mol returns a truthy value.
    sampled_valid = [s for s in sampled if get_mol(s)]
    # The whole batch is kept, so the final total may exceed n_target.
    generated += sampled_valid
    n_generated = len(generated)
    if n_generated >= verbose_lim:
        print("Generated %d of %d SMILES" % (n_generated, n_target), flush=True)
        verbose_lim += verbose_step

with open("../moses_sampling/sampled_smiles.csv", "w") as f:
    f.writelines("%s\n" % sm for sm in generated)

print("Calculating Metrics", flush=True)
metrics = moses.get_all_metrics(generated)

# Use a context manager so the pickle file is flushed and closed
# deterministically instead of leaking the handle.
with open("metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)
import os
import sys
import argparse

if __name__ == '__main__':
    # Read a file of generated SMILES (one per line) and print the MOSES
    # benchmark metrics computed on them.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', "--generated_samples",
                        help="Path to the file of generated SMILES (one per "
                             "line), relative to the parent of this script's "
                             "directory",
                        type=str, default='data/gen.txt')
    # parse_known_args tolerates extra command-line options.
    args, _ = parser.parse_known_args()

    # =======================================

    script_dir = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(os.path.join(script_dir, 'dataloaders'))
    sys.path.append(os.path.join(script_dir, 'data_processing'))

    samples_file = os.path.join(script_dir, '..', args.generated_samples)
    with open(samples_file, 'r') as f:
        smiles_list = [line.rstrip() for line in f]

    # Report the path that was actually read, not the default one.
    print(f'> Read {len(smiles_list)} smiles in {args.generated_samples}. '
          'Computing metrics...')

    metrics = moses.get_all_metrics(smiles_list)

    print('MOSES benchmark metrics :')
    for k, v in metrics.items():
        print(k, ':', f'{v:.4f}')

    # to copy values to excel sheet with benchmarks
    for v in metrics.values():
        print(f'{v:.4f}')