Esempio n. 1
0
def reproduce(seed,
              samples_path=None,
              metrics_path=None,
              n_jobs=1,
              device='cpu',
              verbose=False,
              samples=30000):
    """Train a CombinatorialGenerator, sample from it and compute metrics.

    Args:
        seed: run index; it selects the block of per-sample integer seeds
            ``[(seed - 1) * samples, seed * samples)``.
        samples_path: if not None, the generated strings are written to
            this file, one per line, after a ``SMILES`` header line.
        metrics_path: if not None, the metrics are written to this file
            as ``key,value`` lines.
        n_jobs: forwarded to the generator, to ``mapper`` and to
            ``moses.get_all_metrics``.
        device: forwarded to ``moses.get_all_metrics``.
        verbose: when true, print progress messages.
        samples: how many per-sample seeds (and thus samples) to use.

    Returns:
        A ``(samples, metrics)`` tuple: the result of mapping
        ``model.generate_one`` over the seeds, and the metrics mapping.
    """
    def log(message):
        # Progress output is opt-in.
        if verbose:
            print(message)

    train = moses.get_dataset('train')
    generator = CombinatorialGenerator(n_jobs=n_jobs)

    log("Training...")
    generator.fit(train)

    log(f"Sampling for seed {seed}")
    # Every run index gets its own contiguous, non-overlapping seed block.
    first_seed = (seed - 1) * samples
    last_seed = seed * samples
    per_sample_seeds = list(range(first_seed, last_seed))
    generated = mapper(n_jobs)(generator.generate_one, per_sample_seeds)

    if samples_path is not None:
        with open(samples_path, 'w') as handle:
            handle.write('SMILES\n')
            handle.writelines(smiles + '\n' for smiles in generated)

    log(f"Computing metrics for seed {seed}")
    metrics = moses.get_all_metrics(generated, n_jobs=n_jobs, device=device)

    if metrics_path is not None:
        with open(metrics_path, 'w') as handle:
            handle.writelines(
                "%s,%f\n" % (name, score) for name, score in metrics.items()
            )

    return generated, metrics
Esempio n. 2
0
def reproduce(seed, samples_path=None, metrics_path=None,
              n_jobs=1, device='cpu', verbose=False,
              samples=30000):
    """Train an HMM on MOSES training data, sample from it and score it.

    Args:
        seed: passed to the HMM constructor and used to seed NumPy's
            global random generator right before sampling.
        samples_path: if not None, the generated strings are written to
            this file, one per line, after a ``SMILES`` header line.
        metrics_path: if not None, the metrics are written to this file
            as ``key,value`` lines.
        n_jobs: forwarded to the HMM and to ``moses.get_all_metrics``.
        device: forwarded to ``moses.get_all_metrics``.
        verbose: when true, print progress messages (also forwarded to
            the HMM constructor).
        samples: number of ``model.generate_one()`` calls to make.

    Returns:
        A ``(samples, metrics)`` tuple: the list of generated items and
        the metrics mapping returned by ``moses.get_all_metrics``.
    """
    # Only the first 100000 training entries are used for fitting.
    data = moses.get_dataset('train')[:100000]
    if verbose:
        print("Training...")
    model = HMM(n_jobs=n_jobs, seed=seed, verbose=verbose)
    model.fit(data)

    if verbose:
        print(f"Sampling for seed {seed}")
    # Seed once, after fitting, so sampling starts from a known state.
    np.random.seed(seed)
    generated = [model.generate_one() for _ in range(samples)]

    if samples_path is not None:
        with open(samples_path, 'w') as f:
            f.write('SMILES\n')
            for sample in generated:
                f.write(sample + '\n')

    if verbose:
        print(f"Computing metrics for seed {seed}")
    metrics = moses.get_all_metrics(
        generated, n_jobs=n_jobs, device=device)

    if metrics_path is not None:
        # Metrics go to their own file; they must not clobber the samples.
        with open(metrics_path, 'w') as f:
            for key, value in metrics.items():
                f.write("%s,%f\n" % (key, value))

    return generated, metrics
Esempio n. 3
0
def reproduce(seed, samples_path=None, metrics_path=None,
              n_jobs=1, device='cpu', verbose=False,
              samples=30000):
    """Fit an NGram model on the MOSES train split, sample and score it.

    Args:
        seed: seeds NumPy's global random generator before generation.
        samples_path: if not None, the generated strings are written to
            this file, one per line, after a ``SMILES`` header line.
        metrics_path: if not None, the metrics are written to this file
            as ``key,value`` lines.
        n_jobs: forwarded to ``moses.get_all_metrics``.
        device: forwarded to ``moses.get_all_metrics``.
        verbose: forwarded to the NGram constructor.
        samples: first argument passed to ``model.generate``.

    Returns:
        A ``(smiles, metrics)`` tuple: the output of ``model.generate``
        and the metrics mapping returned by ``moses.get_all_metrics``.
    """
    train = moses.get_dataset('train')
    ngram = NGram(10, verbose=verbose)
    ngram.fit(train)

    np.random.seed(seed)
    generated = ngram.generate(samples, l_smooth=0.01)

    # Metrics are computed before anything is written to disk.
    scores = moses.get_all_metrics(generated, n_jobs=n_jobs, device=device)

    if samples_path is not None:
        with open(samples_path, 'w') as handle:
            handle.write('SMILES\n')
            handle.writelines(item + '\n' for item in generated)

    if metrics_path is not None:
        with open(metrics_path, 'w') as handle:
            handle.writelines(
                "%s,%f\n" % (name, score) for name, score in scores.items()
            )

    return generated, scores
Esempio n. 4
0
# Script: load a trained model onto GPU 0, collect valid sampled SMILES,
# save them to disk, then compute and pickle the MOSES metrics.
torch.cuda.set_device(0)

moses_qed_props_model_path = "../models/moses/"
model.load(moses_qed_props_model_path)
model.cuda()

# NOTE(review): `random` is not used in the visible part of this script.
import random
from rdkit import RDLogger
# Silence RDKit logging while sampled strings are being checked.
RDLogger.DisableLog('rdApp.*')

# Minimum number of valid SMILES to collect before stopping.
n_target = 30000
# Argument passed to model.sample() on each iteration.
batch_size = 1000

generated = []
# Next count threshold at which a progress line is printed.
verbose_lim = 10000

print("Sampling smiles", flush=True)

# Whole batches are appended, so the final list may hold more than
# n_target entries.
while len(generated) < n_target:
    sampled = model.sample(batch_size)
    # Keep only the strings for which get_mol() returns a truthy value.
    sampled_valid = [s for s in sampled if get_mol(s)]

    generated += sampled_valid
    n_generated = len(generated)
    if n_generated >= verbose_lim:
        print("Generated %d of %d SMILES" % (n_generated, n_target), flush=True)
        verbose_lim += 10000

with open("../moses_sampling/sampled_smiles.csv", "w") as f:
    f.writelines("%s\n" % sm for sm in generated)

print("Calculating Metrics", flush=True)
metrics = moses.get_all_metrics(generated)
# Use a context manager so the pickle file is flushed and closed.
with open("metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)
Esempio n. 5
0
import os 
import sys
import argparse

if __name__ == '__main__':
    # Read a file of generated SMILES (one per line), compute the MOSES
    # metrics on them and print the results.
    # NOTE(review): `moses` is not imported in the visible part of this
    # file; confirm it is imported elsewhere.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '-i', "--generated_samples",
        help="Path to the file of generated SMILES (one per line), "
             "joined onto the parent of this script's directory",
        type=str, default='data/gen.txt')

    # Unknown extra arguments are tolerated and ignored.
    args, _ = parser.parse_known_args()
    # =======================================

    script_dir = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(os.path.join(script_dir, 'dataloaders'))
    sys.path.append(os.path.join(script_dir, 'data_processing'))

    samples_file = os.path.join(script_dir, '..', args.generated_samples)
    with open(samples_file, 'r') as f:
        smiles_list = [line.rstrip() for line in f]

    # Report the path that was actually read, not the default one.
    print(f'> Read {len(smiles_list)} smiles in {args.generated_samples}. '
          'Computing metrics...')
    metrics = moses.get_all_metrics(smiles_list)

    print('MOSES benchmark metrics :')
    for k, v in metrics.items():
        print(k, ':', f'{v:.4f}')

    # Values only, in the same order, for pasting into a spreadsheet
    # of benchmarks.
    for v in metrics.values():
        print(f'{v:.4f}')