Esempio n. 1
0
def reproduce(seed,
              samples_path=None,
              metrics_path=None,
              n_jobs=1,
              device='cpu',
              verbose=False,
              samples=30000):
    """Fit a CombinatorialGenerator, sample from it and compute MOSES metrics.

    Args:
        seed: Run index; selects the block of per-sample seeds
            ``[(seed - 1) * samples, seed * samples)``.
        samples_path: If not None, the generated strings are written to this
            file, one per line, below a ``SMILES`` header line.
        metrics_path: If not None, the metrics are written to this file as
            ``key,value`` lines.
        n_jobs: Forwarded to ``CombinatorialGenerator``, ``mapper`` and
            ``moses.get_all_metrics``.
        device: Forwarded to ``moses.get_all_metrics``.
        verbose: When true, print progress messages.
        samples: Number of molecules to generate.

    Returns:
        A ``(generated, metrics)`` tuple.
    """
    def log(message):
        # Progress output is only emitted in verbose mode.
        if verbose:
            print(message)

    training_set = moses.get_dataset('train')
    generator = CombinatorialGenerator(n_jobs=n_jobs)

    log("Training...")
    generator.fit(training_set)

    log(f"Sampling for seed {seed}")
    # Every generated molecule gets its own seed; consecutive values of
    # `seed` map to disjoint, contiguous seed ranges.
    first_seed = (seed - 1) * samples
    per_sample_seeds = list(range(first_seed, first_seed + samples))
    generated = mapper(n_jobs)(generator.generate_one, per_sample_seeds)

    if samples_path is not None:
        with open(samples_path, 'w') as out:
            out.write('SMILES\n')
            out.writelines(smi + '\n' for smi in generated)

    log(f"Computing metrics for seed {seed}")
    metrics = moses.get_all_metrics(generated, n_jobs=n_jobs, device=device)

    if metrics_path is not None:
        with open(metrics_path, 'w') as out:
            out.writelines("%s,%f\n" % item for item in metrics.items())

    return generated, metrics
Esempio n. 2
0
def reproduce(seed, samples_path=None, metrics_path=None,
              n_jobs=1, device='cpu', verbose=False,
              samples=30000):
    """Train an HMM on MOSES data, sample from it and compute MOSES metrics.

    The model is fitted on the first 100000 entries of the MOSES ``train``
    dataset.

    Args:
        seed: Passed to ``HMM`` and used to seed NumPy's global RNG right
            before sampling.
        samples_path: If not None, the generated strings are written to this
            file, one per line, below a ``SMILES`` header line.
        metrics_path: If not None, the metrics are written to this file as
            ``key,value`` lines.
        n_jobs: Forwarded to ``HMM`` and ``moses.get_all_metrics``.
        device: Forwarded to ``moses.get_all_metrics``.
        verbose: When true, print progress messages; also forwarded to
            ``HMM``.
        samples: Number of ``model.generate_one()`` calls to make.

    Returns:
        A ``(generated, metrics)`` tuple.
    """
    data = moses.get_dataset('train')[:100000]
    if verbose:
        print("Training...")
    model = HMM(n_jobs=n_jobs, seed=seed, verbose=verbose)
    model.fit(data)

    if verbose:
        print(f"Sampling for seed {seed}")
    # Seed once, immediately before sampling (the original seeded twice in a
    # row, which was redundant).
    np.random.seed(seed)
    generated = [model.generate_one() for _ in range(samples)]

    if samples_path is not None:
        with open(samples_path, 'w') as f:
            f.write('SMILES\n')
            f.writelines(sample + '\n' for sample in generated)

    if verbose:
        print(f"Computing metrics for seed {seed}")
    metrics = moses.get_all_metrics(
        generated, n_jobs=n_jobs, device=device)

    if metrics_path is not None:
        # Write to metrics_path: the original opened samples_path here, which
        # overwrote the samples file and failed when samples_path was None.
        with open(metrics_path, 'w') as f:
            for key, value in metrics.items():
                f.write("%s,%f\n" % (key, value))

    return generated, metrics
Esempio n. 3
0
def download_moses():
    """Fetch three MOSES datasets and save each one as a CSV file.

    Loads the ``train``, ``test`` and ``test_scaffolds`` datasets, wraps each
    in a DataFrame whose column ``0`` is renamed to ``smiles``, prints the
    head of the scaffolds frame, and writes the frames to ``../data``
    relative to the directory containing this script.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    print('>>> Loading data from moses')
    split_names = ('train', 'test', 'test_scaffolds')
    raw = [moses.get_dataset(split) for split in split_names]

    train_df, test_df, scaffolds_df = (
        pd.DataFrame(entries).rename(columns={0: 'smiles'})
        for entries in raw
    )

    print(scaffolds_df.head())

    print('>>> Saving data to csv files in ./data')
    outputs = (
        (train_df, '../data/moses_train.csv'),
        (test_df, '../data/moses_test.csv'),
        (scaffolds_df, '../data/moses_test_scaffolds.csv'),
    )
    for frame, relative_path in outputs:
        frame.to_csv(os.path.join(here, relative_path))
Esempio n. 4
0
def reproduce(seed, samples_path=None, metrics_path=None,
              n_jobs=1, device='cpu', verbose=False,
              samples=30000):
    """Fit an order-10 NGram model, sample from it and compute MOSES metrics.

    Args:
        seed: Seeds NumPy's global RNG right before generation.
        samples_path: If not None, the generated strings are written to this
            file, one per line, below a ``SMILES`` header line.
        metrics_path: If not None, the metrics are written to this file as
            ``key,value`` lines.
        n_jobs: Forwarded to ``moses.get_all_metrics``.
        device: Forwarded to ``moses.get_all_metrics``.
        verbose: Forwarded to ``NGram``.
        samples: Number of strings requested from ``model.generate``.

    Returns:
        A ``(generated, metrics)`` tuple.
    """
    training_set = moses.get_dataset('train')
    ngram = NGram(10, verbose=verbose)
    ngram.fit(training_set)

    np.random.seed(seed)
    generated = ngram.generate(samples, l_smooth=0.01)
    scores = moses.get_all_metrics(generated, n_jobs=n_jobs, device=device)

    # Metrics are computed before anything is written, so a metrics failure
    # leaves no output files behind.
    if samples_path is not None:
        with open(samples_path, 'w') as handle:
            handle.write('SMILES\n')
            handle.writelines(smi + '\n' for smi in generated)

    if metrics_path is not None:
        with open(metrics_path, 'w') as handle:
            handle.writelines("%s,%f\n" % item for item in scores.items())

    return generated, scores
Esempio n. 5
0
 def setUp(self):
     """Store the MOSES ``train`` dataset on the instance as ``self.train``."""
     train_split = moses.get_dataset('train')
     self.train = train_split
Esempio n. 6
0
import moses

# Load the MOSES 'train' dataset at import time and keep it in `train`.
train = moses.get_dataset('train')

Esempio n. 7
0
                        default='images/',
                        help='Store images in this folder')
    return parser


# Script entry point: load the reference and generated molecule sets, then
# evaluate each metric function on every set.
if __name__ == "__main__":
    disable_rdkit_log()
    parser = get_parser()
    # parse_known_args collects unrecognized arguments instead of exiting;
    # any leftover argument is treated as an error here.
    config, unknown = parser.parse_known_args()
    if len(unknown) != 0:
        raise ValueError("Unknown argument " + unknown[0])

    # Make sure the image output folder exists.
    os.makedirs(config.img_folder, exist_ok=True)

    # Molecule sets keyed by display name. The first entry, 'MOSES', holds
    # the 'test' dataset in a 'SMILES' column.
    generated = OrderedDict(
        {'MOSES': pd.DataFrame({'SMILES': get_dataset('test')})})
    # config.config is read as a CSV with 'path' and 'name' columns; each
    # path is itself read as a CSV and stored under its name.
    models = pd.read_csv(config.config)
    for path, name in zip(models['path'], models['name']):
        generated[name] = pd.read_csv(path)

    # Metric name -> function applied to each molecule object.
    metrics = {'weight': weight, 'logP': logP, 'SA': SA, 'QED': QED}

    # Add a 'ROMol' column built by applying get_mol to every SMILES entry.
    # NOTE(review): mapper(n_jobs) presumably returns a (possibly parallel)
    # map-like callable -- confirm against its definition.
    for s in generated.values():
        s['ROMol'] = mapper(config.n_jobs)(get_mol, s['SMILES'])

    # distributions[metric_name][set_name] holds the metric results for that
    # set. Rows whose 'ROMol' is missing are dropped first.
    # NOTE(review): presumably those are SMILES that get_mol could not
    # parse -- confirm.
    distributions = OrderedDict()
    for metric_name, metric_fn in metrics.items():
        distributions[metric_name] = OrderedDict()
        for _set, _molecules in generated.items():
            distributions[metric_name][_set] = mapper(config.n_jobs)(
                metric_fn, _molecules['ROMol'].dropna().values)
Esempio n. 8
0
import moses
from rdkit import Chem
from rdkit.Chem import Draw
import numpy as np
from tqdm import tqdm


# Load the MOSES 'train' and 'test' datasets at import time.
train = moses.get_dataset('train')
test = moses.get_dataset('test')
dataset = np.hstack([train, test])  # concatenate the two arrays we have
#test_scaffolds = moses.get_dataset('test_scaffolds')

print("Number of molecules to train: ", len(train), "Number of molecules to test: ", len(test))

# First entry of the train dataset; it is drawn to an image further down.
smiles_train = train[0]



def image_molecule(smiles, title="molecule.png"):
    """Draw the molecule described by *smiles* to a file and return its name.

    Args:
        smiles: SMILES string passed to ``Chem.MolFromSmiles``.
        title: Output file name handed to ``Draw.MolToFile``.

    Returns:
        ``title``, unchanged.
    """
    Draw.MolToFile(Chem.MolFromSmiles(smiles), title)
    return title

def smiles_number_to_smiles_string(input_molecule, dict):
    """Decode a sequence of token numbers into a single string.

    Args:
        input_molecule: Iterable of keys (token numbers) to decode, in order.
        dict: Mapping from token number to its string. The name shadows the
            builtin but is kept so that keyword callers keep working.

    Returns:
        The mapped strings concatenated in input order; ``""`` for an empty
        input.

    Raises:
        KeyError: If a token number is not present in ``dict``.
    """
    # The original body read the undefined names `smile` and
    # `dict_number_to_char` and ignored both parameters, so it raised
    # NameError. Use the arguments that were actually passed in.
    return "".join(dict[number] for number in input_molecule)


# Draw the first train entry to the default file name; `molecule1` holds the
# returned file name.
molecule1 = image_molecule(smiles_train)