def penalized_logP(mol_or_smiles, masked=False, default=-5):
    """Score a molecule as ``logP - SA - get_num_rings_6``.

    Args:
        mol_or_smiles: Input handed to ``get_mol``; presumably an RDKit
            molecule or a SMILES string (verify against ``get_mol``).
        masked: When truthy, a molecule that fails ``mol_passes_filters``
            receives ``default`` instead of its computed score.
        default: Value returned when ``get_mol`` yields ``None`` or when the
            molecule is masked out.

    Returns:
        The computed score, or ``default`` in the two cases described above.
    """
    molecule = get_mol(mol_or_smiles)
    if molecule is None:
        return default
    # The score is evaluated unconditionally, before the filter is consulted.
    penalized = logP(molecule) - SA(molecule) - get_num_rings_6(molecule)
    filtered_out = masked and not mol_passes_filters(molecule)
    return default if filtered_out else penalized
Beispiel #2
0
    # Keep at most the first 50 molecules of this round.
    valid_smiles = valid_smiles[:50]
    if len(new_features) == 0:
        # Nothing was collected: use an empty matrix that still has the same
        # number of columns as X_train.
        new_features = np.zeros((0, X_train.shape[1]))
    else:
        # Stack the collected rows and apply the same 50-row cap.
        new_features = np.vstack(new_features)[:50]
    # Make sure the output directory exists before writing into it.
    os.makedirs(args.save_dir, exist_ok=True)
    smiles_file = "valid_smiles{}.dat".format(iteration)
    save_object(valid_smiles, os.path.join(args.save_dir, smiles_file))

    # The reference statistics do not depend on the candidate molecule, so
    # they are reduced once here instead of on every pass through the loop.
    sa_mean, sa_std = np.mean(SA_scores), np.std(SA_scores)
    logp_mean, logp_std = np.mean(logP_values), np.std(logP_values)
    cycle_mean, cycle_std = np.mean(cycle_scores), np.std(cycle_scores)

    scores = []
    for smiles in valid_smiles:
        mol = MolFromSmiles(smiles)
        current_log_P_value = Descriptors.MolLogP(mol)
        current_SA_score = -SA(mol)  # SA enters the score negated
        current_cycle_score = max_ring_penalty(mol)

        # Standardize each property against the reference arrays.
        current_SA_score_normalized = (current_SA_score - sa_mean) / sa_std
        current_log_P_value_normalized = (
            current_log_P_value - logp_mean) / logp_std
        current_cycle_score_normalized = (
            current_cycle_score - cycle_mean) / cycle_std

        score = (current_SA_score_normalized + current_log_P_value_normalized +
                 current_cycle_score_normalized)
        scores.append(-score)  # target is always minused

    print(f"{len(valid_smiles)} molecules found. Scores: {scores}")
    save_object(scores,
Beispiel #3
0
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
# Take the first field of every entry in RDKit's descriptor list and build a
# single calculator from the resulting names.
nms = [entry[0] for entry in Descriptors._descList]
calc = MoleculeDescriptors.MolecularDescriptorCalculator(nms)
from tensorflow.keras.models import load_model

from moses.metrics import mol_passes_filters, QED, SA, logP
from moses.metrics.utils import get_n_rings, get_mol

from rdkit.Chem import Descriptors as des
from rdkit.Chem.rdMolDescriptors import CalcNumAtomStereoCenters, CalcNumAmideBonds
from desfun import Similarity, User_Fragment

descrittori = {
    'SA': (lambda x: SA(x)),
    'QED': (lambda x: QED(x)),
    ### rdkit ###
    'MolWt': (lambda x: des.MolWt(x)),
    'LogP': (lambda x: des.MolLogP(x)),
    'MolMR': (lambda x: des.MolMR(x)),
    'NumHDonors': (lambda x: des.NumHDonors(x)),
    'NumHAcceptors': (lambda x: des.NumHAcceptors(x)),
    'HeavyAtomCount': (lambda x: des.HeavyAtomCount(x)),
    'NHOHCount': (lambda x: des.NHOHCount(x)),
    'NOCount': (lambda x: des.NOCount(x)),
    'CalcNumAtomStereoCenters': (lambda x: CalcNumAtomStereoCenters(x)),
    'CalcNumAmideBonds': (lambda x: CalcNumAmideBonds(x)),
    'NumAliphaticCarbocycles': (lambda x: des.NumAliphaticCarbocycles(x)),
    'NumAliphaticHeterocycles': (lambda x: des.NumAliphaticHeterocycles(x)),
    'NumAliphaticRings': (lambda x: des.NumAliphaticRings(x)),
Beispiel #4
0
# Wrap the SMILES strings in a loader that yields padded batches in their
# original order (shuffle=False).
print("Preparing dataset...")
collate_pad = partial(collate, pad=model.vocab.pad, return_data=True)
dataset = StringDataset(model.vocab, smiles)
data_loader = DataLoader(
    dataset,
    batch_size=512,
    shuffle=False,
    collate_fn=collate_pad,
)

# For every batch, store the latent mean and the three per-molecule
# properties (logP, negated SA, ring penalty).
print("Getting latent codes...")
for batch in tqdm(data_loader):
    # The last batch element is iterated as SMILES strings below; presumably
    # that is what return_data=True appends -- confirm against `collate`.
    encoder_inputs, batch_smiles = batch[:-1], batch[-1]

    z = model.encode(batch_to_device(encoder_inputs, args.device))
    mu, _ = model.get_mu_std(z)
    latent_points.append(mu.detach().cpu().numpy())

    romol = [Chem.MolFromSmiles(text.strip()) for text in batch_smiles]
    logP_values.extend([Descriptors.MolLogP(mol) for mol in romol])
    SA_scores.extend([-SA(mol) for mol in romol])
    cycle_scores.extend([max_ring_penalty(mol) for mol in romol])

def _standardize(values):
    """Return ``values`` minus its mean, divided by its standard deviation."""
    return (values - values.mean()) / values.std()


# Turn the accumulated per-molecule property lists into arrays.
SA_scores = np.array(SA_scores)
logP_values = np.array(logP_values)
cycle_scores = np.array(cycle_scores)

# Standardize each property separately.
SA_scores_normalized = _standardize(SA_scores)
logP_values_normalized = _standardize(logP_values)
cycle_scores_normalized = _standardize(cycle_scores)

# Stack the per-batch latent arrays into one array.
latent_points = np.vstack(latent_points)

# The target is the sum of the three standardized properties.
targets = (
    SA_scores_normalized
    + logP_values_normalized
    + cycle_scores_normalized
)