Ejemplo n.º 1
0
 def test_predict_compound_names(self):
     """Check that prediction runs when the test CSV carries compound names.

     Points ``self.args.test_path`` at 'delaney_toy_smiles_names.csv' located
     next to this test file, enables ``use_compound_names`` and runs the
     prediction pipeline. Any exception raised on the way is reported as a
     test failure.
     """
     try:
         self.args.test_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'delaney_toy_smiles_names.csv')
         self.args.use_compound_names = True
         modify_predict_args(self.args)
         make_predictions(self.args)
     except Exception as exc:
         # Catch Exception rather than everything so that Ctrl-C and
         # interpreter exit are not reported as a test failure, and keep the
         # original error in the message so the failure can be diagnosed.
         self.fail(f'predict_compound_names: {exc!r}')
Ejemplo n.º 2
0
def predict_outside(args_dict):
    """
    Used for calling this script from another python script.

    The value returned by ``create_args(args_dict, 'predict.py')`` is
    installed as ``sys.argv`` so that ``PredictArgs().parse_args()`` reads it,
    then predictions are made with the parsed arguments. The caller's
    ``sys.argv`` is restored before returning, also when parsing or
    prediction raises.

    :param args_dict: dict of args to use
    """
    saved_argv = sys.argv
    sys.argv = create_args(args_dict, 'predict.py')

    try:
        args = PredictArgs().parse_args()
        make_predictions(args)
    finally:
        # sys.argv is process-wide state; do not leak the fake command line
        # into the calling script.
        sys.argv = saved_argv
Ejemplo n.º 3
0
    def mlCharges(mols):
        """
        Compute per-atom charges for every molecule in ``mols`` from the
        predictions of the 'QM_137k.pt' checkpoint.

        If a ValueError is raised while reordering the predicted charges of a
        molecule, Gasteiger charges are computed for that molecule instead.

        Returns a list holding one list of charges per input molecule.
        """

        # The input mols MUST carry their hydrogens!
        smiles = [Chem.MolToSmiles(m) for m in mols]

        model_path = pkg_resources.resource_filename(__name__, 'QM_137k.pt')
        settings = {
            'batch_size': 50,
            'checkpoint_dir': None,
            'checkpoint_path': model_path,
            'checkpoint_paths': [model_path],
            'cuda': False,
            'features_generator': None,
            'features_path': None,
            'gpu': None,
            'max_data_size': None,
            'no_features_scaling': False,
            'preds_path': None,
            'test_path': None,
            'use_compound_names': False,
        }
        args = Namespace(**settings)

        # Anything written to stdout during prediction is sent to os.devnull.
        with open(os.devnull, 'w') as sink, contextlib.redirect_stdout(sink):
            test_preds, test_smiles = make_predictions(args, smiles=smiles)

        n_atoms, n_bonds = zip(*[num_atoms_bonds(s) for s in smiles])

        # Cut the flattened first prediction into one chunk per molecule,
        # using the cumulative atom counts as split points.
        split_points = np.cumsum(np.array(n_atoms))
        flat_charges = test_preds[0].flatten()
        partial_charge = np.split(flat_charges, split_points)[:-1]

        charges = []
        for idx, mol in enumerate(mols):
            try:
                order = get_reorder_list(mol)
                mol_charges = [
                    partial_charge[idx][order[atom_idx]]
                    for atom_idx in range(mol.GetNumAtoms())
                ]
            except ValueError:
                # No usable prediction for this molecule: use Gasteiger
                print(
                    "Warning: could not obtain prediction, defaulting to Gasteiger charges for one molecule"
                )
                AllChem.ComputeGasteigerCharges(mol)
                mol_charges = [
                    atom.GetDoubleProp('_GasteigerCharge')
                    for atom in mol.GetAtoms()
                ]
            charges.append(mol_charges)

        return charges
Ejemplo n.º 4
0
def predict():
    """Renders the predict page and makes predictions if the method is POST.

    SMILES are taken, in order of precedence, from the 'textSmiles' form
    field (split on whitespace), the 'drawSmiles' form field, or an uploaded
    'data' file. Predictions are made with every model registered for the
    checkpoint id in the 'checkpointName' form field.

    The page is rendered with an error when no SMILES were supplied or when
    every prediction is None, and with a warning when only some predictions
    are None.
    """
    if request.method == 'GET':
        return render_predict()

    # Get arguments
    ckpt_id = request.form['checkpointName']

    if request.form['textSmiles'] != '':
        smiles = request.form['textSmiles'].split()
    elif request.form['drawSmiles'] != '':
        smiles = [request.form['drawSmiles']]
    else:
        # Upload data file with SMILES
        data = request.files['data']
        data_name = secure_filename(data.filename)
        data_path = os.path.join(app.config['TEMP_FOLDER'], data_name)
        data.save(data_path)

        # Check if header is smiles
        possible_smiles = get_header(data_path)[0]
        smiles = [possible_smiles
                  ] if Chem.MolFromSmiles(possible_smiles) is not None else []

        # Get remaining smiles
        smiles.extend(get_smiles(data_path))

    # Nothing to predict (e.g. whitespace-only text or a file without
    # SMILES). Report it here: further down, all() over an empty prediction
    # list is True and would be misreported as "All SMILES are invalid".
    if not smiles:
        return render_predict(errors=['No SMILES strings given'])

    models = db.get_models(ckpt_id)
    model_paths = [
        os.path.join(app.config['CHECKPOINT_FOLDER'], f'{model["id"]}.pt')
        for model in models
    ]

    task_names = load_task_names(model_paths[0])
    num_tasks = len(task_names)
    gpu = request.form.get('gpu')
    train_args = load_args(model_paths[0])

    # Build arguments
    arguments = [
        '--test_path', 'None', '--preds_path',
        os.path.join(app.config['TEMP_FOLDER'],
                     app.config['PREDICTIONS_FILENAME']), '--checkpoint_paths',
        *model_paths
    ]

    if gpu is not None:
        if gpu == 'None':
            arguments.append('--no_cuda')
        else:
            arguments += ['--gpu', gpu]

    # Handle additional features
    if train_args.features_path is not None:
        # TODO: make it possible to specify the features generator if trained using features_path
        arguments += [
            '--features_generator', 'rdkit_2d_normalized',
            '--no_features_scaling'
        ]
    elif train_args.features_generator is not None:
        arguments += ['--features_generator', *train_args.features_generator]

        if not train_args.features_scaling:
            arguments.append('--no_features_scaling')

    # Parse arguments
    args = PredictArgs().parse_args(arguments)

    # Run predictions
    preds = make_predictions(args=args, smiles=smiles)

    if all(p is None for p in preds):
        return render_predict(errors=['All SMILES are invalid'])

    # Record whether any prediction is missing BEFORE the None entries are
    # replaced below; checking afterwards could never find one.
    has_invalid = any(pred is None for pred in preds)

    # Replace invalid smiles with message
    invalid_smiles_warning = 'Invalid SMILES String'
    preds = [
        pred if pred is not None else [invalid_smiles_warning] * num_tasks
        for pred in preds
    ]

    return render_predict(
        predicted=True,
        smiles=smiles,
        num_smiles=min(10, len(smiles)),
        show_more=max(0,
                      len(smiles) - 10),
        task_names=task_names,
        num_tasks=num_tasks,
        preds=preds,
        warnings=["List contains invalid SMILES strings"]
        if has_invalid else None,
        errors=None)
def predict_desc(args):
    """Predict descriptors for all reactants of a reaction dataset.

    Reads the CSV at ``args.data_path``, collects the unique reactant SMILES
    from its 'rxn_smiles' column, runs ``make_predictions`` on them with the
    'QM_137k.pt' checkpoint and stores the per-molecule results.

    Files written:
      * ``<args.output_dir>/reactants_descriptors.pickle`` - raw descriptors
      * ``<args.output_dir>/reactants_descriptors_norm.pickle`` - descriptors
        after ``min_max_normalize``
      * ``<args.model_dir>/scalers.pickle`` - written when ``args.predict`` is
        false; read back and reused when it is true

    :param args: argument namespace; ``test_path``, ``preds_path`` and
        ``checkpoint_path`` are overwritten here.
    :return: the normalized descriptor DataFrame.
    """
    import chemprop
    chemprop_root = os.path.dirname(os.path.dirname(chemprop.__file__))

    # trick chemprop: dummy paths are set before modify_predict_args is
    # called; the SMILES are handed to make_predictions directly below.
    args.test_path = 'foo'
    args.preds_path = 'foo'
    args.checkpoint_path = os.path.join(chemprop_root, 'trained_model',
                                        'QM_137k.pt')
    modify_predict_args(args)

    def num_atoms_bonds(smiles):
        """Return (atom count, bond count) of the molecule with Hs added."""
        m = Chem.MolFromSmiles(smiles)

        m = Chem.AddHs(m)

        return len(m.GetAtoms()), len(m.GetBonds())

    def split_per_molecule(values, offsets):
        """Cut a flattened prediction array into one chunk per molecule."""
        return np.split(values.flatten(), offsets)[:-1]

    # predict descriptors for reactants in the reactions
    reactivity_data = pd.read_csv(args.data_path, index_col=0)
    reactants = set()
    for rxn_smiles in reactivity_data['rxn_smiles']:
        # 'reactants>agents>products': only the reactant side is needed
        rs, _, _ = rxn_smiles.split('>')
        reactants.update(rs.split('.'))
    reactants = list(reactants)

    print('Predicting descriptors for reactants...')
    test_preds, test_smiles = make_predictions(args, smiles=reactants)

    n_atoms, n_bonds = zip(*[num_atoms_bonds(x) for x in reactants])

    # Split points are shared by all atom-level / bond-level descriptors,
    # so compute them once.
    atom_offsets = np.cumsum(np.array(n_atoms))
    bond_offsets = np.cumsum(np.array(n_bonds))

    partial_charge = split_per_molecule(test_preds[0], atom_offsets)
    partial_neu = split_per_molecule(test_preds[1], atom_offsets)
    partial_elec = split_per_molecule(test_preds[2], atom_offsets)
    NMR = split_per_molecule(test_preds[3], atom_offsets)

    bond_order = split_per_molecule(test_preds[4], bond_offsets)
    bond_distance = split_per_molecule(test_preds[5], bond_offsets)

    df = pd.DataFrame({
        'smiles': reactants,
        'partial_charge': partial_charge,
        'fukui_neu': partial_neu,
        'fukui_elec': partial_elec,
        'NMR': NMR,
        'bond_order': bond_order,
        'bond_length': bond_distance
    })

    invalid = check_chemprop_out(df)
    # FIXME remove invalid molecules from reaction dataset
    print(invalid)

    os.makedirs(args.output_dir, exist_ok=True)

    df.to_pickle(os.path.join(args.output_dir, 'reactants_descriptors.pickle'))
    save_dir = args.model_dir
    scalers_path = os.path.join(save_dir, 'scalers.pickle')

    if not args.predict:
        df, scalers = min_max_normalize(df)
        # Context managers make sure the pickle file is flushed and closed.
        with open(scalers_path, 'wb') as scalers_file:
            pickle.dump(scalers, scalers_file)
    else:
        # NOTE: unpickling executes arbitrary code; only load scalers files
        # produced by this pipeline.
        with open(scalers_path, 'rb') as scalers_file:
            scalers = pickle.load(scalers_file)
        df, _ = min_max_normalize(df, scalers)

    df.to_pickle(
        os.path.join(args.output_dir, 'reactants_descriptors_norm.pickle'))

    return df
Ejemplo n.º 6
0
def num_atoms_bonds(smiles):
    """Return ``(atom count, bond count)`` for a SMILES string.

    The molecule is parsed with ``Chem.MolFromSmiles`` and hydrogens are
    added with ``Chem.AddHs`` before counting, so both counts include them.
    """
    mol_with_hs = Chem.AddHs(Chem.MolFromSmiles(smiles))
    atom_count = len(mol_with_hs.GetAtoms())
    bond_count = len(mol_with_hs.GetBonds())
    return atom_count, bond_count


if __name__ == '__main__':
    args = parse_predict_args()

    test_df = pd.read_csv(args.test_path, index_col=0)
    smiles = test_df.smiles.tolist()

    start = time.time()
    test_preds, test_smiles = make_predictions(args, smiles=smiles)
    end = time.time()

    print('time:{}s'.format(end - start))

    partial_charge = test_preds[0]
    partial_neu = test_preds[1]
    partial_elec = test_preds[2]
    NMR = test_preds[3]

    bond_order = test_preds[4]
    bond_distance = test_preds[5]

    n_atoms, n_bonds = zip(*[num_atoms_bonds(x) for x in smiles])

    partial_charge = np.split(partial_charge.flatten(),
Ejemplo n.º 7
0
 def test_predict(self):
     """Check that the prediction pipeline runs with ``self.args``.

     Any exception raised by ``modify_predict_args`` or ``make_predictions``
     is reported as a test failure.
     """
     try:
         modify_predict_args(self.args)
         make_predictions(self.args)
     except Exception as exc:
         # Catch Exception rather than everything so that Ctrl-C and
         # interpreter exit are not reported as a test failure, and keep the
         # original error in the message so the failure can be diagnosed.
         self.fail(f'predict: {exc!r}')
Ejemplo n.º 8
0
# -*- coding: utf-8 -*-
"""
Created on Wed Dec  4 19:42:36 2019

@author: SY
"""
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from chemprop.parsing import parse_train_args, modify_train_args
from chemprop.train import make_predictions

if __name__ == '__main__':
    # Parse the command line with the training argument parser and let
    # chemprop post-process the resulting namespace.
    args = parse_train_args()
    # args.checkpoint_dir = './ckpt'
    modify_train_args(args)

    # Predict for every entry of the 'smiles' column of the input CSV.
    df = pd.read_csv(args.data_path)
    pred, smiles = make_predictions(args, df.smiles.tolist())

    # One output column per predicted value: pred_0, pred_1, ...
    df = pd.DataFrame({'smiles': smiles})
    # Guard against an empty prediction list, for which pred[0] would raise
    # IndexError; in that case only the 'smiles' column is written.
    n_outputs = len(pred[0]) if len(pred) else 0
    for i in range(n_outputs):
        df[f'pred_{i}'] = [item[i] for item in pred]
    df.to_csv('./predict.csv', index=False)
Ejemplo n.º 9
0
"""Loads a trained model checkpoint and makes predictions on a dataset."""

from chemprop.parsing import parse_predict_args
from chemprop.train import make_predictions

if __name__ == '__main__':
    # Script entry point: parse the prediction arguments from the command
    # line and hand them straight to make_predictions.
    make_predictions(parse_predict_args())