Ejemplo n.º 1
0
def predict_interaction(smiles,
                        smiles_b,
                        model='mlp',
                        feature='ECFP',
                        directory=''):
    '''Use a saved model to predict interactions between two molecules

    The model is loaded from ``<directory>/<model>_<feature>.h5``. Both
    SMILES strings are converted with ``smiles_to_ECFP`` and the resulting
    vectors are concatenated (first molecule, then second) into a
    single-row input for the model.

    Args :
        smiles (str): First SMILES string
        smiles_b (str): Second SMILES string
        model (str): Name of model used to train (first part of file name)
        feature (str): Name of feature embedding used (second part of file
            name). NOTE: this only selects the model file; the fingerprint
            is always computed with ``smiles_to_ECFP``.
        directory (str): Path to directory containing model

    Returns :
        prediction (numpy.ndarray): Array containing prediction from model

    Raises :
        ValueError: If ``smiles_to_ECFP`` returns None for either input
    '''

    # Validate the inputs before paying for the model load; a None
    # fingerprint would otherwise surface as an opaque numpy error.
    vec_a = smiles_to_ECFP(smiles)
    if vec_a is None:
        raise ValueError(
            'Could not compute fingerprint for SMILES: {!r}'.format(smiles))

    vec_b = smiles_to_ECFP(smiles_b)
    if vec_b is None:
        raise ValueError(
            'Could not compute fingerprint for SMILES: {!r}'.format(smiles_b))

    model_path = os.path.join(directory, model + '_' + feature + '.h5')
    # Separate name so the ``model`` argument (a str) is not rebound
    loaded_model = tf.keras.models.load_model(model_path)

    test = np.concatenate((vec_a, vec_b)).reshape((1, -1))
    prediction = loaded_model.predict(test)

    return prediction
def predict_from_files(candidates_file: str, drugs_file: str,
                       target_file: str, model_file: str) -> None:
    '''Use model to predict interaction between candidates and drugs

    Every candidate is paired with every drug. For each pair whose SMILES
    strings both convert via ``smiles_to_ECFP``, the three top-ranked
    labels from ``get_top_n`` (translated through ``label_lookup.csv``,
    read relative to the current working directory) and their
    probabilities are written as one row of the result csv.

    Args :
        candidates_file (str): Path to txt file with candidate SMILES strings
            (one per line; blank lines are ignored)
        drugs_file (str): Path to txt file with drug SMILES strings
            (one per line; blank lines are ignored)
        target_file (str): Path to csv file to write results to
        model_file (str): Path to the pre-trained model to load and use to
            predict interactions

    Returns :
        None
    '''

    # Strip surrounding whitespace so the trailing newline does not end up
    # inside the SMILES cells of the csv, and drop empty lines.
    with open(candidates_file) as file:
        candidates_list = [line.strip() for line in file if line.strip()]
    print('Loaded drug candidates.')

    with open(drugs_file) as file:
        drugs_list = [line.strip() for line in file if line.strip()]
    print('Loaded existing drugs')

    label_lookup = read_dict_from_csv('label_lookup.csv')

    columns = [
        'Candidate SMILES', 'Drug SMILES',
        'Interaction 1', 'Probability 1',
        'Interaction 2', 'Probability 2',
        'Interaction 3', 'Probability 3',
    ]

    model = tf.keras.models.load_model(model_file)

    # Drug fingerprints do not depend on the candidate: compute them once
    # instead of once per candidate.
    drug_vectors = [(drug, smiles_to_ECFP(drug)) for drug in drugs_list]

    # Rows are collected in a list and turned into a DataFrame once;
    # appending to a DataFrame per row copies the whole frame each time and
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []

    print('Predicting interactions ...')
    for candidate in tqdm(candidates_list, desc='Candidates : '):
        vec_a = smiles_to_ECFP(candidate)
        if vec_a is None:
            # Candidate could not be converted: skip it
            continue

        for drug, vec_b in tqdm(drug_vectors, desc='Drugs : '):
            if vec_b is None:
                # Drug could not be converted: skip this pair
                continue

            test = np.concatenate((vec_a, vec_b)).reshape((1, -1))
            prediction = model.predict(test)
            top_labels, top_probs = get_top_n(prediction, 3)
            # The lookup table is keyed by the string form of the label
            top_labels = [label_lookup[str(x)] for x in top_labels]
            rows.append({
                'Candidate SMILES': candidate,
                'Drug SMILES': drug,
                'Interaction 1': top_labels[0],
                'Probability 1': top_probs[0],
                'Interaction 2': top_labels[1],
                'Probability 2': top_probs[1],
                'Interaction 3': top_labels[2],
                'Probability 3': top_probs[2],
            })

    # Passing ``columns`` keeps the header (and column order) even when no
    # pair produced a row.
    interactions_df = pd.DataFrame(rows, columns=columns)
    interactions_df.to_csv(target_file, index=False)
Ejemplo n.º 3
0
def predict():
    '''Render the top five predicted interactions for a SMILES / drug pair

    Reads the ``SMILES`` and ``drug_name`` query parameters from
    ``request.args``. The drug name is resolved to a SMILES string through
    the module-level ``SMILES_DICT``; the prediction comes from
    ``predict_interaction`` with the model stored under ``flaskapp``.

    Returns :
        The rendered ``result.html`` template on success, otherwise the
        rendered ``error.html`` template with ``error`` set to one of
        'no_smiles', 'no_drug', 'invalid_smiles' or 'drug_not_found'.
    '''
    # Default to '' so an absent query parameter is reported through the
    # same error page as an empty one instead of failing on None.strip().
    smiles = request.args.get('SMILES', '').strip()
    drug_name = request.args.get('drug_name', '').strip()

    if smiles == '':
        return render_template('error.html', error='no_smiles')

    if drug_name == '':
        return render_template('error.html', error='no_drug')

    # smiles_to_ECFP returning None is treated as an invalid SMILES string
    fp = smiles_to_ECFP(smiles)
    if fp is None:
        return render_template('error.html', error='invalid_smiles')

    if drug_name not in SMILES_DICT:
        return render_template('error.html', error='drug_not_found')

    prediction = predict_interaction(smiles,
                                     SMILES_DICT[drug_name],
                                     directory='flaskapp')

    top_labels, top_probs = get_top_n(prediction, 5)

    label_lookup = read_dict_from_csv('flaskapp/label_lookup.csv')

    # The lookup table is keyed by the string form of the label
    top_labels = [label_lookup[str(x)] for x in top_labels]

    return render_template('result.html', top_labels=top_labels,
                           top_probs=top_probs, smiles=smiles,
                           drug_name=drug_name)