예제 #1
0
 def predict_ms(self):
     """Predict a spectrum for the entered SMILES and draw it in the group box.

     Reads the mode and energy selections and the SMILES text from the input
     widgets, loads the model whose name is assembled from those selections,
     and plots the prediction as red vertical lines over a black zero line
     on a fresh ``MyFigure`` canvas stored in ``self.F``.
     """
     # Only the leading characters of each selection go into the model name.
     mode_tag = self.ModInput.currentText()[:3]
     energy_tag = self.EgyInput.currentText()[:2]
     smiles_text = self.SmiInput.toPlainText()

     model_name = 'RIKEN_PlaSMA_' + mode_tag + '_' + energy_tag
     spectrum = model_predict(smiles_text, load_model(model_name))

     # A new canvas is created on every call and kept on the instance.
     canvas = MyFigure(width=3, height=2, dpi=100)
     self.F = canvas
     canvas.axes.cla()

     # One vertical line per row of the spectrum, from 0 up to its intensity.
     baseline = np.zeros(spectrum.shape[0])
     heights = np.array(spectrum['intensity'])
     canvas.axes.vlines(spectrum['mz'], baseline, heights, 'red')
     canvas.axes.axhline(0, color='black')

     # The layout is scheduled for deletion right after the canvas is added.
     # NOTE(review): presumably so the next call can attach a new layout to
     # the same group box -- confirm.
     layout = QtWidgets.QGridLayout(self.groupBox)
     self.gridlayout = layout
     layout.addWidget(canvas, 0, 1)
     layout.deleteLater()
예제 #2
0
def identification(ms, candidates, model, method='correlation'):
    """Rank candidate structures by how well their predicted spectra match ``ms``.

    Parameters
    ----------
    ms
        Query spectrum; passed as the first argument to the scoring function.
    candidates
        Sized, indexable collection of structure strings. If the first entry
        contains ``'InChI='`` every entry is parsed as InChI, otherwise every
        entry is parsed as SMILES.
    model
        Passed through unchanged to ``model_predict``.
    method
        ``'residual'`` selects ``ms_residual``, ``'correlation'`` selects
        ``ms_correlation``; any other value selects ``ms_jaccard``.

    Returns
    -------
    pandas.DataFrame
        Columns ``SMILES``, ``InChI``, ``mass``, ``scores`` and ``pred_ms``,
        sorted by ``scores`` in descending order. Candidates that cannot be
        parsed or converted are skipped. An empty ``candidates`` yields an
        empty frame with the same columns.
    """
    columns = ('SMILES', 'InChI', 'mass', 'scores', 'pred_ms')

    # Guard first: the format sniffing below indexes candidates[0].
    if len(candidates) == 0:
        return pd.DataFrame({name: [] for name in columns})

    if method == 'residual':
        score = ms_residual
    elif method == 'correlation':
        score = ms_correlation
    else:
        score = ms_jaccard

    # The first candidate decides the parser used for the whole collection.
    if 'InChI=' in candidates[0]:
        read_candidate = Chem.MolFromInchi
    else:
        read_candidate = Chem.MolFromSmiles

    records = {name: [] for name in columns}
    for candidate in candidates:
        try:
            mol = read_candidate(candidate)
            if mol is None:
                # Parser rejected the string; skip it explicitly instead of
                # relying on the next call to raise.
                continue
            smi = Chem.MolToSmiles(mol)
            inchi = Chem.MolToInchi(mol)
            mass = CalcExactMolWt(mol)
        except Exception:
            # Best-effort: drop candidates that fail conversion, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
        pms = model_predict(smi, model)
        records['SMILES'].append(smi)
        records['InChI'].append(inchi)
        records['mass'].append(mass)
        records['scores'].append(score(ms, pms))
        records['pred_ms'].append(pms)

    output = pd.DataFrame(records)
    return output.sort_values('scores', ascending=False)
# Finish the current figure: label the x-axis, attach a four-entry legend
# in the upper-left corner, and display it.
# NOTE(review): the legend assumes four curves were already drawn in this
# order (train forward, train reverse, test forward, test reverse) -- confirm
# against the plotting calls that precede this point.
plt.xlabel('epoch')
plt.legend(['train forward', 'train reverse', 'test forward', 'test reverse'],
           loc='upper left')
plt.show()

# Persist the model as two files sharing one path stem: the output of
# model.to_json() goes to '<stem>.json' and the weights to '<stem>.h5'.
model_json = model.to_json()
save_path = 'Model/RIKEN_PlaSMA_Pos_10'
with open(save_path + '.json', "w") as json_file:
    json_file.write(model_json)
model.save_weights(save_path + '.h5')

# Benchmark on the held-out indices: for each test sample, score the model's
# prediction and the three CFM predictions (low / medium / high energy)
# against the measured spectrum with ms_correlation, collecting one row per
# sample that could be processed.
result = pd.DataFrame(
    columns=['idx', 'smiles', 'DeepFrag', 'CFM_10', 'CFM_20', 'CFM_40'])
for i in tqdm(test_index):
    try:
        smi = smiles[i]
        ms_cfm = cfm_predict(smi)
        ms_real = ms[i]
        ms_pred = model_predict(smi, model)
        trans = ms_correlation(ms_real, ms_pred)
        cfm_10 = ms_correlation(ms_real, ms_cfm['low_energy'])
        cfm_20 = ms_correlation(ms_real, ms_cfm['medium_energy'])
        cfm_40 = ms_correlation(ms_real, ms_cfm['high_energy'])
    except Exception:
        # Best-effort: skip samples where any prediction or scoring step
        # fails. Catching Exception (not a bare except) keeps Ctrl-C and
        # SystemExit able to stop this long-running loop.
        continue
    # For visual inspection of a sample, plot_compare_ms(ms_real, ms_pred)
    # or plot_compare_ms(ms_real, ms_cfm['medium_energy']) can be called here.
    result.loc[len(result)] = [i, smi, trans, cfm_10, cfm_20, cfm_40]
# Written once after the loop; samples skipped above do not appear.
result.to_csv('Result/RIKEN_PlaSMA_Pos_10.csv')
예제 #4
0
        continue
    data = pd.DataFrame(np.array(data))
    data.columns = ['mz', 'intensity']
    modes.append(ion_mode)
    ms.append(data)
    smiles.append(smi)
    energies.append(energy)
# Tabulate the per-spectrum metadata collected above: one row per entry of
# the smiles / modes / energies lists, which are appended to together.
summary = pd.DataFrame({'smiles': smiles, 'ion_mode': modes, 'energy': energies})


# example 1: for the single compound at index 1297, predict its spectrum
# with `model`, with CFM and with `pretrain`, time the first two, and plot
# pairwise comparisons.
idx = 1297
smi = smiles[idx]
# NOTE: `mol` is not used again in the lines visible here.
mol = Chem.MolFromSmiles(smi)
# Timestamps bracket the two predictors:
# t1..t2 covers model_predict(smi, model), t2..t3 covers cfm_predict(smi).
t1 = time.time()
ms_pred = model_predict(smi, model)
t2 = time.time()
ms_cfm = cfm_predict(smi)
t3 = time.time()
# The pretrained-model prediction is made after t3, so it is not timed.
ms_pretrain = model_predict(smi, pretrain)
ms_real = ms[idx]
# Four comparisons, in order: measured vs pretrain, pretrain vs CFM
# low-energy, measured vs `model`, measured vs CFM low-energy.
plot_compare_ms(ms_real, ms_pretrain)
plot_compare_ms(ms_pretrain, ms_cfm['low_energy'])
plot_compare_ms(ms_real, ms_pred)
plot_compare_ms(ms_real, ms_cfm['low_energy'])
print ('computing time of CFM is: ' + str(t3-t2) + ' s')
print ('computing time of DeepFrag is: ' + str(t2-t1) + ' s')

## precursor
# The parsed molecule is not assigned to a name; when run as a script the
# result is discarded.
# NOTE(review): presumably intended for interactive display -- confirm.
Chem.MolFromSmiles('Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C([OH2+])C2')