def predict_ms(self):
    """Predict a spectrum for the entered SMILES and draw it in the group box.

    The model name is built as ``'RIKEN_PlaSMA_' + mode + '_' + eneg`` where
    ``mode`` is the first three characters of the ``ModInput`` selection and
    ``eneg`` the first two characters of the ``EgyInput`` selection. The
    prediction returned by ``model_predict`` is indexed by ``'mz'`` and
    ``'intensity'`` and drawn as red vertical lines above a black baseline.

    Side effects:
        Replaces ``self.F`` with a new ``MyFigure`` and ``self.gridlayout``
        with a new ``QGridLayout`` installed on ``self.groupBox``.
    """
    base = 'RIKEN_PlaSMA_'
    mode = self.ModInput.currentText()[0:3]
    eneg = self.EgyInput.currentText()[0:2]
    smi = self.SmiInput.toPlainText()

    model = load_model(base + mode + '_' + eneg)
    ms = model_predict(smi, model)

    # Release the canvas left behind by an earlier call. Only the layout was
    # being deleted before, so each prediction left one more canvas parented
    # to the group box. The parent check limits this to a canvas that actually
    # lives in the group box; the old plot is kept if the prediction above
    # raised.
    previous = getattr(self, 'F', None)
    if previous is not None and previous.parentWidget() is self.groupBox:
        previous.setParent(None)
        previous.deleteLater()

    self.F = MyFigure(width=3, height=2, dpi=100)
    self.F.axes.cla()
    self.F.axes.vlines(ms['mz'], np.zeros(ms.shape[0]),
                       np.array(ms['intensity']), 'red')
    self.F.axes.axhline(0, color='black')

    self.gridlayout = QtWidgets.QGridLayout(self.groupBox)
    self.gridlayout.addWidget(self.F, 0, 1)
    # The layout object is scheduled for deletion right away -- presumably so
    # that the next call can install a fresh layout on the group box without
    # Qt complaining that one already exists (NOTE(review): confirm intent).
    self.gridlayout.deleteLater()
def identification(ms, candidates, model, method='correlation'):
    """Rank candidate structures by how well their predicted spectrum fits ``ms``.

    Args:
        ms: Query spectrum; passed unchanged as the first argument of the
            scoring function.
        candidates: Sequence of structure strings. If the first entry
            contains ``'InChI='`` every entry is parsed with
            ``Chem.MolFromInchi``, otherwise with ``Chem.MolFromSmiles``.
        model: Forwarded to ``model_predict`` for every candidate.
        method: ``'residual'`` selects ``ms_residual``, ``'correlation'``
            selects ``ms_correlation``; any other value selects
            ``ms_jaccard``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``SMILES``, ``InChI``,
        ``mass``, ``scores`` and ``pred_ms``, sorted by ``scores`` in
        descending order. Candidates that cannot be parsed or converted are
        skipped. An empty ``candidates`` yields an empty table with the same
        columns.
    """
    columns = ['SMILES', 'InChI', 'mass', 'scores', 'pred_ms']

    # Nothing to rank: return the empty table instead of failing on
    # ``candidates[0]`` below.
    if len(candidates) == 0:
        return pd.DataFrame({name: [] for name in columns})

    if method == 'residual':
        score = ms_residual
    elif method == 'correlation':
        score = ms_correlation
    else:
        score = ms_jaccard

    # The format of the first candidate decides the parser for all of them.
    if 'InChI=' in candidates[0]:
        read_candidate = Chem.MolFromInchi
    else:
        read_candidate = Chem.MolFromSmiles

    smiles = []
    scores = []
    inchis = []
    masses = []
    pred_ms = []
    for candidate in candidates:
        try:
            mol = read_candidate(candidate)
            if mol is None:
                # The parser rejected the string; skip it explicitly rather
                # than relying on a downstream failure.
                continue
            smi = Chem.MolToSmiles(mol)
            inchi = Chem.MolToInchi(mol)
            mass = CalcExactMolWt(mol)
        except Exception:
            # Best effort: an unparsable candidate is skipped. Unlike a bare
            # ``except`` this still lets KeyboardInterrupt/SystemExit through.
            continue

        pms = model_predict(smi, model)
        scr = score(ms, pms)

        smiles.append(smi)
        inchis.append(inchi)
        scores.append(scr)
        masses.append(mass)
        pred_ms.append(pms)

    output = pd.DataFrame({
        'SMILES': smiles,
        'InChI': inchis,
        'mass': masses,
        'scores': scores,
        'pred_ms': pred_ms,
    })
    return output.sort_values('scores', ascending=False)
# Label, annotate and show the figure that was started above this chunk.
plt.xlabel('epoch')
plt.legend(['train forward', 'train reverse', 'test forward', 'test reverse'],
           loc='upper left')
plt.show()

# Save the model: architecture as JSON next to a separate weights file.
model_json = model.to_json()
save_path = 'Model/RIKEN_PlaSMA_Pos_10'
with open(save_path + '.json', "w") as json_file:
    json_file.write(model_json)
model.save_weights(save_path + '.h5')

# Compare the model's prediction with the three CFM predictions for every
# test item; one row per item that could be evaluated.
result = pd.DataFrame(
    columns=['idx', 'smiles', 'DeepFrag', 'CFM_10', 'CFM_20', 'CFM_40'])
for i in tqdm(test_index):
    try:
        smi = smiles[i]
        ms_cfm = cfm_predict(smi)
        ms_real = ms[i]
        ms_pred = model_predict(smi, model)
        trans = ms_correlation(ms_real, ms_pred)
        cfm_10 = ms_correlation(ms_real, ms_cfm['low_energy'])
        cfm_20 = ms_correlation(ms_real, ms_cfm['medium_energy'])
        cfm_40 = ms_correlation(ms_real, ms_cfm['high_energy'])
    except Exception:
        # Best effort: items that fail anywhere in prediction or scoring are
        # left out of the table. ``Exception`` (not a bare ``except``) keeps
        # Ctrl-C working during the long loop.
        continue
    # Optional visual check of a single item (disabled):
    #   plot_compare_ms(ms_real, ms_cfm['medium_energy'])
    #   plot_compare_ms(ms_real, ms_pred)
    result.loc[len(result)] = [i, smi, trans, cfm_10, cfm_20, cfm_40]
result.to_csv('Result/RIKEN_PlaSMA_Pos_10.csv')
# NOTE(review): this chunk starts inside a loop whose header (and the branch
# that owns the leading ``continue``) lies outside the visible source. The
# indentation of the first seven statements is reconstructed -- confirm it
# against the full file.
        continue
    # Store the peak list as a two-column table and record the per-item
    # metadata in parallel lists.
    data = pd.DataFrame(np.array(data))
    data.columns = ['mz', 'intensity']
    modes.append(ion_mode)
    ms.append(data)
    smiles.append(smi)
    energies.append(energy)

# One row per collected item, built from the parallel metadata lists.
summary = pd.DataFrame({'smiles': smiles, 'ion_mode': modes, 'energy': energies})

# example 1
idx = 1297
smi = smiles[idx]
mol = Chem.MolFromSmiles(smi)
# t1..t3 bracket the two predictors so their run times can be printed below.
t1 = time.time()
ms_pred = model_predict(smi, model)
t2 = time.time()
ms_cfm = cfm_predict(smi)
t3 = time.time()
ms_pretrain = model_predict(smi, pretrain)
ms_real = ms[idx]
# Pairwise plots between the stored, pretrained-model, model and CFM spectra.
plot_compare_ms(ms_real, ms_pretrain)
plot_compare_ms(ms_pretrain, ms_cfm['low_energy'])
plot_compare_ms(ms_real, ms_pred)
plot_compare_ms(ms_real, ms_cfm['low_energy'])
print ('computing time of CFM is: ' + str(t3-t2) + ' s')
print ('computing time of DeepFrag is: ' + str(t2-t1) + ' s')

## precursor
# Bare expression: the molecule is built but not bound to a name
# (presumably for display in an interactive session -- TODO confirm).
Chem.MolFromSmiles('Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C([OH2+])C2')