def writefile1():
    """Fit the classifier on the selected descriptors and write the reports.

    Operates entirely on module-level state:

    * ``file1`` / ``file2`` -- sub-training and test frames.  Column 0 is kept
      as the leading column of the prediction table, column 1 is used as the
      response and columns 2+ are the candidate descriptors.
    * ``var1`` / ``var2`` -- choose ``trainsetfit`` or ``trainsetfit2`` as the
      feature-selection routine (``var1`` wins when both are set).
    * ``model`` -- the estimator that is fitted and used for prediction.
    * ``c_`` -- prefix of every output file name.

    Files written:

    * ``<c_>_fslda.txt`` or ``<c_>_sfslda.txt`` -- text report,
    * ``<c_>_corr.csv`` -- correlation matrix of the selected descriptors,
    * ``<c_>_pred.csv`` -- sub-train and test rows with predictions.

    Raises:
        ValueError: if neither ``var1`` nor ``var2`` is set (the original
            failed later with an unbound-variable error).

    NOTE(review): SOURCE contains a second ``def writefile1()`` further down;
    if both live in the same module the later one shadows this definition.
    """
    Xtr = file1.iloc[:, 2:]
    ytr = file1.iloc[:, 1:2]
    ntr = file1.iloc[:, 0:1]
    Xts = file2.iloc[:, 2:]
    yts = file2.iloc[:, 1:2]
    nts = file2.iloc[:, 0:1]

    # a: selected column labels; b, c, d are reported below as Wilks lambda,
    # F value and p value respectively.
    if var1.get():
        a, b, c, d = trainsetfit(Xtr, ytr)
        report_name = str(c_) + "_fslda.txt"
    elif var2.get():
        a, b, c, d = trainsetfit2(Xtr, ytr)
        report_name = str(c_) + "_sfslda.txt"
    else:
        raise ValueError(
            "No feature-selection method is selected (var1 and var2 are both unset)"
        )

    # The context manager guarantees the report is flushed and closed even if
    # fitting, prediction or one of the CSV writes raises.
    with open(report_name, "w") as filer:
        filer.write("Sub-training set results " + "\n")
        filer.write("\n")
        filer.write("Wilks lambda: " + str(b) + "\n")
        filer.write("Fvalue: " + str(c) + "\n")
        filer.write("pvalue: " + str(d) + "\n")

        model.fit(Xtr[a], ytr)
        filer.write("Selected features :" + str(a) + "\n")
        filer.write("intercept: " + str(model.intercept_) + "\n")
        filer.write("coefficients: " + str(model.coef_) + "\n")

        # Sub-training predictions, class probabilities and the output of
        # apdom(...).fit() (presumably an applicability-domain flag -- confirm).
        yprtr = pd.DataFrame(model.predict(Xtr[a]))
        yprtr.columns = ['Pred']
        yprtr2 = pd.DataFrame(model.predict_proba(Xtr[a]))
        yprtr2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadstr = apdom(Xtr[a], Xtr[a]).fit()
        dfstr = pd.concat([ntr, Xtr[a], ytr, yprtr, yprtr2, yadstr], axis=1)
        dfstr['Set'] = 'Sub_train'

        # Same table for the test set; apdom gets the training descriptors as
        # its second argument.
        yprts = pd.DataFrame(model.predict(Xts[a]))
        yprts.columns = ['Pred']
        yprts2 = pd.DataFrame(model.predict_proba(Xts[a]))
        yprts2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfsts = pd.concat([nts, Xts[a], yts, yprts, yprts2, yadts], axis=1)
        dfsts['Set'] = 'Test'

        # Descriptor correlation matrix and its extremes as returned by corr().
        tb = Xtr[a].corr()
        mx, mn = corr(tb)
        tb.to_csv(str(c_) + '_corr.csv')

        finda = pd.concat([dfstr, dfsts], axis=0)
        finda.to_csv(str(c_) + '_pred.csv', index=False)

        writefile2(Xtr[a], ytr, model, filer)
        filer.write('Maximum correlation between descriptors: ' + str(mx) + "\n")
        filer.write('Minimum correlation between descriptors: ' + str(mn))
        filer.write("\n")
        filer.write("Test set results: " + "\n")
        filer.write("\n")
        writefile2(Xts[a], yts, model, filer)
def writefile3():
    """Refit the model on the sub-training rows and predict the external set.

    Module-level inputs: ``file4`` (table with a ``'Set'`` column whose values
    ``'Sub_train'`` / ``'Test'`` split the rows), ``file3`` (external
    compounds), ``secondEntryTabFive`` (entry holding the number of descriptor
    columns ``nf``), ``model`` and the output prefix ``e_``.  Columns
    ``1 .. nf`` of ``file4`` are the descriptors and column ``nf + 1`` is the
    response.  The global ``file5`` is rebound to the sub-training rows.

    If ``file3`` contains the response column it is handled as a validation
    set: ``<e_>_pred.csv`` and ``<e_>_pred.txt`` are written and a third ROC
    curve is drawn.  Otherwise it is handled as a screening set and only
    ``<e_>_scpred.csv`` is written.  In both cases the ROC figure is saved to
    ``<e_>_ROC.png``.
    """
    global file5

    nf = int(secondEntryTabFive.get())

    file5 = file4[file4['Set'] == 'Sub_train']
    Xtr5 = file5.iloc[:, 1:nf + 1]
    ytr5 = file5.iloc[:, nf + 1:nf + 2]
    file6 = file4[file4['Set'] == 'Test']
    Xts = file6.iloc[:, 1:nf + 1]
    yts = file6.iloc[:, nf + 1:nf + 2]

    model.fit(Xtr5, ytr5)

    # Predictions for the external set are built the same way in both modes.
    Xvd = file3[Xtr5.columns]
    nvd = file3.iloc[:, 0:1]
    yprvd = pd.DataFrame(model.predict(Xvd))
    yprvd.columns = ['Pred']
    yprvd2 = pd.DataFrame(model.predict_proba(Xvd))
    yprvd2.columns = ['%Prob(-1)', '%Prob(+1)']
    yadvd = apdom(Xvd, Xtr5).fit()

    roc_sets = [
        (Xtr5, ytr5, 'Sub-train', 'blue'),
        (Xts, yts, 'Test', 'red'),
    ]

    if ytr5.columns[0] in file3.columns:
        # Observed responses are available: treat file3 as a validation set.
        yvd = file3[ytr5.columns]
        dfsvd = pd.concat([nvd, Xvd, yvd, yprvd, yprvd2, yadvd], axis=1)
        dfsvd['Set'] = 'Validation'
        dfsvd.to_csv(str(e_) + '_pred.csv', index=False)

        # The original never closed this handle, so the report could stay
        # unflushed; the context manager closes it deterministically.
        with open(str(e_) + "_pred.txt", "w") as filer2:
            filer2.write("Validation set results: " + "\n")
            filer2.write("\n")
            writefile2(Xvd, yvd, model, filer2)

        roc_sets.append((Xvd, yvd, 'Validation', 'green'))
    else:
        # No observed responses: screening mode, predictions only.
        dfsvd = pd.concat([nvd, Xvd, yprvd, yprvd2, yadvd], axis=1)
        dfsvd['Set'] = 'Screening'
        dfsvd.to_csv(str(e_) + '_scpred.csv', index=False)

    # Compute every curve before the figure is created, as the original did.
    roc_curves = [
        (ROCplot(X, y), label, colour) for X, y, label, colour in roc_sets
    ]

    pyplot.figure(figsize=(15, 10))
    for (fpr, tpr), label, colour in roc_curves:
        pyplot.plot(fpr, tpr, label=label, color=colour, marker='.',
                    linewidth=1, markersize=10)
    pyplot.ylabel('True postive rate', fontsize=28)
    pyplot.xlabel('False postive rate', fontsize=28)
    pyplot.legend(fontsize=18)
    pyplot.tick_params(labelsize=18)

    # ``papertype=None`` and ``frameon=None`` were dropped: both were the
    # defaults, and recent Matplotlib releases reject them with a TypeError.
    pyplot.savefig(str(e_) + '_ROC.png', dpi=300, facecolor='w',
                   edgecolor='w', orientation='portrait', format=None,
                   transparent=False, bbox_inches=None, pad_inches=0.1,
                   metadata=None)
def writefile1():
    """Select descriptors, fit the classifier and write the reports.

    Operates on module-level state:

    * ``file1`` / ``file2`` -- sub-training and test frames (column 0 is kept
      as the leading column of the prediction table, column 1 is the response,
      columns 2+ are candidate descriptors).
    * ``var1`` / ``var2`` -- choose ``trainsetfit`` (report ``_fslda.txt``) or
      ``trainsetfit2`` (report ``_sfslda.txt``); ``var1`` wins if both are set.
    * ``fifthBoxTabThreer6c1`` / ``fifthBoxTabThreer6c2`` -- entry holding the
      size argument ``ms`` passed to the chosen selector.
    * ``Criterionx`` -- when truthy, the subset size is grown one step at a
      time until the relative change (in percent) of the first statistic
      returned by the selector (reported as Wilks lambda) falls below the
      threshold read from ``flabel2``.
    * ``model`` -- estimator fitted and used for prediction; ``c_`` -- prefix
      of every output file name.

    Files written: the text report, ``<c_>_corr.csv`` and ``<c_>_pred.csv``.

    Raises:
        ValueError: if neither ``var1`` nor ``var2`` is set, or if the
            preliminary selection returns no descriptor in incremental mode
            (both cases previously ended in an unbound-variable error).
    """
    Xtr = file1.iloc[:, 2:]
    ytr = file1.iloc[:, 1:2]
    ntr = file1.iloc[:, 0:1]
    Xts = file2.iloc[:, 2:]
    yts = file2.iloc[:, 1:2]
    nts = file2.iloc[:, 0:1]
    pc = int(flabel2.get())

    if var1.get():
        selector = trainsetfit
        size_entry = fifthBoxTabThreer6c1
        report_name = str(c_) + "_fslda.txt"
        # The original re-selects from the pre-selected columns for this
        # method but from the full matrix for the other one.
        # NOTE(review): confirm that this asymmetry is intentional.
        reselect_from_preselected = True
    elif var2.get():
        selector = trainsetfit2
        size_entry = fifthBoxTabThreer6c2
        report_name = str(c_) + "_sfslda.txt"
        reselect_from_preselected = False
    else:
        raise ValueError(
            "No feature-selection method is selected (var1 and var2 are both unset)"
        )

    # Truthiness replaces the original ``== True`` / ``== False`` comparisons;
    # equivalent for the bool / 0-1 values a Tk check variable yields.
    incremental = bool(Criterionx.get())
    ms = int(size_entry.get())

    ls = []  # percentage increments, one per incremental step
    if incremental:
        a1, _b1, _c1, _d1 = selector(Xtr, ytr, ms)
        pool = Xtr[a1] if reselect_from_preselected else Xtr
        lt = [0]  # statistic history, seeded with 0 as in the original
        a = None
        for i in range(1, len(a1) + 1):
            a, b, c, d = selector(pool, ytr, i)
            # NOTE(review): the model is refitted again after the loop; this
            # per-step fit is kept only to mirror the original side effects.
            model.fit(Xtr[a], ytr)
            lt.append(b)
            dv = abs(lt[-1] - lt[-2])
            try:
                val2 = dv / lt[-2] * 100
            except ZeroDivisionError:
                # First step divides by the 0 seed: NumPy scalars already give
                # inf here, plain Python floats used to crash.
                val2 = float("inf")
            ls.append(val2)
            if val2 < pc:
                break
        if a is None:
            raise ValueError("Preliminary feature selection returned no descriptors")
        note = "Note that it is a Increment based selection result"
    else:
        a, b, c, d = selector(Xtr, ytr, ms)
        note = "Note that it is not a Increment based selection result"

    # The report is opened once (the original re-opened it with "w" on every
    # loop iteration, leaking handles) and is closed even when a later step
    # raises.
    with open(report_name, "w") as filer:
        filer.write(note + "\n")
        if incremental:
            # NOTE(review): original indentation is lost; this line is assumed
            # to follow the loop rather than sit inside it.
            filer.write("Increments :" + str(ls) + "\n")

        filer.write("Sub-training set results " + "\n")
        filer.write("\n")
        filer.write("Wilks lambda: " + str(b) + "\n")
        filer.write("Fvalue: " + str(c) + "\n")
        filer.write("pvalue: " + str(d) + "\n")

        model.fit(Xtr[a], ytr)
        filer.write("Selected features :" + str(a) + "\n")
        filer.write("intercept: " + str(model.intercept_) + "\n")
        filer.write("coefficients: " + str(model.coef_) + "\n")

        # Sub-training predictions, class probabilities and apdom output.
        yprtr = pd.DataFrame(model.predict(Xtr[a]))
        yprtr.columns = ['Pred']
        yprtr2 = pd.DataFrame(model.predict_proba(Xtr[a]))
        yprtr2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadstr = apdom(Xtr[a], Xtr[a]).fit()
        dfstr = pd.concat([ntr, Xtr[a], ytr, yprtr, yprtr2, yadstr], axis=1)
        dfstr['Set'] = 'Sub_train'

        # Same table for the test set.
        yprts = pd.DataFrame(model.predict(Xts[a]))
        yprts.columns = ['Pred']
        yprts2 = pd.DataFrame(model.predict_proba(Xts[a]))
        yprts2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfsts = pd.concat([nts, Xts[a], yts, yprts, yprts2, yadts], axis=1)
        dfsts['Set'] = 'Test'

        # Descriptor correlation matrix and its extremes as returned by corr().
        tb = Xtr[a].corr()
        tb.to_csv(str(c_) + '_corr.csv')
        mx, mn = corr(tb)

        finda = pd.concat([dfstr, dfsts], axis=0)
        finda.to_csv(str(c_) + '_pred.csv', index=False)

        writefile2(Xtr[a], ytr, model, filer)
        filer.write('Maxmimum intercorrelation between descriptors: ' + str(mx) + "\n")
        filer.write('Minimum intercorrelation between descriptors: ' + str(mn) + "\n")
        filer.write("\n")
        filer.write("Test set results: " + "\n")
        filer.write("\n")
        writefile2(Xts[a], yts, model, filer)
def writefilex():
    """Fit the regression model on selected descriptors and write the reports.

    Module-level inputs: ``file1`` (sub-training frame: column 0 leading
    column, column 1 response, columns 2+ descriptors), ``file2`` (test or
    screening frame), ``reg`` (regressor), ``var3`` / ``N1B1_x``
    (Y-randomisation switch and number of runs) and the output prefix ``c_``.

    ``trainsetfit2`` supplies the selected columns ``a``, a statistics object
    ``b``, the value reported as Q2LOO ``c``, ``m`` (used as the reference
    mean in the Q2F1 denominator), the correlation extremes ``mx`` / ``mn``,
    the frame ``l`` plotted as "Train(LOO)" and an already open report
    handle ``filer``.

    Outputs: ``<c_>_sfslda_trpr.csv``; then either ``<c_>_sfslda_tspr.csv``
    plus ``<c_>_obspred.png`` / ``<c_>_loopred.png`` when ``file2`` contains
    the response column, or ``<c_>_sfslda_scpr.csv`` otherwise.  Report lines
    are appended to ``filer``.

    NOTE(review): ``filer`` is not closed anywhere in the visible code, and
    the definition may continue beyond the visible source -- confirm who owns
    and closes the handle.
    """
    Xtr = file1.iloc[:, 2:]
    ytr = file1.iloc[:, 1:2]
    ntr = file1.iloc[:, 0:1]

    a, b, c, m, mx, mn, l, filer = trainsetfit2(Xtr, ytr)
    reg.fit(Xtr[a], ytr)
    r2 = reg.score(Xtr[a], ytr)
    ypr = pd.DataFrame(reg.predict(Xtr[a]))
    ypr.columns = ['Pred']
    rm2tr, drm2tr = rm2(ytr, l).fit()
    d = mean_absolute_error(ytr, ypr)
    e = (mean_squared_error(ytr, ypr))**0.5
    yadstr = apdom(Xtr[a], Xtr[a]).fit()
    df = pd.concat([ntr, Xtr[a], ytr, ypr, l, yadstr], axis=1)
    df.to_csv(str(c_) + "_sfslda_trpr.csv", index=False)

    filer.write("Sub-training set results " + "\n")
    filer.write("\n")
    filer.write("Selected features are:" + str(a) + "\n")
    filer.write("Statistics:" + str(b) + "\n")
    filer.write('Training set results: ' + "\n")
    filer.write('Maxmimum intercorrelation between descriptors: ' + str(mx) + "\n")
    filer.write('Minimum intercorrelation between descriptors: ' + str(mn) + "\n")
    filer.write('MAE: ' + str(d) + "\n")
    filer.write('RMSE: ' + str(e) + "\n")
    filer.write('Q2LOO: ' + str(c) + "\n")

    if ytr.columns[0] in file2.columns:
        # file2 carries observed responses: external-test statistics + plots.
        Xts = file2.iloc[:, 2:]
        nts = file2.iloc[:, 0:1]
        yts = file2.iloc[:, 1:2]
        ytspr = pd.DataFrame(reg.predict(Xts[a]))
        ytspr.columns = ['Pred']
        rm2ts, drm2ts = rm2(yts, ytspr).fit()

        tsdf = pd.concat([yts, pd.DataFrame(ytspr)], axis=1)
        tsdf.columns = ['Active', 'Predict']
        tsdf['Aver'] = m
        tsdf['Aver2'] = tsdf['Predict'].mean()
        tsdf['diff'] = tsdf['Active'] - tsdf['Predict']
        tsdf['diff2'] = tsdf['Active'] - tsdf['Aver']
        tsdf['diff3'] = tsdf['Active'] - tsdf['Aver2']
        press = (tsdf['diff']**2).sum()
        r2pr = 1 - (press / (tsdf['diff2']**2).sum())
        r2pr2 = 1 - (press / (tsdf['diff3']**2).sum())
        RMSEP = (press / tsdf.shape[0])**0.5

        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfts = pd.concat([nts, Xts[a], yts, ytspr, yadts], axis=1)
        dfts.to_csv(str(c_) + "_sfslda_tspr.csv", index=False)

        filer.write('rm2LOO: ' + str(rm2tr) + "\n")
        filer.write('delta rm2LOO: ' + str(drm2tr) + "\n")
        filer.write("\n")
        filer.write('Test set results: ' + "\n")
        filer.write('Number of observations: ' + str(yts.shape[0]) + "\n")
        filer.write('Q2F1/R2Pred: ' + str(r2pr) + "\n")
        filer.write('Q2F2: ' + str(r2pr2) + "\n")
        filer.write('rm2test: ' + str(rm2ts) + "\n")
        filer.write('delta rm2test: ' + str(drm2ts) + "\n")
        filer.write('RMSEP: ' + str(RMSEP) + "\n")
        filer.write("\n")

        # Two observed-vs-predicted plots that differ only in the training
        # predictions shown (fitted values vs. the ``l`` frame).
        plot_specs = (
            (ypr, 'Train', '_obspred.png'),
            (l, 'Train(LOO)', '_loopred.png'),
        )
        for train_pred, train_label, suffix in plot_specs:
            fig = pyplot.figure(figsize=(15, 10))
            pyplot.scatter(ytr, train_pred, label=train_label, color='blue')
            pyplot.plot([ytr.min(), ytr.max()], [ytr.min(), ytr.max()],
                        'k--', lw=4)
            pyplot.scatter(yts, ytspr, label='Test', color='red')
            pyplot.ylabel('Predicted values', fontsize=28)
            pyplot.xlabel('Observed values', fontsize=28)
            pyplot.legend(fontsize=18)
            pyplot.tick_params(labelsize=18)
            # ``papertype=None`` / ``frameon=None`` were dropped: both were
            # defaults and recent Matplotlib releases raise TypeError on them.
            fig.savefig(str(c_) + suffix, dpi=300, facecolor='w',
                        edgecolor='w', orientation='portrait', format=None,
                        transparent=False, bbox_inches=None, pad_inches=0.1,
                        metadata=None)
    else:
        # No response column in file2: screening mode, predictions only.
        Xts = file2.iloc[:, 1:]
        nts = file2.iloc[:, 0:1]
        ytspr = pd.DataFrame(reg.predict(Xts[a]))
        ytspr.columns = ['Pred']
        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfts = pd.concat([nts, Xts[a], ytspr, yadts], axis=1)
        dfts.to_csv(str(c_) + "_sfslda_scpr.csv", index=False)

    # NOTE(review): original indentation is lost; the Y-randomisation step is
    # assumed to run at function level, not only in the screening branch.
    if var3.get():
        ls = []
        nr = int(N1B1_x.get())
        for _ in range(nr):
            yr = shuffling(ytr)
            reg.fit(Xtr[a], yr)
            ls.append(reg.score(Xtr[a], yr))
        rr = np.mean(ls)
        # Restore the shared regressor: the loop leaves it fitted on a
        # shuffled response (the original only called reg.score() here and
        # discarded the result).
        reg.fit(Xtr[a], ytr)
        # NOTE(review): math.sqrt raises ValueError if rr exceeds r2.
        crp2 = math.sqrt(r2) * math.sqrt(r2 - rr)
        filer.write('Crp2 after ' + str(nr) + ' run: ' + str(crp2) + "\n")