Exemple #1
0
def writefile1():
    """Run the chosen feature selection, fit ``model`` and write the reports.

    Uses the module-level frames ``file1`` (sub-training set) and ``file2``
    (test set). In both, column 0 is carried through to the output unchanged,
    column 1 is used as the response and the remaining columns as candidate
    descriptors.

    Files written (all prefixed with ``str(c_)``):

    * ``_fslda.txt`` (``var1`` set) or ``_sfslda.txt`` (``var2`` set): text
      report with the selection statistics, model parameters and the output
      of ``writefile2`` for both sets.
    * ``_corr.csv``: correlation matrix of the selected descriptors.
    * ``_pred.csv``: predictions for the sub-training and test rows.

    Raises:
        ValueError: if neither ``var1`` nor ``var2`` is set. (The previous
            code failed later with an ``UnboundLocalError`` in that case.)
    """
    Xtr = file1.iloc[:, 2:]
    ytr = file1.iloc[:, 1:2]
    ntr = file1.iloc[:, 0:1]
    Xts = file2.iloc[:, 2:]
    yts = file2.iloc[:, 1:2]
    nts = file2.iloc[:, 0:1]

    # Selection runs before the report is opened so that a failing selection
    # does not create or truncate the report file.
    if var1.get():
        a, b, c, d = trainsetfit(Xtr, ytr)
        report_suffix = "_fslda.txt"
    elif var2.get():
        a, b, c, d = trainsetfit2(Xtr, ytr)
        report_suffix = "_sfslda.txt"
    else:
        raise ValueError(
            "No feature-selection method is selected (var1 and var2 are both unset)")

    # The context manager guarantees the report is flushed and closed even if
    # fitting, prediction or CSV export raises part-way through.
    with open(str(c_) + report_suffix, "w") as filer:
        filer.write("Sub-training set results " + "\n")
        filer.write("\n")
        filer.write("Wilks lambda: " + str(b) + "\n")
        filer.write("Fvalue: " + str(c) + "\n")
        filer.write("pvalue: " + str(d) + "\n")
        model.fit(Xtr[a], ytr)
        filer.write("Selected features :" + str(a) + "\n")
        filer.write("intercept: " + str(model.intercept_) + "\n")
        filer.write("coefficients: " + str(model.coef_) + "\n")

        # Sub-training predictions.
        # NOTE(review): the prediction frames get a fresh 0..n-1 index while
        # ntr/Xtr/ytr keep file1's index; concat(axis=1) aligns on index, so
        # this presumably relies on file1/file2 having a default index.
        yprtr = pd.DataFrame(model.predict(Xtr[a]))
        yprtr.columns = ['Pred']
        yprtr2 = pd.DataFrame(model.predict_proba(Xtr[a]))
        yprtr2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadstr = apdom(Xtr[a], Xtr[a]).fit()
        dfstr = pd.concat([ntr, Xtr[a], ytr, yprtr, yprtr2, yadstr], axis=1)
        dfstr['Set'] = 'Sub_train'

        # Test-set predictions; apdom is given the training descriptors as
        # its second argument.
        yprts = pd.DataFrame(model.predict(Xts[a]))
        yprts.columns = ['Pred']
        yprts2 = pd.DataFrame(model.predict_proba(Xts[a]))
        yprts2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfsts = pd.concat([nts, Xts[a], yts, yprts, yprts2, yadts], axis=1)
        dfsts['Set'] = 'Test'

        # Correlation matrix of the selected descriptors.
        tb = Xtr[a].corr()
        mx, mn = corr(tb)
        tb.to_csv(str(c_) + '_corr.csv')

        finda = pd.concat([dfstr, dfsts], axis=0)
        finda.to_csv(str(c_) + '_pred.csv', index=False)

        writefile2(Xtr[a], ytr, model, filer)
        filer.write('Maximum correlation between descriptors: ' + str(mx) + "\n")
        filer.write('Minimum correlation between descriptors: ' + str(mn))
        filer.write("\n")
        filer.write("Test set results: " + "\n")
        filer.write("\n")
        writefile2(Xts[a], yts, model, filer)
Exemple #2
0
def writefile3():
    """Refit ``model`` on the sub-training rows and predict the external set.

    Reads the module-level frames ``file4`` (rows tagged by a ``'Set'``
    column) and ``file3`` (external compounds). The number of descriptors
    ``nf`` is read from ``secondEntryTabFive``; columns ``1..nf`` of
    ``file4`` are used as predictors and column ``nf + 1`` as the response.

    Side effects:

    * rebinds the module-level ``file5`` to the ``'Sub_train'`` rows;
    * fits the module-level ``model``;
    * if ``file3`` contains the response column: writes
      ``<e_>_pred.csv`` and ``<e_>_pred.txt`` and adds a 'Validation' curve
      to the plot; otherwise writes ``<e_>_scpred.csv`` (rows tagged
      'Screening');
    * always writes ``<e_>_ROC.png``.
    """
    global file5

    nf = int(secondEntryTabFive.get())
    file5 = file4[file4['Set'] == 'Sub_train']
    Xtr5 = file5.iloc[:, 1:nf + 1]
    ytr5 = file5.iloc[:, nf + 1:nf + 2]
    file6 = file4[file4['Set'] == 'Test']
    Xts = file6.iloc[:, 1:nf + 1]
    yts = file6.iloc[:, nf + 1:nf + 2]
    model.fit(Xtr5, ytr5)

    # External set restricted to the training descriptors.
    Xvd = file3[Xtr5.columns]
    # When the response column is present the external set can be scored
    # (validation); otherwise it is only predicted (screening).
    has_response = ytr5.columns[0] in file3.columns

    # Predictions are computed identically in both modes.
    nvd = file3.iloc[:, 0:1]
    yprvd = pd.DataFrame(model.predict(Xvd))
    yprvd.columns = ['Pred']
    yprvd2 = pd.DataFrame(model.predict_proba(Xvd))
    yprvd2.columns = ['%Prob(-1)', '%Prob(+1)']
    yadvd = apdom(Xvd, Xtr5).fit()

    if has_response:
        yvd = file3[ytr5.columns]
        dfsvd = pd.concat([nvd, Xvd, yvd, yprvd, yprvd2, yadvd], axis=1)
        dfsvd['Set'] = 'Validation'
        dfsvd.to_csv(str(e_) + '_pred.csv', index=False)
        # The report handle was previously never closed, so its content
        # could stay unflushed; the context manager closes it reliably.
        with open(str(e_) + "_pred.txt", "w") as filer2:
            filer2.write("Validation set results: " + "\n")
            filer2.write("\n")
            writefile2(Xvd, yvd, model, filer2)
    else:
        dfsvd = pd.concat([nvd, Xvd, yprvd, yprvd2, yadvd], axis=1)
        dfsvd['Set'] = 'Screening'
        dfsvd.to_csv(str(e_) + '_scpred.csv', index=False)

    # One (label, colour, (x, y)) entry per curve, in drawing order.
    curves = [
        ('Sub-train', 'blue', ROCplot(Xtr5, ytr5)),
        ('Test', 'red', ROCplot(Xts, yts)),
    ]
    if has_response:
        curves.append(('Validation', 'green', ROCplot(Xvd, yvd)))

    pyplot.figure(figsize=(15, 10))
    for label, color, (x_vals, y_vals) in curves:
        pyplot.plot(x_vals,
                    y_vals,
                    label=label,
                    color=color,
                    marker='.',
                    linewidth=1,
                    markersize=10)
    pyplot.ylabel('True postive rate', fontsize=28)
    pyplot.xlabel('False postive rate', fontsize=28)
    pyplot.legend(fontsize=18)
    pyplot.tick_params(labelsize=18)
    # `papertype` and `frameon` are no longer passed: both were removed from
    # Matplotlib's savefig and raise TypeError on current releases. They were
    # given as None (the default), so older releases behave the same.
    pyplot.savefig(str(e_) + '_ROC.png',
                   dpi=300,
                   facecolor='w',
                   edgecolor='w',
                   orientation='portrait',
                   format=None,
                   transparent=False,
                   bbox_inches=None,
                   pad_inches=0.1,
                   metadata=None)
Exemple #3
0
def writefile1():
    """Run (optionally increment-based) feature selection and write reports.

    Uses the module-level frames ``file1`` (sub-training set) and ``file2``
    (test set): column 0 is carried through unchanged, column 1 is the
    response, the remaining columns are candidate descriptors.

    Mode selection:

    * ``var1`` -> ``trainsetfit``, size limit from ``fifthBoxTabThreer6c1``,
      report ``<c_>_fslda.txt``;
    * ``var2`` -> ``trainsetfit2``, size limit from ``fifthBoxTabThreer6c2``,
      report ``<c_>_sfslda.txt``;
    * ``Criterionx`` true -> models of size 1, 2, ... are fitted until the
      relative change (in percent) of the statistic ``b`` returned by the
      selector drops below the threshold read from ``flabel2``;
      false -> a single selection with the configured size limit.

    Also writes ``<c_>_corr.csv`` and ``<c_>_pred.csv``.

    Raises:
        ValueError: if no selection method / criterion combination is
            active, or if the preliminary selection returns no features.
            (The previous code failed with ``UnboundLocalError`` instead.)
    """
    Xtr = file1.iloc[:, 2:]
    ytr = file1.iloc[:, 1:2]
    ntr = file1.iloc[:, 0:1]
    Xts = file2.iloc[:, 2:]
    yts = file2.iloc[:, 1:2]
    nts = file2.iloc[:, 0:1]
    pc = int(flabel2.get())
    lt = [0]  # history of the statistic b, seeded with 0
    ls = []   # percentage increments between consecutive model sizes

    if var1.get():
        select = trainsetfit
        size_entry = fifthBoxTabThreer6c1
        suffix = "_fslda.txt"
        restrict_pool = True
    elif var2.get():
        select = trainsetfit2
        size_entry = fifthBoxTabThreer6c2
        suffix = "_sfslda.txt"
        restrict_pool = False
    else:
        raise ValueError(
            "No feature-selection method is selected (var1 and var2 are both unset)")

    criterion = Criterionx.get()
    # Mirrors the original `== True` / `== False` tests: any other value
    # matched no branch at all.
    if criterion not in (True, False):
        raise ValueError("Unexpected selection criterion value: %r" % (criterion,))
    incremental = bool(criterion)

    ms = int(size_entry.get())
    if incremental:
        a1, _b1, _c1, _d1 = select(Xtr, ytr, ms)
        if len(a1) == 0:
            raise ValueError("Feature selection returned no features")
        # NOTE(review): the trainsetfit path re-selects only among the
        # pre-selected columns a1, while the trainsetfit2 path searches all
        # of Xtr again. Kept as in the original; confirm this is intended.
        pool = Xtr[a1] if restrict_pool else Xtr

    # The report is opened exactly once. Previously it was re-opened in "w"
    # mode on every loop iteration (truncating it and dropping a handle each
    # time) and was not closed if a later step raised.
    with open(str(c_) + suffix, "w") as filer:
        if incremental:
            filer.write("Note that it is a Increment based selection result" +
                        "\n")
            for i in range(1, len(a1) + 1):
                a, b, c, d = select(pool, ytr, i)
                # NOTE(review): this fit is not read inside the loop and the
                # model is fitted again on the final subset below; it looks
                # redundant but is kept to preserve behaviour.
                model.fit(Xtr[a], ytr)
                lt.append(b)
                dv = abs(lt[-1] - lt[-2])
                # NOTE(review): on the first pass lt[-2] is the 0 seed, so
                # this divides by zero; presumably b is a NumPy float (giving
                # inf rather than raising) - confirm.
                val2 = dv / lt[-2] * 100
                ls.append(val2)
                if val2 < pc:
                    break
            filer.write("Increments :" + str(ls) + "\n")
        else:
            filer.write(
                "Note that it is not a Increment based selection result" +
                "\n")
            a, b, c, d = select(Xtr, ytr, ms)

        filer.write("Sub-training set results " + "\n")
        filer.write("\n")
        filer.write("Wilks lambda: " + str(b) + "\n")
        filer.write("Fvalue: " + str(c) + "\n")
        filer.write("pvalue: " + str(d) + "\n")
        model.fit(Xtr[a], ytr)
        filer.write("Selected features :" + str(a) + "\n")
        filer.write("intercept: " + str(model.intercept_) + "\n")
        filer.write("coefficients: " + str(model.coef_) + "\n")

        # Sub-training predictions.
        yprtr = pd.DataFrame(model.predict(Xtr[a]))
        yprtr.columns = ['Pred']
        yprtr2 = pd.DataFrame(model.predict_proba(Xtr[a]))
        yprtr2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadstr = apdom(Xtr[a], Xtr[a]).fit()
        dfstr = pd.concat([ntr, Xtr[a], ytr, yprtr, yprtr2, yadstr], axis=1)
        dfstr['Set'] = 'Sub_train'

        # Test-set predictions; apdom is given the training descriptors as
        # its second argument.
        yprts = pd.DataFrame(model.predict(Xts[a]))
        yprts.columns = ['Pred']
        yprts2 = pd.DataFrame(model.predict_proba(Xts[a]))
        yprts2.columns = ['%Prob(-1)', '%Prob(+1)']
        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfsts = pd.concat([nts, Xts[a], yts, yprts, yprts2, yadts], axis=1)
        dfsts['Set'] = 'Test'

        # Correlation matrix of the selected descriptors.
        tb = Xtr[a].corr()
        tb.to_csv(str(c_) + '_corr.csv')
        mx, mn = corr(tb)

        finda = pd.concat([dfstr, dfsts], axis=0)
        finda.to_csv(str(c_) + '_pred.csv', index=False)

        writefile2(Xtr[a], ytr, model, filer)
        filer.write('Maxmimum intercorrelation between descriptors: ' +
                    str(mx) + "\n")
        filer.write('Minimum intercorrelation between descriptors: ' +
                    str(mn) + "\n")
        filer.write("\n")
        filer.write("Test set results: " + "\n")
        filer.write("\n")
        writefile2(Xts[a], yts, model, filer)
Exemple #4
0
def writefilex():
    """Fit ``reg`` on the selected descriptors and report train/test results.

    Uses the module-level frames ``file1`` (training) and ``file2`` (test or
    screening). ``trainsetfit2`` supplies the selected columns ``a``, a
    statistics object ``b``, the value reported as Q2LOO ``c``, ``m`` (used
    as the reference mean for Q2F1), the max/min descriptor
    intercorrelations, ``l`` (plotted as the LOO predictions) and the open
    report handle ``filer`` that this function appends to.

    Files written (prefixed with ``str(c_)``): ``_sfslda_trpr.csv`` and
    either ``_sfslda_tspr.csv`` plus ``_obspred.png`` / ``_loopred.png``
    (when ``file2`` contains the response column) or ``_sfslda_scpr.csv``.

    If ``var3`` is set, a y-randomisation test with ``N1B1_x`` runs is
    performed and the resulting Crp2 value is written to the report.

    NOTE(review): ``filer`` is not closed here; ownership of the handle is
    not visible from this function - confirm that it is closed elsewhere.
    """
    Xtr = file1.iloc[:, 2:]
    ytr = file1.iloc[:, 1:2]
    ntr = file1.iloc[:, 0:1]
    a, b, c, m, mx, mn, l, filer = trainsetfit2(Xtr, ytr)
    reg.fit(Xtr[a], ytr)
    r2 = reg.score(Xtr[a], ytr)
    ypr = pd.DataFrame(reg.predict(Xtr[a]))
    ypr.columns = ['Pred']
    rm2tr, drm2tr = rm2(ytr, l).fit()
    d = mean_absolute_error(ytr, ypr)
    e = (mean_squared_error(ytr, ypr))**0.5
    yadstr = apdom(Xtr[a], Xtr[a]).fit()
    df = pd.concat([ntr, Xtr[a], ytr, ypr, l, yadstr], axis=1)
    df.to_csv(str(c_) + "_sfslda_trpr.csv", index=False)

    filer.write("Sub-training set results " + "\n")
    filer.write("\n")
    filer.write("Selected features are:" + str(a) + "\n")
    filer.write("Statistics:" + str(b) + "\n")
    filer.write('Training set results: ' + "\n")
    filer.write('Maxmimum intercorrelation between descriptors: ' + str(mx) +
                "\n")
    filer.write('Minimum intercorrelation between descriptors: ' + str(mn) +
                "\n")
    filer.write('MAE: ' + str(d) + "\n")
    filer.write('RMSE: ' + str(e) + "\n")
    filer.write('Q2LOO: ' + str(c) + "\n")

    # `papertype` and `frameon` are intentionally not passed to savefig: both
    # were removed from Matplotlib and raise TypeError on current releases.
    # They were given as None (the default), so older releases are unaffected.
    save_opts = dict(dpi=300,
                     facecolor='w',
                     edgecolor='w',
                     orientation='portrait',
                     format=None,
                     transparent=False,
                     bbox_inches=None,
                     pad_inches=0.1,
                     metadata=None)

    if ytr.columns[0] in file2.columns:
        # file2 carries the response column: treat it as a test set.
        Xts = file2.iloc[:, 2:]
        nts = file2.iloc[:, 0:1]
        yts = file2.iloc[:, 1:2]
        ytspr = pd.DataFrame(reg.predict(Xts[a]))
        ytspr.columns = ['Pred']
        rm2ts, drm2ts = rm2(yts, ytspr).fit()

        # External-validation metrics: 'diff2' is measured against m (from
        # trainsetfit2), 'diff3' against the mean of the test predictions.
        tsdf = pd.concat([yts, pd.DataFrame(ytspr)], axis=1)
        tsdf.columns = ['Active', 'Predict']
        tsdf['Aver'] = m
        tsdf['Aver2'] = tsdf['Predict'].mean()
        tsdf['diff'] = tsdf['Active'] - tsdf['Predict']
        tsdf['diff2'] = tsdf['Active'] - tsdf['Aver']
        tsdf['diff3'] = tsdf['Active'] - tsdf['Aver2']
        r2pr = 1 - ((tsdf['diff']**2).sum() / (tsdf['diff2']**2).sum())
        r2pr2 = 1 - ((tsdf['diff']**2).sum() / (tsdf['diff3']**2).sum())
        RMSEP = ((tsdf['diff']**2).sum() / tsdf.shape[0])**0.5

        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfts = pd.concat([nts, Xts[a], yts, ytspr, yadts], axis=1)
        dfts.to_csv(str(c_) + "_sfslda_tspr.csv", index=False)

        filer.write('rm2LOO: ' + str(rm2tr) + "\n")
        filer.write('delta rm2LOO: ' + str(drm2tr) + "\n")
        filer.write("\n")
        filer.write('Test set results: ' + "\n")
        filer.write('Number of observations: ' + str(yts.shape[0]) + "\n")
        filer.write('Q2F1/R2Pred: ' + str(r2pr) + "\n")
        filer.write('Q2F2: ' + str(r2pr2) + "\n")
        filer.write('rm2test: ' + str(rm2ts) + "\n")
        filer.write('delta rm2test: ' + str(drm2ts) + "\n")
        filer.write('RMSEP: ' + str(RMSEP) + "\n")
        filer.write("\n")

        # Observed vs. fitted (train) and predicted (test) values.
        plt1 = pyplot.figure(figsize=(15, 10))
        pyplot.scatter(ytr, ypr, label='Train', color='blue')
        pyplot.plot([ytr.min(), ytr.max()], [ytr.min(), ytr.max()],
                    'k--',
                    lw=4)
        pyplot.scatter(yts, ytspr, label='Test', color='red')
        pyplot.ylabel('Predicted values', fontsize=28)
        pyplot.xlabel('Observed values', fontsize=28)
        pyplot.legend(fontsize=18)
        pyplot.tick_params(labelsize=18)
        plt1.savefig(str(c_) + '_obspred.png', **save_opts)

        # Same plot, with l plotted for the training points instead.
        plt2 = pyplot.figure(figsize=(15, 10))
        pyplot.scatter(ytr, l, label='Train(LOO)', color='blue')
        pyplot.plot([ytr.min(), ytr.max()], [ytr.min(), ytr.max()],
                    'k--',
                    lw=4)
        pyplot.scatter(yts, ytspr, label='Test', color='red')
        pyplot.ylabel('Predicted values', fontsize=28)
        pyplot.xlabel('Observed values', fontsize=28)
        pyplot.legend(fontsize=18)
        pyplot.tick_params(labelsize=18)
        plt2.savefig(str(c_) + '_loopred.png', **save_opts)
    else:
        # No response column in file2: descriptors start at column 1 and
        # only predictions are exported.
        Xts = file2.iloc[:, 1:]
        nts = file2.iloc[:, 0:1]
        ytspr = pd.DataFrame(reg.predict(Xts[a]))
        ytspr.columns = ['Pred']
        yadts = apdom(Xts[a], Xtr[a]).fit()
        dfts = pd.concat([nts, Xts[a], ytspr, yadts], axis=1)
        dfts.to_csv(str(c_) + "_sfslda_scpr.csv", index=False)

    if var3.get():
        # y-randomisation: refit on shuffled responses and average the scores.
        ls = []
        nr = int(N1B1_x.get())
        for _ in range(nr):
            yr = shuffling(ytr)
            reg.fit(Xtr[a], yr)
            ls.append(reg.score(Xtr[a], yr))
        rr = np.mean(ls)
        # The loop leaves the shared `reg` fitted to shuffled labels; refit
        # it on the true response so later users of `reg` see the real model.
        # (This replaces a `reg.score(...)` call whose result was discarded.)
        reg.fit(Xtr[a], ytr)
        # math.sqrt raises ValueError if rr exceeds r2 (negative argument).
        crp2 = math.sqrt(r2) * math.sqrt(r2 - rr)
        filer.write('Crp2 after ' + str(nr) + ' run: ' + str(crp2) + "\n")