Example #1
0
def crossval(X,Y,size,k=10):
    """Learn one tree per fold with a slice of rows removed and score it on ``Y``.

    For each fold index ``i`` in ``range(0, k-1)`` the ``size`` consecutive row
    indices starting at ``size*i`` are removed from ``X``, a tree is built from
    the remaining rows with ``learn``, and that tree is evaluated against ``Y``
    through ``dt.test`` followed by ``dt.accr``.

    Args:
        X: Array-like exposing ``shape``; rows are selected along axis 0.
        Y: Passed unchanged to ``dt.test`` on every iteration.
        size: Number of consecutive row indices removed per fold.
        k: Fold count; note the loop body runs ``k-1`` times.

    Returns:
        A list holding the ``dt.accr`` result of each iteration, in order.
    """
    results = []
    for fold in range(k - 1):
        start = size * fold
        held_out = set(range(start, start + size))
        n_rows = X.shape[0]
        # Keep the set-difference form: the row order handed to np.take is
        # whatever order the resulting set iterates in.
        keep = list(set(range(n_rows)) - held_out)
        subset = np.take(X, keep, axis=0)
        model = learn(subset)
        res_a, res_b = dt.test(model, Y)
        results.append(dt.accr(res_a, res_b))
    return results
Example #2
0
def learningcurve(df,p=10,n=100):
    """Collect test/train accuracy and elapsed time for growing training sizes.

    For ``i`` in ``1 .. n-1`` the function calls
    ``dt.split(df, size*i, int(0.3*m))`` where ``m`` is ``df.shape[0]`` and
    ``size = int(0.7*m/n)``, learns a tree from the first returned part,
    and scores that tree on both returned parts with ``dt.test`` / ``dt.accr``.
    The time spanning learning plus both evaluations is recorded per step and
    the list of times is printed under the heading "Trial Times".

    Args:
        df: Two-dimensional data object exposing ``shape``.
        p: Unused by this function; kept so existing callers keep working.
        n: Number of steps the 70% share of rows is divided into. The loop
            runs ``n-1`` times.

    Returns:
        Tuple ``(sizes, testa, traina)``: the ``size*i`` value of each step,
        the accuracy on the second part returned by ``dt.split``, and the
        accuracy on the part the tree was learned from.
    """
    m, _ = df.shape
    size = int(0.7*m/n)
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() only on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock
    sizes = []
    traina = []
    testa = []
    times = []
    for i in range(1, n):
        train, trial = dt.split(df, size*i, int(0.3*m))
        started = timer()
        tree = learn(train)
        a, b = dt.test(tree, trial)
        score = dt.accr(a, b)
        c, d = dt.test(tree, train)
        scoret = dt.accr(c, d)
        finished = timer()
        sizes.append(size*i)
        traina.append(scoret)
        testa.append(score)
        times.append(finished - started)
    print("Trial Times")
    print(times)
    return sizes,testa,traina
Example #3
0
    print(times)
    return sizes,testa,traina

if __name__=="__main__":
    # Script entry point: cross-validate, score a single 70/30 split, then
    # plot a learning curve, all for the first collection returned by
    # dt.readin().
    print ("Boosted Decision Tree")
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() only on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock
    df1,df2 = dt.readin()
    train1, trial1, size = dt.cross(df1)
    score = crossval(train1,trial1,size)
    print("Cross Validation for Collection 1")
    print(score)
    m,n = df1.shape
    train1, trial1 = dt.split(df1,int(0.7*m),int(0.3*m))
    # The timed span covers learning plus both evaluations.
    s = timer()
    tree1 = learn(train1)
    a,b = dt.test(tree1, trial1)
    score = dt.accr(a,b)
    c,d = dt.test(tree1, train1)
    scoret = dt.accr(c,d)
    e = timer()
    print("Testing Set Score for Collection 1")
    print(score)
    print("Training Set Score for Collection 1")
    print(scoret)
    print("Runtime")
    print(e-s)

    # NOTE(review): 50 is passed positionally and therefore binds to the `p`
    # parameter of learningcurve, not to `n` - confirm this is intended.
    a,b,c = learningcurve(df1,50)
    # Plots the first returned list (sizes) against the second (testa).
    plt.plot(a,b)
    plt.title("Boosting - Collection 1")
    plt.xlabel("Training Sample Size")
    plt.ylabel("Accuracy")