def run_debuging_tests():
    """Run every learner once on a shuffled 60/40 split of a CSV file.

    The CSV path is read from ``sys.argv[1]``. Each non-blank line must be a
    comma-separated list of numbers; the last column is used as the target
    (Y) and all preceding columns as the features (X).

    Each learner is handed to ``run_learner`` together with the same
    train/test split, in this order: LinRegLearner, DTLearner, RTLearner,
    a BagLearner of ten LinRegLearners, and InsaneLearner.

    Raises:
        IndexError: if no path was given on the command line.
        ValueError: if a field cannot be parsed as a float.
    """
    # Use a context manager so the file handle is always closed.
    with open(sys.argv[1]) as inf:
        # Build concrete lists of floats: on Python 3 a bare map() is a lazy
        # iterator and NumPy would produce an array of map objects instead of
        # a 2-D float array. Blank lines (e.g. a trailing newline) are skipped
        # rather than crashing on float('').
        rows = [
            [float(field) for field in line.strip().split(',')]
            for line in inf
            if line.strip()
        ]
    data = np.array(rows)
    # A single row permutation is sufficient; the original shuffled the
    # already-permuted array a second time, which added nothing.
    data = np.random.permutation(data)
    # first 60% of the shuffled rows train, the remainder test
    train_rows = int(0.6 * data.shape[0])
    # separate out training and testing data
    trainX = data[:train_rows, 0:-1]
    trainY = data[:train_rows, -1]
    testX = data[train_rows:, 0:-1]
    testY = data[train_rows:, -1]
    # create each learner in turn and hand it to run_learner
    learner = lrl.LinRegLearner(verbose=True)  # create a LinRegLearner
    run_learner(learner, trainX, trainY, testX, testY)
    learner = dt.DTLearner(leaf_size=1, verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)
    learner = rt.RTLearner(leaf_size=1, verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)
    learner = bl.BagLearner(learner=lrl.LinRegLearner,
                            kwargs={},
                            bags=10,
                            boost=False,
                            verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)
    learner = il.InsaneLearner(verbose=False)
    run_learner(learner, trainX, trainY, testX, testY)
 def insane():
     """Fit an InsaneLearner on trainX/trainY and query it with testX.

     trainX, trainY and testX are not defined in this function, so they are
     looked up in an enclosing scope. The query result is bound to a local
     name only and is not returned.
     """
     import InsaneLearner
     model = InsaneLearner.InsaneLearner(verbose=False)
     model.addEvidence(trainX, trainY)
     predictions = model.query(testX)
Example #3
0
    # Create the learner selected by ltype (ltype, data and the train/test
    # arrays are defined outside this excerpt).
    # Values 0-4 construct a learner object; values 5-7 instead call a
    # plotting or metrics helper and do not assign `learner` in this chain.

    if ltype == 0:
        learner = lrl.LinRegLearner(verbose = True) # create a LinRegLearner
    elif ltype == 1:
        print ('Creating DTLearner')
        learner = dtl.DTLearner(1,verbose=False)
    elif ltype == 2:
        print ('Creating RTLearner')
        learner = rtl.RTLearner(1, verbose=False)
    elif ltype == 3:
        print ('Creating BAGLearner')
        # bag of 15 DTLearners, each constructed with leaf_size=1
        learner = bagl.BagLearner(dtl.DTLearner,kwargs = {"leaf_size":1},bags=15,verbose=False)
    elif ltype == 4:
        print ('Creating InsaneLearner')
        learner = itl.InsaneLearner(verbose=False)
    elif ltype == 5:
        print ('Creating plot for DTlearner for overfitting vs leaf size')
        leafSizePlot(dtl.DTLearner,trainX,trainY,testX,testY,"DTLearner",data,100)
    elif ltype == 6:
        print ('Creating plot for BagLearner for overfitting vs leaf size')
        leafSizePlot(bagl.BagLearner,trainX,trainY,testX,testY,"BagLearner",data,25)
    elif ltype == 7:
        print('Calculating metrics')
        calculateMetrics(data)

    # Only the branches that built a learner above (ltype 0-4) are trained.
    if(ltype in range(0,5)):
        learner.addEvidence(trainX, trainY) # train it
        print learner.author()

        # evaluate in sample
        # NOTE(review): the evaluation code itself is not part of this excerpt.
Example #4
0
    # Split `data` (defined outside this excerpt) 60/40 into training and
    # testing rows, fit an InsaneLearner and report its in-sample error.
    # compute how much of the data is training and testing
    train_rows = int(0.6 * data.shape[0])
    #train_rows = int(data.shape[0])
    # test_rows is computed but not used in the lines visible here
    test_rows = data.shape[0] - train_rows

    # separate out training and testing data
    # (last column is the target Y, all other columns are the features X)
    trainX = data[:train_rows, 0:-1]
    trainY = data[:train_rows, -1]
    testX = data[train_rows:, 0:-1]
    testY = data[train_rows:, -1]

    print trainX.shape
    #trainY = trainY.reshape(8,1)

    # create a learner and train it
    learner = il.InsaneLearner()
    learner.addEvidence(trainX, trainY)
    print learner.author()

    # evaluate in sample
    predY = learner.query(trainX)  # get the predictions
    print "PRED Y Shape: " + str(predY.shape)
    print "TRAINX SHAPE: " + str(trainX.shape)
    # root-mean-square error between the training targets and predictions
    rmse = math.sqrt(((trainY - predY)**2).sum() / trainY.shape[0])
    print  # Python 2 print statement: emits a blank separator line
    print "In sample results"
    print "RMSE: ", rmse
    # c is the 2x2 correlation matrix; c[0, 1] is corr(predY, trainY)
    c = np.corrcoef(predY, y=trainY)
    print "corr: ", c[0, 1]

    # evaluate out of sample
    # NOTE(review): the out-of-sample evaluation is not part of this excerpt.
    # Split `data` (defined outside this excerpt) 60/40, fit an InsaneLearner
    # and compute RMSE in sample and out of sample.
    # compute how much of the data is training and testing
    train_rows = int(0.6* data.shape[0])
    # test_rows is computed but not used in the lines visible here
    test_rows = data.shape[0] - train_rows

    # separate out training and testing data
    # (last column is the target Y, all other columns are the features X)
    trainX = data[:train_rows,0:-1]
    trainY = data[:train_rows,-1]
    testX = data[train_rows:,0:-1]
    testY = data[train_rows:,-1]

    print testX.shape
    print testY.shape

    # create a learner and train it
    learner = it.InsaneLearner(verbose = False) # create a InsaneLearner
    learner.addEvidence(trainX, trainY) # train it
    print learner.author()

    # evaluate in sample
    predY = learner.query(trainX) # get the predictions
    # root-mean-square error between the training targets and predictions
    rmse = math.sqrt(((trainY - predY) ** 2).sum()/trainY.shape[0])
    print  # Python 2 print statement: emits a blank separator line
    print "In sample results"
    print "RMSE: ", rmse
    # c is the 2x2 correlation matrix; c[0,1] is corr(predY, trainY)
    c = np.corrcoef(predY, y=trainY)
    print "corr: ", c[0,1]

    # evaluate out of sample
    predY = learner.query(testX) # get the predictions
    # same RMSE formula, now against the held-out targets; the excerpt ends
    # before this value is reported
    rmse = math.sqrt(((testY - predY) ** 2).sum()/testY.shape[0])
import numpy as np
import InsaneLearner as it

if __name__ == '__main__':
    # Script entry point: fit an InsaneLearner on the first 60% of the rows
    # of the CSV and print RMSE and correlation, in sample and out of sample.
    data = np.genfromtxt('Data/best4lrr_data.csv', delimiter=',')
    # Rows are deliberately NOT shuffled so the output can be compared with a
    # single LinRegLearner run on the same split.
    split = int(0.6 * data.shape[0])  # 60-40 break into train-test sets
    trainX = data[:split, :-1]
    trainY = data[:split, -1]  # last column is labels
    testX = data[split:, :-1]
    testY = data[split:, -1]  # last column is labels

    learner = it.InsaneLearner(verbose=False)  # constructor for InsaneLearner
    # NOTE(review): this calls `add_Evidence`; confirm that is the spelling
    # InsaneLearner exposes (the name `addEvidence` is also common).
    learner.add_Evidence(trainX, trainY)

    # In-sample: query on the rows the learner was trained on.
    Y = learner.query(trainX)  # get the predictions
    rmse = np.sqrt(((Y - trainY)**2).sum() / trainY.shape[0])
    corr = np.corrcoef(Y, trainY)  # 2x2 matrix; [0, 1] is corr(Y, trainY)
    print("In sample results")
    print("RMSE: ", rmse)
    print("corr: ", corr[0, 1])

    # Out-of-sample: query on the held-out rows.
    Y = learner.query(testX)  # get the predictions
    rmse = np.sqrt(((Y - testY)**2).sum() / testY.shape[0])
    corr = np.corrcoef(Y, testY)
    # A bare `print` is a no-op expression when print is a function; call it
    # so the blank separator line is actually emitted.
    print()
    print("Out of sample results")
    print("RMSE: ", rmse)
    print("corr: ", corr[0, 1])
Example #7
0
    # NOTE(review): this section was headed "DTlearner", but the code below
    # builds an InsaneLearner and then a LinRegLearner; no DTLearner is used
    # in the visible lines.
    # compute how much of the data is training and testing
    train_rows = int(0.6 * data.shape[0])
    # test_rows is computed but not used in the lines visible here
    test_rows = data.shape[0] - train_rows

    # separate out training and testing data
    # (last column is the target Y, all other columns are the features X)
    trainX = data[:train_rows, 0:-1]
    trainY = data[:train_rows, -1]
    testX = data[train_rows:, 0:-1]
    testY = data[train_rows:, -1]

    print testX.shape
    print testY.shape

    # create a learner and train it
    learner = it.InsaneLearner()  # constructor
    learner.addEvidence(trainX, trainY)  # training step
    # Y is not referenced again in the lines visible here
    Y = learner.query(testX)  # query
    print learner.author()
    # `learner` is rebound here, so everything below evaluates the
    # LinRegLearner rather than the InsaneLearner
    learner = lrl.LinRegLearner(verbose=True)  # create a LinRegLearner
    learner.addEvidence(trainX, trainY)  # train it
    pred = learner.query(trainX)
    print learner.author()

    # evaluate in sample
    # predY = dt_learner.query(trainX) # get the predictions
    # root-mean-square error between the training targets and predictions
    rmse = math.sqrt(((trainY - pred)**2).sum() / trainY.shape[0])
    print  # Python 2 print statement: emits a blank separator line
    print "In sample results"
    print "RMSE: ", rmse
    # c is the 2x2 correlation matrix of pred and trainY
    c = np.corrcoef(pred, y=trainY)