Example #1
import numpy
import to_output


def output():
    # read in the training set: skip the first two columns, use the last column as the target
    train = numpy.genfromtxt("../nomissing.csv", delimiter=',')
    Xtrain = train[:, 2:-1]
    ytrain = train[:, -1]
    print("data read")
    # fit the model -- model() is a helper assumed to be defined elsewhere in this module
    mod = model(Xtrain, ytrain)
    print(mod.get_params())
    # now load the test set and write the predictions out
    # (read() is another helper from the surrounding module)
    test = read("../avtest.csv")
    to_output.to_output(mod.predict(test), "predictions2.csv")
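The snippet above leans on two helpers, model() and read(), whose definitions are not shown. The following is only a hypothetical sketch of what they might look like, assuming a scikit-learn regressor and the same CSV layout as the training file; the choice of RandomForestRegressor and the column slicing are assumptions, not the original code.

import numpy
from sklearn.ensemble import RandomForestRegressor


def model(Xtrain, ytrain):
    # hypothetical stand-in: fit any scikit-learn regressor on the training data
    reg = RandomForestRegressor(n_estimators=100)
    reg.fit(Xtrain, ytrain)
    return reg


def read(path):
    # hypothetical stand-in: load the test CSV the same way the training set is
    # loaded above, dropping the first two columns
    data = numpy.genfromtxt(path, delimiter=',')
    return data[:, 2:]
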
Example #2
import svrcomp
import to_output
import numpy as np
from sklearn import svm
from sklearn.grid_search import GridSearchCV

if __name__ == '__main__':
    # load the training and test sets via the project's svrcomp helpers
    Xtrain, ytrain, data = svrcomp.getdata("nomissing.csv")
    Xtest = svrcomp.gettestdata("../avtest.csv")
    # grid of regularization strengths to search over
    Cs = np.logspace(-1, 5, 7)
    # note: gammas is never used below -- LinearSVR has no gamma parameter
    gammas = np.logspace(-5, -1, 5)
    classifier = GridSearchCV(
        estimator=svm.LinearSVR(),
        scoring='mean_absolute_error',
        param_grid=dict(C=Cs, epsilon=[0], dual=[False],
                        loss=['squared_epsilon_insensitive']))
    classifier.fit(Xtrain, ytrain)
    preds = classifier.predict(Xtest)
    # clamp negative predictions to zero before writing them out
    for i in range(len(preds)):
        if preds[i] < 0:
            preds[i] = 0
    to_output.to_output(preds, "svrtest.csv")
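The sklearn.grid_search module and the 'mean_absolute_error' scoring string used above were removed in later scikit-learn releases. A minimal sketch of the same search against the current API follows; the wrapper name fit_linear_svr is made up for illustration, and np.clip stands in for the explicit clamping loop.

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVR


def fit_linear_svr(Xtrain, ytrain, Xtest):
    # same hyperparameter grid as above, expressed against the current API
    Cs = np.logspace(-1, 5, 7)
    search = GridSearchCV(
        estimator=LinearSVR(),
        scoring='neg_mean_absolute_error',  # MAE is negated in current releases
        param_grid=dict(C=Cs, epsilon=[0], dual=[False],
                        loss=['squared_epsilon_insensitive']))
    search.fit(Xtrain, ytrain)
    # clamp negative predictions to zero, as the loop above does
    return np.clip(search.predict(Xtest), 0, None)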
    
    
Example #3
# gc, csv, numpy, xgboost and to_output are imported earlier in this script,
# and model is the booster trained above.
# drop the reference to the training data and force a collection pass to free memory
dtrain = []
gc.collect()
print("cleaning memory....")
# now run on the actual testing data for kaggle
testreader = csv.reader(open("../avtest.csv", "r"), delimiter=",")
test = []
i = 0
for row in testreader:
    i += 1
    # convert strings to floats, mapping empty fields to NaN
    converted = []
    # force a garbage-collection pass every 5000 rows
    if i % 5000 == 0:
        print("clean up " + str(i))
        gc.collect()
    for j in row:
        if len(j) > 0:
            converted.append(float(j))
        else:
            converted.append(float("nan"))
    test.append(converted)

print("done looping")
test = numpy.array(test)
print(test[0])
print(test.shape)
# NaN marks the missing entries for xgboost
dfintest = xgboost.DMatrix(test, missing=float("nan"))
finpred = model.predict(dfintest)
print(finpred)
to_output.to_output(finpred, "predictions.csv")
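The hand-rolled CSV loop above mainly exists to map empty fields to NaN; numpy.genfromtxt already does that by default for float data. A short sketch of an equivalent load-and-predict step is below; the wrapper name predict_test_set is made up, while the file path, the NaN missing marker, and the model/to_output objects are taken from the original.

import numpy
import xgboost
import to_output


def predict_test_set(model, path="../avtest.csv"):
    # genfromtxt fills empty fields with NaN for float data,
    # which matches the missing marker passed to DMatrix below
    test = numpy.genfromtxt(path, delimiter=",")
    dfintest = xgboost.DMatrix(test, missing=float("nan"))
    return model.predict(dfintest)


# usage, mirroring the original script:
# to_output.to_output(predict_test_set(model), "predictions.csv")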