def output():
    """Train a model on the full training set and write test predictions to CSV.

    NOTE(review): relies on module-level names `numpy`, `model`, `read`, and
    `to_output` being in scope elsewhere in this file -- confirm before moving.
    """
    # Read in the training set; the first two columns are skipped as features
    # (presumably row IDs -- TODO confirm) and the last column is the target.
    train = numpy.genfromtxt("../nomissing.csv", delimiter=',')
    Xtrain = train[:, 2:-1]
    ytrain = train[:, -1]
    print("data read")

    # Fit whatever model the module-level `model` factory builds.
    mod = model(Xtrain, ytrain)
    print(mod.get_params())

    # Now score the Kaggle test set and dump the predictions to disk.
    test = read("../avtest.csv")
    to_output.to_output(mod.predict(test), "predictions2.csv")
import svrcomp
import to_output
import numpy as np
from sklearn import cross_validation, svm, metrics
from sklearn.grid_search import GridSearchCV

if __name__ == '__main__':
    # Load training data via the project helper; `data` is part of the helper's
    # return interface but unused here.
    Xtrain, ytrain, data = svrcomp.getdata("nomissing.csv")
    Xtest = svrcomp.gettestdata("../avtest.csv")

    # Candidate regularization strengths: 0.1 .. 1e5, log-spaced.
    # (A `gammas` grid was computed here previously but never used --
    # LinearSVR has no gamma parameter -- so it has been removed.)
    Cs = np.logspace(-1, 5, 7)

    # Grid-search a LinearSVR by mean absolute error. epsilon=0 with the
    # squared_epsilon_insensitive loss reduces the objective to plain
    # L2-regularized squared error.
    classifier = GridSearchCV(
        estimator=svm.LinearSVR(),
        scoring='mean_absolute_error',
        param_grid=dict(
            C=Cs,
            epsilon=[0],
            dual=[False],
            loss=['squared_epsilon_insensitive'],
        ),
    )
    classifier.fit(Xtrain, ytrain)

    preds = classifier.predict(Xtest)
    # Clamp negative predictions to zero -- presumably the target is a
    # non-negative quantity (TODO confirm). Vectorized replacement for the
    # original element-by-element Python loop.
    preds = np.clip(preds, 0, None)
    to_output.to_output(preds, "svrtest.csv")
# Drop the training rows and collect, so the big test matrix built below has
# headroom. NOTE(review): `dtrain`, `model`, and the imports (gc, csv, numpy,
# xgboost, to_output) come from earlier in this file.
dtrain = []
gc.collect()
print("cleaning memory....")

# Now run on the actual testing data for Kaggle.
test = []
# `with` guarantees the file handle is closed (the original leaked it).
with open("../avtest.csv", "r") as infile:
    testreader = csv.reader(infile, delimiter=",")
    for i, row in enumerate(testreader, 1):
        # Force a collection every 5000 rows to keep peak memory down while
        # the `test` list grows. (Original compared against the float 5000.0;
        # an integer modulus is exact and clearer.)
        if i % 5000 == 0:
            print("clean up" + str(i))
            gc.collect()
        # Convert fields to float; empty fields become NaN so xgboost can
        # treat them as missing values.
        test.append([float(j) if j else float("nan") for j in row])

print("done looping")
test = numpy.array(test)
print(test[0])
print(test.shape)

# Wrap in a DMatrix with NaN flagged as missing, predict, and write out.
dfintest = xgboost.DMatrix(test, missing=float("nan"))
finpred = model.predict(dfintest)
print(finpred)
to_output.to_output(finpred, "predictions.csv")