# Weather preprocessing (continues from the weather.csv load above this point).
# The rows are kept as plain nested lists; the NumPy conversion stays disabled:
# data = np.array(data)
data = [list(row) for row in data]

# working with weather ---------------------------------------------------------
# Load key.csv and build a lookup from its first column to its second column.
# NOTE(review): presumably store number -> weather station number; confirm
# against the key.csv header.
with open(r'key.csv') as key_file:
    key = csv.reader(key_file)
    next(key)  # skip the header row; works on Python 2.6+ and Python 3
    key = [[int(cell) for cell in row] for row in key]

# Later rows overwrite earlier ones when the first column repeats.
keyS = {row[0]: row[1] for row in key}

# Pull the weather columns that are used further down, by column index.
snow = da.getCollumn(data, 13)
rain = da.getCollumn(data, 14)
codeSum = da.getCollumn(data, 12)
station = da.getCollumn(data, 0)
date = da.getCollumn(data, 1)

# Replace the missing ('M') and trace (' T') markers with numeric values.
# NOTE(review): the trace marker is matched literally as ' T' (one leading
# space); verify the padding against the raw weather file.
# NOTE(review): only 'M' is replaced for snow; trace values in the snow column
# are left untouched here -- confirm that is intended.
for i, amount in enumerate(rain):
    if amount == 'M':
        rain[i] = 0.0
    elif amount == ' T':
        rain[i] = 0.01
    if snow[i] == 'M':
        snow[i] = 0.0
# Collect the target value (column 3) of every 10th training row, starting at
# row 1806350.
# NOTE(review): the start offset and stride are hard-coded; presumably they
# select a hold-out sample -- confirm against the code that uses actualValue.
actualValue = [train[i][3] for i in range(1806350, len(train), 10)]

# print(<single expression>) produces the same output on Python 2 and Python 3.
print(len(train[0]))

# Drop columns 5, 4 and 3 from train, highest index first so the remaining
# indices do not shift between the calls.
da.delCollumn(train, 5)
da.delCollumn(train, 4)
da.delCollumn(train, 3)
print(len(actualValue))

# Load the fitting matrix as floats; the file is closed as soon as every row
# has been converted.
# NOTE(review): the model below is fitted on the contents of 'testData.csv';
# confirm that the file name is not misleading.
with open(r'testData.csv') as test_file:
    x = [[float(cell) for cell in row] for row in csv.reader(test_file)]

# Column 3 is the regression target: copy it out, then remove it from the
# feature matrix.
o = da.getCollumn(x, 3)
da.delCollumn(x, 3)
print("data uploaded")

# Alternative models that were tried earlier and are currently disabled:
#   from sklearn.tree import DecisionTreeRegressor
#   clf = DecisionTreeRegressor(max_depth = 38)
#   Recorded results (presumably max_depth = error; TODO confirm the metric):
#   16:18=45,14=48,22=0.075,24=0,067,26=0.06,28=0.056,32=0.053,34=0,05,40=0.049
#   from sklearn.svm import SVR
#   clf= SVR(kernel= 'rbf', C = 1e3)
#   from sklearn import linear_model
#   clf = linear_model.LinearRegression()
from sklearn.ensemble import RandomForestRegressor

# A fixed random_state keeps the fitted forest reproducible between runs.
clf = RandomForestRegressor(random_state=0, n_estimators=60, max_depth=38)
clf.fit(x, o)
# Load the weather observations. The file is held open only while the rows are
# read, then closed deterministically.
with open(r'weather.csv') as weather_file:
    data = csv.reader(weather_file)
    next(data)  # skip the header, i.e. the first row of the file
    # The rows are kept as plain nested lists; the NumPy conversion stays
    # disabled:
    # data = np.array(data)
    data = [list(row) for row in data]

# working with weather ---------------------------------------------------------
# Load key.csv and build a lookup from its first column to its second column.
# NOTE(review): presumably store number -> weather station number; confirm
# against the key.csv header.
with open(r'key.csv') as key_file:
    key = csv.reader(key_file)
    next(key)  # skip the header row; works on Python 2.6+ and Python 3
    key = [[int(cell) for cell in row] for row in key]

# Later rows overwrite earlier ones when the first column repeats.
keyS = {row[0]: row[1] for row in key}

# Pull the weather columns that are used further down, by column index.
snow = da.getCollumn(data, 13)
rain = da.getCollumn(data, 14)
codeSum = da.getCollumn(data, 12)
station = da.getCollumn(data, 0)
date = da.getCollumn(data, 1)

# Replace the missing ('M') and trace (' T') markers with numeric values.
# NOTE(review): the trace marker is matched literally as ' T' (one leading
# space); verify the padding against the raw weather file.
# NOTE(review): only 'M' is replaced for snow; trace values in the snow column
# are left untouched here -- confirm that is intended.
for i, amount in enumerate(rain):
    if amount == 'M':
        rain[i] = 0.0
    elif amount == ' T':
        rain[i] = 0.01
    if snow[i] == 'M':
        snow[i] = 0.0