#data = np.array(data)

# Materialise `data` as a list of rows, copying each row into its own list.
data = [list(row) for row in data]


# working with weather______________________________________________________
# Load key.csv, which (per the original note) relates station data to the
# store place.  The file is closed as soon as its rows have been read.
with open(r'key.csv') as key_file:
    key_reader = csv.reader(key_file)
    next(key_reader)  # skip the header row
    # Every remaining cell must parse as an integer.
    key = [[int(cell) for cell in row] for row in key_reader]

# keyS maps the first column of each key row to its second column; when a
# first-column value repeats, the last row wins.
keyS = {row[0]: row[1] for row in key}



# Pull the weather columns used below out of `data` by position.
snow = da.getCollumn(data, 13)
rain = da.getCollumn(data, 14)
codeSum = da.getCollumn(data, 12)
station = da.getCollumn(data, 0)
date = da.getCollumn(data, 1)

# Replace the missing ('M') and trace ('  T') markers in the rain column,
# and the missing marker in the snow column, with numeric stand-ins.
# Every other entry is left exactly as it was read.
for i, rain_value in enumerate(rain):
    if rain_value == 'M':
        rain[i] = 0.0
    elif rain_value == '  T':
        rain[i] = 0.01

    if snow[i] == 'M':
        snow[i] = 0.0
# Collect column 3 of every 10th row of `train`, starting at row 1806350,
# before that column is removed from `train` below.
actualValue = [train[i][3] for i in range(1806350, len(train), 10)]

# Parenthesised single-argument print emits the same text on Python 2 and 3.
print(len(train[0]))  # row width before the columns are dropped

# Drop columns 5, 4 and 3 -- highest index first, presumably so the
# remaining positions do not shift between calls (assumes delCollumn edits
# `train` in place; confirm against the `da` helper).
da.delCollumn(train, 5)
da.delCollumn(train, 4)
da.delCollumn(train, 3)
print(len(actualValue))


# Read testData.csv and convert every cell to float in a single pass.  No
# header row is skipped, so every row in the file must be numeric.  The
# file is closed as soon as its rows have been read.
with open(r'testData.csv') as test_file:
    x = [[float(cell) for cell in row] for row in csv.reader(test_file)]

# Take column 3 out as `o`, then remove it from `x`.
o = da.getCollumn(x, 3)
da.delCollumn(x, 3)
# Parenthesised single-argument print emits the same text on Python 2 and 3.
print("data uploaded")

#from sklearn.tree import DecisionTreeRegressor
#clf = DecisionTreeRegressor(max_depth = 38)
#16:18=45,14=48,22=0.075,24=0,067,26=0.06,28=0.056,32=0.053,34=0,05,40=0.049
#from sklearn.svm import SVR
#clf= SVR(kernel= 'rbf', C = 1e3)
#from sklearn import linear_model
#clf = linear_model.LinearRegression()
# Active model: a random forest regressor with 60 trees, tree depth capped
# at 38 and a fixed random seed.
from sklearn.ensemble import RandomForestRegressor

clf = RandomForestRegressor(
    n_estimators=60,
    max_depth=38,
    random_state=0
)

# Fit on the rows of `x`, using `o` as the target values.
clf.fit(x, o)
Example #3
0
# Read weather.csv into `data` as a list of rows (each row a list of
# strings).  The file is closed as soon as its rows have been read.
with open(r'weather.csv') as weather_file:
    weather_reader = csv.reader(weather_file)
    next(weather_reader)  # discard the header, i.e. the first row of the file
    data = [list(row) for row in weather_reader]
#data = np.array(data)

# working with weather______________________________________________________
# Load key.csv, which (per the original note) relates station data to the
# store place.  The file is closed as soon as its rows have been read.
with open(r'key.csv') as key_file:
    key_reader = csv.reader(key_file)
    next(key_reader)  # skip the header row
    # Every remaining cell must parse as an integer.
    key = [[int(cell) for cell in row] for row in key_reader]

# keyS maps the first column of each key row to its second column; when a
# first-column value repeats, the last row wins.
keyS = {row[0]: row[1] for row in key}

# Pull the weather columns used below out of `data` by position.
snow = da.getCollumn(data, 13)
rain = da.getCollumn(data, 14)
codeSum = da.getCollumn(data, 12)
station = da.getCollumn(data, 0)
date = da.getCollumn(data, 1)

# Replace the missing ('M') and trace ('  T') markers in the rain column,
# and the missing marker in the snow column, with numeric stand-ins.
# Every other entry is left exactly as it was read.
for i, rain_value in enumerate(rain):
    if rain_value == 'M':
        rain[i] = 0.0
    elif rain_value == '  T':
        rain[i] = 0.01

    if snow[i] == 'M':
        snow[i] = 0.0