# Script: load match-prediction features and outcomes through the project
# helper ``lineq6``, split them into train/test halves, and set up the
# hyper-parameters for a regression model.
#
# The original had every statement on one physical line, which is not valid
# Python (and the first ``#`` commented out everything after it). The
# statements below are the same ones, one per line.

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn import ensemble
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

import lineq6 as lq

###############################################################################
# Load data
# boston = datasets.load_boston()

# Feature columns for the first 50 rows of ``traingamepred``.
sql = (
    "select winprob,loseprob,drawprob,winconc,loseconc,drawconc,"
    "homeformadj/100,awayformadj/100,coalesce(pctwin,0),coalesce(pctnotwin,0) "
    "from traingamepred limit 50"
)
# sql="select winprob,coalesce(pctwin,0) from traingamepred limit 30"

# presumably returns a NumPy array (``.astype`` / ``.shape`` are used on it
# below) -- confirm against lineq6.getdata2
X = lq.getdata2(sql)

# Match outcome encoded as a number: 'H' -> 2, 'A' -> 1, 'D' -> 0.
# NOTE(review): features and labels come from two separate ``limit 50``
# queries without an ORDER BY, so nothing in the SQL guarantees that row i of
# X and row i of y describe the same match -- confirm or add an ordering.
sql = (
    "select case ftr when 'H' then 2 when 'A' then 1 when 'D' then 0 end "
    "from traingamepred limit 50"
)
y = lq.getdata2(sql)

# X, y = shuffle(boston.data, boston.target, random_state=13)
X = X.astype(np.float32)

# First half of the rows is used for training, second half for testing
# (no shuffling is applied).
offset = int(X.shape[0] * 0.5)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

###############################################################################
# Fit regression model
# NOTE(review): if these parameters are passed to a scikit-learn
# gradient-boosting estimator, recent releases spell the 'ls' loss
# 'squared_error' -- confirm against the installed version.
params = {
    'n_estimators': 2000,
    'max_depth': 16,
    'learning_rate': 0.0001,
    'loss': 'ls',
    # NOTE(review): the visible source is cut off after the 'loss' entry.
    # The dict is closed here only so the script parses; any further
    # entries and the code that consumes ``params`` are not shown.
}
# Script: train a RandomForestClassifier on historical match predictions
# (``pred_res`` rows before 2016-01-18) and load a later hold-out set
# (``gamepred`` joined to ``rescuttmp``, 2016-01-19 .. 2016-04-20) together
# with its encoded outcomes.
#
# The original had every statement on one physical line, which is not valid
# Python (and the first ``#`` commented out everything after it). The
# statements below are the same ones, one per line; every SQL string sent to
# ``lq.getdata2`` is character-for-character what it was before.

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

import lineq6 as lq

###############################################################################
# Load data
# boston = datasets.load_boston()

# Feature columns, shared by the training and hold-out queries so both
# matrices always have the same columns in the same order.
_FEATURE_COLUMNS = (
    "winprob,loseconc,loseprob,winconc,homeformadj/100,awayformadj/100,"
    "coalesce(pctwin,0),coalesce(pctnotwin,0)"
)

# Outcome encoding: 'H' -> 20, 'A' -> 10, 'D' -> 0. ``%s`` is the column
# holding the result code.
_LABEL_CASE = "case %s when 'H' then 20 when 'A' then 10 when 'D' then 0 end"

# Row sources. The hold-out source matches predictions to results on
# normalised team names and match date.
# NOTE(review): training uses ``matchdate < '2016-01-18'`` and the hold-out
# starts at '2016-01-19', so matches on 2016-01-18 are in neither set --
# confirm that is intended.
_TRAIN_SOURCE = "from pred_res where matchdate < '2016-01-18'"
_HOLDOUT_SOURCE = (
    "from gamepred p join rescuttmp r "
    "on replace(lower(r.hometeam),' ','')=lower(p.hometeam) "
    "and replace(lower(r.awayteam),' ','')=lower(p.awayteam) "
    "and r.matchdate=p.matchdate "
    "where p.matchdate between '2016-01-19' and '2016-04-20' "
    "and winprob is not null and loseconc is not null"
)


def _query(columns, source):
    """Return ``select <columns> <source>`` as a single SQL string."""
    return "select " + columns + " " + source


# NOTE(review): features and labels are fetched by separate queries without an
# ORDER BY; the code relies on both returning rows in the same order.

# Training features.
train = _query(_FEATURE_COLUMNS, _TRAIN_SOURCE)
# sql="select winprob,coalesce(pctwin,0) from traingamepred"
# presumably returns a NumPy array (``.astype`` / ``.ravel`` are used on the
# results below) -- confirm against lineq6.getdata2
traindata = lq.getdata2(train)

# Training labels.
sql = _query(_LABEL_CASE % "result", _TRAIN_SOURCE)
result = lq.getdata2(sql)

traindata = traindata.astype(np.float32)

# Hold-out features.
sql = _query(_FEATURE_COLUMNS, _HOLDOUT_SOURCE)
predict = lq.getdata2(sql)

# Hold-out labels. Despite its name, ``teams`` receives the encoded outcomes.
# The trailing space is kept so the query text is unchanged.
sql = _query(_LABEL_CASE % "r.ftr", _HOLDOUT_SOURCE) + " "
teams = lq.getdata2(sql)

# Fit classification model
clf = RandomForestClassifier(n_estimators=10, random_state=1)
# ``ravel()`` flattens the label array to the 1-D shape ``fit`` expects.
clf.fit(traindata, result.ravel())

# Disabled leftovers: the mse/print lines reference ``y_test`` / ``X_test``,
# which are not assigned in this part of the script.
# clf.predict(predict)
# mse = mean_squared_error(y_test, clf.predict(X_test))
# print (y_test)
# print (clf.predict(X_test))
# print("MSE: %.4f" % mse)