], 1) train_dataset = a.sample(frac=0.85, random_state=16) test_dataset = a.drop(train_dataset.index) train_labels = train_dataset.pop('Result') test_labels = test_dataset.pop('Result') clf = LogisticRegression(n_jobs=-1) train_dataset = preprocessing.normalize(train_dataset) test_dataset = preprocessing.normalize(test_dataset) clf.fit(train_dataset, train_labels) joblib.dump(clf, 'Logistic.joblib') acc = clf.score(test_dataset, test_labels) preds = clf.predict(test_dataset) print(f.acc(preds, test_labels)) zeros, ones = 0, 0 for pred in preds: if round(pred) == 1: ones += 1 else: zeros += 1 print('lenght of test:', len(preds)) print('0s:', zeros / len(preds)) print('1s:', ones / len(preds))
# NOTE(review): this fragment begins inside a construct whose header is not
# visible here, and it contains two bare triple-quote delimiters. Whether the
# text between them is live code or a disabled string literal cannot be
# determined from this fragment — confirm against the full file.
# NOTE(review): `drop(del2,1)` passes `axis` positionally, which pandas >= 2.0
# rejects; if that statement is live it should become `drop(del2, axis=1)`.
if abs(corr[x]) < 0.07:
    del2.append(x)
data=data.drop(del2,1)
"""
clf = MLPRegressor(activation='logistic',random_state=1,max_iter=500)
Y=data.pop('Result')
X=data
accs=[]
for rs in range(10):
    x_train,x_test,y_train,y_test = train_test_split(X, Y, test_size=0.2, random_state=rs)
    clf.fit(x_train,y_train)
    preds=clf.predict(x_test)
    #print('zeros:',f.get0and1(preds))
    accs.append(f.acc(preds,y_test))
print(sum(accs)/len(accs))
#joblib.dump(clf,'regression_linear.joblib')
"""
games=pd.read_csv(path2data+'games.csv')
df2log=pd.DataFrame()
df2log['home']=games['home']
df2log['away']=games['away']
df2log['date']=games['date']
# predict today's games
c2_avg=['PTS', 'FGM', 'FGA','FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'PF', '+/-']
# Train an ExtraTreesRegressor on train.csv, print its score on a 1% held-out
# split, then begin assembling the log frame from games.csv.

data = pd.read_csv(path2data + 'train.csv')
data = data.dropna()

# `axis` must be given by keyword: pandas 2.0 made every DataFrame.drop
# argument except `labels` keyword-only, so the former positional `1`
# raises TypeError there.
# NOTE(review): presumably these are the non-feature columns — confirm.
data = data.drop(
    ['Team_home', 'Match Up_home', 'Game Date_home', 'Team_away',
     'Match Up_away', 'Game Date_away', 'MIN_home', 'MIN_away',
     'W/L_home', 'W/L_away'],
    axis=1,
)

clf = ExtraTreesRegressor(n_estimators=1000, random_state=11, n_jobs=-1)

# split data into train and test sets
Y = data.pop('Result')  # pop() also removes the target from the features
X = data
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.01, random_state=1)
clf.fit(x_train, y_train)
preds = clf.predict(x_test)
print('test:', f.acc(preds, y_test))
#print('zeros:',f.get0and1(preds))
#joblib.dump(clf,'regression_linear.joblib')

games = pd.read_csv(path2data + 'games.csv')
# The log frame starts with the matchup columns copied from games.csv.
df2log = pd.DataFrame()
df2log['home'] = games['home']
df2log['away'] = games['away']
df2log['date'] = games['date']

# predict today's games
# NOTE(review): presumably the stat columns that get averaged — confirm.
c2_avg = ['PTS', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%',
          'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'PF', '+/-']
# Rebinds `preds` (previously the test-split predictions) to an empty list.
preds = []
# Keep 1% of the rows aside for a quick sanity check; the rest is training
# data. X and Y are defined above this fragment.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.01,
    random_state=7,
)

# Fit an XGBoost regressor with its default hyper-parameters.
model = xgb.XGBRegressor()
model.fit(X_train, y_train)

# Predict on the held-out rows and report on the predictions.
y_pred = model.predict(X_test)
print('zeros:', f.get0and1(y_pred))

# Score the held-out predictions and print the result as a percentage.
accuracy = f.acc(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

# NOTE(review): this binds the bound method itself, not a call result —
# confirm that is intended.
preds = model.predict

# Seed the log frame with the matchup columns read from games.csv.
games = pd.read_csv(path2data + 'games.csv')
df2log = pd.DataFrame()
for field in ('home', 'away', 'date'):
    df2log[field] = games[field]

# predict today's games
c2_avg = [
    'PTS', 'FGM', 'FGA', 'FG%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'OREB', 'DREB', 'REB',
    'AST', 'TOV', 'STL', 'BLK', 'PF', '+/-',
]