def trainR(self, X_list, y_list, space=spaceR, cv=5):
    """
    Leave-one-season-out training with RandomizedSearchCV.

    :param X_list: list of per-season training sets
    :param y_list: list of per-season targets
    :param space: hyper-parameter search space
    :param cv: number of cross-validation folds
    :return: test scores, validation scores and fitted models
    """
    n_calls = self.n_calls
    scores = []
    val_scores = []
    best_models = []
    for j in range(len(X_list)):
        classifier = RandomForestClassifier(n_jobs=-1)
        # hold out season j as the test set, train on the remaining seasons
        y = y_list.copy()
        X = X_list.copy()
        y_test = y.pop(j)
        X_test = X.pop(j)
        y_train = np.concatenate(y, axis=0)
        X_train = np.concatenate(X, axis=0)
        X_train = Features().div_cols(X_train).values
        X_test = Features().div_cols(X_test).values
        start = time()
        opt = RandomizedSearchCV(classifier,
                                 param_distributions=space,
                                 n_iter=n_calls,
                                 scoring=self.scorer,
                                 cv=cv,
                                 n_jobs=-1)
        opt.fit(X_train, y_train)
        model = opt.best_estimator_
        print('Season', 2019 - j)
        print("Random CV search took %.2f seconds for %d candidate"
              " parameter settings." % ((time() - start), n_calls))
        print("val. score:", opt.best_score_)
        print("test score:", opt.score(X_test, y_test))
        print("")
        best_models.append(model)
        val_scores.append(opt.best_score_)
        scores.append(opt.score(X_test, y_test))
    return scores, val_scores, best_models
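# A minimal standalone sketch of the leave-one-season-out RandomizedSearchCV
# pattern used in trainR above, on synthetic data.  The per-season arrays,
# parameter space and scorer here are illustrative assumptions only, and the
# Features().div_cols scaling step is omitted.
#
# import numpy as np
# from scipy.stats import randint
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import RandomizedSearchCV
#
# rng = np.random.default_rng(0)
# X_list = [rng.normal(size=(60, 5)) for _ in range(3)]      # one array per season
# y_list = [rng.integers(0, 2, size=60) for _ in range(3)]
# space = {'n_estimators': randint(50, 200), 'max_depth': randint(2, 10)}
#
# for j in range(len(X_list)):
#     X, y = X_list.copy(), y_list.copy()
#     X_test, y_test = X.pop(j), y.pop(j)                    # hold out season j
#     X_train, y_train = np.concatenate(X), np.concatenate(y)
#     opt = RandomizedSearchCV(RandomForestClassifier(n_jobs=-1),
#                              param_distributions=space, n_iter=5,
#                              scoring='neg_log_loss', cv=5, n_jobs=-1)
#     opt.fit(X_train, y_train)
#     print(j, opt.best_score_, opt.score(X_test, y_test))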
def score_data(self, games):
    """
    Prepare the most recent round for scoring.

    :param games: iterable of (home, away) team-name pairs
    :return: scaled feature rows, one per game
    """
    mapping = self.mapping
    proxy = self.proxy
    enc = self.enc
    scoring = []
    for home, away in games:
        # last rolling-summary row for the home side; note 'A_std' appears
        # twice in the original column selection ('M_std' may have been intended)
        home_df = History(mapping, proxy, enc).team_roll(
            home, season=0, shift=0, web=True).tail(1)[[
                'Rnd', 'F_mean', 'F_std', 'A_mean', 'A_std', 'M_mean',
                'A_std', 'R_mean', 'perc'
            ]]
        home_df['Rnd'] = home_df['Rnd'] + 1
        away_df = History(mapping, proxy, enc).team_roll(
            away, season=0, shift=0, web=True).tail(1)[[
                'F_mean', 'F_std', 'A_mean', 'A_std', 'M_mean', 'A_std',
                'R_mean', 'perc', 'grnd'
            ]]
        features = np.concatenate([home_df.values[0], away_df.values[0]],
                                  axis=0)
        scoring.append(features)
    return Features().div_cols(scoring)
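# A toy sketch of the feature-row construction in score_data: take the last
# rolling-summary row for each side and concatenate home + away values into
# a single feature vector for the upcoming fixture.  Column names and values
# below are illustrative only.
#
# import numpy as np
# import pandas as pd
#
# home_df = pd.DataFrame({'Rnd': [4], 'F_mean': [88.0], 'A_mean': [75.0]})
# away_df = pd.DataFrame({'F_mean': [70.0], 'A_mean': [90.0], 'perc': [0.95]})
# home_df['Rnd'] = home_df['Rnd'] + 1          # the round being scored
# features = np.concatenate([home_df.values[0], away_df.values[0]], axis=0)
# print(features)                              # one row of scoring features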
def generate_past_scores(self, data_path, best_models, team_df):
    """
    Use the season models to simulate past scores (via score_f above) and
    save each season's score features as a numpy array ready for training.

    :param data_path: directory holding the training/results arrays
    :param best_models: list of fitted season models
    :param team_df: team history passed through to History.team_roll
    :return: None
    """
    mapping = self.mapping
    proxy = self.proxy
    for season in range(1, len(best_models) + 1):
        X = np.load(data_path + '/training-' + str(2019 - season) + '.npy')
        X_train = Features().div_cols(X).values
        y = np.load(data_path + '/results-' + str(2019 - season) + '.npy')
        score = Simulate.score_f(
            y, best_models[season - 1].predict_proba(X_train)[:, 1])
        year = str(2019 - season)

        # drop teams that are not used for this season's simulation
        teams = list(mapping.keys())
        teams.remove('Kangaroos')
        if season >= 8:
            teams.remove('Greater Western Sydney')
        if season >= 9:
            teams.remove('Gold Coast')

        out = pd.DataFrame()
        for team in teams:
            df = History(mapping, proxy).team_roll(team, season, team_df)
            home_df = df[df['T'] == 'H'].reset_index(drop=True)
            n_home = len(home_df)
            out = pd.concat([
                out,
                pd.DataFrame(np.c_[[year] * n_home, [team] * n_home,
                                   home_df['Opponent']])
            ],
                            axis=0,
                            ignore_index=True)
        out.columns = ['year', 'home', 'away']
        out['score'] = score
        out = out.set_index(['year', 'home'])

        # per-(year, home) cumulative and rolling score features, shifted so
        # each row only sees earlier games
        arr1 = out["score"].groupby(
            ['year', 'home']).transform(lambda x: x.cumsum().shift()).values
        arr2 = out["score"].groupby(['year', 'home']).transform(
            lambda x: x.rolling(20, min_periods=1).std().shift()).values
        arr3 = out["score"].groupby(['year', 'home']).transform(
            lambda x: x.rolling(20, min_periods=1).mean().shift()).values
        np.save(data_path + '/scores-' + str(2019 - season) + '.npy',
                np.c_[arr1, arr2, arr3])
    return None
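# A standalone sketch of the grouped score features built at the end of
# generate_past_scores: per (year, home) cumulative sum and rolling std/mean
# of past scores, shifted so each row only uses earlier games.  The data
# below is made up purely for illustration.
#
# import numpy as np
# import pandas as pd
#
# out = pd.DataFrame({
#     'year': ['2018'] * 4,
#     'home': ['A', 'A', 'B', 'B'],
#     'score': [1.0, 0.0, 1.0, 1.0],
# }).set_index(['year', 'home'])
#
# grouped = out['score'].groupby(['year', 'home'])
# arr1 = grouped.transform(lambda x: x.cumsum().shift()).values
# arr2 = grouped.transform(
#     lambda x: x.rolling(20, min_periods=1).std().shift()).values
# arr3 = grouped.transform(
#     lambda x: x.rolling(20, min_periods=1).mean().shift()).values
# print(np.c_[arr1, arr2, arr3])   # first game of each group is NaN, as expected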
def averagingModels(self, X, models=[]):
    """Average the positive-class probabilities of the given fitted models."""
    predictions = np.column_stack([
        model.predict_proba(Features().div_cols(X).values)[:, 1]
        for model in models
    ])
    return np.mean(predictions, axis=1)
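# A small standalone sketch of the probability averaging done in
# averagingModels: stack each model's positive-class probability as a column
# and take the row-wise mean.  The models and data are illustrative
# assumptions, and the Features().div_cols scaling step is omitted.
#
# import numpy as np
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.linear_model import LogisticRegression
#
# rng = np.random.default_rng(1)
# X_train, y_train = rng.normal(size=(80, 4)), rng.integers(0, 2, size=80)
# X_new = rng.normal(size=(5, 4))
#
# models = [RandomForestClassifier(n_estimators=50).fit(X_train, y_train),
#           LogisticRegression().fit(X_train, y_train)]
# predictions = np.column_stack([m.predict_proba(X_new)[:, 1] for m in models])
# print(np.mean(predictions, axis=1))   # averaged positive-class probabilities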