def trainR(self, X_list, y_list, space=spaceR, cv=5):
        """
        Leave-one-set-out training driven by RandomizedSearchCV.

        Every entry of ``X_list``/``y_list`` is held out in turn as the test
        set. A RandomForestClassifier is tuned by randomized search on the
        concatenation of the remaining entries and is then scored on the
        held-out entry. At least two entries are required, because the
        remaining entries are concatenated with ``np.concatenate``.

        :param X_list: List of training sets
        :param y_list: List of targets, aligned with ``X_list``
        :param space: parameter distributions sampled by the search
        :param cv: cross-validation setting forwarded to RandomizedSearchCV
        :return: tuple ``(scores, val_scores, best_models)`` holding, per
                 held-out set, the test score, the best cross-validation
                 score and the best estimator
        """
        n_calls = self.n_calls

        scores = []
        val_scores = []
        best_models = []

        for j in range(len(X_list)):
            classifier = RandomForestClassifier(n_jobs=-1)
            # Shallow copies, so pop() leaves the caller's lists untouched.
            y = y_list.copy()
            X = X_list.copy()
            y_test = y.pop(j)
            X_test = X.pop(j)
            y_train = np.concatenate(y, axis=0)
            X_train = np.concatenate(X, axis=0)

            X_train = Features().div_cols(X_train).values
            X_test = Features().div_cols(X_test).values

            start = time()

            # NOTE(review): `iid` was deprecated in scikit-learn 0.22 and
            # removed in 0.24; drop the argument when upgrading (newer
            # versions always behave like iid=False).
            opt = RandomizedSearchCV(classifier,
                                     param_distributions=space,
                                     n_iter=n_calls,
                                     scoring=self.scorer,
                                     cv=cv,
                                     n_jobs=-1,
                                     iid=False)

            opt.fit(X_train, y_train)
            elapsed = time() - start

            model = opt.best_estimator_
            val_score = opt.best_score_
            # Score the held-out set once and reuse the value for both the
            # log line and the returned list.
            test_score = opt.score(X_test, y_test)

            print('Season', 2019 - j)
            print("Random CV search took %.2f seconds for %d candidates"
                  " parameter settings." % (elapsed, n_calls))
            print("val. score:", val_score)
            print("test score:", test_score)
            print("")

            best_models.append(model)
            val_scores.append(val_score)
            scores.append(test_score)
        return scores, val_scores, best_models
Exemple #2
0
    def score_data(self, games):
        """
        Build the feature rows used to score the most recent round.

        For every game the last row of each team's ``team_roll`` output
        (``season=0, shift=0, web=True``) is taken, the home side's ``Rnd``
        is incremented by one, and the home and away values are joined into
        a single row.

        :param games: iterable of indexable pairs; element 0 is the home
                      team and element 1 the away team
        :return: result of ``Features().div_cols`` applied to the list of
                 per-game feature rows
        """
        mapping = self.mapping
        proxy = self.proxy
        enc = self.enc

        # NOTE(review): 'A_std' appears twice in both selections; possibly
        # 'M_std' was intended for the second one -- confirm against the
        # training features before changing it.
        home_columns = [
            'Rnd', 'F_mean', 'F_std', 'A_mean', 'A_std', 'M_mean', 'A_std',
            'R_mean', 'perc'
        ]
        away_columns = [
            'F_mean', 'F_std', 'A_mean', 'A_std', 'M_mean', 'A_std',
            'R_mean', 'perc', 'grnd'
        ]

        def latest_row(team, columns):
            # Most recent rolling-stat row of `team`, limited to `columns`.
            history = History(mapping, proxy, enc)
            rolled = history.team_roll(team, season=0, shift=0, web=True)
            return rolled.tail(1)[columns]

        rows = []
        for game in games:
            home_team = game[0]
            away_team = game[1]

            home_row = latest_row(home_team, home_columns)
            home_row['Rnd'] = home_row['Rnd'] + 1
            away_row = latest_row(away_team, away_columns)

            rows.append(
                np.concatenate([home_row.values[0], away_row.values[0]],
                               axis=0))
        return Features().div_cols(rows)
Exemple #3
0
    def generate_past_scores(self, data_path, best_models, team_df):
        """
        Use the season models to simulate past scores and save them as
        numpy arrays ready to be used as features.

        For season ``s`` (1-based) the year is ``2019 - s``. The method
        loads ``training-<year>.npy`` and ``results-<year>.npy`` from
        ``data_path``, scores them with ``Simulate.score_f`` using column 1
        of ``best_models[s - 1].predict_proba``, attaches the scores to a
        (year, home, away) table built from every team's home games, and
        writes ``scores-<year>.npy`` with three columns computed per
        (year, home) group, each shifted by one row: cumulative sum,
        20-row rolling std and 20-row rolling mean.

        :param data_path: directory containing the ``.npy`` inputs; the
                          outputs are written to the same directory
        :param best_models: season models; index ``s - 1`` is used for
                            year ``2019 - s``
        :param team_df: forwarded to ``History.team_roll``
        :return: None; the results are written to disk
        """
        mapping = self.mapping
        proxy = self.proxy
        for season in range(1, len(best_models) + 1):
            year = str(2019 - season)
            X = np.load(data_path + '/training-' + year + '.npy')
            X_train = Features().div_cols(X).values
            y = np.load(data_path + '/results-' + year + '.npy')

            score = Simulate.score_f(
                y, best_models[season - 1].predict_proba(X_train)[:, 1])

            teams = list(mapping.keys())
            teams.remove('Kangaroos')

            # Presumably these clubs have no data that far back -- the code
            # only shows that they are skipped for the older seasons.
            if season >= 8:
                teams.remove('Greater Western Sydney')
            if season >= 9:
                teams.remove('Gold Coast')

            # Collect one frame per team and concatenate once, instead of
            # growing a DataFrame inside the loop.
            frames = []
            for team in teams:
                df = History(mapping, proxy).team_roll(team, season, team_df)
                home_df = df[df['T'] == 'H'].reset_index(drop=True)
                n_home = len(home_df)
                frames.append(
                    pd.DataFrame(np.c_[[year] * n_home, [team] * n_home,
                                       home_df['Opponent']]))
            out = pd.concat(frames, axis=0, ignore_index=True)
            out.columns = ['year', 'home', 'away']
            # `score` must contain exactly one value per row of `out`.
            out['score'] = score
            out = out.set_index(['year', 'home'])

            grouped = out["score"].groupby(['year', 'home'])
            # shift() moves each statistic down one row within its group.
            arr1 = grouped.transform(lambda x: x.cumsum().shift()).values
            arr2 = grouped.transform(
                lambda x: x.rolling(20, min_periods=1).std().shift()).values
            arr3 = grouped.transform(
                lambda x: x.rolling(20, min_periods=1).mean().shift()).values

            np.save(data_path + '/scores-' + year + '.npy',
                    np.c_[arr1, arr2, arr3])
Exemple #4
0
 def averagingModels(self, X, models=()):
     """
     Average the class-1 probabilities predicted by several models.

     :param X: raw feature data; it is passed through
               ``Features().div_cols`` before prediction
     :param models: iterable of fitted models exposing ``predict_proba``
     :return: 1-D array holding, per row of X, the mean over all models of
              ``predict_proba(...)[:, 1]``
     :raises ValueError: if ``models`` is empty
     """
     models = list(models)
     if not models:
         raise ValueError("averagingModels requires at least one model")

     # The transformed features are the same for every model: build once.
     features = Features().div_cols(X).values
     predictions = np.column_stack(
         [model.predict_proba(features)[:, 1] for model in models])
     return np.mean(predictions, axis=1)