コード例 #1
0
ファイル: state_space_data.py プロジェクト: YounesZ/NHL_stats
 def list_all_games(self):
     """Gather the games of every season in ``self.seasons`` into ``self.games_lst``.

     Each entry of ``self.seasons`` is a name from which the ``'Season_'``
     text is removed; the first four remaining characters are read as the
     season's starting year. The ``games_info`` table of each resulting
     ``Season`` is stacked row-wise, in iteration order.

     Side effects:
         Sets ``self.games_lst`` to the stacked DataFrame (an empty DataFrame
         when ``self.seasons`` is empty).
     """
     # Collect first and concatenate once: concatenating inside the loop
     # re-copies every previously accumulated row on each iteration.
     season_frames = []
     for iy in self.seasons:
         iSea = Season(self.db_root, int(iy.replace('Season_', '')[:4]))
         season_frames.append(iSea.games_info)
     # pd.concat raises on an empty sequence, so keep the empty-frame result
     # the original produced when there are no seasons.
     if season_frames:
         self.games_lst = pd.concat(season_frames, axis=0)
     else:
         self.games_lst = pd.DataFrame()
コード例 #2
0
ファイル: test_game.py プロジェクト: YounesZ/NHL_stats
    def setUp(self):
        """Prepare the fixtures: database root, model path, season and one game.

        The season is the one beginning in 2012; the game is the one whose id
        the season reports for home team 'MTL' on 2013-03-13.
        """
        # Location of the Hockey database, taken from the configuration.
        config = Config()
        self.db_root = config.data_dir
        self.repoCode = get_git_root()

        # Folder of the player-classification model, inside the repository.
        model_subdir = 'ReinforcementLearning/NHL/playerstats/offVSdef/Automatic_classification/MODEL_perceptron_1layer_10units_relu'
        self.repoModel = path.join(self.repoCode, model_subdir)

        # Season data (year_begin=2012, i.e. '20122013').
        self.season = Season(db_root=self.db_root,
                             repo_model=self.repoModel,
                             year_begin=2012)

        # Montreal received Ottawa on March 13, 2013: turn that date into a
        # game code, then load the game itself.
        game_day = datetime.date(year=2013, month=3, day=13)
        gameId = self.season.get_game_id(home_team_abbr='MTL',
                                         game_date=game_day)
        self.a_game = Game(self.season, gameId=gameId)
コード例 #3
0
 def test_games_before_january_not_2012(self):
     """Can I fetch games for home teams on the months October-December of a season?

     Repeats 20 times: pick a random starting year in 2005..2013 other than
     2012, a random team of that season and a random date between the 15th
     and the 30th of October-December, then require that a home game exists
     within a random window of 20 to 40 days before that date.
     """
     # The candidate years do not change between repetitions.
     candidate_years = list(set(range(2005, 2014)) - {2012})
     for _ in range(20):  # number of times to do the test
         year_begin = choice(candidate_years)
         season = Season(global_logger,
                         db_root=self.db_root,
                         repo_model=self.repoModel,
                         year_begin=year_begin)
         home_team = choice(list(season.get_teams()))
         the_month = randint(10, 12)
         the_day = randint(15, 30)
         base_date = datetime.date(year=season.year_begin,
                                   month=the_month,
                                   day=the_day)
         # let's make this large enough for a game to always exist.
         delta_in_days = randint(20, 40)
         result = season.get_game_at_or_just_before(
             game_date=base_date,
             home_team_abbr=home_team,
             delta_in_days=delta_in_days)
         # The lookup used to be repeated here when it returned None, with
         # the second result thrown away; the assertion below already
         # reports everything needed to reproduce the failing lookup.
         self.assertIsNotNone(
             result,
             "[%s] Impossible to find a game for '%s' up to %d days before %s"
             % (season, home_team, delta_in_days, base_date))
コード例 #4
0
ファイル: test_game.py プロジェクト: YounesZ/NHL_stats
class TestGame(unittest.TestCase):
    """Testing definitions of Game's."""

    def setUp(self):
        """Load the season beginning in 2012 and one reference game.

        The reference game is the one the season reports for home team 'MTL'
        on 2013-03-13.
        """
        # This is the location of the Hockey database
        self.db_root = Config().data_dir
        self.repoCode = get_git_root()

        self.repoModel = path.join(
            self.repoCode,
            'ReinforcementLearning/NHL/playerstats/offVSdef/Automatic_classification/MODEL_perceptron_1layer_10units_relu'
        )

        # Now lets get game data
        self.season = Season(
            db_root=self.db_root, repo_model=self.repoModel,
            year_begin=2012)  # Season.from_year_begin(2012) # '20122013'
        # Montreal received Ottawa on march 13, 2013, let's convert game date to game code
        gameId = self.season.get_game_id(home_team_abbr='MTL',
                                         game_date=datetime.date(year=2013,
                                                                 month=3,
                                                                 day=13))
        self.a_game = Game(self.season, gameId=gameId)

    def _random_home_game(self, all_teams, days_before):
        """Load a random home game played in January-April 2013, if one exists.

        :param all_teams: list of team abbreviations to draw the home team from.
        :param days_before: how many days before the random date a game may
            have been played and still be accepted.
        :return: the ``Game``, or ``None`` (after printing a warning) when the
            season has no home game for the drawn team in that window.
        """
        random_date = datetime.date(year=2013,
                                    month=random.randint(1, 4),
                                    day=random.randint(1, 28))
        random_team = random.choice(all_teams)
        result = self.season.get_game_at_or_just_before(
            random_date,
            home_team_abbr=random_team,
            delta_in_days=days_before)
        if result is None:
            print("WARNING => No home game for '%s' up to %d days before %s" %
                  (random_team, days_before, random_date))
            return None
        random_game_id, game_date = result
        print("[home team: '%s'] Examining game %d (from %s)" %
              (random_team, random_game_id, game_date))
        return Game(self.season, gameId=random_game_id)

    def test_shifts_differential(self):
        """Is every change of the differential backed by a goal on that shift?"""

        days_before = 20
        # Materialize as a list: random.sample()/choice() need a sequence, and
        # sampling directly from a set is an error from Python 3.11 on.
        all_teams = list(self.season.get_teams())
        for _ in range(10):  # repeat this test 10 times
            random_game = self._random_home_game(all_teams, days_before)
            if random_game is None:
                continue
            shifts = random_game.lineShifts.shifts
            df_differential = shifts['differential'].reset_index()  # ?
            # Row-to-row change, computed once; rows where it is non-zero are
            # the shifts on which the differential moved.
            deltas = df_differential.diff()
            idxs_change_differential = deltas[
                deltas.differential != 0].index.values
            # skip first one, because it's a NaN
            for idx in idxs_change_differential[1:]:
                self.assertNotEqual(
                    shifts.iloc[idx]['GOAL'], 0,
                    "Differential is %d but there was no goal scored" %
                    (shifts.iloc[idx]['differential']))

    def test_shifts_goals_generate_differential(self):
        """Does each goal shift the differential by exactly the goal's value?"""

        days_before = 20
        all_teams = list(self.season.get_teams())
        for _ in range(10):  # repeat this test 10 times
            random_game = self._random_home_game(all_teams, days_before)
            if random_game is None:
                continue
            shifts = random_game.lineShifts.shifts
            df_goals = shifts['GOAL'].reset_index()
            # Positions of shifts with a goal; position 0 is left out because
            # it has no previous shift to compare against.
            idxs_goals = [
                a_val for a_val in df_goals[df_goals.GOAL != 0].index.values
                if a_val > 0
            ]
            for idx in idxs_goals:
                diff = shifts.iloc[idx]['differential']
                diff_before = shifts.iloc[idx - 1]['differential']
                goals_now = shifts.iloc[idx]['GOAL']
                expected_result = diff_before + goals_now
                # Clamp the start of the context window: a negative start
                # would wrap around and print an empty frame for idx == 1.
                context = shifts.iloc[max(idx - 2, 0):idx + 2][[
                    'GOAL', 'differential'
                ]]
                self.assertEqual(
                    diff, expected_result,
                    "\n%s\n [index: %d] Differential is %d but it should be => %d (== (diff before) %d + %d (goals now))"
                    % (context, idx, diff, expected_result, diff_before,
                       goals_now))
コード例 #5
0
ファイル: state_space_data.py プロジェクト: YounesZ/NHL_stats
    def pull_RL_data(self,
                     repoModel,
                     repoSave=None,
                     verbose=0,
                     fetcher='default'):
        """Build the (state, action, reward) table for every game in ``self.games_lst``.

        Reloads the player classifier and its base variables from
        ``repoModel``, builds the line dictionary, then walks every season and
        game listed in ``self.games_lst``. For each game that has data and
        player classes, the game's state space is built and its rows are
        appended to the accumulated tables.

        :param repoModel: folder holding the saved classifier and
            ``baseVariables.p``.
        :param repoSave: folder where checkpoints are pickled
            (``RL_teaching_data.p``, ``GAME_data.p``, ``PLAYER_data.p``);
            ``None`` disables saving.
        :param verbose: any value above 0 prints empty-game notices and a
            progress bar.
        :param fetcher: value stored on the game's player-classes manager
            before the classes are requested.

        Side effects:
            Sets ``self.classifier``, ``self.players_model``, ``self.RL_data``,
            ``self.state_size`` and ``self.action_size`` (the last two are
            ``None`` when no game produced data).
        """

        def _write_checkpoint(rl_table, n_states, n_actions, game_table,
                              player_table):
            """Pickle the accumulated tables into ``repoSave``, closing each file."""
            payloads = (
                ('RL_teaching_data.p', {
                    'RL_data': rl_table,
                    'nStates': n_states,
                    'nActions': n_actions
                }),
                ('GAME_data.p', game_table),
                ('PLAYER_data.p', player_table),
            )
            for file_name, payload in payloads:
                with open(path.join(repoSave, file_name), 'wb') as out_file:
                    pickle.dump(payload, out_file)

        # Prepare players model: reload info
        CLS = ANN_classifier()
        sess, annX, annY = CLS.ann_reload_model(repoModel)
        self.classifier = {'annX': annX, 'annY': annY, 'sess': sess}
        with open(path.join(repoModel, 'baseVariables.p'), 'rb') as in_file:
            self.players_model = pickle.load(in_file)
        # Make lines dictionary
        self.make_line_dictionary()
        # List line shifts
        RL_data = pd.DataFrame()
        GAME_data = pd.DataFrame()
        PLAYER_data = pd.DataFrame()
        count = 0
        # Defined up front so the attributes set at the end exist even when
        # no game yields any data.
        nS = None
        nA = None
        unsaved = False  # True while data was added since the last checkpoint
        n_games_total = len(self.games_lst)

        # Loop on seasons
        for iy in np.unique(self.games_lst['season'].values):

            # Extract season data
            iSea = Season(self.db_root, int(str(iy)[:4]))

            # List games
            games = self.games_lst[self.games_lst['season'] == iy]

            # Loop on games
            for ic, ih, ia in zip(games['gcode'].values,
                                  games['hometeam'].values,
                                  games['awayteam'].values):

                # Extract game data
                iGame = Game(iSea, gameId=ic)

                # Check if some data was retrieved:
                if len(iGame.df_wc) > 0:
                    # NOTE(review): this assigns to an attribute whose name
                    # reads like a setter method; if it is one, this should
                    # probably be a call -- confirm against the manager class.
                    iGame.players_classes_mgr.set_stats_fetcher = fetcher
                    player_classes = iGame.players_classes_mgr.get(
                        equal_strength=True,
                        regular_time=True,
                        min_duration=20,
                        nGames=30)
                    # update shifts to reflect the same parameters
                    lineSHFT = iGame.as_df('both', True, True, 20)
                    # Check if some data was retrieved:
                    if len(player_classes) > 0:
                        # Add game identifier data
                        lineSHFT['season'] = iy
                        lineSHFT['gameCode'] = ic
                        lineSHFT['hometeam'] = ih
                        lineSHFT['awayteam'] = ia

                        S, A, R, nS, nA, coded = iGame.build_statespace(
                            self.line_dictionary)
                        # Concatenate data: one row per entry, with columns
                        # state / action / reward.
                        df_ic = np.transpose(
                            np.reshape(np.concatenate((S, A, R)), [3, -1]))
                        RL_data = pd.concat(
                            (RL_data,
                             pd.DataFrame(
                                 df_ic, columns=['state', 'action', 'reward'
                                                 ])),
                            axis=0)
                        GAME_data = pd.concat((GAME_data, lineSHFT[coded]),
                                              axis=0)
                        # Players data
                        plDT = player_classes
                        plDT['season'] = iy
                        plDT['gameCode'] = ic
                        PLAYER_data = pd.concat((PLAYER_data, plDT), axis=0)
                        unsaved = True
                        # Save data every 20th game
                        if repoSave is not None and count % 20 == 0:
                            _write_checkpoint(RL_data, nS, nA, GAME_data,
                                              PLAYER_data)
                            unsaved = False
                    elif verbose > 0:
                        print('*** EMPTY GAME ***')
                elif verbose > 0:
                    print('*** EMPTY GAME ***')

                # Status bar
                if verbose > 0:
                    stdout.write('\r')
                    # the exact output you're looking for:
                    stdout.write(
                        "Game %i/%i - season %s game %s: [%-60s] %d%%, completed"
                        % (count, n_games_total, iy, ic,
                           '=' * int(count / n_games_total * 60),
                           100 * count / n_games_total))
                    stdout.flush()
                # Count every game, whatever the verbosity: the counter also
                # drives the checkpoint cadence above.
                count += 1

        # Persist whatever was gathered after the last periodic checkpoint.
        if repoSave is not None and unsaved:
            _write_checkpoint(RL_data, nS, nA, GAME_data, PLAYER_data)

        self.RL_data = RL_data
        self.state_size = nS
        self.action_size = nA
コード例 #6
0
    def evaluate_all_coaches(self, season_year_begin: int,
                             teams_opt: Optional[List[str]], n_games: int):
        """Evaluate the home coach of each team and pickle one result per team.

        For every team, ``get_teams_coach_performance`` is run over
        ``n_games`` games of the season; the returned dictionary of seconds is
        logged, summarized as a score (seconds matching the optimal choice
        over seconds matching plus seconds not matching) and written to
        ``<self.base_dir>/<team>.pkl``.

        :param season_year_begin: starting year of the season to evaluate.
        :param teams_opt: team abbreviations to evaluate; ``None`` evaluates
            every team of the season.
        :param n_games: number of games passed on as ``how_many_games``.
        """
        # Imported inside the method rather than at module level, as in the
        # original code (presumably to avoid a circular import -- TODO confirm).
        from ReinforcementLearning.NHL.playbyplay.state_space_data import HockeySS

        os.makedirs(self.base_dir, exist_ok=True)
        my_config = Config()
        self.alogger.debug("Data configured to be in '%s'" %
                           (my_config.data_dir))

        db_root = my_config.data_dir
        repoCode = get_git_root()

        repoModel = path.join(
            repoCode,
            'ReinforcementLearning/NHL/playerstats/offVSdef/Automatic_classification/MODEL_perceptron_1layer_10units_relu'
        )

        season = Season(self.alogger,
                        db_root=db_root,
                        year_begin=season_year_begin,
                        repo_model=repoModel)

        # Line translation table
        linedict = HockeySS(db_root)
        linedict.make_line_dictionary()
        linedict = linedict.line_dictionary

        # Load the Qvalues table (the file is closed as soon as it is read)
        q_table_file = path.join(
            repoCode,
            'ReinforcementLearning/NHL/playbyplay/data/stable/RL_action_values.p'
        )
        with open(q_table_file, 'rb') as q_file:
            Qvalues = pickle.load(q_file)['action_values']

        # Show its dimensions (period x differential x away line's code x home line's code)
        print('Q-table dimensions: ', Qvalues.shape)

        # for what teams will we run this calculation?
        calc_teams = season.get_teams() if teams_opt is None else teams_opt
        for a_team in calc_teams:
            season.alogger.debug("=============> calculating %s" % (a_team))
            seconds = get_teams_coach_performance(season,
                                                  team_abbr=a_team,
                                                  maybe_a_starting_date=None,
                                                  line_dict=linedict,
                                                  Qvalues=Qvalues,
                                                  how_many_games=n_games)
            season.alogger.debug(seconds)

            if seconds["does_not_match_optimal"] == seconds[
                    "matches_optimal"] == 0:
                # Nothing to divide by: no shift could be compared.
                season.alogger.info(
                    "[team: '%s'] No evidence for coach to be evaluated on." %
                    (a_team))
            else:
                total_secs = seconds['matches_optimal'] + seconds[
                    'does_not_match_optimal']
                season.alogger.info(
                    "['%s'] Home coach's score is %d (secs. optimal) / %d (secs. total) = %.2f (in [0,1])"
                    % (a_team, seconds['matches_optimal'], total_secs,
                       seconds['matches_optimal'] / total_secs))
            file_to_save = os.path.join(self.base_dir, a_team + ".pkl")
            self.alogger.debug("Saving data for '%s' in file %s" %
                               (a_team, file_to_save))
            with open(file_to_save, 'wb') as dict_file:
                pickle.dump(seconds, dict_file)
            self.alogger.debug("DONE")
コード例 #7
0
def get_teams_coach_performance(season: Season, team_abbr: str,
                                maybe_a_starting_date: Optional[datetime.date],
                                how_many_games: int, line_dict: dict,
                                Qvalues) -> dict:
    """
    Tally how long a team's home-line choices agreed with the recommended ones.

    Starting from a base date, up to ``how_many_games`` successive home games
    of ``team_abbr`` are fetched. For each game the away team's lines are
    predicted from its previous games, home lines are recommended against
    them, and every shift's ice duration is added to one of three counters
    depending on how the line actually iced compares with the recommendation.

    :param season: season the games are taken from; its ``alogger`` is used
        for all logging.
    :param team_abbr: abbreviation of the home team to evaluate.
    :param maybe_a_starting_date: date to start looking for games from; when
        ``None``, February 1st of ``season.year_end`` is used.
    :param how_many_games: number of games to attempt; must be positive.
        Games skipped for lack of data still count as attempts.
    :param line_dict: line translation table handed to the game's line
        recoding and to the Q-values fetcher.
    :param Qvalues: Q-values table, indexed as
        [period, differential, away line code, home line code].
    :return: dict with the keys ``'matches_optimal'``,
        ``'does_not_match_optimal'`` and ``'unknown_adversary'``, each a
        number of seconds. Returned early (possibly all zeros) when no
        (further) home game can be found.
    """
    params = {
        "games_to_predict_away_lines": 7,
        "optimal_examine_num_first_lines": 5,  # None examines ALL
        "first_day_of_season": datetime.date(year=season.year_end,
                                             month=2,
                                             day=1),
        # TODO: check why nothing is found when starting from
        # datetime.date(year=season.year_begin, month=12, day=1) instead.
        # Upper bound on the day-by-day search for the next home game, so the
        # search stops once the team has no more home games. Assumed to be
        # comfortably longer than any in-season gap -- TODO confirm.
        "max_days_to_next_home_game": 60,
    }

    assert (how_many_games > 0)
    last_game_date_accounted_for = None
    seconds = {  # result will be stored here.
        'does_not_match_optimal':
        0,  # number of seconds the action of coach did not match the optimal
        'matches_optimal':
        0,  # number of seconds the action of coach matched the optimal
        'unknown_adversary':
        0  # number of seconds we can't determine the value of the action of coach
    }
    one_day = datetime.timedelta(days=1)
    while how_many_games > 0:
        season.alogger.info("games left: %d; so far: %s" %
                            (how_many_games, seconds))
        how_many_games -= 1
        if last_game_date_accounted_for is None:
            # First game: the closest home game at or before the base date.
            if maybe_a_starting_date is not None:
                base_date = maybe_a_starting_date
            else:
                base_date = params["first_day_of_season"]
            result = season.get_game_at_or_just_before(
                game_date=base_date,
                home_team_abbr=team_abbr,
                delta_in_days=20)
            if result is None:
                season.alogger.info(
                    "There is no game for '%s' just before %s" %
                    (team_abbr, base_date))
                return seconds
            gameId, d = result
            base_date = base_date + one_day  # convenient for next cycle of computation
        else:
            # Following games: move forward one day at a time until the
            # lookup returns a game other than the one already accounted for.
            search_limit = base_date + datetime.timedelta(
                days=params["max_days_to_next_home_game"])
            found = False
            while not found:
                if base_date > search_limit:
                    season.alogger.info(
                        "No further home game found for '%s' up to %s" %
                        (team_abbr, search_limit))
                    return seconds
                result = season.get_game_at_or_just_before(
                    game_date=base_date, home_team_abbr=team_abbr)
                # A None result only means nothing was found around this
                # date; keep moving forward until the limit is reached.
                if result is not None:
                    gameId, d = result
                    found = (d != last_game_date_accounted_for)
                base_date = base_date + one_day  # convenient for next cycle of computation
            season.alogger.info("Fetched game %d, played on %s" % (gameId, d))
        # Remember the DATE OF THE GAME just fetched (not the search date):
        # this is what the check above compares against, and it must follow
        # every new game so that no game is counted twice.
        last_game_date_accounted_for = d
        data_for_a_game = Game(season, gameId)

        home_players = data_for_a_game.get_ids_of_home_players()
        if len(home_players) < 12:
            season.alogger.info(
                "Can't get enough info for home players (WEIRD!!). Ignoring game %d"
                % (data_for_a_game.gameId))
            continue

        # prediction of the lines that the 'away' team will use:
        formation_opt = season.get_lines_for(
            d - one_day,
            how_many_games_back=params["games_to_predict_away_lines"],
            team_abbrev=data_for_a_game.away_team)
        if formation_opt is None:
            season.alogger.debug(
                "Couldn't get a prediction of lines %s will use" %
                (data_for_a_game.away_team))
            continue

        formation = formation_opt
        away_lines_names = formation.as_names
        away_lines = formation.as_categories
        season.alogger.info(away_lines_names)
        season.alogger.info(away_lines)

        # === Now we get the indices in the Q-values tables corresponding to lines

        # Get lines and translate them
        playersCode = data_for_a_game.encode_line_players()
        linesCode = np.array(
            [[data_for_a_game.recode_line(line_dict, a) for a in b]
             for b in playersCode])

        # Get the Q-value for that specific line
        iShift = 0  # First shift
        lineShifts = data_for_a_game.lineShifts.as_df(
            team='both',
            equal_strength=data_for_a_game.shifts_equal_strength,
            regular_time=data_for_a_game.shifts_regular_time,
            min_duration=20)

        player_classes = data_for_a_game.players_classes_mgr.get(
            equal_strength=True,
            regular_time=True,
            min_duration=20,
            nGames=30)  # TODO: why these parameters?
        # Names of the players of the first shift (logged below only).
        first_shift_ids = lineShifts['playersID'].iloc[iShift]
        plList = list(player_classes.loc[first_shift_ids[0]]['firstlast'].values) + \
                 list(player_classes.loc[first_shift_ids[1]]['firstlast'].values)
        diff = data_for_a_game.recode_differential(
            lineShifts.iloc[iShift].differential)
        period = data_for_a_game.recode_period(lineShifts.iloc[iShift].period)
        q_values = Qvalues[period, diff, linesCode[iShift, 0],
                           linesCode[iShift, 1]]
        season.alogger.info(
            '[diff = %d, period = %d] First shift: \n\thome team: %s, %s, %s \n\taway team: %s, %s, %s \n\tQvalue: %.2f'
            % (diff, period, plList[0], plList[1], plList[2], plList[3],
               plList[4], plList[5], q_values))

        q_values_fetcher_from_game_data = QValuesFetcherFromGameData(
            game_data=data_for_a_game, lines_dict=line_dict, q_values=Qvalues)

        line_rec = LineRecommender(
            game=data_for_a_game,
            player_category_fetcher=CategoryFetcher(
                data_for_game=data_for_a_game),
            q_values_fetcher=q_values_fetcher_from_game_data)

        home_lines_rec = line_rec.recommend_lines_maximize_average(
            home_team_players_ids=home_players,
            away_team_lines=away_lines,
            examine_max_first_lines=params["optimal_examine_num_first_lines"])
        season.alogger.info(home_lines_rec)

        season.alogger.info(
            data_for_a_game.formation_ids_to_str(home_lines_rec))

        # let's examine actual decisions and how it compares with optimal:
        shifts_to_score = lineShifts[['home_line', 'away_line', 'iceduration']]
        for data in shifts_to_score.itertuples():
            num_seconds = data.iceduration
            away_line_cats = data_for_a_game.classes_of_line(data.away_line)
            if None in away_line_cats:
                season.alogger.info(
                    "Can't get category of one of away players")
                continue
            away_line_cats = tuple(np.sort(away_line_cats))
            if away_line_cats not in away_lines:
                # No recommendation was computed against this away line.
                seconds['unknown_adversary'] += num_seconds
                continue
            idx_of_away = away_lines.index(away_line_cats)
            cats_of_optimal = data_for_a_game.classes_of_line(
                home_lines_rec[idx_of_away])
            home_line_cats = data_for_a_game.classes_of_line(data.home_line)
            # NOTE(review): comparing as sets ignores how many players of
            # each category are on the line, whereas the away line above is
            # compared as a sorted tuple -- confirm this is intended.
            if set(cats_of_optimal) == set(home_line_cats):
                seconds['matches_optimal'] += num_seconds
            else:
                seconds['does_not_match_optimal'] += num_seconds
    return seconds