def get_5v5_df_start_end(**kwargs):
    """
    Builds a single 5v5 player log covering a range of dates.

    The date range is resolved from kwargs by get_startdate_enddate_from_kwargs. For each season the range touches,
    the season's 5v5 player log is loaded, left-merged with the season schedule on Game to pick up each game's Date,
    restricted to rows whose Date lies within the range (inclusive on both ends), and tagged with a Season column.

    :param kwargs: passed straight through to get_startdate_enddate_from_kwargs

    :return: dataframe, the per-season logs concatenated and sorted by game date (Date itself is dropped)
    """
    startdate, enddate = get_startdate_enddate_from_kwargs(**kwargs)
    first_season = helper.infer_season_from_date(startdate)
    last_season = helper.infer_season_from_date(enddate)

    season_logs = []
    for season in range(first_season, last_season + 1):
        log = manip.get_5v5_player_log(season)
        game_dates = schedules.get_season_schedule(season)[['Game', 'Date']]
        log = log.merge(game_dates, how='left', on='Game')
        in_range = (log.Date >= startdate) & (log.Date <= enddate)
        season_logs.append(log[in_range].assign(Season=season))

    # Sort on date, not game ID: when games are rescheduled, Game IDs are not in order.
    combined = pd.concat(season_logs).sort_values(['Date'])
    return combined.drop('Date', axis=1)
def get_team_schedule(season=None, team=None, startdate=None, enddate=None):
    """
    Gets the schedule for given team in given season. Or if startdate and enddate are specified, searches between
    those dates. If season and startdate (and/or enddate) are specified, searches that season between those dates.

    Games whose Status is "Scheduled" are always excluded. When searching by dates only (no season), the result
    also carries a Season column; when a season is given, no Season column is added.

    :param season: int, the season
    :param team: int or str, the team
    :param startdate: str, YYYY-MM-DD
    :param enddate: str, YYYY-MM-DD

    :return: dataframe

    :raises ValueError: if season is None and startdate and enddate are not both given
    """
    if season is not None:
        df = get_season_schedule(season).query('Status != "Scheduled"')
        if startdate is not None:
            df = df.query('Date >= "{0:s}"'.format(startdate))
        if enddate is not None:
            df = df.query('Date <= "{0:s}"'.format(enddate))
        tid = team_info.team_as_id(team)
        return df[(df.Home == tid) | (df.Road == tid)]

    # TODO handle case when only team and startdate, or only team and enddate, are given
    # Until then, fail with a clear message instead of falling through to an unbound local.
    if startdate is None or enddate is None:
        raise ValueError('get_team_schedule needs either a season, or both startdate and enddate '
                         '(got season={0!r}, startdate={1!r}, enddate={2!r})'.format(season, startdate, enddate))

    startseason = helpers.infer_season_from_date(startdate)
    endseason = helpers.infer_season_from_date(enddate)

    dflst = []
    for yr in range(startseason, endseason + 1):
        # The single-season call above already drops "Scheduled" games, so no second filter is needed here.
        df = get_team_schedule(yr, team).assign(Season=yr)
        # Only the first and last seasons can contain games outside the requested range.
        if yr == startseason:
            df = df.query('Date >= "{0:s}"'.format(startdate))
        if yr == endseason:
            df = df.query('Date <= "{0:s}"'.format(enddate))
        dflst.append(df)
    return pd.concat(dflst)
def get_enddate_from_kwargs(**kwargs):
    """
    Resolves the end date (str, YYYY-MM-DD) implied by kwargs.

    If enddate is given, it is returned as-is. Otherwise an end season is chosen from the first of these that is
    present: endseason, startseason, season, the season inferred from startdate, or else the current season.
    The result is then 6/21 of (end season + 1), or today's date if that is earlier.
    """
    if 'enddate' in kwargs:
        return kwargs['enddate']

    # Keys that directly name a season, in order of precedence.
    for key in ('endseason', 'startseason', 'season'):
        if key in kwargs:
            final_season = kwargs[key]
            break
    else:
        if 'startdate' in kwargs:
            final_season = helper.infer_season_from_date(kwargs['startdate'])
        else:
            final_season = schedules.get_current_season()

    today = datetime.datetime.now().strftime('%Y-%m-%d')
    season_close = '{0:d}-06-21'.format(final_season + 1)
    # Both are zero-padded ISO dates, so comparing the strings compares the dates.
    return min(season_close, today)
def get_fline_shot_rates(team, startdate, enddate):
    """
    Gets CF60 and CA60 by three-player forward combination for this team between given range of dates

    :param team: int or str, team
    :param startdate: str, start date
    :param enddate: str, end date (inclusive)

    :return: dataframe with PlayerID1, PlayerID2, PlayerID3, CF, CA, TOI (in secs), CF60 and CA60. Only rows
        where all three players have a Pos other than "D" in the player ids file are kept. Any other numeric
        columns (e.g. Season) are summed along with the rest and are not meaningful.
    """
    # TODO this method is so slow
    first_season = helper.infer_season_from_date(startdate)
    last_season = helper.infer_season_from_date(enddate)
    trio_cols = ['PlayerID1', 'PlayerID2', 'PlayerID3']

    per_season = []
    for season in range(first_season, last_season + 1):
        # NOTE(review): 20001-30417 presumably spans regular-season and playoff game IDs -- confirm
        games = [g for g in schedules.get_team_games(season, team, startdate, enddate)
                 if g >= 20001 and g <= 30417]

        toi = combos.get_team_combo_toi(season, team, games, n_players=3)
        toi = toi.rename(columns={'Secs': 'TOI'})
        cfca = combos.get_team_combo_corsi(season, team, games, n_players=3)

        season_df = toi.merge(cfca, how='outer', on=trio_cols)
        per_season.append(season_df.assign(Season=season))

    df = pd.concat(per_season)
    df = df.groupby(trio_cols, as_index=False).sum()

    # TOI is in seconds, so scale counts to a per-60-minutes rate
    df['CF60'] = df.CF * 3600 / df.TOI
    df['CA60'] = df.CA * 3600 / df.TOI

    # Inner-merging against the non-defensemen IDs, one player slot at a time, leaves only all-forward trios
    forwards = players.get_player_ids_file().query('Pos != "D"')[['ID']]
    for col in trio_cols:
        df = df.merge(forwards.rename(columns={'ID': col}), how='inner', on=col)
    return df
def get_startdate_enddate_from_kwargs(**kwargs):
    """
    Resolves the (startdate, enddate) pair of YYYY-MM-DD strings implied by kwargs.

    enddate always comes from get_enddate_from_kwargs. startdate is taken from the first of these that applies:
    last_n_days (that many days before enddate), startdate (as given), startseason or season (9/15 of that year),
    or else 9/15 of three seasons before the season inferred from enddate.
    """
    enddate = get_enddate_from_kwargs(**kwargs)

    if 'last_n_days' in kwargs:
        end_dt = datetime.datetime(*map(int, enddate.split('-')))
        start_dt = end_dt - datetime.timedelta(days=kwargs['last_n_days'])
        return start_dt.strftime('%Y-%m-%d'), enddate

    if 'startdate' in kwargs:
        return kwargs['startdate'], enddate

    if 'startseason' in kwargs:
        first_season = kwargs['startseason']
    elif 'season' in kwargs:
        first_season = kwargs['season']
    else:
        first_season = helper.infer_season_from_date(enddate) - 3
    return '{0:d}-09-15'.format(first_season), enddate
def get_dpair_shot_rates(team, startdate, enddate):
    """
    Gets CF60 and CA60 by defenseman duo (5v5 only) for this team between given range of dates

    :param team: int or str, team
    :param startdate: str, start date
    :param enddate: str, end date (inclusive)

    :return: dataframe with PlayerID1, PlayerID2, CF, CA, TOI (in secs), CF60 and CA60. Only rows where both
        players have Pos "D" in the player ids file are kept. Any other numeric columns (e.g. Game, Season) are
        summed along with the rest and are not meaningful.
    """
    startseason, endseason = [helper.infer_season_from_date(x) for x in (startdate, enddate)]

    dflst = []
    for season in range(startseason, endseason + 1):
        games_played = schedules.get_team_games(season, team, startdate, enddate)
        # NOTE(review): 20001-30417 presumably spans regular-season and playoff game IDs -- confirm
        games_played = [g for g in games_played if 20001 <= g <= 30417]

        toi = manip.get_game_h2h_toi(season, games_played).rename(columns={'Secs': 'TOI'})
        cf = manip.get_game_h2h_corsi(season, games_played, 'cf').rename(columns={'HomeCorsi': 'CF'})
        ca = manip.get_game_h2h_corsi(season, games_played, 'ca').rename(columns={'HomeCorsi': 'CA'})

        # TOI, CF, and CA have columns designating which team--H or R
        # Use schedule to find appropriate ones to filter for
        sch = schedules.get_team_schedule(season, team, startdate, enddate)
        sch = helper.melt_helper(sch[['Game', 'Home', 'Road']],
                                 id_vars='Game', var_name='HR', value_name='Team')
        sch = sch.query('Team == {0:d}'.format(int(team_info.team_as_id(team))))
        # 'Home'/'Road' -> 'H'/'R'. Built with assign rather than writing into the query result in place.
        side = sch.HR.apply(lambda x: x[0])
        sch = sch.assign(Team1=side, Team2=side).drop(['Team', 'HR'], axis=1)

        # Keep only rows where both players are on this team's side in that game
        toi = toi.merge(sch, how='inner', on=['Game', 'Team1', 'Team2'])
        cf = cf.merge(sch, how='inner', on=['Game', 'Team1', 'Team2'])
        ca = ca.merge(sch, how='inner', on=['Game', 'Team1', 'Team2'])

        # CF and CA from home perspective, so switch if necessary
        cfca = cf.merge(ca, how='outer', on=['Game', 'PlayerID1', 'PlayerID2', 'Team1', 'Team2'])
        # The mask must come from cfca itself: the outer merge can add and reorder rows, so a mask built from
        # the pre-merge cf/ca frames would not line up with cfca's rows.
        on_road = cfca.Team1 == 'R'
        cfca = cfca.assign(CF=cfca.CA.where(on_road, cfca.CF),
                           CA=cfca.CF.where(on_road, cfca.CA))
        cfca = cfca.drop(['Team1', 'Team2'], axis=1)

        toi = toi.drop(['Team1', 'Team2', 'Min'], axis=1)
        joined = toi.merge(cfca, how='outer', on=['PlayerID1', 'PlayerID2', 'Game']) \
            .assign(Season=season)
        dflst.append(joined)

    df = pd.concat(dflst) \
        .groupby(['PlayerID1', 'PlayerID2'], as_index=False).sum()
    # TOI is in seconds, so scale counts to a per-60-minutes rate
    df.loc[:, 'CF60'] = df.CF * 3600 / df.TOI
    df.loc[:, 'CA60'] = df.CA * 3600 / df.TOI

    # Inner-merging against the defensemen IDs for each player slot leaves only D-D pairs
    defensemen = players.get_player_ids_file().query('Pos == "D"')[['ID']]
    df = df.merge(defensemen.rename(columns={'ID': 'PlayerID1'}), how='inner', on='PlayerID1') \
        .merge(defensemen.rename(columns={'ID': 'PlayerID2'}), how='inner', on='PlayerID2')
    return df