Ejemplo n.º 1
0
def test_get_game_data_from_schedule(mocker):
    """Verify the schedule lookup queries the season schedule for the game.

    Patches schedules.get_season_schedule and checks that
    get_game_data_from_schedule pulls the 2017 schedule, filters it to the
    requested game, and converts the row to a series-oriented dict.
    """
    schedule_mock = mocker.patch(
        "scrapenhl2.scrape.schedules.get_season_schedule")

    get_game_data_from_schedule(2017, 1234)

    schedule_mock.assert_called_once_with(2017)
    schedule_mock().query.assert_called_once_with('Game == 1234')
    schedule_mock().query().to_dict.assert_called_once_with(orient='series')
Ejemplo n.º 2
0
def parse_game_toi_from_html(season, game, force_overwrite=False):
    """
    Parses TOI from the html shift log from this game.

    :param season: int, the season
    :param game: int, the game
    :param force_overwrite: bool. If True, will execute. If False, executes only if file does not exist yet.

    :return: bool, False if skipped because both shift logs already exist, True otherwise
    """
    # TODO force_overwrite support
    # Bug fix: the original guard checked the HOME shiftlog file twice and
    # never the road one, so a missing road file would not trigger a re-parse.
    filenames = (scrape_toi.get_home_shiftlog_filename(season, game),
                 scrape_toi.get_road_shiftlog_filename(season, game))
    if force_overwrite is False and all(os.path.exists(fn) for fn in filenames):
        return False

    gameinfo = schedules.get_game_data_from_schedule(season, game)
    try:
        parsedtoi = read_shifts_from_html_pages(
            scrape_toi.get_raw_html_toi(season, game, 'H'),
            scrape_toi.get_raw_html_toi(season, game, 'R'), gameinfo['Home'],
            gameinfo['Road'], season, game)
    except ValueError:
        # Malformed shift log; save None so downstream knows parsing failed.
        # ed.print_and_log('Error with {0:d} {1:d}'.format(season, game), 'warning')
        parsedtoi = None

    save_parsed_toi(parsedtoi, season, game)
    # ed.print_and_log('Parsed shifts for {0:d} {1:d}'.format(season, game))
    return True
Ejemplo n.º 3
0
def update_player_logs_from_page(pbp, season, game):
    """
    Takes the game play by play and adds players to the master player log file, noting that they were on the roster
    for this game, which team they played for, and their status (P for played, S for scratch).

    :param season: int, the season
    :param game: int, the game
    :param pbp: json, the pbp of the game

    :return: nothing
    """

    # Pull played rosters and scratch lists for both sides from the boxscore
    boxteams = ('liveData', 'boxscore', 'teams')
    home_played = helpers.try_to_access_dict(pbp, *boxteams, 'home', 'players')
    road_played = helpers.try_to_access_dict(pbp, *boxteams, 'away', 'players')
    home_scratches = helpers.try_to_access_dict(pbp, *boxteams, 'home', 'scratches')
    road_scratches = helpers.try_to_access_dict(pbp, *boxteams, 'away', 'scratches')

    # "players" keys look like "ID8471234" -- strip the two-char prefix to get
    # integer ids, then drop scratches, which may also appear in the played set
    home_played = list({int(pid[2:]) for pid in home_played} - set(home_scratches))
    road_played = list({int(pid[2:]) for pid in road_played} - set(road_scratches))

    # Get home and road team names
    gameinfo = schedules.get_game_data_from_schedule(season, game)

    # Record each group in the player log: P = played, S = scratched
    for pids, side, status in ((home_played, 'Home', 'P'),
                               (home_scratches, 'Home', 'S'),
                               (road_played, 'Road', 'P'),
                               (road_scratches, 'Road', 'S')):
        update_player_log_file(pids, season, game, gameinfo[side], status)
Ejemplo n.º 4
0
def update_schedule_with_result_using_pbp(pbp, season, game):
    """
    Uses the PbP to update results for this game.

    :param pbp: json, the pbp for this game
    :param season: int, the season
    :param game: int, the game

    :return: nothing (False if the game is not final yet)
    """

    gameinfo = schedules.get_game_data_from_schedule(season, game)
    # result stays None when scores are equal -- e.g. 2006 10009 has
    # incomplete data and shows 0-0
    result = None

    # If game is not final yet, don't do anything
    if gameinfo['Status'] != 'Final':
        return False

    hscore = gameinfo['HomeScore']
    rscore = gameinfo['RoadScore']

    if hscore >= rscore + 2:
        # Won by two or more: must have been a regulation win
        result = 'W'
    elif rscore >= hscore + 2:
        result = 'L'
    else:
        # One-goal game: look at the final period to tell SO / OT / regulation
        finalplayperiod = helpers.try_to_access_dict(pbp, 'liveData',
                                                     'linescore',
                                                     'currentPeriodOrdinal')
        if finalplayperiod is not None:
            if finalplayperiod == 'SO':
                prefix = 'SO'
            elif finalplayperiod[-2:] == 'OT':
                prefix = 'OT'
            else:
                prefix = ''
            if hscore > rscore:
                result = prefix + 'W'
            elif rscore > hscore:
                result = prefix + 'L'

    update_schedule_with_result(season, game, result)
Ejemplo n.º 5
0
def read_events_from_page(rawpbp, season, game):
    """
    Converts the raw json pbp into a tidy pandas dataframe with these columns:

    * Index: int, index of event
    * Period: str, period of event. In regular season, could be 1, 2, 3, OT, or SO. In playoffs, 1, 2, 3, 4, 5...
    * MinSec: str, m:ss, time elapsed in period
    * Time: int, time elapsed in game
    * Event: str, the event name
    * Team: int, the team id. Note that this is switched to blocked team for blocked shots to ease Corsi calculations.
    * Actor: int, the acting player id. Switched with recipient for blocks (see above)
    * ActorRole: str, e.g. for faceoffs there is a "Winner" and "Loser". Switched with recipient for blocks (see above)
    * Recipient: int, the receiving player id. Switched with actor for blocks (see above)
    * RecipientRole: str, e.g. for faceoffs there is a "Winner" and "Loser". Switched with actor for blocks (see above)
    * X: int, the x coordinate of event (or NaN)
    * Y: int, the y coordinate of event (or NaN)
    * Note: str, additional notes, which may include penalty duration, assists on a goal, etc.

    :param rawpbp: json, the raw json pbp
    :param season: int, the season
    :param game: int, the game

    :return: pandas dataframe, the pbp in a nicer format (None if there are no plays)
    """
    allplays = helpers.try_to_access_dict(rawpbp, 'liveData', 'plays', 'allPlays')
    if allplays is None:
        return

    gameinfo = schedules.get_game_data_from_schedule(season, game)
    pbpdf = _create_pbp_df_json(allplays, gameinfo)

    # Nothing to enrich when there are no rows
    if len(pbpdf) == 0:
        return pbpdf

    return _add_times_to_pbp(_add_scores_to_pbp(pbpdf, gameinfo))
Ejemplo n.º 6
0
def _finish_toidf_manipulations(df, season, game):
    """
    Takes dataframe of shifts (one row per shift) and makes into a matrix of players on ice for each second.

    Pipeline: drop zero-length shifts, repair shifts that appear to end before
    they start, expand shifts to one row per second, pivot home/road skaters
    into ranked columns (H1..H6 / R1..R6) plus goalie columns (HG/RG), and
    derive per-second strength labels ('5' = 5 skaters + goalie, '5+1' = no
    goalie on).

    :param df: dataframe with columns PlayerID, Start, End, Team, Duration
    :param season: int, the season
    :param game: int, the game

    :return: dataframe with one row per second: Time, H1..H6, HG, R1..R6, RG,
        HomeStrength, RoadStrength
    """
    gameinfo = schedules.get_game_data_from_schedule(season, game)

    # TODO don't read end times. Use duration, which has good coverage, to infer end. Then end + 1200 not needed below.
    # Sometimes shifts have the same start and time.
    # By the time we're here, they'll have start = end + 1
    # So let's remove shifts with duration -1
    df = df[df.Start != df.End + 1]

    # Sometimes you see goalies with a shift starting in one period and ending in another
    # This is to help in those cases.
    if sum(df.End < df.Start) > 0:
        # ed.print_and_log('Have to adjust a shift time', 'warn')
        # TODO I think I'm making a mistake with overtime shifts--end at 3900!
        # TODO also, maybe only go to the end of the period, not to 1200
        # ed.print_and_log(df[df.End < df.Start])
        df.loc[df.End < df.Start,
               'End'] = df.loc[df.End < df.Start, 'End'] + 1200
    # One issue coming up is when the above line comes into play--missing times are filled in as 0:00
    tempdf = df[['PlayerID', 'Start', 'End', 'Team',
                 'Duration']].query("Duration > 0")
    tempdf = tempdf.assign(Time=tempdf.Start)
    # print(tempdf.head(20))

    # Let's filter out goalies for now. We can add them back in later.
    # This will make it easier to get the strength later
    pids = players.get_player_ids_file()
    tempdf = tempdf.merge(pids[['ID', 'Pos']],
                          how='left',
                          left_on='PlayerID',
                          right_on='ID')

    # toi = pd.DataFrame({'Time': [i for i in range(0, max(df.End) + 1)]})
    toi = pd.DataFrame(
        {'Time': [i for i in range(0, int(round(max(df.End))))]})

    # Originally used a hacky way to fill in times between shift start and end: increment tempdf by one, filter, join
    # Faster to work with base structures
    # Or what if I join each player to full df, fill backward on start and end, and filter out rows where end > time
    # toidict = toi.to_dict(orient='list')
    # players_by_sec = [[] for _ in range(min(toidict['Start'], toidict['End'] + 1))]
    # for i in range(len(players_by_sec)):
    #    for j in range(toidict['Start'][i], toidict['End'][i] + 1):
    #        players_by_sec[j].append(toidict['PlayerID'][i])
    # Maybe I can create a matrix with rows = time and columns = players
    # Loop over start and end, and use iloc[] to set booleans en masse.
    # Then melt and filter

    # Create one row per second
    alltimes = toi.Time
    newdf = pd.DataFrame(index=alltimes)

    # Add rows and set times to True simultaneously
    # NOTE(review): tempdf columns here are PlayerID, Start, End, Team,
    # Duration, Time, ID, Pos. `pid` is bound twice in the unpack below;
    # the second binding (the ID column from the merge) wins. ID should
    # equal PlayerID on matched rows -- confirm pids IDs are unique.
    for i, (pid, start, end, team, duration, time, pid,
            pos) in tempdf.iterrows():
        newdf.loc[start:end, pid] = True

    # Fill NAs to False
    for col in newdf:
        newdf.loc[:, col] = newdf[col].fillna(False)

    # Go wide to long and then drop unneeded rows
    newdf = helpers.melt_helper(
        newdf.reset_index(),
        id_vars=
        'Time',  # value_vars=newdf.columns,  # cols with num colnames causing errors
        var_name='PlayerID',
        value_name='OnIce')
    newdf = newdf[newdf.OnIce].drop('OnIce', axis=1)
    newdf = newdf.merge(tempdf.drop('Time', axis=1), how='left', on='PlayerID') \
        .query("Time <= End & Time >= Start") \
        .drop('ID', axis=1)

    # In case there were rows that were all missing, join onto TOI
    tempdf = toi.merge(newdf, how='left', on='Time')
    # TODO continue here--does newdf match tempdf after sort_values?

    # Old method
    # toidfs = []
    # while len(tempdf.index) > 0:
    #    temptoi = toi.merge(tempdf, how='inner', on='Time')
    #    toidfs.append(temptoi)

    #    tempdf = tempdf.assign(Time=tempdf.Time + 1)
    #    tempdf = tempdf.query('Time <= End')

    # tempdf = pd.concat(toidfs)
    # tempdf = tempdf.sort_values(by='Time')

    # Split goalies out from skaters using the Pos column added above
    goalies = tempdf[tempdf.Pos == 'G'].drop({'Pos'}, axis=1)
    tempdf = tempdf[tempdf.Pos != 'G'].drop({'Pos'}, axis=1)

    # Append team name to start of columns by team
    home = str(gameinfo['Home'])
    road = str(gameinfo['Road'])

    # Goalies
    # Let's assume we get only one goalie per second per team.
    # TODO: flag if there are multiple listed and pick only one
    goalies.loc[:, 'GTeam'] = goalies.Team.apply(
        lambda x: 'HG' if str(int(x)) == home else 'RG')
    try:
        goalies2 = goalies[['Time', 'PlayerID', 'GTeam']] \
            .pivot(index='Time', columns='GTeam', values='PlayerID') \
            .reset_index()
    except ValueError:
        # Duplicate entries in index error.
        # ed.print_and_log('Multiple goalies for a team in {0:d} {1:d}, picking one with the most TOI'.format(
        #    season, game), 'warn')

        # Find times with multiple goalies
        too_many_goalies_h = goalies[goalies.GTeam == 'HG'][['Time']] \
            .assign(GoalieCount=1) \
            .groupby('Time').count() \
            .reset_index() \
            .query('GoalieCount > 1')

        too_many_goalies_r = goalies[goalies.GTeam == 'RG'][['Time']] \
            .assign(GoalieCount=1) \
            .groupby('Time').count() \
            .reset_index() \
            .query('GoalieCount > 1')

        # Find most common goalie for each team
        if len(too_many_goalies_h) == 0:
            problem_times_revised_h = goalies
        else:  # i.e. if len(too_many_goalies_h) > 0:
            # The home goalie with the most seconds on ice
            top_goalie_h = goalies[goalies.GTeam == 'HG'][['PlayerID']] \
                .assign(GoalieCount=1) \
                .groupby('PlayerID').count() \
                .reset_index() \
                .sort_values('GoalieCount', ascending=False) \
                .PlayerID.iloc[0]
            # and now finally drop problem times
            # indicator=True adds a '_merge' column; 'both' marks duplicate times
            problem_times_revised_h = goalies \
                .merge(too_many_goalies_h[['Time']], how='outer', on='Time', indicator=True)
            problem_times_revised_h.loc[:, 'ToDrop'] = (problem_times_revised_h._merge == 'both') & \
                                                       (problem_times_revised_h.PlayerID != top_goalie_h)
            problem_times_revised_h = problem_times_revised_h[problem_times_revised_h.ToDrop == False] \
                .drop({'_merge', 'ToDrop'}, axis=1)

        # NOTE(review): the road branch builds on the home-revised frame, so
        # both teams' fixes accumulate into problem_times_revised_r.
        if len(too_many_goalies_r) == 0:
            problem_times_revised_r = problem_times_revised_h
        else:  # i.e. if len(too_many_goalies_r) > 0:
            top_goalie_r = goalies[goalies.GTeam == 'RG'][['PlayerID']] \
                .assign(GoalieCount=1) \
                .groupby('PlayerID').count() \
                .reset_index() \
                .sort_values('GoalieCount', ascending=False) \
                .PlayerID.iloc[0]
            problem_times_revised_r = problem_times_revised_h \
                .merge(too_many_goalies_r[['Time']], how='outer', on='Time', indicator=True)
            problem_times_revised_r.loc[:, 'ToDrop'] = (problem_times_revised_r._merge == 'both') & \
                                                       (problem_times_revised_r.PlayerID != top_goalie_r)
            problem_times_revised_r = problem_times_revised_r[problem_times_revised_r.ToDrop == False] \
                .drop({'_merge', 'ToDrop'}, axis=1)

        # Pivot again
        goalies2 = problem_times_revised_r[['Time', 'PlayerID', 'GTeam']] \
            .pivot(index='Time', columns='GTeam', values='PlayerID') \
            .reset_index()

    # Home
    hdf = tempdf.query('Team == "' + home + '"').sort_values(
        ['Time', 'Duration'], ascending=[True, False])
    if len(hdf) == 0:
        # Earlier versions of pandas can have diff behavior
        hdf = tempdf.query('Team == ' + home).sort_values(
            ['Time', 'Duration'], ascending=[True, False])
    # Rank players within each second by shift duration (longest first)
    hdf2 = hdf[['Time', 'Duration']].groupby('Time').rank(method='first',
                                                          ascending=False)
    hdf2 = hdf2.rename(columns={'Duration': 'rank'})
    hdf2.loc[:, 'rank'] = hdf2['rank'].apply(lambda x: int(x))
    hdf.loc[:, 'rank'] = 'H' + hdf2['rank'].astype('str')

    rdf = tempdf.query('Team == "' + road + '"').sort_values(
        ['Time', 'Duration'], ascending=[True, False])
    if len(rdf) == 0:
        rdf = tempdf.query('Team == ' + road).sort_values(
            ['Time', 'Duration'], ascending=[True, False])
    rdf2 = rdf[['Time', 'Duration']].groupby('Time').rank(method='first',
                                                          ascending=False)
    rdf2 = rdf2.rename(columns={'Duration': 'rank'})
    rdf2.loc[:, 'rank'] = rdf2['rank'].apply(lambda x: int(x))
    rdf.loc[:, 'rank'] = 'R' + rdf2['rank'].astype('str')

    # Remove values above 6--looking like there won't be many
    # But in those cases take shifts with longest durations
    # That's why we create hdf and rdf by also sorting by Time and Duration above, and select duration for rank()
    if len(hdf[hdf['rank'] == "H7"]) > 0:
        # ed.print_and_log('Some times from {0:d} {1:d} have too many home players; cutting off at 6'.format(
        #    season, game), 'warn')
        # ed.print_and_log('Longest shift being lost was {0:d} seconds'.format(
        #    hdf[hdf['rank'] == "H7"].Duration.max()), 'warn')
        pass
    if len(rdf[rdf['rank'] == "R7"]) > 0:
        # ed.print_and_log('Some times from {0:d} {1:d} have too many road players; cutting off at 6'.format(
        #    season, game), 'warn')
        # ed.print_and_log('Longest shift being lost was {0:d} seconds'.format(
        #    rdf[rdf['rank'] == "H7"].Duration.max()), 'warn')
        pass

    # Keep only the top six ranked players per second per team
    hdf = hdf.pivot(index='Time', columns='rank', values='PlayerID').iloc[:,
                                                                          0:6]
    hdf.reset_index(inplace=True)  # get time back as a column
    rdf = rdf.pivot(index='Time', columns='rank', values='PlayerID').iloc[:,
                                                                          0:6]
    rdf.reset_index(inplace=True)

    toi = toi.merge(hdf, how='left', on='Time') \
        .merge(rdf, how='left', on='Time') \
        .merge(goalies2, how='left', on='Time')

    column_order = list(toi.columns.values)
    column_order = ['Time'] + [x for x in sorted(column_order[1:])
                               ]  # First entry is Time; sort rest
    toi = toi[column_order]
    # Now should be Time, H1, H2, ... HG, R1, R2, ..., RG

    # For games in the first, HG and RG may not exist yet. Have dummy replacements in there.
    # Will be wrong for when goalie is pulled in first, but oh well...
    if 'HG' not in toi.columns:
        newcol = [0 for _ in range(len(toi))]
        toi.insert(loc=toi.columns.get_loc('R1'), column='HG', value=newcol)
    if 'RG' not in toi.columns:
        toi.loc[:, 'RG'] = 0

    # Count skaters per second; the goalie contributes 100 so that e.g.
    # 105 means 5 skaters + goalie and 5 means 5 skaters with goalie pulled
    toi.loc[:, 'HomeSkaters'] = 0
    for col in toi.loc[:, 'H1':'HG'].columns[:-1]:
        toi.loc[:, 'HomeSkaters'] = toi[col].notnull() + toi.HomeSkaters
    toi.loc[:, 'HomeSkaters'] = 100 * toi['HG'].notnull(
    ) + toi.HomeSkaters  # a hack to make it easy to recognize
    toi.loc[:, 'RoadSkaters'] = 0
    for col in toi.loc[:, 'R1':'RG'].columns[:-1]:
        toi.loc[:, 'RoadSkaters'] = toi[col].notnull() + toi.RoadSkaters
    toi.loc[:, 'RoadSkaters'] = 100 * toi['RG'].notnull(
    ) + toi.RoadSkaters  # a hack to make it easy to recognize

    # This is how we label strengths: 5 means 5 skaters plus goalie; five skaters w/o goalie is 4+1.
    toi.loc[:,
            'HomeStrength'] = toi.HomeSkaters.apply(lambda x: '{0:d}'.format(
                x - 100) if x >= 100 else '{0:d}+1'.format(x - 1))
    toi.loc[:,
            'RoadStrength'] = toi.RoadSkaters.apply(lambda x: '{0:d}'.format(
                x - 100) if x >= 100 else '{0:d}+1'.format(x - 1))

    toi.drop({'HomeSkaters', 'RoadSkaters'}, axis=1, inplace=True)

    # Also drop -1+1 and 0+1 cases, which are clearly errors, and the like.
    # Need at least 3 skaters apiece, 1 goalie apiece, time, and strengths to be non-NA = 11 non NA values
    toi2 = toi.dropna(axis=0,
                      thresh=11)  # drop rows without at least 11 non-NA values
    if len(toi2) < len(toi):
        # ed.print_and_log('Dropped {0:d}/{1:d} times in {2:d} {3:d} because of invalid strengths'.format(
        #    len(toi) - len(toi2), len(toi), season, game), 'warn')
        pass

    # TODO data quality check that I don't miss times in the middle of the game

    return toi2
Ejemplo n.º 7
0
def read_shifts_from_page(rawtoi, season, game):
    """
    Turns JSON shift start-ends into TOI matrix with one row per second and one col per player

    :param rawtoi: dict, json from NHL API (shift records under the 'data' key)
    :param season: int, the season
    :param game: int, the game

    :return: dataframe, or None if the game has no shift data
    """
    toi = rawtoi['data']
    if len(toi) == 0:
        return

    # The shifts are ordered shortest duration to longest.
    # Pull the fields we use from each shift record, with safe defaults.
    # (The 'duration' field is deliberately NOT read: Duration is recomputed
    # below from start/end so it is consistent with the +1 start adjustment.)
    ids = [helpers.try_to_access_dict(dct, 'playerId', default_return='')
           for dct in toi]
    periods = [helpers.try_to_access_dict(dct, 'period', default_return=0)
               for dct in toi]
    starts = [helpers.try_to_access_dict(dct, 'startTime', default_return='0:00')
              for dct in toi]
    ends = [helpers.try_to_access_dict(dct, 'endTime', default_return='0:00')
            for dct in toi]
    teams = [helpers.try_to_access_dict(dct, 'teamId', default_return='')
             for dct in toi]

    # I originally took start times at face value and subtract 1 from end times
    # This caused problems with joining events--when there's a shot and the goalie freezes immediately
    # then, when you join this to the pbp, you'll get the players on the ice for the next draw as having
    # been on ice for the shot.
    # So I switch to adding 1 to start times, and leaving end times as-are.
    # That means that when joining on faceoffs, add 1 to faceoff times.
    # Exception: start time 1 --> start time 0
    def _elapsed_seconds(timestr, period, offset):
        # Convert "m:ss" within a period to seconds elapsed in the game.
        # Assumes 1200-second (regulation-length) periods.
        minutes, _, seconds = timestr.partition(':')
        return 1200 * (period - 1) + 60 * int(minutes) + int(seconds) + offset

    starttimes = [_elapsed_seconds(t, p, 1) for p, t in zip(periods, starts)]
    # End times are taken as-is (see comment above on the +1 start shift)
    endtimes = [_elapsed_seconds(t, p, 0) for p, t in zip(periods, ends)]

    durationtime = [e - s for s, e in zip(starttimes, endtimes)]

    df = pd.DataFrame({
        'PlayerID': ids,
        'Period': periods,
        'Start': starttimes,
        'End': endtimes,
        'Team': teams,
        'Duration': durationtime
    })

    return _finish_toidf_manipulations(df, season, game)
Ejemplo n.º 8
0
    def on_success(self, data):
        """
        Streaming callback for an incoming tweet-like dict.

        Expects `data` to carry the tweet body under 'text'. Skips media
        tweets (t.co links) and retweets, then parses a season and game id
        from the text (playoff phrase, 4-digit season, 5-digit game id, or a
        pair of 3-letter team abbreviations), rescrapes the game if results
        may be stale, and tweets the h2h and timeline charts.

        Mutates module globals LAST_UPDATE (epoch seconds of last scrape) and
        SCRAPED_NEW. Errors are printed and/or tweeted back; nothing is
        raised to the caller.
        """
        if 'text' in data:
            print(data['text'])

            # Any t.co link is treated as an image tweet and ignored
            if r'https://t.co/' in data['text']:
                print('This looks like an image')
                return
            if data['text'][:3] == 'RT ':
                print('This looks like a retweet')
                return

            global LAST_UPDATE, SCRAPED_NEW
            try:
                if player_cf_graphs(data):
                    return

                try:
                    season, gameid = games.find_playoff_game(data['text'])
                except ValueError:
                    season = None
                    gameid = None

                # Get season with a 4-digit regex
                # NOTE(review): `text` is only defined in this branch; the
                # 5-digit search below relies on gameid being None implying
                # season was also None here (find_playoff_game sets both or
                # raises) -- confirm that invariant holds.
                if season is None:
                    text = data['text'] + ' '
                    if re.search(r'\s\d{4}\s', text) is not None:
                        season = int(re.search(r'\s\d{4}\s', text).group(0))
                        if season < 2015 or season > schedules.get_current_season():
                            tweet_error("Sorry, I don't have data for this season yet", data)
                            print('Invalid season')
                            return
                    else:
                        season = schedules.get_current_season()

                # Get game with a 5-digit regex
                if gameid is None:
                    if re.search(r'\s\d{5}\s', text) is not None:
                        gameid = int(re.search(r'\s\d{5}\s', text).group(0))
                        if not schedules.check_valid_game(season, gameid):
                            tweet_error("Sorry, this game ID doesn't look right", data)
                            print('Game ID not right')
                            return
                    else:
                        pass

                if gameid is None:
                    # Get team names
                    parts = data['text'].replace('@h2hbot', '').strip().split(' ')
                    teams = []
                    for part in parts:
                        # NOTE(review): [A-z] also matches [ \ ] ^ _ ` between
                        # Z and a in ASCII; [A-Za-z] is probably intended
                        if re.match(r'[A-z]{3}', part.strip()):
                            part = part.upper()
                            if team_info.team_as_id(part) is not None:
                                teams.append(part)
                    if len(teams) == 0:
                        print('Think this was a tagged discussion')
                        return
                    elif len(teams) != 2:
                        tweet_error("Sorry, I need 2 teams. Found {0:d}. Make sure abbreviations are correct"
                                    .format(len(teams)), data)
                        return

                    team1, team2 = teams[:2]
                    gameid = games.most_recent_game_id(team1, team2)

                # Output chart filenames, e.g. bot/2017020001h2h.png
                h2hfile = 'bot/{0:d}0{1:d}h2h.png'.format(season, gameid)
                tlfile = 'bot/{0:d}0{1:d}tl.png'.format(season, gameid)

                oldstatus = schedules.get_game_status(season, gameid)

                # Scrape only if:
                # Game is in current season AND
                # Game is today, and my schedule says it's "scheduled", OR
                # Game is today, and my schedule doesn't say it's final yet, and it's been at least
                #   5 min since last scrape, OR
                # Game was before today and my schedule doesn't say "final"
                # Update in these cases
                scrapeagain = False
                if season == schedules.get_current_season():
                    today = datetime.datetime.now().strftime('%Y-%m-%d')
                    gdata = schedules.get_game_data_from_schedule(season, gameid)
                    if gdata['Date'] == today:
                        if gdata['Status'] == 'Scheduled':
                            scrapeagain = True
                        elif gdata['Status'] != 'Final' and \
                                (LAST_UPDATE is None or time.time() - LAST_UPDATE >= 60 * 5):
                            scrapeagain = True
                    elif gdata['Date'] < today and gdata['Status'] != 'Final':
                        scrapeagain = True
                if scrapeagain:
                    autoupdate.autoupdate(season, update_team_logs=False)
                    LAST_UPDATE = time.time()
                    SCRAPED_NEW = True

                hname = schedules.get_home_team(season, gameid)
                rname = schedules.get_road_team(season, gameid)
                status = schedules.get_game_status(season, gameid)

                # Regenerate and tweet charts only when the game state moved
                # or the timeline image is missing
                if 'In Progress' in oldstatus or status != oldstatus or not os.path.exists(tlfile):
                    try:
                        game_timeline.game_timeline(season, gameid, save_file=tlfile)
                        game_h2h.game_h2h(season, gameid, save_file=h2hfile)
                        tweet_game_images(h2hfile, tlfile, hname, rname, status, data)
                        print('Success!')
                    except Exception as e:
                        # Chart generation/tweeting failed; report via tweet
                        print(data['text'], time.time(), e, e.args)
                        tweet_error("Sorry, there was an unknown error while making the charts (cc @muneebalamcu)",
                                    data)

            except Exception as e:
                # Catch-all so the streamer keeps running on unexpected input
                print('Unexpected error')
                print(time.time(), data['text'], e, e.args)