def plays_before_time(connection, gsis_id_, before_quarter, before_time):
    """Return the ids of the plays in one game that fall before a given moment.

    The ``time`` column of every ``play`` row for ``gsis_id_`` is loaded
    (ordered by ``play_id``), split into a quarter part and a time part by
    ``process_time_col``, and filtered with ``is_before``.

    :param connection: database connection handed to ``pd.read_sql_query``.
    :param gsis_id_: game identifier bound to the query's ``gsis_id`` parameter.
    :param before_quarter: quarter part of the cut-off, passed to ``is_before``.
    :param before_time: time part of the cut-off, passed to ``is_before``.
    :return: list of ``int`` play ids selected by ``is_before``.
    """
    plays = pd.read_sql_query(
        """SELECT play_id, time FROM play WHERE gsis_id = %(gsis_id)s """,
        connection,
        index_col='play_id',
        params={'gsis_id': gsis_id_},
    )
    play_times = plays.sort_index()['time']

    # presumably both results are Series sharing the play_id index -- the
    # quarter's index is what gets filtered below.
    quarter, time = process_time_col(play_times)

    earlier = is_before(quarter, time, before_quarter, before_time)
    selected_ids = quarter.index[earlier]
    return [int(play_id) for play_id in selected_ids]
def team_stats_by_drive(connection, include_preseason=False):
    """Build a per-drive table of summed offensive stats and the score at drive start.

    Steps:

    1. Sum every column in ``offense_team_stat_columns`` (via ``_sum_query``)
       over ``agg_play`` rows, grouped by ``(gsis_id, drive_id)``.
    2. Load the whole ``drive`` table, rename ``pos_team`` to ``team``, clean
       the ``start_field`` / ``end_field`` / ``pos_time`` columns with
       ``_de_parenthesize`` and split ``start_time`` / ``end_time`` into
       ``*_quarter`` and ``*_time`` columns with ``process_time_col``.
    3. Inner-join both frames and re-index by ``(gsis_id, team, drive_id)``.
    4. For every drive, look up the score at the drive's start with
       ``score_before_time`` and store it in ``offense_score`` /
       ``defense_score``.

    :param connection: database connection handed to pandas' SQL readers.
    :param include_preseason: when false (the default) the stat sums are
        restricted to games whose ``season_type`` is not ``'Preseason'``;
        because of the inner join, drives of excluded games are dropped from
        the result as well.
    :return: ``DataFrame`` indexed by ``(gsis_id, team, drive_id)``.
    """
    sum_columns_sql = ', '.join(_sum_query(col) for col in offense_team_stat_columns)
    # The game table is only joined when it is needed to filter out preseason.
    season_filter_sql = (
        ''
        if include_preseason
        else """ INNER JOIN game USING(gsis_id) WHERE season_type != 'Preseason' """
    )
    team_sums = pd.read_sql_query(
        """SELECT gsis_id, drive_id, {} FROM drive INNER JOIN agg_play USING(gsis_id, drive_id) {} GROUP BY gsis_id, drive_id """.format(
            sum_columns_sql,
            season_filter_sql,
        ),
        connection,
        index_col=['gsis_id', 'drive_id'],
    ).sort_index()

    drive = pd.read_sql_table(
        'drive',
        connection,
        index_col=['gsis_id', 'drive_id'],
    ).sort_index()
    drive['team'] = drive['pos_team']
    del drive['pos_team']

    # Only non-null cells are passed to the clean-up helper.
    for col in ['start_field', 'end_field', 'pos_time']:
        present = ~drive[col].isnull()
        drive.loc[present, col] = _de_parenthesize(drive.loc[present, col])

    for time_type in ['start', 'end']:
        drive[time_type + '_quarter'], drive[time_type + '_time'] = process_time_col(
            drive[time_type + '_time']
        )

    drive = (
        pd.concat([drive, team_sums], axis=1, join='inner')
        .reset_index()
        .set_index(['gsis_id', 'team', 'drive_id'])
        .sort_index()
    )

    drive['offense_score'] = 0
    drive['defense_score'] = 0
    for name, row in drive.iterrows():
        gsis_id, team, _ = name
        # presumably a Series of scores keyed by team -- verify against
        # score_before_time.
        scores = score_before_time(connection, gsis_id, row['start_quarter'], row['start_time'])
        drive.loc[name, 'offense_score'] = scores[team]
        # Take the first entry that is not the offense's.  ``.iloc`` makes the
        # positional access explicit: plain ``[0]`` on a non-integer index
        # relies on a deprecated fallback in pandas.
        drive.loc[name, 'defense_score'] = scores[scores.index != team].iloc[0]
    return drive