def get_pitches(pitcher_id, date, count, columns=()):
    """Retrieve up to ``abs(count)`` pitches thrown by a pitcher around a date.

    The sign of ``count`` selects the side of the window: a negative count
    selects games strictly before ``date``; zero or a positive count selects
    games on or after ``date``. In both cases the games closest to ``date``
    are taken first, so the LIMIT keeps the pitches nearest the date.

    Args:
        pitcher_id: Value matched against ``gameday.pitch.pitcher``.
        date: Object providing ``strftime`` (e.g. ``datetime.date``).
        count: Signed window size; its absolute value is the row limit.
        columns: Accepted for interface compatibility; not used by the query.

    Returns:
        A list of the rows produced by a ``DictCursor``, each carrying the
        keys ``x`` (``p.px``) and ``y`` (``p.pz``).
    """
    # Before the date: walk backwards from it. On/after the date: walk
    # forwards. Sorting DESC for the ">=" case would return the newest
    # pitches in the table instead of the ones right after the date.
    if count < 0:
        operator, direction = "<", "DESC"
    else:
        operator, direction = ">=", "ASC"

    sql = '''
        SELECT p.px AS x, p.pz AS y
        FROM gameday.game g
        INNER JOIN gameday.pitch p ON g.game_id=p.game_id
        WHERE p.pitcher = %s AND g.date __operator__ %s
        ORDER BY g.date __direction__
        LIMIT %s'''
    sql = sql.replace("__operator__", operator).replace("__direction__", direction)
    params = (pitcher_id, date.strftime("%Y-%m-%d"), abs(count))

    conn = connect.open()
    try:
        cur = conn.cursor(MySQLdb.cursors.DictCursor)
        cur.execute(sql, params)
        return list(cur.fetchall())
    finally:
        # Release the connection even if the query fails.
        conn.close()
def get_atbats(batter_id, date, count, columns=()):
    """Retrieve up to ``abs(count)`` at bats by a batter around a date.

    The sign of ``count`` selects the side of the window: a negative count
    selects games strictly before ``date``; zero or a positive count selects
    games on or after ``date``. In both cases the games closest to ``date``
    are taken first, so the LIMIT keeps the at bats nearest the date.

    Args:
        batter_id: Value matched against ``gameday.atbat.batter``.
        date: Object providing ``strftime`` (e.g. ``datetime.date``).
        count: Signed window size; its absolute value is the row limit.
        columns: Accepted for interface compatibility; not used by the query.

    Returns:
        A list of the rows produced by a ``DictCursor``, each carrying the
        keys ``date`` and ``event``.
    """
    # Before the date: walk backwards from it. On/after the date: walk
    # forwards, otherwise the LIMIT would keep the newest at bats overall.
    if count < 0:
        operator, direction = "<", "DESC"
    else:
        operator, direction = ">=", "ASC"

    # The original statement read "INNER JOIN JOIN", which is a syntax error.
    sql = '''
        SELECT g.date, ab.event
        FROM gameday.game g
        INNER JOIN gameday.atbat ab ON g.game_id=ab.game_id
        WHERE ab.batter = %s AND g.date __operator__ %s
        ORDER BY g.date __direction__
        LIMIT %s
    '''
    sql = sql.replace("__operator__", operator).replace("__direction__", direction)
    params = (batter_id, date.strftime("%Y-%m-%d"), abs(count))

    conn = connect.open()
    try:
        cur = conn.cursor(MySQLdb.cursors.DictCursor)
        cur.execute(sql, params)
        return list(cur.fetchall())
    finally:
        # Release the connection even if the query fails.
        conn.close()
def get_player_injuries(player_id):
    """Retrieve all completed injuries for a player, most recent first.

    Only rows whose ``end_date`` is not NULL are returned.

    Args:
        player_id: Value matched against ``injuryfx.injuries.player_id_mlbam``.

    Returns:
        A list of ``DictCursor`` rows. Each row's ``parts`` value is replaced
        by its JSON-decoded form, or by ``""`` when the stored value is empty.
    """
    sql = '''
        SELECT i.injury_id, i.player_id_mlbam, i.team_id_mlbam, i.injury, i.side,
               i.parts, i.dl_type, i.start_date, i.end_date,
               p.first_name, p.last_name
        FROM injuryfx.injuries i
        INNER JOIN gameday.player p ON p.id = i.player_id_mlbam
        WHERE i.player_id_mlbam = %s AND i.end_date IS NOT NULL
        ORDER BY i.start_date DESC
    '''
    params = (player_id, )

    conn = connect.open()
    try:
        cur = conn.cursor(MySQLdb.cursors.DictCursor)
        cur.execute(sql, params)
        injuries = []  # deliberately not named "list": that shadows the builtin
        for row in cur:
            # "parts" is stored as a JSON string; decode it for callers.
            row['parts'] = json.loads(row['parts']) if row['parts'] else ""
            injuries.append(row)
        return injuries
    finally:
        # Release the connection even if the query or decoding fails.
        conn.close()
def log_save(transaction_id, transaction_date):
    """Log the latest save point of the transaction import.

    Inserts one row into ``injury_load_log`` so that the import process can
    pick up where it left off, then commits.

    Args:
        transaction_id: Identifier of the last processed transaction.
        transaction_date: Object providing ``strftime``; stored as
            ``YYYY-MM-DD``.
    """
    sql = "INSERT INTO injury_load_log VALUES (NULL, %s, %s, CURRENT_TIMESTAMP)"
    params = (transaction_id, transaction_date.strftime("%Y-%m-%d"))

    conn = connect.open()
    try:
        cur = conn.cursor()
        cur.execute(sql, params)
        conn.commit()
    finally:
        # Release the connection whether or not the insert succeeded.
        conn.close()
def _checked_column_name(name):
    """Return ``name`` if it is a bare SQL identifier, else raise ValueError."""
    import re

    try:
        is_identifier = re.match(r'[A-Za-z_][A-Za-z0-9_]*\Z', name) is not None
    except TypeError:
        # Not a string at all.
        is_identifier = False
    if not is_identifier:
        raise ValueError("invalid player_type column name: %r" % (name, ))
    return name


def get_max_atbat_window(injury_id, player_type=None, break_on_off_season=False):
    """Calculate the maximum window size on each side of an injury.

    Counts the rows in ``gameday.atbat`` for the injured player between the
    previous injury and this one, and between this one and the next injury,
    and returns the smaller of the two counts. The borders come from
    ``_get_window_boundaries``.
    (TODO: make season break actually happen for break_on_off_season.)

    Args:
        injury_id: Id of the injury the window is centred on.
        player_type: Name of the ``gameday.atbat`` column that is compared
            with the player id. When ``None`` it is obtained from
            ``player.split_type``.
        break_on_off_season: Passed through to ``_get_window_boundaries``.

    Returns:
        The smaller of the before-injury and after-injury counts.

    Raises:
        ValueError: If ``player_type`` is not a bare identifier. It is
            substituted into the SQL text as a column name and cannot be
            passed as a bind parameter, so it has to be validated instead.
        TypeError: If no injury matches ``injury_id`` (the boundaries then
            hold no current injury to read from).
    """
    # Validate caller-supplied input before touching the database.
    if player_type is not None:
        player_type = _checked_column_name(player_type)

    boundaries = _get_window_boundaries(injury_id, break_on_off_season)
    current_injury = boundaries["current_injury"]
    current_injury_date = current_injury["start_date"]
    player_id = current_injury["player_id_mlbam"]
    if player_type is None:
        player_type = _checked_column_name(player.split_type(player_id))

    # The first 10 characters of game_id are compared with the boundary dates.
    # No ORDER BY: an aggregate without GROUP BY yields a single row.
    prior_sql = '''
        select count(*) total_prior_at_bats
        from gameday.atbat
        where _player_type_ = %s
          and substring(game_id, 1, 10) <= %s
          and substring(game_id, 1, 10) >= %s
    '''.replace("_player_type_", player_type)
    post_sql = '''
        select count(*) total_post_at_bats
        from gameday.atbat
        where _player_type_ = %s
          and substring(game_id, 1, 10) > %s
          and substring(game_id, 1, 10) < %s
    '''.replace("_player_type_", player_type)

    conn = connect.open()
    try:
        # Events up to and including the injury date, bounded below.
        cur = conn.cursor()
        cur.execute(prior_sql, (player_id, current_injury_date,
                                boundaries["prior_injury_date"]))
        prior_count = cur.fetchone()[0]

        # Events after the injury date, bounded above.
        cur = conn.cursor()
        cur.execute(post_sql, (player_id, current_injury_date,
                               boundaries["next_injury_date"]))
        post_count = cur.fetchone()[0]
    finally:
        conn.close()

    # The window must fit on both sides, so the smaller side is the limit.
    return min(prior_count, post_count)
def _get_window_boundaries(injury_id, break_on_off_season=False):
    """Calculate max date boundaries around an injury.

    Loads every injury of the player that ``injury_id`` belongs to, ordered
    by start date, and locates the requested injury in that list.

    Args:
        injury_id: Id of the injury to centre on.
        break_on_off_season: Accepted but not used by this function yet.

    Returns:
        A dict with three keys:
        ``prior_injury_date``: start date of the preceding injury, or
        ``'0000/00/00'`` when there is none;
        ``next_injury_date``: start date of the following injury, or
        ``'9999/99/99'`` when there is none;
        ``current_injury``: dict with ``injury_id``, ``player_id_mlbam``,
        ``start_date`` and ``dl_type``, or ``None`` when no row matched.
        Start dates have ``-`` replaced by ``/`` in the query.
    """
    # Get all injuries for a player based on the current injury_id
    # TODO there are instances where the end_date is null on an injury
    # They can cause weird results (such as the max window size being zero)
    # Code should handle these more elegantly.
    injuries_for_this_player_sql = '''
        select i2.injury_id, i1.player_id_mlbam,
               replace(i2.start_date, '-', '/') start_date, i2.dl_type
        from injuryfx.injuries i1
        INNER JOIN injuryfx.injuries i2
            ON i1.player_id_mlbam = i2.player_id_mlbam
        where i1.injury_id = %s
        order by i2.start_date
    '''

    conn = connect.open()
    try:
        cur = conn.cursor()
        cur.execute(injuries_for_this_player_sql, (injury_id, ))
        rows = cur.fetchall()
    finally:
        # Everything is fetched, so the connection can be released now.
        conn.close()

    injuries = [
        {
            "injury_id": row[0],
            "player_id_mlbam": row[1],
            "start_date": row[2],
            "dl_type": row[3],
        }
        for row in rows
    ]

    # Defaults sit outside the data time frame, so an injury with no
    # neighbour on one side is unbounded on that side.
    boundaries = {
        "prior_injury_date": '0000/00/00',
        "next_injury_date": '9999/99/99',
        "current_injury": None,
    }

    # NOTE(review): the match uses ==, so injury_id must compare equal to the
    # value the driver returns for the column (e.g. an int, not a str).
    for i, injury in enumerate(injuries):
        if injury["injury_id"] != injury_id:
            continue
        if i > 0:
            boundaries["prior_injury_date"] = injuries[i - 1]["start_date"]
        if i < len(injuries) - 1:
            boundaries["next_injury_date"] = injuries[i + 1]["start_date"]
        boundaries["current_injury"] = injury
        break

    return boundaries
def select_first_row(query, params=()):
    """Run a SELECT and hand back its first row, or None if it has no rows.

    Meant for single-row lookups such as fetching a record by primary key.
    The row comes from a ``DictCursor``. The connection is always closed
    before returning.
    """
    connection = connect.open()
    try:
        cursor = connection.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(query, params)
        # Only fetch when the cursor reports at least one row.
        return cursor.fetchone() if cursor.rowcount > 0 else None
    finally:
        connection.close()
def select_single(query, params=()):
    """Run a SELECT and hand back the first column of its first row.

    Meant for single-value lookups, such as retrieving a general body region
    from a specific body part. Returns None when the query yields no rows.
    The connection is always closed before returning.
    """
    connection = connect.open()
    try:
        cursor = connection.cursor()
        cursor.execute(query, params)
        if cursor.rowcount <= 0:
            return None
        first_row = cursor.fetchone()
        return first_row[0]
    finally:
        connection.close()
def select_list(query, params=None):
    """Build a list from the first column of every row of a SELECT.

    If the query returns multiple columns, only the first is kept.

    Args:
        query: SQL text to execute.
        params: Optional bind parameters, passed straight to
            ``cursor.execute``. The default ``None`` runs the query exactly
            as written, matching the previous one-argument behaviour.

    Returns:
        A list with one entry per result row (empty when there are no rows).
    """
    conn = connect.open()
    try:
        cur = conn.cursor()
        cur.execute(query, params)
        return [row[0] for row in cur]
    finally:
        conn.close()
def get_pitches(batter_id, date, count, columns=(), result="swing"):
    """Retrieve up to ``abs(count)`` pitches thrown to a batter around a date.

    The sign of ``count`` selects the side of the window: a negative count
    selects games strictly before ``date``; zero or a positive count selects
    games on or after ``date``. In both cases the games closest to ``date``
    are taken first, so the LIMIT keeps the pitches nearest the date.

    Args:
        batter_id: Value matched against ``gameday.pitch.batter``.
        date: Object providing ``strftime`` (e.g. ``datetime.date``).
        count: Signed window size; its absolute value is the row limit.
        columns: Accepted for interface compatibility; not used by the query.
        result: Filter on the pitch description ``p.des``:
            ``"swing"``: everything except called strikes and balls;
            ``"contact"``: as "swing", also excluding swinging strikes;
            ``"play"``: descriptions starting with ``In play``;
            ``"miss"``: swinging strikes only;
            any other value: no filter.

    Returns:
        A list of the rows produced by a ``DictCursor``, each carrying the
        keys ``x`` (``p.px``) and ``y`` (``p.pz``).
    """
    # Before the date: walk backwards from it. On/after the date: walk
    # forwards. Sorting ASC for the "<" case would return the oldest
    # pitches in the table instead of the ones right before the date.
    if count < 0:
        operator, direction = "<", "DESC"
    else:
        operator, direction = ">=", "ASC"

    # The driver applies %-formatting to the statement when parameters are
    # supplied, so the LIKE wildcard must be written as "%%".
    result_conditions = {
        "swing": "AND p.des NOT IN ('Called Strike', 'Ball')",
        "contact": "AND p.des NOT IN ('Called Strike', 'Ball', 'Swinging Strike')",
        "play": "AND p.des LIKE 'In play%%'",
        "miss": "AND p.des = 'Swinging Strike'",
    }
    result_condition = result_conditions.get(result, "")

    sql = '''
        SELECT p.px AS x, p.pz AS y
        FROM gameday.game g
        INNER JOIN gameday.pitch p ON g.game_id=p.game_id
        WHERE p.batter = %s AND g.date __operator__ %s
        __result_condition__
        ORDER BY g.date __direction__
        LIMIT %s
    '''
    sql = (sql.replace("__operator__", operator)
              .replace("__direction__", direction)
              .replace("__result_condition__", result_condition))
    params = (batter_id, date.strftime("%Y-%m-%d"), abs(count))

    conn = connect.open()
    try:
        cur = conn.cursor(MySQLdb.cursors.DictCursor)
        cur.execute(sql, params)
        return list(cur.fetchall())
    finally:
        # Release the connection even if the query fails.
        conn.close()
def get_injury(inj_id, columns=""):
    """Load the details of a single injury by id.

    Args:
        inj_id: Value matched against ``injuryfx.injuries.injury_id``.
        columns: Accepted for interface compatibility; not used by the query.

    Returns:
        A dict with the keys ``injury_id``, ``player_id_mlbam``,
        ``team_id_mlbam``, ``injury``, ``side``, ``parts``, ``dl_type``,
        ``start_date``, ``end_date``, ``first_name`` and ``last_name``, or
        ``None`` when no row matches. ``parts`` is JSON-decoded, or ``""``
        when the stored value is empty.
    """
    # Key order mirrors the SELECT column order below.
    keys = ("injury_id", "player_id_mlbam", "team_id_mlbam", "injury", "side",
            "parts", "dl_type", "start_date", "end_date",
            "first_name", "last_name")
    sql = '''
        SELECT i.injury_id, i.player_id_mlbam, i.team_id_mlbam, i.injury, i.side,
               i.parts, i.dl_type, i.start_date, i.end_date,
               p.first_name, p.last_name
        FROM injuryfx.injuries i
        INNER JOIN gameday.player p ON p.id = i.player_id_mlbam
        WHERE i.injury_id = %s
    '''

    conn = connect.open()
    try:
        cur = conn.cursor()
        cur.execute(sql, (inj_id, ))
        res = cur.fetchone() if cur.rowcount > 0 else None
    finally:
        conn.close()

    if res is None:
        return None

    inj = dict(zip(keys, res))
    # "parts" is stored as a JSON string; decode it for callers.
    inj["parts"] = json.loads(inj["parts"]) if inj["parts"] else ""
    return inj
def all_players_with_injuries(type="all", year=None):
    """Retrieve all players with at least one completed DL stint.

    Args:
        type: ``"batter"`` or ``"pitcher"`` to restrict to that player type;
            any other value applies no type filter.
        year: When truthy, only injuries whose start date falls in that year
            are considered.

    Returns:
        A list of ``DictCursor`` rows, one per player, with the keys ``id``,
        ``first_name``, ``last_name``, ``type``, ``injury_count`` and
        ``latest_injury``, ordered by last name then first name.
    """
    # Collect optional filters as SQL fragments plus bind parameters, so that
    # caller-supplied values never become part of the SQL text.
    conditions = []
    params = []
    if type in ("batter", "pitcher"):
        conditions.append("AND p.type = %s")
        params.append(type)
    if year:
        conditions.append("AND YEAR(i.start_date) = %s")
        params.append(year)

    sql = '''
        SELECT p.id, p.first_name, p.last_name, p.type,
               COUNT(*) AS injury_count, MAX(i.start_date) AS latest_injury
        FROM injuryfx.injuries i
        INNER JOIN gameday.player p ON p.id = i.player_id_mlbam
        WHERE i.end_date IS NOT NULL
        __conditions__
        GROUP BY p.id
        ORDER BY p.last_name, p.first_name
    '''.replace("__conditions__", " ".join(conditions))

    conn = connect.open()
    try:
        cur = conn.cursor(MySQLdb.cursors.DictCursor)
        # Pass None rather than an empty tuple when there is nothing to bind.
        cur.execute(sql, tuple(params) or None)
        return [row for row in cur]
    finally:
        conn.close()
def run_sql(sql):
    """Execute a SQL statement as written and discard any result.

    The statement is sent without bind parameters, so a literal ``%`` in
    ``sql`` is not treated as a format directive.

    NOTE(review): no commit is issued here, as in the original; confirm
    whether callers rely on autocommit for data-modifying statements.

    Args:
        sql: Complete SQL text to execute.
    """
    conn = connect.open()
    try:
        cur = conn.cursor(MySQLdb.cursors.DictCursor)
        cur.execute(sql)
    finally:
        conn.close()
def save_injury(inj):
    """Save a parsed injury dict to the database.

    Takes a dict as returned by parse.parse_injury_transaction. This function
    is intended to be run chronologically over the entries in the MLB.com
    JSON files, as transfer and activate actions rely on finding the most
    recent prior transaction for the player.

    Behaviour by ``inj['action']``:
        ``"placed"``: insert a new injury row with a NULL end date;
        ``"activated"``: set ``end_date`` on the player's most recent open
        injury;
        anything else: treated as a transfer, updating ``dl_type`` on the
        player's most recent open injury.

    Args:
        inj: Dict with at least ``action`` and ``player_id_mlbam``, plus the
            fields the chosen action reads (``team_id_mlbam``, ``dl_type``,
            ``transaction_date``; optional ``injury``, ``side``, ``parts``).

    Returns:
        The value returned by ``cursor.execute`` for the statement that ran,
        the string ``"Error: no injury match found."`` when an update found
        no open injury, or ``""`` if an exception was caught and printed.
    """
    out = ""
    conn = connect.open()
    try:
        cur = conn.cursor()
        if inj['action'] == "placed":
            # This is a new injury, so insert it fresh.
            insert_sql = '''
                INSERT INTO injuries (
                    injury_id, player_id_mlbam, team_id_mlbam, injury, side,
                    parts, dl_type, start_date, end_date
                ) VALUES (
                    null, %s, %s, %s, %s, %s, %s, %s, NULL
                )
            '''
            params = (
                inj['player_id_mlbam'],
                inj['team_id_mlbam'],
                inj.get('injury', ''),
                inj.get('side', ''),
                json.dumps(inj['parts']) if 'parts' in inj else '',
                inj['dl_type'],
                inj['transaction_date'].strftime("%Y-%m-%d"),
            )
            out = cur.execute(insert_sql, params)
            conn.commit()
        else:
            # This is an activation or a transfer to the 60-day DL, which
            # means it is an update to an existing row.
            # Get the most recent uncompleted injury by the player.
            select_sql = '''
                SELECT injury_id FROM injuries
                WHERE player_id_mlbam = %s AND end_date IS NULL
                ORDER BY start_date DESC
                LIMIT 1
            '''
            inj_id = query.select_single(select_sql, (inj['player_id_mlbam'], ))
            if not inj_id:
                # No matching injury entry was found.
                # TODO: log the unmatched injury to somewhere, for later review.
                out = "Error: no injury match found."
            elif inj['action'] == 'activated':
                # The injury incident is over, set the end_date.
                update_sql = '''
                    UPDATE injuries SET end_date = %s WHERE injury_id = %s
                '''
                params = (inj['transaction_date'].strftime("%Y-%m-%d"), inj_id)
                out = cur.execute(update_sql, params)
                conn.commit()
            else:
                # The player has moved from 15-day to 60-day DL, update dl_type.
                update_sql = '''
                    UPDATE injuries SET dl_type = %s WHERE injury_id = %s
                '''
                params = (inj['dl_type'], inj_id)
                out = cur.execute(update_sql, params)
                conn.commit()
    except (TypeError, AttributeError) as e:
        # Builtin exception names work on Python 2 and 3; the old
        # "exceptions" module exists only on Python 2.
        print(e)
    except Exception:
        # Best-effort import: report and carry on, but let KeyboardInterrupt
        # and SystemExit propagate (a bare except would swallow them).
        print("Error!", sys.exc_info()[0])
    finally:
        conn.close()
    return out