def add_player_state(line, lahman_shelf, game_state, state_by_id):
    """Store the latest state of the player described by ``line``.

    ``line`` is split with ``csv_split`` into six fields: a record tag
    (ignored), the player's Retrosheet id, name, team flag, batting position
    and fielding position.  Presumably this is a Retrosheet ``start``/``sub``
    record -- verify against the callers.

    Note: this does not keep track of who gets subbed out; it only keeps the
    most recent state for each player id.

    Args:
        line: the raw record text.
        lahman_shelf: mapping from Retrosheet id to a dict of Lahman stats.
        game_state: object holding ``visteam``/``hometeam`` and receiving the
            ``visteam_pitcherid``/``hometeam_pitcherid`` updates.
        state_by_id: mapping updated in place, Retrosheet id -> PlayerState.

    Raises:
        Exception: whatever ``csv_split`` or the field unpacking raises; the
            offending line is logged before the exception is re-raised.
    """
    try:
        _, retrosheetid, name, team, batpos, fieldpos = csv_split(line)
    except Exception:
        logging.error("Choked on line: %s" % line)
        raise

    # A team flag of "0" means the visiting side; anything else is home.
    side = "hometeam" if team != "0" else "visteam"

    entry = PlayerState()
    entry.retrosheetid = retrosheetid
    entry.name = name
    entry.visorhome = side
    entry.team = getattr(game_state, side)
    entry.batpos = batpos
    entry.fieldpos = fieldpos

    # Fielding position "1" is the pitcher: remember the new pitcher for
    # this side so later plays can look them up.
    if fieldpos == "1":
        setattr(game_state, "%s_pitcherid" % side, retrosheetid)

    # Copy over the Lahman stats when this player is known to the shelf.
    if retrosheetid in lahman_shelf:
        career = lahman_shelf[retrosheetid]
        for stat in PlayerState.lahman_stats:
            setattr(entry, "lahman_%s" % stat, career[stat])

    state_by_id[retrosheetid] = entry
def get_feature_sets(playbyplay, lahman_shelf, game_state, base_featureset, player_state_by_id):
    """Yield one labelled feature set per play in ``playbyplay``.

    The text is processed line by line:

    * lines starting with ``sub`` update ``player_state_by_id`` (and possibly
      the current pitcher in ``game_state``) via ``add_player_state``;
    * lines starting with ``play`` are split into seven fields and, unless
      the play starts with ``NP``, produce a copy of ``base_featureset``
      filled with batter info, opposing-pitcher info, inning, count and a
      label (``Label.HR``, ``Label.K`` or ``Label.OTHER``);
    * every other line is ignored.

    Args:
        playbyplay: newline-separated play-by-play text.
        lahman_shelf: mapping passed through to ``add_player_state``.
        game_state: object holding the ``*_pitcherid`` attributes.
        base_featureset: template feature set; only copies are modified.
        player_state_by_id: mapping from Retrosheet id to player state.

    Yields:
        The populated feature set for each non-ignored play.

    Raises:
        Exception: any error while parsing or building a feature set is
            logged together with the offending line and re-raised.
    """
    skipped_prefixes = ("NP",)

    for raw_line in playbyplay.split("\n"):
        line = raw_line.strip()

        if line.startswith("sub"):
            add_player_state(line, lahman_shelf, game_state, player_state_by_id)
            continue
        if not line.startswith("play"):
            continue

        try:
            _, inning, visorhome, retrosheetid, count, pitches, play = csv_split(line)
        except Exception:
            logging.error("Choked on line: %s" % line)
            raise

        if play.startswith(skipped_prefixes):
            continue

        try:
            featureset = base_featureset.copy()

            # player info
            featureset.add_batter_info(player_state_by_id[retrosheetid])

            # The pitcher belongs to the side OPPOSITE to the current batter:
            # a batting flag of "1" selects the visiting team's pitcher.
            pitching_side = "visteam" if visorhome == "1" else "hometeam"
            pitcher_id = getattr(game_state, "%s_pitcherid" % pitching_side)
            featureset.add_pitcher_info(player_state_by_id[pitcher_id])

            # at-bat stats
            featureset.ab_inning = inning
            try:
                # First two characters of the count are balls and strikes;
                # best effort only, a malformed count is silently skipped.
                featureset.ab_numballs, featureset.ab_numstrikes = count[:2]
            except Exception:
                pass

            # TODO: keep track of outs, runners on?

            # label
            if play.startswith("HR"):
                outcome = Label.HR
            elif play.startswith("K"):
                outcome = Label.K
            else:
                outcome = Label.OTHER
            featureset.label = outcome

            yield featureset
        except Exception:
            logging.error("Choked on line: %s" % line)
            raise
def process_master_file(shelf_fn, master_fn):
    """Build a shelf of Lahman master records keyed by Retrosheet id.

    The first line of ``master_fn`` is taken as the comma-separated schema.
    Every following line is split with ``csv_split``, zipped with the schema
    into a dict and stored in the shelf under its ``retroID`` value.

    Args:
        shelf_fn: path of the shelf to create; opened with ``flag='n'``, so
            any existing shelf at that path is replaced by a new, empty one.
        master_fn: path of the master CSV file to read.

    Raises:
        Exception: if a data line does not have as many values as the schema.
        KeyError: if the schema has no ``retroID`` column.
    """
    lahman_shelf = shelve.open(shelf_fn, flag='n')
    try:
        # The context manager closes the input file even when a line fails
        # validation part-way through.
        with open(master_fn) as master_file:
            schema = None
            for i, line in enumerate(master_file):
                line = line.strip()
                if i == 0:
                    schema = line.split(",")
                    continue

                values = csv_split(line)
                if len(values) != len(schema):
                    raise Exception("Line mismatch: expected %d values, got %d. Schema:\n%s\nLine:\n%s" % (len(schema), len(values), ",".join(schema), line))

                stats = dict(zip(schema, values))
                lahman_shelf[stats["retroID"]] = stats
    finally:
        # Always release the shelf, including on error, so the underlying
        # database file is not left open.
        lahman_shelf.close()
def parse_header(header):
    """Parse the ``info`` lines of a game header.

    Each line starting with ``info,`` is split with ``csv_split`` into a tag
    (ignored), a key and a value.  The ``visteam`` and ``hometeam`` values are
    stored on the returned game state.  Any key whose ``game_<key>`` name is
    listed in ``FeatureSet.__slots__`` is stored on the returned feature set.

    Args:
        header: newline-separated header text.

    Returns:
        A ``(game_state, base_featureset)`` tuple of freshly created objects.

    Raises:
        Exception: whatever ``csv_split`` or the field unpacking raises; the
            offending line is logged before the exception is re-raised.
    """
    game_state = GameState()
    base_featureset = FeatureSet()

    for line in header.split("\n"):
        line = line.strip()
        if not line.startswith("info,"):
            continue

        try:
            _, key, value = csv_split(line)
        except Exception:
            logging.error("Choked on line: %s" % line)
            raise

        # Assign to the instances, not the classes: a class-level assignment
        # would be shared by every game and would replace the slot descriptor
        # of any name declared in __slots__.
        if key in ("visteam", "hometeam"):
            setattr(game_state, key, value)

        fs_key = "game_%s" % key
        if fs_key in FeatureSet.__slots__:
            setattr(base_featureset, fs_key, value)

    return game_state, base_featureset