Exemple #1
0
def add_player_state(line, lahman_shelf, game_state, state_by_id):
    """Record the latest known state for the player named on a roster line.

    The line is split with csv_split into six fields: a record tag (ignored),
    the player's retrosheet id, name, team flag, batting position and
    fielding position.  A team flag of "0" maps to "visteam"; any other value
    maps to "hometeam".

    Only the most recent state per player is kept in state_by_id; players who
    were substituted out are not tracked.  When the fielding position is "1",
    the matching "<side>_pitcherid" attribute on game_state is updated too.
    If the retrosheet id is present in lahman_shelf, every key listed in
    PlayerState.lahman_stats is copied onto the state as "lahman_<key>".

    Any error from splitting/unpacking the line is logged and re-raised.
    """
    try:
        fields = csv_split(line)
        _, retrosheetid, name, team, batpos, fieldpos = fields
    except Exception:
        logging.error("Choked on line: %s" % line)
        raise

    side = "hometeam" if team != "0" else "visteam"

    state = PlayerState()
    state.retrosheetid = retrosheetid
    state.name = name
    state.visorhome = side
    state.team = getattr(game_state, side)
    state.batpos = batpos
    state.fieldpos = fieldpos

    # a player entering at fielding position "1" becomes that side's pitcher
    if fieldpos == "1":
        setattr(game_state, "%s_pitcherid" % side, retrosheetid)

    # attach the shelved Lahman stats when the shelf knows this player
    if retrosheetid in lahman_shelf:
        shelf_entry = lahman_shelf[retrosheetid]
        for stat_name in PlayerState.lahman_stats:
            setattr(state, "lahman_%s" % stat_name, shelf_entry[stat_name])

    state_by_id[retrosheetid] = state
Exemple #2
0
def get_feature_sets(playbyplay, lahman_shelf, game_state, base_featureset, player_state_by_id):
    """Yield one labelled feature set per play in a game's play-by-play text.

    The text is processed line by line (split on newlines, stripped):

    * lines starting with "sub" are handed to add_player_state, which updates
      player_state_by_id (and possibly game_state);
    * lines starting with "play" are split with csv_split into seven fields
      and, unless the play text starts with "NP", turned into a copy of
      base_featureset carrying batter info, opposing-pitcher info, the
      inning, the ball/strike count (best effort) and a label;
    * every other line is ignored.

    The label is Label.HR when the play text starts with "HR", Label.K when
    it starts with "K", and Label.OTHER otherwise.

    Errors while handling a "play" line are logged with the offending line
    and re-raised.
    """
    for raw_line in playbyplay.split("\n"):
        record = raw_line.strip()

        if record.startswith("sub"):
            add_player_state(record, lahman_shelf, game_state, player_state_by_id)
            continue

        if not record.startswith("play"):
            continue

        try:
            _, inning, visorhome, retrosheetid, count, pitches, play = csv_split(record)
        except Exception:
            logging.error("Choked on line: %s" % record)
            raise

        # plays whose text begins with "NP" produce no feature set
        if play.startswith("NP"):
            continue

        try:
            featureset = base_featureset.copy()

            # batter
            featureset.add_batter_info(player_state_by_id[retrosheetid])

            # pitcher: taken from the side that is NOT currently batting
            pitching_side = "hometeam" if visorhome != "1" else "visteam"
            pitcher_id = getattr(game_state, "%s_pitcherid" % pitching_side)
            featureset.add_pitcher_info(player_state_by_id[pitcher_id])

            # at-bat stats
            featureset.ab_inning = inning
            try:
                balls, strikes = count[:2]
                featureset.ab_numballs = balls
                featureset.ab_numstrikes = strikes
            except Exception:
                # the count is filled in on a best-effort basis; failures are ignored
                pass

            # TODO: keep track of outs, runners on?

            # label
            if play.startswith("HR"):
                outcome = Label.HR
            elif play.startswith("K"):
                outcome = Label.K
            else:
                outcome = Label.OTHER
            featureset.label = outcome

            yield featureset
        except Exception:
            logging.error("Choked on line: %s" % record)
            raise
def process_master_file(shelf_fn, master_fn):
    """Build a fresh shelf of player records from a master CSV file.

    The first line of master_fn is taken as the comma-separated schema
    (column names).  Every later line is split with csv_split, zipped with
    the schema into a dict, and stored in the shelf under that row's
    "retroID" value.

    Args:
        shelf_fn: path of the shelf to create.  It is opened with flag 'n',
            so a new, empty database is created each time.
        master_fn: path of the master CSV file to read.

    Raises:
        Exception: if a data line does not contain exactly as many values as
            the schema line.
        KeyError: if the schema has no "retroID" column.
    """
    lahman_shelf = shelve.open(shelf_fn, flag='n')
    try:
        # "with" closes the input file even when a malformed line aborts the load
        with open(master_fn) as master_file:
            for i, line in enumerate(master_file):
                line = line.strip()
                if i == 0:
                    schema = line.split(",")
                    continue

                values = csv_split(line)
                if len(values) != len(schema):
                    raise Exception("Line mismatch: expected %d values, got %d.  Schema:\n%s\nLine:\n%s" % (len(schema), len(values), ",".join(schema), line))

                stats = dict(zip(schema, values))
                lahman_shelf[stats["retroID"]] = stats
    finally:
        # always flush and release the shelf, including on the error path
        lahman_shelf.close()
Exemple #4
0
def process_master_file(shelf_fn, master_fn):
    """Build a fresh shelf of player records from a master CSV file.

    The first line of master_fn is taken as the comma-separated schema
    (column names).  Every later line is split with csv_split, zipped with
    the schema into a dict, and stored in the shelf under that row's
    "retroID" value.

    Args:
        shelf_fn: path of the shelf to create.  It is opened with flag 'n',
            so a new, empty database is created each time.
        master_fn: path of the master CSV file to read.

    Raises:
        Exception: if a data line does not contain exactly as many values as
            the schema line.
        KeyError: if the schema has no "retroID" column.
    """
    lahman_shelf = shelve.open(shelf_fn, flag='n')
    try:
        # "with" closes the input file even when a malformed line aborts the load
        with open(master_fn) as master_file:
            for i, line in enumerate(master_file):
                line = line.strip()
                if i == 0:
                    schema = line.split(",")
                    continue

                values = csv_split(line)
                if len(values) != len(schema):
                    raise Exception("Line mismatch: expected %d values, got %d.  Schema:\n%s\nLine:\n%s" % (len(schema), len(values), ",".join(schema), line))

                stats = dict(zip(schema, values))
                lahman_shelf[stats["retroID"]] = stats
    finally:
        # always flush and release the shelf, including on the error path
        lahman_shelf.close()
Exemple #5
0
def parse_header(header):
    """Build the per-game state and base feature set from a game header.

    Each line starting with "info," is split with csv_split into
    (tag, key, value).  The "visteam" and "hometeam" values are stored on the
    returned GameState, and any key whose "game_<key>" name appears in
    FeatureSet.__slots__ is stored on the returned FeatureSet.  All other
    lines are ignored.

    Returns:
        A (game_state, base_featureset) tuple populated from the header.

    Raises:
        Whatever csv_split or the three-way unpack raises on a malformed
        "info," line; the offending line is logged before re-raising.
    """
    game_state = GameState()
    base_featureset = FeatureSet()

    for line in header.split("\n"):
        line = line.strip()
        if not line.startswith("info,"):
            continue

        try:
            _, key, value = csv_split(line)
        except Exception:
            logging.error("Choked on line: %s" % line)
            raise

        # Assign to the instances being returned, not to the classes: a
        # class-level assignment would leak one game's values into every
        # other GameState/FeatureSet and, for FeatureSet, replace the slot
        # descriptor defined on the class.
        if key in ("visteam", "hometeam"):
            setattr(game_state, key, value)

        fs_key = "game_%s" % key
        if fs_key in FeatureSet.__slots__:
            setattr(base_featureset, fs_key, value)

    return game_state, base_featureset