예제 #1
0
def run():
    options, args = commandline_args('Parse Gameday XML files')
    d = DB()
    conn, meta = d.connect(options['database'])
    for league in options['leagues']:
        dir = os.path.join(options['output_dir'], league)
        for day in each_day(options['start'], options['end']):
            parsed_games = parse_day(dir, day)
            d.add_games(parsed_games)
예제 #2
0
def run():
    options, args = commandline_args("Dump BIP into JSON files")
    d = DB()
    conn, meta = d.connect(options["database"])

    park_table = d.model.park
    park_dim_table = d.model.park_dimension
    bip_table = d.model.bip
    game_table = d.model.game
    player_table = d.model.mlbam_player
    ab_table = d.model.atbat

    # Arg, postgres requires every column to be in the ORDER BY clause
    park_sql = select(
        [park_table.c.id, park_table.c.name, func.count(bip_table.c.id).label("num")],
        from_obj=park_table.join(bip_table),
    ).group_by(park_table.c.id, park_table.c.name)

    parks = {}
    for row in conn.execute(park_sql):
        p = {}
        for key in row.keys():
            if key in set(["id", "num"]):
                p[key] = int(row[key])
            else:
                p[key] = str(row[key])
        p["bip"] = p["num"]
        p["years"] = {}
        del p["num"]
        parks[p["id"]] = p

    # Skipping the closing year for simplicity. If a team switches parks in the
    # middle of the year, it will be a park with a new id. I believe the only
    # issue will be if MLBAM updates the image in the middle of the season.
    # This might happen for Citi Field (no bases or foul lines!).
    dimension_sql = select(
        [
            park_dim_table.c.park_id,
            park_dim_table.c.image_file,
            park_dim_table.c.opening,
            park_dim_table.c.hp_x,
            park_dim_table.c.hp_y,
            park_dim_table.c.image_hp_x,
            park_dim_table.c.image_hp_y,
            park_dim_table.c.scale,
        ],
        from_obj=park_dim_table,
    )

    for row in conn.execute(dimension_sql):
        id = row["park_id"]
        if id not in parks:
            continue
        opening = row["opening"]
        images = {
            "file": row["image_file"],
            "scale": float(row["scale"]),
            "hp_x": float(row["hp_x"]),
            "hp_y": float(row["hp_y"]),
            "image_hp_x": float(row["image_hp_x"]),
            "image_hp_y": float(row["image_hp_y"]),
            "opening": opening.year,
        }
        if "images" not in parks[id]:
            parks[id]["images"] = {}
        parks[id]["images"][opening.year] = images

    years_sql = select([func.distinct(text(get_year(conn, "day")))], from_obj=game_table)
    years = [int(row[0]) for row in conn.execute(years_sql)]

    p = player_table.alias()
    b = player_table.alias()
    bip_sql = select(
        [
            game_table.c.day.label("day"),
            bip_table.c.type.label("type"),
            bip_table.c.x.label("x"),
            bip_table.c.y.label("y"),
            ab_table.c.event.label("event"),
            (b.c.namelast + ", " + b.c.namefirst).label("batter"),
            ab_table.c.batter_stand.label("stand"),
            (p.c.namelast + ", " + p.c.namefirst).label("pitcher"),
            ab_table.c.pitcher_throw.label("throw"),
        ],
        and_(park_table.c.id == bindparam("park"), text(get_year(conn, "day")) == bindparam("year")),
        from_obj=bip_table.join(park_table)
        .join(ab_table)
        .join(game_table)
        .outerjoin(p, onclause=p.c.mlbamid == ab_table.c.pitcher)
        .outerjoin(b, onclause=b.c.mlbamid == ab_table.c.batter),
    )

    for park_id in parks.keys():
        for year in years:
            bip_list = []
            str_y = str(year)
            for bip in conn.execute(bip_sql, {"park": park_id, "year": str_y}):
                bip_list.append(
                    {
                        "x": bip["x"],
                        "y": bip["y"],
                        "event": bip["event"],
                        "type": bip["type"],
                        "pitcher": bip["pitcher"],
                        "throw": bip["throw"],
                        "batter": bip["batter"],
                        "stand": bip["stand"],
                    }
                )
            # No need to write empty files!
            if len(bip_list) > 0:
                parks[park_id]["years"][year] = True
                park_file = os.path.join(options["output_dir"], "park-" + str(park_id) + "-" + str(year) + ".json")
                dump_json(park_file, bip_list)

    parks_file = os.path.join(options["output_dir"], "parks.json")
    dump_json(parks_file, parks)