class EventsParser():
    """Turn the at-bats of a game's inning XML into Event records.

    Events are accumulated in a DbCollection that parse() returns.
    """

    def __init__(self, game_id):
        """Create a parser for a single game.

        Args:
            game_id: value stored on every Event this parser creates.
        """
        self.game_id = game_id
        # Receives every Event built by save_half_inning().
        self.collection = DbCollection()

    def save_half_inning(self, inning):
        """Add one Event per 'atbat' child of a half-inning element.

        Args:
            inning: the 'top' or 'bottom' element of an inning, or None
                when that element is missing from the document.

        Raises:
            KeyError: an at-bat lacks 'pitcher', 'batter', 'des' or 'event'.
            ValueError: 'pitcher' or 'batter' is not an integer string.
        """
        if inning is None:
            # Element.find() yields None when the child does not exist;
            # there is nothing to record for a missing half-inning.
            return

        for at_bat in inning.findall('atbat'):
            attrib = at_bat.attrib
            event = Event()
            event.pitcher = int(attrib['pitcher'])
            event.batter = int(attrib['batter'])
            event.description = attrib['des']
            event.event = attrib['event']
            event.game_id = self.game_id
            self.collection.add_event(event)

    def parse(self, data):
        """Parse the events XML in `data` and return the filled collection.

        Both halves of every 'inning' element are processed, top first.
        Errors raised by ET.fromstring() for malformed input propagate to
        the caller.
        """
        root = ET.fromstring(data)
        for inning in root.findall('inning'):
            for half in ('top', 'bottom'):
                self.save_half_inning(inning.find(half))
        return self.collection
def parse_events(game, collection):
    """Parse the events of `game` and merge them into `collection`.

    Args:
        game: object providing `id` (passed to EventsParser) and
            `to_json()` (used in the failure message).
        collection: collection whose join() receives the parsed events.

    A ParseError is reported on stdout and otherwise ignored, so a single
    unparseable feed does not abort the caller.
    """
    try:
        event_parser = EventsParser(game.id)
        event_collection = event_parser.parse(get_event_data(game))
        collection.join(event_collection)
    except ParseError:
        # Function-call form works on Python 2 and 3; the 1-tuple keeps the
        # formatting correct even if to_json() returns a tuple.
        print("Unable to parse events for %s" % (game.to_json(),))


if __name__ == '__main__':
    # Games already stored in the database; used below to skip them.
    game_set = set(session.query(Game).all())
    now = datetime.datetime.now()
    day = datetime.timedelta(days=1)
    collection = DbCollection()
    db_batch = 0

    # NOTE(review): start_date is not assigned anywhere in this block; it is
    # presumably defined earlier in the file -- confirm.
    while start_date < now:
        # Start with game number 1 for the current date.
        game = Game(start_date.year, start_date.month, start_date.day, 1)
        game.year = start_date.year
        game.month = start_date.month
        game.day = start_date.day

        while has_game(game):
            if game in game_set:
                # Already imported: move on to the next game number that day.
                game = Game(start_date.year, start_date.month,
                            start_date.day, game.game_num + 1)
                continue

            session.add(game)
            session.commit()
            db_batch += 1
            parse_boxscore(game, collection)
            parse_events(game, collection)
            # NOTE(review): nothing visible here advances `game` after it is
            # processed, nor `start_date` after the inner loop (`day` is
            # otherwise unused). Confirm the tail of this loop was not lost;
            # as written the loops do not appear to terminate.
def __init__(self, game_num):
    """Set up the parser state: a new DbCollection and the game number."""
    # The two attributes are independent of each other.
    self.collection = DbCollection()
    self.game_num = game_num
def __init__(self, game_id):
    """Set up the parser state: a new DbCollection and the game id."""
    # The two attributes are independent of each other.
    self.collection = DbCollection()
    self.game_id = game_id
class BoxscoreParser():
    """Extract teams, batters and pitchers from boxscore XML.

    Everything found is added to a DbCollection that parse() returns.
    Individual records that cannot be read are skipped rather than
    aborting the whole parse.
    """

    def __init__(self, game_num):
        """Create a parser.

        Args:
            game_num: stored on the instance as `game_num`.
        """
        self.game_num = game_num
        # Receives every team, batter and pitcher that is parsed.
        self.collection = DbCollection()

    def get_team(self, team_type, attrib):
        """Build a Team from the '<team_type>_*' entries of `attrib`.

        Args:
            team_type: prefix selecting the attributes to read
                ('<team_type>_team_code', '<team_type>_fname',
                '<team_type>_id').
            attrib: attribute mapping of the boxscore root element.

        Returns:
            The populated Team, or None if it could not be built (for
            example a missing attribute or a non-integer id).
        """
        try:
            team = Team()
            team.code = attrib['%s_team_code' % team_type]
            team.name = attrib['%s_fname' % team_type]
            team.mlb_id = int(attrib['%s_id' % team_type])
            return team
        except Exception:
            # Best effort, but let KeyboardInterrupt/SystemExit through.
            return None

    def save_team_names(self, root):
        """Add the home and away teams described by `root` to the collection.

        A team that get_team() could not build is skipped.
        """
        for team_type in (HOME_TEAM, AWAY_TEAM):
            team = self.get_team(team_type, root.attrib)
            if team is not None:
                self.collection.add_team(team)

    def _fill_player(self, player, attrib):
        """Copy the display name and MLB id from `attrib` onto `player`."""
        # Prefer the full display name; fall back to 'name'. When neither is
        # present the name is left untouched.
        if 'name_display_first_last' in attrib:
            player.name = attrib['name_display_first_last']
        elif 'name' in attrib:
            player.name = attrib['name']
        player.mlb_id = int(attrib['id'])
        return player

    def save_batters(self, root):
        """Add a Batter for every 'batter' under each 'batting' section.

        A batter that cannot be read or stored is reported on stdout and
        skipped.
        """
        for batting_section in root.findall('batting'):
            for batter_xml in batting_section.findall('batter'):
                try:
                    attrib = batter_xml.attrib
                    batter = self._fill_player(Batter(), attrib)
                    self.collection.add_batter(batter)
                except Exception:
                    # Best effort, but let KeyboardInterrupt/SystemExit through.
                    print("error saving batter")

    def save_pitchers(self, root):
        """Add a Pitcher for every 'pitcher' under each 'pitching' section.

        A pitcher that cannot be read or stored is reported on stdout and
        skipped.
        """
        for pitching_section in root.findall('pitching'):
            for pitcher_xml in pitching_section.findall('pitcher'):
                try:
                    attrib = pitcher_xml.attrib
                    pitcher = self._fill_player(Pitcher(), attrib)
                    self.collection.add_pitcher(pitcher)
                except Exception:
                    # Best effort, but let KeyboardInterrupt/SystemExit through.
                    print("error saving pitcher")

    def parse(self, data):
        """Parse the boxscore XML in `data` and return the filled collection.

        Teams are saved first, then batters, then pitchers. Errors raised
        by ET.fromstring() for malformed input propagate to the caller.
        """
        root = ET.fromstring(data)
        self.save_team_names(root)
        self.save_batters(root)
        self.save_pitchers(root)
        return self.collection