def handle(self, *args, **options):
    """Load one full MLB season into the database.

    Runs, in order, the ``load_MLB_structure`` and ``load_players``
    commands for ``options['year']`` and then ``load_day`` for every
    calendar day from the first to the last day of the season (both
    inclusive), skipping the All-Star game date.

    Args:
        *args: Unused positional arguments.
        **options: Command options; ``options['year']`` is the season to load.
    """
    year = options['year']

    # load MLB structure
    call_command('load_MLB_structure', year)

    # TODO load past players
    # for now: load current rosters to at least have current players
    call_command('load_players', year)

    # load games from season
    # Fetch the season dates once; only the 'YYYY-MM-DD' prefix is used.
    dates = mlbgame.important_dates(year)
    date_format = '%Y-%m-%d'
    first_day = datetime.datetime.strptime(
        dates.first_date_seas[:10], date_format)
    last_day = datetime.datetime.strptime(
        dates.last_date_seas[:10], date_format)
    # An empty All-Star date means there is no day to skip.
    all_star_day = None
    if dates.all_star_date:
        all_star_day = datetime.datetime.strptime(
            dates.all_star_date[:10], date_format)

    # Load before incrementing so that opening day itself is included.
    day = first_day
    while day <= last_day:
        if day != all_star_day:
            call_command('load_day', day.year, day.month, day.day)
        day += datetime.timedelta(days=1)

    print('Complete')
def __fetch_important_dates(self):
    """Return the important dates for the relevant season.

    Starts from ``mlbgame.important_dates()`` with no arguments. When that
    record has a non-empty ``playoffs_end_date`` that already lies in the
    past, the record for next calendar year is returned instead.
    """
    now = datetime.today()
    season_dates = mlbgame.important_dates()

    playoffs_end = season_dates.playoffs_end_date
    if playoffs_end == '':
        # No playoff end date known: nothing to compare against.
        return season_dates

    playoffs_over_at = datetime.strptime(playoffs_end, MLB_DATE_FORMAT)
    if playoffs_over_at < now:
        # Playoffs are finished, so look ahead to next year's dates.
        season_dates = mlbgame.important_dates(now.year + 1)
    return season_dates
def crawl_xmls(self, browser):
    """Download the per-game XML files for every game in a date range.

    The range runs from ``self.start_date`` (or the season's first date
    when ``self.start_date`` is ``None``) to the season's last date for
    ``self.year``. Each file is fetched with ``self.fetch_xml`` and stored
    under ``xml/<game_id>/``.

    Args:
        browser: Not used by this method.
    """
    xml_names = (
        "boxscore.xml",
        "rawboxscore.xml",
        "game_events.xml",
        "linescore.xml",
        "players.xml",
        "inning/inning_all.xml",
        "game.xml",
    )

    season = mlbgame.important_dates(year=self.year)
    range_end = season.last_date_seas
    if self.start_date is None:
        range_start = season.first_date_seas
    else:
        range_start = self.start_date

    for current in date_range(range_start, range_end):
        scoreboards = [
            [match]
            for match in mlbgame.day(current.year, current.month, current.day)
        ]
        combined = mlbgame.combine_games(scoreboards)

        # Month and day are zero-padded to two digits in the remote path.
        month_part = "{0:02d}".format(current.month)
        day_part = "{0:02d}".format(current.day)

        for game in combined:
            game_id = game.game_id
            local_dir = "xml/" + game_id
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)

            remote_dir = "http://gd2.mlb.com/components/game/mlb/year_%s/month_%s/day_%s/gid_%s/" % \
                (current.year, month_part, day_part, game_id)
            for xml_name in xml_names:
                self.fetch_xml(remote_dir + xml_name,
                               local_dir + "/" + xml_name)
def create_json(input_folder, input_summaries, output_folder):
    """Attach tokenized game summaries to box-score entries.

    For every JSON file in ``input_folder`` (a list of game entries), each
    entry played on or after that year's first season date is matched to
    exactly one summary file in ``input_summaries`` whose name ends with
    ``<YYYYMMDD>_<visitor>-<home>_<visitor runs>-<home runs>``. The summary
    is tokenized and stored under ``entry['summary']``. Entries with a
    match are written to ``output_folder`` as ``combined_<filename>``;
    no file is written when nothing matched.

    The folder arguments are concatenated directly with file names, so
    they must end with a path separator.

    Progress and mismatch messages are printed using single-argument
    ``print(...)`` calls so the function runs on both Python 2 and 3.

    Args:
        input_folder: Folder holding the box-score JSON files.
        input_summaries: Folder (or path prefix) holding the summary files.
        output_folder: Folder that receives the combined JSON files.
    """
    # year -> first day of that season; avoids one lookup per entry.
    season_starts = {}

    for filename in os.listdir(input_folder):
        with codecs.open(input_folder + filename) as json_data:
            entries = json.load(json_data)
        print('filename ' + input_folder + filename)

        output = []
        for entry in entries:
            game_day = datetime.strptime(entry['day'], '%m_%d_%y')
            if game_day.year not in season_starts:
                season_starts[game_day.year] = datetime.strptime(
                    mlbgame.important_dates(game_day.year).first_date_seas,
                    '%Y-%m-%dT%H:%M:%S')
            if game_day < season_starts[game_day.year]:
                # Played before the season's first date: skip it.
                print('datetime_object %s %s' % (game_day, filename))
                continue

            matchup = (entry['vis_name'].replace(" ", "_") + "-" +
                       entry['home_name'].replace(" ", "_"))
            matchup = matchup.replace('D-backs', 'Diamondbacks')
            score = (str(entry['vis_line']['team_runs']) + "-" +
                     str(entry['home_line']['team_runs']))
            pattern = input_summaries + "*" + "_".join(
                [game_day.strftime("%Y%m%d"), matchup, score])

            files = glob.glob(pattern)
            if len(files) < 1:
                print(pattern + " not found")
                continue
            if len(files) > 1:
                print(pattern + " multiple found")
                continue

            with codecs.open(files[0], encoding='utf-8') as f:
                content = f.readlines()
            # Strip surrounding whitespace (including newlines) per line.
            text = " ".join(line.strip() for line in content)

            # Split hyphenated tokens into their parts unless the token
            # starts with an upper-case letter.
            tokens = []
            for token in nltk.word_tokenize(text):
                if token[0].isupper() or '-' not in token:
                    tokens.append(token)
                else:
                    tokens.extend(token.replace('-', " - ").split())
            entry['summary'] = tokens
            output.append(entry)

        if output:
            with codecs.open(output_folder + 'combined_' + filename,
                             'w+') as outfile:
                json.dump(output, outfile)
def test_important_dates(self):
    """Check the 2017 important-dates record and its formatting helpers.

    Verifies the type and value of every attribute, the human-readable
    output, and that an unknown year raises ``ValueError`` both through
    ``mlbgame.important_dates`` and through ``mlbgame.data`` with a
    patched ``IMPORTANT_DATES`` value.
    """
    important_dates = mlbgame.important_dates(2017)
    output = ("Opening Day 2017: Sunday, April 02.\n"
              "Last day of the 1st half: Sunday, July 09.\n"
              "2017 All Star Game: Tuesday, July 11.\n"
              "First day of the 2nd half: Friday, July 14.\n"
              "Last day of the 2017 season: Sunday, October 01.\n"
              "2017 Playoffs start: Tuesday, October 03.\n"
              "2017 Playoffs end: Wednesday, November 01.")
    # (attribute name, expected type, expected value)
    expected = [
        ('organization_id', int, 1),
        ('year', int, 2017),
        ('org_code', str, 'mlb'),
        ('org_type', str, 'S'),
        ('parent_org', str, ''),
        ('parent_abbrev', str, ''),
        ('name_full', str, 'Major League Baseball'),
        ('name_short', str, ''),
        ('name_abbrev', str, 'MLB'),
        ('file_code', str, 'mlb'),
        ('games', int, 162),
        ('first_date_seas', str, '2017-04-02T00:00:00'),
        ('last_date_1sth', str, '2017-07-09T00:00:00'),
        ('first_date_2ndh', str, '2017-07-14T00:00:00'),
        ('last_date_seas', str, '2017-10-01T00:00:00'),
        ('split_season_sw', str, 'N'),
        ('games_1sth', str, ''),
        ('games_2ndh', str, ''),
        ('all_star_sw', str, 'Y'),
        ('all_star_date', str, '2017-07-11T00:00:00'),
        ('playoff_sw', str, 'N'),
        ('playoff_teams', str, ''),
        ('wildcard_sw', str, 'N'),
        ('wildcard_teams', str, ''),
        ('playoff_points_sw', str, 'N'),
        ('point_values', str, ''),
        ('playoffs_start_date', str, '2017-10-03T00:00:00'),
        ('playoffs_end_date', str, '2017-11-01T00:00:00'),
        ('playoff_rounds', str, ''),
        ('playoff_games', str, ''),
    ]

    self.assertRaises(ValueError, lambda: mlbgame.important_dates(2050))

    # The attribute name is passed as the failure message so a failing
    # row can be identified.
    for attr, expected_type, _ in expected:
        self.assertIsInstance(
            getattr(important_dates, attr), expected_type, attr)
    for attr, _, expected_value in expected:
        self.assertEqual(
            getattr(important_dates, attr), expected_value, attr)

    self.assertEqual(mlbgame.info.date_format('2017-04-02T00:00:00'),
                     'Sunday, April 02')
    self.assertEqual(mlbgame.info.date_format('not_a_date'), '')
    self.assertEqual(important_dates.nice_output(), output)
    self.assertEqual(important_dates.__str__(), output)
    self.assertEqual(mlbgame.info.str_format('test-{0}', [1]), 'test-1')

    # Patch the module-level value only for this check and always restore
    # it, so later tests are not affected by the override.
    original_important_dates = mlbgame.data.IMPORTANT_DATES
    mlbgame.data.IMPORTANT_DATES = '{0}'
    try:
        self.assertRaises(
            ValueError, lambda: mlbgame.data.get_important_dates(2050))
    finally:
        mlbgame.data.IMPORTANT_DATES = original_important_dates
def _team_games_on(team, day, limit):
    """Return at most ``limit`` games on ``day`` with ``team`` as home or away."""
    return mlbgame.day(day.year, day.month, day.day,
                       home=team, away=team)[0:limit]


def search(request, team_searched):
    """Render up to 10 upcoming games for a team with predicted winners.

    Games are collected from today, then from the following six days. If
    none are found, the first six days of next year's season are used
    instead and the ``today`` flag in the template context is ``False``.
    Each game is paired with the short name of the team returned by
    ``predict`` for the game's home and away teams.

    Args:
        request: The incoming request, passed through to ``render``.
        team_searched: Short team name used to filter games (home or away).

    Returns:
        The response produced by rendering ``home.html``.
    """
    fullNames = {
        "Dodgers": "Los Angeles Dodgers",
        "Indians": "Cleveland Indians",
        "Rays": "Tampa Bay Rays",
        "Twins": "Minnesota Twins",
        "Athletics": "Oakland Athletics",
        "White Sox": "Chicago White Sox",
        "Reds": "Cincinnati Reds",
        "Padres": "San Diego Padres",
        "Cardinals": "St. Louis Cardinals",
        "Cubs": "Chicago Cubs",
        "Brewers": "Milwaukee Brewers",
        "Royals": "Kansas City Royals",
        "Astros": "Houston Astros",
        "Yankees": "New York Yankees",
        "Braves": "Atlanta Braves",
        "Orioles": "Baltimore Orioles",
        "Blue Jays": "Toronto Blue Jays",
        "Giants": "San Francisco Giants",
        "Pirates": "Pittsburgh Pirates",
        "Diamondbacks": "Arizona Diamondbacks",
        "Marlins": "Miami Marlins",
        "Mets": "New York Mets",
        "Mariners": "Seattle Mariners",
        "Rangers": "Texas Rangers",
        "Angels": "Los Angeles Angels",
        "Nationals": "Washington Nationals",
        "Phillies": "Philadelphia Phillies",
        "Red Sox": "Boston Red Sox",
        "Rockies": "Colorado Rockies",
        "Tigers": "Detroit Tigers",
    }
    teamNames = {v: k for k, v in fullNames.items()}

    max_games = 10
    now = datetime.datetime.now()
    today = now.date()
    are_games_today = True

    # retrieve today's games
    games = _team_games_on(team_searched, today, max_games)

    # retrieve this week's games if not enough games today
    day_offset = 1
    while len(games) < max_games and day_offset < 7:
        day = today + datetime.timedelta(days=day_offset)
        # Keep every game of the day (e.g. both halves of a doubleheader),
        # limited to the slots that are still free.
        games.extend(
            _team_games_on(team_searched, day, max_games - len(games)))
        day_offset += 1

    if len(games) == 0:
        are_games_today = False
        # go to next season
        first_date = mlbgame.important_dates(now.year + 1).first_date_seas
        season_start = datetime.datetime.strptime(
            first_date[:10], '%Y-%m-%d').date()
        day_offset = 0
        while len(games) < max_games and day_offset < 6:
            day = season_start + datetime.timedelta(days=day_offset)
            games.extend(
                _team_games_on(team_searched, day, max_games - len(games)))
            day_offset += 1

    # Predict each game from its home and away teams (full names), then
    # map the predicted full name back to the short name.
    predictions = [
        teamNames[predict(fullNames[game.home_team],
                          fullNames[game.away_team])]
        for game in games
    ]
    zipped = list(zip(games, predictions))
    return render(
        request, 'home.html', {
            'data': zipped,
            'today': are_games_today,
            'team': team_searched,
            'searched': True,
        })
def set_mlb_dates(self):
    """Store the season's important dates on ``self.mlb_dates``.

    Uses ``self.season`` when it is truthy, otherwise
    ``self.current_season``.
    """
    chosen_season = self.season if self.season else self.current_season
    self.mlb_dates = mlbgame.important_dates(chosen_season)