def run(self):
    """Fetch the listing at ``self.url`` and save every matching game.

    Every anchor whose ``href`` contains ``gid_`` is treated as a game
    link. A ``game.Game`` is built from that href (trailing ``/``
    removed); games whose ``game_type`` differs from ``self.gametype``
    are skipped. For the remaining games the game itself is saved,
    followed by its at-bats, hit chart, batters and pitchers.
    """
    # The local is never read, but the constructor call is kept because
    # building the store may have side effects -- TODO confirm.
    DB = store.Store()

    soup = BeautifulSoup(Fetcher.fetch(self.url))
    for link in soup.findAll('a'):
        # link['href'] raises KeyError for anchors that carry no href
        # attribute, so read it defensively and skip those anchors.
        href = link.get('href')
        if not href or 'gid_' not in href:
            continue

        gid = href.rstrip('/')
        g = game.Game(gid)
        if g.game_type != self.gametype:
            continue
        g.save()

        # game_id is read only after save(), as in the original code.
        game_id = g.game_id

        ab = atbats.AtBats(gid, game_id)
        ab.save()

        chart = hitchart.HitChart(gid, game_id)
        chart.save()

        batters = players.Batters(gid, game_id)
        batters.save()

        pitchers = players.Pitchers(gid, game_id)
        pitchers.save()
def run(self):
    """Fetch the listing at ``self.url`` and save every matching game.

    Every anchor whose ``href`` contains ``gid_`` is treated as a game
    link. A ``game.Game`` is built from that href (trailing ``/``
    removed); games whose ``game_type`` differs from ``self.gametype``
    are skipped. For the remaining games the game itself is saved,
    followed by its at-bats, hit chart, batters and pitchers.
    """
    # The local is never read, but the constructor call is kept because
    # building the store may have side effects -- TODO confirm.
    DB = store.Store()

    soup = BeautifulSoup(Fetcher.fetch(self.url))
    for link in soup.findAll('a'):
        # link['href'] raises KeyError for anchors that carry no href
        # attribute, so read it defensively and skip those anchors.
        href = link.get('href')
        if not href or 'gid_' not in href:
            continue

        gid = href.rstrip('/')
        g = game.Game(gid)
        if g.game_type != self.gametype:
            continue
        g.save()

        # game_id is read only after save(), as in the original code.
        game_id = g.game_id

        ab = atbats.AtBats(gid, game_id)
        ab.save()

        chart = hitchart.HitChart(gid, game_id)
        chart.save()

        batters = players.Batters(gid, game_id)
        batters.save()

        pitchers = players.Pitchers(gid, game_id)
        pitchers.save()
def getMonths(year, start=1):
    """Return the month numbers linked from the listing page for ``year``.

    The page at ``CONSTANTS.BASE + 'year_YYYY/'`` is fetched and every
    anchor whose ``href`` contains ``month`` is parsed by stripping the
    ``month_`` prefix and any trailing ``/``.

    Args:
        year: Year used to build the listing URL (formatted with ``%4d``).
        start: Smallest month number to include (inclusive).

    Returns:
        A list of ints, in page order, of the months that are >= ``start``.
    """
    url = '%syear_%4d/' % (CONSTANTS.BASE, year)
    soup = BeautifulSoup(Fetcher.fetch(url))

    months = []
    for link in soup.findAll('a'):
        # Anchors without an href would raise KeyError on link['href'].
        href = link.get('href')
        if not href or 'month' not in href:
            continue
        try:
            month = int(href.replace('month_', '').rstrip('/'))
        except ValueError:
            # A link that mentions "month" but has no numeric suffix
            # is skipped instead of aborting the whole listing.
            continue
        if month >= start:
            months.append(month)
    return months
def getMonths(year, start=1):
    """Return the month numbers linked from the listing page for ``year``.

    The page at ``CONSTANTS.BASE + 'year_YYYY/'`` is fetched and every
    anchor whose ``href`` contains ``month`` is parsed by stripping the
    ``month_`` prefix and any trailing ``/``.

    Args:
        year: Year used to build the listing URL (formatted with ``%4d``).
        start: Smallest month number to include (inclusive).

    Returns:
        A list of ints, in page order, of the months that are >= ``start``.
    """
    url = '%syear_%4d/' % (CONSTANTS.BASE, year)
    soup = BeautifulSoup(Fetcher.fetch(url))

    months = []
    for link in soup.findAll('a'):
        # Anchors without an href would raise KeyError on link['href'].
        href = link.get('href')
        if not href or 'month' not in href:
            continue
        try:
            month = int(href.replace('month_', '').rstrip('/'))
        except ValueError:
            # A link that mentions "month" but has no numeric suffix
            # is skipped instead of aborting the whole listing.
            continue
        if month >= start:
            months.append(month)
    return months
def getDays(year, month, start=1):
    """Return the day numbers linked from the listing page for a month.

    The page at ``CONSTANTS.BASE + 'year_YYYY/month_MM/'`` is fetched and
    every anchor whose ``href`` contains ``day`` is parsed by stripping
    the ``day_`` prefix and any trailing ``/``.

    Args:
        year: Year used to build the listing URL (formatted with ``%4d``).
        month: Month used to build the listing URL (formatted with ``%02d``).
        start: Smallest day number to include (inclusive).

    Returns:
        A list of ints, in page order, of the days that are >= ``start``.
    """
    url = '%syear_%4d/month_%02d/' % (CONSTANTS.BASE, year, month)
    soup = BeautifulSoup(Fetcher.fetch(url))

    days = []
    for link in soup.findAll('a'):
        # Anchors without an href would raise KeyError on link['href'].
        href = link.get('href')
        if not href or 'day' not in href:
            continue
        try:
            day = int(href.replace('day_', '').rstrip('/'))
        except ValueError:
            # sometimes gameday will have like a '26_bak' directory
            # Only the int() failure is swallowed; a bare except would
            # also hide KeyboardInterrupt and unrelated errors.
            continue
        if day >= start:
            days.append(day)
    return days
def getDays(year, month, start=1):
    """Return the day numbers linked from the listing page for a month.

    The page at ``CONSTANTS.BASE + 'year_YYYY/month_MM/'`` is fetched and
    every anchor whose ``href`` contains ``day`` is parsed by stripping
    the ``day_`` prefix and any trailing ``/``.

    Args:
        year: Year used to build the listing URL (formatted with ``%4d``).
        month: Month used to build the listing URL (formatted with ``%02d``).
        start: Smallest day number to include (inclusive).

    Returns:
        A list of ints, in page order, of the days that are >= ``start``.
    """
    url = '%syear_%4d/month_%02d/' % (CONSTANTS.BASE, year, month)
    soup = BeautifulSoup(Fetcher.fetch(url))

    days = []
    for link in soup.findAll('a'):
        # Anchors without an href would raise KeyError on link['href'].
        href = link.get('href')
        if not href or 'day' not in href:
            continue
        try:
            day = int(href.replace('day_', '').rstrip('/'))
        except ValueError:
            # sometimes gameday will have like a '26_bak' directory
            # Only the int() failure is swallowed; a bare except would
            # also hide KeyboardInterrupt and unrelated errors.
            continue
        if day >= start:
            days.append(day)
    return days
log.addHandler(logging.StreamHandler()) else: log.setLevel(logging.ERROR) log.addHandler(logging.StreamHandler()) logfilename = './log.txt' filelog = logging.FileHandler(logfilename, 'a') filelog.setLevel(logging.ERROR) filelog.setFormatter(formatter) log.addHandler(filelog) CONSTANTS.BASE = CONSTANTS.BASE.replace('%TYPE%', TYPE) url = '%syear_%4d/' % (CONSTANTS.BASE, YEAR) try: soup = BeautifulSoup(Fetcher.fetch(url)) except TypeError, e: print 'Could not fetch %s' % url raise SystemExit if MONTH is None: if startmonth: months = getMonths(YEAR, startmonth) else: months = getMonths(YEAR) else: months = MONTH for month in months: if DAY is None: if startday:
log.addHandler(logging.StreamHandler()) else: log.setLevel(logging.ERROR) log.addHandler(logging.StreamHandler()) logfilename = './' + args.errors filelog = logging.FileHandler(logfilename, 'a') filelog.setLevel(logging.ERROR) filelog.setFormatter(formatter) log.addHandler(filelog) CONSTANTS.BASE = CONSTANTS.BASE.replace('%LEAGUE%', args.league) url = '%syear_%4d/' % (CONSTANTS.BASE, args.year) try: soup = BeautifulSoup(Fetcher.fetch(url)) except TypeError, e: print 'Could not fetch %s' % url raise SystemExit if args.month is None: if startmonth: months = getMonths(args.year, startmonth) else: months = getMonths(args.year) else: months = args.month for month in months: if args.day is None: if startday: