def parse_sheet(url):
    """Download a trend sheet page and store its ATS trends for one matchup.

    The game id is read from the ``action`` attribute of the form whose
    action matches ``gameid=...&``. A ``Matchup`` carrying that game id is
    saved, the trends currently attached to it are deleted, and one
    ``MatchupTrend`` is saved per trend row found in the table surrounding
    each "Recent ATS Trends" text node.

    Args:
        url: Address of the sheet page to fetch.

    Returns:
        None. The results are persisted through the model ``save()`` calls.

    Note:
        No parsing error is caught here: a page without the expected form,
        table, or columns makes the lookups below fail with whatever
        exception the parser/indexing raises.
    """
    response = urllib2.urlopen(url)
    try:
        sheet_page_html = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    soup = BeautifulSoup(sheet_page_html)

    # The same pattern both locates the form and extracts the id from it.
    gameid_re = re.compile("gameid=(.*?)&")
    gameid_str = soup.find("form", action=gameid_re)['action']
    gameid = gameid_re.search(gameid_str).group(1)

    matchup = Matchup()
    matchup.game_id = gameid
    matchup.save()
    # Remove trends already attached to this matchup
    # (presumably so a re-run does not duplicate rows -- confirm).
    matchup.matchup_trends.all().delete()

    # Collected across *all* teams so the single assignment at the end
    # covers every trend, not just those of the last team processed.
    new_trends = []

    team_trends = soup.findAll(text=re.compile('.*Recent ATS Trends.*'))
    for team_trend in team_trends:
        # The heading text is split on "-" and the first part is the team.
        team_name = team_trend.string.split("-")[0].strip()
        logger.info("processing team: %s", team_name)

        matchup_table = team_trend.findParent("table")
        trend_rows = matchup_table.findAll("tr")

        # The first four rows are skipped (presumably headings -- confirm).
        for trend_row in trend_rows[4:]:
            data_columns = trend_row.findAll("td")

            # Build the trend record and attach it to the matchup.
            matchup_trend = MatchupTrend()
            matchup_trend.game_matchup = matchup
            matchup_trend.team = team_name
            matchup_trend.description = data_columns[0].string.strip()
            logger.debug("trend description: %s", matchup_trend.description)

            # Columns 1-3 each yield a (win, loss) pair.
            matchup_trend.current_win, matchup_trend.current_loss = \
                return_win_loss(data_columns[1])
            matchup_trend.last3_win, matchup_trend.last3_loss = \
                return_win_loss(data_columns[2])
            matchup_trend.since1992_win, matchup_trend.since1992_loss = \
                return_win_loss(data_columns[3])

            matchup_trend.save()
            new_trends.append(matchup_trend)

    # NOTE(review): direct assignment to the related manager is kept from the
    # original; if this runs on a Django version that requires
    # ``matchup.matchup_trends.set(new_trends)``, switch to that -- confirm.
    matchup.matchup_trends = new_trends
def parse_matchup(matchup_data):
    """Parse one matchup element and store its teams, date, lines and score.

    The first ``table`` inside ``matchup_data`` is searched for team-page
    links (resolved to teams by ``get_teams``) and for a link whose href
    contains ``date/<mm-dd-yy>``. An existing ``Matchup`` with the same
    insider game id, game time and season is reused; otherwise a new one is
    populated. ``get_lines_and_score`` then fills in the remaining data and
    the matchup is saved unless ``DEBUG`` is set.

    Args:
        matchup_data: Parsed element that contains the matchup table.

    Returns:
        None. Returns early (nothing stored) when either team or the matchup
        date cannot be determined.

    Raises:
        SystemExit: with status 1 when saving the matchup fails.
    """
    import sys  # local import: only needed for the fatal-save path below

    matchup_table = matchup_data.find("table")
    team_rx = re.compile(".*college-football/teams/team-page.cfm/team.*")
    teams = matchup_table.findAll("a", href=team_rx)
    away_team, home_team = get_teams(teams)
    if away_team is None or home_team is None:
        # If we don't find a team, go on to the next matchup.
        return

    date_re = re.compile("date/(.*)")
    # Looked up outside the ``try`` so the handler can always log it.
    anchor_tag = matchup_table.find(href=date_re)
    try:
        matchup_date_str = date_re.search(anchor_tag['href']).group(1)
        matchup_date = datetime.datetime.strptime(matchup_date_str, "%m-%d-%y")
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C / SystemExit
        # are not swallowed and turned into a silent skip.
        logger.exception("matchup date could not be found")
        logger.error("anchor tag: " + str(anchor_tag))
        logger.error("matchup: " + matchup_table.prettify())
        return

    insider_game_id = "%s-@-%s" % (
        away_team.name.replace(' ', '-'),
        home_team.name.replace(' ', '-'),
    )
    found_matchups = Matchup.objects.filter(
        insider_game_id=insider_game_id,
        gametime=matchup_date,
        season=YEAR,
    )
    if found_matchups:
        matchup = found_matchups[0]
    else:
        logger.info(
            "!! could not find matchup for: %s and date: %s",
            insider_game_id,
            matchup_date_str,
        )
        matchup = Matchup()

    # Applied to both new and reused matchups.
    matchup.insider_game_id = insider_game_id
    matchup.gametime = matchup_date
    matchup.home_team = home_team
    matchup.away_team = away_team
    matchup.season = int(YEAR)

    get_lines_and_score(matchup, matchup_table)
    logger.info(
        "final score was: %s to %s for game: %s ",
        matchup.away_score,
        matchup.home_score,
        matchup.insider_game_id,
    )

    try:
        if not DEBUG:
            matchup.save()
    except Exception:
        logger.exception("problem saving matchup for game id: " + matchup.insider_game_id)
        # NOTE(review): ``path`` is not defined in this function; presumably a
        # module-level name -- confirm, otherwise this line raises NameError.
        logger.exception("path: " + path)
        # A failed save is treated as fatal for the whole run.
        sys.exit(1)
def parse_matchup(matchup_data):
    """Parse one matchup element and store its teams, date and lines.

    The table enclosing ``matchup_data`` is searched for team-page links
    (resolved to teams by ``get_teams``) and for a link whose href contains
    ``date/<mm-dd-yy>/time``. When exactly one ``Matchup`` already exists for
    the same insider game id, game time and season it is reused; otherwise a
    new one is populated. ``get_lines`` then fills in the remaining data and
    the matchup is saved.

    Args:
        matchup_data: Parsed node located inside the matchup table.

    Returns:
        None. Returns early (nothing stored) when either team is missing.

    Raises:
        SystemExit: with status 1 when saving the matchup fails.

    Note:
        A missing or malformed date link is not handled here and propagates
        as whatever exception the lookup/parsing raises.
    """
    import sys  # local import: only needed for the fatal-save path below

    matchup_table = matchup_data.findParent("table")
    team_rx = re.compile(".*college-football/teams/team-page.cfm/team.*")
    teams = matchup_table.findAll("a", href=team_rx)
    away_team, home_team = get_teams(teams)
    if away_team is None or home_team is None:
        logger.error("couldn't find teams")
        logger.error(matchup_table)
        return

    date_re = re.compile("date/(.*?)/time")
    anchor_tag = matchup_table.find(href=date_re)
    matchup_date_str = date_re.search(anchor_tag['href']).group(1)
    matchup_date = time.strptime(matchup_date_str, "%m-%d-%y")
    # The game time is stored/queried as a "YYYY-MM-DD" string.
    gametime = time.strftime("%Y-%m-%d", matchup_date)

    insider_game_id = "%s-@-%s" % (
        away_team.name.replace(' ', '-'),
        home_team.name.replace(' ', '-'),
    )
    found_matchups = Matchup.objects.filter(
        insider_game_id=insider_game_id,
        gametime=gametime,
        season=year,
    )

    match_count = len(found_matchups)
    if match_count == 1:
        matchup = found_matchups[0]
    else:
        if match_count > 1:
            # Arguments are passed separately: handing logging a single tuple
            # for two placeholders makes the formatting fail and the message
            # is lost.
            logger.error(
                "!! there can't be more than one matchup for:%s and time %s",
                insider_game_id,
                gametime,
            )
        # With zero or several existing rows a fresh matchup is populated
        # (for duplicates this keeps the original behavior of saving
        # another row after reporting the problem).
        matchup = Matchup()

    # Applied to both new and reused matchups.
    matchup.insider_game_id = insider_game_id
    matchup.gametime = gametime
    matchup.home_team = home_team
    matchup.away_team = away_team
    matchup.season = year

    get_lines(matchup, matchup_table)

    try:
        matchup.save()
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C / SystemExit
        # are not intercepted here.
        logger.exception("problem saving matchup for game id: " + matchup.insider_game_id)
        # A failed save is treated as fatal for the whole run.
        sys.exit(1)