Beispiel #1
0
def parse_sheet(url):
    """Scrape a matchup sheet page and store its "Recent ATS Trends" rows.

    The page at ``url`` is downloaded and parsed with BeautifulSoup. The game
    id is taken from the ``action`` attribute of the first form whose action
    matches ``gameid=(.*?)&``; a new ``Matchup`` carrying that id is saved.
    For every text node containing "Recent ATS Trends", the enclosing table
    is walked from its fifth row onwards and one ``MatchupTrend`` is saved
    per row. The trends collected for a table are then assigned to
    ``matchup.matchup_trends``.

    Args:
        url: Address of the sheet page to fetch.

    Returns:
        None.

    Raises:
        TypeError: if no form with a ``gameid=`` action is present
            (``soup.find`` returns None and is then subscripted).
        Whatever ``urllib2.urlopen`` raises on a failed fetch.
    """
    response = urllib2.urlopen(url)
    try:
        sheet_page_html = response.read()
    finally:
        # The response object holds a socket; release it even if read() fails.
        response.close()
    soup = BeautifulSoup(sheet_page_html)

    gameid_re = re.compile("gameid=(.*?)&")
    gameid_str = soup.find("form", action=gameid_re)['action']
    gameid = gameid_re.search(gameid_str).group(1)
    matchup = Matchup()
    matchup.game_id = gameid
    matchup.save()

    # Drop any trends already attached to this matchup before re-importing.
    # NOTE(review): the matchup was created just above, so unless game_id
    # identifies an existing row this likely deletes nothing -- confirm.
    matchup.matchup_trends.all().delete()

    team_trends = soup.findAll(text=re.compile('.*Recent ATS Trends.*'))
    for team_trend in team_trends:
        # The heading text looks like "<team name> - ..."; keep the left part.
        team_name = team_trend.string.split("-")[0].strip()
        logger.info("processing team: " + team_name)
        matchup_table = team_trend.findParent("table")
        trend_rows = matchup_table.findAll("tr")
        new_trends = list()
        # The first four rows are skipped (presumably headers -- verify
        # against the page layout).
        for trend_row in trend_rows[4:]:
            data_columns = trend_row.findAll("td")

            # instantiate the trend and attach it to the right matchup
            matchupTrend = MatchupTrend()
            matchupTrend.game_matchup = matchup

            matchupTrend.team = team_name
            matchupTrend.description = data_columns[0].string.strip()
            logger.debug("trend description: " + matchupTrend.description)
            # Columns 1-3 each yield a (win, loss) pair.
            matchupTrend.current_win, matchupTrend.current_loss = return_win_loss(data_columns[1])
            matchupTrend.last3_win, matchupTrend.last3_loss = return_win_loss(data_columns[2])
            matchupTrend.since1992_win, matchupTrend.since1992_loss = return_win_loss(data_columns[3])
            matchupTrend.save()
            new_trends.append(matchupTrend)

        # NOTE(review): this assignment runs once per team table, so each
        # table's trends replace the previous assignment -- confirm intended.
        matchup.matchup_trends = new_trends
def parse_matchup(matchup_data):
    """Parse one matchup element and persist it as a ``Matchup``.

    The first ``table`` inside ``matchup_data`` is searched for team-page
    links (resolved to teams via ``get_teams``) and for a link whose href
    contains ``date/<mm-dd-yy>``. An existing ``Matchup`` with the same
    insider game id, game time and season is reused when found; otherwise a
    new one is created. Lines and score are filled in by
    ``get_lines_and_score`` and the record is saved unless ``DEBUG`` is set.

    Args:
        matchup_data: Parsed element exposing ``find("table")``.

    Returns:
        None. Returns early, without saving, when either team or the
        matchup date cannot be determined.

    Raises:
        SystemExit: with status 1 when saving the matchup fails.
    """
    matchup_table = matchup_data.find("table")
    team_rx = re.compile(".*college-football/teams/team-page.cfm/team.*")
    teams = matchup_table.findAll("a", href=team_rx)
    away_team, home_team = get_teams(teams)
    if away_team is None or home_team is None:
        # if we don't find a team go on to the next team matchup
        return

    # Bound up front so the error handler below can always report it.
    anchor_tag = None
    try:
        date_re = re.compile("date/(.*)")
        anchor_tag = matchup_table.find(href=date_re)
        matchup_date_str = date_re.search(anchor_tag['href']).group(1)
        matchup_date = datetime.datetime.strptime(matchup_date_str, "%m-%d-%y")
    except Exception:
        # Missing anchor (TypeError) or an unparsable date (ValueError):
        # log the context and skip this matchup.
        logger.exception("matchup date could not be found")
        logger.error("anchor tag: " + str(anchor_tag))
        logger.error("matchup: " + matchup_table.prettify())
        return

    insider_game_id = "%s-@-%s" % (away_team.name.replace(' ', '-'), home_team.name.replace(' ', '-'))
    found_matchups = Matchup.objects.filter(insider_game_id=insider_game_id, gametime=matchup_date, season=YEAR)
    if len(found_matchups) > 0:
        matchup = found_matchups[0]
    else:
        logger.info("!! could not find matchup for: " + insider_game_id + " and date: " + str(matchup_date_str))
        matchup = Matchup()

    matchup.insider_game_id = insider_game_id
    matchup.gametime = matchup_date
    matchup.home_team = home_team
    matchup.away_team = away_team
    matchup.season = int(YEAR)
    get_lines_and_score(matchup, matchup_table)
    logger.info("final score was: %s to %s for game: %s " % (str(matchup.away_score), str(matchup.home_score), matchup.insider_game_id))
    try:
        if not DEBUG:
            matchup.save()
    except Exception:
        logger.exception("problem saving matchup for game id: " + matchup.insider_game_id)
        # NOTE(review): `path` is not defined in this function; it must be a
        # module-level name or this line raises NameError -- confirm.
        logger.exception("path: " + path)
        # Same effect as exit(1) without depending on the site-provided builtin.
        raise SystemExit(1)
def parse_matchup(matchup_data):
    """Parse one matchup element and persist its betting lines.

    The table enclosing ``matchup_data`` is searched for team-page links
    (resolved to teams via ``get_teams``) and for a link whose href contains
    ``date/<mm-dd-yy>/time``. If exactly one ``Matchup`` already exists for
    the same insider game id, game time and season it is updated; otherwise
    a new ``Matchup`` is populated. Lines are filled in by ``get_lines`` and
    the record is saved.

    Args:
        matchup_data: Parsed element exposing ``findParent("table")``.

    Returns:
        None. Returns early, without saving, when either team is missing.

    Raises:
        TypeError: if the table has no date link (``find`` returns None and
            is then subscripted).
        ValueError: if the date string does not match ``%m-%d-%y``.
        SystemExit: with status 1 when saving the matchup fails.
    """
    matchup_table = matchup_data.findParent("table")
    team_rx = re.compile(".*college-football/teams/team-page.cfm/team.*")
    teams = matchup_table.findAll("a", href=team_rx)
    away_team, home_team = get_teams(teams)
    if away_team is None or home_team is None:
        logger.error("couldn't find teams")
        logger.error(matchup_table)
        return

    date_re = re.compile("date/(.*?)/time")
    anchor_tag = matchup_table.find(href=date_re)
    matchup_date_str = date_re.search(anchor_tag['href']).group(1)
    matchup_date = time.strptime(matchup_date_str, "%m-%d-%y")

    matchup = Matchup()
    insider_game_id = "%s-@-%s" % (away_team.name.replace(' ', '-'), home_team.name.replace(' ', '-'))
    gametime = strftime("%Y-%m-%d", matchup_date)
    found_matchups = Matchup.objects.filter(insider_game_id=insider_game_id, gametime=gametime, season=year)
    if len(found_matchups) > 1:
        # Pass the values as separate logging args: a single tuple would
        # supply only one argument for the two %s placeholders.
        logger.error("!! there can't be more than one matchup for:%s and time %s", insider_game_id, gametime)
    if len(found_matchups) == 1:
        matchup = found_matchups[0]
    # With zero or several matches the fresh Matchup() created above is used.

    matchup.insider_game_id = insider_game_id
    matchup.gametime = gametime
    matchup.home_team = home_team
    matchup.away_team = away_team
    matchup.season = year
    get_lines(matchup, matchup_table)
    try:
        matchup.save()
    except Exception:
        logger.exception("problem saving matchup for game id: " + matchup.insider_game_id)
        # Same effect as exit(1) without depending on the site-provided builtin.
        raise SystemExit(1)