예제 #1
0
def scrape_nba_results():
    '''Scrape recent NBA results from betexplorer.com.

    Returns:
        A DataFrame with one row per scraped game and the columns, in order:
        ``Date`` (``dd/mm/YYYY`` string, shifted back one day),
        ``HomeTeam``, ``AwayTeam``, ``HomePoints``, ``AwayPoints`` (score
        strings as scraped), ``HomeWin`` (bool), ``HomeId`` and ``AwayId``
        (``TeamId`` from ``lookup_teams()`` matched on ``Franchise``; missing
        when the franchise name is not found).
    '''
    url = 'http://www.betexplorer.com/basketball/usa/nba/results/'
    raw = pd.read_html(get(url).text)[0]

    def clean_team(name):
        # Normalise the Portland spelling (presumably to match the
        # ``Franchise`` names returned by lookup_teams() -- TODO confirm).
        name = name.strip()
        if name == 'Portland Trail Blazers':
            return 'Portland Trailblazers'
        return name

    # Column 0 holds the matchup as "<home>-<away>".
    matchups = raw[0].apply(lambda cell: cell.split('-'))
    home_team = matchups.apply(lambda parts: clean_team(parts[0]))
    away_team = matchups.apply(lambda parts: clean_team(parts[1]))

    # Column 1 holds the score as "<home>:<away>", where the away part may
    # carry a trailing "ET" suffix that is cut off.
    scores = raw[1].apply(lambda cell: cell.split(':'))
    home_points = scores.apply(lambda parts: parts[0].strip())
    away_points = scores.apply(
        lambda parts: parts[1].split('ET')[0].strip())

    # The dates on this website are GMT so are one day advanced.
    dates = raw[4].apply(
        lambda cell: (datetime.strptime(cell, '%d.%m.%Y')
                      - timedelta(days=1)).strftime('%d/%m/%Y'))

    # Compare the scores numerically: comparing the raw strings is
    # lexicographic and would rank '99' above '100'. Scores that cannot be
    # parsed become NaN, which compares as False.
    home_win = (pd.to_numeric(home_points, errors='coerce')
                > pd.to_numeric(away_points, errors='coerce'))

    columns = ['Date', 'HomeTeam', 'AwayTeam', 'HomePoints', 'AwayPoints',
               'HomeWin']
    results = pd.DataFrame({'Date': dates,
                            'HomeTeam': home_team,
                            'AwayTeam': away_team,
                            'HomePoints': home_points,
                            'AwayPoints': away_points,
                            'HomeWin': home_win},
                           columns=columns)

    # Look ids up per row instead of merging: a merge resets the index and
    # drops unmatched rows, which would silently shift ids onto other games.
    teams = lookup_teams()
    team_ids = (teams.drop_duplicates('Franchise')
                     .set_index('Franchise')['TeamId'])
    results['HomeId'] = results['HomeTeam'].map(team_ids)
    results['AwayId'] = results['AwayTeam'].map(team_ids)
    return results
예제 #2
0
def scrape_model_probs():
    '''Scrape CARM-Elo probabilities for next round of matches.

    Returns:
        A DataFrame with the columns, in order: ``Date`` (today's date as a
        ``dd/mm/YYYY`` string), ``HomeId``, ``AwayId``, ``HomeTeam``,
        ``AwayTeam`` (``Franchise`` names from ``lookup_teams()``),
        ``HomeModelProb`` and ``AwayModelProb`` (scraped percentages
        converted to fractions). Ids and names are missing for any scraped
        abbreviation that has no ``Abbreviation538`` match.
    '''
    url = 'http://projects.fivethirtyeight.com/2016-nba-picks/'
    # Second table on the page; keep the columns at positions 1-4.
    picks = pd.read_html(get(url).text)[1].iloc[:, 1:5]
    # Drop rows whose first kept column is the literal 'X'. Copy so the
    # column assignments below do not write into a view of the parsed table.
    picks = picks[picks.iloc[:, 0] != 'X'].copy()
    picks.columns = ['AwayTeam', 'AwayModelProb', 'HomeModelProb', 'HomeTeam']

    def parse_probability(string):
        # Take the text before the first '%' and scale it to a fraction.
        return float(string.split('%')[0]) / 100

    picks['AwayModelProb'] = picks['AwayModelProb'].apply(parse_probability)
    picks['HomeModelProb'] = picks['HomeModelProb'].apply(parse_probability)

    # Look values up per row instead of merging: the filter above leaves
    # gaps in the index while a merge result is renumbered from 0, so
    # assigning merged columns back would attach ids to the wrong games.
    teams = (lookup_teams().drop_duplicates('Abbreviation538')
                           .set_index('Abbreviation538'))
    picks['Date'] = datetime.today().strftime('%d/%m/%Y')
    # Resolve the ids before the abbreviations are replaced by full names.
    picks['AwayId'] = picks['AwayTeam'].map(teams['TeamId'])
    picks['AwayTeam'] = picks['AwayTeam'].map(teams['Franchise'])
    picks['HomeId'] = picks['HomeTeam'].map(teams['TeamId'])
    picks['HomeTeam'] = picks['HomeTeam'].map(teams['Franchise'])
    return picks[['Date', 'HomeId', 'AwayId', 'HomeTeam', 'AwayTeam',
                  'HomeModelProb', 'AwayModelProb']]