Example #1
0
def parse_and_write_data(soup, date, time, not_ML = True):
    """Build a DataFrame holding two rows (away, then home) per listed game.

    Args:
        soup: Parsed odds page. Only ``soup.find_all('div', attrs=...)`` and,
            on each returned element, ``find_all('div')`` / ``get_text()``
            are used (presumably a BeautifulSoup document; confirm with the
            caller).
        date: Value copied unchanged into the ``date`` column.
        time: Value copied unchanged into the ``time`` column.
        not_ML: If true, every book cell is split at its first space into a
            ``<book>_line`` and a ``<book>_odds`` column. If false, the
            cleaned cell text is stored whole in a single ``<book>`` column.

    Returns:
        DataFrame indexed 0, 1, 2, ... whose columns are ``date``, ``time``,
        ``H/A``, ``team``, ``opp_team`` followed by the book columns in the
        order pinnacle, 5dimes, heritage, bovada, betonline. A book cell
        that cannot be found is stored as empty string(s).

    Raises:
        IndexError: If the team element of a game, or its second entry,
            is missing.
    """
    ## BookID -> column prefix, in output column order.
    ## Other known IDs that are not read here:
    ##   93 Bookmaker, 123 BetDSI, 139 Youwager, 999991 SIA
    books = (('238', 'pinnacle'),
             ('19', '5dimes'),
             ('169', 'heritage'),
             ('999996', 'bovada'),
             ('1096', 'betonline'))

    def clean(text):
        ## Non-breaking space -> plain space, one-half glyph -> '.5'
        return text.replace(u'\xa0', ' ').replace(u'\xbd', '.5')

    def split_line_odds(text):
        ## '7 -110' -> ('7', '-110'). Without a space there is no separate
        ## line part, so the whole text is treated as the odds.
        line, sep, odds = text.partition(' ')
        if not sep:
            return '', text
        return line, odds

    def book_text(cells, game, side):
        ## Blank data shows up as a missing element; report it as ''.
        try:
            return cells[game].find_all('div')[side].get_text().strip()
        except IndexError:
            return ''

    def team_name(info):
        ## Text up to, but excluding, the character just before the first '-'.
        ## NOTE(review): when the text holds no '-', find() returns -1 and
        ## this slice drops the last two characters; confirm whether that
        ## can happen on the scraped pages.
        return info[:info.find('-') - 1]

    ## The first five labels name what each row really stores there.
    columns = ['date', 'time', 'H/A', 'team', 'opp_team']
    for _, prefix in books:
        if not_ML:
            columns.extend([prefix + '_line', prefix + '_odds'])
        else:
            columns.append(prefix)
    df = DataFrame(columns=columns)

    ## The page does not change while we read it, so query it only once.
    number_of_games = len(soup.find_all('div', attrs = {'class':'el-div eventLine-rotation'}))
    team_cells = soup.find_all('div', attrs = {'class':'el-div eventLine-team'})
    book_cells = [soup.find_all('div', attrs = {'class':'el-div eventLine-book', 'rel':book_id})
                  for book_id, _ in books]

    for i in range(number_of_games):
        print(str(i+1)+'/'+str(number_of_games))

        entries = team_cells[i].find_all('div')
        team_A = team_name(entries[0].get_text().strip())
        team_H = team_name(entries[1].get_text().strip())

        ## side 0 is the away entry of a book cell, side 1 the home entry
        sides = ((0, 'away', team_A, team_H),
                 (1, 'home', team_H, team_A))
        for side, label, team, opp_team in sides:
            row = [date, time, label, team, opp_team]
            for cells in book_cells:
                text = clean(book_text(cells, i, side))
                if not_ML:
                    row.extend(split_line_odds(text))
                else:
                    row.append(text)
            ## away row lands on index 2*i, home row on 2*i + 1
            df.loc[2 * i + side] = row
    return df
Example #2
0
def parse_and_write_data(soup, date, time, not_ML = True):
    """Build a DataFrame holding two rows (away, then home) per listed game.

    Each team entry is sliced around its first '-' and first '(' into a
    team, a pitcher and a hand value. Team abbreviations found in
    ``short_to_long_abbr`` are replaced by their mapped form.

    Args:
        soup: Parsed odds page. Only ``soup.find_all('div', attrs=...)`` and,
            on each returned element, ``find_all('div')`` / ``get_text()``
            are used (presumably a BeautifulSoup document; confirm with the
            caller).
        date: Copied into the ``date`` column; ``str(date)`` starts the key.
        time: Value copied unchanged into the ``time`` column.
        not_ML: If true, every book cell is split at its first space into a
            ``<book>_line`` and a ``<book>_odds`` column. If false, the
            cleaned cell text is stored whole in a single ``<book>`` column.

    Returns:
        DataFrame indexed 0, 1, 2, ... with the columns ``key``, ``date``,
        ``time``, ``H/A``, ``team``, ``pitcher``, ``hand``, ``opp_team``,
        ``opp_pitcher``, ``opp_hand`` followed by the book columns in the
        order pinnacle, 5dimes, heritage, bovada, betonline. Both rows of a
        game share the key ``<date>_<away team>_<home team>``. A book cell
        that cannot be found is passed to ``replace_unicode`` as ''.

    Raises:
        IndexError: If the team element of a game, or its first or third
            entry, is missing.
    """
    ## BookID -> column prefix, in output column order.
    ## Other known IDs that are not read here:
    ##   93 Bookmaker, 123 BetDSI, 139 Youwager, 999991 SIA
    books = (('238', 'pinnacle'),
             ('19', '5dimes'),
             ('169', 'heritage'),
             ('999996', 'bovada'),
             ('1096', 'betonline'))

    ## Abbreviation as scraped -> abbreviation written to the output.
    short_to_long_abbr = {
        'LA': 'LAD',
        'SD': 'SDG',
        'SF': 'SFO',
        'NY': 'NYM',
        'KC': 'KCA',
        'TB': 'TBA',
        'CWS': 'CHW',
        'CHI': 'CHC',
        'WSH': 'WAS',
    }

    def split_line_odds(text):
        ## '7 -110' -> ('7', '-110'). Without a space there is no separate
        ## line part, so the whole text is treated as the odds.
        line, sep, odds = text.partition(' ')
        if not sep:
            return '', text
        return line, odds

    def book_text(cells, game, side):
        ## Blank data shows up as a missing element; report it as ''.
        try:
            return cells[game].find_all('div')[side].get_text().strip()
        except IndexError:
            return ''

    def split_info(info):
        ## Slices look like they target 'TEAM - Pitcher (H)' -- TODO confirm
        ## against the page. find() returns -1 when a marker is absent and
        ## the slices are then taken relative to the end of the text.
        hyphen = info.find('-')
        paren = info.find("(")
        team = info[:hyphen - 1]
        pitcher = info[hyphen + 2 : paren - 1]
        hand = info[paren + 1 : -1]
        return team, pitcher, hand

    columns = ['key', 'date', 'time', 'H/A',
               'team', 'pitcher', 'hand',
               'opp_team', 'opp_pitcher', 'opp_hand']
    for _, prefix in books:
        if not_ML:
            columns.extend([prefix + '_line', prefix + '_odds'])
        else:
            columns.append(prefix)
    df = DataFrame(columns=columns)

    ## The page does not change while we read it, so query it only once.
    number_of_games = len(soup.find_all('div', attrs = {'class':'el-div eventLine-rotation'}))
    team_cells = soup.find_all('div', attrs = {'class':'el-div eventLine-team'})
    book_cells = [soup.find_all('div', attrs = {'class':'el-div eventLine-book', 'rel':book_id})
                  for book_id, _ in books]

    for i in range(number_of_games):
        print(str(i+1)+'/'+str(number_of_games))

        ## Entry 0 is read as the away side, entry 2 as the home side.
        entries = team_cells[i].find_all('div')
        team_A, pitcher_A, hand_A = split_info(entries[0].get_text().strip())
        team_H, pitcher_H, hand_H = split_info(entries[2].get_text().strip())

        team_H = short_to_long_abbr.get(team_H, team_H)
        team_A = short_to_long_abbr.get(team_A, team_A)

        key = str(date) + '_' + team_A.replace(u'\xa0',' ') + '_' + team_H.replace(u'\xa0',' ')
        away = [key, date, time, 'away',
                team_A, pitcher_A, hand_A,
                team_H, pitcher_H, hand_H]
        home = [key, date, time, 'home',
                team_H, pitcher_H, hand_H,
                team_A, pitcher_A, hand_A]

        ## side 0 is the away entry of a book cell, side 1 the home entry
        for side, row in enumerate((away, home)):
            for cells in book_cells:
                text = replace_unicode(book_text(cells, i, side))
                if not_ML:
                    row.extend(split_line_odds(text))
                else:
                    row.append(text)
            ## away row lands on index 2*i, home row on 2*i + 1
            df.loc[2 * i + side] = row
    return df
def parse_and_write_data(soup, date, time_of_move, not_ML = True):
    """Build a DataFrame holding two rows (away, then home) per listed game.

    Each team entry is sliced around its first '-' and first '(' into a
    team, a pitcher and a hand value; team names are then passed through
    ``team_name_check`` (defined elsewhere in this file).

    Args:
        soup: Parsed odds page. Only ``soup.find_all('div', attrs=...)`` and,
            on each returned element, ``find_all('div')`` / ``get_text()``
            are used (presumably a BeautifulSoup document; confirm with the
            caller).
        date: Copied into the ``date`` column; ``str(date)`` starts the key.
        time_of_move: Value copied unchanged into the ``time`` column.
        not_ML: If true, each book contributes a ``<book>_line`` and a
            ``<book>_odds`` column (cell text split at its first space).
            If false, each book contributes one ``<book>`` column holding
            only the odds part.

    Returns:
        DataFrame indexed 0, 1, 2, ... with the columns ``key``,
        ``day_game_nbr``, ``date``, ``time``, ``H/A``, ``team``,
        ``pitcher``, ``hand``, ``opp_team``, ``opp_pitcher``, ``opp_hand``
        followed by the book columns in the order pinnacle, 5dimes,
        heritage, bovada, betonline. ``day_game_nbr`` is the 1-based game
        position as a string. A book cell that cannot be found yields
        empty strings.

    Raises:
        IndexError: If the team element of a game, or its first or third
            entry, is missing.
    """
    ## BookID -> column prefix, in output column order.
    ## Other known IDs that are not read here:
    ##   93 Bookmaker, 123 BetDSI, 139 Youwager, 999991 SIA
    books = (('238', 'pinnacle'),
             ('19', '5dimes'),
             ('169', 'heritage'),
             ('999996', 'bovada'),
             ('1096', 'betonline'))

    def book_line(cells, game, side):
        ## Return (line, odds) for one side of one book cell.
        try:
            raw = cells[game].find_all('div')[side].get_text().strip()
        except IndexError:
            ## Blank data shows up as a missing element.
            return '', ''
        ## Non-breaking space -> plain space, one-half glyph -> '.5'
        text = raw.replace(u'\xa0',' ').replace(u'\xbd','.5')
        ## Runline and totals usually come as '7 -110' or '-0.5 -110'.
        ## Without a space there is no separate line part, so the whole
        ## text is treated as the odds.
        line, sep, odds = text.partition(' ')
        if not sep:
            return '', text
        return line, odds

    def split_info(info):
        ## Slices look like they target 'TEAM - Pitcher (H)' -- TODO confirm
        ## against the page. find() returns -1 when a marker is absent and
        ## the slices are then taken relative to the end of the text.
        hyphen = info.find('-')
        paren = info.find("(")
        team = info[:hyphen - 1]
        pitcher = info[hyphen + 2 : paren - 1]
        hand = info[paren + 1 : -1]
        return team, pitcher, hand

    columns = ['key', 'day_game_nbr', 'date', 'time', 'H/A',
               'team', 'pitcher', 'hand',
               'opp_team', 'opp_pitcher', 'opp_hand']
    for _, prefix in books:
        if not_ML:
            columns.extend([prefix + '_line', prefix + '_odds'])
        else:
            columns.append(prefix)
    df = DataFrame(columns=columns)

    ## The page does not change while we read it, so query it only once.
    number_of_games = len(soup.find_all('div', attrs = {'class':'el-div eventLine-rotation'}))
    team_cells = soup.find_all('div', attrs = {'class':'el-div eventLine-team'})
    book_cells = [soup.find_all('div', attrs = {'class':'el-div eventLine-book', 'rel':book_id})
                  for book_id, _ in books]

    for i in range(number_of_games):
        print(str(i+1)+'/'+str(number_of_games))

        ## Entry 0 is read as the away side, entry 2 as the home side.
        entries = team_cells[i].find_all('div')
        team_A, pitcher_A, hand_A = split_info(entries[0].get_text().strip())
        team_H, pitcher_H, hand_H = split_info(entries[2].get_text().strip())

        ## Edit team names to match personal preference
        team_H = team_name_check(team_H)
        team_A = team_name_check(team_A)

        key = str(date) + '_' + team_A.replace(u'\xa0',' ') + '_' + team_H.replace(u'\xa0',' ')
        game_nbr = str(i+1)
        away = [key, game_nbr, date, time_of_move, 'away',
                team_A, pitcher_A, hand_A,
                team_H, pitcher_H, hand_H]
        home = [key, game_nbr, date, time_of_move, 'home',
                team_H, pitcher_H, hand_H,
                team_A, pitcher_A, hand_A]

        ## side 0 is the away entry of a book cell, side 1 the home entry
        for side, row in enumerate((away, home)):
            for cells in book_cells:
                line, odds = book_line(cells, i, side)
                if not_ML:
                    row.extend([line, odds])
                else:
                    ## ML data carries odds only
                    row.append(odds)
            ## away row lands on index 2*i, home row on 2*i + 1
            df.loc[2 * i + side] = row
    return df