class TopBetEu:
    """Scraper that reads money lines out of a page of ``div.event`` blocks.

    The page is fetched through a ``Website`` helper built in ``__init__``;
    ``parse`` turns every event block into one tuple.
    """

    def __init__(self, url, headers, params, cnx, cursor):
        """Store the connection details and build the ``Website`` helper.

        NOTE: ``headers`` comes before ``params`` in this signature, which is
        the opposite of ``Pinnacle.__init__``.  The order is kept for
        backward compatibility; prefer keyword arguments at call sites.
        """
        self.url = url
        self.params = params
        self.headers = headers
        self.cnx = cnx
        self.cursor = cursor
        self.website = Website(url, params, headers, cursor, cnx)

    @staticmethod
    def _team_pattern(raw):
        """Normalise a team name captured from an event header.

        Drops the ``-N`` / ``-A`` markers, cuts everything from the first
        remaining ``-`` onwards, and replaces ``.`` with ``%`` (presumably so
        the result can be used as a SQL ``LIKE`` pattern -- confirm against
        the caller).
        """
        name = raw.replace('-N', '').replace('-A', '')
        name = re.sub(r'-.+', '', name)
        return name.replace('.', '%')

    def parse(self):
        """Return one tuple per event found on the page.

        Each tuple is ``(away_team, away_line, home_team, home_line,
        gametime)`` where the lines are ``int`` and ``gametime`` is a
        ``datetime.datetime`` at midnight of the game date.

        Raises:
            ValueError: if an event header does not contain the expected
                ``<away> at <home> `` text or ``YYYY-MM-DD HH:MM`` stamp, or
                if an event does not have exactly two ``td.money`` cells, or
                a cell is not an integer.
        """
        # Imported locally under an alias so this method works no matter
        # whether the file binds ``datetime`` to the module or to the class.
        import datetime as _dt

        soup = self.website.soup()

        games = []
        for eventdiv in soup.find_all('div', class_='event'):
            header = eventdiv.find('h3').text

            teams = re.search(r'(\w.+) at (\w.+) ', header)
            stamp = re.search(r'(....)-(..)-(..)\s+(..):(..)', header)
            if teams is None or stamp is None:
                raise ValueError(
                    'unrecognised event header: {0!r}'.format(header))

            awayteamstr = self._team_pattern(teams.group(1))
            hometeamstr = self._team_pattern(teams.group(2))

            # Only the date part is used; the hour and minute groups are
            # matched but ignored, so every game time is midnight.
            gametime = _dt.datetime(
                int(stamp.group(1)),
                int(stamp.group(2)),
                int(stamp.group(3)),
            )

            # Exactly two money cells are expected: away first, then home.
            awaycell, homecell = eventdiv.find_all('td', class_='money')

            games.append((
                awayteamstr,
                int(awaycell.text),
                hometeamstr,
                int(homecell.text),
                gametime,
            ))

        return games

    def __repr__(self):
        return '{0}({1},{2},{3})'.format(
            self.__class__.__name__, self.url, self.params, self.headers)
class Pinnacle:
    """Scraper that reads three-way money lines out of ``table.linesTbl`` tables.

    The page is fetched through a ``Website`` helper built in ``__init__``;
    ``parse`` turns every complete group of three rows into one tuple.
    """

    def __init__(self, url, params, headers, cnx, cursor):
        """Store the connection details and build the ``Website`` helper."""
        self.url = url
        self.params = params
        self.headers = headers
        self.cnx = cnx
        self.cursor = cursor
        self.website = Website(url, params, headers, cursor, cnx)

    @staticmethod
    def _table_month_day(table):
        """Return ``(month, day)`` read from the table's ``.linesHeader`` h4."""
        datestr = table.select('.linesHeader')[0].find('h4').text
        # Require at least one digit on each side so a stray '/' cannot
        # produce empty groups.
        match = re.search(r'(\d{1,2})/(\d{1,2})', datestr)
        if match is None:
            raise ValueError(
                'no month/day found in table header: {0!r}'.format(datestr))
        return int(match.group(1)), int(match.group(2))

    @staticmethod
    def _team_name(row):
        """Return the text of the row's first ``.linesTeam`` element."""
        return row.select('.linesTeam')[0].text

    @staticmethod
    def _money_line(row):
        """Return the row's money line as a float, or ``-1.0`` when blank."""
        text = row.select('.linesMLine')[0].text.strip()
        return float(text or -1)

    def parse(self, year=None):
        """Return one tuple per complete three-row group on the page.

        Each tuple is ``(name_a, line_a, name_b, line_b, draw_line,
        gametime)``: the names and lines come from the first two rows of the
        group, ``draw_line`` from the third, and ``gametime`` is a
        ``datetime.datetime`` at midnight of the table's date.  A blank
        money-line cell is reported as ``-1.0``.

        Args:
            year: year to combine with the month/day shown in each table
                header (the header carries no year).  Defaults to the
                current year.

        Raises:
            ValueError: if a table header has no ``M/D`` date, the date is
                not valid for ``year``, or a money line is not numeric.
        """
        # Imported locally under an alias so this method works no matter
        # whether the file binds ``datetime`` to the module or to the class.
        import datetime as _dt

        # Parenthesised single string: same output under the Python 2 print
        # statement and the Python 3 print function.
        print('parsing: ' + str(self))

        if year is None:
            year = _dt.date.today().year

        soup = self.website.soup()
        midnight = _dt.time()

        lines = []
        for table in soup.find_all('table', class_='linesTbl'):
            month, day = self._table_month_day(table)
            gametime = _dt.datetime.combine(_dt.date(year, month, day), midnight)

            # Rows carry one of two alternating CSS classes; all of the
            # first class are taken before all of the second.
            rows = list(table.select('.linesAlt1'))
            rows.extend(table.select('.linesAlt2'))

            # Rows come in groups of three (side A, side B, draw).  Grouping
            # is done per table so tables sharing a date cannot overwrite or
            # misalign each other; a trailing incomplete group is dropped.
            for start in range(0, len(rows) - 2, 3):
                linerowa, linerowb, draw = rows[start:start + 3]
                lines.append((
                    self._team_name(linerowa),
                    self._money_line(linerowa),
                    self._team_name(linerowb),
                    self._money_line(linerowb),
                    self._money_line(draw),
                    gametime,
                ))

        return lines

    def __repr__(self):
        return '{0}({1})'.format(self.__class__.__name__, self.website)