def __init__(self, event):
    """Keep the event dict, open a headless Spider on the event's link,
    and derive the pickle filename used to store this event's data."""
    self.data = event
    self.spider = Spider(gui=0)
    self.driver = self.spider.driver
    self.spider.get_url(self.data['link'])
    # filename: data_<start time, spaces -> underscores>_<last url segment>.pkl
    stamp = str(self.data['time_e']).replace(" ", "_")
    tail = self.data['link'].split("/")[-1]
    self.fname = 'data_' + stamp + '_' + tail + '.pkl'
    # sentinel timestamp meaning "never fetched yet"
    self.data['last_get'] = datetime.datetime(1999, 1, 1)
def get_data_sportsbooks(link):
    """Scrape one oddschecker event page.

    link -- oddschecker event URL; the cup name is taken from the
            third-from-last path segment of the URL.
    Returns a dict with 'market-name', 'cup-name' and 'rates', where
    'rates' is one {bookmaker: odds} dict per selection row.
    """
    cup_name = link.split("/")[-3]
    spider = Spider(gui=0)
    spider.get_url(link)
    market_name = spider.driver.find_element_by_xpath(
        '//div[@class="page-description module"]/header/h1').text.replace(
            " Winner Betting Odds", "")
    dic = {'market-name': market_name, 'cup-name': cup_name}
    rates = []
    # bookmaker names come from the header row's anchor "title" attributes
    headers = spider.driver.find_elements_by_xpath(
        '//thead/tr[@class="eventTableHeader"]/td/aside/a')
    options = spider.driver.find_elements_by_xpath('//tbody/tr')
    for option in options:
        elems = option.find_elements_by_xpath('./td')
        books = {}
        for i in xrange(len(headers)):
            head = headers[i].get_attribute("title")
            # skip header cells carrying no bookmaker name
            if head is None:
                continue
            # 'data-odig' holds the decimal odds; 0. marks "no price"
            elem = elems[i].get_attribute('data-odig')
            books[head] = float(elem) if elem is not None else 0.
        rates.append(books)
    dic['rates'] = rates
    spider.close()
    return dic
def get_links_sportsbooks(args): url = args[0] names = args[1] spider = Spider(gui=0) print 'get links from: %s' % url matchlinks = [] spider.get_url(url) elems = spider.driver.find_elements_by_xpath('//td[@class="betting"]/a') for elem in elems: if "in-play" in elem.get_attribute("class"): continue else: link = elem.get_attribute("href") if names != None: for name in names: if name.lower() in link: matchlinks.append(link) break else: matchlinks.append(link) spider.close() return matchlinks
def get_data_exchange(): start = 'unknown' spider = Spider(gui=0) #get urls competitions = [] spider.get_url('https://www.betfair.com/exchange/football') elems = spider.driver.find_elements_by_xpath( '//ul/li/a[@data-section-title="Top Competitions"]') for elem in elems: dic = { 'name': elem.get_attribute("data-ga-title"), 'url': elem.get_attribute("href") } if 'Rio' in dic['name']: continue if 'UEFA' in dic['name']: continue if 'National' in dic['name']: continue if 'Primera' in dic['name']: continue if 'Premier' in dic['name']: continue competitions.append(dic) print 'get data from betfair exchange: (%d urls)' % len(competitions) exchange = [] for j, comp in enumerate(competitions): print 'get data from market %d/%d: (%s)' % (j + 1, len(competitions), comp['name']) spider.get_url(comp['url']) #~ markets = spider.driver.find_elements_by_xpath('//tbody[@data-sportid="1"]') markets = spider.driver.find_elements_by_xpath( '//div[@class="container-market"]') #skip empty markets if len(markets) == 0: print '--WARNING: market empty' continue bar = progressbar.ProgressBar() for market in bar(markets): textlist = market.text.split('\n') dic = { 'competition': comp['name'], 'market-name': textlist[0], 'start': textlist[1] } selections = dic['market-name'].split(" v ") if len(selections) != 2: continue else: prices = [] for i in xrange(3): if i == 0: j = 0 if i == 1: j = 2 if i == 2: j = 1 xpath = './div/ul/li/ul/li[contains(@class,"lay selection-%d")]' % j price = market.find_element_by_xpath(xpath).text if price != ' ': price = float(price) else: price = None prices.append(price) dic['rates'] = prices exchange.append(dic) spider.close() return exchange
def get_data_sportsbooks(names=None): urls = [ 'http://www.oddschecker.com/tennis/atp-winston-salem', 'http://www.oddschecker.com/tennis/challenger-tour', 'http://www.oddschecker.com/tennis/us-open/mens', 'http://www.oddschecker.com/tennis/us-open/womens' ] market_names = [item.split("tennis/")[-1] for item in urls] spider = Spider(gui=0) #get links matchlinks = [] print '' for i, url in enumerate(urls): print 'get links from: %s' % url spider.get_url(url) elems = spider.driver.find_elements_by_xpath( '//td[@class="betting"]/a') for elem in elems: if "in-play" in elem.get_attribute("class"): continue else: link = elem.get_attribute("href") for name in names: if name.lower() in link: matchlinks.append((link, market_names[i])) break if len(matchlinks) == 0: raise ValueError("no events found in specified markets") #get data from oddschecker sportsbook = [] print 'getting data from sportsbooks: (%d events)' % len(matchlinks) bar = progressbar.ProgressBar() for item in bar(matchlinks): link = item[0] market_name = item[1] #skip handicaps if "handicap" in link: continue spider.get_url(link) players = spider.driver.find_elements_by_xpath('//tbody/tr') for player in players: dic = {'market-name': market_name} elems = player.find_elements_by_xpath('./td') dic['name'] = elems[0].get_attribute('textContent') split = dic['name'].split("/") if len(split) > 1: dic['name'] = split[0].split(" ")[-1] + '/' + split[1].split( " ")[-1] else: dic['name'] = dic['name'].split(" ")[-1] headers = spider.driver.find_elements_by_xpath( '//thead/tr[@class="eventTableHeader"]/td') rates = {} for i in xrange(len(headers)): if headers[i] == None: continue else: head = headers[i].get_attribute("data-bk") elem = elems[i].get_attribute('data-odig') if elem != None: elem = float(elem) else: elem = 0. rates[head] = elem dic['rates'] = rates sportsbook.append(dic) spider.close() return sportsbook
def get_data_exchange(): start = 'unknown' spider = Spider(gui=0) #get urls urls, market_names = [], [] spider.get_url('https://www.betfair.com/exchange/tennis') elems = spider.driver.find_elements_by_xpath( '//ul[@class="children"]/li/a') for elem in elems: url = elem.get_attribute("href") market_name = elem.get_attribute("market-name") if 'Challenger' in market_name: pass elif 'Winston' in market_name: pass elif 'US Open' in market_name: pass else: continue urls.append(url) market_names.append(market_name) print 'get data from betfair exchange: (%d urls)' % len(urls) names, prices, starts, mnames = [], [], [], [] for j, url in enumerate(urls): print 'get data from market %d/%d: (%s)' % (j, len(urls), market_names[j]) spider.get_url(url) markets = spider.driver.find_elements_by_xpath( '//div[@class="container-market"]') #skip empty markets if len(markets) == 0: print '--WARNING: market empty (%s)' % market_names[j] continue bar = progressbar.ProgressBar() for market in bar(markets): #~ market = markets[5] text = market.find_element_by_xpath('./a/div').text if len(text.split('\n')) > 1: text, start = text.split('\n') players = text.split(" v ") if len(players) != 2: continue else: for i in xrange(len(players)): split = players[i].split("/") if len(split) == 2: players[i] = split[0].rstrip().split( " ")[-1] + '/' + split[1].rstrip().split(" ")[-1] else: players[i] = players[i].split(" ")[-1] price1 = market.find_element_by_xpath( './div/ul/li/ul/li[contains(@class,"lay selection-0")]/button' ).text price2 = market.find_element_by_xpath( './div/ul/li/ul/li[contains(@class,"lay selection-1")]/button' ).text if price1 != ' ': price1 = float(price1) names.append(players[0]) prices.append(price1) starts.append(start) mnames.append(market_names[j]) if price2 != ' ': price2 = float(price2) names.append(players[1]) prices.append(price2) starts.append(start) mnames.append(market_names[j]) spider.close() return names, prices, starts, mnames
def get_events(timespan=(90, 150), countries=None): import progressbar from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By events, races = [], [] print 'searching for events on betfair.com' spider = Spider(gui=0) spider.get_url('https://www.betfair.com/exchange/horse-racing') spider.wait.until( EC.presence_of_element_located( (By.XPATH, '//div[@class="single-race"]/span/a'))) #select all available races if countries == None: cou = 'None' tags = spider.driver.find_elements_by_xpath( '//div[@class="single-race"]/span/a') for tag in tags: races.append((cou, tag)) #select races from specified countries else: cs = spider.driver.find_elements_by_xpath( '//span[@class="country-code"]') for cou in countries: for c in cs: if c.get_attribute("rel") == cou: mod = c.find_element_by_xpath('./../../../..') tags = mod.find_elements_by_xpath( "./div/div/div/div/div/div/span/a") for tag in tags: races.append((cou, tag)) if len(races) > 0: bar = progressbar.ProgressBar() for item in bar(races): r = item[1] dic = {'country': item[0]} dic['link'] = r.get_attribute('href') if dic['link'][-1] != '#': #set date if 'today' in r.get_attribute('class'): date = datetime.date.today() elif 'tomorrow' in r.get_attribute('class'): date = datetime.date.today() + datetime.timedelta(days=1) else: continue #set time time = r.get_attribute('textContent') time = datetime.time(int(time[:2]), int(time[-2:])) #combine date and time time_e = datetime.datetime.combine(date, time) #fix date for races in the first hours of the morning if (time_e - datetime.datetime.now()) <= datetime.timedelta(days=0): time_e = time_e + datetime.timedelta(days=1) #fix time difference time_e = time_e - datetime.timedelta(hours=5) #select events with timespan time diff to start if datetime.timedelta(minutes=timespan[1]) > ( time_e - datetime.datetime.now()) > datetime.timedelta( minutes=timespan[0]): dic['time_e'] = time_e events.append(dic) print 'available: %d events' 
% len(events) spider.driver.close() else: spider.driver.close() raise ValueError('no events available in timespan') return events
def get_data_sportsbooks(urls, exch=None): #get links spider = Spider(gui=0) names = [] for item in exch: split = item['market-name'].lower().split(" v ") names.append(split[0]) names.append(split[-1]) matchlinks = [] print '' for i, url in enumerate(urls): print 'get links from: %s' % url spider.get_url(url) elems = spider.driver.find_elements_by_xpath( '//td[@class="betting"]/a') for elem in elems: if "in-play" in elem.get_attribute("class"): continue else: link = elem.get_attribute("href") for name in names: if name.lower() in link: matchlinks.append(link) break #get data from oddschecker sportsbook = [] print 'getting data from sportsbooks: (%d events)' % len(matchlinks) bar = progressbar.ProgressBar() for link in bar(matchlinks): spider.get_url(link) market_name = spider.driver.find_element_by_xpath( '//div[@class="page-description module"]/header/h1').text.replace( " Winner Betting Odds", "") dic = {'market-name': market_name} tmp = {} headers = spider.driver.find_elements_by_xpath( '//thead/tr[@class="eventTableHeader"]/td') options = spider.driver.find_elements_by_xpath('//tbody/tr') for option in options: elems = option.find_elements_by_xpath('./td') sel = elems[0].text.split("\n")[0] rates = {} for i in xrange(len(headers)): head = headers[i].get_attribute("data-bk") if head == None: continue else: elem = elems[i].get_attribute('data-odig') if elem != None: elem = float(elem) else: elem = 0. rates[head] = elem tmp[sel] = rates home, away = dic['market-name'].split(" v ") rates = [tmp[home], tmp['Draw'], tmp[away]] dic['rates'] = rates sportsbook.append(dic) spider.close() return sportsbook