def parseLeague(self, response):
    """Collect football tournament ids, filter out unwanted leagues,
    and issue one bulk AJAX request covering all remaining leagues.

    Probably most efficient to scrape all tournIds then build one big
    request for all leagues after filtering bad leagues.
    """
    lnames = response.xpath(
        '//li[@id="sport_2"]/div[@class="subCategories"]'
        '/ul/li/label/text()').extract()
    lids = response.xpath('//li[@id="sport_2"]/div[@class="subCategories"]'
                          '/ul/li/input/@id').extract()
    # Ids come back as e.g. 'checkboxTournament_5406'; chop the 19-char
    # prefix. (Renamed the loop var: `id` shadowed the builtin.)
    lids = [lid[19:] for lid in lids]
    # Pair names with ids so we can filter on the league name.
    lpairs = zip(lnames, lids)
    lids = [
        lid for (lname, lid) in lpairs if not linkFilter(self.name, lname)
    ]
    # Build a single GET request for every remaining league; joining is
    # linear, unlike repeated string +=.
    base_url = 'https://tonybet.com/cached_sports/football?'
    GETstr = 'country=gb&eo_format=eu&'
    GETstr += ''.join('tournaments_ids[]=%s&' % lid for lid in lids)
    GETstr += 't=t'
    headers = {
        'Referer': 'https://tonybet.com/football',
        'X-Requested-With': 'XMLHttpRequest',
        'Host': 'tonybet.com'
    }
    yield Request(url=base_url + GETstr, headers=headers,
                  callback=self.pre_parseData)
def parse(self, response):
    """Grab soccer league links from the odds menu and request the
    win-draw-win coupon page for each league that passes the filter."""
    # Restrict to the single <li> that contains the soccer section.
    soccer_li = response.xpath(
        '//div[@id="oddsmenu-inner"]/ul[@class="parent"]/'
        'li[descendant::div[@class="section "]/a[@id="betclass_soccer"]]')
    raw_links = soccer_li.xpath(
        'ul[@class="child"]/li/ul/li//a/@href').extract()
    # linkFilter returns True for links we want to drop.
    wanted_links = [
        href for href in raw_links if not linkFilter(self.name, href)
    ]
    base_url = 'https://sports.betway.com/?u='
    headers = {
        'Referer': 'https://sports.betway.com/',
        'X-Requested-With': 'XMLHttpRequest',
        'Host': 'sports.betway.com',
    }
    for href in wanted_links:
        yield Request(url=base_url + href + '&m=win-draw-win',
                      headers=headers, callback=self.parse_Data)
def parseLeague(self, response):
    """Pair league names with their links, filter by name, and request
    each remaining league page.

    If needed, the last cookie set can be read back with
    request.headers.getlist('Cookie').
    """
    name_xpath = ('//ul[@id="sb-sportlist"]/li[1]/ul[@id="lg1"]/li/'
                  'div[@class="line"]/a/text()')
    href_xpath = ('//ul[@id="sb-sportlist"]/li[1]/ul[@id="lg1"]/li/'
                  'div[@class="line"]/a/@href')
    league_names = response.xpath(name_xpath).extract()
    league_hrefs = response.xpath(href_xpath).extract()
    # Filter on the league NAME but keep the matching href.
    keep = [
        href
        for (league_name, href) in zip(league_names, league_hrefs)
        if not linkFilter(self.name, league_name)
    ]
    # There seems to be a cookie problem here: requests without cookies
    # get a 302 redirect that is not coped with well. Why are the
    # cookies not working consistently?
    headers = {
        'Referer': 'http://www.oddsring.com',
        'Host': 'www.oddsring.com'
    }
    for href in keep:
        yield Request(url=href, headers=headers,
                      callback=self.parseData, dont_filter=True)
def parse_leagues(self, response):
    """Parse the JSON league listing, filter competitions by name, and
    POST one request per remaining competition for its 1x2 market."""
    # Extract the needed params from the JSON response.
    try:
        jResp = json.loads(response.body)
    except ValueError:
        # Malformed/truncated body (lost connection perhaps?).
        # Log and STOP: the original code did `yield []` here, which
        # yielded an empty list and then fell through to use the
        # unbound `jResp`, raising NameError.
        log.msg('lostconn perhaps?', level=log.ERROR)
        log.msg('response dump: \n%s' % response.body, level=log.ERROR)
        return
    # The 'mod' key holds a nested JSON document (list of countries).
    jsonCountries = json.loads(jResp['mod'])
    # Reap the comp ids (cids), keeping the name for filtering.
    cids = [(comp['n'], comp['id'])
            for country in jsonCountries
            for comp in country['c']]
    # Filter the comps by name.
    cids = [(cname, cid) for (cname, cid) in cids
            if not linkFilter(self.name, cname)]
    base_url = 'http://sb.188bet.co.uk/en-gb/Service/CentralService?GetData'
    headers = {'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
               'X-Requested-With': 'XMLHttpRequest'}
    for (name, cid) in cids:
        # Seems the referer also gets set to the requested cid.
        # Would be nice to test whether 1x2 is available before making
        # the request; otherwise parse_Data will lack the needed keys.
        formdata = {'reqUrl': '/en-gb/sports/1/competition/1x2?competitionids='+str(cid)+'&'}
        headers['Referer'] = 'http://sb.188bet.co.uk/en-gb/sports/1/competition/1x2?competitionids='+str(cid)
        yield FormRequest(url=base_url, formdata=formdata, headers=headers,
                          meta={'league_name': name},
                          callback=self.parse_Data, dont_filter=True)
def parse(self, response):
    """Scrape league checkbox ids, filter by league name, and issue a
    single combined betting-offer request for all remaining leagues."""
    log.msg('Grabbing all checkbox ids..')
    cbids = response.xpath('//table[@id="TBL_Content_Leagues"]'
                           '/tr/td/input/@id').extract()
    # Ids look like 'cb12345'; drop the 'cb' prefix.
    leagueids = [cbid[2:] for cbid in cbids]
    leaguenames = response.xpath('//table[@id="TBL_Content_Leagues"]'
                                 '/tr/td/a/text()').extract()
    lpairs = zip(leagueids, leaguenames)
    # linkFilter returns True for leagues to drop. (Renamed the
    # comprehension var: `id` shadowed the builtin.)
    leagueids = [
        lid for (lid, lname) in lpairs if not linkFilter(self.name, lname)
    ]
    base_url = 'https://www.interwetten.com/en/SportsBook/Betting/BettingOffer.aspx'
    GETstr = '?leagueid=' + ','.join(leagueids) + '&type=0&ogPreselect=1'
    headers = {
        'Host': 'www.interwetten.com',
        'Referer': 'https://www.interwetten.com/en/sportsbook/o/10/fussball',
    }
    yield Request(url=base_url + GETstr, headers=headers,
                  callback=self.parse_ListMatches, dont_filter=True)
def parse_leagues(self, response):
    """Locate the 'Competitions' section, filter its league links, and
    request each remaining league page."""
    # Find the competitions section. The original left `compSec`
    # unbound (NameError) when no section matched; guard for that and
    # stop scanning at the first match.
    compSec = None
    for sec in response.xpath('//div[@class="section"]'):
        if sec.xpath('h3[@class="hecto"]/text()').extract() == [
                u'Competitions'
        ]:
            compSec = sec
            break
    if compSec is None:
        # Fail soft instead of crashing the callback.
        log.msg('No Competitions section at %s' % response.url,
                level=log.ERROR)
        return
    leagues = compSec.xpath(
        'ul[@class="limit-list"]//li/a/@href').extract()
    # linkFilter returns True for links to drop.
    leagues = [
        league for league in leagues
        if not linkFilter(self.name, league)
    ]
    # Request each remaining league.
    base_url = 'http://www.skybet.com'
    headers = {'Referer': 'http://www.skybet.com/football'}
    for league in leagues:
        yield Request(url=base_url + league, headers=headers,
                      callback=self.pre_parse_Data)
def traverseNav(self, response):
    '''
    Recursive callback: keeps requesting deeper levels of the
    bonavigation tree until a level with market groups is reached,
    then requests the XML data for each surviving market group.
    '''
    log.msg('traverseNav is at %s' % response.url, level=log.INFO)
    bonav_nodes = response.xpath('//bonavigationnodes/bonavigationnode')
    markets = response.xpath('//marketgroups//marketgroup')
    if bonav_nodes and not markets:
        # Not at the bottom yet: follow every unfiltered child node.
        base_url = 'http://www.betfred.com'
        headers = {'Accept': 'application/xml, text/xml, */*; q=0.01',
                   'X-Requested-With': 'XMLHttpRequest',
                   'Referer': 'http://www.betfred.com/sport'}
        log.msg('traverseNav there ARE bonav nodes', level=log.INFO)
        for node in bonav_nodes:
            node_id = take_first(node.xpath('idfwbonavigation/text()').extract())
            node_name = take_first(node.xpath('name/text()').extract())
            if linkFilter(self.name, node_name):
                continue
            # Cache-busting millisecond timestamp on the proxy URL.
            stamp = str(int(time.time() * 1000))
            GETstr = ('/__Admin/Proxy.aspx?proxyurl=http://warp.betfred.com/cache/'
                      'boNavigationList/2/UK/%s.xml&_=%s' % (node_id, stamp))
            yield Request(url=base_url + GETstr, headers=headers,
                          callback=self.traverseNav)
    else:
        log.msg('traverseNav there are NO MORE bonav nodes', level=log.INFO)
        base_url = 'http://warp.betfred.com'
        headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                   'Referer': 'http://warp.betfred.com/UK/2/bettingsc?bettingscIndex=3'}
        # Hit bottom: request event data for each market group. For
        # each marketId (i.e. each league) build an AJAX GET request to
        # receive that league's event data as XML. (lightMarketGroup
        # has no price data.)
        for market in markets:
            mname = take_first(market.xpath('name/text()').extract())
            if linkFilter(self.name, mname):
                continue
            mid = take_first(market.xpath('idfwmarketgroup/text()').extract())
            log.msg('traverseNav making market req for market %s with id %s'
                    % (mname, mid), level=log.INFO)
            GETstr = '/cache/marketGroup/UK/%s.xml' % mid
            yield Request(url=base_url + GETstr, headers=headers,
                          callback=self.parse_Data)
def parse_leagues(self, response):
    """Extract football league links, pull out each eventClassId, and
    request the win-draw-win coupon for each id.

    Fixes: the second half of the implicitly-concatenated pattern was
    a NON-raw string ('\\d' is an invalid escape, deprecated), the dots
    in the URL were unescaped, and the loop shadowed the builtin `id`.
    """
    # Compile once; reused for both link extraction and id capture.
    link_re = re.compile(
        r'http://www\.sportingbet\.com/sports-football/'
        r'[A-Za-z0-9-]+/1-102-(\d+)\.html')
    sx = SgmlLinkExtractor(allow=[link_re.pattern])
    league_links = sx.extract_links(response)
    # linkFilter returns True for links to drop.
    league_links = [
        link for link in league_links
        if not linkFilter(self.name, link.url)
    ]
    # Capture the eventClassId from each surviving link.
    eventClassIdList = []
    for link in league_links:
        m = link_re.search(link.url)
        if m:
            eventClassIdList.append(m.group(1))
    base_url = 'http://www.sportingbet.com/services/CouponTemplate.mvc/GetCoupon'
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'http://www.sportingbet.com/sports-football/0-102-410.html',
        'X-Requested-With': 'XMLHttpRequest',
        'Host': 'www.sportingbet.com',
    }
    for ecid in eventClassIdList:
        # Build the coupon query string for this event class.
        GETstr = '?couponAction=EVENTCLASSCOUPON&'
        GETstr += 'sportIds=102&'
        GETstr += 'marketTypeId=&'
        GETstr += 'eventId=&'
        GETstr += 'bookId=&'
        GETstr += 'eventClassId=' + str(ecid) + '&'
        GETstr += 'sportId=102&'
        GETstr += 'eventTimeGroup=ETG_NextFewHours_0_0'
        yield Request(url=base_url + GETstr, headers=headers,
                      meta={'eventClassId': str(ecid)},
                      callback=self.pre_parse_Data)
def parse(self, response):
    """Collect football league links from the sport hierarchy, filter
    them, and request each remaining league page."""
    hrefs = response.xpath(
        '//ul[@class="hierarchy"]/'
        'li[@class="expander expander-collapsed sport-FOOT"]/'
        'ul[@class="expander-content"]/'
        'li[@class="expander expander-collapsed"]/'
        'ul[@class="expander-content"]/li/a/@href').extract()
    hrefs = [href for href in hrefs if not linkFilter(self.name, href)]
    # NOTE(review): headers point at sports.titanbet.co.uk while the
    # request URL is built on sports.titanbet.com -- confirm this
    # domain mismatch is intentional.
    headers = {
        'Referer': 'http://sports.titanbet.co.uk/en/football',
        'Host': 'sports.titanbet.co.uk',
    }
    for href in hrefs:
        yield Request('http://sports.titanbet.com' + href,
                      headers=headers, callback=self.pre_parse_Data)
def parse(self, response):
    """Walk the league anchors, skip filtered league names, and request
    each remaining league's match page."""
    headers = {
        'Host': 'sports.williamhill.com',
        'Referer': 'http://sports.williamhill.com/bet/en-gb/betting/y/5/et/Football.html',
    }
    for anchor in response.xpath('//ul[@class="matrixB"]/li/ul/li/a'):
        # Extract the anchor text once and filter on it.
        league_name = take_first(anchor.xpath('text()').extract())
        if linkFilter(self.name, league_name):
            continue
        league_url = take_first(anchor.xpath('@href').extract())
        yield Request(url=league_url, headers=headers,
                      callback=self.parse_match)
def parseLeague(self, response):
    """Collect (name, data-id) pairs for each league, drop filtered
    names, and fire one AJAX request per remaining league id."""
    base_url = 'https://www.apostasonline.com/pt-PT/sportsbook/eventpaths/multi/'
    headers = {'Referer': 'https://www.apostasonline.com/',
               'X-Requested-With': 'XMLHttpRequest',
               'Host': 'www.apostasonline.com'}
    for li in response.xpath('//li[@class="sport_240"]/ul/li/ul/li'):
        league_name = take_first(li.xpath('a/text()').extract())
        # linkFilter returns True for leagues we want dropped.
        if linkFilter(self.name, league_name):
            continue
        league_id = take_first(li.xpath('a//@data-id').extract())
        GETstr = '[%s]?ajax=true&timezone=undefined' % league_id
        yield Request(url=base_url + GETstr, headers=headers,
                      callback=self.pre_parseData, dont_filter=True)