Esempio n. 1
0
 def err_callback(self, failure):
     if 'multiOddsData' in failure.request.url:
         yield failure.request
     else:
         l = ItemLoader(item=data_url())
         l.add_value('url', failure.request.url)
         yield l.load_item()
Esempio n. 2
0
 def parse_handicap(self, response):
     start_time = time.time()
     l = ItemLoader(item=data_handicap(), response=response)
     gameid = response.meta['gameid']
     print('请求剩余数目:' + str(len(self.crawler.engine.slot.inprogress)))
     try:
         table_odds = response.xpath('//table[@id = "oddsDetail"]')[0]
         company = table_odds.xpath('.//tr')[0].xpath(
             './/td//text()').extract()[:-3]
     except:
         print('让球请求失败,重新请求!\n ')
         print('响应内容:' + response.url)
         print('状态:' + str(response.status))
         print('响应内容:' + response.text)
         l = ItemLoader(item=data_url(), response=response)
         l.add_value('game_id', gameid)
         l.add_value('url', response.url)
         return l.load_item()
         # yield scrapy.Request(url=response.url,callback = self.parse_handicap, dont_filter=True,meta = response.meta['gameid'])
     for tr in table_odds.xpath('.//tr')[1:]:
         tds = tr.xpath('.//td')
         tds[2].extract()
         for index, td in enumerate(tds[:-2]):
             text = td.xpath('.//text()').extract()
             if text != []:
                 try:
                     l.add_value('game_id', gameid)
                     l.add_value('company', company[index])
                     l.add_value('line', text[0])
                     l.add_value('odds_home', text[1])
                     l.add_value('odds_away', text[-1])
                     dt = tds[-1].xpath('.//text()').extract()
                     l.add_value('change_time', dt[0] + ' ' + dt[1])
                 except:
                     print('让球解析错误!!!!!!!!!!!!!!!!!')
                 try:
                     l.add_value('score',
                                 tds[-2].xpath('.//text()').extract()[0])
                 except:
                     l.add_value('score', 'pregame')
     print(time.time() - start_time)
     return l.load_item()
Esempio n. 3
0
    def parse_euro(self, response):
        if response.url == 'http://1x2.nowscore.com/1575294.js':
            p = 0
        start_time = time.time()
        gameid = response.meta['gameid']
        flag = True
        l = ItemLoader(item=data_euro(),
                       response=response,
                       meta=response.meta['gameid'])
        print('请求剩余数目:' + str(len(self.crawler.engine.slot.inprogress)))
        re_game = re.compile(r'(?<=game\=Array\().*?(?=\);)')
        re_id = re.compile(r'(?<=\")\d*(?=\^)')
        re_gameDetail = re.compile(r'(?<=gameDetail\=Array\().*?(?=\);)')
        re_game = re.compile(r'(?<=game\=Array\().*?(?=\);)')
        try:
            cells_g = re_game.findall(response.text)[0].split('",')
            cells_d = re_gameDetail.findall(response.text)[0].split('",')
        except:
            try:
                cells_g = re_game.findall(response.text)[0].split('",')
                cells_d = re_game.findall(response.text)[0].split('",')
            except:
                print('欧赔请求失败,重新请求!\n ')
                print('响应内容:' + response.url)
                print('状态:' + str(response.status))
                print('响应内容:' + response.text)
                flag = False
            #yield scrapy.Request(url=response.url,callback = self.parse_euro, dont_filter=True)

        if flag == True:

            try:
                companys = {}
                for cell in cells_g:
                    cells = cell.split('|')
                    if cells[-1] == '1' or cells[
                            -2] == '1':  # cells[-1]==1 : 交易所,cells[-2] = 1: 主流公司
                        companys[cells[1]] = cells[2]
                companys_id = [key for key in companys]
                print('euro', time.time() - start_time)
                start_time = time.time()
                for cell in cells_d:
                    if re_id.findall(cell)[0] in companys_id:
                        company = companys[re_id.findall(cell)[0]]
                        for c in cell.split('^')[-1].split(';')[:-1]:
                            cells = c.split('|')
                            l.add_value('odds_home', cells[0])
                            l.add_value('odds_away', cells[2])
                            l.add_value('odds_tie', cells[1])
                            l.add_value('change_time', cells[3])
                            l.add_value('game_id', gameid)
                            l.add_value('company', company)
                            try:
                                l.add_value('kelly_home', cells[4])
                                l.add_value('kelly_away', cells[6])
                                l.add_value('kelly_tie', cells[5])
                            except:
                                continue
            except:
                if cell == '':
                    print('没有欧赔数据')
                else:
                    print('欧赔解析错误!\n ')
                    l = ItemLoader(item=data_url(), response=response)
                    l.add_value('game_id', gameid)
                    l.add_value('url', response.url)
                    return l.load_item()
        print('euro', time.time() - start_time)
        return l.load_item()