Ejemplo n.º 1
0
 def __init__(self):
     """Create the storage handles, the work queue and the worker pool."""
     # League-points data: MongoDB handle opened with 'shujufenxi', plus Redis.
     self.mongo, self.redis = MongoDB('shujufenxi'), Redis_Pool()
     # Work queue and the coroutine pool that consumes it.
     self.queue, self.coroutine_pool = Queue(), Pool()
Ejemplo n.º 2
0
 def __init__(self):
     """Create the Redis pool, one MongoDB handle per name, queue and pool."""
     self.redis = Redis_Pool()
     # self.mongo maps each name to a MongoDB handle opened with that name.
     self.mongo = {
         name: MongoDB(name)
         for name in ('home_infos', 'news_text_broadcas', 'new_players_info')
     }
     # Work queue and the coroutine pool that consumes it.
     self.queue, self.coroutine_pool = Queue(), Pool()
Ejemplo n.º 3
0
 def __init__(self):
     """Create one MongoDB handle per name, the Redis pool, queue and pool."""
     # League-points related data; self.mongo maps each name to a MongoDB
     # handle opened with that name.
     self.mongo = {
         name: MongoDB(name)
         for name in (
             'league-points',
             'technical-statistics',
             'historical',
             'recent-record',
             'fixture',
         )
     }
     self.redis = Redis_Pool()
     # Work queue and the coroutine pool that consumes it.
     self.queue, self.coroutine_pool = Queue(), Pool()
Ejemplo n.º 4
0
 def __init__(self):
     """Create the Redis pool, the 'detail' MongoDB handle, queue and pool."""
     self.redis, self.mongo = Redis_Pool(), MongoDB('detail')
     # Work queue and the coroutine pool that consumes it.
     self.queue, self.coroutine_pool = Queue(), Pool()
Ejemplo n.º 5
0
class NewSpider(object):
    """Fetch basketball match details and store them through ``self.mongo``.

    Match ids are read from Redis, pushed onto ``self.queue`` and consumed by
    workers scheduled on ``self.coroutine_pool``; each finished worker
    schedules the next one through its completion callback.
    """

    def __init__(self):
        """Create the Redis pool, the 'detail' MongoDB handle, queue and pool."""
        self.redis = Redis_Pool()
        self.mongo = MongoDB('detail')
        # Work queue of match ids and the coroutine pool that consumes it.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Completion callback: schedule another worker while ids remain.

        Args:
            temp: Value handed over by the pool when a worker finishes; unused.
        """
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.get_response, callback=self.__chech_callbake)

    def get_ID(self, name):
        """Queue every id stored under ``name`` in Redis and process them.

        Starts up to ``NEW_THREADING`` workers and blocks until the pool has
        finished.

        Args:
            name: Key passed to ``self.redis.find``.
        """
        datas = self.redis.find(name)
        for k, _ in datas.items():
            try:
                # NOTE(review): eval() executes whatever expression is stored
                # in the Redis key; if the keys are plain literals,
                # ast.literal_eval would be the safe replacement - confirm
                # the key format before switching.
                ID = eval(k)
            except Exception:
                # Best effort: keys that do not evaluate are skipped.
                # (A bare ``except`` would also swallow KeyboardInterrupt.)
                continue
            self.queue.put(ID)

        for _ in range(NEW_THREADING):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.get_response, callback=self.__chech_callbake)
        # Wait until every scheduled worker has finished.
        self.coroutine_pool.join()

    def get_response(self):
        """Take one id from the queue, fetch its detail JSON and store it.

        Does nothing when the queue is empty. ``task_done`` is called for the
        id even when the request or the parsing raises, so the queue's
        bookkeeping stays balanced; the exception itself still propagates.
        """
        if self.queue.empty():
            return
        ID = self.queue.get()
        try:
            url = 'https://api.namitiyu.com/v1/basketball/match/detail?sid={}&lang=zh'.format(ID)
            proxy = get_ip()
            if proxy:
                proxies = {'https': 'https://' + proxy, 'http': 'http://' + proxy}
                response = requests.get(url, proxies=proxies, headers=HEADERS).json()
            else:
                response = requests.get(url, headers=HEADERS).json()
            msg = {
                '赛事ID': ID,
                # Text broadcast.
                '文字直播': self.get_text_broadcas(response),
                # Player information.
                '球员信息': self.get_player(response),
            }
            # Store the document, using '赛事ID' as the second argument
            # expected by the MongoDB wrapper.
            self.mongo.insert_one(msg, '赛事ID')
        finally:
            self.queue.task_done()

    def get_text_broadcas(self, response):
        """Build the text-broadcast section from ``response['data']['tlive']``.

        Returns:
            ``{'msg': '暂无信息'}`` when the list is empty or missing a value,
            otherwise ``{'msg': {'第1节': ..., '第2节': ...}}`` with one entry
            per element, numbered from 1.
        """
        msg = {'msg': {}}
        text_ls_data = response['data']['tlive']
        if not text_ls_data:
            msg['msg'] = '暂无信息'
        else:
            for number, section in enumerate(text_ls_data, start=1):
                msg['msg']['第{}节'.format(number)] = section
        return msg

    def get_player(self, response):
        """Build the player section from ``response['data']['players']``.

        The rows of the first two groups are modified in place: index 4 of
        each row gets the CDN prefix prepended.

        Returns:
            ``{'msg': '暂无信息'}`` when there is no player data, otherwise
            ``{'msg': <the (mutated) players list>}``.
        """
        msg = {'msg': {}}
        text_ls_data = response['data']['players']
        if not text_ls_data:
            msg['msg'] = '暂无信息'
        else:
            for text_ls in text_ls_data[0:2]:
                for text in text_ls:
                    text[4] = 'https://cdn.leisu.com/basketball/player/' + text[4]
                    data = text[6].split('^')
                    # NOTE(review): split() yields strings, so this comparison
                    # with the integer 0 is never true and the branch is dead.
                    # Probably '0' was meant; left unchanged until the
                    # intended rewrite of text[6] is confirmed.
                    if data[-1] == 0:
                        text[6] = '是^' + text[6][0:-4]
            msg['msg'] = text_ls_data
        return msg

    def run_today(self):
        """Run ``get_ID`` for the three basketball Redis keys in parallel."""
        names = ('basketball_live', 'basketball_notStart', 'basketball_finished')
        threads = [threading.Thread(target=self.get_ID, args=(name,)) for name in names]
        for thread in threads:
            thread.start()
        # Wait for all three threads to finish.
        for thread in threads:
            thread.join()

    @classmethod
    def start(cls):
        """Create a spider and run ``run_today`` in an endless loop."""
        st = cls()
        while True:
            st.run_today()
Ejemplo n.º 6
0
 def __init__(self):
     """Create the 'home_page' MongoDB handle and the Redis pool."""
     self.mongo, self.redis = MongoDB('home_page'), Redis_Pool()
Ejemplo n.º 7
0
class TimeDataSpider(object):
    """Scrape per-day basketball game listings from live.leisu.com.

    Each game found on a listing page is stored through ``self.mongo`` and
    its id is recorded through ``self.redis``.
    """

    def __init__(self):
        """Create the 'home_page' MongoDB handle and the Redis pool."""
        self.mongo = MongoDB('home_page')
        self.redis = Redis_Pool()

    def get_data(self, day):
        """Scrape the listing for today + ``day`` days.

        Negative offsets go to ``_history_one_data``, all others to
        ``_future_one_data``.
        """
        if day < 0:
            self._history_one_data(day)
        else:
            self._future_one_data(day)

    def _future_one_data(self, day):
        """Scrape the 'saicheng' listing page for today + ``day`` days."""
        # NOTE(review): the Redis key says "history" although this is the
        # non-negative-offset path (and the other path uses "future"); the
        # names look swapped but are kept because readers of these keys are
        # not visible here.
        self._scrape_day(day, 'saicheng', 'basketball_history_events')

    def _history_one_data(self, day):
        """Scrape the 'wanchang' listing page for today + ``day`` days."""
        # NOTE(review): see _future_one_data - key name kept as in the
        # original code.
        self._scrape_day(day, 'wanchang', 'basketball_future_events')

    @staticmethod
    def _first(nodes, default=''):
        """Return the first XPath result, or ``default`` when there is none."""
        return nodes[0] if nodes else default

    def _parse_team(self, data, row):
        """Extract one team's columns from a game ``<li>`` element.

        Args:
            data: The game element to query.
            row: 1 for the home team's row, 2 for the away team's row.

        Returns:
            Dict with the team name, logo URL, the per-quarter list ('1234')
            and the remaining score/odds columns; missing cells become ''.
        """
        base = './/div[@class="r-left"]/div[{}]'.format(row)
        first = self._first

        style = first(data.xpath(base + '/div[1]//i[@class="ico"]/@style'))
        if style:
            # The logo is embedded as a CSS url(...) value; keep the part
            # before the query string.
            logo = 'https:' + re.findall(r'url\((.*?)\?', style)[0]
        else:
            logo = ''

        handicap = (first(data.xpath(base + '/div[7]/div[1]/text()')) + ' ' +
                    first(data.xpath(base + '/div[7]//span[@class="exponent"]/span/text()')))
        total = (first(data.xpath(base + '/div[8]/div[1]/text()')) + ' ' +
                 first(data.xpath(base + '/div[8]//span[@class="exponent"]/span/text()')))

        return {
            '队名': first(data.xpath(base + '/div[1]//span[@class="lang"]/text()')),
            '队logo': logo,
            # Kept as the full list of text nodes, not just the first one.
            '1234': data.xpath(base + '/div[2]/div/text()'),
            '上下': first(data.xpath(base + '/div[3]/text()')),
            '全场': first(data.xpath(base + '/b/text()')),
            '分差': first(data.xpath(base + '/div[4]/text()')),
            '总分': first(data.xpath(base + '/div[5]/text()')),
            '欧指': first(data.xpath(
                base + '/div[6]//span[@class="exponent"]/span[@class="text"]/text()')),
            '让分': handicap,
            '合分': total,
        }

    def _scrape_day(self, day, page, redis_key):
        """Fetch one listing page and store every game found on it.

        Args:
            day: Offset in days from the current local date.
            page: Path segment of the listing URL ('saicheng' or 'wanchang').
            redis_key: First argument for ``self.redis.insert_one``.

        The away team's name and logo are read from the second team row
        (the previous code read the home row for both), and a missing league
        name is stored as 0 on both pages instead of raising on one of them.
        """
        date = time.strftime('%Y%m%d',
                             time.localtime(time.time() + day * 24 * 3600))
        url = 'https://live.leisu.com/lanqiu/{}?date={}'.format(page, date)
        request_kwargs = {'headers': HEADERS}
        proxy = get_ip()
        if proxy:
            # NOTE(review): only the 'http' scheme is mapped while the URL is
            # https, so requests presumably never uses this proxy. Kept
            # unchanged; routing through the proxy would need error handling.
            request_kwargs['proxies'] = {'http': 'https://' + proxy}
        html = etree.HTML(requests.get(url, **request_kwargs).text)

        for data in html.xpath('//ul[@class="layout-grid-list"]/li'):
            event_id = data.xpath('./@data-id')[0]
            time_nodes = data.xpath('.//span[@class="time"]/text()')
            dic = {
                '赛事ID': event_id,
                # League name; 0 when the page does not provide one.
                '赛事': self._first(data.xpath(
                    './/div[@class="list-right"]/div[1]/div[1]/span/span/text()'), 0),
                '时间': date + time_nodes[0] if time_nodes else '',
                '状态': self._first(
                    data.xpath('.//span[@class="no-state"]/span/text()')),
                '主队': self._parse_team(data, 1),
                '客队': self._parse_team(data, 2),
            }
            # Persist the game and remember its id.
            self.mongo.insert_one(dic, '赛事ID')
            self.redis.insert_one(redis_key, event_id, event_id)

    def run(self):
        """Scrape day offsets -3..5 (except 0), one thread per offset."""
        threads = []
        for offset in range(-3, 6):
            if offset == 0:
                continue
            thread = threading.Thread(target=self.get_data, args=(offset, ))
            threads.append(thread)
            thread.start()
        # Wait for every day to finish.
        for thread in threads:
            thread.join()

    @classmethod
    def start(cls):
        """Run once immediately, then re-run on a 600-second schedule.

        The scheduled job only calls ``run`` while the local hour is '00'.
        This method never returns.
        """
        st = cls()

        def ss():
            if time.strftime('%H', time.localtime(time.time())) == '00':
                st.run()

        st.run()
        # Check every 600 seconds whether run() should be executed again.
        schedule.every(600).seconds.do(ss)
        while True:
            schedule.run_pending()
            time.sleep(1)
Ejemplo n.º 8
0
class DataSpider(object):
    def __init__(self):
        """Create the storage handles, the work queue and the worker pool."""
        # League-points data: MongoDB handle opened with 'shujufenxi', plus Redis.
        self.mongo, self.redis = MongoDB('shujufenxi'), Redis_Pool()
        # Work queue and the coroutine pool that consumes it.
        self.queue, self.coroutine_pool = Queue(), Pool()

    def __chech_callbake(self, temp):
        '''异步回调函数'''
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.get_response, callback=self.__chech_callbake)

    def get_ID(self, name):
        """Queue every id stored under ``name`` in Redis and process them.

        Starts up to ``DATA_THREADING`` workers and blocks until the pool has
        finished.
        """
        # NOTE(review): eval() executes whatever expression is stored in the
        # Redis key; a key that fails to evaluate raises out of this method.
        for raw_key, _ in self.redis.find(name).items():
            self.queue.put(eval(raw_key))

        for _ in range(DATA_THREADING):
            if self.queue.empty():
                continue
            self.coroutine_pool.apply_async(self.get_response, callback=self.__chech_callbake)
        # Wait until every scheduled worker has finished.
        self.coroutine_pool.join()

    def get_response(self):
        if not self.queue.empty():
            ID = self.queue.get()
            proxy = get_ip()
            try:
                if proxy:
                    response = requests.get('https://live.leisu.com/lanqiu/shujufenxi-{}'.format(ID), proxies={'https': 'https://'+proxy, 'http': 'http://'+proxy} , headers=HEADERS, timeout=3)
                else:
                    response = requests.get('https://live.leisu.com/lanqiu/shujufenxi-{}'.format(ID), headers=HEADERS, timeout=3)
                html = etree.HTML(response.text)
                msg = {'赛事ID': ID}
                # 联赛积分
                league_points = self._league_points(html)
                msg['联赛积分'] = league_points
                # 技术统计
                technical_statistics = self._technical_statistics(html)
                msg['技术统计'] = technical_statistics
                # 近期战绩
                historical = self._historical(html)
                msg['近期战绩'] = historical
                # 近期战绩
                recent_record = self._recent_record(html)
                msg['近期战绩'] = recent_record
                # 未来赛程
                fixture = self._fixture(html)
                msg['未来赛程'] = fixture
                print(msg)
                # 插入数据库
                self.mongo.insert_one(msg, '赛事ID')

                # 调度队列的tesk_done方法
                self.queue.task_done()

            except requests.exceptions.ProxyError:
                self.queue.get(ID)

    def _league_points(self, html):
        """Parse the league-points section of the page.

        Returns:
            ``{'联赛积分': '暂无数据'}`` when fewer than two team blocks are
            found, otherwise ``{'联赛积分': [...]}`` with one dict per team
            holding its name, rank and one record per table row.
        """
        sections = html.xpath('//div[@id="league-points"]/div[2]/div')
        if len(sections) < 2:
            return {'联赛积分': '暂无数据'}

        # Column labels in table order; column i is read from td[i].
        columns = ('类型', '赛', '胜', '负', '得分', '失分', '净胜分', '排名', '胜率')
        teams = []
        for section in sections:
            entry = {'msg': []}
            entry['队名'] = section.xpath('.//span[@class="name"]/text()')[0]
            entry['排名'] = section.xpath(
                './/div[@class="float-left f-s-12 color-999 line-h-25"]/text()')[0]
            # The first <tr> is skipped (presumably the header row).
            for row in section.xpath('.//tr')[1:]:
                record = {}
                for position, label in enumerate(columns, start=1):
                    cell = row.xpath('./td[{}]/text()'.format(position))
                    record[label] = cell[0].strip() if cell else ''
                entry['msg'].append(record)
            teams.append(entry)
        return {'联赛积分': teams}

    def _technical_statistics(self, html):
        """Parse the technical-statistics section of the page.

        Returns:
            ``{'技术统计': '暂无数据'}`` when the section has no blocks,
            otherwise ``{'技术统计': [...]}`` with one dict per team block
            (the first block is skipped) holding the team name and one
            record per table row.
        """
        sections = html.xpath('//div[@id="technical-statistics"]/div[2]/div')
        if not sections:
            return {'技术统计': '暂无数据'}

        # Column labels in table order; column i is read from td[i].
        columns = ('类型', '投篮命中率', '三分命中率', '平均篮板', '平均助攻', '平均抢断', '平均失误')
        teams = []
        for section in sections[1:]:
            entry = {'msg': []}
            entry['队名'] = section.xpath('.//span[@class="name"]/text()')[0]
            # The first <tr> is skipped (presumably the header row).
            for row in section.xpath('.//tr')[1:]:
                record = {}
                for position, label in enumerate(columns, start=1):
                    cell = row.xpath('./td[{}]/text()'.format(position))
                    record[label] = cell[0].strip() if cell else ''
                entry['msg'].append(record)
            teams.append(entry)
        return {'技术统计': teams}

    def _historical(self, html):
        """Parse the head-to-head history table of the page.

        Returns:
            ``{'历史交锋': '暂无数据'}`` when the table has no rows, otherwise
            ``{'历史交锋': [...]}`` with one dict per row after the first.

        Raises:
            IndexError: If a row lacks one of the mandatory cells.
        """
        rows = html.xpath('//div[@id="historical"]/div[2]//tr')
        if not rows:
            return {'历史交锋': '暂无数据'}

        def stripped_or_blank(nodes):
            # Optional cell: first text node stripped, or '' when absent.
            return nodes[0].strip() if nodes else ''

        matches = []
        for row in rows[1:]:
            matches.append({
                '赛事': row.xpath('./td[1]/a/text()')[0],
                '比赛时间': row.xpath('./td[2]/text()')[0].strip(),
                '客队': row.xpath('./td[3]/a/span/text()')[0],
                # str() of the result list with ',' turned into ':' and the
                # surrounding brackets cut off; the quotes of the list repr
                # stay in the stored value.
                '比分': str(row.xpath('./td[4]/a/span/text()')).replace(',', ':')[1:-1],
                '主队': row.xpath('./td[5]/a/span/text()')[0],
                '胜负': row.xpath('./td[6]/span/text()')[0].strip(),
                '分差': row.xpath('./td[7]/text()')[0].strip(),
                '让分': row.xpath('./td[8]/text()')[0].strip(),
                '盘路': stripped_or_blank(row.xpath('./td[9]/span/text()')),
                '总分': row.xpath('./td[10]/text()')[0].strip(),
                '总分盘': row.xpath('./td[11]/text()')[0].strip(),
                '进球数': stripped_or_blank(row.xpath('./td[12]/span/text()')),
            })
        return {'历史交锋': matches}

    def _recent_record(self, html):
        '''近期战绩'''
        msg = {'近期战绩': []}
        datas = html.xpath('//div[@id="recent-record"]/div[2]/div')
        # if not datas:
        #     msg['近期战绩'] = '暂无数据'
        # else:
        for data in datas:
            team_ls = data.xpath('.//span[@class="name"]/text()')
            if team_ls:
                team = team_ls[0]
            else:
                team = ''
            dic = {'战队': team, 'msg': []}
            dats = data.xpath('.//tr')[1:]
            for dat in dats:
                event = dat.xpath('./td[1]/a/text()')[0]  # 赛事
                time = dat.xpath('./td[2]/text()')[0].strip()  # 比赛时间
                away_team = dat.xpath('./td[3]/a/span/text()')[0]  # 客队
                score = str(dat.xpath('./td[4]/a/span/text()')).replace(',', ':')[1:-1]  # 比分
                home_team = dat.xpath('./td[5]/a/span/text()')[0]  # 主队
                shengfu = dat.xpath('./td[6]/span/text()')[0].strip()  # 胜负
                fencha = dat.xpath('./td[7]/text()')[0].strip()  # 分差
                rangfen = dat.xpath('./td[8]/text()')[0].strip()  # 让分
                panlu_ls = dat.xpath('./td[9]/span/text()')  # 盘路
                if panlu_ls:
                    panlu = panlu_ls[0].strip()
                else:
                    panlu = ''
                zongfen = dat.xpath('./td[10]/text()')[0].strip()  # 总分
                zongfenpan = dat.xpath('./td[11]/text()')[0].strip()  # 总分盘
                jinqiushu_ls = dat.xpath('./td[12]/span/text()')  # 进球数
                if jinqiushu_ls:
                    jinqiushu = jinqiushu_ls[0].strip()
                else:
                    jinqiushu = ''

                dic['msg'].append(
                    {'赛事': event, '比赛时间': time, '客队': away_team, '比分': score, '主队': home_team, '胜负': shengfu, '分差': fencha,
                     '让分': rangfen, '盘路': panlu, '总分': zongfen, '总分盘': zongfenpan, '进球数': jinqiushu})
            msg['近期战绩'].append(dic)
        return msg

    def _fixture(self, html):
        '''未来赛程'''
        msg = {'未来赛程': []}
        datas = html.xpath('//div[@id="fixture"]/div[2]/div')
        for data in datas:
            team_ls = data.xpath('.//span[@class="name"]/text()')
            if team_ls:
                team = team_ls[0]
            else:
                team = ''
            dic = {'战队': team, 'msg': []}
            dats = data.xpath('.//table/tr')
            if len(dats) > 1:
                for dat in dats[1:]:
                    event = dat.xpath('./td[1]/span/text()')[0]  # 赛事
                    time = dat.xpath('./td[2]/text()')[0].strip()  # 比赛时间
                    away_team = dat.xpath('./td[3]/text()')[0]  # 客队
                    home_team = dat.xpath('./td[4]/text()')[0]  # 主队
                    time_speed = dat.xpath('./td[5]/text()')[0]  # 与本场相隔

                    dic['msg'].append(
                        {'赛事': event, '比赛时间': time, '客队': away_team, '主队': home_team, '与本场相隔': time_speed})
                msg['未来赛程'].append(dic)
        return msg

    def run(self):
        """Collect match IDs for every Redis category, one thread per category.

        Each thread calls ``self.get_ID(name)``; the method returns once all
        of them have finished.

        The thread target must be the bound method itself with the category
        passed through ``args``.  Writing ``target=self.get_ID(name)`` would
        run ``get_ID`` immediately on the calling thread and hand ``None`` to
        ``Thread`` as its target.
        """
        categories = (
            'basketball_notStart',
            'basketball_finished',
            'basketball_live',
            'basketball_history_events',
            'basketball_future_events',
        )
        threads = [
            threading.Thread(target=self.get_ID, args=(category,))
            for category in categories
        ]
        # Start every worker before waiting so they run concurrently.
        for thread in threads:
            thread.start()
        # Block until all workers are done.
        for thread in threads:
            thread.join()

    @classmethod
    def start(cls):
        """Run one crawl immediately, then repeat it on a schedule forever.

        A single instance is created and its ``run`` method is registered
        with ``schedule`` to fire every ``DATA_LIVE_TIME`` seconds.  This
        method never returns: it polls the scheduler once per second.
        """
        spider = cls()
        spider.run()
        # Register the periodic re-run of the same instance.
        interval = schedule.every(DATA_LIVE_TIME).seconds
        interval.do(spider.run)
        while True:
            schedule.run_pending()
            time.sleep(1)
Ejemplo n.º 9
0
class NewSpider(object):
    """Crawler for live basketball match detail pages on live.leisu.com.

    Match IDs are read from the ``basketball_live`` entry in Redis, put on
    ``self.queue`` and fetched by workers scheduled on
    ``self.coroutine_pool``.  For every page the play-by-play feed and the
    per-team box scores are parsed and handed to the MongoDB wrappers in
    ``self.mongo``.
    """

    # (output key, XPath relative to one player row), in output order.
    _PLAYER_FIELDS = (
        ('背号', './div[1]/span/text()'),
        ('姓名', './div[2]//span[@class="o-hidden name"]/text()'),
        ('首发', './div[3]/span/text()'),
        ('出场时间', './div[4]/span/text()'),
        ('投篮', './div[5]/span/text()'),
        ('三分', './div[6]/span/text()'),
        ('罚球', './div[7]/span/text()'),
        ('前篮板', './div[8]/span/text()'),
        ('后篮板', './div[9]/span/text()'),
        ('总篮板', './div[10]/span/text()'),
        ('助攻', './div[11]/span/text()'),
        ('抢断', './div[12]/span/text()'),
        ('盖帽', './div[13]/span/text()'),
        ('失误', './div[14]/span/text()'),
        ('犯规', './div[15]/span/text()'),
        ('得分', './div[16]/span/text()'),
    )

    # (output key, XPath relative to the team "totals" row), in output order.
    _TEAM_FIELDS = (
        ('得分', './div[1]/span/text()'),
        ('助攻', './div[2]/span/text()'),
        ('篮板', './div[3]/span/text()'),
        ('前-后篮板', './div[4]/span/text()'),
        ('抢断', './div[5]/span/text()'),
        ('盖帽', './div[6]/span/text()'),
        ('投篮( 中 - 投 )', './div[7]/span/text()'),
        ('三分', './div[8]/span/text()'),
        ('罚球( 中 - 投 )', './div[9]/span/text()'),
        ('失误', './div[10]/span/text()'),
        ('犯规', './div[11]/span/text()'),
    )

    def __init__(self):
        self.redis = Redis_Pool()
        self.mongo = {
            'home_infos': MongoDB('home_infos'),
            'news_text_broadcas': MongoDB('news_text_broadcas'),
            'new_players_info': MongoDB('new_players_info')
        }
        # Work queue of match IDs and the worker pool that drains it.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    @staticmethod
    def _first_or_blank(node, path):
        """Return the first XPath result of *path* on *node*, or ''."""
        found = node.xpath(path)
        return found[0] if found else ''

    def __chech_callbake(self, temp):
        """Worker-completion callback: schedule another fetch if IDs remain.

        Args:
            temp: the finished worker's result; unused.
        """
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.get_response,
                                            callback=self.__chech_callbake)

    def get_ID(self):
        """Queue every live match ID and process the queue to completion.

        Reads the ``basketball_live`` entry from Redis, enqueues one ID per
        key, starts up to ``NEW_THREADING`` workers and blocks until the pool
        has finished.
        """
        datas = self.redis.find('basketball_live')
        print(datas)
        for key, _ in datas.items():
            # NOTE(review): eval() executes whatever text is stored as the
            # Redis key.  That is only safe while this crawler is the sole
            # writer of that data; consider a strict parser once the exact
            # key format is confirmed.
            self.queue.put(eval(key))

        for _ in range(NEW_THREADING):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.get_response,
                                                callback=self.__chech_callbake)
        # Wait for all scheduled workers to finish.
        self.coroutine_pool.join()

    def get_response(self):
        """Fetch one queued match page and store its parsed contents.

        Does nothing when the queue is empty.  ``task_done()`` is always
        called for an ID that was taken from the queue, even when the request
        or the parsing raises, so the queue's bookkeeping stays balanced; the
        exception itself still propagates.
        """
        if self.queue.empty():
            return
        ID = self.queue.get()
        try:
            request_options = {'headers': HEADERS, 'allow_redirects': False}
            proxy = get_ip()
            if proxy:
                request_options['proxies'] = {
                    'https': 'https://' + proxy,
                    'http': 'http://' + proxy
                }
            # NOTE(review): no timeout is set, so a stalled connection can
            # block this worker indefinitely.
            response = requests.get(
                'https://live.leisu.com/lanqiu/detail-{}'.format(ID),
                **request_options).text
            html = etree.HTML(response)
            # Play-by-play text feed.
            self.get_text_broadcas(html, ID)
            # Team and player box scores.
            self.get_player(html, ID)
        finally:
            self.queue.task_done()

    def get_text_broadcas(self, html, ID):
        """Parse the play-by-play feed and save it under the match ID.

        Args:
            html: parsed document exposing ``xpath()``.
            ID: match identifier stored as ``'赛事ID'``.

        The saved document's ``'文字直播'`` value is a list of
        ``{'时间', '比分', '文字描述'}`` dicts, or the string ``'暂无信息'``
        when the page has no feed entries.  A missing time or description
        cell becomes ``''`` instead of aborting the whole page, matching how
        the box-score parsers in this class treat missing cells.
        """
        msg = {'文字直播': []}
        msg['赛事ID'] = ID
        entries = html.xpath(
            '//div[@class="nano-content"]/ul[@class="list-content"]/li')
        if not entries:
            msg['文字直播'] = '暂无信息'
        else:
            for entry in entries:
                clock = self._first_or_blank(
                    entry, './div[@class="code"]/text()')
                # Score: list repr with ',' turned into '-' minus brackets.
                score = str(
                    entry.xpath('./div[@class="score"]/span/text()')).replace(
                        ',', '-')[1:-1]
                tip = self._first_or_blank(entry, './div[@class="tip"]/text()')
                msg['文字直播'].append({'时间': clock, '比分': score, '文字描述': tip})
        # Persist, keyed by match ID.
        self.mongo['news_text_broadcas'].insert_one(msg, '赛事ID')

    def get_player(self, html, ID):
        """Parse both teams' box scores and save them under the match ID.

        Args:
            html: parsed document exposing ``xpath()``.
            ID: match identifier stored as ``'赛事ID'``.
        """
        msg = {'人员信息': []}
        msg['赛事ID'] = ID
        blocks = html.xpath('//div[@class="content clearfix"]/div')
        # The first two child divs are skipped; team blocks follow them.
        for block in blocks[2:]:
            msg['人员信息'].append({
                '队名': self._first_or_blank(
                    block,
                    './div[@class="logo-name"]/div[@class="name"]/text()'),
                # Team totals.
                '球队总体信息': self._get_info(block),
                # Per-player lines.
                '球员信息': list(self._get_player_info(block)),
            })
        print(msg)
        # Persist, keyed by match ID.
        self.mongo['new_players_info'].insert_one(msg, '赛事ID')

    def _get_player_info(self, data):
        """Yield one stats dict per player row of a team block.

        Args:
            data: team block element exposing ``xpath()``.

        Yields:
            dict with the keys of ``_PLAYER_FIELDS``; a missing cell is ''.
        """
        rows = data.xpath('./div[@class="sp-tb"]/div[@class="list"]/div')
        # The first row is skipped (presumably the column header).
        for row in rows[1:]:
            yield {
                key: self._first_or_blank(row, path)
                for key, path in self._PLAYER_FIELDS
            }

    def _get_info(self, data):
        """Return the team totals of a team block.

        Args:
            data: team block element exposing ``xpath()``.

        Returns:
            dict with the keys of ``_TEAM_FIELDS`` (missing cells are ''),
            or an empty dict when the block has no totals row.
        """
        totals = data.xpath(
            './div[@class="sp-tb"]/div[@class="summary"]/div[@class="row totals "]'
        )
        if not totals:
            return {}
        return {
            key: self._first_or_blank(totals[0], path)
            for key, path in self._TEAM_FIELDS
        }

    @classmethod
    def start(cls):
        """Create one spider and crawl the live matches in an endless loop."""
        run = cls()
        while True:
            run.get_ID()
Ejemplo n.º 10
0
 def __init__(self):
     """Create the Redis and MongoDB handles used by this spider."""
     # Project-level Redis wrapper, constructed with its defaults.
     self.redis = Redis_Pool()
     # Project-level MongoDB wrapper constructed with the name 'home_info'
     # (presumably the target collection -- confirm against MongoDB).
     self.mongo = MongoDB('home_info')
Ejemplo n.º 11
0
class HomeSpider(object):
    """Crawler for the basketball overview page of live.leisu.com.

    The page lists live, not-yet-started and finished matches.  Each list is
    parsed into one dict per match and written to Redis (under a per-list
    name) and to MongoDB.
    """

    def __init__(self):
        self.redis = Redis_Pool()
        self.mongo = MongoDB('home_info')

    @staticmethod
    def _first_or_blank(node, path):
        """Return the first XPath result of *path* on *node*, or ''."""
        found = node.xpath(path)
        return found[0] if found else ''

    def get_home_data(self):
        """Fetch the overview page once and store all three match lists.

        Any exception raised while fetching, parsing or storing is printed
        and swallowed so that the caller's polling loop keeps running.
        """
        try:
            request_options = {
                'headers': HEADERS,
                'allow_redirects': False,
                'timeout': 3,
            }
            proxy = get_ip()
            if proxy:
                request_options['proxies'] = {
                    'https': 'https://' + proxy,
                    'http': 'http://' + proxy,
                }
            response = requests.get('https://live.leisu.com/lanqiu',
                                    **request_options).text
            html = etree.HTML(response)
            # Matches currently being played.
            lives = html.xpath('//div[@id="live"]/ul/li')
            self._model(lives, 'basketball_live')

            # Matches that have not started yet.
            notStart = html.xpath('//div[@id="notStart"]/ul/li')
            self._model(notStart, 'basketball_notStart')

            # Matches that are already finished.
            finished = html.xpath('//div[@id="finished"]/ul/li')
            self._model(finished, 'basketball_finished')
        except Exception as e:
            print(e)

    def _model(self, lives, name):
        """Replace the stored data of one match list with freshly parsed data.

        Args:
            lives: iterable of match elements exposing ``xpath()``.
            name: Redis name the list is stored under.

        All matches are parsed before the old Redis entry is deleted.  The
        parser is a lazy generator, so deleting first would leave the entry
        wiped or half-written whenever parsing fails part-way through.
        Matches without any team rows are not stored.
        """
        records = list(self._get_dispose_datas(lives))
        # Clear the previous snapshot before writing the new one.
        self.redis.delete(name)
        for record in records:
            if record['mgs']:
                self.redis.insert_one(name, record['赛事ID'], str(record))
                self.mongo.insert_one(record, '赛事ID')

    def _get_dispose_datas(self, lives):
        """Yield one dict per match element.

        Args:
            lives: iterable of match elements exposing ``xpath()``.

        Yields:
            dict with keys ``'mgs'`` (list of per-team dicts), ``'赛事'``,
            ``'状态'``, ``'时间'`` and ``'赛事ID'``.  Missing values are ''.
        """
        first = self._first_or_blank
        for live in lives:
            dic = {'mgs': []}
            dic['赛事'] = first(
                live, './/div[@class="thead row"]/div[1]/span[1]/span/text()')
            # Status is assembled from two adjacent spans, either of which
            # may be absent; the separating space is always kept.
            status_head = first(
                live, './/div[@class="thead row"]/div[1]/span[2]/text()')
            status_tail = first(
                live, './/div[@class="thead row"]/div[1]/span[3]/text()')
            dic['状态'] = status_head + ' ' + status_tail
            dic['时间'] = first(live, './/span[@class="time"]/text()')
            # Match ID: first run of digits in the detail link's href.
            try:
                href = live.xpath(
                    './/div[@class="d-row"]/div/div[@class="row"]/a/@href')[0]
                eventID = re.findall(r'\d+', href)[0]
            except (IndexError, TypeError):
                eventID = ''
            dic['赛事ID'] = eventID

            teams = live.xpath(
                './/div[@class="d-row"]/div[@class="r-left"]/div')
            for team in teams:
                # Logo URL sits inside the icon's inline style as
                # url(//host/path?query); keep the part before '?'.  A style
                # without such a URL yields '' instead of raising.
                style = team.xpath('./div[1]/i[@class="ico"]/@style')
                urls = re.findall(r'url\((.*?)\?', style[0]) if style else []
                logo = ('https:' + urls[0]) if urls else ''
                dic['mgs'].append({
                    '队名': first(team, './div[1]/span[1]/span/text()'),
                    'LOGO': logo,
                    # Raw list of the per-period cells.
                    '1234': team.xpath('./div[2]/div/text()'),
                    '上下': first(team, './div[3]/text()'),
                    '全场': first(team, './b/text()'),
                    '分差': first(team, './div[4]/text()'),
                    '队总分': first(team, './div[5]/text()'),
                    '欧指': first(team, './div[6]/span/span/text()'),
                    '让分': first(team, './div[7]/div[2]/span/span/text()'),
                    '总分': first(team, './div[8]/div[2]/span/span/text()'),
                })
            yield dic

    @classmethod
    def start(cls):
        """Create one spider and poll the overview page in an endless loop."""
        run = cls()
        while True:
            try:
                run.get_home_data()
            except requests.exceptions.ProxyError:
                # get_home_data already swallows exceptions; this is only a
                # last-resort guard so the loop never dies on a proxy error.
                pass