def __init__(self):
    """Create the storage clients plus the queue and pool used for crawling."""
    # Database connections.
    self.mongo = MongoDB('football_info')
    self.redis = Redis_Pool()
    # Pending work items and the coroutine pool that processes them.
    self.queue, self.coroutine_pool = Queue(), Pool()
def __init__(self, name):
    """Remember the Redis key to crawl and create the storage clients.

    Args:
        name: key later passed to ``self.redis.find`` by the owning class.
    """
    self.name = name
    self.redis = Redis_Pool()
    self.mongo_index = MongoDB('football_index')
    # Pending work items and the coroutine pool that processes them.
    self.queue, self.coroutine_pool = Queue(), Pool()
def __init__(self, name):
    """Remember the Redis key to crawl and create the storage clients.

    Args:
        name: stored on the instance as ``self.name``.
    """
    self.name = name
    # Database connections.
    self.mongo = MongoDB('football_home_page')
    self.redis = Redis_Pool()
    # Pending work items and the coroutine pool that processes them.
    self.queue, self.coroutine_pool = Queue(), Pool()
def __init__(self, name):
    """Create the Redis client, one MongoDB helper per page section, and the work queue.

    Args:
        name: stored on the instance as ``self.name``.
    """
    self.name = name
    self.redis = Redis_Pool()
    self.mongo_home_page = MongoDB('home_page')
    # Page-section identifier -> name handed to the MongoDB helper.
    section_stores = (
        ('historical', 'football_historical'),
        ('recent-record', 'football_recent'),
        ('since-trend', 'football_trend'),
        ('injury-situation', 'football_situation'),
        ('league-points', 'football_league_points'),
    )
    self.mongo = {section: MongoDB(store) for section, store in section_stores}
    # Pending work items and the coroutine pool that processes them.
    self.queue = Queue()
    self.coroutine_pool = Pool()
class IndexSpider(object):
    """Scrape the bookmaker odds table of every match stored under one Redis key.

    Match records are read from ``self.redis`` under ``name``; for each match
    the ``3in1-<id>`` page on live.leisu.com is fetched, its table rows are
    converted to dictionaries and the result is written through
    ``self.mongo_index`` keyed by '赛事ID'.
    """

    def __init__(self, name):
        """Store the Redis key and create the storage clients, queue and pool."""
        self.name = name
        self.redis = Redis_Pool()
        self.mongo_index = MongoDB('football_index')
        # Pending match IDs and the coroutine pool that processes them.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another fetch while match IDs remain queued.

        Args:
            temp: value handed over by the pool; unused.
        """
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__get_data,
                                            callback=self.__chech_callbake)

    def get_eventID(self):
        """Queue every match ID found under ``self.name`` and process them all."""
        datas = self.redis.find(self.name)
        for _key, raw in datas.items():
            # NOTE(review): eval() executes whatever text is stored in Redis.
            # Consider a safe literal parser if the stored format allows it.
            msg = eval(raw.decode("utf-8"))
            self.queue.put(msg['赛事ID'])
        # Start up to 100 tasks; each one re-schedules itself via the callback.
        for _ in range(100):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.__get_data,
                                                callback=self.__chech_callbake)
        # Block until every scheduled task has finished.
        self.coroutine_pool.join()

    @staticmethod
    def _cells(values, size):
        """Return exactly ``size`` stripped strings, padding with '' when short."""
        cells = [value.strip() for value in values[:size]]
        return cells + [''] * (size - len(cells))

    @classmethod
    def _parse_row(cls, row):
        """Convert one odds-table row into ``{bookmaker: [odds dict]}``.

        Missing or short cell lists are padded with empty strings so a single
        incomplete row cannot raise IndexError and discard the whole match.
        """
        labels = row.xpath('./td[2]/span[2]/text()')
        name = labels[0].strip() if labels else ''
        if not name:
            # Rows without a text label are stored under this fixed name.
            name = 'Bet365'
        # European odds
        euro_first = cls._cells(row.xpath('./td[3]/div[1]/span/text()'), 3)
        euro_second = cls._cells(
            row.xpath('./td[3]/div[2]/span/span/span/text()'), 3)
        # Handicap
        hcap_first = cls._cells(row.xpath('./td[4]/div[1]/span/text()'), 3)
        hcap_second = cls._cells(
            row.xpath('./td[4]/div[2]/span/span/span/text()'), 2)
        hcap_line = cls._cells(
            row.xpath('./td[4]/div[2]/span/span[2]/text()'), 1)
        # Goal totals
        goals_first = cls._cells(row.xpath('./td[5]/div[1]/span/text()'), 3)
        goals_second = cls._cells(
            row.xpath('./td[5]/div[2]/span/span/span/text()'), 2)
        goals_line = cls._cells(
            row.xpath('./td[5]/div[2]/span/span[2]/text()'), 1)
        return {
            name: [{
                '欧指': {
                    '主胜': [euro_first[0], euro_second[0]],
                    '和局': [euro_first[1], euro_second[1]],
                    '客胜': [euro_first[2], euro_second[2]],
                },
                '让球': {
                    '主胜': [hcap_first[0], hcap_second[0]],
                    '盘口': [hcap_first[1], hcap_line[0]],
                    '客胜': [hcap_first[2], hcap_second[1]],
                },
                '进球数': {
                    '大球': [goals_first[0], goals_second[0]],
                    '和局': [goals_first[1], goals_line[0]],
                    '小球': [goals_first[2], goals_second[1]],
                },
            }]
        }

    def __get_data(self):
        """Take one match ID from the queue, scrape its odds page and store it."""
        if self.queue.empty():
            return
        eventID = self.queue.get()
        url = f'https://live.leisu.com/3in1-{eventID}'
        proxy = get_ip()
        if proxy:
            # NOTE(review): requests selects a proxy by the scheme of the
            # target URL, so an 'http' entry is not applied to this https://
            # request - confirm whether the proxy is meant to be used here.
            response = requests.get(url,
                                    proxies={'http': 'https://' + proxy},
                                    headers=HEADERS,
                                    allow_redirects=False).text
        else:
            response = requests.get(url,
                                    headers=HEADERS,
                                    allow_redirects=False).text
        html = etree.HTML(response)
        # The first <tr> is skipped (presumably the table header).
        rows = html.xpath('/html/body/div[1]/div[3]//tr')[1:]
        msg = {'msg': [self._parse_row(row) for row in rows], '赛事ID': eventID}
        # Only persist matches that actually produced odds rows.
        if msg['msg']:
            self.mongo_index.insert_one(msg, '赛事ID')

    @classmethod
    def start(cls):
        """Run forever, scheduling a crawl of the four match lists every 2 seconds."""

        def run():
            notStart = cls('football_notStart')
            finished = cls('football_finished')
            other = cls('football_other')
            live = cls('football_live')
            live.get_eventID()
            notStart.get_eventID()
            finished.get_eventID()
            other.get_eventID()

        # Re-run the crawl periodically.
        schedule.every(2).seconds.do(run)
        while True:
            schedule.run_pending()
            time.sleep(1)
class IndexSpider(object):
    """Scrape bookmaker odds tables for the matches listed under several Redis keys.

    For each Redis key the match IDs are queued, the ``3in1-<id>`` page on
    live.leisu.com is fetched per match, and the parsed rows are written
    through ``self.mongo_index`` keyed by '赛事ID'.
    """

    def __init__(self):
        """Create the storage clients, the work queue and the coroutine pool."""
        self.redis = Redis_Pool()
        self.mongo_index = MongoDB('football_index')
        # Pending match IDs and the coroutine pool that processes them.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another fetch while match IDs remain queued.

        Args:
            temp: value handed over by the pool; unused.
        """
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__get_data,
                                            callback=self.__chech_callbake)

    def get_eventID(self, name):
        """Queue every match ID stored under Redis key ``name`` and process them."""
        datas = self.redis.find(name)
        for key, _value in datas.items():
            # NOTE(review): eval() executes whatever text is used as the key in
            # Redis. Consider a safe parser if keys are plain integers.
            self.queue.put(eval(key))
        # Start up to TEST_INDEX_DATA tasks; each re-schedules itself via the callback.
        for _ in range(TEST_INDEX_DATA):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.__get_data,
                                                callback=self.__chech_callbake)
        # Block until every scheduled task has finished.
        self.coroutine_pool.join()

    @staticmethod
    def _cells(values, size):
        """Return exactly ``size`` stripped strings, padding with '' when short."""
        cells = [value.strip() for value in values[:size]]
        return cells + [''] * (size - len(cells))

    @classmethod
    def _parse_row(cls, row):
        """Convert one odds-table row into ``{bookmaker: [odds dict]}``.

        Missing or short cell lists are padded with empty strings so a single
        incomplete row cannot raise IndexError and discard the whole match.
        """
        labels = row.xpath('./td[2]/span[2]/text()')
        name = labels[0].strip() if labels else ''
        if not name:
            # Rows without a text label are stored under this fixed name.
            name = 'Bet365'
        # European odds
        euro_first = cls._cells(row.xpath('./td[3]/div[1]/span/text()'), 3)
        euro_second = cls._cells(
            row.xpath('./td[3]/div[2]/span/span/span/text()'), 3)
        # Handicap
        hcap_first = cls._cells(row.xpath('./td[4]/div[1]/span/text()'), 3)
        hcap_second = cls._cells(
            row.xpath('./td[4]/div[2]/span/span/span/text()'), 2)
        hcap_line = cls._cells(
            row.xpath('./td[4]/div[2]/span/span[2]/text()'), 1)
        # Goal totals
        goals_first = cls._cells(row.xpath('./td[5]/div[1]/span/text()'), 3)
        goals_second = cls._cells(
            row.xpath('./td[5]/div[2]/span/span/span/text()'), 2)
        goals_line = cls._cells(
            row.xpath('./td[5]/div[2]/span/span[2]/text()'), 1)
        return {
            name: [{
                '欧指': {
                    '主胜': [euro_first[0], euro_second[0]],
                    '和局': [euro_first[1], euro_second[1]],
                    '客胜': [euro_first[2], euro_second[2]],
                },
                '让球': {
                    '主胜': [hcap_first[0], hcap_second[0]],
                    '盘口': [hcap_first[1], hcap_line[0]],
                    '客胜': [hcap_first[2], hcap_second[1]],
                },
                '进球数': {
                    '大球': [goals_first[0], goals_second[0]],
                    '和局': [goals_first[1], goals_line[0]],
                    '小球': [goals_first[2], goals_second[1]],
                },
            }]
        }

    def __get_data(self):
        """Take one match ID from the queue, scrape its odds page and store it."""
        if self.queue.empty():
            return
        eventID = self.queue.get()
        url = f'https://live.leisu.com/3in1-{eventID}'
        proxy = get_ip()
        if proxy:
            # NOTE(review): requests selects a proxy by the scheme of the
            # target URL, so an 'http' entry is not applied to this https://
            # request - confirm whether the proxy is meant to be used here.
            response = requests.get(url,
                                    proxies={'http': 'https://' + proxy},
                                    headers=HEADERS).text
        else:
            response = requests.get(url, headers=HEADERS).text
        html = etree.HTML(response)
        # The first <tr> is skipped (presumably the table header).
        rows = html.xpath('/html/body/div[1]/div[3]//tr')[1:]
        msg = {'msg': [self._parse_row(row) for row in rows], '赛事ID': eventID}
        # Only persist matches that actually produced odds rows.
        if msg['msg']:
            self.mongo_index.insert_one(msg, '赛事ID')

    def _crawl_keys(self, names):
        """Run ``get_eventID`` for each Redis key in its own thread and wait for all."""
        threads = [
            threading.Thread(target=self.get_eventID, args=(name, ))
            for name in names
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

    def today_index(self):
        """Crawl the four Redis lists for today's matches."""
        self._crawl_keys(('football_live', 'football_notStart',
                          'football_finished', 'football_other'))

    def not_today_index(self):
        """Crawl the Redis lists holding past and future matches."""
        self._crawl_keys(('football_history_events', 'football_future_events'))

    def run(self):
        """Crawl today's matches; during local hour 00 also crawl other days."""
        self.today_index()
        if time.strftime('%H', time.localtime(time.time())) == '00':
            self.not_today_index()

    @classmethod
    def start(cls):
        """Do one full crawl, then re-run ``run`` every 10 seconds forever."""
        st = cls()
        st.run()
        st.not_today_index()
        # Re-run the crawl periodically.
        schedule.every(10).seconds.do(st.run)
        while True:
            schedule.run_pending()
            time.sleep(1)
def __init__(self):
    """Create the Redis client and the MongoDB helper named 'home_page'."""
    self.redis = Redis_Pool()
    self.mongo = MongoDB('home_page')
class TimeDataSpider(object):
    """Scrape finished and scheduled matches for the days around today.

    Past days are read from the ``wanchang`` page and future days from the
    ``saicheng`` page of live.leisu.com. Every match is saved through
    ``self.mongo`` keyed by '赛事ID', and its ID is recorded in Redis.
    """

    # Extracts the URL inside ``url(...)`` up to (not including) the '?'.
    _LOGO_RE = re.compile(r'url\((.*?)\?')

    def __init__(self):
        """Create the Redis client and the MongoDB helper named 'home_page'."""
        self.redis = Redis_Pool()
        self.mongo = MongoDB('home_page')

    def get_response(self, day):
        """Scrape one day; negative offsets are history, others are future.

        Args:
            day: offset in days relative to now.
        """
        if day < 0:
            self._history_one_data(day)
        else:
            self._future_one_data(day)

    @staticmethod
    def _first(nodes, default=''):
        """Return the first xpath result, or ``default`` when there is none."""
        return nodes[0] if nodes else default

    @classmethod
    def _logo_url(cls, styles):
        """Build the league logo URL from a ``style`` attribute list.

        Returns '' when the attribute is missing or holds no ``url(...?``
        fragment, instead of raising IndexError.
        """
        if not styles:
            return ''
        found = cls._LOGO_RE.findall(styles[0])
        return ('https:' + found[0]) if found else ''

    @staticmethod
    def _team_id(hrefs):
        """Return the text after the last '-' of the first href, or ''."""
        return hrefs[0].split('-')[-1] if hrefs else ''

    @staticmethod
    def _date_for(day):
        """Return the local date ``day`` days from now as 'YYYYMMDD'."""
        return time.strftime('%Y%m%d',
                             time.localtime(time.time() + day * 24 * 3600))

    def _fetch_items(self, page, date, items_xpath):
        """Download ``page`` for ``date`` and return the nodes matching ``items_xpath``."""
        url = 'https://live.leisu.com/{}?date={}'.format(page, date)
        proxy = get_ip()
        if proxy:
            # NOTE(review): requests selects a proxy by the scheme of the
            # target URL, so an 'http' entry is not applied to this https://
            # request - confirm whether the proxy is meant to be used here.
            response = requests.get(url,
                                    headers=HEADERS,
                                    proxies={'http': 'https://' + proxy}).text
        else:
            response = requests.get(url, headers=HEADERS).text
        return etree.HTML(response).xpath(items_xpath)

    def _base_record(self, data, date, score_xpath):
        """Extract the fields shared by finished and scheduled matches.

        Returns None when the list item carries no ``data-id`` attribute.
        """
        ids = data.xpath('./@data-id')
        if not ids:
            return None
        first = self._first
        kickoff = data.xpath('.//span[@class="lab-time"]/text()')
        return {
            '赛事ID': ids[0],
            '赛事LOGO': self._logo_url(
                data.xpath('.//span[@class="lab-events"]/span/@style')),
            '赛事': first(data.xpath('.//a[@class="event-name"]/span/text()')),
            '轮次': first(data.xpath('.//span[@class="lab-round"]/text()')),
            # The page only shows the time of day; prefix it with the date.
            '比赛时间': date + kickoff[0] if kickoff else '',
            '主场球队': first(
                data.xpath('.//span[@class="lab-team-home"]/span/a/text()')),
            '主场球队ID': self._team_id(
                data.xpath('.//span[@class="lab-team-home"]/span/a/@href')),
            '比分': first(data.xpath(score_xpath)),
            '客场球队': first(
                data.xpath('.//span[@class="lab-team-away"]/span/a/text()')),
            '客场球队ID': self._team_id(
                data.xpath('.//span[@class="lab-team-away"]/span/a/@href')),
        }

    def _history_one_data(self, day):
        """Scrape and store the finished matches of the day at offset ``day``."""
        date = self._date_for(day)
        items = self._fetch_items('wanchang', date,
                                  '//*[@id="finished"]/ul/li')
        first = self._first
        for data in items:
            dic = self._base_record(data, date,
                                    './/span[@class="score"]/b/text()')
            if dic is None:
                continue
            dic['半场'] = first(data.xpath('.//span[@class="lab-half"]/text()'))
            dic['角球'] = first(
                data.xpath('.//span[@class="lab-corner"]/span/text()'))
            dic['胜负'] = first(
                data.xpath('.//span[@class="lab-bet-odds"]/span/text()'))
            dic['让球'] = first(data.xpath('.//span[@class="lab-ratel"]/text()'))
            dic['进球数'] = first(
                data.xpath('.//span[@class="lab-size"]/span/text()'))
            # Persist the match and remember its ID for the other spiders.
            self.mongo.insert_one(dic, '赛事ID')
            self.redis.insert_one('football_history_events', dic['赛事ID'],
                                  dic['赛事ID'])

    def _future_one_data(self, day):
        """Scrape and store the scheduled matches of the day at offset ``day``."""
        date = self._date_for(day)
        items = self._fetch_items('saicheng', date,
                                  '//*[@id="notStart"]/ul/li')
        for data in items:
            dic = self._base_record(data, date,
                                    './/span[@class="score"]/span/text()')
            if dic is None:
                continue
            dic['让球'] = self._first(
                data.xpath('.//span[@class="lab-ratel"]/text()'))
            # Persist the match and remember its ID for the other spiders.
            self.mongo.insert_one(dic, '赛事ID')
            self.redis.insert_one('football_future_events', dic['赛事ID'],
                                  dic['赛事ID'])

    def run(self):
        """Scrape day offsets -3..5 (skipping 0), one thread per day."""
        threads = []
        for offset in range(-3, 6):
            if offset == 0:
                continue
            thread = threading.Thread(target=self.get_response,
                                      args=(offset, ))
            threads.append(thread)
            thread.start()
        # Wait for every day to finish.
        for thread in threads:
            thread.join()

    @classmethod
    def start(cls):
        """Scrape once now, then every 600 s re-scrape while the local hour is 00."""
        st = cls()

        def ss():
            if time.strftime('%H', time.localtime(time.time())) == '00':
                st.run()

        st.run()
        # Re-check periodically.
        schedule.every(600).seconds.do(ss)
        while True:
            schedule.run_pending()
            time.sleep(1)
class HomePage(object):
    """Collect live commentary, match statistics and line-ups per match.

    Match IDs are read from Redis; details come from the api.namitiyu.com
    endpoints and are written through ``self.mongo`` keyed by '赛事ID'.
    """

    # Labels for the first nine entries of the API's ``stats`` list, in order.
    _STAT_LABELS = ('角球', '黄牌', '红牌', '点球', '射正', '射门', '进攻', '危险进攻',
                    '控球率')
    _AVATAR_PREFIX = 'https://cdn.leisu.com/avatar/'

    def __init__(self):
        """Create the storage clients, the work queue and the coroutine pool."""
        # Database connections.
        self.mongo = MongoDB('football_info')
        self.redis = Redis_Pool()
        # Pending match IDs and the coroutine pool that processes them.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another task while match IDs remain queued.

        Args:
            temp: value handed over by the pool; unused.
        """
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__dispose_one_data,
                                            callback=self.__chech_callbake)

    def get_data(self, name):
        """Queue every match ID stored under Redis key ``name`` and process them."""
        live = self.redis.find(name)
        for key, _value in live.items():
            # NOTE(review): eval() executes whatever text is used as the key in
            # Redis. Consider a safe parser if keys are plain integers.
            self.queue.put(eval(key))
        for _ in range(TEST_HOME_ASYNC_COUNT):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.__dispose_one_data,
                                                callback=self.__chech_callbake)
        # Block until every scheduled task has finished.
        self.coroutine_pool.join()

    def __dispose_one_data(self):
        """Take one match ID from the queue, fetch its details and store them."""
        if self.queue.empty():
            return
        ID = self.queue.get()
        # Commentary and in-match statistics.
        info = self._text_broadcast(ID)
        # Line-ups of both teams.
        info['球队信息'] = self._teams_info(ID)
        info['赛事ID'] = ID
        self.mongo.insert_one(info, '赛事ID')

    def _text_broadcast(self, ID):
        """Fetch commentary and statistics for match ``ID``.

        Returns:
            dict with '文字解说' and '赛场内容'; empty when the API returns
            nothing. '赛场内容' is '暂无信息' when fewer than nine statistics
            entries are available.
        """
        res = requests.get(
            'https://api.namitiyu.com/v1/football/match/detail?sid={}'.format(
                ID)).json()
        msg = {}
        if not res:
            return msg
        # Commentary: replace the source site's name in every entry.
        events = res['event']
        for entry in events:
            entry['data'] = entry['data'].replace('雷速体育', '我们')
        msg['文字解说'] = events
        stats = res['stats']
        if stats and len(stats) >= len(self._STAT_LABELS):
            on_target, shots = stats[4], stats[5]
            # The shots entry is stored as shots plus shots on target.
            for side in on_target:
                shots[side] += on_target[side]
            msg['赛场内容'] = dict(zip(self._STAT_LABELS, stats))
        else:
            msg['赛场内容'] = '暂无信息'
        return msg

    @classmethod
    def _with_avatar_urls(cls, lineup):
        """Prefix each non-empty avatar field (index 2) in place; return ``lineup``.

        The first element of ``lineup`` is left untouched; every following
        element is treated as a list of player rows.
        """
        for group in lineup[1:]:
            for player in group:
                if player[2]:
                    player[2] = cls._AVATAR_PREFIX + player[2]
        return lineup

    def _teams_info(self, ID):
        """Fetch both line-ups for match ``ID``.

        Returns:
            ``{'home': {'msg': ...}, 'away': {'msg': ...}}``, or '暂无信息'
            when the API returns nothing.
        """
        res = requests.get(
            'https://api.namitiyu.com/v1/football/match/lineup?tid=1&sid={}'.
            format(ID)).json()
        if not res:
            return '暂无信息'
        lineup = res['lineup']
        return {
            'home': {'msg': self._with_avatar_urls(lineup['home'])},
            'away': {'msg': self._with_avatar_urls(lineup['away'])},
        }

    def _crawl_keys(self, names):
        """Run ``get_data`` for each Redis key in its own thread and wait for all."""
        threads = [
            threading.Thread(target=self.get_data, args=(name, ))
            for name in names
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

    def today_index(self):
        """Crawl the four Redis lists for today's matches."""
        self._crawl_keys(('football_live', 'football_notStart',
                          'football_finished', 'football_other'))

    def not_today_index(self):
        """Crawl the Redis lists holding past and future matches."""
        self._crawl_keys(('football_history_events', 'football_future_events'))

    def run(self):
        """Crawl today's matches; during local hour 00 also crawl other days."""
        self.today_index()
        if time.strftime('%H', time.localtime(time.time())) == '00':
            self.not_today_index()

    @classmethod
    def start(cls):
        """Crawl past/future matches once, then crawl today's matches in a loop."""
        st = cls()
        st.not_today_index()
        while True:
            st.today_index()
class DataSpider(object):
    """Scrape the data-analysis page of each match and store its five sections.

    For every match under Redis key ``name`` the ``shujufenxi-<id>`` page is
    fetched once per section (historical, recent-record, since-trend,
    injury-situation, league-points) and the parsed section is written to the
    matching entry of ``self.mongo`` keyed by '赛事ID'.
    """

    # (output key, xpath) pairs for one row of the historical / recent tables.
    _HISTORY_COLUMNS = (
        ('赛事', './td[1]/a/text()'),
        ('时间', './td[2]/text()'),
        ('主场球队', './td[3]/a/span/text()'),
        ('比分', './td[4]/a/span/text()'),
        ('客场球队', './td[5]/a/span/text()'),
        ('半场', './td[6]/text()'),
        ('半角', './td[7]/text()'),
        ('胜负', './td[8]/span/text()'),
        ('欧指', './td[9]/div/div/text()'),
        ('让球', './td[10]/div/div/text()'),
        ('盘数', './td[11]/span/text()'),
        ('进球', './td[12]/span/text()'),
    )
    # Output keys for td[1]..td[n] of the trend and league-points tables.
    _TREND_KEYS = ('类型', '比赛', '赢盘', '走盘', '输盘', '赢盘率', '大球', '大球率', '小球',
                   '小球率')
    _LEAGUE_KEYS = ('类型', '比赛场数', '胜场数', '负场数', '平数', '进球', '失球', '进球率',
                    '积分', '排名', '胜率')
    # (output key, xpath) pairs for one row of the injury / suspension table.
    _INJURY_COLUMNS = (
        ('球员', './td[1]/a/span/text()'),
        ('位置', './td[2]/text()'),
        ('原因', './td[3]/text()'),
        ('开始时间', './td[4]/text()'),
        ('归队时间', './td[5]/text()'),
        ('影响场数', './td[6]/text()'),
    )

    def __init__(self, name):
        """Store the Redis key and create the storage clients, queue and pool."""
        self.name = name
        self.redis = Redis_Pool()
        self.mongo_home_page = MongoDB('home_page')
        self.mongo = {
            'historical': MongoDB('football_historical'),
            'recent-record': MongoDB('football_recent'),
            'since-trend': MongoDB('football_trend'),
            'injury-situation': MongoDB('football_situation'),
            'league-points': MongoDB('football_league_points')
        }
        # Pending (match ID, section) pairs and the pool that processes them.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another task while work remains queued.

        Args:
            temp: value handed over by the pool; unused.
        """
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__get_data,
                                            callback=self.__chech_callbake)

    def get_events_ID(self):
        """Queue all five sections of every match under ``self.name`` and process them."""
        datas = self.redis.find(self.name)
        for _key, raw in datas.items():
            # NOTE(review): eval() executes whatever text is stored in Redis.
            # Consider a safe literal parser if the stored format allows it.
            msg = eval(raw.decode("utf-8"))
            for section in ('historical', 'recent-record', 'since-trend',
                            'injury-situation', 'league-points'):
                self.queue.put((msg['赛事ID'], section))
        for _ in range(TEST_PROXIES_ASYNC_COUNT):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.__get_data,
                                                callback=self.__chech_callbake)
        # Block until every scheduled task has finished.
        self.coroutine_pool.join()

    def __get_data(self):
        """Take one (match ID, section) pair, fetch the page and parse that section."""
        proxy = get_ip()
        if self.queue.empty():
            return
        events_ID, ball = self.queue.get()
        url = 'https://live.leisu.com/shujufenxi-' + str(events_ID)
        try:
            if proxy:
                resp = requests.get(url,
                                    proxies={
                                        'https': 'https://' + proxy,
                                        'http': 'http://' + proxy
                                    },
                                    headers=HEADERS,
                                    allow_redirects=False).text
            else:
                resp = requests.get(url,
                                    headers=HEADERS,
                                    allow_redirects=False).text
            if ball == 'since-trend':
                self.__get_trend(resp, events_ID, ball)
            elif ball == 'injury-situation':
                self.__get_situation(resp, events_ID, ball)
            elif ball == 'league-points':
                self.__get_league_points(resp, events_ID, ball)
            else:
                self.__get_historical(ball, events_ID, resp)
        except requests.exceptions.ProxyError:
            # An unusable proxy just drops this item.
            pass
        finally:
            # Mark the item as handled whether or not the fetch succeeded.
            self.queue.task_done()

    @staticmethod
    def _first(nodes, strip=False):
        """Return the first xpath result ('' when none), optionally stripped."""
        if not nodes:
            return ''
        return nodes[0].strip() if strip else nodes[0]

    @staticmethod
    def _score(parts):
        """Join the two score parts as 'a:b'; '' when fewer than two are present."""
        return parts[0] + ':' + parts[1] if len(parts) >= 2 else ''

    @classmethod
    def _numbered_cells(cls, row, keys):
        """Map ``keys`` onto the text of ``td[1]``..``td[len(keys)]`` of ``row``."""
        return {
            key: cls._first(row.xpath('./td[{}]/text()'.format(position)))
            for position, key in enumerate(keys, 1)
        }

    @classmethod
    def _injury_record(cls, row):
        """Build the dict for one injured or suspended player row."""
        return {
            key: cls._first(row.xpath(path))
            for key, path in cls._INJURY_COLUMNS
        }

    def __get_historical(self, ball, events_ID, resp):
        """Parse the head-to-head or recent-record table and store it.

        Args:
            ball: 'historical' selects the head-to-head table; any other value
                selects the recent-record table. Also the ``self.mongo`` key.
            events_ID: match ID stored with the result.
            resp: page HTML as text.
        """
        section = 'historical' if ball == 'historical' else 'recent-record'
        pattern = (r'<div id="' + section +
                   r'"[\s\S]*?(<tr[\s\S]*?</td>)</tr></table>')
        try:
            tables = re.findall(pattern, resp)
            if not tables:
                # The page has no such section; nothing to store.
                return
            rows = etree.HTML(tables[0]).xpath('//tr')[1:]
            dict_data = {'data': [], '赛事ID': events_ID}
            for row in rows:
                dic = {}
                for key, path in self._HISTORY_COLUMNS:
                    nodes = row.xpath(path)
                    if key == '比分':
                        dic[key] = self._score(nodes)
                    else:
                        dic[key] = self._first(nodes, strip=True)
                dict_data['data'].append(dic)
            if dict_data['data']:
                self.mongo[ball].insert_one(dict_data, '赛事ID')
        except Exception:
            # Best effort: one bad page must not stop the crawl, but leave a
            # trace instead of failing silently.
            import logging
            logging.getLogger(__name__).exception(
                'failed to parse %s table for match %s', ball, events_ID)

    def __get_trend(self, resp, events_ID, ball):
        """Parse the per-team trend tables and store them under ``ball``."""
        html = etree.HTML(resp)
        blocks = html.xpath('//div[@id="since-trend"]/div[2]/div')
        dic = {'mgs': [], '赛事ID': events_ID}
        for block in blocks:
            team_name = self._first(
                block.xpath('.//span[@class="name"]/text()'))
            # The first two <tr> elements are skipped (presumably header rows).
            rows = block.xpath('.//tr')[2:]
            dic['mgs'].append({
                'mgs': [self._numbered_cells(row, self._TREND_KEYS)
                        for row in rows],
                '队名': team_name,
            })
        if dic['mgs']:
            self.mongo[ball].insert_one(dic, '赛事ID')

    def __get_situation(self, resp, events_ID, ball):
        """Parse the per-team injury and suspension lists and store them."""
        html = etree.HTML(resp)
        blocks = html.xpath('//div[@id="injury-situation"]/div[2]/div')
        msg_dic = {'msg': [], '赛事ID': events_ID}
        for block in blocks:
            team_name = block.xpath('.//span[@class="name"]/text()')
            msg = {
                'msg': {'伤病': [], '停赛': []},
                '队名': team_name[0] if team_name else '',
            }
            for row in block.xpath('.//tr')[1:]:
                # Rows with a cell spanning all six columns carry no player.
                if row.xpath('./td[@colspan="6"]'):
                    continue
                classes = row.xpath('./@class')
                # Rows whose class contains 'td-pd' go under injuries,
                # everything else under suspensions.
                injured = bool(classes) and 'td-pd' in classes[0]
                msg['msg']['伤病' if injured else '停赛'].append(
                    self._injury_record(row))
            msg_dic['msg'].append(msg)
        if msg_dic['msg']:
            self.mongo[ball].insert_one(msg_dic, '赛事ID')

    def __get_league_points(self, resp, events_ID, ball):
        """Parse the per-team league standings tables and store them."""
        html = etree.HTML(resp)
        blocks = html.xpath('//div[@id="league-points"]/div[2]/div')
        msg = {'msg': [], '赛事ID': events_ID}
        for block in blocks:
            team_name = block.xpath('./div[1]/div/a/span/text()')
            event = block.xpath('./div[1]/div/div/text()')
            name = team_name[0] + event[0] if team_name and event else ''
            rows = block.xpath('.//tr')[1:]
            msg['msg'].append({
                'msg': [self._numbered_cells(row, self._LEAGUE_KEYS)
                        for row in rows],
                '队名': name,
            })
        if msg['msg']:
            self.mongo[ball].insert_one(msg, '赛事ID')

    @classmethod
    def start(cls):
        """Crawl the four match lists now, then every TEST_EVENTS_INTERVAL seconds."""

        def run():
            for key in ('football_live', 'football_notStart',
                        'football_finished', 'football_other'):
                cls(key).get_events_ID()

        run()
        # Re-run the crawl periodically.
        schedule.every(TEST_EVENTS_INTERVAL).seconds.do(run)
        while True:
            schedule.run_pending()
            time.sleep(1)
class HomePage(object):
    """Enrich each match stored under one Redis key with detail-page data.

    For every match record the ``detail-<id>`` page on live.leisu.com is
    fetched; half-time score, corner count and commentary are added to the
    record, which is then written through ``self.mongo`` keyed by '赛事ID'.
    """

    def __init__(self, name):
        """Store the Redis key and create the storage clients, queue and pool."""
        self.name = name
        # Database connections.
        self.mongo = MongoDB('football_home_page')
        self.redis = Redis_Pool()
        # Pending match records and the coroutine pool that processes them.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another task while records remain queued.

        Args:
            temp: value handed over by the pool; unused.
        """
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__dispose_one_data,
                                            callback=self.__chech_callbake)

    def get_data(self):
        """Queue every match record under ``self.name`` and process them all."""
        live = self.redis.find(self.name)
        for _key, raw in live.items():
            # NOTE(review): eval() executes whatever text is stored in Redis.
            # Consider a safe literal parser if the stored format allows it.
            self.queue.put(eval(raw.decode("utf-8")))
        for _ in range(TEST_HOME_ASYNC_COUNT):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.__dispose_one_data,
                                                callback=self.__chech_callbake)
        # Block until every scheduled task has finished.
        self.coroutine_pool.join()

    @staticmethod
    def _join_pair(values, sep):
        """Return ``values[0] + sep + values[1]``, or '' when fewer than two exist."""
        return values[0] + sep + values[1] if len(values) >= 2 else ''

    def __dispose_one_data(self):
        """Take one match record, scrape its detail page and store the result."""
        proxy = get_ip()
        if self.queue.empty():
            return
        msg = self.queue.get()
        url = 'https://live.leisu.com/detail-' + str(msg['赛事ID'])
        try:
            if proxy:
                response = requests.get(url,
                                        proxies={'https': 'https://' + proxy},
                                        headers=HEADERS,
                                        allow_redirects=False).text
            else:
                response = requests.get(url,
                                        headers=HEADERS,
                                        allow_redirects=False).text
        except requests.exceptions.ProxyError:
            # An unusable proxy just drops this record.
            return
        # Commentary embedded in the page script, with the site name replaced.
        narrate_ = re.findall('EVENT=(.*?])', response)
        narrate = narrate_[0].replace('雷速体育', '我们') if narrate_ else ''
        html = etree.HTML(response)
        # Score: both sides must be present, otherwise store ''.
        home = html.xpath('//div[@class="score home"]/text()')
        away = html.xpath('//div[@class="score away"]/text()')
        score = f'{home[0]}-{away[0]}' if home and away else ''  # noqa: F841 (kept for parity; not stored)
        # Half-time score.
        half_score_ = html.xpath('//span[@class ="half-score"]/text()')
        half_score = half_score_[0] if half_score_ else ''
        # Corners: needs one value per side, otherwise store ''.
        lab_corner = self._join_pair(
            html.xpath(
                '//span[@class="lab corner"]/span[@class="text"]/text()'),
            '-')
        msg['半场'] = half_score
        msg['角球'] = lab_corner
        msg['解说'] = narrate
        self.mongo.insert_one(msg, '赛事ID')

    @classmethod
    def start(cls):
        """Crawl the four match lists now, then schedule a re-crawl every 2 seconds."""

        def run():
            notStart = cls('football_notStart')
            finished = cls('football_finished')
            other = cls('football_other')
            live = cls('football_live')
            live.get_data()
            notStart.get_data()
            finished.get_data()
            other.get_data()

        run()
        # Re-run the crawl periodically.
        schedule.every(2).seconds.do(run)
        while True:
            schedule.run_pending()
            time.sleep(1)
def __init__(self):
    """Create the storage clients and remember the site's home page URL."""
    # Database connections.
    self.mongo, self.redis = MongoDB('home_page'), Redis_Pool()
    # Home page URL.
    self.url = 'https://live.leisu.com/'
class RedisID(object):
    """Refresh the Redis lists of live, upcoming, finished and other matches.

    The home page of live.leisu.com embeds a JSON object after ``THATDATA=``;
    it is decoded and every match is written to Redis as the string form of a
    dict, under one key per match category.
    """

    def __init__(self):
        """Create the storage clients and remember the home page URL."""
        # Database connections.
        self.mongo = MongoDB('home_page')
        self.redis = Redis_Pool()
        # Home page URL.
        self.url = 'https://live.leisu.com/'

    def get_data(self):
        """Download the home page and rewrite the four Redis match lists.

        Failures (network errors, unexpected page layout, bad JSON) are
        swallowed so the caller's loop simply retries; interpreter-level
        signals such as KeyboardInterrupt are not intercepted.
        """
        try:
            proxy = get_ip()
            if proxy:
                response = requests.get(self.url,
                                        headers=HEADERS,
                                        proxies={'https': 'https://' + proxy},
                                        allow_redirects=False,
                                        timeout=3).text
            else:
                # Same timeout as the proxy branch so a stalled connection
                # cannot block the refresh loop indefinitely.
                response = requests.get(self.url,
                                        headers=HEADERS,
                                        allow_redirects=False,
                                        timeout=3).text
            # The second 'THATDATA=' match holds the JSON payload.
            result = re.findall('THATDATA=(.*})', response)
            if len(result) < 2:
                return
            data = json.loads(result[1])
            teams = data['teams']
            events = data['events']
            matches = data['matchesTrans']
            for redis_key, category in (('football_live', 'live'),
                                        ('football_notStart', 'notStart'),
                                        ('football_finished', 'finished'),
                                        ('football_other', 'other')):
                self._dispose_live(redis_key, events, matches[category], teams)
        except Exception:
            # Best effort: the next call retries from scratch.
            pass

    @staticmethod
    def _build_record(li, events, teams):
        """Build the dict stored in Redis for one raw match entry ``li``.

        Reads positions 0 (match ID), 1 (event ID), 3 (kick-off timestamp),
        5 and 6 (home / away team info, team ID first) of ``li``.
        """
        event_info = events[str(li[1])]
        home_team_ID = li[5][0]
        away_team_ID = li[6][0]
        return {
            '赛事ID': li[0],
            '赛事': event_info[0].split(',')[0],
            '赛事LOGO': 'https://cdn.leisu.com/eventlogo/' + event_info[-2],
            '时间': time.strftime('%H:%M', time.localtime(li[3])),
            # Minutes since kick-off minus 20, truncated to an int.
            '状态': int((time.time() - li[3]) / 60 - 20),
            '主场球队': teams[str(home_team_ID)][0].split(',')[0],
            '主场球队ID': home_team_ID,
            '客场球队': teams[str(away_team_ID)][0].split(',')[0],
            '客场球队ID': away_team_ID,
        }

    def _dispose_live(self, name, events, type, teams):
        """Replace Redis key ``name`` with the matches listed in ``type``.

        Args:
            name: Redis key to clear and refill.
            events: mapping of event ID (as str) to event info.
            type: iterable of raw match entries (the name shadows the builtin
                but is kept for compatibility).
            teams: mapping of team ID (as str) to team info.
        """
        self.redis.delete(name)
        for li in type:
            try:
                record = self._build_record(li, events, teams)
            except (KeyError, IndexError, TypeError):
                # Skip a malformed entry rather than dropping every match
                # that follows it after the key was already cleared.
                continue
            self.redis.insert_one(name, record['赛事ID'], str(record))

    @classmethod
    def start(cls):
        """Refresh the Redis lists in an endless loop."""
        run = cls()
        while True:
            try:
                run.get_data()
            except requests.exceptions.ProxyError:
                pass