def __init__(self, name):
    """Set up the storage handles and the work-scheduling primitives.

    Args:
        name: Stored on the instance as ``self.name``.
    """
    self.name = name
    self.redis = Redis_Pool()
    self.mongo_index = MongoDB('football_index')
    # Work queue and coroutine pool.
    self.queue = Queue()
    self.coroutine_pool = Pool()
def __init__(self):
    """Open the database handles and create the queue and coroutine pool."""
    # Database connections.
    self.mongo = MongoDB('football_info')
    self.redis = Redis_Pool()
    # Work queue and coroutine pool.
    self.queue = Queue()
    self.coroutine_pool = Pool()
def __init__(self, name):
    """Open the database handles and create the queue and coroutine pool.

    Args:
        name: Stored on the instance as ``self.name``.
    """
    self.name = name
    # Database connections.
    self.mongo = MongoDB('football_home_page')
    self.redis = Redis_Pool()
    # Work queue and coroutine pool.
    self.queue = Queue()
    self.coroutine_pool = Pool()
class Delete_home_page(object):
    """Purge expired match records from the four match-related Mongo handles."""

    # A record is expired once its match date is this many days in the past.
    EXPIRE_DAYS = 4

    def __init__(self):
        """Open one handle per collection that stores per-match data."""
        self.mongo_home_page = MongoDB('home_page')
        self.mongo_home_info = MongoDB('football_info')
        self.mongo_detail = MongoDB('football_index')
        self.mongo_shujufenxi = MongoDB('football_shujufenxi')

    @staticmethod
    def _is_expired(match_time, cutoff):
        """Return True when ``match_time`` falls on or before ``cutoff``.

        Args:
            match_time: Stored match time; expected to start with ``YYYYMMDD``.
            cutoff: Cutoff date formatted as ``YYYYMMDD``.
        """
        if not isinstance(match_time, str) or not match_time:
            return False
        # Keep the historical rule: the cutoff date appearing anywhere matches.
        if cutoff in match_time:
            return True
        # Also catch records older than the cutoff, which the exact-match rule
        # left behind forever whenever a midnight run was missed.
        prefix = match_time[:8]
        return len(prefix) == 8 and prefix.isdigit() and prefix <= cutoff

    def get_data(self):
        """Delete every expired match from all four handles."""
        # Cutoff date for expired data.
        cutoff = time.strftime(
            '%Y%m%d',
            time.localtime(time.time() - self.EXPIRE_DAYS * 24 * 3600))
        for data in self.mongo_home_page.find():
            if not self._is_expired(data.get('比赛时间'), cutoff):
                continue
            ID = data.get('赛事ID')
            if ID is None:
                # Nothing to key the deletion on.
                continue
            # Remove the expired match everywhere it is stored.
            self.mongo_home_page.delete_one(ID)
            self.mongo_home_info.delete_one(ID)
            self.mongo_detail.delete_one(ID)
            self.mongo_shujufenxi.delete_one(ID)

    def run(self):
        """Run the purge, but only during the local hour 00."""
        if time.strftime('%H', time.localtime(time.time())) == '00':
            self.get_data()

    @classmethod
    def start(cls):
        """Schedule ``run`` once an hour and block forever serving the schedule."""
        st = cls()
        # Execute run() at a fixed interval.
        schedule.every(1).hours.do(st.run)
        while True:
            schedule.run_pending()
            time.sleep(60)
def __init__(self, name):
    """Open the storage handles and create the queue and coroutine pool.

    Args:
        name: Stored on the instance as ``self.name``.
    """
    self.name = name
    self.redis = Redis_Pool()
    self.mongo_home_page = MongoDB('home_page')
    # One handle per analysis section, keyed by the section name.
    section_collections = (
        ('historical', 'football_historical'),
        ('recent-record', 'football_recent'),
        ('since-trend', 'football_trend'),
        ('injury-situation', 'football_situation'),
        ('league-points', 'football_league_points'),
    )
    self.mongo = {
        section: MongoDB(collection)
        for section, collection in section_collections
    }
    # Work queue and coroutine pool.
    self.queue = Queue()
    self.coroutine_pool = Pool()
def __init__(self):
    """Open one MongoDB handle per collection that stores per-match data."""
    self.mongo_home_page = MongoDB('home_page')
    self.mongo_home_info = MongoDB('football_info')
    self.mongo_detail = MongoDB('football_index')
    self.mongo_shujufenxi = MongoDB('football_shujufenxi')
class IndexSpider(object):
    """Scrape the "3in1" odds page of every match listed under one Redis key.

    Match IDs are read from Redis, queued, and fetched through the coroutine
    pool; parsed odds are saved through the ``football_index`` MongoDB handle.
    """

    def __init__(self, name):
        """Create the storage handles, the work queue and the coroutine pool.

        Args:
            name: Redis key whose entries describe the matches to crawl.
        """
        self.name = name
        self.redis = Redis_Pool()
        self.mongo_index = MongoDB('football_index')
        # Work queue and coroutine pool.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another fetch while IDs remain queued."""
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__get_data,
                                            callback=self.__chech_callbake)

    def get_eventID(self):
        """Queue every match ID stored under ``self.name`` and crawl them."""
        datas = self.redis.find(self.name)
        for _key, value in datas.items():
            # NOTE(review): eval() executes whatever text is stored in Redis.
            # Safe only while Redis is written exclusively by trusted code.
            msg = eval(value.decode("utf-8"))
            self.queue.put(msg['赛事ID'])
        # Seed the pool; each task re-schedules itself through the callback
        # until the queue is drained.
        for _ in range(100):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(
                    self.__get_data, callback=self.__chech_callbake)
        # Wait for all scheduled work to finish.
        self.coroutine_pool.join()

    @staticmethod
    def _padded(values, size):
        """Return exactly ``size`` stripped strings, padding with ``''``."""
        cells = [value.strip() for value in list(values)[:size]]
        cells.extend([''] * (size - len(cells)))
        return cells

    @classmethod
    def _build_entry(cls, name, ouzhi_1, ouzhi_2, rangqiu_1, rangqiu_2,
                     rangqiu_3, jinqiu_1, jinqiu_2, jinqiu_3):
        """Assemble one bookmaker's odds record from the raw cell texts.

        Each ``*_1`` list is the first odds line (3 values); ``*_2`` / ``*_3``
        are the second line. Short or empty lists are padded with ``''`` so a
        partially filled table row never raises ``IndexError``.
        """
        ou1, ou2 = cls._padded(ouzhi_1, 3), cls._padded(ouzhi_2, 3)
        rq1, rq2 = cls._padded(rangqiu_1, 3), cls._padded(rangqiu_2, 2)
        rq3 = cls._padded(rangqiu_3, 1)
        jq1, jq2 = cls._padded(jinqiu_1, 3), cls._padded(jinqiu_2, 2)
        jq3 = cls._padded(jinqiu_3, 1)
        return {
            str(name): [{
                '欧指': {
                    '主胜': [ou1[0], ou2[0]],
                    '和局': [ou1[1], ou2[1]],
                    '客胜': [ou1[2], ou2[2]],
                },
                '让球': {
                    '主胜': [rq1[0], rq2[0]],
                    '盘口': [rq1[1], rq3[0]],
                    '客胜': [rq1[2], rq2[1]],
                },
                '进球数': {
                    '大球': [jq1[0], jq2[0]],
                    '和局': [jq1[1], jq3[0]],
                    '小球': [jq1[2], jq2[1]],
                },
            }]
        }

    def __get_data(self):
        """Fetch, parse and store the odds table of the next queued match."""
        proxy = get_ip()
        if self.queue.empty():
            return
        eventID = self.queue.get()
        request_kwargs = {'headers': HEADERS, 'allow_redirects': False}
        if proxy:
            proxy_url = 'https://' + proxy
            # The target URL is https, so the proxy has to be registered for
            # the https scheme too; with only 'http' it was never used.
            request_kwargs['proxies'] = {'http': proxy_url, 'https': proxy_url}
        try:
            response = requests.get(
                f'https://live.leisu.com/3in1-{str(eventID)}',
                **request_kwargs).text
        except requests.exceptions.RequestException:
            # Best effort: a failed fetch skips this match so the callback
            # chain keeps draining the queue.
            return
        html = etree.HTML(response)
        if html is None:
            # Nothing parseable (e.g. an empty redirect body).
            return
        msg = {'msg': [], '赛事ID': eventID}
        # The first table row is skipped.
        for row in html.xpath('/html/body/div[1]/div[3]//tr')[1:]:
            name_cells = row.xpath('./td[2]/span[2]/text()')
            name = name_cells[0].strip() if name_cells else ''
            if not name:
                name = 'Bet365'
            msg['msg'].append(self._build_entry(
                name,
                # European odds.
                row.xpath('./td[3]/div[1]/span/text()'),
                row.xpath('./td[3]/div[2]/span/span/span/text()'),
                # Handicap.
                row.xpath('./td[4]/div[1]/span/text()'),
                row.xpath('./td[4]/div[2]/span/span/span/text()'),
                row.xpath('./td[4]/div[2]/span/span[2]/text()'),
                # Goals.
                row.xpath('./td[5]/div[1]/span/text()'),
                row.xpath('./td[5]/div[2]/span/span/span/text()'),
                row.xpath('./td[5]/div[2]/span/span[2]/text()'),
            ))
        # Save only when at least one row was parsed.
        if msg['msg']:
            self.mongo_index.insert_one(msg, '赛事ID')

    @classmethod
    def start(cls):
        """Crawl all four match categories every 2 seconds, forever."""

        def run():
            notStart = cls('football_notStart')
            finished = cls('football_finished')
            other = cls('football_other')
            live = cls('football_live')
            live.get_eventID()
            notStart.get_eventID()
            finished.get_eventID()
            other.get_eventID()

        # Execute run() at a fixed interval.
        schedule.every(2).seconds.do(run)
        while True:
            schedule.run_pending()
            time.sleep(1)
class IndexSpider(object):
    """Scrape the "3in1" odds page of every match listed under Redis keys.

    Match IDs are read from Redis, queued, and fetched through the coroutine
    pool; parsed odds are saved through the ``football_index`` MongoDB handle.
    """

    def __init__(self):
        """Create the storage handles, the work queue and the coroutine pool."""
        self.redis = Redis_Pool()
        self.mongo_index = MongoDB('football_index')
        # Work queue and coroutine pool.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another fetch while IDs remain queued."""
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__get_data,
                                            callback=self.__chech_callbake)

    def get_eventID(self, name):
        """Queue every match ID stored under Redis key ``name`` and crawl them.

        Args:
            name: Redis key whose field names are the match IDs.
        """
        datas = self.redis.find(name)
        for key, _value in datas.items():
            # NOTE(review): eval() executes whatever text is stored in Redis.
            # Safe only while Redis is written exclusively by trusted code.
            self.queue.put(eval(key))
        # Seed the pool; each task re-schedules itself through the callback
        # until the queue is drained.
        for _ in range(TEST_INDEX_DATA):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(
                    self.__get_data, callback=self.__chech_callbake)
        # Wait for all scheduled work to finish.
        self.coroutine_pool.join()

    @staticmethod
    def _padded(values, size):
        """Return exactly ``size`` stripped strings, padding with ``''``."""
        cells = [value.strip() for value in list(values)[:size]]
        cells.extend([''] * (size - len(cells)))
        return cells

    @classmethod
    def _build_entry(cls, name, ouzhi_1, ouzhi_2, rangqiu_1, rangqiu_2,
                     rangqiu_3, jinqiu_1, jinqiu_2, jinqiu_3):
        """Assemble one bookmaker's odds record from the raw cell texts.

        Each ``*_1`` list is the first odds line (3 values); ``*_2`` / ``*_3``
        are the second line. Short or empty lists are padded with ``''`` so a
        partially filled table row never raises ``IndexError``.
        """
        ou1, ou2 = cls._padded(ouzhi_1, 3), cls._padded(ouzhi_2, 3)
        rq1, rq2 = cls._padded(rangqiu_1, 3), cls._padded(rangqiu_2, 2)
        rq3 = cls._padded(rangqiu_3, 1)
        jq1, jq2 = cls._padded(jinqiu_1, 3), cls._padded(jinqiu_2, 2)
        jq3 = cls._padded(jinqiu_3, 1)
        return {
            str(name): [{
                '欧指': {
                    '主胜': [ou1[0], ou2[0]],
                    '和局': [ou1[1], ou2[1]],
                    '客胜': [ou1[2], ou2[2]],
                },
                '让球': {
                    '主胜': [rq1[0], rq2[0]],
                    '盘口': [rq1[1], rq3[0]],
                    '客胜': [rq1[2], rq2[1]],
                },
                '进球数': {
                    '大球': [jq1[0], jq2[0]],
                    '和局': [jq1[1], jq3[0]],
                    '小球': [jq1[2], jq2[1]],
                },
            }]
        }

    def __get_data(self):
        """Fetch, parse and store the odds table of the next queued match."""
        proxy = get_ip()
        if self.queue.empty():
            return
        eventID = self.queue.get()
        request_kwargs = {'headers': HEADERS}
        if proxy:
            proxy_url = 'https://' + proxy
            # The target URL is https, so the proxy has to be registered for
            # the https scheme too; with only 'http' it was never used.
            request_kwargs['proxies'] = {'http': proxy_url, 'https': proxy_url}
        try:
            response = requests.get(
                f'https://live.leisu.com/3in1-{str(eventID)}',
                **request_kwargs).text
        except requests.exceptions.RequestException:
            # Best effort: a failed fetch skips this match so the callback
            # chain keeps draining the queue.
            return
        html = etree.HTML(response)
        if html is None:
            # Nothing parseable in the response body.
            return
        msg = {'msg': [], '赛事ID': eventID}
        # The first table row is skipped.
        for row in html.xpath('/html/body/div[1]/div[3]//tr')[1:]:
            name_cells = row.xpath('./td[2]/span[2]/text()')
            name = name_cells[0].strip() if name_cells else ''
            if not name:
                name = 'Bet365'
            msg['msg'].append(self._build_entry(
                name,
                # European odds.
                row.xpath('./td[3]/div[1]/span/text()'),
                row.xpath('./td[3]/div[2]/span/span/span/text()'),
                # Handicap.
                row.xpath('./td[4]/div[1]/span/text()'),
                row.xpath('./td[4]/div[2]/span/span/span/text()'),
                row.xpath('./td[4]/div[2]/span/span[2]/text()'),
                # Goals.
                row.xpath('./td[5]/div[1]/span/text()'),
                row.xpath('./td[5]/div[2]/span/span/span/text()'),
                row.xpath('./td[5]/div[2]/span/span[2]/text()'),
            ))
        # Save only when at least one row was parsed.
        if msg['msg']:
            self.mongo_index.insert_one(msg, '赛事ID')

    def _crawl_keys(self, names):
        """Run ``get_eventID`` for each Redis key in its own thread and wait.

        NOTE(review): all threads share ``self.queue`` and
        ``self.coroutine_pool`` — confirm both are safe to use this way.
        """
        threads = [
            threading.Thread(target=self.get_eventID, args=(name, ))
            for name in names
        ]
        # Start the threads.
        for thread in threads:
            thread.start()
        # Wait for them to finish.
        for thread in threads:
            thread.join()

    def today_index(self):
        """Crawl the four same-day match categories."""
        self._crawl_keys(('football_live', 'football_notStart',
                          'football_finished', 'football_other'))

    def not_today_index(self):
        """Crawl the past and future match lists."""
        self._crawl_keys(('football_history_events',
                          'football_future_events'))

    def run(self):
        """Crawl today's matches; during local hour 00 also the other days."""
        self.today_index()
        if time.strftime('%H', time.localtime(time.time())) == '00':
            self.not_today_index()

    @classmethod
    def start(cls):
        """Crawl everything once, then re-run ``run`` every 10 seconds."""
        st = cls()
        # Initial full crawl. Calling run() here would crawl the non-today
        # keys twice when the process starts during hour 00.
        st.today_index()
        st.not_today_index()
        # Execute run() at a fixed interval.
        schedule.every(10).seconds.do(st.run)
        while True:
            schedule.run_pending()
            time.sleep(1)
def __init__(self):
    """Open the Redis handle and the ``home_page`` MongoDB handle."""
    self.redis = Redis_Pool()
    self.mongo = MongoDB('home_page')
class TimeDataSpider(object):
    """Crawl the finished (past) and scheduled (future) match lists by date."""

    # Extracts the logo path from an inline ``url(<path>?...)`` style value.
    _LOGO_PATTERN = re.compile(r'url\((.*?)\?')

    def __init__(self):
        """Open the Redis handle and the ``home_page`` MongoDB handle."""
        self.redis = Redis_Pool()
        self.mongo = MongoDB('home_page')

    def get_response(self, day):
        """Crawl one day: negative offsets are past days, others future days.

        Args:
            day: Day offset relative to today.
        """
        if day < 0:
            self._history_one_data(day)
        else:
            self._future_one_data(day)

    @staticmethod
    def _first(values, default=''):
        """Return the first element of ``values`` or ``default`` when empty."""
        return values[0] if values else default

    @classmethod
    def _logo_url(cls, styles):
        """Build the absolute logo URL from xpath ``@style`` results.

        Returns ``''`` when there is no style or it carries no ``url(...)``.
        """
        if not styles:
            return ''
        found = cls._LOGO_PATTERN.findall(styles[0])
        return 'https:' + found[0] if found else ''

    @staticmethod
    def _team_id(hrefs):
        """Return the text after the last ``-`` of the first href, or ``''``."""
        return hrefs[0].split('-')[-1] if hrefs else ''

    @staticmethod
    def _date_for(day):
        """Return the local date ``day`` days from now as ``YYYYMMDD``."""
        return time.strftime('%Y%m%d',
                             time.localtime(time.time() + day * 24 * 3600))

    def _fetch_items(self, url, list_xpath):
        """Download ``url`` and return the match ``<li>`` nodes it contains.

        Returns an empty list when the request fails or nothing is parseable.
        """
        proxy = get_ip()
        request_kwargs = {'headers': HEADERS}
        if proxy:
            proxy_url = 'https://' + proxy
            # The target URL is https, so the proxy has to be registered for
            # the https scheme too; with only 'http' it was never used.
            request_kwargs['proxies'] = {'http': proxy_url, 'https': proxy_url}
        try:
            response = requests.get(url, **request_kwargs).text
        except requests.exceptions.RequestException as e:
            # Best effort: one failed day must not take the crawl down.
            print(e)
            return []
        html = etree.HTML(response)
        if html is None:
            return []
        return html.xpath(list_xpath)

    @classmethod
    def _common_fields(cls, data, date, score_xpath):
        """Parse the fields shared by past and future match entries.

        Args:
            data: One match ``<li>`` node.
            date: ``YYYYMMDD`` date of the page, prefixed to the kick-off time.
            score_xpath: XPath of the score text (differs between pages).

        Returns:
            The field dict, or ``None`` when the node has no ``data-id``.
        """
        ids = data.xpath('./@data-id')
        if not ids:
            return None
        kickoff = data.xpath('.//span[@class="lab-time"]/text()')
        return {
            '赛事ID': ids[0],
            '赛事LOGO': cls._logo_url(
                data.xpath('.//span[@class="lab-events"]/span/@style')),
            '赛事': cls._first(
                data.xpath('.//a[@class="event-name"]/span/text()')),
            '轮次': cls._first(
                data.xpath('.//span[@class="lab-round"]/text()')),
            '比赛时间': date + kickoff[0] if kickoff else '',
            '主场球队': cls._first(
                data.xpath('.//span[@class="lab-team-home"]/span/a/text()')),
            '主场球队ID': cls._team_id(
                data.xpath('.//span[@class="lab-team-home"]/span/a/@href')),
            '比分': cls._first(data.xpath(score_xpath)),
            '客场球队': cls._first(
                data.xpath('.//span[@class="lab-team-away"]/span/a/text()')),
            '客场球队ID': cls._team_id(
                data.xpath('.//span[@class="lab-team-away"]/span/a/@href')),
        }

    def _history_one_data(self, day):
        """Crawl and store the finished matches of the day at offset ``day``."""
        date = self._date_for(day)
        items = self._fetch_items(
            'https://live.leisu.com/wanchang?date={}'.format(date),
            '//*[@id="finished"]/ul/li')
        for data in items:
            dic = self._common_fields(data, date,
                                      './/span[@class="score"]/b/text()')
            if dic is None:
                continue
            # Half-time score, corners, win/lose odds, handicap, goals.
            dic['半场'] = self._first(
                data.xpath('.//span[@class="lab-half"]/text()'))
            dic['角球'] = self._first(
                data.xpath('.//span[@class="lab-corner"]/span/text()'))
            dic['胜负'] = self._first(
                data.xpath('.//span[@class="lab-bet-odds"]/span/text()'))
            dic['让球'] = self._first(
                data.xpath('.//span[@class="lab-ratel"]/text()'))
            dic['进球数'] = self._first(
                data.xpath('.//span[@class="lab-size"]/span/text()'))
            # Save to the databases.
            self.mongo.insert_one(dic, '赛事ID')
            self.redis.insert_one('football_history_events', dic['赛事ID'],
                                  dic['赛事ID'])

    def _future_one_data(self, day):
        """Crawl and store the scheduled matches of the day at offset ``day``."""
        date = self._date_for(day)
        items = self._fetch_items(
            'https://live.leisu.com/saicheng?date={}'.format(date),
            '//*[@id="notStart"]/ul/li')
        for data in items:
            dic = self._common_fields(data, date,
                                      './/span[@class="score"]/span/text()')
            if dic is None:
                continue
            # Handicap.
            dic['让球'] = self._first(
                data.xpath('.//span[@class="lab-ratel"]/text()'))
            # Save to the databases.
            self.mongo.insert_one(dic, '赛事ID')
            self.redis.insert_one('football_future_events', dic['赛事ID'],
                                  dic['赛事ID'])

    def run(self):
        """Crawl day offsets -3..5 (today excluded), one thread per day."""
        threads = []
        for i in range(-3, 6):
            if i == 0:
                continue
            thread = threading.Thread(target=self.get_response, args=(i, ))
            threads.append(thread)
            # Start the thread.
            thread.start()
        # Wait for every day to finish.
        for thread in threads:
            thread.join()

    @classmethod
    def start(cls):
        """Crawl once immediately, then re-crawl during local hour 00."""
        st = cls()

        def ss():
            if time.strftime('%H', time.localtime(time.time())) == '00':
                st.run()

        st.run()
        # Check at a fixed interval whether a re-crawl is due.
        schedule.every(600).seconds.do(ss)
        while True:
            schedule.run_pending()
            time.sleep(1)
class HomePage(object):
    """Collect text commentary, match statistics and line-ups per match."""

    # Base URL prepended to the avatar file names returned by the API.
    AVATAR_PREFIX = 'https://cdn.leisu.com/avatar/'
    # Placeholder stored when the API has nothing for a section.
    NO_INFO = '暂无信息'

    def __init__(self):
        """Open the database handles and create the queue and coroutine pool."""
        # Database connections.
        self.mongo = MongoDB('football_info')
        self.redis = Redis_Pool()
        # Work queue and coroutine pool.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another task while IDs remain queued."""
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__dispose_one_data,
                                            callback=self.__chech_callbake)

    def get_data(self, name):
        """Queue every match ID stored under Redis key ``name`` and process them.

        Args:
            name: Redis key whose field names are the match IDs.
        """
        live = self.redis.find(name)
        for key, _value in live.items():
            # NOTE(review): eval() executes whatever text is stored in Redis.
            # Safe only while Redis is written exclusively by trusted code.
            self.queue.put(eval(key))
        # Seed the pool; each task re-schedules itself through the callback.
        for _ in range(TEST_HOME_ASYNC_COUNT):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(
                    self.__dispose_one_data, callback=self.__chech_callbake)
        # Wait for all scheduled work to finish.
        self.coroutine_pool.join()

    def __dispose_one_data(self):
        """Fetch and store commentary plus line-ups for the next queued match."""
        if self.queue.empty():
            return
        ID = self.queue.get()
        # Text commentary and in-match statistics.
        info = self._text_broadcast(ID)
        # Team line-ups.
        info['球队信息'] = self._teams_info(ID)
        # Match ID used as the storage key.
        info['赛事ID'] = ID
        self.mongo.insert_one(info, '赛事ID')

    @classmethod
    def _build_stats(cls, stats):
        """Map the positional ``stats`` list to a labelled dict.

        ``stats`` is expected to hold nine per-side dicts in the API's fixed
        order. The stored shots value is the API's shots entry plus the
        shots-on-target entry, per side.

        Returns:
            The labelled dict, or the "no info" placeholder when ``stats`` is
            empty or shorter than nine entries.
        """
        if not stats or len(stats) < 9:
            return cls.NO_INFO
        on_target = stats[4]
        shots = dict(stats[5])
        for side, count in on_target.items():
            shots[side] = shots.get(side, 0) + count
        return {
            '角球': stats[0],
            '黄牌': stats[1],
            '红牌': stats[2],
            '点球': stats[3],
            '射正': on_target,
            '射门': shots,
            '进攻': stats[6],
            '危险进攻': stats[7],
            '控球率': stats[8],
        }

    def _text_broadcast(self, ID):
        """Return the commentary and statistics of match ``ID``.

        Returns:
            A dict with ``文字解说`` and ``赛场内容``; empty when the API
            returns nothing.
        """
        res = requests.get(
            'https://api.namitiyu.com/v1/football/match/detail?sid={}'.format(
                ID)).json()
        msg = {}
        if not res:
            return msg
        # Text commentary, with the source site's name replaced.
        events = res.get('event') or []
        for event in events:
            event['data'] = event['data'].replace('雷速体育', '我们')
        msg['文字解说'] = events
        # In-match statistics.
        msg['赛场内容'] = self._build_stats(res.get('stats'))
        return msg

    @classmethod
    def _prefix_avatars(cls, side):
        """Turn the avatar file names of one team into absolute URLs, in place.

        The first element of ``side`` is skipped; every following element is
        a group of player records whose index 2 holds the avatar file name.
        """
        for group in side[1:]:
            for player in group:
                if len(player) > 2 and player[2]:
                    player[2] = cls.AVATAR_PREFIX + player[2]
        return side

    def _teams_info(self, ID):
        """Return both teams' line-ups of match ``ID``.

        Returns:
            ``{'home': {'msg': ...}, 'away': {'msg': ...}}``, or the "no info"
            placeholder when the API returns no line-up.
        """
        res = requests.get(
            'https://api.namitiyu.com/v1/football/match/lineup?tid=1&sid={}'.
            format(ID)).json()
        lineup = res.get('lineup') if res else None
        if not lineup:
            return self.NO_INFO
        return {
            # Home side.
            'home': {'msg': self._prefix_avatars(lineup.get('home') or [])},
            # Away side.
            'away': {'msg': self._prefix_avatars(lineup.get('away') or [])},
        }

    def _process_keys(self, names):
        """Run ``get_data`` for each Redis key in its own thread and wait.

        NOTE(review): all threads share ``self.queue`` and
        ``self.coroutine_pool`` — confirm both are safe to use this way.
        """
        threads = [
            threading.Thread(target=self.get_data, args=(name, ))
            for name in names
        ]
        # Start the threads.
        for thread in threads:
            thread.start()
        # Wait for them to finish.
        for thread in threads:
            thread.join()

    def today_index(self):
        """Process the four same-day match categories."""
        self._process_keys(('football_live', 'football_notStart',
                            'football_finished', 'football_other'))

    def not_today_index(self):
        """Process the past and future match lists."""
        self._process_keys(('football_history_events',
                            'football_future_events'))

    def run(self):
        """Process today's matches; during local hour 00 also the other days."""
        self.today_index()
        if time.strftime('%H', time.localtime(time.time())) == '00':
            self.not_today_index()

    @classmethod
    def start(cls):
        """Process the other days once, then loop over today's matches forever."""
        st = cls()
        st.not_today_index()
        while True:
            st.today_index()
class HomePage(object):
    """Scrape each match's detail page for half-time score, corners and commentary."""

    def __init__(self, name):
        """Open the database handles and create the queue and coroutine pool.

        Args:
            name: Redis key whose entries describe the matches to process.
        """
        self.name = name
        # Database connections.
        self.mongo = MongoDB('football_home_page')
        self.redis = Redis_Pool()
        # Work queue and coroutine pool.
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __chech_callbake(self, temp):
        """Async callback: schedule another task while matches remain queued."""
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.__dispose_one_data,
                                            callback=self.__chech_callbake)

    def get_data(self):
        """Queue every match stored under ``self.name`` and process them all."""
        live = self.redis.find(self.name)
        for _key, value in live.items():
            # NOTE(review): eval() executes whatever text is stored in Redis.
            # Safe only while Redis is written exclusively by trusted code.
            self.queue.put(eval(value.decode("utf-8")))
        # Seed the pool; each task re-schedules itself through the callback.
        for _ in range(TEST_HOME_ASYNC_COUNT):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(
                    self.__dispose_one_data, callback=self.__chech_callbake)
        # Wait for all scheduled work to finish.
        self.coroutine_pool.join()

    @staticmethod
    def _extract_narration(response):
        """Return the ``EVENT=[...]`` payload of the page, rebranded, or ``''``."""
        found = re.findall(r'EVENT=(.*?])', response)
        return found[0].replace('雷速体育', '我们') if found else ''

    @staticmethod
    def _join_pair(values):
        """Return ``'<first>-<second>'``, or ``''`` with fewer than two values."""
        if len(values) < 2:
            return ''
        return values[0] + '-' + values[1]

    def __dispose_one_data(self):
        """Fetch the detail page of the next queued match and store the result."""
        proxy = get_ip()
        if self.queue.empty():
            return
        msg = self.queue.get()
        request_kwargs = {'headers': HEADERS, 'allow_redirects': False}
        if proxy:
            request_kwargs['proxies'] = {'https': 'https://' + proxy}
        try:
            response = requests.get(
                'https://live.leisu.com/detail-' + str(msg['赛事ID']),
                **request_kwargs).text
        except requests.exceptions.RequestException:
            # Best effort: proxy and other request failures skip this match
            # so the callback chain keeps draining the queue.
            return
        # Text commentary.
        narrate = self._extract_narration(response)
        html = etree.HTML(response)
        if html is None:
            # Nothing parseable (e.g. an empty redirect body): store nothing.
            return
        # Score.
        home = html.xpath('//div[@class="score home"]/text()')
        away = html.xpath('//div[@class="score away"]/text()')
        score = f'{home[0]}-{away[0]}' if home and away else ''  # noqa: F841
        # Half-time score.
        half_score_ = html.xpath('//span[@class ="half-score"]/text()')
        half_score = half_score_[0] if half_score_ else ''
        # Corners.
        lab_corner = self._join_pair(html.xpath(
            '//span[@class="lab corner"]/span[@class="text"]/text()'))
        msg['半场'] = half_score
        msg['角球'] = lab_corner
        msg['解说'] = narrate
        self.mongo.insert_one(msg, '赛事ID')

    @classmethod
    def start(cls):
        """Process all four match categories now and then every 2 seconds."""

        def run():
            notStart = cls('football_notStart')
            finished = cls('football_finished')
            other = cls('football_other')
            live = cls('football_live')
            live.get_data()
            notStart.get_data()
            finished.get_data()
            other.get_data()

        run()
        # Execute run() at a fixed interval.
        schedule.every(2).seconds.do(run)
        while True:
            schedule.run_pending()
            time.sleep(1)
def __init__(self):
    """Open the database handles and remember the home-page URL."""
    # Database connections.
    self.mongo = MongoDB('home_page')
    self.redis = Redis_Pool()
    # Home-page URL.
    self.url = 'https://live.leisu.com/'
class RedisID(object):
    """Scrape the live home page and refresh the per-category match lists."""

    # Seconds to wait for the home page before giving up on one attempt.
    REQUEST_TIMEOUT = 3

    def __init__(self):
        """Open the database handles and remember the home-page URL."""
        # Database connections.
        self.mongo = MongoDB('home_page')
        self.redis = Redis_Pool()
        # Home-page URL.
        self.url = 'https://live.leisu.com/'

    def get_data(self):
        """Download the home page and store every match category it lists.

        Any failure is printed and swallowed so the caller's loop keeps going.
        """
        try:
            proxy = get_ip()
            # The timeout applies to the direct request too; without it a
            # stalled connection would block the endless crawl loop.
            request_kwargs = {
                'headers': HEADERS,
                'timeout': self.REQUEST_TIMEOUT,
            }
            if proxy:
                request_kwargs['proxies'] = {'https': 'https://' + proxy}
            response = requests.get(self.url, **request_kwargs).text
            # Pull the embedded JS data out of the page.
            result = re.findall(r'THATDATA=(.*})', response)
            if len(result) < 2:
                print('THATDATA payload not found in the home page')
                return
            # The second occurrence is the one parsed as JSON.
            data = json.loads(result[1])
            teams = data['teams']
            events = data['events']
            matches = data['matchesTrans']
            # Live, not started, finished and other matches, in that order.
            self._dispose_live('football_live', events, matches['live'], teams)
            self._dispose_live('football_notStart', events,
                               matches['notStart'], teams)
            self._dispose_live('football_finished', events,
                               matches['finished'], teams)
            self._dispose_live('football_other', events, matches['other'],
                               teams)
        except Exception as e:
            print(e)

    @staticmethod
    def _parse_match(li, events, teams):
        """Convert one raw positional match entry into the stored record.

        Args:
            li: Raw match entry (positional list) from the page data.
            events: Mapping of competition ID (as str) to competition info.
            teams: Mapping of team ID (as str) to team info.

        Raises:
            KeyError, IndexError, TypeError, ValueError: malformed entry.
        """
        home, away = li[5], li[6]
        event_info = events[str(li[1])]
        # The last element carries a JSON string holding the lottery numbers.
        lottery = json.loads(li[-1])[3]
        return {
            '赛事ID': li[0],
            '赛事': event_info[0].split(',')[0],
            '赛事LOGO': 'https://cdn.leisu.com/eventlogo/' + event_info[-2],
            '比赛时间': time.strftime('%Y%m%d %H:%M', time.localtime(li[3])),
            # Minutes since kick-off, minus 20.
            '状态': int((time.time() - li[3]) / 60 - 20),
            '竞彩编号': lottery[0],
            '北单编号': lottery[1],
            '足彩编号': lottery[2],
            '主场球队': teams[str(home[0])][0].split(',')[0],
            '主场球队ID': home[0],
            '比分': str(home[2]) + '-' + str(away[2]),
            '客场球队': teams[str(away[0])][0].split(',')[0],
            '客场球队ID': away[0],
            '半场': str(home[3]) + '-' + str(away[3]),
            '角球': str(home[6]) + '-' + str(away[6]),
        }

    def _dispose_live(self, name, events, type, teams):
        """Replace the Redis list ``name`` with the matches in ``type``.

        Entries are parsed before the old Redis key is deleted, so a malformed
        entry is skipped instead of aborting the batch with the key half
        filled.

        Args:
            name: Redis key of the category being refreshed.
            events: Mapping of competition ID (as str) to competition info.
            type: Raw match entries of this category.
            teams: Mapping of team ID (as str) to team info.
        """
        records = []
        for li in type:
            try:
                records.append(self._parse_match(li, events, teams))
            except (KeyError, IndexError, TypeError, ValueError) as e:
                print(e)
        self.redis.delete(name)
        for record in records:
            # Save to the databases.
            self.mongo.insert_one(record, '赛事ID')
            self.redis.insert_one(name, record['赛事ID'], record['赛事ID'])

    @classmethod
    def start(cls):
        """Refresh the match lists in an endless loop."""
        run = cls()
        while True:
            try:
                run.get_data()
            except requests.exceptions.ProxyError:
                pass