def douyu_parse(self, response):
    """Handle one page of the Douyu room directory listing.

    For every entry in ``data.rl`` a request to the Douyu open room API is
    scheduled with ``self.douyu_detail`` as callback; afterwards the next
    directory page is requested with this method as callback. Nothing is
    scheduled when ``code`` is non-zero or the room list is empty.

    The current page number is taken from the trailing ``/0_0/<page>`` part
    of the response URL; a URL without it raises ``IndexError``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    decoded_url = urllib.parse.unquote(response.url)
    current_page = re.findall(r'/0_0/(\d+)$', decoded_url)[0]

    if payload["code"] != 0:
        return
    rooms = payload["data"]["rl"]
    if len(rooms) == 0:
        return

    room_api = "http://open.douyucdn.cn/api/RoomApi/room/{}"
    for room in rooms:
        yield scrapy.Request(
            room_api.format(room["rid"]),
            callback=self.douyu_detail,
            # "Mayi" request headers and errback are currently disabled:
            # headers=generate_sign(),
            # errback=self.errback_handle,
        )

    # Next page.
    next_page_url = "https://www.douyu.com/gapi/rkc/directory/0_0/{}".format(
        int(current_page) + 1)
    yield scrapy.Request(
        next_page_url,
        callback=self.douyu_parse,
        # "Mayi" request headers and errback are currently disabled:
        # headers=generate_sign(),
        # errback=self.errback_handle,
    )
def bilibili_parse(self, response):
    """Handle one page of the Bilibili live room list.

    Each room in ``data`` produces a request to the room gift-list API
    (callback ``self.bilibili_detail``, sent with ``self.bili_headers``);
    then the following list page is requested with this method as callback.
    An empty ``data`` list ends pagination.

    The page number is parsed from the ``&page=<n>&page_size=30`` suffix of
    the response URL; a URL without it raises ``IndexError``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    decoded_url = urllib.parse.unquote(response.url)
    current_page = re.findall(r'&page=(\d+)&page_size=30$', decoded_url)[0]

    rooms = payload["data"]
    if len(rooms) == 0:
        return

    gift_api = "https://api.live.bilibili.com/gift/v2/live/room_gift_list?roomid={}&area_v2_id={}"
    for room in rooms:
        room_id = room["roomid"]
        area_id = room["area_v2_id"]
        yield scrapy.Request(
            gift_api.format(room_id, area_id),
            callback=self.bilibili_detail,
            headers=self.bili_headers,
        )

    # Next page.
    room_list_api = "http://api.vc.bilibili.com/room/v1/area/getRoomList?parent_area_id=2&cate_id=0&area_id=0&sort_type=online&page={}&page_size=30"
    yield scrapy.Request(
        room_list_api.format(int(current_page) + 1),
        callback=self.bilibili_parse,
    )
def egame_detail(self, response):
    """Convert an eGame gift-list response into ``Xj_gift_value`` items.

    Two gift lists are read from ``data.key.retBody.data``: the one under
    ``fans_guardian.list`` and the top-level ``list``. One item is yielded
    per gift, with ``price`` set to the integer ``price`` field divided by
    ``self.egame_conver`` and rounded to two decimals.

    Raises ``KeyError`` when the response lacks the expected keys.
    """
    logging.error("企鹅电竞的礼物详情方法..........")
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    info_dict = json.loads(response.body.decode())
    # The payload is passed as a logging argument, so the message needs a
    # placeholder for it. Without one, logging reports a string-formatting
    # error instead of emitting the record.
    logging.error("企鹅电竞的礼物内容...............%s", info_dict)

    gift_data = info_dict["data"]["key"]["retBody"]["data"]
    guardian_gifts = gift_data["fans_guardian"]["list"]
    common_gifts = gift_data["list"]
    logging.error("企鹅电竞的礼物集合长度:{} b:{}".format(
        len(guardian_gifts), len(common_gifts)))

    # Both lists share the same item layout, so one loop handles them in
    # the original order (guardian gifts first).
    for gifts in (guardian_gifts, common_gifts):
        for gift in gifts:
            item = Xj_gift_value()
            item["name"] = gift["name"]
            item["gift_id"] = gift["id"]
            item["platform_id"] = self.egame_id
            item["price"] = round(int(gift["price"]) / self.egame_conver, 2)
            yield item
def egame_parse(self, response):
    """Inspect an eGame room response and report its live state.

    Reads ``roomid`` and ``url`` from ``response.meta``. When ``ecode`` is
    zero:

    * ``is_live == 1``: stores ``start_tm`` on a new ``Xj_view_liveItem``
      and requests the barrage API (callback ``self.egame_detail``),
      passing copies of the URL and the item through ``meta``.
    * ``is_live == 0``: yields the item with ``end_tm``, a view count of 0
      and the anchor id looked up in ``self.relation``.

    A ``KeyError`` while reading the live flag is logged and ends the
    callback without output.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    roomid = response.meta["roomid"]
    url = response.meta["url"]
    payload = json.loads(response.body.decode())
    if payload["ecode"] != 0:
        return

    try:
        room_data = payload["data"]["key"]["retBody"]["data"]
        live_status = room_data["profile_info"]["is_live"]
    except KeyError as err:
        logging.error(
            "spider:{} 从该直播间抓取直播状态时取值失败 url:{} error:{}".format(
                self.name, url, err))
        return

    item = Xj_view_liveItem()
    if live_status == 1:
        # Room is live.
        video_info = room_data["video_info"]
        item["start_time"] = video_info["start_tm"]
        pid = video_info["pid"]
        barrage_api = 'http://wdanmaku.egame.qq.com/cgi-bin/pgg_barrage_async_fcgi?param={"key":{"module":"pgg_live_barrage_svr","method":"get_barrage","param":{"anchor_id":%d,"vid":%s,"scenes":4096,"last_tm":%d}}}&app_info={"platform":4,"terminal_type":2,"egame_id":"egame_official"}&g_tk=&p_tk=&tt=1'
        yield scrapy.Request(
            barrage_api % (int(roomid), pid, int(time.time())),
            callback=self.egame_detail,
            meta={
                "url": deepcopy(url),
                "item": deepcopy(item),
            },
        )
    elif live_status == 0:
        # Room is offline.
        item["end_time"] = room_data["video_info"]["end_tm"]
        item["view_num"] = 0
        item["anchor_id"] = self.relation[url]
        yield item
def huomao_parse(self, response):
    """Handle one page of the Huomao channel list.

    When ``code`` is 100 and ``data.channelList`` is non-empty, a gift API
    request (callback ``self.huomao_detail``) is scheduled per channel and
    the next list page is then requested with this method as callback.

    The page number is parsed from the ``?page=<n>&`` part of the response
    URL; a URL without it raises ``IndexError``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    decoded_url = urllib.parse.unquote(response.url)
    current_page = re.findall(r'\?page=(\d+)&', decoded_url)[0]

    if payload["code"] != 100:
        return
    channels = payload["data"]["channelList"]
    if len(channels) == 0:
        return

    gift_api = "http://www.huomao.com/ajax/getNewGift?cid={}&cache_time={}&face_label=0"
    for channel in channels:
        cid = channel["id"]
        room_page = "https://www.huomao.com/{}".format(channel["room_number"])
        # NOTE(review): the shared headers dict is updated here, but the
        # request below is not given any headers -- confirm whether
        # headers=self.huomao_headers was intended.
        self.huomao_headers["Referer"] = room_page
        yield scrapy.Request(
            gift_api.format(cid, int(time.time())),
            callback=self.huomao_detail,
        )

    # Next page.
    channel_list_api = "https://www.huomao.com/channels/channel.json?page={}&game_url_rule=all"
    yield scrapy.Request(
        channel_list_api.format(int(current_page) + 1),
        callback=self.huomao_parse,
    )
def bilibili_parse(self, response):
    """Resolve a Bilibili room id and request the room's info API.

    Reads ``url`` from ``response.meta``. When ``code`` is zero, the
    ``data.room_id`` value is used to build a ``Room/get_info`` request
    handled by ``self.bilibili_detail``; a copy of the URL travels along in
    ``meta``. Any other ``code`` produces no output.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    payload = json.loads(response.body.decode())
    if payload["code"] != 0:
        return

    info_api = "https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}&from=room"
    yield scrapy.Request(
        info_api.format(payload["data"]["room_id"]),
        callback=self.bilibili_detail,
        meta={"url": deepcopy(url)},
    )
def egame_detail(self, response):
    """Complete a live eGame item with its online count and yield it.

    The partially filled item and the room URL arrive through
    ``response.meta``. When ``ecode`` is zero, ``view_num`` is set from
    ``data.key.retBody.data.online_count`` and ``anchor_id`` from
    ``self.relation[url]``; otherwise nothing is yielded.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    item = response.meta["item"]
    url = response.meta["url"]
    payload = json.loads(response.body.decode())
    if payload["ecode"] != 0:
        return

    barrage_data = payload["data"]["key"]["retBody"]["data"]
    item["view_num"] = barrage_data["online_count"]
    item["anchor_id"] = self.relation[url]
    yield item
def longzhu_parse(self, response):
    """Extract the Longzhu room id from a page and request its status.

    The first ``"RoomId"`` value found in the response body is used to
    build a room-status API request handled by ``self.longzhu_detail``,
    sent with the headers from ``longzhu_header()`` and a copy of the URL
    from ``response.meta``. Without a match nothing is scheduled.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    room_ids = re.findall(r',"RoomId":(.*?),"Domain"', response.body.decode())
    if len(room_ids) == 0:
        return

    status_api = "http://roomapicdn.longzhu.com/room/roomstatus?roomid={}&lzv=1".format(
        room_ids[0])
    yield scrapy.Request(
        status_api,
        callback=self.longzhu_detail,
        headers=longzhu_header(),
        meta={"url": deepcopy(url)},
    )
def quanmin_parse(self, response):
    """Yield one ``Xj_gift_value`` per gift in a Quanmin gift response.

    Only responses whose ``code`` is zero are processed. ``price`` is the
    gift's ``diamond`` value divided by ``self.quanmin_conver``, rounded to
    two decimals (the value is used as-is, without an ``int`` conversion).
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    if payload["code"] != 0:
        return

    for gift in payload["data"]["lists"]:
        item = Xj_gift_value()
        item["gift_id"] = gift["id"]
        item["name"] = gift["name"]
        item["platform_id"] = self.quanmin_id
        diamonds = gift["diamond"]
        item["price"] = round(diamonds / self.quanmin_conver, 2)
        yield item
def longzhu_detail(self, response):
    """Yield a view item describing a Longzhu room's current state.

    A response containing a ``"Broadcast"`` key is treated as live: the
    item gets the current time as ``start_time`` and ``OnlineCount`` as
    ``view_num``. Otherwise the item gets the current time as ``end_time``
    and a view count of 0. ``anchor_id`` comes from ``self.relation`` keyed
    by the URL in ``response.meta``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    item = Xj_view_liveItem()
    status = json.loads(response.body.decode())
    now = int(time.time())

    if "Broadcast" in status.keys():
        # Room is live.
        item["start_time"] = now
        item["view_num"] = status["OnlineCount"]
    else:
        # Room is offline.
        item["end_time"] = now
        item["view_num"] = 0

    item["anchor_id"] = self.relation[url]
    yield item
def zhanqi_detail(self, response):
    """Yield gift items parsed from a Zhanqi room page.

    The gift list is the JSON assigned to ``oPageConfig.aRoomGiftList`` in
    the page source; a page without that assignment raises ``IndexError``.
    ``price`` is the integer ``price`` field divided by
    ``self.zhanqi_conver``, rounded to two decimals.
    """
    logging.error("战旗的礼物详情方法..........")
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    page_source = response.body.decode()
    gifts_json = re.findall(r'oPageConfig\.aRoomGiftList = (.+);', page_source)[0]
    gifts = json.loads(gifts_json)
    logging.error("战旗的礼物集合长度:{}".format(len(gifts)))

    for gift in gifts:
        item = Xj_gift_value()
        item["name"] = gift["name"]
        item["platform_id"] = self.zhanqi_id
        raw_price = gift["price"]
        item["price"] = round(int(raw_price) / self.zhanqi_conver, 2)
        item["gift_id"] = gift["id"]
        yield item
def longzhu_parse(self, response):
    """Yield one ``Xj_gift_value`` per entry of a Longzhu gift response.

    The response body is parsed as JSON and iterated directly. Each item
    takes ``title``, ``id`` and ``costValue`` from the entry; ``costValue``
    is stored unchanged as ``price``.
    """
    logging.error("龙珠的礼物详情方法..........")
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    gifts = json.loads(response.body.decode())
    logging.error("龙珠的礼物集合长度:{}".format(len(gifts)))

    for entry in gifts:
        item = Xj_gift_value()
        item["name"] = entry["title"]
        item["gift_id"] = entry["id"]
        item["platform_id"] = self.longzhu_id
        item["price"] = entry["costValue"]
        yield item
def quanmin_parse(self, response):
    """Yield a view item describing a Quanmin room's current state.

    The room counts as live when the first ``"play_status"`` value found in
    the body is the text ``true``. Live rooms get the current time as
    ``start_time`` and the raw ``"view"`` match (a string) as ``view_num``;
    all other rooms get the current time as ``end_time`` and a view count
    of 0. ``anchor_id`` comes from ``self.relation`` keyed by the URL in
    ``response.meta``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    item = Xj_view_liveItem()
    body = response.body.decode()
    status_hits = re.findall(r'"play_status":(.*?),"forbid_status"', body)
    is_live = len(status_hits) > 0 and status_hits[0] == "true"

    if is_live:
        # Room is live.
        item["start_time"] = int(time.time())
        item["view_num"] = re.findall(r'"view":(.*?),"weight"', body)[0]
    else:
        # Room is offline.
        item["end_time"] = int(time.time())
        item["view_num"] = 0

    item["anchor_id"] = self.relation[url]
    yield item
def bilibili_detail(self, response):
    """Yield one ``Xj_gift_value`` per gift in a Bilibili gift-list response.

    Only responses whose ``code`` is zero are processed. ``price`` is the
    integer ``price`` field divided by ``self.bilibili_conver``, rounded to
    three decimals.
    """
    logging.error("B站的礼物详情方法..........")
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    # Logs the size of the top-level payload, not of the gift list.
    logging.error("B站的礼物集合长度:{}".format(len(payload)))
    if payload["code"] != 0:
        return

    gifts = payload["data"]
    if len(gifts) == 0:
        return

    for gift in gifts:
        item = Xj_gift_value()
        item["name"] = gift["name"]
        item["gift_id"] = gift["id"]
        item["platform_id"] = self.bilibili_id
        raw_price = gift["price"]
        item["price"] = round(int(raw_price) / self.bilibili_conver, 3)
        yield item
def huomao_detail(self, response):
    """Yield one ``Xj_gift_value`` per gift in a Huomao gift response.

    Only responses whose ``code`` is 200 are processed. Gifts are read from
    ``data.giftInfo``; ``price`` is the integer ``price`` field divided by
    ``self.huomao_conver``, rounded to two decimals.
    """
    logging.error("火猫的礼物详情方法..........")
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    logging.error("火猫的礼物集合长度:{}, 内容:{}".format(len(payload), payload))
    if payload["code"] != 200:
        return

    for entry in payload["data"]["giftInfo"]:
        item = Xj_gift_value()
        item["name"] = entry["name"]
        item["gift_id"] = entry["id"]
        item["platform_id"] = self.huomao_id
        raw_price = entry["price"]
        item["price"] = round(int(raw_price) / self.huomao_conver, 2)
        yield item
def bilibili_detail(self, response):
    """Yield a view item describing a Bilibili room's current state.

    ``data.live_status == 1`` is treated as live: ``start_time`` is the
    ``live_time`` string (``YYYY-mm-dd HH:MM:SS``) converted with
    ``time.mktime`` and ``view_num`` is ``data.online``.
    ``data.live_status == 0`` is treated as offline: ``end_time`` is the
    current time and ``view_num`` is 0. Any other status yields nothing.
    ``anchor_id`` comes from ``self.relation`` keyed by the URL in
    ``response.meta``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    payload = json.loads(response.body.decode())
    item = Xj_view_liveItem()
    room = payload["data"]
    live_status = room["live_status"]

    if live_status == 1:
        # Room is live.
        started_at = time.strptime(room["live_time"], "%Y-%m-%d %H:%M:%S")
        item["start_time"] = int(time.mktime(started_at))
        item["view_num"] = room["online"]
    elif live_status == 0:
        # Room is offline.
        item["end_time"] = int(time.time())
        item["view_num"] = 0
    else:
        return

    item["anchor_id"] = self.relation[url]
    yield item
def douyu_parse(self, response):
    """Yield a view item describing a Douyu room's current state.

    Only responses whose ``error`` is zero are processed.
    ``data.room_status == "1"`` is treated as live: ``start_time`` is the
    ``start_time`` string (``YYYY-mm-dd HH:MM:SS``) converted with
    ``time.mktime`` and ``view_num`` is ``data.online``.
    ``data.room_status == "2"`` is treated as offline: ``end_time`` is the
    current time and ``view_num`` is 0. Any other status yields nothing.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    item = Xj_view_liveItem()
    payload = json.loads(response.body.decode())
    if payload["error"] != 0:
        return

    room = payload["data"]
    room_status = room["room_status"]
    if room_status == "1":
        # Room is live.
        started_at = time.strptime(room["start_time"], "%Y-%m-%d %H:%M:%S")
        item["start_time"] = int(time.mktime(started_at))
        item["view_num"] = room["online"]
    elif room_status == "2":
        # Room is offline.
        item["end_time"] = int(time.time())
        item["view_num"] = 0
    else:
        return

    item["anchor_id"] = self.relation[url]
    yield item
def douyu_detail(self, response):
    """Yield gift items for the ``type == "2"`` gifts of a Douyu room.

    Only responses whose ``error`` is zero are processed. Gifts are read
    from ``data.gift``; entries whose ``type`` is not the string ``"2"``
    are skipped. ``price`` is the integer ``pc`` field divided by
    ``self.douyu_conver``, rounded to two decimals.
    """
    logging.error("斗鱼的礼物详情方法..........")
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    logging.error("斗鱼的礼物集合长度:{},内容:{}".format(len(payload), payload))
    if payload["error"] != 0:
        return

    for entry in payload["data"]["gift"]:
        item = Xj_gift_value()
        # Per the original comment, type "2" marks gifts bought with "yuchi".
        if entry["type"] != "2":
            continue
        item["name"] = entry["name"]
        item["gift_id"] = entry["id"]
        item["platform_id"] = self.douyu_id
        raw_price = entry["pc"]
        item["price"] = round(int(raw_price) / self.douyu_conver, 2)
        yield item
def panda_parse(self, response):
    """Yield a view item describing a Panda room's current state.

    The room counts as live when the first ``"status"`` value following
    ``'videoinfo'`` in the body is ``"2"``. Live rooms get ``start_time``
    and ``view_num`` parsed as integers from the body; all other rooms get
    the current time as ``end_time`` and a view count of 0. ``anchor_id``
    comes from ``self.relation`` keyed by the URL in ``response.meta``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    item = Xj_view_liveItem()
    body = response.body.decode()
    status_hits = re.findall(r'\'videoinfo\'.+?"status":"(.*?)"', body)
    is_live = len(status_hits) > 0 and status_hits[0] == "2"

    if is_live:
        # Room is live. The pattern also captures "end_time" as its second
        # group, but only the start time is stored.
        time_hits = re.findall(r'"start_time":"(.*?)","end_time":"(.*?)"', body)
        item["start_time"] = int(time_hits[0][0])
        viewer_hits = re.findall(r'"person_num":"(.*?)"', body)
        item["view_num"] = int(viewer_hits[0])
    else:
        item["end_time"] = int(time.time())
        item["view_num"] = 0

    item["anchor_id"] = self.relation[url]
    yield item
def huomao_parse(self, response):
    """Yield a view item describing a Huomao room's current state.

    The room counts as live when the first ``"is_live"`` value found in the
    body is ``1``. Live rooms get the current time as ``start_time`` and
    the first ``"views"`` value as an integer ``view_num``. If that value
    is missing or not numeric, the problem is logged and ``view_num`` falls
    back to 0. All other rooms get the current time as ``end_time`` and a
    view count of 0. ``anchor_id`` comes from ``self.relation`` keyed by
    the URL in ``response.meta``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    item = Xj_view_liveItem()
    body = response.body.decode()  # decode once, reuse for both patterns
    live_status = re.findall(r'"is_live":(.*?),"', body)

    if len(live_status) > 0 and live_status[0] == "1":
        # Room is live.
        item["start_time"] = int(time.time())
        try:
            view_num = int(re.findall(r'"views":(.*?),', body)[0])
        except (IndexError, ValueError) as error:
            # IndexError: no "views" field; ValueError: non-numeric value.
            # Anything else is unexpected and is left to propagate.
            data = "spider:{} 该直播间地址有误,提取的view_num为非数字格式. url:{} time:{} error:{}".format(
                self.name, url, time_str(), error)
            logging.error(data)
            view_num = 0
        item["view_num"] = view_num
    else:
        # Room is offline.
        item["end_time"] = int(time.time())
        item["view_num"] = 0

    item["anchor_id"] = self.relation[url]
    yield item
def zhanqi_parse(self, response):
    """Handle one page of the Zhanqi live room list.

    When ``code`` is zero and ``data.rooms`` is non-empty, each room page
    is requested (callback ``self.zhanqi_detail``) and the next list page
    is then requested with this method as callback.

    The page number is parsed from the trailing ``/<page>.json`` part of
    the response URL; a URL without it raises ``IndexError``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    decoded_url = urllib.parse.unquote(response.url)
    current_page = re.findall(r'/(\d+)\.json$', decoded_url)[0]
    payload = json.loads(response.body.decode())

    if payload["code"] != 0:
        return
    rooms = payload["data"]["rooms"]
    if len(rooms) == 0:
        return

    for room in rooms:
        room_page = "https://www.zhanqi.tv" + room["url"]
        yield scrapy.Request(room_page, callback=self.zhanqi_detail)

    # Next page.
    next_page_url = 'https://www.zhanqi.tv/api/static/v2.1/live/list/20/{}.json'.format(
        int(current_page) + 1)
    yield scrapy.Request(next_page_url, callback=self.zhanqi_parse)
def egame_parse(self, response):
    """Handle one page of the eGame live list.

    For each entry in ``live_data.live_list`` a gift-list request is
    scheduled (callback ``self.egame_detail``), followed by a request for
    the next list page with this method as callback. An empty list is
    logged and ends pagination.

    The page number is parsed from the ``"page_num":<n>`` parameter of the
    unquoted response URL; a URL without it raises ``IndexError``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    payload = json.loads(response.body.decode())
    decoded_url = urllib.parse.unquote(response.url)
    current_page = re.findall(r'"page_num":(\d+),"', decoded_url)[0]

    live_rooms = payload["data"]["key"]["retBody"]["data"]["live_data"]["live_list"]
    if len(live_rooms) == 0:
        logging.error("企鹅电竞的礼物内容集合为空...............")
        return

    gift_api = 'http://share.egame.qq.com/cgi-bin/pgg_kit_async_fcgi?param={"key":{"module":"pgg_gift_svr","method":"get_gift_list","param":{"tt":0,"version":"","anchor_id":%d}}}&app_info={"platform":4,"terminal_type":2,"egame_id":"egame_official"}&g_tk=&p_tk=&tt=1'
    for room in live_rooms:
        anchor_id = room["anchor_id"]
        yield scrapy.Request(
            gift_api % int(anchor_id),
            callback=self.egame_detail,
        )

    # Next page.
    live_list_api = 'http://share.egame.qq.com/cgi-bin/pgg_live_async_fcgi?param={"key":{"module":"pgg_live_read_ifc_mt_svr","method":"get_new_live_list","param":{"appid":"hot","page_num":%d,"page_size":40,"tag_id":0,"tag_id_str":""}}}&app_info={"platform":4,"terminal_type":2,"egame_id":"egame_official","version_code":"9.9.9","version_name":"9.9.9"}&g_tk=&p_tk=&tt=1&_t=1526540987169'
    yield scrapy.Request(
        live_list_api % (int(current_page) + 1),
        callback=self.egame_parse,
    )
def huya_parse(self, response):
    """Yield a view item describing a Huya room's current state.

    The room counts as live when the first ``"state"`` value found in the
    body is ``ON``. Live rooms get the current time as ``start_time`` and
    the text of ``<em id="live-count">`` with commas removed (kept as a
    string) as ``view_num``; if that element is missing, the condition is
    logged and ``view_num`` is 0. All other rooms get the current time as
    ``end_time`` and a view count of 0. ``anchor_id`` comes from
    ``self.relation`` keyed by the URL in ``response.meta``.
    """
    # deal_status is defined elsewhere; a truthy result ends processing here.
    if deal_status(response):
        return

    url = response.meta["url"]
    item = Xj_view_liveItem()
    state_hits = re.findall(r'"state":"(.*?)"', response.body.decode())
    is_live = len(state_hits) > 0 and state_hits[0] == "ON"

    if is_live:
        # Room is live.
        item["start_time"] = int(time.time())
        count_text = response.xpath(
            "//em[@id='live-count']/text()").extract_first()
        if count_text is not None:
            item["view_num"] = count_text.replace(',', '')
        else:
            message = "spider:{} 该直播间地址可能已无效. url:{} time:{}".format(
                self.name, url, time_str())
            logging.error(message)
            item["view_num"] = 0
    else:
        # Room is offline.
        item["end_time"] = int(time.time())
        item["view_num"] = 0

    item["anchor_id"] = self.relation[url]
    yield item