コード例 #1
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
 def douyu_parse(self, response):
     """Queue a room-detail request per room on a Douyu directory page, then the next page.

     Yields nothing when ``deal_status(response)`` is truthy. When the payload's
     ``code`` is 0 and its room list is empty, paging stops.
     """
     if deal_status(response):
         return
     payload = json.loads(response.body.decode())
     decoded_url = urllib.parse.unquote(response.url)
     # The current page number is the trailing path segment after "/0_0/".
     current_page = int(re.findall(r'/0_0/(\d+)$', decoded_url)[0])
     if payload["code"] == 0:
         rooms = payload["data"]["rl"]
         if len(rooms) == 0:
             return
         detail_template = "http://open.douyucdn.cn/api/RoomApi/room/{}"
         for room in rooms:
             # NOTE: no custom headers or errback are attached to these requests.
             yield scrapy.Request(
                 detail_template.format(room["rid"]),
                 callback=self.douyu_detail,
             )
     # Next page; this is also reached when "code" is non-zero.
     yield scrapy.Request(
         "https://www.douyu.com/gapi/rkc/directory/0_0/{}".format(current_page + 1),
         callback=self.douyu_parse,
     )
コード例 #2
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
 def bilibili_parse(self, response):
     """Queue a gift-list request for every room on one Bilibili room-list page.

     Yields nothing when ``deal_status(response)`` is truthy or when the page's
     ``data`` list is empty; otherwise the next page is requested as well.
     """
     if deal_status(response):
         return
     payload = json.loads(response.body.decode())
     decoded_url = urllib.parse.unquote(response.url)
     # The page number is read back from the request URL's query string.
     current_page = int(re.findall(r'&page=(\d+)&page_size=30$', decoded_url)[0])
     rooms = payload["data"]
     if len(rooms) == 0:
         return
     gift_template = "https://api.live.bilibili.com/gift/v2/live/room_gift_list?roomid={}&area_v2_id={}"
     for room in rooms:
         yield scrapy.Request(
             gift_template.format(room["roomid"], room["area_v2_id"]),
             callback=self.bilibili_detail,
             headers=self.bili_headers,
         )
     # Next page
     list_template = "http://api.vc.bilibili.com/room/v1/area/getRoomList?parent_area_id=2&cate_id=0&area_id=0&sort_type=online&page={}&page_size=30"
     yield scrapy.Request(
         list_template.format(current_page + 1),
         callback=self.bilibili_parse,
     )
コード例 #3
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
    def egame_detail(self, response):
        """Yield one ``Xj_gift_value`` item per gift in an Egame gift-list response.

        Gifts are read from two lists in the payload: the ``fans_guardian`` list
        first, then the general ``list``. Each price is converted with
        ``self.egame_conver`` and rounded to two decimals.

        Yields nothing when ``deal_status(response)`` is truthy.
        """
        logging.error("企鹅电竞的礼物详情方法..........")
        if deal_status(response):
            return
        info_dict = json.loads(response.body.decode())

        # The message needs a %s placeholder; without one the logging module
        # cannot render the extra argument and the payload never reaches the log.
        logging.error("企鹅电竞的礼物内容...............%s", info_dict)
        gift_data = info_dict["data"]["key"]["retBody"]["data"]
        gift_list_a = gift_data["fans_guardian"]["list"]
        gift_list_b = gift_data["list"]

        logging.error("企鹅电竞的礼物集合长度:{}  b:{}".format(len(gift_list_a), len(gift_list_b)))

        # Both lists share the same gift schema, so one loop builds every item.
        for gift_list in (gift_list_a, gift_list_b):
            for gift in gift_list:
                item = Xj_gift_value()
                item["name"] = gift["name"]
                item["gift_id"] = gift["id"]
                item["platform_id"] = self.egame_id
                item["price"] = round(int(gift["price"]) / self.egame_conver, 2)
                yield item
コード例 #4
0
 def egame_parse(self, response):
     """Read an Egame room's live status and emit an item or a follow-up request.

     When ``is_live`` is 1, a barrage request is queued with the partially
     filled item in ``meta``. When it is 0, an offline item with ``view_num`` 0
     is yielded. Nothing is yielded when ``deal_status(response)`` is truthy,
     when ``ecode`` is non-zero, or when the status lookup hits a missing key.
     """
     if deal_status(response):
         return
     room_id = response.meta["roomid"]
     source_url = response.meta["url"]
     payload = json.loads(response.body.decode())
     if payload["ecode"] != 0:
         return
     try:
         room_data = payload["data"]["key"]["retBody"]["data"]
         live_status = room_data["profile_info"]["is_live"]
     except KeyError as err:
         logging.error(
             "spider:{} 从该直播间抓取直播状态时取值失败 url:{} error:{}".format(
                 self.name, source_url, err))
         return
     item = Xj_view_liveItem()
     if live_status == 1:  # currently live
         video_info = room_data["video_info"]
         item["start_time"] = video_info["start_tm"]
         pid = video_info["pid"]
         egame_api = 'http://wdanmaku.egame.qq.com/cgi-bin/pgg_barrage_async_fcgi?param={"key":{"module":"pgg_live_barrage_svr","method":"get_barrage","param":{"anchor_id":%d,"vid":%s,"scenes":4096,"last_tm":%d}}}&app_info={"platform":4,"terminal_type":2,"egame_id":"egame_official"}&g_tk=&p_tk=&tt=1'
         barrage_url = egame_api % (int(room_id), pid, int(time.time()))
         # Copies are handed to the follow-up request via meta.
         yield scrapy.Request(
             barrage_url,
             callback=self.egame_detail,
             meta={"url": deepcopy(source_url), "item": deepcopy(item)},
         )
     elif live_status == 0:  # broadcast has ended
         item["end_time"] = room_data["video_info"]["end_tm"]
         item["view_num"] = 0
         item["anchor_id"] = self.relation[source_url]
         yield item
コード例 #5
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
 def huomao_parse(self, response):
     """Queue a gift request per channel on a Huomao channel-list page, then the next page.

     Yields nothing when ``deal_status(response)`` is truthy. When ``code`` is
     100 and the channel list is empty, paging stops.
     """
     if deal_status(response):
         return
     payload = json.loads(response.body.decode())
     decoded_url = urllib.parse.unquote(response.url)
     current_page = int(re.findall(r'\?page=(\d+)&', decoded_url)[0])
     if payload["code"] == 100:
         channels = payload["data"]["channelList"]
         if len(channels) == 0:
             return
         gift_template = "http://www.huomao.com/ajax/getNewGift?cid={}&cache_time={}&face_label=0"
         for channel in channels:
             channel_id = channel["id"]
             # NOTE(review): the shared header dict is updated here but is not
             # passed to the request below - confirm whether that is intended.
             self.huomao_headers["Referer"] = "https://www.huomao.com/{}".format(channel["room_number"])
             yield scrapy.Request(
                 gift_template.format(channel_id, int(time.time())),
                 callback=self.huomao_detail,
             )
     # Next page; this is also reached when "code" is not 100.
     list_template = "https://www.huomao.com/channels/channel.json?page={}&game_url_rule=all"
     yield scrapy.Request(
         list_template.format(current_page + 1),
         callback=self.huomao_parse,
     )
コード例 #6
0
 def bilibili_parse(self, response):
     """Take the ``room_id`` from the payload and request that room's info endpoint.

     Yields nothing when ``deal_status(response)`` is truthy or ``code`` is non-zero.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     payload = json.loads(response.body.decode())
     if payload["code"] != 0:
         return
     info_template = "https://api.live.bilibili.com/room/v1/Room/get_info?room_id={}&from=room"
     info_url = info_template.format(payload["data"]["room_id"])
     # The originating URL travels along in meta for the detail callback.
     yield scrapy.Request(
         info_url,
         callback=self.bilibili_detail,
         meta={"url": deepcopy(source_url)},
     )
コード例 #7
0
 def egame_detail(self, response):
     """Complete the item carried in ``response.meta`` with the online count and yield it.

     Yields nothing when ``deal_status(response)`` is truthy or ``ecode`` is non-zero.
     """
     if deal_status(response):
         return
     pending_item = response.meta["item"]
     source_url = response.meta["url"]
     payload = json.loads(response.body.decode())
     if payload["ecode"] != 0:
         return
     barrage_data = payload["data"]["key"]["retBody"]["data"]
     pending_item["view_num"] = barrage_data["online_count"]
     pending_item["anchor_id"] = self.relation[source_url]
     yield pending_item
コード例 #8
0
 def longzhu_parse(self, response):
     """Extract the room id from the page body and request the Longzhu room-status API.

     Yields nothing when ``deal_status(response)`` is truthy or no room id is found.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     room_ids = re.findall(r',"RoomId":(.*?),"Domain"',
                           response.body.decode())
     if not room_ids:
         return
     # Only the first match is used.
     yield scrapy.Request(
         "http://roomapicdn.longzhu.com/room/roomstatus?roomid={}&lzv=1".format(room_ids[0]),
         callback=self.longzhu_detail,
         headers=longzhu_header(),
         meta={"url": deepcopy(source_url)},
     )
コード例 #9
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
 def quanmin_parse(self, response):
     """Yield one ``Xj_gift_value`` item per entry in the Quanmin gift list.

     The ``diamond`` cost is divided by ``self.quanmin_conver`` and rounded to
     two decimals. Yields nothing when ``deal_status(response)`` is truthy or
     ``code`` is non-zero.
     """
     if deal_status(response):
         return
     payload = json.loads(response.body.decode())
     if payload["code"] != 0:
         return
     for gift in payload["data"]["lists"]:
         item = Xj_gift_value()
         item["gift_id"] = gift["id"]
         item["name"] = gift["name"]
         item["platform_id"] = self.quanmin_id
         item["price"] = round(gift["diamond"] / self.quanmin_conver, 2)
         yield item
コード例 #10
0
 def longzhu_detail(self, response):
     """Yield a live-status item for a Longzhu room.

     A ``Broadcast`` key in the payload is treated as "live": the item gets the
     current time as ``start_time`` and the reported ``OnlineCount``. Otherwise
     it gets the current time as ``end_time`` and a ``view_num`` of 0.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     item = Xj_view_liveItem()
     payload = json.loads(response.body.decode())
     if "Broadcast" in payload.keys():  # currently live
         item["start_time"] = int(time.time())
         item["view_num"] = payload["OnlineCount"]
     else:  # broadcast has ended
         item["end_time"] = int(time.time())
         item["view_num"] = 0
     item["anchor_id"] = self.relation[source_url]
     yield item
コード例 #11
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
    def zhanqi_detail(self, response):
        """Yield one ``Xj_gift_value`` item per gift embedded in a Zhanqi room page.

        The gift list is the JSON assigned to ``oPageConfig.aRoomGiftList`` in
        the page source; only the first match is parsed. Prices are divided by
        ``self.zhanqi_conver`` and rounded to two decimals.
        """
        logging.error("战旗的礼物详情方法..........")
        if deal_status(response):
            return
        page_source = response.body.decode()
        gift_json = re.findall(r'oPageConfig\.aRoomGiftList = (.+);', page_source)[0]
        gifts = json.loads(gift_json)
        logging.error("战旗的礼物集合长度:{}".format(len(gifts)))

        for gift in gifts:
            item = Xj_gift_value()
            item["name"] = gift["name"]
            item["platform_id"] = self.zhanqi_id
            item["price"] = round(int(gift["price"]) / self.zhanqi_conver, 2)
            item["gift_id"] = gift["id"]
            yield item
コード例 #12
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
    def longzhu_parse(self, response):
        """Yield one ``Xj_gift_value`` item per entry in the Longzhu gift list.

        The response body is parsed as JSON and iterated directly; ``costValue``
        is stored as the price without conversion.
        """
        logging.error("龙珠的礼物详情方法..........")

        if deal_status(response):
            return
        gifts = json.loads(response.body.decode())

        logging.error("龙珠的礼物集合长度:{}".format(len(gifts)))

        for entry in gifts:
            item = Xj_gift_value()
            item["name"] = entry["title"]
            item["gift_id"] = entry["id"]
            item["platform_id"] = self.longzhu_id
            item["price"] = entry["costValue"]
            yield item
コード例 #13
0
 def quanmin_parse(self, response):
     """Yield a live-status item for a Quanmin room, scraped from the page body.

     The room counts as live when the first ``play_status`` match is the text
     ``true``; ``view_num`` is then the raw text captured by the ``view`` regex.
     Otherwise an offline item with ``view_num`` 0 is produced.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     item = Xj_view_liveItem()
     page_source = response.body.decode()
     status_matches = re.findall(r'"play_status":(.*?),"forbid_status"',
                                 page_source)
     is_live = bool(status_matches) and status_matches[0] == "true"
     if is_live:  # currently live
         item["start_time"] = int(time.time())
         item["view_num"] = re.findall(r'"view":(.*?),"weight"',
                                       page_source)[0]
     else:  # broadcast has ended
         item["end_time"] = int(time.time())
         item["view_num"] = 0
     item["anchor_id"] = self.relation[source_url]
     yield item
コード例 #14
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
    def bilibili_detail(self, response):
        """Yield one ``Xj_gift_value`` item per gift in a Bilibili gift-list response.

        Prices are divided by ``self.bilibili_conver`` and rounded to three
        decimals. Yields nothing when ``deal_status(response)`` is truthy,
        ``code`` is non-zero, or the gift list is empty.
        """
        logging.error("B站的礼物详情方法..........")
        if deal_status(response):
            return
        payload = json.loads(response.body.decode())

        # The logged length is that of the top-level payload, not of the gift list.
        logging.error("B站的礼物集合长度:{}".format(len(payload)))
        if payload["code"] != 0:
            return
        gifts = payload["data"]
        if len(gifts) == 0:
            return
        for gift in gifts:
            item = Xj_gift_value()
            item["name"] = gift["name"]
            item["gift_id"] = gift["id"]
            item["platform_id"] = self.bilibili_id
            item["price"] = round(int(gift["price"]) / self.bilibili_conver, 3)
            yield item
コード例 #15
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
    def huomao_detail(self, response):
        """Yield one ``Xj_gift_value`` item per gift in a Huomao gift response.

        Prices are divided by ``self.huomao_conver`` and rounded to two decimals.
        Yields nothing when ``deal_status(response)`` is truthy or ``code`` is
        not 200.
        """
        logging.error("火猫的礼物详情方法..........")
        if deal_status(response):
            return
        payload = json.loads(response.body.decode())

        logging.error("火猫的礼物集合长度:{}, 内容:{}".format(len(payload),payload))

        if payload["code"] != 200:
            return
        for entry in payload["data"]["giftInfo"]:
            item = Xj_gift_value()
            item["name"] = entry["name"]
            item["gift_id"] = entry["id"]
            item["platform_id"] = self.huomao_id
            item["price"] = round(int(entry["price"]) / self.huomao_conver, 2)
            yield item
コード例 #16
0
 def bilibili_detail(self, response):
     """Yield a live-status item for a Bilibili room.

     ``live_status`` 1 produces an item with ``start_time`` parsed from
     ``live_time`` and the reported ``online`` count. ``live_status`` 0
     produces an item with the current time as ``end_time`` and ``view_num`` 0.
     Any other status yields nothing.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     payload = json.loads(response.body.decode())
     item = Xj_view_liveItem()
     room_data = payload["data"]
     live_status = room_data["live_status"]
     if live_status == 1:  # currently live
         started = time.strptime(room_data["live_time"], "%Y-%m-%d %H:%M:%S")
         item["start_time"] = int(time.mktime(started))
         item["view_num"] = room_data["online"]
     elif live_status == 0:  # broadcast has ended
         item["end_time"] = int(time.time())
         item["view_num"] = 0
     else:
         return
     item["anchor_id"] = self.relation[source_url]
     yield item
コード例 #17
0
 def douyu_parse(self, response):
     """Yield a live-status item for a Douyu room.

     ``room_status`` "1" produces an item with ``start_time`` parsed from the
     payload and the reported ``online`` count. ``room_status`` "2" produces an
     item with the current time as ``end_time`` and ``view_num`` 0. Nothing is
     yielded for other statuses, a non-zero ``error``, or a truthy
     ``deal_status(response)``.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     item = Xj_view_liveItem()
     payload = json.loads(response.body.decode())
     if payload["error"] != 0:
         return
     room_status = payload["data"]["room_status"]
     if room_status == "1":  # currently live
         started = time.strptime(payload["data"]["start_time"],
                                 "%Y-%m-%d %H:%M:%S")
         item["start_time"] = int(time.mktime(started))
         item["view_num"] = payload["data"]["online"]
     elif room_status == "2":  # broadcast has ended
         item["end_time"] = int(time.time())
         item["view_num"] = 0
     else:
         return
     item["anchor_id"] = self.relation[source_url]
     yield item
コード例 #18
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
    def douyu_detail(self, response):
        """Yield an ``Xj_gift_value`` item for each Douyu gift whose ``type`` is "2".

        Gifts of any other type are skipped. The ``pc`` cost is divided by
        ``self.douyu_conver`` and rounded to two decimals. Yields nothing when
        ``deal_status(response)`` is truthy or ``error`` is non-zero.
        """
        logging.error("斗鱼的礼物详情方法..........")

        if deal_status(response):
            return
        payload = json.loads(response.body.decode())

        logging.error("斗鱼的礼物集合长度:{},内容:{}".format(len(payload),payload))

        if payload["error"] != 0:
            return
        for entry in payload["data"]["gift"]:
            item = Xj_gift_value()
            if entry["type"] != "2":
                # Only type "2" gifts are exported.
                continue
            item["name"] = entry["name"]
            item["gift_id"] = entry["id"]
            item["platform_id"] = self.douyu_id
            item["price"] = round(int(entry["pc"]) / self.douyu_conver, 2)
            yield item
コード例 #19
0
 def panda_parse(self, response):
     """Yield a live-status item for a Panda room, scraped from the page body.

     The room counts as live when the first ``videoinfo`` status match is "2".
     ``start_time`` and ``person_num`` are then taken from the page and
     converted with ``int``. Otherwise an offline item with ``view_num`` 0 is
     produced.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     item = Xj_view_liveItem()
     page_source = response.body.decode()
     status_matches = re.findall(r'\'videoinfo\'.+?"status":"(.*?)"',
                                 page_source)
     is_live = bool(status_matches) and status_matches[0] == "2"
     if is_live:  # currently live
         # The pattern also captures end_time; only the first group is used.
         time_pairs = re.findall(r'"start_time":"(.*?)","end_time":"(.*?)"',
                                 page_source)
         item["start_time"] = int(time_pairs[0][0])
         viewer_counts = re.findall(r'"person_num":"(.*?)"', page_source)
         item["view_num"] = int(viewer_counts[0])
     else:
         item["end_time"] = int(time.time())
         item["view_num"] = 0
     item["anchor_id"] = self.relation[source_url]
     yield item
コード例 #20
0
 def huomao_parse(self, response):
     """Yield a live-status item for a Huomao room, scraped from the page body.

     The room counts as live when the first ``is_live`` match is "1". If the
     view count cannot be extracted or converted to ``int``, the failure is
     logged and ``view_num`` falls back to 0. Otherwise an offline item with
     ``view_num`` 0 is produced.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     item = Xj_view_liveItem()
     page_source = response.body.decode()
     status_matches = re.findall(r'"is_live":(.*?),"', page_source)
     is_live = bool(status_matches) and status_matches[0] == "1"
     if is_live:  # currently live
         item["start_time"] = int(time.time())
         try:
             view_matches = re.findall(r'"views":(.*?),', page_source)
             item["view_num"] = int(view_matches[0])
         except Exception as error:
             message = "spider:{} 该直播间地址有误,提取的view_num为非数字格式. url:{} time:{} error:{}".format(
                 self.name, source_url, time_str(), error)
             logging.error(message)
             item["view_num"] = 0
     else:  # broadcast has ended
         item["end_time"] = int(time.time())
         item["view_num"] = 0
     item["anchor_id"] = self.relation[source_url]
     yield item
コード例 #21
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
 def zhanqi_parse(self, response):
     """Queue a room-page request per room on a Zhanqi live-list page, then the next page.

     Yields nothing when ``deal_status(response)`` is truthy. When ``code`` is
     0 and the room list is empty, paging stops.
     """
     if deal_status(response):
         return
     decoded_url = urllib.parse.unquote(response.url)
     # The page number is the file stem of the ".json" URL.
     current_page = int(re.findall(r'/(\d+)\.json$', decoded_url)[0])
     payload = json.loads(response.body.decode())
     if payload["code"] == 0:
         rooms = payload["data"]["rooms"]
         if len(rooms) == 0:
             return
         for room in rooms:
             yield scrapy.Request(
                 "https://www.zhanqi.tv" + room["url"],
                 callback=self.zhanqi_detail
             )
     # Next page; this is also reached when "code" is non-zero.
     yield scrapy.Request(
         'https://www.zhanqi.tv/api/static/v2.1/live/list/20/{}.json'.format(current_page + 1),
         callback=self.zhanqi_parse
     )
コード例 #22
0
ファイル: xj_gift_value.py プロジェクト: inxes/zhibo_spirder
 def egame_parse(self, response):
     """Queue a gift-list request per anchor on an Egame live-list page, then the next page.

     Yields nothing when ``deal_status(response)`` is truthy. An empty
     ``live_list`` is logged and stops paging.
     """
     if deal_status(response):
         return
     payload = json.loads(response.body.decode())
     decoded_url = urllib.parse.unquote(response.url)
     # The page number is read from the JSON "param" embedded in the request URL.
     current_page = int(re.findall(r'"page_num":(\d+),"', decoded_url)[0])
     streams = payload["data"]["key"]["retBody"]["data"]["live_data"]["live_list"]
     if len(streams) == 0:
         logging.error("企鹅电竞的礼物内容集合为空...............")
         return
     gift_api = 'http://share.egame.qq.com/cgi-bin/pgg_kit_async_fcgi?param={"key":{"module":"pgg_gift_svr","method":"get_gift_list","param":{"tt":0,"version":"","anchor_id":%d}}}&app_info={"platform":4,"terminal_type":2,"egame_id":"egame_official"}&g_tk=&p_tk=&tt=1'
     for stream in streams:
         yield scrapy.Request(
             gift_api % int(stream["anchor_id"]),
             callback=self.egame_detail
         )
     # Next page
     list_api = 'http://share.egame.qq.com/cgi-bin/pgg_live_async_fcgi?param={"key":{"module":"pgg_live_read_ifc_mt_svr","method":"get_new_live_list","param":{"appid":"hot","page_num":%d,"page_size":40,"tag_id":0,"tag_id_str":""}}}&app_info={"platform":4,"terminal_type":2,"egame_id":"egame_official","version_code":"9.9.9","version_name":"9.9.9"}&g_tk=&p_tk=&tt=1&_t=1526540987169'
     yield scrapy.Request(
         list_api % (current_page + 1),
         callback=self.egame_parse,
     )
コード例 #23
0
 def huya_parse(self, response):
     """Yield a live-status item for a Huya room, scraped from the page.

     The room counts as live when the first ``state`` match is "ON". The view
     count is the text of the ``live-count`` element with commas removed; if
     the element is missing, that is logged and ``view_num`` falls back to 0.
     Otherwise an offline item with ``view_num`` 0 is produced.
     """
     if deal_status(response):
         return
     source_url = response.meta["url"]
     item = Xj_view_liveItem()
     state_matches = re.findall(r'"state":"(.*?)"', response.body.decode())
     is_live = bool(state_matches) and state_matches[0] == "ON"
     if is_live:  # currently live
         item["start_time"] = int(time.time())
         count_text = response.xpath(
             "//em[@id='live-count']/text()").extract_first()
         if count_text is not None:
             # Stored as text with the thousands separators stripped.
             item["view_num"] = count_text.replace(',', '')
         else:
             message = "spider:{} 该直播间地址可能已无效. url:{} time:{}".format(
                 self.name, source_url, time_str())
             logging.error(message)
             item["view_num"] = 0
     else:  # broadcast has ended
         item["end_time"] = int(time.time())
         item["view_num"] = 0
     item["anchor_id"] = self.relation[source_url]
     yield item