Code example #1
    def parse(self, response):
        """
        Parse fans, plays, and reads
        :param response:
        :return:
        """
        try:
            if response.status != 200:
                req_url = response.request.url
                err_urls.append(req_url)
                print "请求错误 : ", req_url, " 错误码 : ", response.status
                return

            data_item = response.meta[FLAG_DATA_ITEM]
            cookies = response.meta[FLAG_COOKIE]

            # Parse the returned JSON
            s_json = response.text
            json_data = json.loads(s_json)
            data_item['fans'] = json_data['data']['total_subscribe_count']
            data_item['clicks'] = json_data['data']['play_effective_count']
            data_item['reads'] = json_data['data']['go_detail_count']
            data_item.save()
            # url=get_video_count()
            # yield scrapy.Request(url=url, headers=BASE_HEAD, dont_filter=True,
            #                      cookies=BASE_COOKIES,
            #                      meta={FLAG_DATA_ITEM: data_item, FLAG_COOKIE: cookies},
            #                      callback=self.parse_play_article)
        except Exception, e:
            traceback.print_exc()
Code example #2
File: bili_video.py  Project: panda7802/scrapys
    def parse(self, response):
        print "+++++++++++++++ start parse " + str(
            self.page_index) + "+++++++++++++++\n"

        if response.status != 200:
            req_url = response.request.url
            err_urls.append(req_url)
            print "请求错误 : ", req_url, " 错误码 : ", response.status
            return

        s_json = response.text  # .decode('unicode_escape')
        # print "response : ", s_json
        status = json.loads(s_json)['status']
        if not status:
            logging.error("==========状态异常 status is err ==========")
            return

        # video list
        vlist = json.loads(s_json)['data']['vlist']
        # process each entry
        print "vlist len : " + str(len(vlist))
        for item in vlist:
            bili_item = LxdzxBiliItem()
            for show_item in bili_show_list:
                try:
                    key = show_item[0]
                    bili_item[key] = str(item[key])
                except Exception, e:
                    traceback.print_exc()
            yield bili_item
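The inner loop above only uses show_item[0], which suggests bili_show_list is a module-level list of (field_name, label) pairs defined elsewhere in the project; that definition is not shown here. A minimal sketch of what such a list might look like follows; the field names and labels are hypothetical, not taken from the project:

# Hypothetical sketch of bili_show_list: a list of (json_field, label) pairs;
# only the first element of each pair is used as the item key in parse().
bili_show_list = [
    ("aid", "video id"),       # assumed field names -- not confirmed by the project
    ("title", "video title"),
    ("play", "play count"),
]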
Code example #3
File: bili_video.py  Project: panda7802/scrapys
    def get_detail(self, response):  # related videos
        if response.status != 200:
            req_url = response.request.url
            err_urls.append(req_url)
            print "get_detail请求错误 : ", req_url, " 错误码 : ", response.status
            return

        parent_item = response.meta['item']
        s_json = response.text.decode('unicode_escape')
        if len(s_json) <= 10:
            yield parent_item
            return

        gl_list = json.loads(s_json)  # related-video list
        if len(gl_list):
            for item in gl_list:
                try:
                    s_items = str(item).split(",")
                    parent_item["gl_title"] = eval(str(s_items[2]))  # 标题
                    parent_item[
                        "gl_url"] = "https://www.bilibili.com/video/av" + (
                            s_items[1]).strip()
                    yield parent_item
                except Exception, e:
                    traceback.print_exc()
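The parsing above goes through str(item).split(",") plus eval, which breaks as soon as a title contains a comma. Since json.loads already returns structured entries, a hedged sketch of indexing them directly is shown below; get_detail_alt, and the 'title' and 'aid' key names, are hypothetical and not taken from the project:

    def get_detail_alt(self, response):  # hypothetical variant of get_detail above
        parent_item = response.meta['item']
        gl_list = json.loads(response.text.decode('unicode_escape'))
        for item in gl_list:  # assuming each entry is a dict
            parent_item["gl_title"] = item.get("title")  # assumed key name
            parent_item["gl_url"] = "https://www.bilibili.com/video/av" + str(item.get("aid", "")).strip()
            yield parent_item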
Code example #4
    def parse(self, response):
        """
        Parse follows and fans
        :param response:
        :return:
        """
        try:
            if response.status != 200:
                req_url = response.request.url
                err_urls.append(req_url)
                print "请求错误 : ", req_url, " 错误码 : ", response.status
                return

            data_item = response.meta[FLAG_DATA_ITEM]
            mid = response.meta[FLAG_KEY_MID]
            # Parse the returned JSON
            s_json = response.text
            json_data = json.loads(s_json)
            data_item['follows'] = json_data['data']['following']  # following count
            data_item['fans'] = json_data['data']['follower']  # follower count
            url = get_play_article(mid)
            yield scrapy.Request(url=url,
                                 headers=BASE_HEAD,
                                 dont_filter=True,
                                 cookies=BASE_COOKIES,
                                 meta={
                                     FLAG_DATA_ITEM: data_item,
                                     FLAG_KEY_MID: mid
                                 },
                                 callback=self.parse_play_article)
        except Exception, e:
            traceback.print_exc()
Code example #5
    def parse_play_article(self, response):
        """
        Parse reads and plays
        :param response:
        :return:
        """
        try:
            if response.status != 200:
                req_url = response.request.url
                err_urls.append(req_url)
                print "请求错误 : ", req_url, " 错误码 : ", response.status
                return

            data_item = response.meta[FLAG_DATA_ITEM]
            mid = response.meta[FLAG_KEY_MID]
            # Parse the returned JSON
            s_json = response.text
            json_data = json.loads(s_json)
            data_item['clicks'] = json_data['data']['archive']['view']  # play count
            data_item['reads'] = json_data['data']['article']['view']  # read count

            # save the item
            # print data_item
            data_item.save()
        except Exception, e:
            traceback.print_exc()
Code example #6
File: bili_video_list.py  Project: panda7802/scrapys
def is_err_url(response):  # is this an error URL?
    if response.status != 200:
        req_url = response.request.url
        err_urls.append(req_url)
        print "请求错误 : ", req_url, " 错误码 : ", response.status
        return True
    else:
        return False
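A minimal usage sketch for is_err_url inside a spider callback is shown below. One caveat: by default Scrapy's HttpErrorMiddleware filters out non-2xx responses before they reach the callback, so for checks like the one above to fire, the spider typically needs handle_httpstatus_list. The spider name and status codes here are illustrative, not part of the project:

import scrapy

class DemoSpider(scrapy.Spider):  # illustrative spider, not from the project
    name = "demo_spider"
    # Let these error statuses through so is_err_url() can see them.
    handle_httpstatus_list = [403, 404, 500]

    def parse(self, response):
        if is_err_url(response):  # records the failing URL in err_urls and logs it
            return
        # ... normal JSON parsing would continue here ...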