コード例 #1
0
ファイル: main.py プロジェクト: ahxxm/zhihu-spider
    async def insert_user_all_answers(self, user_id: str):
        """Crawl every answer-list page of a user and insert the answers.

        The user is flagged ``FLAG.IN_USE`` before crawling starts and
        ``FLAG.FINISHED`` after the last page has been processed.  Nothing
        is caught here: if any step raises, the exception propagates and
        the user's status stays at ``FLAG.IN_USE``.

        Args:
            user_id: Identifier of the user whose answers are crawled.

        Returns:
            ``True`` once all pages have been processed.
        """
        change_user_status(db=self.db, user_id=user_id, status=FLAG.IN_USE)

        # first_answer_page yields: the answer page count, the base url of
        # this user's answer list, and the first page's soup.
        page_count, base_url, soup = await self.first_answer_page(user_id=user_id)

        # Seed the counter with what the first page really inserted rather
        # than assuming a full page of 20 (or 0 for single-page users).
        # insert_answer_list_page's return value is summed for the later
        # pages below, so it is used as a per-page count here as well.
        answers = self.insert_answer_list_page(soup, user_id)

        # Remaining pages, if any; the range is empty when page_count <= 1.
        for page_num in range(2, page_count + 1):
            current_page_link = base_url + "?page=" + str(page_num)
            content = await get_page_body(current_page_link)
            soup = BeautifulSoup(content, BS_PARSER)
            answers += self.insert_answer_list_page(soup, user_id)

        change_user_status(db=self.db, user_id=user_id, status=FLAG.FINISHED)

        # Stay quiet for users for whom nothing was inserted.
        if answers:
            log.debug("Inserted {} answers for user {}.".format(answers, user_id))
        return True
コード例 #2
0
ファイル: main.py プロジェクト: cznyx/zhihu-spider
    def insert_user_all_answers(self, user_id: str):
        """Crawl every answer-list page of a user and insert the answers.

        The user is flagged ``FLAG.IN_USE`` before crawling starts and
        ``FLAG.FINISHED`` after the last page has been processed.  Nothing
        is caught here: if any step raises, the exception propagates and
        the user's status stays at ``FLAG.IN_USE``.

        Args:
            user_id: Identifier of the user whose answers are crawled.
        """
        change_user_status(db=self.db, user_id=user_id, status=FLAG.IN_USE)

        # first_answer_page returns a 3-tuple, unpacked here as the page
        # count, the base URL of the answer list, and the parsed first page.
        answer_page_count, answer_page_base, answer_list_soup = self.first_answer_page(user_id=user_id)

        # Seed the counter with what the first page really inserted instead
        # of assuming it always holds 20 answers.  The return value of
        # insert_answer_list_page is summed for the later pages below, so it
        # is used as a per-page count here as well.
        answers = self.insert_answer_list_page(answer_list_soup, user_id)

        # Remaining pages, if any; the range is empty when there is only one.
        for page_num in range(2, answer_page_count + 1):
            current_page_link = answer_page_base + "?page=" + str(page_num)
            r = self.session.get(current_page_link)
            soup = BeautifulSoup(r.content.decode('utf-8'), BS_PARSER)
            answers += self.insert_answer_list_page(soup, user_id)

        change_user_status(db=self.db, user_id=user_id, status=FLAG.FINISHED)
        log.info("Inserted {} answers for user {}.".format(answers, user_id))