Python parseNum Examples

Programming Language: Python

Namespace/Package Name: common

Method/Function: parseNum

Examples at hotexamples.com: 2

Python parseNum - 2 examples found. These are the top-rated real-world Python examples of common.parseNum extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: zhihu_question.py Project: brettKK/zhihu_crawler

    def _extractAnswer(self, block):
        '''
        Build a plain dict describing one answer from its HTML block.

        block is indexed by attribute name and searched with find(), so it is
        presumably a BeautifulSoup tag for a single answer div (verify against
        the caller).

        Returns a dict with the keys 'aid', 'responder', 'date', 'content',
        'upvote' and 'commentsCount'.
        '''
        # Answer id
        answerId = block['data-aid']

        # Responder: an answer without an avatar link is treated as an
        # anonymous user and recorded as -1.
        avatarLink = block.find('a', class_='zm-item-link-avatar')
        # The href has the form /people/<responder>; skip the first 8 characters.
        responder = -1 if avatarLink is None else avatarLink['href'][8:]

        # Creation date, kept exactly as found in the attribute
        createdAt = block['data-created']

        # Answer body text
        bodyText = block.find('div', class_='zm-editable-content').text.strip()

        # Upvote count
        upvoteCount = parseNum(block.find('span', class_='count').text)

        # Comment count: the number is whatever precedes the '条评论' marker in
        # the toggle link text; if the marker is absent or leads the text, the
        # count is 0.
        toggleText = block.find('a', class_='toggle-comment').text.strip()
        markerPos = toggleText.find('条评论')
        commentsCount = int(toggleText[:markerPos]) if markerPos > 0 else 0

        return {
            'aid': answerId,
            'responder': responder,
            'date': createdAt,
            'content': bodyText,
            'upvote': upvoteCount,
            'commentsCount': commentsCount,
        }

Example #2

Show file

File: zhihu_question.py Project: brettKK/zhihu_crawler

    def update(self):
        '''
        Refresh this Question from its zhihu page and collect its Answers.

        Sets lastModified, title, detail, tags, followersCount and answers on
        the instance.

        Returns False when the question page does not respond with status 200
        (lastModified has already been updated by then), True otherwise.
        '''
        self.lastModified = str(datetime.datetime.now())

        questionUrl = 'http://www.zhihu.com/question/%d' % (self.qid)
        response = get(questionUrl)
        if response.status_code != 200:
            return False

        page = BeautifulSoup(response.text)

        # Title
        titleTag = page.find('h2', class_='zm-item-title')
        self.title = titleTag.text.strip()

        # Detail text
        detailTag = page.find('div', id='zh-question-detail')
        self.detail = detailTag.div.text.strip()

        # Topic tags the question belongs to
        tagLinks = page.find_all("a", class_='zm-item-tag')
        self.tags = [link.string.strip() for link in tagLinks]

        # Follower count: when nobody follows the question yet, the block has
        # no <strong> child, so the count falls back to 0.
        followersBlock = page.find('div', class_='zg-gray-normal')
        if followersBlock is not None and followersBlock.strong is not None:
            self.followersCount = parseNum(followersBlock.strong.text)
        else:
            self.followersCount = 0

        self.answers = []

        # Number of answers: read from the counter header when it exists;
        # otherwise any 'count' span on the page is taken to mean one answer.
        countHeader = page.find('h3', id='zh-question-answer-num')
        if countHeader is not None:
            answersCount = int(countHeader['data-num'])
        elif page.find('span', class_='count') is not None:
            answersCount = 1
        else:
            answersCount = 0

        # Answers embedded in the question page itself (50 per batch).
        for block in page.find_all('div', class_='zm-item-answer'):
            # Skip answers carrying an 'answer-status' div
            # (answers marked as suggested for revision).
            if block.find('div', class_='answer-status') is None:
                self.answers.append(self._extractAnswer(block))

        if answersCount <= 50:
            return True

        # More answers: request the remaining batches of 50 by offset.
        xsrfToken = page.find('input', attrs={'name': '_xsrf'})['value']
        extraHeaders = {'Referer': questionUrl}
        batchTotal = math.ceil(answersCount/50)
        for batchIndex in range(1, batchTotal):
            params = '{"url_token": %d, "pagesize": 50, "offset": %d}' % (self.qid, batchIndex*50)
            payload = {"_xsrf": xsrfToken, "method": 'next', 'params': params}
            response = post('http://www.zhihu.com/node/QuestionAnswerListV2', extraHeaders, payload)
            for fragment in response.json()['msg']:
                answerDiv = BeautifulSoup(fragment).div
                # Same filter as above: skip answers marked as suggested for revision.
                if answerDiv.find('div', class_='answer-status') is None:
                    self.answers.append(self._extractAnswer(answerDiv))

        return True