Example #1
0
    def run(self):
        """Store one question, its answers and their image URLs.

        The whole body runs inside ``self.threadingSum``, which is used as a
        context manager (presumably a semaphore bounding concurrent workers
        -- confirm where the attribute is assigned).

        Nothing is inserted when ``dbHandler.hasQuestion(self.url)`` is true.
        The ``DbHandler`` is closed on every path: the already-stored case,
        the normal insert case, and when any step raises. Exceptions still
        propagate to the caller; the "done" message is logged only when no
        exception occurred.
        """
        with self.threadingSum:
            logging.debug("%s start", self.url)
            dbHandler = DbHandler()
            try:
                if not dbHandler.hasQuestion(self.url):
                    # Insert the new question.  The accessors are called in
                    # a fixed order before the row dict is built.
                    question = Question(self.url)
                    title = question.get_title()
                    detail = question.get_detail()
                    answerNum = question.get_answer_num()
                    followersNum = question.get_followers_num()
                    # Tags are stored as one ';'-separated string.
                    tags = ";".join(question.get_tags())
                    questionDict = {
                        "url": self.url,
                        "title": title,
                        "detail": detail,
                        "followers": followersNum,
                        "answerNum": answerNum,
                        "tags": tags,
                    }
                    dbHandler.insertNewQuestion(questionDict)

                    # The id is looked up by URL after the insert and used
                    # as the "zh_qid" value of every answer row below.
                    zh_qid = dbHandler.getQueIdByUrl(self.url)

                    # Insert the new answers.
                    for answer_link in question.get_all_answer_link():
                        answer = Answer(answer_link)
                        author = answer.get_author()
                        votes = answer.get_votes()
                        answerDict = {
                            "url": answer_link,
                            "author": author,
                            "zh_qid": zh_qid,
                            "votes": votes,
                        }
                        dbHandler.insertNewAnswer(answerDict)

                        # Insert the image URLs under the answer's id.
                        zh_aid = dbHandler.getAnsIdByUrl(answer_link)
                        for imgUrl in answer.get_all_pics():
                            dbHandler.insertNewImgUrl(zh_aid, imgUrl)

                        contents = answer.get_answer_content()
                        self.storeTheAnswer(zh_aid, contents)
            finally:
                # Previously close() was only reached on the insert path,
                # leaving the handler open when the question already existed
                # or when an exception interrupted the scrape.
                dbHandler.close()

            logging.debug("%s done", self.url)
def question_test():
    start = time.time()
    question = Question("http://www.zhihu.com/question/33488763")
    question.parser()
    print "Title of the question is ", question.get_title()
    print "Tags of the question is ", question.get_tags()
    print "Details of the question is ", question.get_details()
    print "Number of answer of the question is ", question.get_answer_num()
    print "Number of collapsed answer of this question is ", question.get_collapsed_answer_num()
    print "Number of follower of this question is ", question.get_follower_num()
    print "Number of view of this question is ", question.get_view_num()
    print "Number of comment of this question is ", question.get_comment_num()
    print "Last activity time of this question is", question.get_last_activity_time()
    print "Number of follower of related tags is", question.get_related_tags_follower_num()
    print "First known follower of this question is ", question.get_first_known_follower()
    print "Related questions are ", question.get_related_questions()
    question.save_top_answers(3)
    question.save_all_answers()  # might be time-consuming
    question.save_all_followers_profile()  # might be time-consuming
    end = time.time()
    print "Time used is", end - start
Example #3
0
def question_test(url):
    question = Question(url)

    # 获取该问题的标题
    title = question.get_title()
    # 获取该问题的详细描述
    detail = question.get_detail()
    # 获取回答个数
    answers_num = question.get_answers_num()
    # 获取关注该问题的人数
    followers_num = question.get_followers_num()
    # 获取该问题所属话题
    topics = question.get_topics()
    # 获取该问题被浏览次数
    visit_times = question.get_visit_times()
    # 获取排名第一的回答
    top_answer = question.get_top_answer()
    # 获取排名前十的十个回答
    top_answers = question.get_top_i_answers(10)
    # 获取所有回答
    answers = question.get_all_answers()

    print title  # 输出:现实可以有多美好?
    print detail
    # 输出:
    # 本问题相对于“现实可以多残酷?传送门:现实可以有多残酷?
    # 题主:       昨天看了“现实可以有多残酷“。感觉不太好,所以我
    # 开了这个问题以相对应,希望能够“中和一下“。和那个问题题主不想
    # 把它变成“比惨大会“一样,我也不想把这个变成“鸡汤故事会“,或者
    # 是“晒幸福“比赛。所以大家从“现实,实际”的角度出发,讲述自己的
    # 美好故事,让大家看看社会的冷和暖,能更加辨证地看待世界,是此
    # 题和彼题共同的“心愿“吧。
    print answers_num  # 输出:2441
    print followers_num  # 输出:26910
    for topic in topics:
        print topic,  # 输出:情感克制 现实 社会 个人经历
    print visit_times  # 输出: 该问题当前被浏览的次数
    print top_answer  # 输出:<zhihu.Answer instance at 0x7f8b6582d0e0>(Answer类对象)
    print top_answers  # 输出:<generator object get_top_i_answers at 0x7fed676eb320>(代表前十的Answer的生成器)
    print answers  # 输出:<generator object get_all_answer at 0x7f8b66ba30a0>(代表所有Answer的生成器)
Example #4
0
# -*- coding: utf-8 -*-
from question import Question
from answer import Answer
from voters import Voters
from user import User

# Smoke-test script: builds a Question from one hard-coded URL, prints the
# result of each accessor with a tab-separated label, then does the same for
# an Answer and finally constructs a Voters object.
question_url = "http://www.zhihu.com/question/24269892"
# Both placeholders are empty and are not read in the visible part of this
# script.
answer_url = ""
voters_url = ""

# NOTE: the variable name is spelled "questio_test" throughout.
questio_test = Question(question_url)
print "get_title:\t", questio_test.get_title()
print "get_detail:\t", questio_test.get_detail()
print "get_answers_num:\t", questio_test.get_answers_num()
print "get_followers_num:\t", questio_test.get_followers_num()
print "get_topics:\t", questio_test.get_topics()
# Only the first item produced by get_all_answers() is printed.
print "get_all_answers:\t", questio_test.get_all_answers().next()
print "get_visit_times:\t", questio_test.get_visit_times()


# Visual separator: a string of 20 newlines (print adds one more).
print "\n" * 20

# get_all_answers() is called a second time here; the first item it yields
# is what gets passed to the Answer constructor.
answer_test = Answer(questio_test.get_all_answers().next())

print "get_author\t", answer_test.get_author()
print "get_upvote\t", answer_test.get_upvote()
print "get_content\t", answer_test.get_content()
print "get_answerid\t", answer_test.get_answerid()

print "\n" * 20
# Built from the answer id; nothing is called on it in the visible part of
# the script.
voters_test = Voters(answer_test.get_answerid())
Example #5
0
from question import Question
from toHtml import ToHtml

# Demo script: build a Question from one hard-coded URL and print its title,
# detail, answer count and each tag on its own line.
question = Question("http://www.zhihu.com/question/27848661")
title = question.get_title()
print title
detail = question.get_detail()
print detail
answer_num = question.get_answer_num()
print answer_num
tags = question.get_tags()
for tag in tags:
    print tag
# Everything below is disabled (commented out): listing authors, votes and
# answers, exporting answers through ToHtml, and downloading pictures for a
# second question URL.
# authors = question.get_all_authors()
# for author in authors:
#     print author
# votes = question.get_all_votes()
# for vote in votes:
#     print vote
# answers = question.get_all_answers()
# for answer in answers:
#     print answer
# toHtml = ToHtml("http://www.zhihu.com/question/27848661")
# toHtml.answerToHtml()

# question = Question("http://www.zhihu.com/question/25029518")
# urls = question.get_all_pics()
# question.download_all_pics(urls)