Example #1
0
    def run(self):
        """Store the question at ``self.url`` plus its answers, if not yet stored.

        The whole body runs inside ``self.threadingSum`` (used as a context
        manager; presumably a semaphore/lock bounding concurrent workers --
        confirm against the class constructor).

        If the database already has the question, nothing is inserted.
        Otherwise the question row is inserted first, then for every answer
        link: the answer row, its image URLs, and finally its content via
        ``self.storeTheAnswer``.

        The database handler is always closed, whether the question was
        already present or an error interrupts the crawl.
        """
        with self.threadingSum:
            logging.debug("%s start", self.url)
            dbHandler = DbHandler()
            try:
                if not dbHandler.hasQuestion(self.url):
                    # Insert the new question.
                    question = Question(self.url)
                    title = question.get_title()
                    detail = question.get_detail()
                    answerNum = question.get_answer_num()
                    followersNum = question.get_followers_num()
                    # Tags are stored as one ';'-separated string.
                    tags = ";".join(question.get_tags())
                    questionDict = {
                        "url": self.url,
                        "title": title,
                        "detail": detail,
                        "followers": followersNum,
                        "answerNum": answerNum,
                        "tags": tags,
                    }
                    dbHandler.insertNewQuestion(questionDict)

                    # Read back the id of the row just inserted so answers
                    # can reference it.
                    zh_qid = dbHandler.getQueIdByUrl(self.url)

                    # Insert the new answers.
                    for answer_link in question.get_all_answer_link():
                        answer = Answer(answer_link)
                        author = answer.get_author()
                        votes = answer.get_votes()
                        answerDict = {
                            "url": answer_link,
                            "author": author,
                            "zh_qid": zh_qid,
                            "votes": votes,
                        }
                        dbHandler.insertNewAnswer(answerDict)

                        # Insert the image URLs, keyed by the answer's id.
                        zh_aid = dbHandler.getAnsIdByUrl(answer_link)
                        for imgUrl in answer.get_all_pics():
                            dbHandler.insertNewImgUrl(zh_aid, imgUrl)

                        contents = answer.get_answer_content()
                        self.storeTheAnswer(zh_aid, contents)
            finally:
                # Close on every path: previously the handler was only closed
                # when a new question had been inserted successfully.
                dbHandler.close()

            logging.debug("%s done", self.url)
def question_test():
    start = time.time()
    question = Question("http://www.zhihu.com/question/33488763")
    question.parser()
    print "Title of the question is ", question.get_title()
    print "Tags of the question is ", question.get_tags()
    print "Details of the question is ", question.get_details()
    print "Number of answer of the question is ", question.get_answer_num()
    print "Number of collapsed answer of this question is ", question.get_collapsed_answer_num()
    print "Number of follower of this question is ", question.get_follower_num()
    print "Number of view of this question is ", question.get_view_num()
    print "Number of comment of this question is ", question.get_comment_num()
    print "Last activity time of this question is", question.get_last_activity_time()
    print "Number of follower of related tags is", question.get_related_tags_follower_num()
    print "First known follower of this question is ", question.get_first_known_follower()
    print "Related questions are ", question.get_related_questions()
    question.save_top_answers(3)
    question.save_all_answers()  # might be time-consuming
    question.save_all_followers_profile()  # might be time-consuming
    end = time.time()
    print "Time used is", end - start
Example #3
0
from question import Question
from toHtml import ToHtml

# Build a Question for a fixed URL and print its title, detail, answer count
# and tags. Each print takes a single parenthesised argument, which behaves
# the same as the bare Python 2 print statement and also parses on Python 3.
question = Question("http://www.zhihu.com/question/27848661")

title = question.get_title()
print(title)

detail = question.get_detail()
print(detail)

answer_num = question.get_answer_num()
print(answer_num)

# One tag per output line.
tags = question.get_tags()
for tag in tags:
    print(tag)
# authors = question.get_all_authors()
# for author in authors:
#     print author
# votes = question.get_all_votes()
# for vote in votes:
#     print vote
# answers = question.get_all_answers()
# for answer in answers:
#     print answer
# toHtml = ToHtml("http://www.zhihu.com/question/27848661")
# toHtml.answerToHtml()

# question = Question("http://www.zhihu.com/question/25029518")
# urls = question.get_all_pics()
# question.download_all_pics(urls)