def run(self):
    """Crawl the question at ``self.url`` and store it with all of its answers.

    The work runs inside the ``self.threadingSum`` context manager. When the
    database has no entry for ``self.url`` yet, the question row is inserted,
    followed by one row per answer, the image URLs of each answer, and the
    answer content (through ``self.storeTheAnswer``). A URL that is already
    stored is skipped entirely.

    The database handler is closed even if crawling or storing raises; the
    exception still propagates and the "done" message is not logged.
    """
    with self.threadingSum:
        logging.debug("%s start", self.url)
        dbHandler = DbHandler()
        try:
            if not dbHandler.hasQuestion(self.url):
                # Insert the new question.
                question = Question(self.url)
                title = question.get_title()
                detail = question.get_detail()
                answerNum = question.get_answer_num()
                followersNum = question.get_followers_num()
                # Tags are stored as a single ';'-separated string.
                tags = ";".join(question.get_tags())
                questionDict = {
                    "url": self.url,
                    "title": title,
                    "detail": detail,
                    "followers": followersNum,
                    "answerNum": answerNum,
                    "tags": tags,
                }
                dbHandler.insertNewQuestion(questionDict)
                zh_qid = dbHandler.getQueIdByUrl(self.url)

                # Insert the new answers.
                for answer_link in question.get_all_answer_link():
                    answer = Answer(answer_link)
                    author = answer.get_author()
                    votes = answer.get_votes()
                    answerDict = {
                        "url": answer_link,
                        "author": author,
                        "zh_qid": zh_qid,
                        "votes": votes,
                    }
                    dbHandler.insertNewAnswer(answerDict)

                    # Insert the image URLs, keyed by the id of the answer
                    # row that was just inserted.
                    zh_aid = dbHandler.getAnsIdByUrl(answer_link)
                    for imgUrl in answer.get_all_pics():
                        dbHandler.insertNewImgUrl(zh_aid, imgUrl)

                    # NOTE(review): stored once per answer (zh_aid is
                    # per-answer); confirm against the original layout.
                    contents = answer.get_answer_content()
                    self.storeTheAnswer(zh_aid, contents)
        finally:
            # Release the handler on the error path as well.
            dbHandler.close()
        logging.debug("%s done", self.url)
def question_test(): start = time.time() question = Question("http://www.zhihu.com/question/33488763") question.parser() print "Title of the question is ", question.get_title() print "Tags of the question is ", question.get_tags() print "Details of the question is ", question.get_details() print "Number of answer of the question is ", question.get_answer_num() print "Number of collapsed answer of this question is ", question.get_collapsed_answer_num() print "Number of follower of this question is ", question.get_follower_num() print "Number of view of this question is ", question.get_view_num() print "Number of comment of this question is ", question.get_comment_num() print "Last activity time of this question is", question.get_last_activity_time() print "Number of follower of related tags is", question.get_related_tags_follower_num() print "First known follower of this question is ", question.get_first_known_follower() print "Related questions are ", question.get_related_questions() question.save_top_answers(3) question.save_all_answers() # might be time-consuming question.save_all_followers_profile() # might be time-consuming end = time.time() print "Time used is", end - start
def question_test(url): question = Question(url) # 获取该问题的标题 title = question.get_title() # 获取该问题的详细描述 detail = question.get_detail() # 获取回答个数 answers_num = question.get_answers_num() # 获取关注该问题的人数 followers_num = question.get_followers_num() # 获取该问题所属话题 topics = question.get_topics() # 获取该问题被浏览次数 visit_times = question.get_visit_times() # 获取排名第一的回答 top_answer = question.get_top_answer() # 获取排名前十的十个回答 top_answers = question.get_top_i_answers(10) # 获取所有回答 answers = question.get_all_answers() print title # 输出:现实可以有多美好? print detail # 输出: # 本问题相对于“现实可以多残酷?传送门:现实可以有多残酷? # 题主: 昨天看了“现实可以有多残酷“。感觉不太好,所以我 # 开了这个问题以相对应,希望能够“中和一下“。和那个问题题主不想 # 把它变成“比惨大会“一样,我也不想把这个变成“鸡汤故事会“,或者 # 是“晒幸福“比赛。所以大家从“现实,实际”的角度出发,讲述自己的 # 美好故事,让大家看看社会的冷和暖,能更加辨证地看待世界,是此 # 题和彼题共同的“心愿“吧。 print answers_num # 输出:2441 print followers_num # 输出:26910 for topic in topics: print topic, # 输出:情感克制 现实 社会 个人经历 print visit_times # 输出: 该问题当前被浏览的次数 print top_answer # 输出:<zhihu.Answer instance at 0x7f8b6582d0e0>(Answer类对象) print top_answers # 输出:<generator object get_top_i_answers at 0x7fed676eb320>(代表前十的Answer的生成器) print answers # 输出:<generator object get_all_answer at 0x7f8b66ba30a0>(代表所有Answer的生成器)
# -*- coding: utf-8 -*- from question import Question from answer import Answer from voters import Voters from user import User question_url = "http://www.zhihu.com/question/24269892" answer_url = "" voters_url = "" questio_test = Question(question_url) print "get_title:\t", questio_test.get_title() print "get_detail:\t", questio_test.get_detail() print "get_answers_num:\t", questio_test.get_answers_num() print "get_followers_num:\t", questio_test.get_followers_num() print "get_topics:\t", questio_test.get_topics() print "get_all_answers:\t", questio_test.get_all_answers().next() print "get_visit_times:\t", questio_test.get_visit_times() print "\n" * 20 answer_test = Answer(questio_test.get_all_answers().next()) print "get_author\t", answer_test.get_author() print "get_upvote\t", answer_test.get_upvote() print "get_content\t", answer_test.get_content() print "get_answerid\t", answer_test.get_answerid() print "\n" * 20 voters_test = Voters(answer_test.get_answerid())
from question import Question from toHtml import ToHtml question = Question("http://www.zhihu.com/question/27848661") title = question.get_title() print title detail = question.get_detail() print detail answer_num = question.get_answer_num() print answer_num tags = question.get_tags() for tag in tags: print tag # authors = question.get_all_authors() # for author in authors: # print author # votes = question.get_all_votes() # for vote in votes: # print vote # answers = question.get_all_answers() # for answer in answers: # print answer # toHtml = ToHtml("http://www.zhihu.com/question/27848661") # toHtml.answerToHtml() # question = Question("http://www.zhihu.com/question/25029518") # urls = question.get_all_pics() # question.download_all_pics(urls)