from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # timestamp taken when the script starts

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ======================== Query the answer ========================
aid = 34404209
answer = client.answer(aid)

# (label, attribute name) pairs, dumped in this order. Each attribute is read
# only when its turn comes, just like one print statement per field.
answer_fields = (
    ('作者', 'author'),
    ('能否评论', 'can_comment'),
    ('收藏夹', 'collections'),
    ('评论数', 'comment_count'),
    ('评论权限', 'comment_permission'),
    ('评论', 'comments'),
    ('内容', 'content'),
    ('创建时间', 'created_time'),
    ('摘录', 'excerpt'),
    ('答案ID', 'id'),
    ('能否复制', 'is_copyable'),
    ('是我回答的吗', 'is_mine'),
    ('从属问题', 'question'),
    ('建议修改', 'suggest_edit'),
    ('感谢数', 'thanks_count'),
)
for label, attribute in answer_fields:
    print(label, getattr(answer, attribute))

# updated_time is passed through time.localtime and printed as a formatted date.
print('更新时间',
      time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(answer.updated_time)))
# Example #2
# 0

#df = pd.DataFrame(topic_questions_detail,columns=['tid','qid','aids'])

# Connect to the local MongoDB server and open the test.questions_detail collection.
mogo_client = MongoClient('mongodb://localhost:27017/')
db = mogo_client['test']
col = db['questions_detail']

# Collection.count() is deprecated and was removed in PyMongo 4;
# count_documents({}) counts every document in the collection.
print(col.count_documents({}))
#target = col.find_one({'tid' : 19575211 })
target = pd.DataFrame(list(col.find()))
#print(target.describe())
print(target)
# Each cell of the 'aids' column is iterated as a collection of answer ids.
for answers in target['aids']:
    for ans_id in answers:
        ans = client.answer(ans_id)
        print(Cleaner.filter_tags(ans.content))
        time.sleep(3)  # wait 3 seconds before fetching the next answer




# for q in topic.unanswered_questions:
#     if(q.follower_count > 1000):
#         #print("question {}, created at {}, has {} followers, {} answers\n".format(q.title, datetime.utcfromtimestamp(q.created_time).strftime('%Y-%m-%d %H:%M:%S'), q.follower_count, q.answer_count))
#         for ans in q.answers:
#             for com in ans.comments:
#                 print("question {} - answer {} {}- comments {}\n".format(q.id, ans.id, ans.content, com.content))


# Example #3
# 0
class Crawler:
    # Initialize the crawler with the name of database
    def __init__(self, dbname,email,key):
        """Open the SQLite database and log a Zhihu client in.

        Args:
            dbname: Database path handed to ``sqlite3.connect``.
            email: Account identifier passed to ``ZhihuClient.login``.
            key: Password passed to ``ZhihuClient.login``.

        A ``NeedCaptchaException`` raised by the login is reported on stdout
        and swallowed, so construction still completes in that case.
        """
        connection = sqlite3.connect(dbname)
        self.con = connection
        self.cursor = connection.cursor()
        TOKEN_FILE = 'token.pkl'  # only referenced by the disabled token flow below
        api_client = ZhihuClient()
        self.zhclient = api_client
        try:
            # Interactive alternative: api_client.login_in_terminal(email, key)
            api_client.login(email, key)
        except NeedCaptchaException:
            print("需要输入验证码,账号 %s 可能已失效" %(email))
        # Disabled token-cache flow, kept for reference:
        # if os.path.isfile(TOKEN_FILE):
        #     self.zhclient.load_token(TOKEN_FILE)
        # else:
        #     self.zhclient.login_in_terminal(email, key)
        #     self.zhclient.save_token(TOKEN_FILE)

    def __del__(self):
        self.con.close()

    def dbcommit(self):
        self.con.commit()

    #建立数据表
    def createindextables(self):
        self.cursor.execute('create table userinfo(id primary key NOT NULL ,name text,headline text,gender int,address text,business text,school_name text,job text,company text,answer_count int ,question_count int ,voteup_count int ,thanked_count int ,following_count int ,follower_count int ,following_question_count int ,following_topic_count,collected_count int,identity text,best_topics text,is_organization int,org_name text,org_home_page text,org_industry text,record_time text)')
        self.cursor.execute('create table answerinfo(id primary key NOT NULL,content text,author_id int ,voteup_count int,thanks_count int, created_time text,comment_count int,updated_time text,record_time text)')
        self.cursor.execute('create table questioninfo(id primary key NOT NULL,title text,follower_count int ,answer_count int,created_time text,updated_time text,record_time text)')
        self.cursor.execute('create table topicinfo(id primary key NOT NULL,title text,best_answer_count int ,follower_count int ,question_count int,record_time text)')

        self.cursor.execute('create table topic_questions(topic_id ,topic_name text,question_id ,question_title text,record_time text)')
        self.cursor.execute('create table topic_users(topic_id,topic_name text,user_id,user_name text,record_time text)')
        self.cursor.execute('create table question_users(question_id,question_title text,user_id,user_name text,record_time text)')
        self.cursor.execute('create table question_answers(question_id,question_title text,answer_id,author_id,record_time text)')
        self.cursor.execute('create table user_users(user_id,user_follower_id)')
        self.cursor.execute('create table question_topics(question_id,topic_id,topic_name text,record_time text)')
        self.cursor.execute('create table user_topics(user_id,user_name text,topic_id,topic_name text,record_time text)')

        self.cursor.execute('create index userinfoidx on userinfo(id)')
        self.cursor.execute('create index answerinfoidx on answerinfo(id)')
        self.cursor.execute('create index questioninfoidx on questioninfo(id)')
        self.cursor.execute('create index topicinfoidx on topicinfo(id)')

        self.cursor.execute('create index topic_questionsidx on topic_questions(topic_id,question_id)')
        self.cursor.execute('create index topic_usersidx on topic_users(topic_id,user_id)')
        self.cursor.execute('create index question_usersidx on question_users(question_id,user_id)')
        self.cursor.execute('create index question_answersidx on question_answers(question_id,answer_id)')
        self.cursor.execute('create index user_usersidx on user_users(user_id,user_follower_id)')
        self.cursor.execute('create index question_topicsidx on question_topics(question_id,topic_id)')
        self.cursor.execute('create index user_topicsidx on user_topics(user_id,topic_id)')

        self.dbcommit()

    # #多线程尝试
    # def crawl_data(self,work_set,table1,field1,table2,field2):
    #     if table2 == "userinfo":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.userinfo(subid)
    #     elif table2 == "answerinfo":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.answerinfo(subid)
    #             # time.sleep(0.8)
    #             # time.sleep(0.5)
    #     elif table2 == "questioninfo":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.questioninfo(subid)
    #     elif table2 == "topicinfo":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.topicinfo(subid)
    #     elif table2 == "question_answers":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.question_answers(subid)
    #     elif table2 == "question_topics":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.question_topics(subid)
    #     elif table2 == "question_users":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.question_users(subid)
    #     elif table2 == "topic_questions":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.topic_questions(subid)
    #     elif table2 == "topic_users":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.topic_users(subid)
    #     elif table2 == "user_users":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.user_users(subid)
    #     elif table2 == "user_topics":
    #         for subid in work_set:
    #             subid = subid[0]
    #             self.user_topics(subid)
    #     return None


    def justdoit(self,table1,field1,table2,field2):
        set2 =set(self.cursor.execute("select DISTINCT  {} from {}".format(field2,table2)).fetchall())
        set1 = set(self.cursor.execute("select DISTINCT {} from {}".format(field1,table1)).fetchall())
        work_set = set1-set2
        # work_set = list(set1 - set2)
        # splitlen = int(len(work_set) / 2)
        # subwork_set = [work_set[i:i + splitlen] for i in range(0, len(work_set), splitlen)]
        # threads = []
        # for i in range(0,len(subwork_set)):
        #     t = multiprocessing.Process(target=self.crawl_data,args=(subwork_set[i],table1,field1,table2,field2))
        #     threads.append(t)
        # for t in threads:
        #     t.start()
        #     t.join()
        if table2 == "userinfo":
            for subid in work_set:
                subid = subid[0]
                self.userinfo(subid)
        elif table2 == "answerinfo":
            for subid in work_set:
                subid = subid[0]
                self.answerinfo(subid)
                # time.sleep(1.0)
                time.sleep(0.1)
        elif table2 == "questioninfo":
            for subid in work_set:
                subid = subid[0]
                self.questioninfo(subid)
        elif table2 == "topicinfo":
            for subid in work_set:
                subid = subid[0]
                self.topicinfo(subid)
        elif table2 == "question_answers":
            for subid in work_set:
                subid = subid[0]
                self.question_answers(subid)
        elif table2 == "question_topics":
            for subid in work_set:
                subid = subid[0]
                self.question_topics(subid)
        elif table2 == "question_users":
            for subid in work_set:
                subid = subid[0]
                self.question_users(subid)
        elif table2 == "topic_questions":
            for subid in work_set:
                subid = subid[0]
                self.topic_questions(subid)
        elif table2 == "topic_users":
            for subid in work_set:
                subid = subid[0]
                self.topic_users(subid)
        elif table2 == "user_users":
            for subid in work_set:
                subid = subid[0]
                self.user_users(subid)
        elif table2 == "user_topics":
            for subid in work_set:
                subid = subid[0]
                self.user_topics(subid)
        return None

    #话题-(精华)问题关系
    def topic_questions(self,topic_id):
        """Record the questions behind a topic's best answers.

        Walks ``topic.best_answers`` and inserts one ``topic_questions`` row
        per question that is neither already stored nor already seen during
        this call, committing after each insert.

        ``GetDataErrorException`` is logged and re-raised;
        ``UnexpectedResponseException`` is logged and swallowed.
        """
        try:
            topic = self.zhclient.topic(topic_id)
            record_time = self.logtime()
            seen_question_ids = set()
            for hot_ques in shield(topic.best_answers,action=SHIELD_ACTION.PASS):
                rowid = self.isdupicaterel("topic_questions", "topic_id", "question_id", topic.id, hot_ques.question.id)
                if rowid is not None:
                    print("已存在,正在跳过")
                    continue
                if hot_ques.question.id in seen_question_ids:
                    continue
                seen_question_ids.add(hot_ques.question.id)
                row = (topic.id,topic.name,hot_ques.question.id,hot_ques.question.title,record_time)
                self.cursor.execute("insert into topic_questions(topic_id,topic_name,question_id,question_title,record_time) VALUES (?,?,?,?,?)" ,row)
                self.dbcommit()
                print("正在处理", hot_ques.question.id)
        except GetDataErrorException:
            # NOTE(review): the message says "Pass" but the exception is
            # re-raised here - confirm which behaviour is intended.
            print("Pass the GetDataErrorException")
            raise
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
    #话题-关注者关系
    def topic_users(self,topic_id,start_at = 0):
        """Record the followers of a topic.

        Iterates ``topic.followers`` (``start_at`` is forwarded to ``shield``)
        and inserts one ``topic_users`` row per follower that is neither
        already stored nor already seen during this call, committing after
        each insert. ``GetDataErrorException`` and
        ``UnexpectedResponseException`` are logged and swallowed.
        """
        try:
            topic = self.zhclient.topic(topic_id)
            record_time = self.logtime()
            seen_user_ids = set()
            for follower in shield(topic.followers,start_at=start_at,action=SHIELD_ACTION.PASS):
                rowid = self.isdupicaterel("topic_users", "topic_id", "user_id", topic.id, follower.id)
                if rowid is not None:
                    print("已存在,正在跳过")
                    continue
                if follower.id in seen_user_ids:
                    continue
                seen_user_ids.add(follower.id)
                row = (topic.id,topic.name,follower.id,follower.name,record_time)
                self.cursor.execute("insert into topic_users(topic_id,topic_name,user_id,user_name,record_time) VALUES (?,?,?,?,?)" ,row)
                self.dbcommit()
                print("正在处理",topic.name,follower.name)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
    # 问题-关注者关系
    def question_users(self, question_id):
        """Record the followers of a question.

        Iterates ``question.followers`` and inserts one ``question_users`` row
        per follower that is neither already stored nor already seen during
        this call, committing after each insert. ``GetDataErrorException`` and
        ``UnexpectedResponseException`` are logged and swallowed.
        """
        try:
            question = self.zhclient.question(question_id)
            record_time = self.logtime()
            seen_user_ids = set()
            for follower in shield(question.followers,action=SHIELD_ACTION.PASS):
                rowid = self.isdupicaterel("question_users", "question_id", "user_id", question.id, follower.id)
                if rowid is not None:
                    print("已存在,正在跳过")
                    continue
                if follower.id in seen_user_ids:
                    continue
                seen_user_ids.add(follower.id)
                row = (question.id, question.title, follower.id, follower.name,record_time)
                self.cursor.execute(
                    "insert into question_users(question_id,question_title,user_id,user_name,record_time) VALUES (?,?,?,?,?)", row)
                self.dbcommit()
                print("正在处理",follower.name,question.title)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
    # 问题-回答关系
    def question_answers(self, question_id):
        """Record the answers posted under a question.

        Iterates ``question.answers`` and inserts one ``question_answers`` row
        (question id/title, answer id, author id, record date) per answer that
        is neither already stored nor already seen during this call,
        committing after each insert.

        ``GetDataErrorException``, ``UnexpectedResponseException`` and
        ``ZhihuWarning`` are each logged under their own name and swallowed.
        """
        try:
            question = self.zhclient.question(question_id)
            record_time = self.logtime()
            seen_answer_ids = set()
            for answer in shield(question.answers):
                rowid = self.isdupicaterel("question_answers", "question_id", "answer_id", question.id, answer.id)
                if rowid is not None:
                    print("已存在,正在跳过")
                    continue
                if answer.id in seen_answer_ids:
                    continue
                seen_answer_ids.add(answer.id)
                row = (question.id, question.title, answer.id, answer.author.id,record_time)
                self.cursor.execute("insert into question_answers(question_id,question_title,answer_id,author_id,record_time) VALUES (?,?,?,?,?)", row)
                self.dbcommit()
                print("正在处理", question.id, question.title, answer.id, answer.author.id)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
        except ZhihuWarning:
            # Previously logged as UnexpectedResponseException (copy-paste),
            # which made the two failure modes indistinguishable in the output.
            print("Pass the ZhihuWarning")

    #获取用户-用户关注关系,知乎有5020限制,api限制最多获取一个用户5020粉丝
    def user_users(self,user_id):
        """Record the followers of a user.

        Iterates ``people.followers`` and inserts one ``user_users`` row
        (user id, follower id, record date) per follower that is neither
        already stored nor already seen during this call, committing after
        each insert. The insert requires ``user_users`` to have a
        ``record_time`` column. ``GetDataErrorException`` and
        ``UnexpectedResponseException`` are logged and swallowed.
        """
        try:
            people = self.zhclient.people(user_id)
            record_time = self.logtime()
            seen_follower_ids = set()
            for follower in shield(people.followers,action=SHIELD_ACTION.PASS):
                rowid = self.isdupicaterel("user_users", "user_id", "user_follower_id", people.id, follower.id)
                if rowid is not None:
                    print("已存在,正在跳过")
                    continue
                if follower.id in seen_follower_ids:
                    continue
                seen_follower_ids.add(follower.id)
                row = (people.id,follower.id,record_time)
                self.cursor.execute("insert into user_users(user_id,user_follower_id,record_time) VALUES (?,?,?)",row)
                self.dbcommit()
                print("正在处理",follower.name)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
    #获取问题-话题关系
    def question_topics(self,question_id):
        """Record the topics attached to a question.

        Iterates ``question.topics`` and inserts one ``question_topics`` row
        per topic that is neither already stored nor already seen during this
        call, committing after each insert. ``UnexpectedResponseException``
        and ``GetDataErrorException`` are logged and swallowed.
        """
        try:
            question = self.zhclient.question(question_id)
            record_time = self.logtime()
            seen_topic_ids = set()
            for topic in shield(question.topics):
                rowid = self.isdupicaterel("question_topics", "question_id", "topic_id", question.id, topic.id)
                if rowid is not None:
                    print("已存在,正在跳过")
                    continue
                if topic.id in seen_topic_ids:
                    continue
                seen_topic_ids.add(topic.id)
                row = (question.id,topic.id,topic.name,record_time)
                self.cursor.execute("insert into question_topics(question_id,topic_id,topic_name,record_time) VALUES (?,?,?,?)",row)
                self.dbcommit()
                print("正在处理", topic.name,question.title)
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
        except GetDataErrorException:
            print("Pass the GetDataErrorException")

    # 获取用户-话题关系
    def user_topics(self, user_id):
        """Record the topics a user follows.

        Iterates ``people.following_topics`` and inserts one ``user_topics``
        row per topic that is neither already stored nor already seen during
        this call, committing after each insert.
        ``UnexpectedResponseException`` and ``GetDataErrorException`` are
        logged and swallowed.
        """
        try:
            people = self.zhclient.people(user_id)
            record_time = self.logtime()
            seen_topic_ids = set()
            for topic in shield(people.following_topics):
                rowid = self.isdupicaterel("user_topics", "user_id", "topic_id", people.id, topic.id)
                if rowid is not None:
                    print("已存在,正在跳过")
                    continue
                if topic.id in seen_topic_ids:
                    continue
                seen_topic_ids.add(topic.id)
                row = (people.id, people.name, topic.id,topic.name, record_time)
                self.cursor.execute(
                    "insert into user_topics(user_id,user_name,topic_id,topic_name,record_time) VALUES (?,?,?,?,?)",
                    row)
                self.dbcommit()
                print("正在处理", people.name ,topic.name)
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
        except GetDataErrorException:
            print("Pass the GetDataErrorException")

    # 判断数据重复
    def isdupicateid(self, table, id):
        cur = self.cursor.execute(
            "select rowid from {} where id = ?".format(table), (id,))
        self.dbcommit()
        res = cur.fetchone()
        res = None if res == None else res[0]
        return res

    def isdupicaterel(self,table,field1,field2, id1,id2):
        cur = self.cursor.execute(
            "select rowid from {} where {}= ? And {} = ?".format(table,field1,field2), (id1,id2))
        res = cur.fetchone()
        self.dbcommit()
        res = None if res == None else res[0]
        return res

    #个人信息
    def userinfo(self,user_id):
        try:
            status = self.isdupicateid("userinfo",user_id)
            if status==None:
                people = self.zhclient.people(user_id)
                record_time = self.logtime()
                address = "|".join([location.name for location in people.locations])
                school_name = "|".join([education.school.name for education in people.educations if "school" in education])
                job = "|".join([employment.job.name for employment in people.employments if "job" in employment])
                company = "|".join([employment.company.name for employment in people.employments if "company" in employment])
                business = people.business.name if people.business else None
                #勋章判断
                if people.badge.has_identity:
                    identity = people.badge.identity
                else:
                    identity = None
                if people.badge.is_best_answerer:
                    best_topics = "".join([topic.name for topic in people.badge.topics])
                else:
                    best_topics = None
                if people.badge.is_organization:
                    is_organization = 1
                    org_name = people.badge.org_name
                    org_home_page = people.badge.org_home_page
                    org_industry = people.badge.org_industry
                else:
                    is_organization = 0
                    org_name = None
                    org_home_page = None
                    org_industry = None
                values = (
                people.id, people.name, people.headline, people.gender, address, business, school_name, job,company,
                people.answer_count, people.question_count, people.voteup_count, people.thanked_count,
                people.following_count, people.follower_count, people.following_question_count,
                people.following_topic_count, people.collected_count, identity,best_topics,is_organization,org_name,org_home_page,org_industry,record_time)
                self.cursor.execute(
                    "insert into userinfo(id,name,headline,gender,address,business,school_name,job,company,answer_count,question_count,voteup_count,thanked_count,following_count,follower_count,following_question_count,following_topic_count,collected_count,identity,best_topics,is_organization,org_name,org_home_page,org_industry,record_time) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
                    values)
                self.dbcommit()
                print("正在处理", people.name)
            else:
                print("重复,rowid",status)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
            pass
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
            pass

    def answerinfo(self,answer_id):
        try:
            status = self.isdupicateid("answerinfo", answer_id)
            if status == None:
                answer = self.zhclient.answer(answer_id)
                record_time = self.logtime()
                values = (answer.id,answer.content,answer.author.id,answer.voteup_count,answer.thanks_count,answer.comment_count,answer.created_time,answer.updated_time,record_time)
                self.cursor.execute("insert into answerinfo(id,content,author_id,voteup_count,thanks_count,comment_count,created_time,updated_time,record_time) VALUES (?,?,?,?,?,?,?,?,?)",values)
                self.dbcommit()
                print("正在处理",answer.id)
            else:
                return ("重复,rowid",status)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
            self.cursor.execute("delete from question_answers where answer_id = ?",(answer_id,))##在从question_answer表中获取及时删除无效问题,方式切换帐号后反复爬去无效问题。
            pass
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
            pass
    #问题信息
    def questioninfo(self,question_id):
        try:
            status = self.isdupicateid("questioninfo", question_id)
            if status == None:
                question = self.zhclient.question(question_id)
                record_time = self.logtime()
                values = (question.id,question.title,question.follower_count,question.answer_count,question.created_time,question.updated_time,record_time)
                self.cursor.execute("insert into questioninfo(id,title,follower_count,answer_count,created_time,updated_time,record_time) VALUES (?,?,?,?,?,?,?)",values)
                self.dbcommit()
                print("正在处理" ,question.title)
            else:
                return ("重复,rowid",status)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
            pass
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
            pass
    #话题信息
    def topicinfo(self,topic_id):
        try:
            status = self.isdupicateid("topicinfo", topic_id)
            if status == None:
                topic = self.zhclient.topic(topic_id)
                record_time = self.logtime()
                values=(topic.id,topic.name,topic.best_answer_count,topic.follower_count,topic.question_count,record_time)
                self.cursor.execute("insert into topicinfo(id,title,best_answer_count,follower_count,question_count,record_time) VALUES (?,?,?,?,?,?)",values)
                self.dbcommit()
                print("正在处理", topic.name)
            else:
                return ("重复,rowid",status)
        except GetDataErrorException:
            print("Pass the GetDataErrorException")
            pass
        except UnexpectedResponseException:
            print("Pass the UnexpectedResponseException")
            pass
    #时间戳
    def logtime(self):
        fmt = '%Y-%m-%d'  # 定义时间显示格式
        Date = time.strftime(fmt, time.localtime(time.time()))
        return Date


    def add_counts(self,filepath = "logincounts.txt"):
        counts = []
        for line in open(filepath):
            count = {}
            count["count"], count["key"] = line.split("----")
            count["key"] = count["key"].strip("\n")
            counts.append(count)
        return counts

    def get_proxy(self):
        """Ask the local proxy pool for a proxy address.

        Returns:
            The response body when the pool at ``http://localhost:5000/get``
            answers with HTTP 200; None when it answers with another status or
            the connection fails.
        """
        PROXY_POOL_URL = 'http://localhost:5000/get'
        try:
            response = requests.get(PROXY_POOL_URL)
        except requests.exceptions.ConnectionError:
            # requests' ConnectionError does not derive from the builtin
            # ConnectionError, so it has to be named explicitly to be caught.
            return None
        if response.status_code == 200:
            return response.text
        return None
# Example #4
# 0
fail_aid = []  # ids of answers whose fetch or analysis raised
rows = []  # one tuple per successfully processed answer
counter = 4000  # progress number printed per answer; starts at 4000
write_counter = 0  # number of rows appended so far

output_file = "./content_20_50.csv"
#headers = ["aid", "created_time", "length", "img_count", "link_count", "follower_count", "answer_count", "content"]
#with open(output_file,'a') as f:
#	f_csv = csv.writer(f)
#	f_csv.writerow(headers)


for aid in x100:

    try:
        a1 = client.answer(int(aid))
        (content, length, img_count, link_count) = content_analyze(a1.content)
        created_time = a1.created_time
        #follower_count = 38
        #answer_count = 20
        follower_count = a1.author.follower_count
        answer_count = a1.author.answer_count
        rows.append((aid, created_time, length, img_count, link_count, follower_count, answer_count, content))
        print ("success {0}".format(counter))
        counter += 1
        write_counter += 1
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt and
        # SystemExit, so the loop could not be stopped with Ctrl-C.
        fail_aid.append(aid)
        print ("fail {0}".format(counter))
        counter += 1
        print (fail_aid)
# -*- coding: utf-8 -*-
"""
Created on Sat Oct  7 18:02:01 2017

@author: roger
"""
import csv
from zhihu_oauth import ZhihuClient

# Reuse a previously saved token instead of logging in again.
client = ZhihuClient()
client.load_token('token.pkl')

# Fetch one answer by id and print its content.
a1 = client.answer(126638629)
print(a1.content)
<p>10/13日的演讲,下面这段感动了很多人。大致翻译一下:我加入竞选前,有人警告我,说参加总统竞选会把我打入地狱。我商业经营很成功,旗下多家公司,家庭也很幸福。我本来可以不来趟这场浑水,每天坐在家里舒舒服服享受生活。但这个国家现在千疮百孔,我个人的成功既然得益于国家强盛,现在就必须回馈她。我本来也是掌控这个国家的少数上层人士之一,但我现在跟他们决裂了。我从那个小团体出来,深知这个国家肮脏到了什么程度。也正因为我从那里出来,我也是治理这个国家的最佳人选。</p><p>This is our moment of reckoning as a society and as a civilization 
itself.   I didn't need to do this, folks, believe me — believe me.   I 
built a great company and I had a wonderful life.   I could have enjoyed 
the fruits and benefits of years of successful business deals and 
businesses for myself and my family.   Instead of going through this 
absolute horror show of lies, deceptions, malicious attacks — who would 
have thought?   I'm doing it because this country has given me so much, 
and I feel so strongly that it's my turn to give back to the country 
that I love.</p><p>Many of my friends and many political experts warned me that this 
campaign would be a journey to hell – said that.   But they're wrong.   It 
will be a journey to heaven, because we will help so many people that 
are so desperately in need of help. </p><p>In my former life, I was an insider as much as anybody else.   And I knew
 what it's like, and I still know what it's like to be an insider.   It's 
not bad.   It's not bad.   Now I'm being punished for leaving the special 
club and revealing to you the terrible things that are going on having 
to do with our country.   Because I used to be part of the club, I'm the 
# Example #6
# 0
# -*- coding: utf-8 -*-
import os
from zhihu_oauth import ZhihuClient

TOKEN_FILE = 'token.pkl'

# Log in: reuse the cached token when the file exists, otherwise log in
# interactively and cache the new token.
client = ZhihuClient()

if not os.path.isfile(TOKEN_FILE):
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)

# Look up one answer and print its comment count.
answer = client.answer(44818033)
print(answer.comment_count)

# for follower in people.followers:
#     print(follower.name)

# 获取点赞量最高的 5 个回答
# for _, answer in zip(range(5), people.answers.order_by('votenum')):
#     print(answer.question.title, answer.voteup_count)
# print('----------')
# Example #7
# 0
    print("文字通过:%d") % len(res["pass"])
    print("文字待审核:%d") % len(res["review"])
    print("文字违规:%d") % len(res["reject"])


def picture_review(content):
    """Run every image referenced in ``content`` through the image censor.

    Image URLs are taken from ``<img src=...data`` fragments of the HTML. Each
    URL is submitted with ``imageCensorUserDefined`` and the raw result is
    printed. A result whose ``conclusion`` is two characters long is counted
    as passed (presumably the two-character "compliant" verdict - confirm
    against the API response). Finally the passed and rejected counts are
    printed, or a notice when the content has no images.

    Args:
        content: HTML text of the answer.
    """
    pic = re.findall(r"<img src=(.*?)data", content)
    if not pic:
        print("没有图片")
        return

    # The censor client is built only when there is something to check.
    client = AipImageCensor(APP_ID, API_KEY, SECRET_KEY)
    passed = 0
    for match in pic:
        # Drop the opening quote and the closing quote plus trailing space.
        # Each match is checked in turn; indexing pic[0] here used to submit
        # the first image over and over.
        url = match[1:-2]
        result = client.imageCensorUserDefined(url)
        print(result)
        if len(result['conclusion']) == 2:
            passed += 1
    # Format inside the call: ``print(...) % n`` applies % to print's return
    # value, which fails under Python 3.
    print("图片通过:%d" % passed)
    print("图片违规:%d" % (len(pic) - passed))


if __name__ == '__main__':
    # Reuse a previously saved token instead of logging in interactively.
    client = ZhihuClient()
    client.load_token('token.pkl')
    # Named answer_id so the builtin id() is not shadowed at module level.
    answer_id = "376064029"
    content = client.answer(answer_id).content
    # Review the answer's text first, then its images.
    answer_review(content)
    picture_review(content)
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # timestamp taken when the script starts

# ======================== Login ========================
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ======================== Query the answer ========================
aid = 34404209
answer = client.answer(aid)

# (label, attribute name) pairs, dumped in this order. Each attribute is read
# only when its turn comes, just like one print statement per field.
answer_fields = (
    ('作者', 'author'),
    ('能否评论', 'can_comment'),
    ('收藏夹', 'collections'),
    ('评论数', 'comment_count'),
    ('评论权限', 'comment_permission'),
    ('评论', 'comments'),
    ('内容', 'content'),
    ('创建时间', 'created_time'),
    ('摘录', 'excerpt'),
    ('答案ID', 'id'),
    ('能否复制', 'is_copyable'),
    ('是我回答的吗', 'is_mine'),
    ('从属问题', 'question'),
    ('建议修改', 'suggest_edit'),
    ('感谢数', 'thanks_count'),
)
for label, attribute in answer_fields:
    print(label, getattr(answer, attribute))

# updated_time is passed through time.localtime and printed as a formatted
# date; the voters are printed last.
print('更新时间', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(answer.updated_time)))
print('投票者', answer.voters)