# -*- coding: utf-8 -*-
"""Print the number of answers in one Zhihu collection using zhihu_oauth.

The script reuses the token stored in TOKEN_FILE when that file exists;
otherwise it calls ``client.login_in_terminal()`` and saves the resulting
token to TOKEN_FILE. It then fetches collection 19825336 and prints its
``answer_count``.

The traceback recorded at the bottom of this file is the failure the
original author observed on the final ``print`` line.
"""
import os

from zhihu_oauth import ZhihuClient

TOKEN_FILE = 'token.pkl'

# Log in: prefer the saved token, fall back to a terminal login and
# persist the new token for the next run.
client = ZhihuClient()
if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)

collection = client.collection(19825336)
print(collection.answer_count)

# Error observed when running the line above:
# Traceback (most recent call last):
#   File "C:/learnpython/zhihu_sp/collection.py", line 18, in <module>
#     print(collection.answer_count)
#   File "C:\Python27\lib\site-packages\zhihu_oauth\zhcls\other.py", line 57, in wrapper
#     return cls(cache['id'], cache, self._session)
# TypeError: 'int' object has no attribute '__getitem__'
# NOTE(review): the failing frame is inside zhihu_oauth itself (zhcls/other.py),
# not in this script -- presumably a library-version issue; confirm against the
# installed zhihu_oauth release.
# NOTE(review): this excerpt is collapsed onto a single physical line that
# begins with `#`, so Python currently treats ALL of it as a comment. It is also
# truncated: it ends on a dangling `else:` whose body is not visible here.
# Visible statements, in order:
#   1. If TOKEN_FILE exists, load the token from it; otherwise call
#      client.login() with placeholder credentials. On NeedCaptchaException the
#      captcha image is written to 'a.gif', the user is prompted for its text,
#      and login is retried with that captcha. The token is then saved.
#   2. Fetch the collection identified by int(collection_id) and print its title.
#   3. Configure logging (inline marker "#日志设置" = "logging setup"): ERROR level,
#      written to 'zhi.log', opened with filemode 'w'.
#   4. If the per-run .xlsx workbook named after file_name already exists, unpickle
#      "queue-collec.pkl" into `queue`, load the workbook, take its active sheet,
#      set data_rows = sheet.max_row - 1, and print a message (the Chinese literal
#      reads "previous progress loaded!").
# NOTE(review): pickle.load(open(...)) never closes the file handle, and unpickling
# is unsafe if the .pkl file can come from an untrusted source.
# answer.save(answer.question.title) if os.path.isfile(TOKEN_FILE): client.load_token(TOKEN_FILE) else: try: client.login('email_or_phone', 'password') except NeedCaptchaException: with open('a.gif', 'wb') as f: f.write(client.get_captcha()) captcha = input('please input captcha:') client.login('email_or_phone', 'password', captcha) client.save_token(TOKEN_FILE) collection = client.collection(int(collection_id)) print(collection.title) #日志设置 logging.basicConfig(level=logging.ERROR, format='%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filename='zhi.log', filemode='w') if os.path.exists('知乎-{}.xlsx'.format(file_name)): queue = pickle.load(open("queue-collec.pkl", "rb")) wb = load_workbook('知乎-{}.xlsx'.format(file_name)) sheet = wb.active data_rows = sheet.max_row - 1 print("上次进度已加载!") else:
# Command-line entry fragment: parse the options, log in to Zhihu, resolve the
# target collection, build the worker pool, and (with --upload) fan the upload
# job out over the pool.
#
# Publishes two module-level globals: `client` (the logged-in ZhihuClient) and
# `collection_id` (the resolved collection id as a string).
# Exits with status 1 when --collection is not supplied.
parser = argparse.ArgumentParser()
parser.add_argument("--collection", "-c",
                    help="Set collection ID, e.g. 99549491")
parser.add_argument("--thread", "-t",
                    help="(Optional) Set the threads, 3 to 7 is recommended, default value is 5.")
parser.add_argument("--upload", "-u", action='store_true',
                    help="Upload the answers to Zhihu.")
parser.add_argument("--download", "-d", action='store_true',
                    help="Download the answers from Zhihu.")

# Parse and validate BEFORE logging in, so that `-h` and a missing --collection
# are reported without first forcing an interactive login.
args = parser.parse_args()
if not args.collection:
    print("YOU MUST TELL ME WHICH COLLECTION YOU WANT TO ADD IN!!")
    # SystemExit instead of the site-injected exit() helper (which is absent
    # under `python -S` / frozen builds); non-zero status marks the failure.
    raise SystemExit(1)

global client
client = ZhihuClient()
client.login_in_terminal()
# Not used in the visible part of this excerpt; kept because later code
# (outside this view) may rely on it.
me = client.me()

print("Selected collection: ", str(args.collection))
collection = client.collection(int(args.collection))
global collection_id
collection_id = str(collection.id)

if args.thread:
    print("Thread amount: ", str(args.thread))
    pool = ThreadPool(int(args.thread))
else:
    print("Thread amount: 5 (DEFAULT)")
    pool = ThreadPool(5)

if args.upload:
    # NOTE(review): `get_answers` is passed as the iterable and is defined
    # outside this excerpt -- confirm it is an iterable of work items rather
    # than a function that still needs to be called.
    pool.map(do_upload, get_answers)
# NOTE(review): this excerpt is collapsed onto a single physical line (not valid
# Python as written) and is truncated inside a `try:` whose handler is not visible.
# Visible statements, in order:
#   - Load a saved token from 'tokem.pkl' (NOTE(review): possibly a typo for
#     'token.pkl' -- confirm the file name on disk).
#   - Iterate the answers of collection 19928423, keeping an answer only when
#     random.randint(0, 50) == 17, i.e. a random sample of the answers.
#   - Stop the outer loop once convCont exceeds 200000.
#   - For each comment of a kept answer, read its replies and content; for each
#     reply, when i == 0 only `j = 0` is set, otherwise a dict is built with
#     "id" (convCont), "post" (the comment content) and "res" (the reply content),
#     both with literal "<p>" / "</p>" tags removed.
# Where `i` and `convCont` are advanced, and what `res` / `j` are used for, lies
# beyond the visible text.
from zhihu_oauth import ZhihuClient import json import random client = ZhihuClient() client.load_token('tokem.pkl') convCont = 0 colle = client.collection(19928423) for answer in colle.answers: random_num = random.randint(0, 50) if not (random_num == 17): continue res = [] comments = answer.comments if convCont > 200000: break for comm in comments: try: conver = comm.replies a = comm.content i = 0 for item in conver: if i == 0: j = 0 else: comItem = {} comItem["id"] = convCont comItem["post"] = a.replace("<p>", "").replace("</p>", "") comItem["res"] = item.content.replace("<p>", "").replace( "</p>", "")
# NOTE(review): this excerpt is collapsed onto a single physical line (not valid
# Python as written) and is truncated on a dangling `if run_time < 60:`.
# Visible statements, in order:
#   - Record start_time (inline note "初始时间戳" = "initial timestamp").
#   - Section "登录" ("login"): create a ZhihuClient and load a token from an
#     absolute, user-specific path.
#   - Section "收藏夹模块" ("collection module"): fetch collection 10000 and print,
#     each with a Chinese label, its answer_count, answers, comment_count, comments,
#     created_time, creator, description, follower_count, followers, id, is_public,
#     title and updated_time. Only updated_time is formatted with time.strftime /
#     time.localtime; created_time is printed as returned.
#   - Section "运行时间计时" ("run-time timing"): compute run_time as the elapsed
#     seconds since start_time; the branch for run_time < 60 (inline note
#     "两位小数的秒" = "seconds with two decimals") is cut off.
# `lxml.html` and `requests` are imported but not used in the visible part.
from lxml import html import requests, time, zhihu_oauth start_time = time.time() # 初始时间戳 # ========================登录======================== from zhihu_oauth import ZhihuClient client = ZhihuClient() client.load_token('/Users/alicewish/我的坚果云/token.pkl') # ============收藏夹模块============ cid = 10000 collection = client.collection(cid) print('答案数', collection.answer_count) print('答案', collection.answers) print('评论数', collection.comment_count) print('评论', collection.comments) print('创建时间', collection.created_time) print('创建者', collection.creator) print('描述', collection.description) print('关注人数', collection.follower_count) print('关注人', collection.followers) print('收藏夹ID', collection.id) print('是否公开', collection.is_public) print('标题', collection.title) print('更新时间', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(collection.updated_time))) # ================运行时间计时================ run_time = time.time() - start_time if run_time < 60: # 两位小数的秒