Beispiel #1
0
# -*- coding: utf-8 -*-
import os
from zhihu_oauth import ZhihuClient

# Path of the pickled login token that is reused between runs.
TOKEN_FILE = 'token.pkl'

# Log in: prompt in the terminal only when no saved token is available,
# and cache the fresh token for the next run.
client = ZhihuClient()

if not os.path.isfile(TOKEN_FILE):
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)

# Fetch one collection by its numeric id and show how many answers it holds.
collection = client.collection(19825336)
print(collection.answer_count)

# Error raised by the print call above:
# Traceback (most recent call last):
#   File "C:/learnpython/zhihu_sp/collection.py", line 18, in <module>
#     print(collection.answer_count)
#   File "C:\Python27\lib\site-packages\zhihu_oauth\zhcls\other.py", line 57, in wrapper
#     return cls(cache['id'], cache, self._session)
# TypeError: 'int' object has no attribute '__getitem__'
        # answer.save(answer.question.title)
        


# Reuse the saved token when present; otherwise log in with credentials
# and store the resulting token for later runs.
if not os.path.isfile(TOKEN_FILE):
    try:
        client.login('email_or_phone', 'password')
    except NeedCaptchaException:
        # A captcha is required: write the image to disk so the user can
        # look at it, ask for the answer, then retry the login with it.
        with open('a.gif', 'wb') as image_file:
            image_file.write(client.get_captcha())
        captcha = input('please input captcha:')
        client.login('email_or_phone', 'password', captcha)
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)

# Look up the requested collection and print its title.
collection = client.collection(int(collection_id))
print(collection.title)
# Logging configuration: record ERROR and above in zhi.log, opened with
# mode 'w' so the file is overwritten on every run.
logging.basicConfig(
    filename='zhi.log',
    filemode='w',
    level=logging.ERROR,
    format='%(asctime)s %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# Resume a previous run when its workbook already exists; otherwise start a
# fresh run configured from the command line.
#
# No `global` statements are used here: this code runs at module level, so
# every assignment is already global, and declaring a name `global` after it
# has been bound or used earlier in the same module is a SyntaxError.
workbook_path = '知乎-{}.xlsx'.format(file_name)
if os.path.exists(workbook_path):
    # SECURITY NOTE: unpickling can execute arbitrary code. Only load a queue
    # file that this script wrote itself, never one from an untrusted source.
    # The `with` block makes sure the file handle is closed after loading.
    with open("queue-collec.pkl", "rb") as queue_file:
        queue = pickle.load(queue_file)
    wb = load_workbook(workbook_path)
    sheet = wb.active
    # presumably the first row is a header, hence the -1 — TODO confirm
    data_rows = sheet.max_row - 1
    print("上次进度已加载!")
else:
    parser = argparse.ArgumentParser()
    parser.add_argument("--collection", "-c", help="Set collection ID, e.g. 99549491")
    parser.add_argument("--thread", "-t", help="(Optional) Set the threads, 3 to 7 is recommended, default value is 5.")
    parser.add_argument("--upload", "-u", action='store_true', help="Upload the answers to Zhihu.")
    parser.add_argument("--download", "-d", action='store_true', help="Download the answers from Zhihu.")

    # Parse and validate the command line BEFORE the interactive login so
    # that `--help` or a missing collection id does not force the user
    # through a login first.
    args = parser.parse_args()

    if not args.collection:
        print("YOU MUST TELL ME WHICH COLLECTION YOU WANT TO ADD IN!!")
        # Same effect as the interactive-only `exit()` helper (exit status
        # unchanged), but does not depend on the `site` module.
        raise SystemExit

    client = ZhihuClient()
    client.login_in_terminal()
    me = client.me()

    print("Selected collection: ", str(args.collection))
    collection = client.collection(int(args.collection))
    collection_id = str(collection.id)

    if args.thread:
        print("Thread amount: ", str(args.thread))
        pool = ThreadPool(int(args.thread))
    else:
        print("Thread amount: 5 (DEFAULT)")
        pool = ThreadPool(5)

    if args.upload:
        # NOTE(review): `do_upload` and `get_answers` are defined outside this
        # view; `pool.map` needs an iterable as its second argument — verify
        # that `get_answers` is one and not a function that must be called.
        pool.map(do_upload, get_answers)
Beispiel #4
0
from zhihu_oauth import ZhihuClient
import json
import random

# Running count of collected conversation entries; the loop below uses it
# both as the entry id and as the stop condition.
convCont = 0

# Authenticate from a previously saved token.
# NOTE(review): 'tokem.pkl' may be a typo for 'token.pkl' — confirm the
# actual file name on disk before changing it.
client = ZhihuClient()
client.load_token('tokem.pkl')

# Collection whose answers are sampled for comment conversations.
colle = client.collection(19928423)

for answer in colle.answers:
    random_num = random.randint(0, 50)
    if not (random_num == 17):
        continue
    res = []
    comments = answer.comments
    if convCont > 200000:
        break
    for comm in comments:
        try:
            conver = comm.replies
            a = comm.content
            i = 0
            for item in conver:
                if i == 0:
                    j = 0
                else:
                    comItem = {}
                    comItem["id"] = convCont
                    comItem["post"] = a.replace("<p>", "").replace("</p>", "")
                    comItem["res"] = item.content.replace("<p>", "").replace(
                        "</p>", "")
from lxml import html
import requests, time, zhihu_oauth

start_time = time.time()  # Starting timestamp, later subtracted from the current time to get the run time

# ========================Login========================
from zhihu_oauth import ZhihuClient

# Authenticate from a previously saved token file.
client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ============ Collection module ============
cid = 10000
collection = client.collection(cid)

# Print each attribute next to its label. Attributes are read one at a time
# inside the loop, so each value is fetched right before it is printed.
for label, attr_name in (
    ('答案数', 'answer_count'),
    ('答案', 'answers'),
    ('评论数', 'comment_count'),
    ('评论', 'comments'),
    ('创建时间', 'created_time'),
    ('创建者', 'creator'),
    ('描述', 'description'),
    ('关注人数', 'follower_count'),
    ('关注人', 'followers'),
    ('收藏夹ID', 'id'),
    ('是否公开', 'is_public'),
    ('标题', 'title'),
):
    print(label, getattr(collection, attr_name))

# The update time is the only field shown as a formatted local date-time.
updated_at = time.localtime(collection.updated_time)
print('更新时间', time.strftime("%Y-%m-%d %H:%M:%S", updated_at))

# ================Run-time measurement================
run_time = time.time() - start_time
if run_time < 60:  # 两位小数的秒