def getLatestBestAnserwerAndSave():
    """Save the first 20 best answers of Zhihu topic 19550867.

    The client is authenticated from the token cached in ``token.pkl`` when
    that file exists; otherwise an interactive terminal login is performed
    and the resulting token is cached there.

    Every best answer is wrapped with ``ansItem``, converted with
    ``ansItem2artical`` and persisted through the ``save()`` method of the
    converted object.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import islice

    ans_num = 20
    TOKEN_FILE = 'token.pkl'

    client = ZhihuClient()
    if os.path.isfile(TOKEN_FILE):
        client.load_token(TOKEN_FILE)
    else:
        client.login_in_terminal()
        client.save_token(TOKEN_FILE)

    java = client.topic(19550867)
    # islice stops after ans_num items, replacing the manual counter/break.
    for answ in islice(java.best_answers, ans_num):
        ansItem2artical(ansItem(answ)).save()
def start(self):
    """Log in to Zhihu interactively and cache the token.

    The token is written to ``Path.ZHIHUTOKEN``.  When the client raises
    ``NeedLoginException`` a hint is printed and the process exits through
    ``sys.exit()``.
    """
    try:
        zhihu = ZhihuClient()
        zhihu.login_in_terminal()
        zhihu.save_token(Path.ZHIHUTOKEN)
    except NeedLoginException:
        # Call-form print emits the same text with or without print_function.
        print(u"Oops, please try again.")
        sys.exit()
def LoginZhihuClient(token_name):
    """Return the ``me`` object of an authenticated Zhihu client.

    The token is loaded from, or after an interactive login written to, the
    fixed file ``liuximing.pkl``.

    :param token_name: not used by the body; the token path is hard-coded.
        NOTE(review): presumably intended to select the token file -
        confirm with callers before wiring it in.
    :return: the result of ``client.me()``.
    """
    token_path = 'liuximing.pkl'
    zhihu = ZhihuClient()

    if not os.path.isfile(token_path):
        zhihu.login_in_terminal()
        zhihu.save_token(token_path)
    else:
        zhihu.load_token(token_path)

    return zhihu.me()
def zhihu_login():
    """Return an authenticated ZhihuClient and print the account name.

    A token cached in ``TOKEN_FILE`` is reused when present; otherwise the
    client logs in with the embedded account and caches the new token.

    :return: the authenticated client.
    """
    zhihu = ZhihuClient()

    if not os.path.isfile(TOKEN_FILE):
        # NOTE(review): credentials are hard-coded in source - consider
        # moving them to configuration.
        zhihu.login('*****@*****.**', 'a4906639')
        zhihu.save_token(TOKEN_FILE)
    else:
        zhihu.load_token(TOKEN_FILE)

    print(zhihu.me().name)
    return zhihu
def login(self):
    """Return a ZhihuClient authenticated via ``token.pkl``.

    Reuses the cached token when the file exists; otherwise logs in
    interactively in the terminal and caches the token.

    :return: the authenticated client.
    """
    token_path = 'token.pkl'
    zhihu = ZhihuClient()

    if not os.path.isfile(token_path):
        zhihu.login_in_terminal()
        zhihu.save_token(token_path)
    else:
        zhihu.load_token(token_path)

    return zhihu
def get_client(self, reset_=0):
    """Return a ZhihuClient authenticated from ``TOKEN_FILE``.

    :param reset_: any value other than ``0`` forces a fresh interactive
        login that overwrites the cached token.
    :return: the authenticated client.
    """
    client = ZhihuClient()
    if reset_ != 0 or not os.path.isfile(TOKEN_FILE):
        # A fresh login leaves the client authenticated already, so the
        # token that was just written does not need to be read back.
        client.login_in_terminal()
        client.save_token(TOKEN_FILE)
    else:
        client.load_token(TOKEN_FILE)
    return client
class Login():
    """Holds a ZhihuClient and a token file named after the Python major version."""

    def __init__(self):
        # One token file per interpreter major version, e.g. 'token.pkl.3'.
        major = sys.version_info[0]
        self.TOKEN_FILE = 'token.pkl.' + str(major)
        self.client = ZhihuClient()

    def client_login(self):
        """Authenticate ``self.client`` and return it.

        Loads the cached token when ``self.TOKEN_FILE`` exists; otherwise
        logs in interactively and caches the token.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        return self.client
def zhihu_login():
    r"""
    Log in to Zhihu.

    Reuses the token cached in ``TOKEN_FILE_NAME`` when that file exists;
    otherwise logs in interactively and caches the token.

    :return: the client after login
    """
    zhihu = ZhihuClient()

    token_missing = not os.path.isfile(TOKEN_FILE_NAME)
    if token_missing:
        zhihu.login_in_terminal()
        zhihu.save_token(TOKEN_FILE_NAME)
    else:
        zhihu.load_token(TOKEN_FILE_NAME)

    return zhihu
def login(username, password):
    """Log in to Zhihu and cache the token in ``token.pkl``.

    When the first attempt raises ``NeedCaptchaException`` the captcha image
    is written to ``a.gif``, the user is prompted for its text, and the
    login is retried with that captcha.

    :param username: account name passed to ``ZhihuClient.login``.
    :param password: password passed to ``ZhihuClient.login``.
    """
    from zhihu_oauth import ZhihuClient
    from zhihu_oauth.exception import NeedCaptchaException

    zhihu = ZhihuClient()
    try:
        zhihu.login(username, password)
    except NeedCaptchaException:
        # Captcha required: save the image, ask for its text, log in again.
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(zhihu.get_captcha())
        captcha = input('please input captcha:')
        zhihu.login(username, password, captcha)
    else:
        # The success message is only printed for a captcha-free login.
        print(u"登陆成功!")

    zhihu.save_token('token.pkl')
def login(account, password):
    """Return a ZhihuClient authenticated from a cached token or by password.

    The token in ``TOKEN_FILE`` is tried first.  When that file does not
    exist the client logs in with ``account``/``password``, handling a
    captcha challenge by saving the image to ``./captcha/a.gif`` and
    prompting for its text, and then stores the token in
    ``./token/token.pkl``.

    Errors raised by the login or by saving the token now propagate to the
    caller; previously a ``return`` inside ``finally`` discarded them and
    handed back a client that might not be logged in.

    :param account: account name passed to ``ZhihuClient.login``.
    :param password: password passed to ``ZhihuClient.login``.
    :return: the authenticated client.
    """
    client = ZhihuClient()
    try:
        client.load_token(TOKEN_FILE)
    except FileNotFoundError:
        try:
            client.login(account, password)
        except NeedCaptchaException:
            # Save the captcha image, prompt for its text, then log in again.
            with open('./captcha/a.gif', 'wb') as f:
                f.write(client.get_captcha())
            captcha = input('please input captcha:')
            client.login(account, password, captcha)
        # NOTE(review): the token is loaded from TOKEN_FILE but saved to a
        # literal path - confirm both refer to the same file.
        client.save_token('./token/token.pkl')
    return client
def main():
    """Log in, then crawl the answers of every user id listed in ``USER_CSV_PATH``.

    Each line of the file is one Zhihu user id.  The answers returned by
    ``MyCrawler.crawling_answer`` are cut into groups of 60 items when their
    count is a multiple of 60, and handed to ``save_to_csv_a`` after each
    user.
    """
    client = ZhihuClient()
    try:
        client.login_in_terminal(username=email_or_phone, password=password)
        # Keep the session so later runs can log in from the token.
        client.save_token(TOKEN_FILE)
    except NeedCaptchaException:
        # Save the captcha image, prompt for its text, then log in again.
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(client.get_captcha())
        captcha = input('请输入验证码: ')
        client.login(email_or_phone, password, captcha)

    group_size = 60
    max_lines = 1          # flush to CSV after this many users
    pending_groups = []
    users_pending = 0

    with open(USER_CSV_PATH) as file:
        for raw_line in file:
            crawl_id = raw_line.strip('\n')
            crawler = MyCrawler(crawl_id, client)
            print('------>>>| 待爬取的用户的知乎id为: ', crawl_id)
            answers = crawler.crawling_answer(crawl_id)
            print('该用户爬取完毕'.center(60, '*'))

            if len(answers) % group_size != 0:
                print('无用的输出!')
            else:
                # Consecutive slices of 60 items each.
                pending_groups.extend(
                    answers[start:start + group_size]
                    for start in range(0, len(answers), group_size)
                )

            users_pending += 1
            # NOTE(review): groups left over when the file ends before
            # max_lines is reached are not flushed - only matters if
            # max_lines is raised above 1.
            if users_pending == max_lines:
                save_to_csv_a(pending_groups, client)
                pending_groups = []
                users_pending = 0

    print('全部用户采集完毕'.center(40, '*'))
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

# Fill in the e-mail address / phone number and the matching password.
email = ""
password = ""

client = ZhihuClient()

try:
    client.login(email, password)
except NeedCaptchaException:
    # Captcha required: store the image, ask for its text, log in again.
    with open('a.gif', 'wb') as captcha_file:
        captcha_file.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(email, password, captcha)

# Cache the token so later runs can skip the password login.
client.save_token('token.pkl')
# Authenticate: reuse the cached token when it exists, otherwise log in with
# the test account (handling a captcha challenge) and cache the new token.
if os.path.lexists(token_file):
    client.load_token(token_file)
    print('load token success')
else:
    try:
        login_result = client.login(test_email, test_password)
    except NeedCaptchaException:
        # Save the captcha image, prompt for its text, then log in again.
        print(u'登录失败,需要输入验证码')
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = raw_input(u'please input captcha:')
        login_result = client.login(test_email, test_password, captcha)
    print('login result => ')
    print(login_result)
    client.save_token(token_file)
    print('save token success')

# question
# The JSON is written into an .html file; opening it in Chrome and choosing
# "Preview" in the developer tools (F12) shows the rendered JSON structure.
response_file_uri = './question_response.html'
question_id = 35005800
question = client.question(question_id)
data = question.pure_data
response_json = json.dumps(data)
# "with" closes the file (and flushes the write) even if write() fails;
# the handle was previously left open.
with open(response_file_uri, 'w+') as response_file:
    response_file.write(response_json)
print(u"数据保存完成")

# Same output convention as above, for a user profile.
response_file_uri = './people_response.html'
people_id = '404-Page-Not-found'
people = client.people(people_id)
class Crawl:
    """Fetches Zhihu Live listings and messages and stores them via MyLive / LiveContent."""

    def __init__(self):
        self.client = ZhihuClient()

    def login(self, username, password):
        """Authenticate ``self.client`` for ``username``.

        A token cached under ``app/Resource/<username>.token`` is reused
        when present.  Otherwise a password login is performed (with a
        captcha retry through ``a.gif`` when required) and the token is
        cached at that path.
        """
        token_path = 'app/Resource/' + username + '.token'
        if os.path.isfile(token_path):
            self.client.load_token(token_path)
            return

        try:
            self.client.login(username, password)
        except NeedCaptchaException:
            # Save the captcha image, prompt for its text, then log in again.
            with open('a.gif', 'wb') as captcha_file:
                captcha_file.write(self.client.get_captcha())
            captcha = input('please input captcha:')
            self.client.login(username, password, captcha)
        self.client.save_token(token_path)

    def get_live_list(self):
        """Return the ``lives`` collection of the logged-in user."""
        return self.client.me().lives

    @staticmethod
    def save_live_list(livedata):
        """Persist one live object as a ``MyLive`` document."""
        MyLive(
            live_id=livedata.id,
            title=livedata.title,
            speaker=livedata.speaker.name,
            speaker_description=livedata.speaker.description,
            live_description=livedata.description,
            seats_count=livedata.seat_taken,
            price=livedata.fee,
        ).save()

    def live_list_work(self):
        """Store every live of the current user that is not stored yet."""
        for live in self.get_live_list():
            already_stored = MyLive.objects(live_id=live.id)
            if already_stored:
                continue
            self.save_live_list(live)

    def get_live_content(self, live_id, before_id=''):
        """Fetch one page of messages for ``live_id`` and return the parsed JSON."""
        endpoint = LIVECONTENT_URL.format(live_id, before_id)
        response = self.client._session.get(endpoint)
        return json.loads(response.content)

    def save_live_content_image(self, id, url):
        """Download ``url`` and write it to ``app/Resource/<id>.png``."""
        payload = self.client._session.get(url).content
        target = 'app/Resource/' + str(id) + '.png'
        with open(target, 'wb') as image_file:
            image_file.write(payload)

    @staticmethod
    def save_live_content(live_id, livedata):
        """Persist every message in ``livedata['data']`` that is not stored yet."""
        for message in livedata['data']:
            if LiveContent.objects(message_id=message['id']):
                continue

            # Only audio and image messages carry a media URL.
            kind = message['type']
            if kind == 'audio':
                media_url = message['audio']['url']
            elif kind == 'image':
                media_url = message['image']['full']['url']
            else:
                media_url = ''

            text = message['text'] if 'text' in message else ''
            if 'replies' in message:
                replies = ','.join(message['replies'])
            else:
                replies = ''

            LiveContent(
                message_id=int(message['id']),
                sender=message['sender']['member']['name'],
                type=kind,
                content=text,
                url=media_url,
                reply=replies,
                likes=message['likes']['count'],
                created_at=datetime.fromtimestamp((message['created_at'])),
                live_title=live_id,
            ).save()

    def live_content_work(self, id):
        """Store all messages of the ``MyLive`` document ``id`` and download its images."""
        live = MyLive.objects(id=id).first()
        # The Zhihu live id is used for fetching ...
        page = self.get_live_content(live.live_id)
        while page['unload_count'] > 0:
            # ... while the Mongo document id is used for storing.
            self.save_live_content(live.id, page)
            page = self.get_live_content(live.live_id, page['data'][0]['id'])
        # NOTE(review): the page fetched when unload_count reaches 0 is not
        # saved - confirm that this final page carries no new messages.
        print('success')

        for item in LiveContent.objects(live_title=live.id, type='image'):
            self.save_live_content_image(item.id, item.url)
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

client = ZhihuClient()
user = '******'
pwd = '6666666'

try:
    client.login_in_terminal(user, pwd)
    print(u"登陆成功!")
except NeedCaptchaException:
    # A captcha is required: save the image, prompt for its text and retry.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    # The retry goes through login(), which takes the captcha as its third
    # argument.  NOTE(review): login_in_terminal's third positional
    # parameter is not a captcha according to the zhihu_oauth docs -
    # confirm against the installed version.
    client.login(user, pwd, captcha)
    print(u"登陆成功!")

# Save the token; this only works while the client is logged in.
client.save_token('token.pkl')

# Once a token file exists, later runs can load it directly instead of
# logging in again:
# client.load_token('filename')
# client.login_in_terminal()  # or ('*****@*****.**', 'password')
# In[2]: # login ZhihuClient client = ZhihuClient() user = '******' pwd = '961204yy' try: client.login(user, pwd) except NeedCaptchaException: # 保存验证码并提示输入,重新登录 with open('a.gif', 'wb') as f: f.write(client.get_captcha()) captcha = input('please input captcha:') client.login(user, pwd, captcha) client.save_token('token.kpl') # TOKEN_FILE = 'token.pkl' # # if os.path.isfile(TOKEN_FILE): # client.load_token(TOKEN_FILE) # else: # client.login_in_terminal() # client.save_token(TOKEN_FILE) def save_answer(topic, answer_numbers=0, save_path='zhihu'): # if not os.path.exists(save_path): # os.mkdir(save_path) # topic_path = save_path + '/' + topic.name # if not os.path.exists(topic_path): # os.mkdir(topic_path)
# Authenticate the module-level client: load the cached token when the file
# exists, otherwise log in with the placeholder credentials.
token = './XXX.pk1'
client = ZhihuClient()
try:
    if os.path.exists(token):
        client.load_token(token)
    else:
        client.login('username', 'passwd')
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = raw_input('please input captcha:')
    client.login('username', 'passwd', captcha)
# The token is written back on every run, including after a plain load.
client.save_token(token)


# NOTE(review): this definition is cut off inside the set literal below; the
# rest of the function is outside this view.
def dump_activities(pid):
    person = client.people(pid)
    # ActType.COLLECT_ANSWER is listed twice; a set keeps a single copy.
    filter_types = {
        ActType.COLLECT_ANSWER,
        ActType.COLLECT_ANSWER,
        ActType.COLLECT_ARTICLE,
        ActType.CREATE_ANSWER,
        ActType.CREATE_ARTICLE,
        ActType.CREATE_PIN,
        ActType.CREATE_QUESTION,
        ActType.FOLLOW_COLLECTION,
        ActType.FOLLOW_COLUMN,
        ActType.FOLLOW_QUESTION,
def login():
    """Log in and save every answer under the unanswered questions of topic 19560072.

    The client is authenticated from ``token.pkl`` (interactive login and
    token caching when the file is missing).  For each question the id,
    title and answer count are printed; for each answer the author's id and
    name are printed and the answer is saved under ``Data\\Gene\\``.  The
    summed answer count is printed at the end.
    """
    token_path = 'token.pkl'
    client = ZhihuClient()
    if not os.path.isfile(token_path):
        client.login_in_terminal()
        client.save_token(token_path)
    else:
        client.load_token(token_path)

    # 19560072: genetically modified organisms.
    # Other topics tried: 19578906 (climate change), 19551296 (online games).
    topic = client.topic(19560072)

    total_answers = 0
    for question in topic.unanswered_questions:
        for field in (question.id, question.title, question.answer_count):
            print(field)
        total_answers += question.answer_count

        for answer in question.answers:
            print(answer.author.id, answer.author.name)
            # Directory: "<question id>#<title>"; file: "<author id>#<name>".
            folder = 'Data\\Gene\\' + str(question.id) + '#' + question.title
            file_name = str(answer.author.id) + '#' + answer.author.name
            answer.save(folder, file_name)

    print("总共有{0}个回答".format(total_answers))
# Echo the credential lines, first as a whole and then one per line.
print(text_readline)
for entry in text_readline:
    print(entry)

# ================ Read the account and password ================
account = text_readline[0]
passward = text_readline[1]

client = ZhihuClient()
try:
    client.login(account, passward)
except NeedCaptchaException:
    # Store the captcha image, ask for its text, then log in again.
    with open('a.gif', 'wb') as captcha_file:
        captcha_file.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(account, passward, captcha)

# Saving the token only works while the client is logged in.
client.save_token('/Users/alicewish/我的坚果云/token.pkl')

# ================ Report the elapsed run time ================
run_time = time.time() - start_time
if run_time >= 3600:
    # Hours, minutes and seconds without decimals.
    elapsed = "耗时:{:.0f}时{:.0f}分{:.0f}秒".format(
        run_time // 3600, run_time % 3600 // 60, run_time % 60)
elif run_time >= 60:
    # Minutes and seconds without decimals.
    elapsed = "耗时:{:.0f}分{:.0f}秒".format(*divmod(run_time, 60))
else:
    # Seconds with two decimals.
    elapsed = "耗时:{:.2f}秒".format(run_time)
print(elapsed)
# Echo every credential line.
for entry in text_readline:
    print(entry)

# ================ Read the account and password ================
account = text_readline[0]
passward = text_readline[1]

client = ZhihuClient()
try:
    client.login(account, passward)
except NeedCaptchaException:
    # Store the captcha image, ask for its text, then log in again.
    with open('a.gif', 'wb') as captcha_file:
        captcha_file.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(account, passward, captcha)

# Saving the token only works while the client is logged in.
client.save_token('/Users/alicewish/我的坚果云/token.pkl')

# ================ Report the elapsed run time ================
run_time = time.time() - start_time
if run_time >= 3600:
    # Hours, minutes and seconds without decimals.
    elapsed = "耗时:{:.0f}时{:.0f}分{:.0f}秒".format(
        run_time // 3600, run_time % 3600 // 60, run_time % 60)
elif run_time >= 60:
    # Minutes and seconds without decimals.
    elapsed = "耗时:{:.0f}分{:.0f}秒".format(*divmod(run_time, 60))
else:
    # Seconds with two decimals.
    elapsed = "耗时:{:.2f}秒".format(run_time)
print(elapsed)
import pandas as pd
from zhihu_oauth import ZhihuClient

client = ZhihuClient()

from zhihu_oauth.exception import NeedCaptchaException

# NOTE(review): credentials are hard-coded in source - consider moving them
# to configuration.
_ACCOUNT = '*****@*****.**'
_PASSWORD = 'justbemyself1998'

try:
    client.login(_ACCOUNT, _PASSWORD)
except NeedCaptchaException:
    # Store the captcha image, ask for its text, then log in again.
    with open('a.gif', 'wb') as captcha_file:
        captcha_file.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(_ACCOUNT, _PASSWORD, captcha)

# Cache the token for later runs.
client.save_token("token.pkl")
class ZhiHu(object):
    """Collects Zhihu questions into ``zhihu.db`` and exports them as Markdown."""

    TOKEN_FILE = 'token.pkl'

    def __init__(self):
        """Log in first, then open the SQLite helper."""
        self.login_zhihu()
        self.db = EasySqlite('zhihu.db')

    def login_zhihu(self):
        """Create ``self.client`` and authenticate it.

        The token cached in ``TOKEN_FILE`` is reused when the file exists;
        otherwise an interactive login is performed and the token cached.
        """
        self.client = ZhihuClient()
        if not os.path.isfile(self.TOKEN_FILE):
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        else:
            self.client.load_token(self.TOKEN_FILE)

    def save_quesions(self, topic_id):
        """Store the topic's unanswered questions that have at least 10 answers.

        :param topic_id: id of the Zhihu topic to read.
        """
        topic = self.client.topic(topic_id)
        print(topic)

        insert_sql = 'replace into questions values(?,?,?,?,?,?)'
        for question in topic.unanswered_questions:
            if question.answer_count >= 10:
                row = [
                    question.id,
                    question.title,
                    question.follower_count,
                    question.answer_count,
                    question.comment_count,
                    topic_id,
                ]
                print(row)
                ok = self.db.update(insert_sql, args=row)
                print('insert success!' if ok else 'insert error!')

    def save_answer_info(self, question_id):
        """Print a summary of the question's first answer and save that answer.

        :param question_id: id of the question whose answers are read.
        """
        question = self.client.question(question_id)
        print(question.title)

        for first_answer in question.answers:
            print(first_answer.comment_count, first_answer.excerpt,
                  first_answer.question, first_answer.thanks_count,
                  first_answer.voteup_count)
            first_answer.save()
            # Only the first answer is processed.
            break

    def to_md(self, topic, file_name):
        """Write up to 1000 stored questions of ``topic`` to a Markdown file.

        Rows are ordered by follower count, descending; each output line is
        numbered from 1 and links to the question page.

        :param topic: topic id used to filter the ``questions`` table.
        :param file_name: path of the UTF-8 file to write.
        """
        # NOTE(review): the topic id is interpolated into the SQL text -
        # confirm it never comes from untrusted input, or bind it as a
        # parameter if EasySqlite.query supports that.
        sql = "select * from questions where topic_id = '%s' order by follower_count desc limit 1000" % topic
        rows = self.db.query(sql)
        line_tmp = "%s. [%s](https://www.zhihu.com/question/%s) 关注数:%s 回答数:%s 评论数:%s<br>\n"

        with open(file_name, 'w', encoding='utf8') as out:
            for number, item in enumerate(rows, 1):
                out.write(line_tmp % (
                    number,
                    item['title'],
                    item['id'],
                    item['follower_count'],
                    item['answer_count'],
                    item['comment_count'],
                ))
from timeout import timeout
import os
from utils import print_err
from pymongo import MongoClient

MAX_SLEEP_TIME = 15
# The first command-line argument selects both the token file and the row
# of the credential list used below.
Cookies_File = './cookies/cookies%s.json' % sys.argv[1]
global client
client = ZhihuClient()
if os.path.isfile(Cookies_File):
    client.load_token(Cookies_File)
else:
    # Each line of the list holds tab-separated fields; the first two are
    # passed to login_in_terminal.
    # NOTE(review): the file object is never closed explicitly.
    client_info = open('./cookies/client_info_list.data').readlines()
    client_info = client_info[int(sys.argv[1])].strip().split('\t')
    client.login_in_terminal(client_info[0], client_info[1])
    client.save_token(Cookies_File)


# NOTE(review): this definition is cut off inside the try/for below; the
# rest of the function is outside this view.
def get_user_questions(uname):
    global client
    # An empty user name is ignored.
    if uname == '':
        return
    print(uname)
    user_questions = dict()
    try:
        people = client.people(uname)
        # The user name is stored as both '_id' and 'owner'.
        user_questions['_id'] = uname
        user_questions['owner'] = uname
        user_questions['questions'] = []
        for q in people.following_questions:
# coding=utf-8 from __future__ import unicode_literals, print_function import os from zhihu_oauth import ZhihuClient TOKEN_FILE = 'ZHIHUTOKEN.pkl' client = ZhihuClient() if os.path.isfile(TOKEN_FILE): client.load_token(TOKEN_FILE) else: client.login_in_terminal() client.save_token(TOKEN_FILE)
class zhihuspider(basespider):
    """Spider that reads a Zhihu user's recent activities through ZhihuClient."""

    def __init__(self):
        # Run the base-class configuration/preparation first, then this
        # class's own versions, and finally authenticate.
        super().loadConfig()
        super().prepare()
        self.loadConfig()
        self.prepare()
        self.login()

    def loadConfig(self):
        """Read the 'zhihu' section of ``self.allConfig`` and derive paths and URL templates."""
        self.config = self.allConfig['zhihu']
        self.data_path = self.socialRoot + self.config['data_path']
        self.TOKEN_FILE = self.data_path + self.config['TOKEN_FILE']
        self.friends_file = self.data_path + self.config['friends_file']
        self.url_template_question = "https://www.zhihu.com/question/%s"
        self.url_template_answer = "https://www.zhihu.com/question/%s/answer/%s"
        self.url_template_article = "https://zhuanlan.zhihu.com/p/%s"

    def prepare(self):
        """Create the data directory, load the cached name map and create the client."""
        if not os.path.isdir(self.data_path):
            os.makedirs(self.data_path)
        if os.path.isfile(self.friends_file):
            # NOTE(review): pickle.load executes arbitrary code from the
            # file - presumably this file is only ever written by
            # followings2name_map; confirm it cannot come from elsewhere.
            with open(self.friends_file, "rb") as f:
                self.name_map = pickle.load(f)
        else:
            self.name_map = dict()
        self.client = ZhihuClient()

    def login(self):
        """Authenticate ``self.client`` and store the current user in ``self.me``."""
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        self.me = self.client.me()
        # presumably `over` marks an unusable account/session - TODO confirm
        if self.me.over:
            logging.error("login failed! Reason is " + self.me.over_reason)
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
            # NOTE(review): self.me is not refreshed after this re-login.

    def followings2name_map(self, me):
        """Map the name of every user ``me`` follows to that user's id and pickle the map."""
        for peo in me.followings:
            self.name_map[peo.name] = peo.id
        with open(self.friends_file, "wb") as f:
            pickle.dump(self.name_map, f)

    def getActivities(self, userid, count=10, timeOldest=None, timeLatest=None):
        """
        Return up to ``count`` activity entries (dicts) of the given user.

        ``timeOldest`` / ``timeLatest`` are optional time sequences whose
        first six fields are passed to ``datetime.datetime``; activities
        newer than ``timeLatest`` are skipped and iteration stops at the
        first activity older than ``timeOldest``.

        About actionType:
        CREATE_ANSWER
        CREATE_ARTICLE
        CREATE_QUESTION
        FOLLOW_QUESTION
        VOTEUP_ANSWER
        """
        def getTargetText_Topic(target, actType):
            # Returns (text, topics, source url) for the activity target;
            # empty values for target types other than Answer, Question
            # and Article.  actType is not used.
            if isinstance(target, zhihu_oauth.Answer):
                return (target.content, target.question.topics,
                        self.url_template_answer % (target.question.id, target.id))
            elif isinstance(target, zhihu_oauth.Question):
                return (target.detail, target.topics,
                        self.url_template_question % (target.id))
            elif isinstance(target, zhihu_oauth.Article):
                return (target.content, [],
                        self.url_template_article % (target.id))
            else:
                return ("", [], "")

        if isinstance(userid, int):
            userid = str(userid)
        # Keep the id as passed in for the error message below.
        backuserid = userid
        dtLatest = datetime.datetime(*timeLatest[0:6]) if timeLatest else None
        dtOldest = datetime.datetime(*timeOldest[0:6]) if timeOldest else None
        pp = self.client.people(userid)
        if pp.over:
            # The lookup by id failed; treat `userid` as a display name and
            # resolve it through the name map, refreshing the map from the
            # current user's followings when the name is unknown.
            if userid not in self.name_map:
                try:
                    self.followings2name_map(self.me)
                except Exception as e:
                    logging.error(str(e))
            if userid in self.name_map:
                userid = self.name_map[userid]
                pp = self.client.people(userid)
        if pp.over:
            return []
        activityList = []
        cnt = 0
        for act in pp.activities:
            try:
                targetInfo = getTargetText_Topic(act.target, act.type)
                entry = {
                    'username': pp.name,
                    'avatar_url': pp.avatar_url,
                    'headline': pp.headline,
                    'time': time.localtime(act.created_time),
                    'actionType': act.type,
                    'summary': act2str(act),
                    'targetText': targetInfo[0],
                    'topics': list(map(lambda topic: topic.name, targetInfo[1])),
                    'source_url': targetInfo[2]
                }
                # Collect the src value of every <img> tag in the text.
                imglist = re.findall(r'(?<=<img src=")(.*?)(?=")', entry['targetText'])
                # An article's cover image, when present, goes first.
                if isinstance(act.target, zhihu_oauth.Article) and act.target.image_url:
                    imglist[0:0] = [act.target.image_url]
                if imglist:
                    entry['imgs'] = imglist
                dt = datetime.datetime(*entry['time'][0:6])
                # Too new: skip this activity but keep scanning.
                if dtLatest and dtLatest < dt:
                    continue
                # Too old: stop scanning (assumes activities arrive
                # newest-first - TODO confirm).
                if dtOldest and dtOldest > dt:
                    break
                activityList.append(entry)
                cnt += 1
                if cnt >= count:
                    break
            except Exception as e:
                # A failing activity is logged and skipped.
                logging.error("getActivities of " + backuserid + " failed")
                traceback.print_exc()
        return activityList
from __future__ import unicode_literals, print_function import os from zhihu_oauth import ZhihuClient from zhihu_oauth import SearchType TOKEN_FILE = 'token.pkl' client = ZhihuClient() if os.path.isfile(TOKEN_FILE): client.load_token(TOKEN_FILE) else: client.login_in_terminal() client.save_token(TOKEN_FILE) me = client.me() print('name', me.name) print('headline', me.headline) print('description', me.description) print('following topic count', me.following_topic_count) print('following people count', me.following_topic_count) print('followers count', me.follower_count) print('voteup count', me.voteup_count) print('get thanks count', me.thanked_count) print('answered question', me.answer_count)
import pandas as pd
import os
import csv
from datetime import datetime
import time
from pymongo import MongoClient
import json
from utils import Cleaner

TOKEN_FILE="token.pkl"

# Authenticate: reuse the cached token when it exists, otherwise log in
# interactively and cache the new token.  The token is saved once, under
# TOKEN_FILE; a second save to the identical literal path 'token.pkl' was
# redundant and has been dropped.
client = ZhihuClient()
if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)

question_id = 294220610
topic_id = 19575211
topic = client.topic(topic_id)
#print("topic {} has {} questions\n".format(topic.name, topic.questions_count))
#print("topic {} has {} followers\n".format(topic.name, topic.followers_count))
# for act in topic.activities:
#     if(isinstance(act, Answer)):
#         print("this answer content is {}\n".format(act.content))
#
#     else: