def LoginZhihuClient(token_name):
    """Authenticate against Zhihu and return the current user's profile.

    A cached token is loaded from ``liuximing.pkl`` when that file exists.
    Otherwise an interactive terminal login is performed and the resulting
    token is written to that same file.

    NOTE(review): ``token_name`` is accepted but never read -- the token file
    name is hard-coded below. Confirm with callers whether the argument was
    meant to select the token file.

    Returns:
        The object returned by ``client.me()``.
    """
    token_path = 'liuximing.pkl'
    zhihu = ZhihuClient()

    if not os.path.isfile(token_path):
        # No cached token yet: ask for credentials and cache the new token.
        zhihu.login_in_terminal()
        zhihu.save_token(token_path)
    else:
        zhihu.load_token(token_path)

    return zhihu.me()
def zhihu_login():
    """Return a logged-in ``ZhihuClient`` and print the account's name.

    The token stored at the module-level ``TOKEN_FILE`` is reused when that
    file exists. Otherwise a password login is performed and the new token is
    saved to ``TOKEN_FILE``.

    Returns:
        The authenticated ``ZhihuClient`` instance.
    """
    session = ZhihuClient()

    if not os.path.isfile(TOKEN_FILE):
        # NOTE(review): credentials are hard-coded in source. They should be
        # moved to configuration or the environment and the password rotated.
        session.login('*****@*****.**', 'a4906639')
        session.save_token(TOKEN_FILE)
    else:
        session.load_token(TOKEN_FILE)

    print(session.me().name)
    return session
import os

from zhihu_oauth import ZhihuClient
from zhihu_oauth import SearchType

# File in which the login token is cached between runs.
TOKEN_FILE = 'token.pkl'

client = ZhihuClient()

# Reuse the cached token when it exists; otherwise log in interactively and
# cache the new token for the next run.
if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)

me = client.me()

# Print a short summary of the logged-in user's profile.
print('name', me.name)
print('headline', me.headline)
print('description', me.description)

print('following topic count', me.following_topic_count)
# Fixed: this line used to print following_topic_count a second time.
print('following people count', me.following_count)
print('followers count', me.follower_count)

print('voteup count', me.voteup_count)
print('get thanks count', me.thanked_count)

print('answered question', me.answer_count)
print('question asked', me.question_count)
print('collection count', me.collection_count)
import time

from lxml import html
import requests
import zhihu_oauth
from zhihu_oauth import ZhihuClient

# Timestamp taken when the script starts.
start_time = time.time()

# ======================== Login ========================
# The token is loaded from a fixed, machine-specific path.
client = ZhihuClient()
client.load_token('/Users/alicewish/我的坚果云/token.pkl')

# ======================== Me ========================
me = client.me()

# Disabled profile dump (labels translated to English):
# print('activities', me.activities)
# print('answer count', me.answer_count)
# print('answers', me.answers)
# print('articles', me.articles)
# print('article count', me.articles_count)
# print('avatar URL', me.avatar_url)
# print("user's industry", me.business)
# print('collected count', me.collected_count)
# print('collection count', me.collection_count)
# print('collections', me.collections)
# print('column count', me.column_count)
# print('columns', me.columns)
# print('column count', me.columns_count)
# created_at = time.localtime(me.created_at)
# print('created at', time.strftime("%Y-%m-%d %H:%M:%S", created_at))
# print('description', me.description)
# print('draft count', me.draft_count)
# print('education info', me.educations)
class Crawl:
    """Download the logged-in user's Zhihu Lives and their messages.

    Lives are stored as ``MyLive`` documents and individual messages as
    ``LiveContent`` documents. Tokens and downloaded images are written under
    ``app/Resource/``.
    """

    def __init__(self):
        self.client = ZhihuClient()

    def login(self, username, password):
        """Log in, reusing a cached per-user token file when one exists.

        When no token file exists a password login is attempted. If the
        client raises ``NeedCaptchaException`` the captcha image is written
        to ``a.gif``, the user is prompted for its text on stdin, and the
        login is retried with it. The token is then saved for later runs.
        """
        token_path = 'app/Resource/' + username + '.token'
        if os.path.isfile(token_path):
            self.client.load_token(token_path)
            return

        try:
            self.client.login(username, password)
        except NeedCaptchaException:
            # Save the captcha image, ask the user for it, then log in again.
            with open('a.gif', 'wb') as f:
                f.write(self.client.get_captcha())
            captcha = input('please input captcha:')
            self.client.login(username, password, captcha)
        self.client.save_token(token_path)

    def get_live_list(self):
        """Return the ``lives`` attribute of the logged-in user."""
        return self.client.me().lives

    @staticmethod
    def save_live_list(livedata):
        """Persist one live object as a new ``MyLive`` document."""
        new_live = MyLive(
            live_id=livedata.id,
            title=livedata.title,
            speaker=livedata.speaker.name,
            speaker_description=livedata.speaker.description,
            live_description=livedata.description,
            seats_count=livedata.seat_taken,
            price=livedata.fee,
        )
        new_live.save()

    def live_list_work(self):
        """Store every live of the user that is not already in ``MyLive``."""
        for live in self.get_live_list():
            exist = MyLive.objects(live_id=live.id)
            if not exist:
                self.save_live_list(live)

    def get_live_content(self, live_id, before_id=''):
        """Fetch one page of messages for ``live_id`` and return the parsed JSON.

        ``before_id`` is substituted into ``LIVECONTENT_URL`` together with
        ``live_id``; the default empty string requests the first page.
        """
        res = self.client._session.get(
            LIVECONTENT_URL.format(live_id, before_id))
        return json.loads(res.content)

    def save_live_content_image(self, id, url):
        """Download ``url`` and write it to ``app/Resource/<id>.png``."""
        content = self.client._session.get(url).content
        file = 'app/Resource/' + str(id) + '.png'
        with open(file, 'wb') as f:
            f.write(content)

    @staticmethod
    def save_live_content(live_id, livedata):
        """Store every message in ``livedata['data']`` that is not saved yet.

        Messages whose id already exists in ``LiveContent`` are skipped, so
        saving the same page twice does not create duplicates. ``live_id`` is
        stored in the ``live_title`` field of each new document.
        """
        for r in livedata['data']:
            exist = LiveContent.objects(message_id=r['id'])
            if exist:
                continue

            # Only audio and image messages carry a media URL.
            if r['type'] == 'audio':
                url = r['audio']['url']
            elif r['type'] == 'image':
                url = r['image']['full']['url']
            else:
                url = ''

            content = r['text'] if 'text' in r else ''
            reply = ','.join(r['replies']) if 'replies' in r else ''

            new_live_content = LiveContent(
                message_id=int(r['id']),
                sender=r['sender']['member']['name'],
                type=r['type'],
                content=content,
                url=url,
                reply=reply,
                likes=r['likes']['count'],
                created_at=datetime.fromtimestamp(r['created_at']),
                live_title=live_id,
            )
            new_live_content.save()

    def live_content_work(self, id):
        """Download and store all messages of a stored live, then its images.

        ``id`` is the database id of a ``MyLive`` document. Pages are fetched
        with the Zhihu live id, while messages are stored against the
        database id.
        """
        live = MyLive.objects(id=id).first()
        # Fetch details using Zhihu's own live id.
        data = self.get_live_content(live.live_id)
        while True:
            # Save every page, including the last one. The previous loop only
            # saved while ``unload_count > 0`` and so dropped the final page
            # (or the only page of a short live).
            # Store against the database id of the live.
            self.save_live_content(live.id, data)
            if data['unload_count'] <= 0 or not data['data']:
                # Nothing left to load, or an empty page that offers no
                # message id to continue from.
                break
            # Request the next page relative to the first message of this one.
            data = self.get_live_content(live.live_id, data['data'][0]['id'])
        print('success')

        image_contents = LiveContent.objects(live_title=live.id, type='image')
        for item in image_contents:
            self.save_live_content_image(item.id, item.url)
class zhihuspider(basespider):
    """Spider that collects recent Zhihu activities of a user.

    Configuration is read from ``self.allConfig['zhihu']``. The login token
    and a pickled ``name -> id`` map of followed people are kept under the
    configured data path.
    """

    def __init__(self):
        # Run the base-class setup first, then the Zhihu-specific setup.
        super().loadConfig()
        super().prepare()
        self.loadConfig()
        self.prepare()
        self.login()

    def loadConfig(self):
        """Read the ``zhihu`` config section and derive file paths and URL templates."""
        self.config = self.allConfig['zhihu']
        self.data_path = self.socialRoot + self.config['data_path']
        self.TOKEN_FILE = self.data_path + self.config['TOKEN_FILE']
        self.friends_file = self.data_path + self.config['friends_file']
        self.url_template_question = "https://www.zhihu.com/question/%s"
        self.url_template_answer = "https://www.zhihu.com/question/%s/answer/%s"
        self.url_template_article = "https://zhuanlan.zhihu.com/p/%s"

    def prepare(self):
        """Create the data directory, load the cached name map, build the client."""
        if not os.path.isdir(self.data_path):
            os.makedirs(self.data_path)
        if os.path.isfile(self.friends_file):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # this is only safe while friends_file is written by this spider.
            with open(self.friends_file, "rb") as f:
                self.name_map = pickle.load(f)
        else:
            self.name_map = dict()
        self.client = ZhihuClient()

    def login(self):
        """Authenticate the client and cache the current user in ``self.me``.

        A saved token is reused when present; otherwise an interactive
        terminal login is performed and the token saved. If the resulting
        profile reports ``over``, the reason is logged, a fresh interactive
        login is performed, and the profile is fetched again.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
        self.me = self.client.me()
        if self.me.over:
            # %-style arguments keep the message text identical while not
            # failing when over_reason is not a string.
            logging.error("login failed! Reason is %s", self.me.over_reason)
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
            # Refresh the profile: the object fetched above belongs to the
            # failed session and was previously left in place.
            self.me = self.client.me()

    def followings2name_map(self, me):
        """Add everyone ``me`` follows to ``self.name_map`` and persist the map."""
        for peo in me.followings:
            self.name_map[peo.name] = peo.id
        with open(self.friends_file, "wb") as f:
            pickle.dump(self.name_map, f)

    def getActivities(self, userid, count=10, timeOldest=None, timeLatest=None):
        """Return up to ``count`` activities of a user as a list of dicts.

        About ``actionType`` -- values listed by the original author:
            CREATE_ANSWER
            CREATE_ARTICLE
            CREATE_QUESTION
            FOLLOW_QUESTION
            VOTEUP_ANSWER

        Args:
            userid: Zhihu user id (an ``int`` is converted to ``str``). If the
                lookup reports ``over``, ``userid`` is treated as a display
                name and resolved through ``self.name_map``, which is
                refreshed from the logged-in user's followings on a miss.
            count: Maximum number of entries to return.
            timeOldest: Optional time tuple; its first six fields are passed
                to ``datetime.datetime``. Iteration stops at the first
                activity older than this (presumably activities arrive
                newest first -- confirm).
            timeLatest: Optional time tuple of the same form. Activities
                newer than this are skipped.

        Returns:
            A list of dicts with keys ``username``, ``avatar_url``,
            ``headline``, ``time``, ``actionType``, ``summary``,
            ``targetText``, ``topics``, ``source_url`` and, when images were
            found, ``imgs``. An empty list is returned when the user cannot
            be resolved.
        """

        def getTargetText_Topic(target, actType):
            # Map an activity target to (text, topics, source URL).
            # Unknown target types yield empty values.
            if isinstance(target, zhihu_oauth.Answer):
                return (target.content, target.question.topics,
                        self.url_template_answer % (target.question.id, target.id))
            elif isinstance(target, zhihu_oauth.Question):
                return (target.detail, target.topics,
                        self.url_template_question % (target.id))
            elif isinstance(target, zhihu_oauth.Article):
                return (target.content, [],
                        self.url_template_article % (target.id))
            else:
                return ("", [], "")

        if isinstance(userid, int):
            userid = str(userid)
        # Keep the caller-supplied identifier for error messages.
        backuserid = userid

        dtLatest = datetime.datetime(*timeLatest[0:6]) if timeLatest else None
        dtOldest = datetime.datetime(*timeOldest[0:6]) if timeOldest else None

        pp = self.client.people(userid)
        if pp.over:
            # The id lookup failed: try to resolve userid as a display name.
            if userid not in self.name_map:
                try:
                    self.followings2name_map(self.me)
                except Exception as e:
                    logging.error(str(e))
            if userid in self.name_map:
                userid = self.name_map[userid]
                pp = self.client.people(userid)
        if pp.over:
            # Still unresolved, whether or not a name mapping was found.
            return []

        activityList = []
        for act in pp.activities:
            try:
                text, topics, source_url = getTargetText_Topic(act.target, act.type)
                entry = {
                    'username': pp.name,
                    'avatar_url': pp.avatar_url,
                    'headline': pp.headline,
                    'time': time.localtime(act.created_time),
                    'actionType': act.type,
                    'summary': act2str(act),
                    'targetText': text,
                    'topics': [topic.name for topic in topics],
                    'source_url': source_url,
                }

                imglist = re.findall(r'(?<=<img src=")(.*?)(?=")', entry['targetText'])
                if isinstance(act.target, zhihu_oauth.Article) and act.target.image_url:
                    # An article's cover image goes first.
                    imglist.insert(0, act.target.image_url)
                if imglist:
                    entry['imgs'] = imglist

                dt = datetime.datetime(*entry['time'][0:6])
                if dtLatest and dtLatest < dt:
                    continue
                if dtOldest and dtOldest > dt:
                    break

                activityList.append(entry)
                if len(activityList) >= count:
                    break
            except Exception as e:
                # Best effort: a failing activity is logged and skipped.
                logging.error("getActivities of " + backuserid + " failed")
                traceback.print_exc()
        return activityList