def login(username, password):
    """Log in to Zhihu, persist the session token, and return the client.

    A fresh ``ZhihuClient`` is created and logged in with the given
    credentials. If the service asks for a captcha, the captcha image is
    written to ``a.gif`` in the working directory, the user is prompted for
    its text on stdin, and the login is retried with that text. The session
    token is then written to ``token.pkl``.

    Args:
        username: Account identifier passed to ``ZhihuClient.login``.
        password: Password passed to ``ZhihuClient.login``.

    Returns:
        The ``ZhihuClient`` instance that was used for the login, so callers
        can keep using the session instead of reloading ``token.pkl``.
    """
    from zhihu_oauth import ZhihuClient
    from zhihu_oauth.exception import NeedCaptchaException

    client = ZhihuClient()
    try:
        client.login(username, password)
        print(u"登陆成功!")
    except NeedCaptchaException:
        # Captcha required: save the image, ask the user for its text,
        # then log in again with the captcha attached.
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = input('please input captcha:')
        client.login(username, password, captcha)

    # NOTE(review): the value returned by client.login() is never inspected,
    # so the success message and the token save happen even if the library
    # reports a failed login through its return value - confirm against the
    # zhihu_oauth documentation.
    client.save_token('token.pkl')  # persist the session token
    return client
def login(account, password):
    """Return a ``ZhihuClient``, restoring a saved token or logging in.

    The token stored at ``TOKEN_FILE`` is loaded first. If that file does not
    exist, a password login is performed; when a captcha is demanded the
    image is written to ``./captcha/a.gif``, its text is read from stdin and
    the login is retried. After a fresh login the token is saved to
    ``./token/token.pkl``.

    The function is best-effort: any ordinary error raised along the way is
    reported on stderr and the client is still returned, as before.
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.

    Args:
        account: Account identifier passed to ``ZhihuClient.login``.
        password: Password passed to ``ZhihuClient.login``.

    Returns:
        The ``ZhihuClient`` instance (possibly not authenticated if an error
        was reported).
    """
    import traceback

    client = ZhihuClient()
    try:
        try:
            client.load_token(TOKEN_FILE)
        except FileNotFoundError:
            try:
                client.login(account, password)
            except NeedCaptchaException:
                # Captcha required: save the image, ask for its text and
                # log in again.
                with open('./captcha/a.gif', 'wb') as f:
                    f.write(client.get_captcha())
                captcha = input('please input captcha:')
                client.login(account, password, captcha)
            # NOTE(review): the token is loaded from TOKEN_FILE but saved to
            # a literal path - confirm both refer to the same file, otherwise
            # the saved token is never picked up again.
            client.save_token('./token/token.pkl')
    except Exception:
        # The original used `finally: return client`, which silently
        # discarded every in-flight exception (including Ctrl-C at the
        # captcha prompt). Keep returning the client, but make the failure
        # visible and let BaseException subclasses propagate.
        traceback.print_exc()
    return client
def main():
    """Log in, then crawl the answers of every user listed in USER_CSV_PATH.

    Login uses the module-level ``email_or_phone`` / ``password``. If a
    captcha is demanded, the image is written to ``a.gif``, its text is read
    from stdin and the login is retried. The session token is saved to
    ``TOKEN_FILE`` after either login path.

    Each non-blank line of ``USER_CSV_PATH`` is treated as one user id. The
    list returned by ``MyCrawler.crawling_answer`` is cut into rows of 60
    items when its length is a multiple of 60; otherwise it is discarded
    with a message. Collected rows are handed to ``save_to_csv_a`` every
    ``max_lines`` users.
    """
    client = ZhihuClient()
    try:
        client.login_in_terminal(username=email_or_phone, password=password)
    except NeedCaptchaException:
        # Captcha required: save the image, ask for its text and log in again.
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = input('请输入验证码: ')
        client.login(email_or_phone, password, captcha)
    # Save the session for later runs. This used to sit inside the `try`,
    # so a login that went through the captcha branch was never persisted.
    client.save_token(TOKEN_FILE)

    data_out_list_a = []
    line_saved = 0
    max_lines = 1  # number of users to accumulate before writing to CSV
    with open(USER_CSV_PATH) as file:
        for line in file:
            crawl_id = line.strip('\n')
            if not crawl_id.strip():
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of crawling an empty user id.
                continue

            my_crawl = MyCrawler(crawl_id, client)
            print('------>>>| 待爬取的用户的知乎id为: ', crawl_id)
            data_a = my_crawl.crawling_answer(crawl_id)
            print('该用户爬取完毕'.center(60, '*'))

            if len(data_a) % 60 == 0:
                # Cut the flat result into consecutive rows of 60 items.
                data_out_list_a.extend(
                    data_a[start:start + 60]
                    for start in range(0, len(data_a), 60)
                )
            else:
                print('无用的输出!')
            # NOTE: a random 1-3 second pause between users was disabled here.

            line_saved += 1
            if line_saved == max_lines:
                save_to_csv_a(data_out_list_a, client)
                data_out_list_a = []
                line_saved = 0

    if line_saved:
        # Flush a trailing partial batch; only reachable when max_lines > 1.
        save_to_csv_a(data_out_list_a, client)
    print('全部用户采集完毕'.center(40, '*'))
def main():
    """Log in with the built-in account and crawl every user in the CSV.

    When a captcha is demanded, the image is written to ``a.gif``, its text
    is read from stdin and the login is retried. Each line of
    ``USER_TRY_CSV_PATH`` is then used as a user id: the list returned by
    ``MyCrawler.crawling_answer`` is cut into rows of 60 items when its
    length is a multiple of 60 (otherwise it is dropped with a message), and
    the collected rows are passed to ``save_to_csv_a`` after every user. A
    random pause of 1 to 3 seconds follows each user.
    """
    # NOTE(review): credentials are hard-coded in source - consider moving
    # them to configuration.
    client = ZhihuClient()
    try:
        client.login('*****@*****.**', 'durant')
    except NeedCaptchaException:
        print("Login Error")
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(client.get_captcha())
        captcha_text = input('please input captcha:')
        client.login('*****@*****.**', 'durant', captcha_text)

    users_per_save = 1
    users_pending = 0
    answer_rows = []
    with open(USER_TRY_CSV_PATH) as users_file:
        for raw_line in users_file:
            user_id = raw_line.strip("\n")
            crawler = MyCrawler(user_id, client)
            print(user_id)
            answers = crawler.crawling_answer(user_id)

            if len(answers) % 60 != 0:
                print("Invalid Output")
            else:
                # Consecutive, non-overlapping rows of 60 items each.
                for start in range(0, len(answers), 60):
                    answer_rows.append(answers[start:start + 60])

            # Pause between users.
            time.sleep(random.randint(1, 3))

            users_pending += 1
            if users_pending == users_per_save:
                save_to_csv_a(answer_rows)
                answer_rows = []
                users_pending = 0
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

# Login script: authenticate against Zhihu and store the session token in
# 'token.pkl' in the working directory.

# Fill in the e-mail address / phone number and the matching password.
email = ""
password = ""

client = ZhihuClient()
try:
    client.login(email, password)
except NeedCaptchaException:
    # Captcha required: save the image to 'a.gif', ask the user for its
    # text on stdin, then log in again with the captcha attached.
    with open('a.gif', 'wb') as f:
        captcha_image = client.get_captcha()
        f.write(captcha_image)
    captcha = input('please input captcha:')
    client.login(email, password, captcha)

# Persist the session token.
client.save_token('token.pkl')
class Crawl:
    """Crawler for Zhihu Live data built on an authenticated ``ZhihuClient``.

    Live metadata is stored through the ``MyLive`` model and individual
    messages through the ``LiveContent`` model. Tokens and downloaded images
    are written under ``app/Resource/``.
    """

    def __init__(self):
        """Create the underlying (not yet authenticated) ``ZhihuClient``."""
        self.client = ZhihuClient()

    @staticmethod
    def _token_path(username):
        """Return the path of the token file used for ``username``."""
        return 'app/Resource/' + username + '.token'

    def login(self, username, password):
        """Authenticate the client, reusing a saved token when one exists.

        If the token file for ``username`` exists it is loaded and no
        password login is attempted. Otherwise a password login is done;
        when a captcha is demanded the image is written to ``a.gif``, its
        text is read from stdin and the login is retried. The token is then
        saved for later runs.

        Args:
            username: Account identifier; also used in the token file name.
            password: Password passed to ``ZhihuClient.login``.
        """
        token_path = self._token_path(username)
        if os.path.isfile(token_path):
            self.client.load_token(token_path)
            return

        try:
            self.client.login(username, password)
        except NeedCaptchaException:
            # Captcha required: save the image, ask for its text and log in
            # again.
            with open('a.gif', 'wb') as f:
                f.write(self.client.get_captcha())
            captcha = input('please input captcha:')
            self.client.login(username, password, captcha)
        self.client.save_token(token_path)

    def get_live_list(self):
        """Return the ``lives`` collection of the logged-in user."""
        return self.client.me().lives

    @staticmethod
    def save_live_list(livedata):
        """Store one live object as a new ``MyLive`` document.

        Args:
            livedata: Live object exposing ``id``, ``title``, ``speaker``
                (with ``name`` and ``description``), ``description``,
                ``seat_taken`` and ``fee``.
        """
        new_live = MyLive(
            live_id=livedata.id,
            title=livedata.title,
            speaker=livedata.speaker.name,
            speaker_description=livedata.speaker.description,
            live_description=livedata.description,
            seats_count=livedata.seat_taken,
            price=livedata.fee,
        )
        new_live.save()

    def live_list_work(self):
        """Save every live of the current user that is not stored yet."""
        for live in self.get_live_list():
            exist = MyLive.objects(live_id=live.id)
            if not exist:
                self.save_live_list(live)

    def get_live_content(self, live_id, before_id=''):
        """Fetch one page of messages for a live and return the parsed JSON.

        Args:
            live_id: Zhihu live id inserted into ``LIVECONTENT_URL``.
            before_id: Message id inserted as the second URL field; empty
                for the first request.

        Returns:
            The decoded JSON body of the response.
        """
        res = self.client._session.get(
            LIVECONTENT_URL.format(live_id, before_id))
        return json.loads(res.content)

    def save_live_content_image(self, id, url):
        """Download ``url`` and write it to ``app/Resource/<id>.png``.

        Args:
            id: Value used as the file name stem.
            url: Address of the image to download.
        """
        content = self.client._session.get(url).content
        image_path = 'app/Resource/' + str(id) + '.png'
        with open(image_path, 'wb') as f:
            f.write(content)

    @staticmethod
    def save_live_content(live_id, livedata):
        """Store the messages of one page as ``LiveContent`` documents.

        Messages whose id is already stored are skipped, so calling this
        repeatedly with the same page does not create duplicates.

        Args:
            live_id: Value stored in the ``live_title`` field of every
                message (callers in this class pass the stored live's id).
            livedata: Parsed page whose ``'data'`` entry lists the messages.
        """
        for r in livedata['data']:
            exist = LiveContent.objects(message_id=r['id'])
            if exist:
                continue

            # Only audio and image messages carry a media URL.
            if r['type'] == 'audio':
                url = r['audio']['url']
            elif r['type'] == 'image':
                url = r['image']['full']['url']
            else:
                url = ''

            content = r['text'] if 'text' in r else ''
            reply = ','.join(r['replies']) if 'replies' in r else ''
            new_live_content = LiveContent(
                message_id=int(r['id']),
                sender=r['sender']['member']['name'],
                type=r['type'],
                content=content,
                url=url,
                reply=reply,
                likes=r['likes']['count'],
                created_at=datetime.fromtimestamp(r['created_at']),
                live_title=live_id,
            )
            new_live_content.save()

    def live_content_work(self, id):
        """Download and store all messages and images of one stored live.

        Pages are fetched one after another, each request passing the id of
        the first message of the previous page, until the service reports
        ``unload_count`` of zero. Afterwards every stored image message of
        the live is downloaded to disk.

        Args:
            id: Database id of the ``MyLive`` document to process.
        """
        live = MyLive.objects(id=id).first()
        # Requests use Zhihu's live id; storage uses the database id.
        data = self.get_live_content(live.live_id)
        while True:
            # Save before testing the counter: the previous loop tested
            # first, so the last page (unload_count == 0) was never stored.
            # Presumably unload_count is the number of messages still to be
            # fetched - TODO confirm; saving is safe either way because
            # already-stored messages are skipped.
            self.save_live_content(live.id, data)
            if data['unload_count'] <= 0 or not data['data']:
                # Nothing left, or an empty page that offers no message id
                # to continue from.
                break
            data = self.get_live_content(live.live_id, data['data'][0]['id'])
        print('success')

        image_contents = LiveContent.objects(live_title=live.id, type='image')
        for item in image_contents:
            self.save_live_content_image(item.id, item.url)
# Python 2 script fragment: restore or create a Zhihu session, then dump one
# question's raw data as JSON. `client`, `os`, `json` and
# `NeedCaptchaException` come from earlier in the file.
# The single-argument print(...) form is used throughout; on Python 2 it
# produces exactly the same output as the print statement.
test_email = '*****@*****.**'
test_password = '******'
token_file = './token.pkl'

if os.path.lexists(token_file):
    client.load_token(token_file)
    print('load token success')
else:
    try:
        login_result = client.login(test_email, test_password)
    except NeedCaptchaException:
        # Captcha required: save the image, ask for its text and log in
        # again.
        print(u'登录失败,需要输入验证码')
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = raw_input(u'please input captcha:')
        login_result = client.login(test_email, test_password, captcha)
    print('login result => ')
    print(login_result)
    client.save_token(token_file)
    print('save token success')

# question
# The JSON is written to an .html file so it can be opened in a browser;
# in Chrome, F12 -> Preview shows the rendered JSON structure.
response_file_uri = './question_response.html'
question_id = 35005800
question = client.question(question_id)
data = question.pure_data
response_json = json.dumps(data)
# Write inside `with` so the handle is flushed and closed; it was previously
# opened and never closed.
with open(response_file_uri, 'w+') as response_file:
    response_file.write(response_json)