Beispiel #1
0
def getLatestBestAnserwerAndSave():
    """Save the first 20 best answers of Zhihu topic 19550867.

    A token cached in ``token.pkl`` is loaded when that file exists;
    otherwise an interactive terminal login runs and the fresh token is
    written to the same file. Every answer is wrapped with ``ansItem``,
    converted by ``ansItem2artical`` and stored through ``.save()``.
    """
    token_path = 'token.pkl'
    limit = 20

    client = ZhihuClient()
    if not os.path.isfile(token_path):
        client.login_in_terminal()
        client.save_token(token_path)
    else:
        client.load_token(token_path)

    best_answers = client.topic(19550867).best_answers
    for saved, answer in enumerate(best_answers, start=1):
        ansItem2artical(ansItem(answer)).save()
        # Stop right after the limit-th answer so no further item is
        # requested from the answer iterable.
        if saved == limit:
            break
Beispiel #2
0
 def start(self):
     """Run an interactive terminal login and store the resulting token.

     The token is written to ``Path.ZHIHUTOKEN``. When
     ``NeedLoginException`` is raised, a retry hint is printed and the
     process ends through ``sys.exit()``.
     """
     try:
         client = ZhihuClient()
         client.login_in_terminal()
         client.save_token(Path.ZHIHUTOKEN)
     except NeedLoginException:
         print u"Oops, please try again."
         sys.exit()
     return
Beispiel #3
0
def LoginZhihuClient(token_name):
    """Log in to Zhihu and return the ``me`` object of the account.

    The token file ``liuximing.pkl`` is loaded when present; otherwise a
    terminal login runs and its token is saved to that file.

    NOTE(review): ``token_name`` is accepted but never read -- the token
    file name is fixed. Confirm with callers before wiring it in.
    """
    token_path = 'liuximing.pkl'
    zhihu = ZhihuClient()
    if not os.path.isfile(token_path):
        zhihu.login_in_terminal()
        zhihu.save_token(token_path)
    else:
        zhihu.load_token(token_path)
    return zhihu.me()
Beispiel #4
0
def zhihu_login():
    """Return a logged-in ``ZhihuClient`` after printing the account name.

    The token in ``TOKEN_FILE`` is reused when the file exists; otherwise
    a password login runs and the token is saved to ``TOKEN_FILE``.
    """
    zhihu = ZhihuClient()

    if not os.path.isfile(TOKEN_FILE):
        # NOTE(review): credentials are hard-coded in source; consider
        # moving them to configuration.
        zhihu.login('*****@*****.**', 'a4906639')
        zhihu.save_token(TOKEN_FILE)
    else:
        zhihu.load_token(TOKEN_FILE)

    print(zhihu.me().name)
    return zhihu
Beispiel #5
0
def zhihu_login():
    """Build a ``ZhihuClient``, authenticate it and return it.

    An existing ``TOKEN_FILE`` is loaded; without one the function logs
    in with the embedded account and stores the token in ``TOKEN_FILE``.
    The account's display name is printed before returning.
    """
    session = ZhihuClient()
    has_token = os.path.isfile(TOKEN_FILE)

    if has_token:
        session.load_token(TOKEN_FILE)
    else:
        # NOTE(review): the account and password are literals in source.
        session.login('*****@*****.**', 'a4906639')
        session.save_token(TOKEN_FILE)

    profile = session.me()
    print(profile.name)
    return session
Beispiel #6
0
    def login(self):
        """Return a ``ZhihuClient`` authenticated through ``token.pkl``.

        The token file is loaded when it exists; otherwise a terminal
        login runs and the token is saved to that file.
        """
        token_path = 'token.pkl'
        zhihu = ZhihuClient()

        if not os.path.isfile(token_path):
            zhihu.login_in_terminal()
            zhihu.save_token(token_path)
        else:
            zhihu.load_token(token_path)
        return zhihu
Beispiel #7
0
 def get_client(self, reset_=0):
     """Return a ``ZhihuClient`` authenticated via ``TOKEN_FILE``.

     :param reset_: any value other than 0 forces a fresh terminal login
         even when ``TOKEN_FILE`` already exists.
     :return: the client, logged in or loaded from the token file.
     """
     client = ZhihuClient()
     # One decision point: a forced reset and a missing token file both
     # need an interactive login. The token just saved is not reloaded.
     if reset_ != 0 or not os.path.isfile(TOKEN_FILE):
         client.login_in_terminal()
         client.save_token(TOKEN_FILE)
     else:
         client.load_token(TOKEN_FILE)
     return client
Beispiel #8
0
class Login():
    """Pair a ``ZhihuClient`` with the token file used to authenticate it."""

    def __init__(self):
        self.client = ZhihuClient()
        # The file name carries the major Python version as a suffix.
        self.TOKEN_FILE = 'token.pkl.{}'.format(sys.version_info[0])

    def client_login(self):
        """Authenticate ``self.client`` and return it.

        An existing token file is loaded; otherwise a terminal login
        runs and the token is saved to ``self.TOKEN_FILE``.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
            return self.client

        self.client.login_in_terminal()
        self.client.save_token(self.TOKEN_FILE)
        return self.client
Beispiel #9
0
def zhihu_login():
    r"""
    Log in to Zhihu, reusing the token in ``TOKEN_FILE_NAME`` if present.
    :return:        the client after login
    """
    zhihu = ZhihuClient()
    if not os.path.isfile(TOKEN_FILE_NAME):
        # No cached token: log in from the terminal and cache the token.
        zhihu.login_in_terminal()
        zhihu.save_token(TOKEN_FILE_NAME)
    else:
        zhihu.load_token(TOKEN_FILE_NAME)
    return zhihu
Beispiel #10
0
def login(username, password):
    """Log in to Zhihu, save the token to ``token.pkl`` and return the client.

    When the first attempt raises ``NeedCaptchaException`` the captcha
    image is written to ``a.gif``, its text is read from stdin and the
    login is repeated with it.

    :param username: account identifier passed to ``client.login``.
    :param password: password passed to ``client.login``.
    :return: the ``ZhihuClient`` that was used (previously nothing was
        returned, so the client was lost to the caller).
    """
    from zhihu_oauth import ZhihuClient
    from zhihu_oauth.exception import NeedCaptchaException
    client = ZhihuClient()
    try:
        client.login(username, password)
    except NeedCaptchaException:
        # Save the captcha image, prompt for its text, then log in again.
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = input('please input captcha:')
        client.login(username, password, captcha)
    # Printed on both paths, so the captcha path reports completion too.
    print(u"登陆成功!")
    client.save_token('token.pkl')
    return client
Beispiel #11
0
def login(account, password):
    """Return a ``ZhihuClient`` authenticated from ``TOKEN_FILE`` or by login.

    The token file is loaded first. If it does not exist, a password
    login runs (with a captcha retry on ``NeedCaptchaException``) and
    the token is saved to ``TOKEN_FILE`` so the next call can load it.

    Errors other than the two handled here now propagate; the previous
    ``return`` inside ``finally`` silently discarded them and handed back
    a client that might not be logged in.

    :param account: account identifier passed to ``client.login``.
    :param password: password passed to ``client.login``.
    :return: the authenticated client.
    """
    client = ZhihuClient()
    try:
        client.load_token(TOKEN_FILE)
    except FileNotFoundError:
        try:
            client.login(account, password)
        except NeedCaptchaException:
            # Save the captcha image, prompt for its text, log in again.
            with open('./captcha/a.gif', 'wb') as f:
                f.write(client.get_captcha())
            captcha = input('please input captcha:')
            client.login(account, password, captcha)
        # Saved on both login paths, to the same path that is loaded above.
        client.save_token(TOKEN_FILE)
    return client
def main():
    """Log in, then crawl the answers of every user id in ``USER_CSV_PATH``.

    Each line of the file is one user id. A user's answers are split
    into chunks of 60 items and appended to the pending batch; the batch
    is handed to ``save_to_csv_a`` after every ``max_lines`` users.
    Results whose length is not a multiple of 60 are skipped with a
    message, as before.
    """
    client = ZhihuClient()

    try:
        client.login_in_terminal(username=email_or_phone, password=password)
    except NeedCaptchaException:
        # Save the captcha image, prompt for its text, then log in again.
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = input('请输入验证码: ')
        client.login(email_or_phone, password, captcha)
    # Saved after either path so a captcha login is persisted as well.
    client.save_token(TOKEN_FILE)

    data_out_list_a = []
    line_saved = 0
    max_lines = 1

    with open(USER_CSV_PATH) as file:
        # Iterate the file lazily instead of materialising every line.
        for line in file:
            crawl_id = line.strip('\n')
            my_crawl = MyCrawler(crawl_id, client)
            print('------>>>| 待爬取的用户的知乎id为: ', crawl_id)

            data_a = my_crawl.crawling_answer(crawl_id)
            print('该用户爬取完毕'.center(60, '*'))
            if len(data_a) % 60 == 0:
                for start in range(0, len(data_a), 60):
                    data_out_list_a.append(data_a[start:start + 60])
            else:
                print('无用的输出!')

            line_saved += 1

            if line_saved == max_lines:
                save_to_csv_a(data_out_list_a, client)
                data_out_list_a = []
                line_saved = 0

    # Flush a partially filled batch (only reachable when max_lines > 1).
    if line_saved:
        save_to_csv_a(data_out_list_a, client)

    print('全部用户采集完毕'.center(40, '*'))
Beispiel #13
0
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

# Fill in the email/phone number and the password that belongs to it.
email = ""
password = ""

client = ZhihuClient()

try:
    client.login(email, password)
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(email, password, captcha)

# Persist the token on both paths; it used to be saved only when a
# captcha had been required.
client.save_token('token.pkl')
Beispiel #14
0
if os.path.lexists(token_file):
    client.load_token(token_file)
    print 'load token success'
else:
    try:
        login_result = client.login(test_email, test_password)
    except NeedCaptchaException:
        # 保存验证码并提示输入,重新登录
        print u'登录失败,需要输入验证码'
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = raw_input(u'please input captcha:')
        login_result = client.login(test_email, test_password, captcha)
    print 'login result => '
    print login_result
    client.save_token(token_file)
    print 'save token success'

# question
response_file_uri = './question_response.html'  # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构
question_id = 35005800
question = client.question(question_id)
data = question.pure_data
response_json = json.dumps(data)
response_file = open(response_file_uri, 'w+')
response_file.write(response_json)
print u"数据保存完成"

response_file_uri = './people_response.html'  # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构
people_id = '404-Page-Not-found'
people = client.people(people_id)
Beispiel #15
0
class Crawl:
    """Crawler for Zhihu Live: login, live list, message and image storage."""

    def __init__(self):
        self.client = ZhihuClient()

    @staticmethod
    def _token_path(username):
        """Return the per-user token file path under ``app/Resource/``."""
        return 'app/Resource/' + username + '.token'

    def login(self, username, password):
        """Authenticate ``self.client`` for ``username``.

        A saved token for the user is loaded when present. Otherwise a
        password login runs (captcha image saved to ``a.gif`` and its
        text read from stdin on ``NeedCaptchaException``) and the token
        is stored for next time.
        """
        token_file = self._token_path(username)
        if os.path.isfile(token_file):
            self.client.load_token(token_file)
            return

        try:
            self.client.login(username, password)
        except NeedCaptchaException:
            # Save the captcha image, prompt for its text, log in again.
            with open('a.gif', 'wb') as f:
                f.write(self.client.get_captcha())
            captcha = input('please input captcha:')
            self.client.login(username, password, captcha)
        self.client.save_token(token_file)

    def get_live_list(self):
        """Return the ``lives`` of the logged-in account."""
        return self.client.me().lives

    @staticmethod
    def save_live_list(livedata):
        """Store one live object as a ``MyLive`` document."""
        new_live = MyLive(live_id=livedata.id,
                          title=livedata.title,
                          speaker=livedata.speaker.name,
                          speaker_description=livedata.speaker.description,
                          live_description=livedata.description,
                          seats_count=livedata.seat_taken,
                          price=livedata.fee)
        new_live.save()

    def live_list_work(self):
        """Store every live of the account that is not in ``MyLive`` yet."""
        for live in self.get_live_list():
            if not MyLive.objects(live_id=live.id):
                self.save_live_list(live)

    def get_live_content(self, live_id, before_id=''):
        """Fetch one page of live messages and return the decoded JSON.

        ``live_id`` and ``before_id`` are substituted, in that order,
        into ``LIVECONTENT_URL``.
        """
        res = self.client._session.get(
            LIVECONTENT_URL.format(live_id, before_id))
        return json.loads(res.content)

    def save_live_content_image(self, id, url):
        """Download ``url`` and write it to ``app/Resource/<id>.png``."""
        content = self.client._session.get(url).content
        file = 'app/Resource/' + str(id) + '.png'
        with open(file, 'wb') as f:
            f.write(content)

    @staticmethod
    def save_live_content(live_id, livedata):
        """Store the messages in ``livedata['data']`` as ``LiveContent``.

        Messages whose id is already stored are skipped. ``live_id`` is
        written to the ``live_title`` field of each new document.
        """
        for r in livedata['data']:
            if LiveContent.objects(message_id=r['id']):
                continue

            # Only audio and image messages carry a media URL.
            if r['type'] == 'audio':
                url = r['audio']['url']
            elif r['type'] == 'image':
                url = r['image']['full']['url']
            else:
                url = ''
            content = r['text'] if 'text' in r else ''
            reply = ','.join(r['replies']) if 'replies' in r else ''

            new_live_content = LiveContent(
                message_id=int(r['id']),
                sender=r['sender']['member']['name'],
                type=r['type'],
                content=content,
                url=url,
                reply=reply,
                likes=r['likes']['count'],
                created_at=datetime.fromtimestamp((r['created_at'])),
                live_title=live_id)
            new_live_content.save()

    def live_content_work(self, id):
        """Store all messages of one live, then download its images.

        :param id: id of the ``MyLive`` document. The Zhihu live id
            stored on that document is used for fetching, while the
            document id is used when storing messages.
        """
        live = MyLive.objects(id=id).first()
        data = self.get_live_content(live.live_id)
        while data['unload_count'] > 0:
            self.save_live_content(live.id, data)
            # Page backwards from the first message of the current page.
            data = self.get_live_content(live.live_id, data['data'][0]['id'])
        # The page fetched last (unload_count == 0) still holds messages;
        # it used to be dropped without being stored.
        self.save_live_content(live.id, data)
        print('success')

        image_contents = LiveContent.objects(live_title=live.id, type='image')
        for item in image_contents:
            self.save_live_content_image(item.id, item.url)
Beispiel #16
0
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException
client = ZhihuClient()
user = '******'
pwd = '6666666'
try:
    client.login_in_terminal(user, pwd)
    print(u"登陆成功!")
except NeedCaptchaException:  # a captcha is required
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    # Retry through login(), which takes the captcha text as its third
    # argument. NOTE(review): login_in_terminal()'s third positional
    # parameter is not a captcha -- confirm against the zhihu_oauth docs.
    client.login(user, pwd, captcha)
    print(u"登陆成功!")

client.save_token('token.pkl')  # save the token
# save_token() can only be used once the client is logged in.
# With a token file, the next run can simply load it instead:
# client.load_token('filename')
# client.login_in_terminal() # or ('*****@*****.**', 'password')
Beispiel #17
0
# In[2]:

# login ZhihuClient
# Log in with the embedded account; when a captcha is required, fetch it
# and retry, then save the token.
client = ZhihuClient()
user = '******'
pwd = '961204yy'
try:
    client.login(user, pwd)
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(user, pwd, captcha)
# NOTE(review): the file is named 'token.kpl' here while the disabled
# variant below uses 'token.pkl' -- confirm which name is intended.
client.save_token('token.kpl')
# Disabled alternative: reuse a saved token instead of logging in.
# TOKEN_FILE = 'token.pkl'
#
# if os.path.isfile(TOKEN_FILE):
#     client.load_token(TOKEN_FILE)
# else:
#     client.login_in_terminal()
#     client.save_token(TOKEN_FILE)


def save_answer(topic, answer_numbers=0, save_path='zhihu'):
    # if not os.path.exists(save_path):
    #     os.mkdir(save_path)
    # topic_path = save_path + '/' + topic.name
    # if not os.path.exists(topic_path):
    #     os.mkdir(topic_path)
Beispiel #18
0
# Path of the token file that is loaded when present and written below.
token = './XXX.pk1'
client = ZhihuClient()

# Load the saved token if the file exists, otherwise log in with the
# account; a captcha request from either step triggers the retry below.
try:
    if os.path.exists(token):
        client.load_token(token)
    else:
        client.login('username', 'passwd')
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = raw_input('please input captcha:')
    client.login('username', 'passwd', captcha)
# Runs on every path, including right after a successful load_token().
client.save_token(token)


def dump_activities(pid):
    person = client.people(pid)
    filter_types = {
        ActType.COLLECT_ANSWER,
        ActType.COLLECT_ANSWER,
        ActType.COLLECT_ARTICLE,
        ActType.CREATE_ANSWER,
        ActType.CREATE_ARTICLE,
        ActType.CREATE_PIN,
        ActType.CREATE_QUESTION,
        ActType.FOLLOW_COLLECTION,
        ActType.FOLLOW_COLUMN,
        ActType.FOLLOW_QUESTION,
Beispiel #19
0
def login():
    """Log in, then save every answer of a topic's unanswered questions.

    Authentication goes through ``token.pkl`` (loaded if present,
    otherwise created by a terminal login). For each question in
    ``topic.unanswered_questions`` the id, title and answer count are
    printed, and each answer is saved under
    ``Data\\Gene\\<question id>#<title>`` with the file name
    ``<author id>#<author name>``. The summed answer count is printed
    last.
    """
    token_path = 'token.pkl'
    client = ZhihuClient()

    if not os.path.isfile(token_path):
        client.login_in_terminal()
        client.save_token(token_path)
    else:
        client.load_token(token_path)

    topic = client.topic(19560072)  # genetically modified organisms
    # topic = client.topic(19578906)  # climate change
    # topic = client.topic(19551296)  # online games

    total_answers = 0
    for question in topic.unanswered_questions:
        for field in (question.id, question.title, question.answer_count):
            print(field)
        total_answers = total_answers + question.answer_count
        for answer in question.answers:
            print(answer.author.id, answer.author.name)
            target_dir = 'Data\\Gene\\' + str(question.id) + '#' + question.title
            file_name = str(answer.author.id) + '#' + answer.author.name
            answer.save(target_dir, file_name)
    print("总共有{0}个回答".format(total_answers))
print(text_readline)

# Iterate the lines directly rather than indexing through range(len()).
for line in text_readline:
    print(line)
# ================ Read the account and password ================
account = text_readline[0]
passward = text_readline[1]

client = ZhihuClient()

try:
    client.login(account, passward)
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(account, passward, captcha)

# save_token() can only be used once the client is logged in.
client.save_token('/Users/alicewish/我的坚果云/token.pkl')

# ================ Report the elapsed run time ================
run_time = time.time() - start_time
if run_time < 60:  # seconds with two decimals
    print("耗时:{:.2f}秒".format(run_time))
elif run_time < 3600:  # whole minutes and seconds
    # int() truncates the seconds; rounding could display "60" seconds.
    print("耗时:{:.0f}分{:.0f}秒".format(run_time // 60, int(run_time % 60)))
else:  # whole hours, minutes and seconds
    print("耗时:{:.0f}时{:.0f}分{:.0f}秒".format(run_time // 3600, run_time % 3600 // 60, int(run_time % 60)))
# Iterate the lines directly rather than indexing through range(len()).
for line in text_readline:
    print(line)
# ================ Read the account and password ================
account = text_readline[0]
passward = text_readline[1]

client = ZhihuClient()

try:
    client.login(account, passward)
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(account, passward, captcha)

# save_token() can only be used once the client is logged in.
client.save_token('/Users/alicewish/我的坚果云/token.pkl')

# ================ Report the elapsed run time ================
run_time = time.time() - start_time
if run_time < 60:  # seconds with two decimals
    print("耗时:{:.2f}秒".format(run_time))
elif run_time < 3600:  # whole minutes and seconds
    # int() truncates the seconds; rounding could display "60" seconds.
    print("耗时:{:.0f}分{:.0f}秒".format(run_time // 60, int(run_time % 60)))
else:  # whole hours, minutes and seconds
    print("耗时:{:.0f}时{:.0f}分{:.0f}秒".format(run_time // 3600,
                                            run_time % 3600 // 60,
                                            int(run_time % 60)))
Beispiel #22
0
import pandas as pd
from zhihu_oauth import ZhihuClient

client = ZhihuClient()
from zhihu_oauth.exception import NeedCaptchaException

try:
    client.login('*****@*****.**', 'justbemyself1998')
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, then log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login('*****@*****.**', 'justbemyself1998', captcha)
# Save the token on both paths; it used to be written only when a
# captcha had been required.
client.save_token("token.pkl")
Beispiel #23
0
class ZhiHu(object):
    """Collect Zhihu topic questions into SQLite and export them as Markdown."""

    TOKEN_FILE = 'token.pkl'

    def __init__(self):
        """Log in to Zhihu, then open the ``zhihu.db`` database."""
        self.login_zhihu()
        self.db = EasySqlite('zhihu.db')

    def login_zhihu(self):
        """Create ``self.client`` and authenticate it.

        ``TOKEN_FILE`` is loaded when it exists; otherwise a terminal
        login runs and the token is saved to ``TOKEN_FILE``.
        :return:
        """
        self.client = ZhihuClient()
        if not os.path.isfile(self.TOKEN_FILE):
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
            return
        self.client.load_token(self.TOKEN_FILE)

    def save_quesions(self, topic_id):
        """Store a topic's unanswered questions that have 10+ answers.

        :param topic_id: id of the topic; also stored in each row.
        :return:
        """
        topic = self.client.topic(topic_id)
        print(topic)
        candidates = topic.unanswered_questions
        insert_sql = 'replace into questions values(?,?,?,?,?,?)'
        for question in candidates:
            if not question.answer_count < 10:
                row = [question.id, question.title, question.follower_count,
                       question.answer_count, question.comment_count,
                       topic_id]
                print(row)
                if self.db.update(insert_sql, args=row):
                    print('insert success!')
                else:
                    print('insert error!')

    def save_answer_info(self, question_id):
        """Print a summary of the question's first answer and save it.

        :param question_id: id of the question to inspect.
        :return:
        """
        question = self.client.question(question_id)
        print(question.title)
        # Only the first answer is handled; later ones are never fetched.
        first = next(iter(question.answers), None)
        if first is not None:
            print(first.comment_count, first.excerpt, first.question,
                  first.thanks_count, first.voteup_count)
            first.save()

    def to_md(self, topic, file_name):
        """Write up to 1000 questions of ``topic`` as a numbered link list.

        Rows come from the ``questions`` table ordered by
        ``follower_count`` descending; each becomes one line in
        ``file_name`` (UTF-8).
        """
        # NOTE(review): ``topic`` is interpolated straight into the SQL
        # text; switch to a bound parameter if ``db.query`` supports one.
        sql = "select * from questions where topic_id = '%s' order by follower_count desc limit 1000" % topic
        rows = self.db.query(sql)
        template = "%s. [%s](https://www.zhihu.com/question/%s) 关注数:%s 回答数:%s 评论数:%s<br>\n"
        with open(file_name, 'w', encoding='utf8') as out:
            for number, item in enumerate(rows, 1):
                fields = (number, item['title'], item['id'],
                          item['follower_count'], item['answer_count'],
                          item['comment_count'])
                out.write(template % fields)
Beispiel #24
0
from timeout import timeout
import os
from utils import print_err
from pymongo import MongoClient

MAX_SLEEP_TIME = 15
# The first command-line argument selects both the token file and the
# line of the credentials list that is used.
Cookies_File = './cookies/cookies%s.json' % sys.argv[1]
client = ZhihuClient()
if os.path.isfile(Cookies_File):
    client.load_token(Cookies_File)
else:
    # Read the credentials list inside a context manager so the file
    # handle is closed again.
    with open('./cookies/client_info_list.data') as info_file:
        client_info = info_file.readlines()
    # The selected line holds two tab-separated fields, used as the
    # first and second argument of login_in_terminal().
    client_info = client_info[int(sys.argv[1])].strip().split('\t')
    client.login_in_terminal(client_info[0], client_info[1])
    client.save_token(Cookies_File)


def get_user_questions(uname):
    global client
    if uname == '':
        return
    print(uname)

    user_questions = dict()
    try:
        people = client.people(uname)
        user_questions['_id'] = uname
        user_questions['owner'] = uname
        user_questions['questions'] = []
        for q in people.following_questions:
Beispiel #25
0
if os.path.lexists(token_file):
    client.load_token(token_file)
    print 'load token success'
else:
    try:
        login_result = client.login(test_email, test_password)
    except NeedCaptchaException:
        # 保存验证码并提示输入,重新登录
        print u'登录失败,需要输入验证码'
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = raw_input(u'please input captcha:')
        login_result = client.login(test_email, test_password, captcha)
    print 'login result => '
    print login_result
    client.save_token(token_file)
    print 'save token success'

# question
response_file_uri = './question_response.html' # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构
question_id = 35005800
question = client.question(question_id)
data = question.pure_data
response_json = json.dumps(data)
response_file = open(response_file_uri, 'w+')
response_file.write(response_json)
print u"数据保存完成"

response_file_uri = './people_response.html' # 将json输出到网页中,chrome下按F12选preview能看见浏览器渲染出的json数据结构
people_id = '404-Page-Not-found'
people = client.people(people_id)
Beispiel #26
0
# coding=utf-8

from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient


# File that caches the login token between runs.
TOKEN_FILE = 'ZHIHUTOKEN.pkl'


client = ZhihuClient()

if not os.path.isfile(TOKEN_FILE):
    # No cached token yet: log in from the terminal and cache the token.
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
else:
    client.load_token(TOKEN_FILE)
class zhihuspider(basespider):
    """Spider that collects the recent activities of Zhihu users."""

    def __init__(self):
        # The base-class configuration and preparation run first, then
        # the overrides defined in this class, and finally the login.
        super().loadConfig()
        super().prepare()
        self.loadConfig()
        self.prepare()
        self.login()

    def loadConfig(self):
        """Read the ``zhihu`` section of ``allConfig`` into attributes."""
        self.config = self.allConfig['zhihu']

        self.data_path = self.socialRoot + self.config['data_path']
        self.TOKEN_FILE = self.data_path + self.config['TOKEN_FILE']
        self.friends_file = self.data_path + self.config['friends_file']

        self.url_template_question = "https://www.zhihu.com/question/%s"
        self.url_template_answer = "https://www.zhihu.com/question/%s/answer/%s"
        self.url_template_article = "https://zhuanlan.zhihu.com/p/%s"

    def prepare(self):
        """Create the data directory, load the name map, build the client."""
        if not os.path.isdir(self.data_path):
            os.makedirs(self.data_path)

        # name_map maps a followed user's display name to their id
        # (see followings2name_map); it is cached as a pickle file.
        if os.path.isfile(self.friends_file):
            with open(self.friends_file, "rb") as f:
                self.name_map = pickle.load(f)
        else:
            self.name_map = dict()

        self.client = ZhihuClient()

    def login(self):
        """Authenticate the client and set ``self.me``.

        The token file is loaded when present, otherwise a terminal
        login runs and the token is saved. If the resulting profile
        reports ``over``, the reason is logged and a fresh terminal
        login is performed.
        """
        if os.path.isfile(self.TOKEN_FILE):
            self.client.load_token(self.TOKEN_FILE)
        else:
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)

        self.me = self.client.me()
        if self.me.over:
            # Lazy %-formatting also copes with a non-string reason.
            logging.error("login failed! Reason is %s", self.me.over_reason)
            self.client.login_in_terminal()
            self.client.save_token(self.TOKEN_FILE)
            # Re-fetch the profile so self.me reflects the new login
            # instead of the object that reported ``over``.
            self.me = self.client.me()

    def followings2name_map(self, me):
        """Add ``me``'s followings to ``name_map`` and pickle the map."""
        for peo in me.followings:
            self.name_map[peo.name] = peo.id
        with open(self.friends_file, "wb") as f:
            pickle.dump(self.name_map, f)

    def getActivities(self,
                      userid,
                      count=10,
                      timeOldest=None,
                      timeLatest=None):
        """Return up to ``count`` activity entries of a user.

        ``userid`` may be an int (converted to ``str``). If the looked-up
        profile reports ``over``, ``userid`` is treated as a display
        name and resolved through ``name_map`` (refreshed from the
        logged-in account's followings when the name is unknown); an
        empty list is returned when that fails as well.

        ``timeOldest`` / ``timeLatest`` are optional sequences whose
        first six items are passed to ``datetime.datetime``. Activities
        newer than ``timeLatest`` are skipped, and the first activity
        older than ``timeOldest`` ends the scan.

        Values listed for ``actionType``:
            CREATE_ANSWER
            CREATE_ARTICLE
            CREATE_QUESTION
            FOLLOW_QUESTION
            VOTEUP_ANSWER

        :return: list of dicts with the keys ``username``,
            ``avatar_url``, ``headline``, ``time``, ``actionType``,
            ``summary``, ``targetText``, ``topics``, ``source_url`` and,
            when images were found, ``imgs``.
        """
        def getTargetText_Topic(target, actType):
            # Returns (text, topics, url) for the activity target; empty
            # values for target types that are not handled.
            if isinstance(target, zhihu_oauth.Answer):
                return (target.content, target.question.topics,
                        self.url_template_answer %
                        (target.question.id, target.id))
            elif isinstance(target, zhihu_oauth.Question):
                return (target.detail, target.topics,
                        self.url_template_question % (target.id))
            elif isinstance(target, zhihu_oauth.Article):
                return (target.content, [],
                        self.url_template_article % (target.id))
            else:
                return ("", [], "")

        if isinstance(userid, int):
            userid = str(userid)
        # Kept for the error message: userid may be replaced below.
        backuserid = userid
        dtLatest = datetime.datetime(*timeLatest[0:6]) if timeLatest else None
        dtOldest = datetime.datetime(*timeOldest[0:6]) if timeOldest else None

        pp = self.client.people(userid)
        if pp.over:
            if userid not in self.name_map:
                try:
                    self.followings2name_map(self.me)
                except Exception as e:
                    logging.error(str(e))
            if userid in self.name_map:
                userid = self.name_map[userid]
                pp = self.client.people(userid)
            if pp.over:
                return []

        activityList = []

        cnt = 0
        for act in pp.activities:
            try:
                text, topics, source_url = getTargetText_Topic(act.target,
                                                               act.type)
                entry = {
                    'username': pp.name,
                    'avatar_url': pp.avatar_url,
                    'headline': pp.headline,
                    'time': time.localtime(act.created_time),
                    'actionType': act.type,
                    'summary': act2str(act),
                    'targetText': text,
                    'topics': [topic.name for topic in topics],
                    'source_url': source_url
                }

                imglist = re.findall(r'(?<=<img src=")(.*?)(?=")',
                                     entry['targetText'])
                # An article's cover image goes in front of inline images.
                if isinstance(act.target,
                              zhihu_oauth.Article) and act.target.image_url:
                    imglist[0:0] = [act.target.image_url]
                if imglist:
                    entry['imgs'] = imglist

                dt = datetime.datetime(*entry['time'][0:6])
                if dtLatest and dtLatest < dt:
                    continue
                if dtOldest and dtOldest > dt:
                    break
                activityList.append(entry)

                cnt += 1
                if cnt >= count:
                    break
            except Exception as e:
                # Best effort: one failing activity must not abort the scan.
                logging.error("getActivities of %s failed", backuserid)
                traceback.print_exc()

        return activityList
Beispiel #28
0
from __future__ import unicode_literals, print_function

import os

from zhihu_oauth import ZhihuClient
from zhihu_oauth import SearchType

# File that caches the login token between runs.
TOKEN_FILE = 'token.pkl'

client = ZhihuClient()

if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)

me = client.me()

print('name', me.name)
print('headline', me.headline)
print('description', me.description)

print('following topic count', me.following_topic_count)
# The people count comes from ``following_count``; this line used to
# repeat ``following_topic_count`` from the line above.
print('following people count', me.following_count)
print('followers count', me.follower_count)

print('voteup count', me.voteup_count)
print('get thanks count', me.thanked_count)

print('answered question', me.answer_count)
Beispiel #29
0
import pandas as pd
import os
import csv
from datetime import datetime
import time
from pymongo import MongoClient
import json
from utils import Cleaner

# File that caches the login token between runs.
TOKEN_FILE = "token.pkl"
client = ZhihuClient()
if os.path.isfile(TOKEN_FILE):
    client.load_token(TOKEN_FILE)
else:
    client.login_in_terminal()
    client.save_token(TOKEN_FILE)
# No second save_token() here: both branches above already leave the
# current token in TOKEN_FILE.

question_id = 294220610
topic_id = 19575211

topic = client.topic(topic_id)
#print("topic {} has {} questions\n".format(topic.name, topic.questions_count))
#print("topic {} has {} followers\n".format(topic.name, topic.followers_count))


# for act in topic.activities:
#     if(isinstance(act, Answer)):
#         print("this answer content is {}\n".format(act.content))
#
#     else: