Ejemplo n.º 1
0
def login(username, password):
    """Log in to Zhihu and store the resulting token in ``token.pkl``.

    If the first attempt raises ``NeedCaptchaException``, the captcha image is
    written to ``a.gif``, the user is prompted for its text on stdin, and the
    login is retried with that captcha.

    Args:
        username: Account name passed to ``ZhihuClient.login``.
        password: Password passed to ``ZhihuClient.login``.
    """
    from zhihu_oauth import ZhihuClient
    from zhihu_oauth.exception import NeedCaptchaException

    zhihu = ZhihuClient()
    try:
        zhihu.login(username, password)
    except NeedCaptchaException:
        # A captcha is required: dump the image so the user can read it,
        # then retry with whatever they type.
        with open('a.gif', 'wb') as captcha_image:
            captcha_image.write(zhihu.get_captcha())
        typed_captcha = input('please input captcha:')
        zhihu.login(username, password, typed_captcha)
    else:
        # Only reached when the first attempt did not ask for a captcha.
        print(u"登陆成功!")

    # Runs after either path.
    zhihu.save_token('token.pkl')
Ejemplo n.º 2
0
def login(account, password):
    """Return a ``ZhihuClient`` authenticated from a cached token or a login.

    The token stored at ``TOKEN_FILE`` is loaded when that file exists.
    Otherwise a password login is performed; if it raises
    ``NeedCaptchaException`` the captcha image is written to
    ``./captcha/a.gif``, the user is prompted for its text, and the login is
    retried. After a fresh login the token is saved for the next run.

    Args:
        account: Account name passed to ``ZhihuClient.login``.
        password: Password passed to ``ZhihuClient.login``.

    Returns:
        The ``ZhihuClient`` instance.

    Raises:
        Any exception raised by the client or by file I/O other than the
        ``FileNotFoundError`` from a missing token file. These used to be
        discarded by a ``return`` inside ``finally``, which handed callers a
        client that might not be logged in.
    """
    client = ZhihuClient()
    try:
        client.load_token(TOKEN_FILE)
    except FileNotFoundError:
        pass  # No cached token yet; fall through to a fresh login.
    else:
        return client

    try:
        client.login(account, password)
    except NeedCaptchaException:
        # Save the captcha image, prompt for its text, and log in again.
        with open('./captcha/a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = input('please input captcha:')
        client.login(account, password, captcha)

    # Save after either login path, not only after the captcha one.
    # NOTE(review): the token is loaded from TOKEN_FILE but saved to this
    # literal path; confirm both refer to the same file.
    client.save_token('./token/token.pkl')
    return client
Ejemplo n.º 3
0
def main():
    """Log in to Zhihu, crawl answers for every listed user and save them.

    Credentials come from the module-level ``email_or_phone`` and
    ``password``. Each line of ``USER_CSV_PATH`` is used as one user id: a
    ``MyCrawler`` is built for it, ``crawling_answer`` is called, and the
    returned sequence is cut into slices of 60 items. Collected slices are
    handed to ``save_to_csv_a`` every ``max_lines`` users.
    """
    chunk_size = 60  # crawling_answer output is consumed in slices of 60
    client = ZhihuClient()

    try:
        client.login_in_terminal(username=email_or_phone, password=password)
    except NeedCaptchaException:
        # Save the captcha image, prompt for its text, and log in again.
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = input('请输入验证码: ')
        client.login(email_or_phone, password, captcha)

    # Keep the session for later runs. This used to be skipped whenever the
    # captcha path was taken.
    client.save_token(TOKEN_FILE)

    data_out_list_a = []
    line_saved = 0
    max_lines = 1

    with open(USER_CSV_PATH) as file:
        for line in file:
            crawl_id = line.strip('\n')
            my_crawl = MyCrawler(crawl_id, client)
            print('------>>>| 待爬取的用户的知乎id为: ', crawl_id)

            data_a = my_crawl.crawling_answer(crawl_id)
            print('该用户爬取完毕'.center(60, '*'))
            if len(data_a) % chunk_size == 0:
                data_out_list_a.extend(
                    data_a[start:start + chunk_size]
                    for start in range(0, len(data_a), chunk_size)
                )
            else:
                # A length that is not a multiple of 60 is discarded.
                print('无用的输出!')

            line_saved += 1

            if line_saved == max_lines:
                save_to_csv_a(data_out_list_a, client)
                data_out_list_a = []
                line_saved = 0

    # Flush a final partial batch so nothing is lost when max_lines > 1.
    if line_saved:
        save_to_csv_a(data_out_list_a, client)

    print('全部用户采集完毕'.center(40, '*'))
Ejemplo n.º 4
0
def main():
    """Log in to Zhihu, crawl answers for every listed user and save them.

    Each line of ``USER_TRY_CSV_PATH`` is used as one user id. The sequence
    returned by ``MyCrawler.crawling_answer`` is cut into slices of 60 items,
    and the collected slices are passed to ``save_to_csv_a`` every
    ``batch_size`` users. The loop sleeps 1-3 seconds after each user.
    """
    client = ZhihuClient()

    try:
        client.login('*****@*****.**', 'durant')
    except NeedCaptchaException:
        print("Login Error")
        # Write the captcha image to disk, ask for its text, then retry.
        with open('a.gif', 'wb') as captcha_file:
            captcha_file.write(client.get_captcha())
        typed_captcha = input('please input captcha:')
        client.login('*****@*****.**', 'durant', typed_captcha)

    batch_size = 1
    users_pending = 0
    pending_rows = []

    with open(USER_TRY_CSV_PATH) as user_file:
        for raw_line in user_file.readlines():
            user_id = raw_line.strip("\n")
            crawler = MyCrawler(user_id, client)
            print(user_id)

            answers = crawler.crawling_answer(user_id)
            if len(answers) % 60:
                # A length that is not a multiple of 60 is discarded.
                print("Invalid Output")
            else:
                for start in range(0, len(answers), 60):
                    pending_rows.append(answers[start:start + 60])

            # Random pause between users.
            time.sleep(random.randint(1, 3))

            users_pending += 1
            if users_pending != batch_size:
                continue

            save_to_csv_a(pending_rows)
            pending_rows = []
            users_pending = 0
Ejemplo n.º 5
0
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException

# Fill in your e-mail address or phone number and the matching password.
email = ""
password = ""

client = ZhihuClient()

try:
    client.login(email, password)
except NeedCaptchaException:
    # Save the captcha image, prompt for its text, and log in again.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    client.login(email, password, captcha)

# Save the token after either login path. It used to be indented into the
# except block, so it was only written when a captcha had been required.
client.save_token('token.pkl')
Ejemplo n.º 6
0
class Crawl:
    """Fetch the logged-in user's Zhihu Lives and store them.

    Live metadata is stored through ``MyLive`` and individual messages
    through ``LiveContent``; image messages are also downloaded under
    ``app/Resource/``.
    """

    def __init__(self):
        self.client = ZhihuClient()

    def login(self, username, password):
        """Authenticate ``self.client``, reusing a per-user token file.

        The token is read from ``app/Resource/<username>.token`` when that
        file exists. Otherwise a password login is performed (with a captcha
        prompt if ``NeedCaptchaException`` is raised) and the token is saved
        to that path.
        """
        token_path = 'app/Resource/' + username + '.token'
        if os.path.isfile(token_path):
            self.client.load_token(token_path)
            return

        try:
            self.client.login(username, password)
        except NeedCaptchaException:
            # Save the captcha image, prompt for its text, and log in again.
            with open('a.gif', 'wb') as f:
                f.write(self.client.get_captcha())
            captcha = input('please input captcha:')
            self.client.login(username, password, captcha)
        self.client.save_token(token_path)

    def get_live_list(self):
        """Return ``self.client.me().lives``."""
        return self.client.me().lives

    @staticmethod
    def save_live_list(livedata):
        """Create and save one ``MyLive`` record from a live object."""
        new_live = MyLive(live_id=livedata.id,
                          title=livedata.title,
                          speaker=livedata.speaker.name,
                          speaker_description=livedata.speaker.description,
                          live_description=livedata.description,
                          seats_count=livedata.seat_taken,
                          price=livedata.fee)
        new_live.save()

    def live_list_work(self):
        """Save every live from ``get_live_list`` that is not stored yet."""
        for live in self.get_live_list():
            exist = MyLive.objects(live_id=live.id)
            if not exist:
                self.save_live_list(live)

    def get_live_content(self, live_id, before_id=''):
        """Fetch one page of messages for a live and return the decoded JSON.

        Args:
            live_id: Live id substituted into ``LIVECONTENT_URL``.
            before_id: Message id substituted into ``LIVECONTENT_URL`` as the
                paging cursor; empty for the first page.
        """
        # Uses the client's private session so the request goes through the
        # client's own HTTP session object.
        res = self.client._session.get(
            LIVECONTENT_URL.format(live_id, before_id))
        return json.loads(res.content)

    def save_live_content_image(self, id, url):
        """Download ``url`` and write it to ``app/Resource/<id>.png``."""
        content = self.client._session.get(url).content
        file = 'app/Resource/' + str(id) + '.png'
        with open(file, 'wb') as f:
            f.write(content)

    @staticmethod
    def save_live_content(live_id, livedata):
        """Save each message in ``livedata['data']`` that is not stored yet.

        Args:
            live_id: Value stored in the ``live_title`` field of every
                created ``LiveContent``.
            livedata: One decoded page as returned by ``get_live_content``.
        """
        for r in livedata['data']:
            exist = LiveContent.objects(message_id=r['id'])
            if exist:
                continue

            # Only audio and image messages carry a URL.
            if r['type'] == 'audio':
                url = r['audio']['url']
            elif r['type'] == 'image':
                url = r['image']['full']['url']
            else:
                url = ''
            content = r.get('text', '')
            reply = ','.join(r.get('replies', ()))

            new_live_content = LiveContent(
                message_id=int(r['id']),
                sender=r['sender']['member']['name'],
                type=r['type'],
                content=content,
                url=url,
                reply=reply,
                likes=r['likes']['count'],
                created_at=datetime.fromtimestamp(r['created_at']),
                live_title=live_id)
            new_live_content.save()

    def _iter_live_pages(self, live_id):
        """Yield every message page of a live, including the last one.

        Paging continues from the id of the first message of the previous
        page while that page reports ``unload_count > 0``. It also stops on
        an empty page, which has no id to continue from.
        """
        before_id = ''
        while True:
            page = self.get_live_content(live_id, before_id)
            yield page
            messages = page['data']
            if page['unload_count'] <= 0 or not messages:
                return
            before_id = messages[0]['id']

    def live_content_work(self, id):
        """Store all messages of one live and download its image messages.

        Args:
            id: Database id of the ``MyLive`` document to process.
        """
        live = MyLive.objects(id=id).first()
        # Pages are fetched with Zhihu's live id but stored under the
        # database id. Every fetched page is saved: the final page
        # (unload_count == 0) used to be fetched and then dropped.
        for page in self._iter_live_pages(live.live_id):
            self.save_live_content(live.id, page)
        print('success')

        image_contents = LiveContent.objects(live_title=live.id, type='image')
        for item in image_contents:
            self.save_live_content_image(item.id, item.url)
Ejemplo n.º 7
0
test_email = '*****@*****.**'
test_password = '******'
token_file = './token.pkl'

# raw_input exists only on Python 2; fall back to input on Python 3.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

# Reuse a saved token when there is one; otherwise log in and save it.
if os.path.lexists(token_file):
    client.load_token(token_file)
    print('load token success')
else:
    try:
        login_result = client.login(test_email, test_password)
    except NeedCaptchaException:
        # Save the captcha image, prompt for its text, and log in again.
        print(u'登录失败,需要输入验证码')
        with open('a.gif', 'wb') as f:
            f.write(client.get_captcha())
        captcha = _read_line(u'please input captcha:')
        login_result = client.login(test_email, test_password, captcha)
    print('login result => ')
    print(login_result)
    client.save_token(token_file)
    print('save token success')

# question
# Dump the JSON into a file with an .html name: opened in Chrome, the F12
# "Preview" tab shows the rendered JSON structure.
response_file_uri = './question_response.html'
question_id = 35005800
question = client.question(question_id)
data = question.pure_data
response_json = json.dumps(data)
# The with block closes the file; it used to be left open.
with open(response_file_uri, 'w+') as response_file:
    response_file.write(response_json)