Ejemplo n.º 1
0
def get():
    """Create a ``requests`` session and log it in, trying up to six times.

    The session is given a User-Agent from ``UserAgent.get()``, proxies from
    ``Proxy.get()`` and credentials from ``Account.get()``.  Each attempt
    fetches the home page, runs ``decaptcha`` on the session, and POSTs a
    login form built from the page's ``_xsrf`` value and the account.

    Retry policy:
      * login response with ``r != 0``: log it and call
        ``Account.get(account)`` for the next attempt (presumably this
        yields a different account -- confirm against ``Account``).
      * ``requests.RequestException`` on an odd-numbered attempt index:
        fetch new proxies and retry immediately.
      * ``requests.RequestException`` on an even-numbered attempt index:
        sleep 10 seconds and retry.

    Returns:
        The session whose login POST answered with ``r == 0``.

    Raises:
        AccountException: the last attempt's login response had ``r != 0``.
        NetworkException: the last attempt raised
            ``requests.RequestException``.
    """
    max_attempts = 6
    final_attempt = max_attempts - 1

    session = requests.session()
    session.headers.update({'User-Agent': UserAgent.get()})
    session.proxies = Proxy.get()
    account = Account.get()

    for attempt in range(max_attempts):
        try:
            homepage = session.get("http://www.zhihu.com").content
            captcha = decaptcha(session)

            form = {
                '_xsrf': PageHelper.get_xsrf(homepage),
                'password': account['password'],
                'remember_me': 'true',
            }
            # Only send a captcha field when decaptcha produced something.
            if captcha:
                form['captcha'] = captcha

            # A username containing '@' is submitted as an e-mail address,
            # anything else as a phone number.
            # NOTE(review): both endpoint literals are masked ('******') in
            # this copy of the source; they are reproduced verbatim.
            username = account['username']
            if '@' in username:
                form['email'] = username
                login_url = '******'
            else:
                form['phone_num'] = username
                login_url = '******'

            result = session.post(login_url, form).json()
            if result['r'] == 0:
                logger.warning("Login Success")
                break

            # Login was rejected: give up on the last attempt, otherwise
            # move on to another account.
            if attempt == final_attempt:
                raise AccountException()
            logger.error('Login Failed %s, Change Account, Retry', result)
            account = Account.get(account)
        except requests.RequestException:
            if attempt == final_attempt:
                raise NetworkException()
            if attempt % 2 == 0:
                logger.error("Request Failed, Retry in 10s")
                time.sleep(10)
            else:
                session.proxies = Proxy.get()
                logger.error("Request Failed, Change Proxy, Retry")

    return session
Ejemplo n.º 2
0
def terminate(*msg):
    """Publish a final status, hand back the account and task, then exit.

    Reads the module-level ``status``, ``task`` and ``finished`` objects.

    Args:
        *msg: Optional.  When at least one argument is given, the first one
            is logged, stored (as ``str``) in ``status['message']`` with
            ``status['status'] = 'error'``, and passed to ``sys.exit``.
            With no arguments the status is ``'exit'`` and the exit code
            is 0.

    Raises:
        SystemExit: always, via ``sys.exit`` at the end of the function.
    """
    has_reason = bool(msg)

    if has_reason:
        logger.error(msg[0])
        status.update(status='error', message=str(msg[0]))
    else:
        status.update(status='exit', message='Exit gracefully')

    logger.error('Terminate gracefully...')

    # Give the account currently in use back through rh, if there is one.
    acct = Account.get_using()
    if 'username' in acct:
        rh.push_acct(acct)
        logger.error('return account %s', acct['username'])

    # A non-empty task is pushed back through rh as well.
    if task:
        rh.lpush_task_user(task)
        logger.error('return task %s', task)

    status.update(
        finished=finished,
        task=task,
        account=acct,
        update_time=int(time.time()),
    )
    rh.publish_status(status)

    logger.error('Exit')
    # sys.exit receives the first message as-is when one was supplied.
    sys.exit(msg[0] if has_reason else 0)
Ejemplo n.º 3
0
def start(instance_id):
    """Run the crawl loop for one worker instance until an error stops it.

    Rebinds the module-level ``task`` and ``status`` names and increments
    the counters in the module-level ``finished`` mapping.  After
    publishing an initial ``'init'`` status, it loops forever: fetch a task
    from ``rh``, skip it if ``rh.is_user_crawled`` says so, publish a
    ``'crawling'`` status, crawl the user's followings, push the result and
    sleep a random 1-5 seconds.

    Args:
        instance_id: Identifier of this instance; logged and appended to
            the hostname to form ``status['id']``.

    Error handling:
        * ``NotFoundException`` while crawling: logged, loop continues.
        * ``ResponseException`` / ``NetworkException``: ``terminate`` is
          called with a message.
        * ``RedisException``: logged, then ``sys.exit('Redis Error!')``
          without calling ``terminate``.
        * any other ``Exception``: traceback is printed and ``terminate``
          is called with the exception.
    """
    global task, status
    task = ''

    try:
        logger.warning('Instance id: %s', instance_id)

        host = socket.gethostname()
        address = socket.gethostbyname(host)
        launched_at = int(time.time())

        session = SessionHelper()

        status = {
            'id': '-'.join((host, str(instance_id))),
            'hostname': host,
            'ip': address,
            'finished': finished,
            'task': '',
            'status': 'init',
            'message': '',
            'account': Account.get_using(),
            'start_time': launched_at,
            'update_time': int(time.time()),
        }
        rh.publish_status(status)

        while True:
            # Clear the global first so that a failure inside
            # get_task_user() does not leave the previous task in place.
            task = ''
            task = rh.get_task_user()
            logger.warning('Get task: ' + task)

            if rh.is_user_crawled(task):
                logger.warning("User %s crawled, skip", task)
                continue

            status.update(
                finished=finished,
                task=task,
                status='crawling',
                account=Account.get_using(),
                update_time=int(time.time()),
            )
            rh.publish_status(status)

            try:
                user = FollowingsCrawler(session, task).get()
                logger.warning('Push result: ' + task)
                rh.push_result_user(user)

                finished['user'] += 1
                finished['followings'] += len(user['followings'])

                # Random pause between users.
                time.sleep(random.uniform(1, 5))
            except NotFoundException:
                # A missing user is not fatal; move on to the next task.
                logger.error("User %s not found, continue" % task)
    except ResponseException:
        terminate('Crawling response error, push back task, quit')
    except RedisException:
        # Exits directly instead of going through terminate().
        logger.error('Redis connection error, quit')
        sys.exit('Redis Error!')
    except NetworkException:
        terminate('Network connection error, quit')
    except Exception as e:
        print(traceback.format_exc())
        terminate(e)