Example #1
0
def login():
    """Log in to itjuzi.com with a randomly chosen account, retrying until it works.

    Each attempt picks one entry from the module-level ``login_users``,
    requests the login page with a fresh session until it answers with
    HTTP 200, and then posts that account's credentials.

    Returns:
        True once the login response carries a ``Refresh`` header that
        redirects to the site's home page.
    """
    # Not read by any code in this retry loop; kept so code further down the
    # function can still rely on it.
    retry_times = 0

    while True:
        try:
            login_user = random.choice(login_users)
            # Log only the account name: the full entry also holds the
            # plaintext password, which must not end up in log files.
            logger.info(login_user["name"])

            # Keep creating fresh sessions until the login page loads with 200.
            flag = -1
            while flag != 0:
                s = my_request.get_https_session(new=True, agent=True)
                (flag,
                 r) = my_request.get(logger,
                                     "https://www.itjuzi.com/user/login")
                logger.info(r.status_code)
                if flag == 0 and r.status_code != 200:
                    flag = -1
            logger.info(r.headers["Set-Cookie"])

            r = s.post("https://www.itjuzi.com/user/login",
                       data={
                           "identity": login_user["name"],
                           "password": login_user["pwd"]
                       },
                       timeout=10)
            # A missing "Refresh" header raises KeyError, which is handled
            # below like any other failed attempt (log, wait, retry).
            logger.info(r.headers["Refresh"])
            if "0;url=https://www.itjuzi.com/" == r.headers["Refresh"]:
                return True
        except Exception as ex:
            # Any failure (network error, missing header, ...) is logged and
            # followed by a pause before the next attempt.
            logger.exception(ex)
            time.sleep(10)
        '''
Example #2
0
def login():
    """Log in to passport.jd.com, retrying until the service reports success.

    Every attempt rebinds the module-level session ``s`` to a fresh HTTPS
    session, scrapes the hidden form fields from the login page, asks the
    site whether a verification code is required for the account, and, if
    not, posts the credentials taken from the module-level ``login_user``.

    Returns:
        True once the login service response text contains "success".
    """
    global s

    login_url = 'https://passport.jd.com/uc/login'

    while True:
        try:
            s = my_request.get_https_session(new=True, agent=True)

            randomCode = random.uniform(0.1, 1.0)
            r = s.get(login_url)
            d = pq(r.text)

            # Hidden form fields that have to be echoed back on submit.
            uuid = d('input:eq(0)').attr('value')
            param = d('input:eq(4)').attr('name')
            param_value = d('input:eq(4)').attr('value')

            loginname = login_user['name']
            loginpwd = login_user['pwd']

            verify_url = 'https://passport.jd.com/uc/showAuthCode?r='+str(randomCode)+'&version=2015'
            r = s.post(verify_url, data={'loginName': loginname}, timeout=10)
            # The reply is JSON wrapped in parentheses; drop them before parsing.
            result = r.text.replace('(', '').replace(')', '')

            verify = json.loads(result)
            logger.info(verify)
            # Credentials are only submitted when no verification code is
            # demanded; otherwise the loop starts over with a new session.
            if not verify['verifycode']:
                data = {'uuid': uuid,
                        param: param_value,
                        'loginname': loginname,
                        'nloginpwd': loginpwd,
                        'loginpwd': loginpwd,
                        'chkRememberMe': 'on',
                        'authcode': None}

                url = 'https://passport.jd.com/uc/loginService?uuid='+uuid+'&version=2015'
                r = s.post(url, data, timeout=10)

                logger.info(r.text)
                if "success" in r.text:
                    return True

        except Exception as ex:
            # Any failure is logged and followed by a one-minute pause before
            # the next attempt.
            logger.exception(ex)
            time.sleep(60)
Example #3
0
def fetch_company(url):
    (company_key, ) = util.re_get_result("http://www.itjuzi.com/company/(\d+)",
                                         url)
    logger.info("company_key=%s" % company_key)

    company_content = None
    member_contents = []
    news_contents = []
    investor_contents = []

    (flag, r) = my_request.get(logger, url)
    logger.info("flag=%d", flag)
    if flag == -1:
        return -1

    logger.info("status=%d", r.status_code)

    if r.status_code == 404:
        logger.info("Page Not Found!!!")
        return r.status_code

    if r.status_code != 200:
        #logger.info(r.status_code)
        return r.status_code

    #logger.info(r.text)
    d = pq(r.text)
    product_name = d('div.line-title> span> b').clone().children().remove(
    ).end().text().strip()
    if product_name == "":
        return 404

    company_content = {
        "date": datetime.datetime.now(),
        "source": source,
        "url": url,
        "company_key": company_key,
        "company_key_int": int(company_key),
        "content": r.text
    }

    #members
    lis = d('h4.person-name> a.title')
    for li in lis:
        try:
            l = pq(li)
            href = l.attr("href").strip()
            logger.info(href)
            member_name = l('b> span.c').text().strip()
            (member_key, ) = util.re_get_result(
                r'http://www.itjuzi.com/person/(\d*?)$', href)
            logger.info("member_key=%s, member_name=%s" %
                        (member_key, member_name))

            href = href.replace("http://", "https://")
            #if member_collection.find_one({"source":source, "member_key":member_key}) == None:
            if 1 == 1:
                flag = -1
                while flag != 0:
                    (flag, r) = my_request.get(logger, href)
                    if flag == 0 and r.status_code != 200:
                        flag = -1
                    if flag != 0:
                        my_request.get_https_session(new=True, agent=True)
                #logger.info(r.text)
                member_contents.append({
                    "date": datetime.datetime.now(),
                    "source": source,
                    "url": url,
                    "member_key": member_key,
                    "member_name": member_name,
                    "content": r.text
                })
        except Exception, ex:
            logger.exception(ex)
Example #4
0
                    "news_date":
                    "%s/%s/%s" % (news_year, news_month, news_day),
                    "news_url":
                    news_url,
                    "news_source_domain":
                    news_source_domain,
                    "content":
                    r.text
                })
        except Exception, ex:
            logger.exception(ex)

    #investors
    lis = d('table.list-round-v2 >tr > td> a')
    if len(lis) > 0:
        my_request.get_https_session(new=True, agent=True)
    for li in lis:
        try:
            l = pq(li)
            investor_url = l.attr('href').strip()
            investor_name = l.text().strip()
            (investor_key, ) = util.re_get_result(
                r"http://www.itjuzi.com/investfirm/(\d*)$", investor_url)
            logger.info(investor_url)
            logger.info(investor_name)
            logger.info(investor_key)
            #if investor_collection.find_one({"source":source, "investor_key":investor_key}) == None:
            investor_url = investor_url.replace("http://", "https://")
            if 1 == 1:
                flag = -1
                while flag != 0:
Example #5
0
        latest_album = collection.find({}).sort("key",
                                                pymongo.DESCENDING).limit(1)
    if latest_album.count() == 0:
        i = 0
    else:
        i = int(latest_album[0]["key"])

    latest = i
    logger.info("From: %d" % i)

    while True:
        i += 1
        url = "https://itjuzi.com/album/%d" % (i)

        if cnt <= 0:
            my_request.get_https_session()
            cnt = 100

        status = -1
        retry_times = 0
        while status != 200 and status != 404 and status != 302:
            try:
                status = fetch(url)
            except Exception, ex:
                logger.exception(ex)

            if status == -1:
                my_request.get_http_session(new=True, agent=False)
                cnt = 100

            retry_times += 1