Esempio n. 1
0
def getInfo(url, para, maxPages=10):
    """
    Fetch the result pages for a query and collect their parsed records.

    An initial POST is issued only to read the total page count. Each page
    from 1 up to ``min(page count, maxPages)`` is then requested in turn and
    its records are appended to the result.

    Args:
        url: Endpoint handed to ``Http.post``.
        para: Request parameters. ``para['pn']`` is overwritten in place
            with the current page number (as a string) before each page
            request, so the caller's dict is modified.
        maxPages: Upper bound on the number of pages fetched. Defaults to
            10; pass ``None`` to fetch every page.

    Returns:
        A list holding the items returned by ``getInfoDetail`` for every
        fetched page, in page order.
    """
    generalHttp = Http()
    # `headers` and `cookies` are names defined outside this function.
    htmlCode = generalHttp.post(url,
                                para=para,
                                headers=headers,
                                cookies=cookies)
    pageCount = Parse(htmlCode).parsePage()
    lastPage = pageCount if maxPages is None else min(pageCount, maxPages)

    info = []
    for i in range(1, lastPage + 1):
        print('第%s页' % i)
        para['pn'] = str(i)
        htmlCode = generalHttp.post(url,
                                    para=para,
                                    headers=headers,
                                    cookies=cookies)
        # extend() grows the list in place instead of copying it each page.
        info.extend(getInfoDetail(Parse(htmlCode)))
        if i < lastPage:
            # Pause between consecutive requests only; waiting after the
            # final page would just delay the return.
            time.sleep(5)
    return info
Esempio n. 2
0
def getInfo(url, para):
    """
    Crawl every result page and pass each page's records to ``processInfo``.

    An initial POST is issued only to read the total page count. Each page
    is then requested, parsed with ``getInfoDetail`` and, when non-empty,
    handed to ``processInfo`` together with ``para``. A failure on one page
    is logged and the crawl continues with the next page.

    Args:
        url: Endpoint handed to ``Https.post``.
        para: Request parameters. ``para['pn']`` is overwritten in place
            with the current page number (as a string) before each page
            request.

    Returns:
        None. Records are forwarded page by page, not accumulated.
    """
    generalHttps = Https()
    # `hd` is the request-header object defined outside this function.
    htmlCode = generalHttps.post(url, para=para, headers=hd)
    pageCount = Parse(htmlCode).parsePage()

    for i in range(1, pageCount + 1):
        try:
            print('第%s页开始爬取' % i)
            para['pn'] = str(i)
            htmlCode = generalHttps.post(url, para=para, headers=hd)
            info = getInfoDetail(Parse(htmlCode))
            if not info:
                print('第%s页内容为空,不存储' % i)
            elif processInfo(info, para) is None:
                # A None result is treated as a storage failure, so the
                # success message below must not be printed for this page.
                print('存储异常')
            else:
                print('第%s页存储完成' % i)
        except Exception as e:
            # Best effort: one bad page must not abort the whole crawl.
            # logging.exception records the traceback with the message.
            logging.exception('Process except')
            print('str(e):\t\t %s' % str(e))
            print('repr(e):\t %s' % repr(e))

        # Throttle before the next page request.
        time.sleep(2)
Esempio n. 3
0
def getInfo(url, data):
    """
    Gather the parsed records from every result page of ``url``.

    The first POST is used only to read the page count; every page is then
    requested again with ``data['pn']`` set to its number (as a string),
    which modifies the caller's dict. A 2-second pause follows each page.

    NOTE(review): the ``requests`` response object itself (not its text) is
    passed to ``Parse`` -- confirm that ``Parse`` expects that.

    Returns:
        The concatenation of ``getInfoDetail`` results, in page order.
    """

    def fetchParsed():
        # POST the current form data and wrap the response in a parser.
        response = requests.post(url, data=data, headers=setting.headers)
        return Parse(response)

    totalPages = fetchParsed().parsePage()

    collected = []
    for pageNo in range(1, totalPages + 1):
        print("第%s页" % pageNo)
        data['pn'] = str(pageNo)
        pageRecords = getInfoDetail(fetchParsed())
        collected = collected + pageRecords
        time.sleep(2)
    return collected
Esempio n. 4
0
def getInfo(url, para):
    """
    Gather parsed records from every page of the Lagou positionAjax.json
    endpoint.

    Each request is built from a fresh ``init()`` result (its ``headers``,
    ``params``, ``cookies`` and ``data`` entries). The first request is used
    only to read the page count; one further request is made per page, with
    a 3-second pause after each.

    NOTE(review): ``url`` is accepted but never used -- the endpoint is
    hard-coded. ``para['pn']`` is set for each page, yet the request payload
    comes from ``init()``; the page number only reaches the request if
    ``init()`` reads the same dict -- confirm.

    Returns:
        The concatenation of ``getInfoDetail`` results, in page order.
    """
    endpoint = 'https://www.lagou.com/jobs/positionAjax.json'

    def loadPage():
        # Rebuild the request settings for every call, then query the API.
        conf = init()
        response = requests.post(endpoint,
                                 headers=conf['headers'],
                                 params=conf['params'],
                                 cookies=conf['cookies'],
                                 data=conf['data'])
        return Parse(response.text)

    pageCount = loadPage().parsePage()
    print('pageCount', pageCount)

    info = []
    for pageNo in range(1, pageCount + 1):
        print('第%s页' % pageNo)
        para['pn'] = str(pageNo)
        pageRecords = getInfoDetail(loadPage())
        info = info + pageRecords
        time.sleep(3)
    return info