Python fetchUrlProxy2 Examples

Programming Language: Python

Namespace/Package Name: jTool

Method/Function: fetchUrlProxy2

Examples at hotexamples.com: 2

Python fetchUrlProxy2 - 2 examples found. These are the top-rated real-world Python examples of jTool.fetchUrlProxy2, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: getMore.py Project: jhfnetboy/proxyCrawler

def getDetailListPages(pageNo, conn):
    '''
    Fetch one page of the "CourtNotCarryOut" listing and store its rows.

    Each list item on the page, once opened, leads to a list of URLs, and the
    pages behind those URLs are the final scraping targets. This function only
    records the list items: for every row found it inserts the corporation
    name, table, rowID and page number into `base_page_list` through
    jTool.insertData, committing after each insert.

    pageNo -- page number; sent as the `pageNo` form field and stored with
              every inserted row.
    conn   -- open database connection exposing cursor()/commit()/close().
              It is closed here after a fetched page has been processed. It is
              left open when the fetch returns nothing or when an exception
              propagates out of this function.

    Returns None.
    '''
    requestUrl = 'http://218.108.28.28:8000/ListPrompts.aspx'
    head = ['Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset:GBK,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding:gzip,deflate,sdch',
            'Accept-Language:zh-CN,zh;q=0.8',
            'Cache-Control:max-age=0',
            'Connection:keep-alive',
            'Cookie:ASP.NET_SessionId=t3isah45gu5kb4454qyxkhzy; lzstat_uv=6061202253430616218|2529639; lzstat_ss=953382219_1_1373621147_2529639; _gscu_374314293=7359234405sddy11; _gscs_374314293=73592344d74zfy11|pv:3; _gscbrs_374314293=1; ECStaticSession=ECS81',
            'Host:www.zjcredit.gov.cn:8000',
            'Pragma:no-cache',
            'Origin:http://www.zjcredit.gov.cn:8000',
            'Referer:http://218.108.28.28:8000/ListPrompts.aspx?sectionID=01&tableID=CourtNotCarryOut&associateID=00000000000000000&hasPromptHistroy=False',
            'User-Agent:Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36'
            ]
    # Number of rows requested per page; also the upper bound on rows stored.
    pageLength = 20
    post_data_dic = {'recordTotal': 46358, 'tableID': 'CourtNotCarryOut', 'associateID': '00000000000000000', 'field_CorporationName': '', 'sectionID': '01', 'field_OrganizationCode': '', 'isIntermediary': 'False', 'pageNo': pageNo, 'pageLength': pageLength}
    proxy = '218.108.170.170:80'
    content = jTool.fetchUrlProxy2(proxy, requestUrl, post_data_dic, 'post', head)
    if content:
        exp = '//table[1]/tr[2]/td[1]/table[1]/tr/td/a'
        aList = jTool.getNodes(content, exp)
        # The '//' expressions are absolute, so aList[1] only serves as a
        # handle for querying the whole document.
        titles = aList[1].xpath('//@title')
        aStr = aList[1].xpath('//@onclick')
        # titles[i] is paired with aStr[i + 1]; presumably the first onclick
        # belongs to a non-row element -- TODO confirm against the page.
        # Bound the loop by what was actually returned so that a short page
        # (e.g. the last one) does not raise IndexError.
        rowCount = min(pageLength, len(titles), len(aStr) - 1)
        cursor = conn.cursor()
        try:
            for i in range(rowCount):
                tmp = aStr[i + 1].split(',')
                corpRecords = {}
                # NOTE(review): decode/encode is a UTF-8 round trip, kept
                # as in the original -- confirm it is still required.
                corpRecords['`corpName`'] = titles[i].decode('utf-8').encode('utf-8')
                # The onclick value is a comma-separated call; the 1st and
                # 6th arguments are quoted strings holding table and row id.
                corpRecords['`table`'] = tmp[0].split("'")[1]
                corpRecords['`rowID`'] = tmp[5].split("'")[1]
                corpRecords['`pageNo`'] = str(pageNo)
                jTool.insertData(cursor, 'base_page_list', corpRecords)
                # Commit per row so rows stored before a failure are kept.
                conn.commit()
        finally:
            # Release the cursor even when an insert or commit fails.
            cursor.close()
        conn.close()

Example #2

Show file

File: getMore.py Project: jhfnetboy/proxyCrawler

def getDetailPageContent(proxy, head, rowID, corpName):
    '''
    POST the detail-page form for one row and return the fetched content.

    proxy    -- proxy passed through to jTool.fetchUrlProxy2.
    head     -- request headers passed through to jTool.fetchUrlProxy2.
    rowID    -- value sent as the `rowID` form field.
    corpName -- value sent as the `corpName` form field.

    Returns whatever jTool.fetchUrlProxy2 returns for the request.
    '''
    formFields = {
        'sectionID': '01',
        'associateID': '00000000000000000',
        'tableID': 'CourtNotCarryOut',
        'creditID': 0,
        'rowID': rowID,
        'timeSpan': '',
        'seqNo': '',
        'corpName': corpName,
        'titleID': '',
    }
    return jTool.fetchUrlProxy2(
        proxy,
        'http://www.zjcredit.gov.cn:8000/BrowseDocumentPrompt.aspx',
        formFields,
        'post',
        head,
    )