Exemple #1
0
def doTask(task, param):
    """Crawl the URL of one task through a randomly chosen proxy.

    The crawl is attempted up to three times in total, each time with a
    freshly picked proxy from ``proxy.txt``.  On success the task is handed
    to ``completeTask``; on failure the task id is printed.  The connection
    in ``param["conn"]`` is committed afterwards.

    Args:
        task: Indexable record; ``task[0]`` is the id, ``task[1]`` the URL,
            ``task[2]`` the name stored as ``ename`` and ``task[3]`` the
            value forwarded to ``completeTask``.
        param: Mutable dict shared with the crawler.  Must contain ``"conn"``
            (an object offering ``cursor()`` and ``commit()``); the keys
            ``"id"``, ``"url"`` and ``"ename"`` are overwritten here.

    Raises:
        ValueError: If the proxy list is empty.
    """
    proxyList = jTool.getProxy("proxy.txt")
    if not proxyList:
        # Fail with a clear message instead of the opaque ValueError that
        # random.randint(0, -1) used to raise.
        raise ValueError(
            "proxy.txt yielded no proxies; cannot crawl task " + str(task[0])
        )

    param["id"] = str(task[0])
    param["url"] = task[1]
    param["ename"] = task[2]

    # One initial attempt plus two retries, as before.
    max_attempts = 3

    # The cursor is not used for queries in this function; it is still
    # opened as before and is now always released, even if the crawl raises.
    cursor = param["conn"].cursor()
    try:
        for _ in range(max_attempts):
            proxy = str(random.choice(proxyList)).strip()
            # param["conn"] is looked up on every call on purpose: the
            # crawler receives ``param`` and may replace the connection.
            result = rp.crawlGetUrl(param["conn"], param, proxy)
            if result["logic"]:
                break

        if result["logic"]:
            completeTask(param, task[3])
        else:
            # Single-argument form prints identically on Python 2 and 3.
            print("error record:" + str(param["id"]))

        param["conn"].commit()
    finally:
        cursor.close()
Exemple #2
0
def doTask(task, param):
    """Crawl the URL of one task through a randomly chosen proxy.

    The target URL is ``param['preUrlip']`` followed by ``task[1]``.  The
    crawl is attempted up to three times in total, each time with a freshly
    picked proxy from ``proxy.txt``.  On success ``completeTask`` is called
    with ``param['iid']``; on failure the crawler's ``rtData`` is enriched
    with the task details and inserted into the ``error_log_1`` table, and
    the task id is printed.  The connection is committed afterwards.

    Args:
        task: Indexable record; ``task[0]`` is the id, ``task[1]`` the URL
            suffix and ``task[2]`` the name stored as ``ename``.
        param: Mutable dict shared with the crawler.  Must contain ``'conn'``
            (an object offering ``cursor()`` and ``commit()``) and
            ``'preUrlip'``; the keys ``'id'``, ``'url'`` and ``'ename'`` are
            overwritten here.  ``'iid'`` is read on success but never set in
            this function -- NOTE(review): confirm that the caller or the
            crawler provides it.

    Raises:
        ValueError: If the proxy list is empty.
    """
    proxyList = jTool.getProxy('proxy.txt')
    if not proxyList:
        # Fail with a clear message instead of the opaque ValueError that
        # random.randint(0, -1) used to raise.
        raise ValueError(
            'proxy.txt yielded no proxies; cannot crawl task ' + str(task[0])
        )

    param['id'] = str(task[0])
    param['url'] = param['preUrlip'] + task[1]
    param['ename'] = task[2]

    # One initial attempt plus two retries, as before.
    max_attempts = 3

    cursor = param['conn'].cursor()
    try:
        for _ in range(max_attempts):
            proxy = str(random.choice(proxyList)).strip()
            # param['conn'] is looked up on every call on purpose: the
            # crawler receives ``param`` and may replace the connection.
            result = rp.crawlGetUrl(param['conn'], param, proxy)
            if result['logic']:
                break

        if result['logic']:
            completeTask(param, param['iid'])
        else:
            # Record which task and which (last tried) proxy failed.
            # Assumes result['rtData'] is a mutable mapping on failure.
            error_row = result['rtData']
            error_row['eid'] = param['id']
            error_row['url'] = param['url']
            error_row['ename'] = param['ename']
            error_row['proxy'] = proxy  # already stripped above
            jTool.insertDatai(cursor, 'error_log_1', error_row)
            # Single-argument form prints identically on Python 2 and 3.
            print('error record:' + str(param['id']))

        param['conn'].commit()
    finally:
        # Release the cursor even when the crawl or the insert raises.
        cursor.close()