def tryToGetHotWithProxy(rec, proxy):
    """Look up the search-result count for ``rec.name`` through ``proxy``.

    Args:
        rec: Record whose ``name`` attribute is the word to look up.
        proxy: Proxy mapping handed to
            ``getwordhot.getNumOfBaiduSearchResult``; its ``'http'`` entry is
            shown in the failure message.

    Returns:
        A ``(hot, hasWorkableProxy)`` tuple. ``hot`` is the value returned by
        the lookup (0 if the lookup raised). ``hasWorkableProxy`` is True only
        when the lookup succeeded with a positive count.
    """
    hot = 0
    try:
        # The literal 5 is the helper's second positional argument --
        # presumably a timeout in seconds; confirm against getwordhot.
        hot = getwordhot.getNumOfBaiduSearchResult(rec.name, 5, proxy)
        hasWorkableProxy = hot > 0
    except Exception as e:
        # Best effort: any failure just marks this proxy as unusable.
        hasWorkableProxy = False
        # Read the address defensively so that reporting the failure cannot
        # itself raise (proxy may be None or lack an 'http' entry).
        proxyAddress = proxy.get('http') if isinstance(proxy, dict) else proxy
        print(e)
        print('failed to get result with proxy: %s' % (proxyAddress,))
    # Callers unpack two values, so both must be returned on every path.
    return hot, hasWorkableProxy
def tryToGetHotWithProxy(rec, proxy):
    """Look up the search-result count for ``rec`` through ``proxy``.

    Args:
        rec: Value passed unchanged as the first argument of
            ``getwordhot.getNumOfBaiduSearchResult``.
            NOTE(review): other code in this file passes ``rec.name`` to the
            same helper -- confirm that ``rec`` is already the search word
            here.
        proxy: Proxy mapping handed to the lookup; its ``"http"`` entry is
            shown in the failure message.

    Returns:
        A ``(hot, hasWorkableProxy)`` tuple. ``hot`` is the value returned by
        the lookup (0 if the lookup raised). ``hasWorkableProxy`` is True only
        when the lookup succeeded with a positive count.
    """
    hot = 0
    try:
        # The literal 5 is the helper's second positional argument --
        # presumably a timeout in seconds; confirm against getwordhot.
        hot = getwordhot.getNumOfBaiduSearchResult(rec, 5, proxy)
        hasWorkableProxy = hot > 0
    except Exception as e:
        # Best effort: any failure just marks this proxy as unusable.
        hasWorkableProxy = False
        # Read the address defensively so that reporting the failure cannot
        # itself raise (proxy may be None or lack an "http" entry).
        proxyAddress = proxy.get("http") if isinstance(proxy, dict) else proxy
        print(e)
        print("failed to get result with proxy: %s" % (proxyAddress,))
    # Callers unpack two values, so both must be returned on every path.
    return hot, hasWorkableProxy
# One lock shared by every verifyProxy call. A lock created inside the
# function is private to that call, so concurrent workers would not actually
# exclude each other while reading inFile, appending to proxyList or printing.
_verifyProxyLock = threading.Lock()


def verifyProxy(inFile, proxyList):
    """Test each proxy listed in ``inFile`` and collect the ones that work.

    Lines are read one at a time until end of file. Each non-blank line is
    expected to look like ``<protocol>=<address>``. The proxy is tried by
    asking ``getwordhot.getNumOfBaiduSearchResult`` for the word ``'iphone'``
    with ``timeout=2``; when the returned count is positive, the stripped
    line is appended to ``proxyList``.

    Blank lines are skipped. Lines without ``=`` and proxies whose lookup
    raises are reported as ``bad proxy`` and do not stop processing.

    Args:
        inFile: Open file-like object providing ``readline()``; it may be
            shared between several workers.
        proxyList: List that receives the lines of working proxies.
    """
    lock = _verifyProxyLock
    while True:
        with lock:
            rawLine = inFile.readline()
        if not rawLine:
            # readline() returns an empty string only at end of file.
            break
        line = rawLine.strip()
        if not line:
            # A blank line is not the end of the input; keep going.
            continue

        protocol, separator, proxy = line.partition('=')
        if not separator:
            with lock:
                print('bad proxy: %s' % (line,))
            continue

        try:
            proxyDic = {protocol.lower(): proxy}
            with lock:
                print(proxyDic)
            # printMT receives the lock itself, so it is called while the
            # lock is NOT held.
            printMT('trying with proxy=' + proxy, lock)
            num = getwordhot.getNumOfBaiduSearchResult(
                'iphone', timeout=2, proxy=proxyDic)
            if num > 0:
                with lock:
                    print(line)
                    proxyList.append(line)
        except Exception as e:
            # Best effort: a failing proxy is reported and skipped.
            with lock:
                print(e)
                print('bad proxy: %s' % (proxy,))
if proxy == None: print "Can't get a proxy, exit!!!!" hasMoreProxy=False break else: print 'change proxy as:',proxy['http'] hot,hasWorkableProxy=tryToGetHotWithProxy(rec,proxy) else: hot,hasWorkableProxy=tryToGetHotWithProxy(rec,proxy) if hasWorkableProxy==False and not hasMoreProxy: print 'no more proxy avaible, change as local mode' hot=getwordhot.getNumOfBaiduSearchResult(rec.name,2,None) if (hot<1): print 'all were banned, stop!!!!!' break i=i+1 print rec.name.encode('utf8') print hot updateHotByID(rec.id,hot) if (i%10==0): print 'i=',i,'make a DB commit' session.commit()