def main():
    """Run the two-phase proxy pipeline, then exit the process.

    Phase one starts ``PROXY_THREAD_FETCH_MAX`` daemon workers running
    ``thread_fetch('proxy', proxy_queue)`` and blocks until ``proxy_queue``
    has been fully processed.  Phase two copies every entry of ``hosts``
    into ``test_queue``, starts ``PROXY_THREAD_TEST_PROXY_MAX`` daemon
    workers running ``thread_fetch('test', test_queue)`` and blocks until
    that queue has been fully processed as well.

    ``create_db()`` and ``proxy_urls()`` are called first; presumably they
    prepare the storage and fill ``proxy_queue`` -- confirm against their
    definitions.

    The function never returns normally: it ends with ``sys.exit(0)``.
    """

    def launch_workers(kind, work_queue, count):
        # Workers are daemon threads, so they never keep the interpreter
        # alive once main() calls sys.exit().
        for _ in range(count):
            worker = Thread(target=thread_fetch, args=(kind, work_queue))
            worker.daemon = True
            worker.start()

    create_db()
    proxy_urls()

    launch_workers('proxy', proxy_queue, PROXY_THREAD_FETCH_MAX)
    # wait all threads done
    proxy_queue.join()

    # Every host known after the fetch phase becomes a test task.
    for candidate in hosts:
        test_queue.put(candidate, block=False)

    launch_workers('test', test_queue, PROXY_THREAD_TEST_PROXY_MAX)
    LWP.log('''test_queue.qsize(): %s''' % (test_queue.qsize(), ))
    test_queue.join()

    sys.exit(0)
def fetch_proxy(url, conn):
    """Download ``url`` through httplib2, retrying whenever the request raises.

    The request uses a client created with ``timeout=PROXY_TIMEOUT`` and
    sends ``LWP.USER_AGENT`` as the user agent, accepts gzip/deflate, and
    sets a ``Referer`` built from the URL's own hostname.  When the request
    raises, the function sleeps a random 1-5 seconds, logs the error and
    calls itself again with the same arguments; the number of retries is
    not bounded here.

    :param url: address of the page to download.
    :param conn: handed unchanged to the retry call; not otherwise used in
        the visible part of this function.
    :returns: whatever the retry call returns after a failure; nothing is
        returned explicitly after a successful request.

    NOTE(review): the original layout of this function was flattened, so
    the failure handling below is the reading under which the error log
    and the retry are reachable -- confirm against the original file.
    NOTE(review): ``resp`` and ``content`` are not consumed in the visible
    part of the function; the local names are kept unchanged in case more
    code follows.
    """
    global hosts, PROXY_TIMEOUT, HTTP_DEBUG

    urls = urlparse(url)
    httplib2.debuglevel = HTTP_DEBUG
    http = httplib2.Http(timeout=PROXY_TIMEOUT)
    LWP.log("Fetch URL: %s" % url)

    resp = None
    content = ''
    try:
        # Headers are built inside the try so that any error raised while
        # assembling them is retried exactly like a failed request.
        request_headers = {
            'User-Agent': LWP.USER_AGENT,
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'http://%s' % urls.hostname,
        }
        resp, content = http.request(uri=url, headers=request_headers)
    except Exception as e:
        # Back off for a random 1-5 seconds before trying the same URL again.
        time.sleep(random.randint(1, 5))
        LWP.log('Error: (%s) %s' % (e, url))
        return fetch_proxy(url, conn)

    # A successful response (including status 304) needs no special
    # handling at this point.
def thread_fetch(ftype, queue):
    """Worker loop: take tasks from ``queue`` until it is empty.

    Each worker opens its own database connection (``DB_HOST``, ``DB_USER``,
    ``DB_PASS``, ``DB_NAME``, ``DB_PORT``) and closes it when the loop ends.

    :param ftype: ``'proxy'`` passes each task to ``fetch_proxy(task, conn)``;
        ``'test'`` passes each task to ``LWP.test_proxy`` and, when the
        returned proxy is not ``None``, stores it with
        ``proxy_insert(proxy, speed, cursor)`` and commits.  Any other value
        leaves the task unprocessed (it is still marked done).
    :param queue: queue object providing ``empty()``, ``get(block=False)``
        and ``task_done()``.

    Exceptions raised while processing a task are logged as ``TaskError``
    and do not stop the worker; ``task_done()`` is called for every task
    that was taken from the queue.
    """
    # The ``queue`` parameter is a queue *instance*, so ``queue.Empty`` would
    # be an attribute lookup on that instance and fail with AttributeError.
    # Import the real exception class under a separate name instead.
    try:
        from queue import Empty as queue_empty_error  # Python 3
    except ImportError:
        from Queue import Empty as queue_empty_error  # Python 2

    conn = db.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS,
                      db=DB_NAME, port=DB_PORT)
    try:
        while True:
            # Queue is empty: stop.
            if queue.empty():
                LWP.log('''TaskEmpty:break''')
                break

            # Another worker may take the last task between the check above
            # and this call, so an empty queue is still possible here.
            try:
                task = queue.get(block=False)
            except queue_empty_error:
                LWP.log('''TaskGet.Empty:break''')
                break

            try:
                if task is not None:
                    if ftype == 'proxy':
                        fetch_proxy(task, conn)
                    elif ftype == 'test':
                        proxy, speed = LWP.test_proxy(
                            task,
                            PROXY_TIMEOUT,
                            'http://esf.sh.fang.com/agenthome/',
                            r"""\-搜房网</title>""",
                            'gbk',
                        )
                        if proxy is not None:
                            c = conn.cursor()
                            try:
                                proxy_insert(proxy, speed, c)
                                conn.commit()
                            finally:
                                # Release the cursor even if the insert or
                                # the commit fails.
                                c.close()
            except Exception as e:
                LWP.log('''TaskError(%s)''' % (e,))

            # Mark the task finished whether or not it succeeded, so that
            # queue.join() in the caller can return.
            queue.task_done()
    finally:
        conn.close()
# Connection to the local database that holds the `broker` table.
local_conn = db.connect(host=LOCAL_DB_HOST, user=LOCAL_DB_USER,
                        passwd=LOCAL_DB_PASS, db=LOCAL_DB_NAME,
                        port=LOCAL_DB_PORT)


def get_anjuke(mobile):
    """Look up the ``BrokerLevel`` recorded for a mobile number.

    Queries ``ajk_brokerextend`` through the module-level ``ajk_conn``
    connection using a parameterized statement.

    :param mobile: value matched against the ``UserMobile`` column.
    :returns: the first column of the first matching row, or ``None`` when
        no row matches.
    """
    ajk_c = ajk_conn.cursor()
    try:
        ajk_c.execute(
            '''SELECT `BrokerLevel` FROM `ajk_brokerextend` where UserMobile=%s;''',
            (mobile,))
        row = ajk_c.fetchone()
    finally:
        # Close the cursor even when the query fails.
        ajk_c.close()

    if row is None:
        return None
    return row[0]


# Walk every broker and record whether a level exists for its phone number:
# brokers with a level get is_anjuke=1 plus that level, all others get
# is_anjuke=0 and an empty level.
local_c = local_conn.cursor()
try:
    local_c.execute(
        '''SELECT `id`,`realname`,`phonenum` FROM `broker` order by `id` asc;''')
    # fetchall() materializes the result set first, so reusing the same
    # cursor for the UPDATE statements inside the loop is safe.
    for broker in local_c.fetchall():
        bid, realname, phonenum = broker
        phonenum = str(phonenum)
        broker_level = get_anjuke(phonenum)
        if broker_level is not None:
            local_c.execute(
                '''UPDATE broker SET `is_anjuke`=1,`broker_level`=%s where `id`=%s;''',
                (broker_level, bid,))
        else:
            local_c.execute(
                '''UPDATE broker SET `is_anjuke`=0,`broker_level`='' where `id`=%s;''',
                (bid,))
        LWP.log("%s %s %s:%s" % (phonenum, broker_level, bid, realname, ))
        # NOTE(review): the original layout was flattened, so it is unclear
        # whether the commit ran per row or once after the loop.  Committing
        # per row never discards work the original would have kept -- confirm.
        local_conn.commit()
finally:
    # Close the cursor even when a query or update fails part-way through.
    local_c.close()
# proxy if matches != None: length = 0 for match in matches: proxy = (match[0], match[1]) if (proxy in hosts) == False: hosts.append(proxy) c = conn.cursor() proxy_insert(proxy, 999999, c) conn.commit() c.close() length += 1 # LWP.log("Add Proxy Server: %s:%s" % proxy); LWP.log("Add Proxy: %d/%d %s" % (length, len(matches), url)) return True def thread_fetch(ftype, queue): global PROXY_TIMEOUT global DB_HOST, DB_USER, DB_PASS, DB_NAME, DB_PORT conn = db.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME, port=DB_PORT) while True: # 队列为空,停止 if queue.empty(): LWP.log('''TaskEmpty:break''') break try: