def main():
    """Run the two phases of the proxy job, then exit the process.

    Phase 1 starts PROXY_THREAD_FETCH_MAX daemon threads that run
    ``thread_fetch('proxy', proxy_queue)`` and waits until every item on
    ``proxy_queue`` has been marked done.

    Phase 2 puts every entry of ``hosts`` on ``test_queue``, starts
    PROXY_THREAD_TEST_PROXY_MAX daemon threads that run
    ``thread_fetch('test', test_queue)``, logs the current queue size and
    waits for that queue as well.

    ``create_db()`` and ``proxy_urls()`` are called first; they are defined
    elsewhere (presumably they prepare the database and fill
    ``proxy_queue`` -- confirm against their definitions).

    Always finishes by calling ``sys.exit(0)``.
    """

    def launch_workers(count, kind, work_queue):
        # Start `count` daemon threads, each draining `work_queue`.
        for _ in range(count):
            worker = Thread(target=thread_fetch, args=(kind, work_queue))
            worker.daemon = True
            worker.start()

    create_db()
    proxy_urls()

    # Phase 1: block until every queued proxy-list task has been processed.
    launch_workers(PROXY_THREAD_FETCH_MAX, 'proxy', proxy_queue)
    proxy_queue.join()

    # Phase 2: queue every collected host for testing.
    for candidate in hosts:
        test_queue.put(candidate, block=False)

    launch_workers(PROXY_THREAD_TEST_PROXY_MAX, 'test', test_queue)
    # Logged after the workers have started, so the value may already be
    # lower than the number of hosts queued above.
    LWP.log('test_queue.qsize(): %s' % (test_queue.qsize(),))
    test_queue.join()

    sys.exit(0)
def fetch_proxy(url, conn):
    """Request *url* over HTTP, retrying until the request succeeds.

    Every attempt logs the URL and uses a new ``httplib2.Http`` client
    created with ``PROXY_TIMEOUT`` as its timeout. If the request raises,
    the function sleeps for a random 1-5 seconds, logs the error and tries
    again. There is no retry limit: a URL that never answers keeps the
    calling thread busy indefinitely.

    The retry is implemented as a loop rather than a recursive call so that
    a long run of failures cannot exhaust the interpreter/thread stack.

    Args:
        url: Absolute URL to request.
        conn: Database connection passed in by the worker thread. It is not
            used by the code in this function.

    Returns:
        None.

    NOTE(review): the response and its body are discarded here, and nothing
    in this function adds to ``hosts``. Presumably the parsing step lives
    elsewhere or was lost -- confirm before relying on this function to
    collect proxies.
    """
    httplib2.debuglevel = HTTP_DEBUG

    headers = {
        'User-Agent': LWP.USER_AGENT,
        'Accept-Encoding': 'gzip, deflate',
        # Send the root of the requested host as the referrer.
        'Referer': 'http://%s' % (urlparse(url).hostname,),
    }

    while True:
        http = httplib2.Http(timeout=PROXY_TIMEOUT)
        LWP.log("Fetch URL: %s" % (url,))
        try:
            # Unpacking is kept so that a malformed return value is treated
            # like any other failed attempt.
            resp, content = http.request(uri=url, headers=headers)
        except Exception as e:
            # Randomised back-off before the next attempt.
            time.sleep(random.randint(1, 5))
            LWP.log('Error: (%s) %s' % (e, url))
            continue
        return
def thread_fetch(ftype, queue):
    """Worker loop: drain *queue*, handling each task according to *ftype*.

    The worker opens its own database connection (DB_HOST, DB_USER,
    DB_PASS, DB_NAME, DB_PORT), then repeatedly takes tasks from *queue*
    without blocking. It stops as soon as the queue reports empty or a
    non-blocking ``get`` raises ``Empty``. The connection is closed when the
    loop ends.

    Task handling:
      * ``ftype == 'proxy'``: ``fetch_proxy(task, conn)`` is called.
      * ``ftype == 'test'``: ``LWP.test_proxy`` is called for the task; when
        it returns a proxy that is not ``None``, ``proxy_insert`` is called
        with a new cursor and the connection is committed.
      * any other ``ftype``, or a task that is ``None``: nothing is done.

    Exceptions raised while handling a task are logged and swallowed so one
    bad task does not stop the worker. ``queue.task_done()`` is called for
    every task that was taken, whatever the outcome, so ``queue.join()`` in
    the caller can return.

    Args:
        ftype: ``'proxy'`` or ``'test'``; selects how tasks are handled.
        queue: Queue instance to drain.
    """
    # The exception class lives in the Queue *module*. The ``queue`` argument
    # is a Queue instance and has no ``Empty`` attribute, so it cannot be
    # used in the ``except`` clause below.
    try:
        from Queue import Empty  # Python 2
    except ImportError:
        from queue import Empty  # Python 3

    conn = db.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS,
                      db=DB_NAME, port=DB_PORT)
    try:
        while True:
            # Queue is empty: stop.
            if queue.empty():
                LWP.log('''TaskEmpty:break''')
                break

            try:
                task = queue.get(block=False)
            except Empty:
                # Another worker took the last task between the check above
                # and this call.
                LWP.log('''TaskGet.Empty:break''')
                break

            try:
                if task is not None:
                    if ftype == 'proxy':
                        fetch_proxy(task, conn)
                    elif ftype == 'test':
                        proxy, speed = LWP.test_proxy(
                            task,
                            PROXY_TIMEOUT,
                            'http://esf.sh.fang.com/agenthome/',
                            r"""\-搜房网</title>""",
                            'gbk',
                        )
                        if proxy is not None:
                            c = conn.cursor()
                            try:
                                proxy_insert(proxy, speed, c)
                                conn.commit()
                            finally:
                                c.close()
            except Exception as e:
                LWP.log('''TaskError(%s)''' % (e,))
            finally:
                # Always acknowledge the task so queue.join() can finish.
                queue.task_done()
    finally:
        conn.close()
# Connection to the local database that holds the `broker` table.
local_conn = db.connect(host=LOCAL_DB_HOST, user=LOCAL_DB_USER,
                        passwd=LOCAL_DB_PASS, db=LOCAL_DB_NAME,
                        port=LOCAL_DB_PORT)


def get_anjuke(mobile):
    """Return the ``BrokerLevel`` stored for *mobile* in ``ajk_brokerextend``.

    The query runs on the module-level ``ajk_conn`` connection. The cursor is
    closed even when the query raises.

    Args:
        mobile: Value matched against the ``UserMobile`` column.

    Returns:
        The ``BrokerLevel`` column of the first matching row, or ``None``
        when no row matches.
    """
    ajk_c = ajk_conn.cursor()
    try:
        ajk_c.execute('''SELECT `BrokerLevel` FROM `ajk_brokerextend` where UserMobile=%s;''', (mobile,))
        row = ajk_c.fetchone()
    finally:
        ajk_c.close()

    if row is not None:
        return row[0]
    return None


# For every broker in the local table, record whether the phone number is
# present in `ajk_brokerextend` and, if so, copy its broker level.
local_c = local_conn.cursor()
try:
    local_c.execute('''SELECT `id`,`realname`,`phonenum` FROM `broker` order by `id` asc;''')
    # fetchall() returns all rows before the loop starts, so the same cursor
    # can be reused for the UPDATE statements below.
    for broker in local_c.fetchall():
        bid, realname, phonenum = broker
        phonenum = str(phonenum)
        broker_level = get_anjuke(phonenum)
        if broker_level is not None:
            local_c.execute('''UPDATE broker SET `is_anjuke`=1,`broker_level`=%s where `id`=%s;''', (broker_level, bid,))
        else:
            local_c.execute('''UPDATE broker SET `is_anjuke`=0,`broker_level`='' where `id`=%s;''', (bid,))
        LWP.log("%s %s %s:%s" % (phonenum, broker_level, bid, realname, ))
    # NOTE(review): the flattened source does not show whether the commit ran
    # once per row or once after the loop; this commits once at the end --
    # confirm that matches the intended behaviour.
    local_conn.commit()
finally:
    local_c.close()
# Standard library
import re
import sys
import time
import random
from Queue import Queue
from threading import Thread, stack_size
from urlparse import urlparse

# Third-party and project modules
import LWP
import httplib2
import MySQLdb as db

# Stack size, in bytes, for threads created after this call (512 KiB).
stack_size(512 * 1024)

# Proxy settings, read as integers from the 'Proxy' config section:
#   PROXY_THREAD_FETCH_MAX      - number of threads downloading proxy lists
#   PROXY_THREAD_TEST_PROXY_MAX - number of threads speed-testing proxies
#   PROXY_TIMEOUT               - proxy timeout
(
    PROXY_THREAD_FETCH_MAX,
    PROXY_THREAD_TEST_PROXY_MAX,
    PROXY_TIMEOUT,
) = (
    int(LWP.config('Proxy', option))
    for option in ('FETCH_THREAD_MAX', 'TEST_THREAD_MAX', 'TIMEOUT')
)

# httplib2 debug level.
HTTP_DEBUG = 0

# Current date, formatted as YYYYMMDD.
CURR_DATE = LWP.t2date(time.time(), '%Y%m%d')

# Database settings from the 'DB' config section: four strings and an
# integer port.
DB_HOST, DB_USER, DB_PASS, DB_NAME = (
    str(LWP.config('DB', option))
    for option in ('HOST', 'USER', 'PASS', 'NAME')
)
DB_PORT = int(LWP.config('DB', 'PORT'))