Beispiel #1
0
def main():
    """Run the proxy pipeline: fetch proxy lists first, then test the proxies.

    Calls ``create_db()`` and ``proxy_urls()``, starts
    ``PROXY_THREAD_FETCH_MAX`` daemon workers on ``proxy_queue`` and blocks
    until that queue has been fully processed.  Every entry of ``hosts`` is
    then queued on ``test_queue``, ``PROXY_THREAD_TEST_PROXY_MAX`` daemon
    workers are started on it, the queue size is logged, and the function
    blocks again until the queue is processed.  Finally exits the process
    with status 0.
    """

    def start_workers(count, kind, tasks):
        # Daemon threads: they must never keep the interpreter alive on exit.
        for _ in range(count):
            worker = Thread(target=thread_fetch, args=(kind, tasks))
            worker.daemon = True
            worker.start()

    create_db()
    proxy_urls()

    # Phase 1: download the proxy lists and wait for all of them.
    start_workers(PROXY_THREAD_FETCH_MAX, 'proxy', proxy_queue)
    proxy_queue.join()

    # Phase 2: queue every collected host for testing.
    for proxy in hosts:
        test_queue.put_nowait(proxy)

    start_workers(PROXY_THREAD_TEST_PROXY_MAX, 'test', test_queue)

    LWP.log('''test_queue.qsize(): %s''' % (test_queue.qsize(), ))

    test_queue.join()

    sys.exit(0)
Beispiel #2
0
def fetch_proxy(url, conn, max_retries=3):
    """Request ``url`` with httplib2, retrying a bounded number of times.

    The request sends the LWP user agent, accepts gzip/deflate encoding and
    uses ``http://<hostname of url>`` as the referer.  ``httplib2.debuglevel``
    is set from the module-level ``HTTP_DEBUG`` and the request timeout from
    ``PROXY_TIMEOUT``.

    Previously the error branch tested ``resp`` inside the ``except`` clause;
    ``resp`` is still ``None`` whenever ``http.request`` raises, so failures
    were never logged and the retry never ran.  Failures are now always
    logged and retried up to ``max_retries`` times.

    Args:
        url: URL to fetch.
        conn: Database connection handed in by the caller; the code in this
            block does not use it.
        max_retries: Number of extra attempts after the first failed one.
            Each retry is preceded by a random 1-5 second pause.

    Returns:
        None.
    """
    urls = urlparse(url)
    httplib2.debuglevel = HTTP_DEBUG
    http = httplib2.Http(timeout=PROXY_TIMEOUT)
    headers = {
        'User-Agent': LWP.USER_AGENT,
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'http://%s' % (urls.hostname),
    }

    LWP.log("Fetch URL: %s" % (url))

    resp = None
    content = ''

    attempts_left = max_retries + 1
    while attempts_left > 0:
        attempts_left -= 1
        try:
            resp, content = http.request(uri=url, headers=headers)
            break
        except Exception as e:
            LWP.log('Error: (%s) %s' % (e, url))
            if attempts_left > 0:
                # Back off for a random interval before trying again.
                time.sleep(random.randint(1, 5))

    # NOTE(review): nothing in this block consumes ``resp``/``content``;
    # confirm whether response-handling code is missing from this function.
Beispiel #3
0
def thread_fetch(ftype, queue):
    """Worker loop: take tasks from ``queue`` until it is empty.

    Opens one database connection for the lifetime of the worker and closes
    it when the loop ends.  For each task:

    * ``ftype == 'proxy'``: the task is passed to ``fetch_proxy`` together
      with the connection.
    * ``ftype == 'test'``: the task is passed to ``LWP.test_proxy``; when it
      returns a proxy that is not ``None``, the proxy and its speed are
      stored through ``proxy_insert`` and committed.

    Any exception raised while handling a task is logged and the worker moves
    on; ``queue.task_done()`` is called for every task that was taken.

    Args:
        ftype: ``'proxy'`` or ``'test'``; other values make the task a no-op.
        queue: Queue instance to consume.
    """
    # ``queue`` is a Queue *instance* here and has no ``Empty`` attribute, so
    # the exception class has to be imported from the module under its own name.
    try:
        from Queue import Empty as QueueEmpty  # Python 2
    except ImportError:
        from queue import Empty as QueueEmpty  # Python 3

    conn = db.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME, port=DB_PORT)
    try:
        while True:
            # Queue is empty: stop.
            if queue.empty():
                LWP.log('''TaskEmpty:break''')
                break

            try:
                task = queue.get(block=False)
            except QueueEmpty:
                # Another worker took the last task between empty() and get().
                LWP.log('''TaskGet.Empty:break''')
                break

            try:
                if task is not None:
                    if ftype == 'proxy':
                        fetch_proxy(task, conn)
                    elif ftype == 'test':
                        proxy, speed = LWP.test_proxy(task, PROXY_TIMEOUT, 'http://esf.sh.fang.com/agenthome/', r"""\-搜房网</title>""",
                                                      'gbk')
                        if proxy is not None:
                            c = conn.cursor()
                            try:
                                proxy_insert(proxy, speed, c)
                                conn.commit()
                            finally:
                                c.close()
            except Exception as e:
                LWP.log('''TaskError(%s)''' % (e,))
            finally:
                # Always acknowledge the task so queue.join() can return.
                queue.task_done()
    finally:
        conn.close()
# Module-level connection built from the LOCAL_DB_* settings; used by the
# script code below.
local_conn = db.connect(
    host=LOCAL_DB_HOST,
    user=LOCAL_DB_USER,
    passwd=LOCAL_DB_PASS,
    db=LOCAL_DB_NAME,
    port=LOCAL_DB_PORT,
)


def get_anjuke(mobile):
    """Look up the ``BrokerLevel`` stored for a mobile number.

    Queries ``ajk_brokerextend`` through the module-level ``ajk_conn`` for
    the first row whose ``UserMobile`` equals ``mobile``.

    Args:
        mobile: Mobile number to search for.

    Returns:
        The ``BrokerLevel`` column of the first matching row, or ``None``
        when no row matches.
    """
    ajk_c = ajk_conn.cursor()
    try:
        ajk_c.execute('''SELECT `BrokerLevel` FROM `ajk_brokerextend` where UserMobile=%s;''', (mobile,))
        row = ajk_c.fetchone()
    finally:
        # Release the cursor even when the query fails.
        ajk_c.close()

    if row is None:
        return None
    return row[0]


# Walk every row of `broker` in id order and record its Anjuke status:
# when get_anjuke() returns a level for the phone number the row is flagged
# with that level, otherwise the flag and level are cleared.  All updates are
# committed together after the loop.
local_c = local_conn.cursor()
local_c.execute('''SELECT `id`,`realname`,`phonenum` FROM `broker` order by `id` asc;''')
for bid, realname, phonenum in local_c.fetchall():
    phonenum = str(phonenum)
    broker_level = get_anjuke(phonenum)

    if broker_level is None:
        update_sql = '''UPDATE broker SET `is_anjuke`=0,`broker_level`='' where `id`=%s;'''
        update_args = (bid,)
    else:
        update_sql = '''UPDATE broker SET `is_anjuke`=1,`broker_level`=%s where `id`=%s;'''
        update_args = (broker_level, bid,)
    local_c.execute(update_sql, update_args)

    LWP.log("%s %s %s:%s" % (phonenum, broker_level, bid, realname, ))

local_conn.commit()
local_c.close()
Beispiel #5
0
import re
import sys
import time
import random
import LWP
import httplib2
import MySQLdb as db
from Queue import Queue
from threading import Thread
from urlparse import urlparse
from threading import stack_size

# Stack size for threads created from here on: 512 KiB (32768 * 16 bytes).
stack_size(512 * 1024)

# Number of threads that download proxy lists
PROXY_THREAD_FETCH_MAX = int(LWP.config('Proxy', 'FETCH_THREAD_MAX'))
# Number of threads that speed-test proxies
PROXY_THREAD_TEST_PROXY_MAX = int(LWP.config('Proxy', 'TEST_THREAD_MAX'))
# Proxy timeout
PROXY_TIMEOUT = int(LWP.config('Proxy', 'TIMEOUT'))
# HTTP debug level
HTTP_DEBUG = 0
# Current date, formatted as YYYYMMDD
CURR_DATE = LWP.t2date(time.time(), '%Y%m%d')
# Database settings: the four string options are read in HOST, USER, PASS,
# NAME order, followed by the integer port.
DB_HOST, DB_USER, DB_PASS, DB_NAME = (
    str(LWP.config('DB', option)) for option in ('HOST', 'USER', 'PASS', 'NAME')
)
DB_PORT = int(LWP.config('DB', 'PORT'))