Exemple #1
0
    def run(self):
        """Loop forever: re-check stored proxies and crawl when too few remain.

        Every cycle:

        1. empties ``self.proxies`` and prints a start banner;
        2. spawns one ``detect_from_db`` greenlet per row returned by
           ``sqlhelper.select()`` (each receives ``self.proxies``) and waits
           for all of them;
        3. stores ``len(self.proxies)`` in ``self.db_proxy_num.value``;
        4. if that count is below ``MINNUM``, maps ``self.crawl`` over
           ``parserList`` using ``self.crawl_pool``;
        5. sleeps for ``UPDATE_TIME``.

        This method never returns.
        """
        while True:
            self.proxies.clear()
            sys.stdout.write('IPProxyPool----->>>>>>>>beginning' + "\r\n")
            sys.stdout.flush()

            stored = sqlhelper.select()
            own_ip = getMyIP()
            # One greenlet per stored row; all of them must finish before
            # the surviving proxies are counted.
            checks = [
                gevent.spawn(detect_from_db, own_ip, entry, self.proxies)
                for entry in stored
            ]
            gevent.joinall(checks)

            alive = len(self.proxies)
            self.db_proxy_num.value = alive
            report = 'IPProxyPool----->>>>>>>>db exists ip:%d' % alive

            must_crawl = alive < MINNUM
            if must_crawl:
                report += '\r\nIPProxyPool----->>>>>>>>now ip num < MINNUM,start crawling...'
            else:
                report += '\r\nIPProxyPool----->>>>>>>>now ip num meet the requirement,wait UPDATE_TIME...'
            sys.stdout.write(report + "\r\n")
            sys.stdout.flush()

            # The status line is printed before crawling starts.
            if must_crawl:
                self.crawl_pool.map(self.crawl, parserList)

            time.sleep(UPDATE_TIME)
Exemple #2
0
    def run(self):
        """Endless maintenance loop for the proxy pool.

        Per iteration the method clears ``self.proxies``, runs
        ``detect_from_db`` in a greenlet for every row from
        ``sqlhelper.select()``, publishes the number of entries left in
        ``self.proxies`` through ``self.db_proxy_num.value`` and prints a
        status line. When fewer than ``MINNUM`` entries remain it calls
        ``self.crawl_pool.map(self.crawl, parserList)``. Each iteration ends
        with ``time.sleep(UPDATE_TIME)``. The method does not return.
        """
        crawl_notice = '\r\nIPProxyPool----->>>>>>>>now ip num < MINNUM,start crawling...'
        wait_notice = '\r\nIPProxyPool----->>>>>>>>now ip num meet the requirement,wait UPDATE_TIME...'

        while True:
            self.proxies.clear()
            sys.stdout.write('IPProxyPool----->>>>>>>>beginning' + "\r\n")
            sys.stdout.flush()

            rows = sqlhelper.select()
            local_ip = getMyIP()
            jobs = [
                gevent.spawn(detect_from_db, local_ip, row, self.proxies)
                for row in rows
            ]
            # Block until every check has finished filling self.proxies.
            gevent.joinall(jobs)

            usable = len(self.proxies)
            self.db_proxy_num.value = usable
            status = 'IPProxyPool----->>>>>>>>db exists ip:%d' % usable

            short_of_proxies = usable < MINNUM
            status += crawl_notice if short_of_proxies else wait_notice
            sys.stdout.write(status + "\r\n")
            sys.stdout.flush()

            if short_of_proxies:
                self.crawl_pool.map(self.crawl, parserList)

            time.sleep(UPDATE_TIME)
Exemple #3
0
 def update(self, conditions=None, value=None):
     """Update the rows selected by ``conditions``, or delete them.

     If either argument is falsy nothing is done and ``updateNum`` is 0.
     Otherwise ``checkSped(getMyIP(), conditions)`` is called:

     * result truthy and its first or second element is below 50: every
       key of ``conditions`` that ``self.params`` maps to a truthy entry
       becomes an equality filter on a ``Proxy`` query, the same mapping
       is applied to the keys of ``value``, the query is updated and the
       session committed; ``updateNum`` is what ``query.update`` returned.
     * anything else: ``self.delete(conditions)`` is called and
       ``updateNum`` is ``None``.

     Returns:
         dict with the single key ``'updateNum'``.
     """
     if not (conditions and value):
         return {'updateNum': 0}

     from validator.Validator import getMyIP, checkSped
     own_ip = getMyIP()
     speeds = checkSped(own_ip, conditions)

     if not speeds or not (speeds[0] < 50 or speeds[1] < 50):
         self.delete(conditions)
         return {'updateNum': None}

     # Translate the caller's keys into filter expressions; keys that
     # self.params does not know are skipped.
     criteria = []
     for key in conditions.keys():
         column = self.params.get(key, None)
         if column:
             criteria.append(column == conditions.get(key))

     query = self.session.query(Proxy)
     for criterion in criteria:
         query = query.filter(criterion)

     # Same key translation for the values to write.
     changes = {}
     for key in value.keys():
         column = self.params.get(key, None)
         if column:
             changes[column] = value.get(key)

     updated = query.update(changes)
     self.session.commit()
     return {'updateNum': updated}
Exemple #4
0
def store_data(queue2, db_proxy_num):
    """Consume proxies from ``queue2`` and insert those with a speed result.

    Runs forever. For every truthy item taken from the queue the function
    calls ``checkSped(getMyIP(), proxy)``; when the result is truthy the
    proxy is inserted through ``sqlhelper.insert`` together with
    ``[speeds, config.GOAL_HTTPS_LIST]`` and the success counter grows.
    A falsy queue item increments the failure counter. After each item a
    progress line is written to stdout.

    When an ordinary exception occurs (for example the 300 second
    ``get`` timeout expiring) and ``db_proxy_num.value`` is non-zero, that
    value is added to the success counter, a final progress line is
    written, and both counters and ``db_proxy_num.value`` are reset.

    ``KeyboardInterrupt``, ``SystemExit`` and other non-``Exception``
    signals are deliberately NOT caught, so the worker can be stopped.

    Args:
        queue2: queue-like object; ``get(timeout=300)`` is called on it.
        db_proxy_num: object exposing a readable and writable ``value``.
    """
    success_count = 0
    fail_count = 0
    while True:
        try:
            proxy = queue2.get(timeout=300)
            if proxy:
                # Kept as a function-level import, as in the original code.
                from validator.Validator import getMyIP, checkSped
                own_ip = getMyIP()
                speeds = checkSped(own_ip, proxy)
                value2 = [speeds, config.GOAL_HTTPS_LIST]
                print(value2)
                if speeds:
                    sqlhelper.insert(proxy, value2)
                    success_count += 1
            else:
                fail_count += 1
            progress = 'IPProxyPool----->>>>>>>>Success ip num :%d,Fail ip num:%d' % (
                success_count, fail_count)
            sys.stdout.write(progress + "\r")
            sys.stdout.flush()
        except Exception:
            # Catching BaseException here would also swallow Ctrl-C and
            # SystemExit and make this infinite loop impossible to stop.
            if db_proxy_num.value != 0:
                success_count += db_proxy_num.value
                db_proxy_num.value = 0
                progress = 'IPProxyPool----->>>>>>>>Success ip num :%d,Fail ip num:%d' % (
                    success_count, fail_count)
                sys.stdout.write(progress + "\r")
                sys.stdout.flush()
                success_count = 0
                fail_count = 0
Exemple #5
0
def main():
    """Launch the crawl, validate and store processes and wait for them.

    A shared integer ``Value`` and two queues connect the stages: the
    crawler and validator share the bounded first queue, the validator and
    the store stage share the second. Every target also receives the
    literal ``'FREE'`` as its last argument.
    """
    own_ip = getMyIP()
    db_proxy_count = Value('i', 0)
    task_queue = Queue(maxsize=TASK_QUEUE_SIZE)
    result_queue = Queue()

    # (target, args) for each stage, in start order.
    stages = (
        (startProxyCrawl, (task_queue, db_proxy_count, own_ip, 'FREE')),
        (validator, (task_queue, result_queue, own_ip, 'FREE')),
        (store_data, (result_queue, db_proxy_count, 'FREE')),
    )
    workers = [Process(target=target, args=args) for target, args in stages]

    # Start everything first, then block until each process exits.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
Exemple #6
0
# coding:utf-8

from multiprocessing import Value, Queue, Process
from api.apiServer import start_api_server
from db.DataStore import store_data

from validator.Validator import validator, getMyIP
from spider.ProxyCrawl import startProxyCrawl

from config import TASK_QUEUE_SIZE

if __name__ == "__main__":
    # Script entry point: run the crawler, validator and store stages as
    # separate processes connected by two queues and a shared counter.
    own_ip = getMyIP()
    db_proxy_count = Value('i', 0)
    task_queue = Queue(maxsize=TASK_QUEUE_SIZE)
    result_queue = Queue()

    # start_api_server is imported above, but no process is created for it
    # in this script.
    workers = [
        Process(target=startProxyCrawl,
                args=(task_queue, db_proxy_count, own_ip)),
        Process(target=validator,
                args=(task_queue, result_queue, own_ip)),
        Process(target=store_data,
                args=(result_queue, db_proxy_count)),
    ]

    # Start every stage, then wait for all of them in the same order.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
Exemple #7
0
from util.watcher import Watcher

# NOTE(review): `monkey` is not imported in the visible part of this file
# (presumably `from gevent import monkey` on an earlier line) — confirm.
# The call sits before the remaining imports.
monkey.patch_all()

from db.DataStore import store_data
from multiprocessing import Value, Queue, Process
from api.apiServer import start_api_server

from spider.ProxyCrawl import startProxyCrawl
from validator.Validator import validator, getMyIP
from loguru import logger

# Script entry point: creates a shared integer counter, two unbounded queues
# and four Process objects (API server, crawler, validator, store).
# Only the beginning of the `try` block is visible in this excerpt.
if __name__ == "__main__":
    #Watcher()

    my_current_ip = getMyIP()
    logger.info("My Current IP:{}", my_current_ip)

    DB_PROXY_NUM = Value('i', 0)
    q1 = Queue()
    q2 = Queue()
    # Control table of the processes that handle ip/port validation internally.
    # NOTE(review): this is a plain dict handed to a Process via `args`; the
    # child gets its own copy rather than a shared object — confirm intended.
    process_pool = {}

    p0 = Process(target=start_api_server)
    p1 = Process(target=startProxyCrawl, args=(q1, DB_PROXY_NUM))
    p2 = Process(target=validator, args=(q1, q2, my_current_ip, process_pool))
    p3 = Process(target=store_data, args=(q2, DB_PROXY_NUM))

    try:
        p0.start()
Exemple #8
0
# coding:utf-8

from multiprocessing import Value, Queue, Process
from api.apiServer import start_api_server
from db.DataStore import store_data

from validator.Validator import validator, getMyIP
from spider.ProxyCrawl import startProxyCrawl

from config import TASK_QUEUE_SIZE

if __name__ == "__main__":
    # Script entry point: run the API server plus the crawler, validator
    # and store stages, each in its own process.
    own_ip = getMyIP()
    db_proxy_count = Value('i', 0)
    task_queue = Queue(maxsize=TASK_QUEUE_SIZE)
    result_queue = Queue()

    # Listed in start order; the API server takes no arguments.
    workers = [
        Process(target=start_api_server),
        Process(target=startProxyCrawl,
                args=(task_queue, db_proxy_count, own_ip)),
        Process(target=validator,
                args=(task_queue, result_queue, own_ip)),
        Process(target=store_data,
                args=(result_queue, db_proxy_count)),
    ]

    # Start all processes before joining any of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()