예제 #1
0
파일: api.py 프로젝트: xiaoleo911/Spider
 def __init__(self, request, client_address, server):
     """Prepare the database handle used by this handler.

     ``request``, ``client_address`` and ``server`` are accepted but not
     used by the code visible here.

     If the database object cannot be created, the error is logged and
     ``self.sqlite`` is set to an empty string so callers can test it for
     truthiness.
     """
     # Plain constant: assign it outside the try block so the attribute
     # exists even when opening the database fails.
     self.table_name = 'proxy'
     try:
         self.sqlite = DatabaseObject(DB_CONFIG['SQLITE'])
     # "as" form is valid on Python 2.6+ and Python 3; the old comma form
     # is a SyntaxError on Python 3.
     except Exception as e:
         self.sqlite = ''
         logger.error('SQLite error: %s', e)
예제 #2
0
class ProxyPool:
    """Keep the ``proxy`` table stocked with recently validated proxies.

    ``run`` starts two threads: one constructs the ``ProxyServer`` API and
    one runs the maintenance loop (re-validate, delete, crawl).
    """

    def __init__(self):
        """Create the database handle, the validator and the crawler."""
        self.sqlite = DatabaseObject(DB_CONFIG['SQLITE'])
        self.Validator = Validator()
        self.Crawler = Crawler()

    @staticmethod
    def _cutoff(minutes):
        """Return the local time ``minutes`` ago as ``YYYY-MM-DD HH:MM:SS``."""
        moment = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
        return moment.strftime('%Y-%m-%d %H:%M:%S')

    def _monitor(self):
        """Run update, delete and crawl forever, sleeping 1800 s per cycle."""
        while True:
            self._update(PROXYPOOL_CONFIG['UPDATE_TIME'])
            self._delete(PROXYPOOL_CONFIG['DELETE_TIME'])
            self._crawl(PROXYPOOL_CONFIG['CRAWL_TIME'])
            time.sleep(1800)

    def _crawl(self, minutes):
        """Crawl and validate new proxies until enough fresh ones exist.

        A proxy counts as fresh when its ``updatetime`` is newer than
        ``minutes`` ago. While the fresh count is below
        ``PROXYPOOL_CONFIG['MIN_IP_NUM']`` a crawl/validate/save round is
        run, followed by a 600 second pause before counting again.
        """
        # Iterative retry: the previous self-recursive retry added a stack
        # frame per round and could exhaust the recursion limit when the
        # pool stayed below the minimum for a long time.
        while True:
            query = "SELECT COUNT(*) FROM proxy WHERE updatetime>'%s'" % (
                self._cutoff(minutes))
            count = self.sqlite.executesql(query)[0]
            if int(count[0]) >= PROXYPOOL_CONFIG['MIN_IP_NUM']:
                return
            logger.info('Crawl proxy begin')
            proxies = self.Crawler.run()
            logger.info('Crawl proxy end')
            logger.info('Validate proxy begin')
            avaliable_proxies = self.Validator.run(proxies)
            logger.info('Validate proxy end')
            if DB_CONFIG['SQLITE']:
                self.save2sqlite(avaliable_proxies)
            time.sleep(600)

    def _delete(self, minutes):
        """Delete proxies whose ``updatetime`` is older than ``minutes`` ago."""
        query = "DELETE FROM proxy WHERE updatetime<'%s'" % (
            self._cutoff(minutes))
        self.sqlite.executesql(query)

    def _update(self, minutes):
        """Re-validate proxies not updated within the last ``minutes``.

        The stale rows are turned into ``ip:port`` strings, passed through
        the validator, and whatever the validator returns is saved.
        """
        query = "SELECT ip,port FROM proxy WHERE updatetime<'%s'" % (
            self._cutoff(minutes))
        proxies = ['%s:%s' % n for n in self.sqlite.executesql(query)]
        if proxies:
            avaliable_proxies = self.Validator.run(proxies)
            self.save2sqlite(avaliable_proxies)

    def save2sqlite(self, result):
        """Insert ``result`` into ``proxy``, falling back to an update.

        Whatever ``insert`` returns is treated as the rows it could not
        store and is retried with ``update``; anything ``update`` returns
        is logged.
        """
        failed = self.sqlite.insert('proxy', result)
        if failed:
            failed = self.sqlite.update('proxy', failed)
        if failed:
            logger.info('Some ip failed to save: %s' % (str(failed)))

    def _api(self):
        """Construct the proxy API server on the configured port."""
        ProxyServer(API_CONFIG['PORT'])

    def run(self):
        """Start the API thread and the monitor thread."""
        t1 = threading.Thread(target=self._api)
        t2 = threading.Thread(target=self._monitor)
        t1.start()
        t2.start()
예제 #3
0
파일: proxypool.py 프로젝트: 0ps/ProxyPool
class ProxyPool:
    """Maintain the ``proxy`` table: refresh, expire and top up proxies."""

    def __init__(self):
        """Build the database handle, validator and crawler."""
        self.sqlite = DatabaseObject(DB_CONFIG['SQLITE'])
        self.Validator = Validator()
        self.Crawler = Crawler()

    def _monitor(self):
        """Loop forever: update, delete, crawl, then sleep 1800 seconds."""
        while True:
            self._update(PROXYPOOL_CONFIG['UPDATE_TIME'])
            self._delete(PROXYPOOL_CONFIG['DELETE_TIME'])
            self._crawl(PROXYPOOL_CONFIG['CRAWL_TIME'])
            time.sleep(1800)

    def _crawl(self, minutes):
        """Crawl once if fewer than ``MIN_IP_NUM`` proxies are fresh.

        Fresh means ``updatetime`` newer than ``minutes`` ago. Crawled
        proxies are validated and, when SQLite is configured, saved.
        """
        threshold = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
        stamp = threshold.strftime('%Y-%m-%d %H:%M:%S')
        rows = self.sqlite.executesql(
            "SELECT COUNT(*) FROM proxy WHERE updatetime>'%s'" % stamp)
        first_row = rows[0]
        if int(first_row[0]) >= PROXYPOOL_CONFIG['MIN_IP_NUM']:
            return

        logger.info('Crawl proxy begin')
        crawled = self.Crawler.run()
        logger.info('Crawl proxy end')

        logger.info('Validate proxy begin')
        usable = self.Validator.run(crawled)
        logger.info('Validate proxy end')

        if DB_CONFIG['SQLITE']:
            self.save2sqlite(usable)

    def _delete(self, minutes):
        """Remove proxies whose ``updatetime`` is older than ``minutes`` ago."""
        threshold = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
        stamp = threshold.strftime('%Y-%m-%d %H:%M:%S')
        self.sqlite.executesql(
            "DELETE FROM proxy WHERE updatetime<'%s'" % stamp)

    def _update(self, minutes):
        """Re-validate and re-save proxies older than ``minutes`` ago."""
        threshold = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
        stamp = threshold.strftime('%Y-%m-%d %H:%M:%S')
        rows = self.sqlite.executesql(
            "SELECT ip,port FROM proxy WHERE updatetime<'%s'" % stamp)
        stale = ['%s:%s' % row for row in rows]
        if not stale:
            return
        usable = self.Validator.run(stale)
        self.save2sqlite(usable)

    def save2sqlite(self, result):
        """Insert ``result``; retry leftovers as updates; log what remains."""
        rejected = self.sqlite.insert('proxy', result)
        if not rejected:
            return
        still_rejected = self.sqlite.update('proxy', rejected)
        if still_rejected:
            logger.info('Some ip failed to save: %s' % (str(still_rejected)))

    def _api(self):
        """Construct the proxy API server on the configured port."""
        ProxyServer(API_CONFIG['PORT'])

    def run(self):
        """Start one thread for the API and one for the monitor loop."""
        workers = [threading.Thread(target=entry)
                   for entry in (self._api, self._monitor)]
        for worker in workers:
            worker.start()
예제 #4
0
 def __init__(self):
     """Create the database handle, the validator and the crawler."""
     sqlite_settings = DB_CONFIG['SQLITE']
     self.sqlite = DatabaseObject(sqlite_settings)
     # Collaborators are stored under capitalised attribute names.
     self.Validator = Validator()
     self.Crawler = Crawler()
예제 #5
0
class ProxyPool:
    """Keep the ``proxy`` table stocked with validated, scored proxies.

    ``run`` starts two threads: one constructs the ``ProxyServer`` API and
    one runs the maintenance loop (re-validate, delete, crawl).
    """

    def __init__(self):
        """Create the database handle, the validator and the crawler."""
        self.sqlite = DatabaseObject(DB_CONFIG['SQLITE'])
        self.Validator = Validator()
        self.Crawler = Crawler()

    @staticmethod
    def _cutoff(minutes):
        """Return the local time ``minutes`` ago as ``YYYY-MM-DD HH:MM:SS``."""
        moment = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
        return moment.strftime('%Y-%m-%d %H:%M:%S')

    def _monitor(self):
        """Run update, delete and crawl forever, sleeping between cycles."""
        while True:
            self._update(PROXYPOOL_CONFIG['UPDATE_TIME'])
            self._delete(PROXYPOOL_CONFIG['DELETE_TIME'],
                         PROXYPOOL_CONFIG['DELETE_SCORE'])
            self._crawl(PROXYPOOL_CONFIG['CRAWL_TIME'])
            time.sleep(PROXYPOOL_CONFIG['SLEEP_TIME'])

    def _crawl(self, minutes):
        """Crawl and validate new proxies until enough fresh ones exist.

        A proxy counts as fresh when its ``updatetime`` is newer than
        ``minutes`` ago. While the fresh count is below
        ``PROXYPOOL_CONFIG['MIN_IP_NUM']`` a crawl/validate/save round is
        run, followed by a 600 second pause before counting again.
        """
        # Iterative retry: the previous self-recursive retry added a stack
        # frame per round and could exhaust the recursion limit when the
        # pool stayed below the minimum for a long time.
        while True:
            query = "SELECT COUNT(*) FROM proxy WHERE updatetime>'%s'" % (
                self._cutoff(minutes))
            count = self.sqlite.executesql(query)[0]
            if int(count[0]) >= PROXYPOOL_CONFIG['MIN_IP_NUM']:
                return
            logger.info('Crawl proxy begin')
            proxies = self.Crawler.run()
            logger.info('Crawl proxy end')
            logger.info('Validate proxy begin')
            # Freshly crawled proxies are paired with an empty protocol.
            proxies = [(n, '') for n in proxies]
            avaliable_proxies = self.Validator.run(proxies)
            logger.info('Validate proxy end')
            if DB_CONFIG['SQLITE']:
                self.save2sqlite(avaliable_proxies)
            time.sleep(600)

    def _delete(self, minutes, score):
        """Delete proxies older than ``minutes`` ago or scored below ``score``."""
        query = "DELETE FROM proxy WHERE updatetime<'%s' or score<%s" % (
            self._cutoff(minutes), score)
        self.sqlite.executesql(query)

    def _update(self, minutes):
        """Re-validate stale proxies and penalise those that fail.

        Rows not updated within the last ``minutes`` are validated again.
        Validated proxies are saved; every stale proxy missing from the
        validator's result has its score decremented.
        """
        query = "SELECT ip,port,protocol FROM proxy WHERE updatetime<'%s'" % (
            self._cutoff(minutes))
        proxies = [('%s:%s' % n[:2], n[2])
                   for n in self.sqlite.executesql(query)]
        if proxies:
            avaliable_proxies = self.Validator.run(proxies)
            checked = {tuple(address.split(':')) for address, _ in proxies}
            # split() yields strings, so normalise the validator's values to
            # strings too; otherwise an integer port would never match and
            # every proxy would be penalised.
            passed = {(str(n['ip']), str(n['port']))
                      for n in avaliable_proxies}
            validated_fail_proxies = list(checked.difference(passed))
            self.save2sqlite(avaliable_proxies)
            self._minus_score(validated_fail_proxies)

    def save2sqlite(self, result):
        """Insert ``result`` into ``proxy``, falling back to an update.

        Whatever ``insert`` returns is treated as the rows it could not
        store and is retried with ``update``; anything ``update`` returns
        is logged.
        """
        failed = self.sqlite.insert('proxy', result)
        if failed:
            failed = self.sqlite.update('proxy', failed)
        if failed:
            logger.info('Some ip failed to save: %s' % (str(failed)))

    def _minus_score(self, result):
        """Decrement ``score`` for each ``(ip, port)`` pair and commit."""
        query = "UPDATE proxy SET score=(score-1) WHERE ip=? AND port=?;"
        self.sqlite.cursor.executemany(query, result)
        self.sqlite.db.commit()

    def _api(self):
        """Construct the proxy API server on the configured port."""
        ProxyServer(API_CONFIG['PORT'])

    def run(self):
        """Start the API thread and the monitor thread."""
        t1 = threading.Thread(target=self._api)
        t2 = threading.Thread(target=self._monitor)
        t1.start()
        t2.start()
예제 #6
0
 def __init__(self):
     """Set up the database handle plus the validator and crawler helpers."""
     db_settings = DB_CONFIG['SQLITE']
     self.sqlite = DatabaseObject(db_settings)
     # Helper objects keep the capitalised attribute names callers expect.
     self.Validator = Validator()
     self.Crawler = Crawler()
예제 #7
0
class ProxyPool:
    """Maintain a scored pool of proxies in the ``proxy`` table.

    The monitor loop re-validates stale rows, deletes expired or low-score
    rows and crawls for more proxies when too few are fresh; ``run`` starts
    that loop and the ``ProxyServer`` API in separate threads.
    """

    def __init__(self):
        """Create the database handle, the validator and the crawler."""
        self.sqlite = DatabaseObject(DB_CONFIG['SQLITE'])
        self.Validator = Validator()
        self.Crawler = Crawler()

    @staticmethod
    def _minutes_ago(minutes):
        """Format the local time ``minutes`` ago as ``YYYY-MM-DD HH:MM:SS``."""
        moment = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
        return moment.strftime('%Y-%m-%d %H:%M:%S')

    def _monitor(self):
        """Repeat update, delete and crawl forever with a configured pause."""
        while True:
            self._update(PROXYPOOL_CONFIG['UPDATE_TIME'])
            self._delete(PROXYPOOL_CONFIG['DELETE_TIME'], PROXYPOOL_CONFIG['DELETE_SCORE'])
            self._crawl(PROXYPOOL_CONFIG['CRAWL_TIME'])
            time.sleep(PROXYPOOL_CONFIG['SLEEP_TIME'])

    def _crawl(self, minutes):
        """Top up the pool until enough proxies are fresh.

        Counts rows whose ``updatetime`` is newer than ``minutes`` ago. As
        long as that count is below ``PROXYPOOL_CONFIG['MIN_IP_NUM']`` it
        crawls, validates and saves, then waits 600 seconds and re-counts.
        """
        # A loop rather than a self-call: retrying by recursion deepened the
        # stack on every round and could hit the interpreter's recursion
        # limit if the pool never reached the minimum.
        while True:
            query = "SELECT COUNT(*) FROM proxy WHERE updatetime>'%s'" % (
                self._minutes_ago(minutes))
            count = self.sqlite.executesql(query)[0]
            if int(count[0]) >= PROXYPOOL_CONFIG['MIN_IP_NUM']:
                return
            logger.info('Crawl proxy begin')
            proxies = self.Crawler.run()
            logger.info('Crawl proxy end')
            logger.info('Validate proxy begin')
            # Freshly crawled proxies are paired with an empty protocol.
            proxies = [(n, '') for n in proxies]
            avaliable_proxies = self.Validator.run(proxies)
            logger.info('Validate proxy end')
            if DB_CONFIG['SQLITE']:
                self.save2sqlite(avaliable_proxies)
            time.sleep(600)

    def _delete(self, minutes, score):
        """Delete rows older than ``minutes`` ago or with score below ``score``."""
        query = "DELETE FROM proxy WHERE updatetime<'%s' or score<%s" % (
            self._minutes_ago(minutes), score)
        self.sqlite.executesql(query)

    def _update(self, minutes):
        """Re-validate stale proxies; save the good ones, penalise the rest.

        Stale means ``updatetime`` older than ``minutes`` ago. Each stale
        proxy absent from the validator's result gets its score reduced by
        one via ``_minus_score``.
        """
        query = "SELECT ip,port,protocol FROM proxy WHERE updatetime<'%s'" % (
            self._minutes_ago(minutes))
        proxies = [('%s:%s' % n[:2], n[2]) for n in self.sqlite.executesql(query)]
        if proxies:
            avaliable_proxies = self.Validator.run(proxies)
            checked = {tuple(address.split(':')) for address, _ in proxies}
            # The checked pairs are strings (from split), so compare against
            # string forms of the validator's ip/port; an integer port would
            # otherwise never match and all proxies would lose score.
            passed = {(str(n['ip']), str(n['port'])) for n in avaliable_proxies}
            validated_fail_proxies = list(checked.difference(passed))
            self.save2sqlite(avaliable_proxies)
            self._minus_score(validated_fail_proxies)

    def save2sqlite(self, result):
        """Insert ``result`` into ``proxy``; retry leftovers with an update.

        The return value of ``insert`` is treated as the rows that were not
        stored; they are passed to ``update`` and whatever that returns is
        logged.
        """
        failed = self.sqlite.insert('proxy', result)
        if failed:
            failed = self.sqlite.update('proxy', failed)
        if failed:
            logger.info('Some ip failed to save: %s' % (str(failed)))

    def _minus_score(self, result):
        """Subtract one from ``score`` for each ``(ip, port)`` pair, then commit."""
        query = "UPDATE proxy SET score=(score-1) WHERE ip=? AND port=?;"
        self.sqlite.cursor.executemany(query, result)
        self.sqlite.db.commit()

    def _api(self):
        """Construct the proxy API server on the configured port."""
        ProxyServer(API_CONFIG['PORT'])

    def run(self):
        """Start the API thread and the monitor thread."""
        t1 = threading.Thread(target=self._api)
        t2 = threading.Thread(target=self._monitor)
        t1.start()
        t2.start()