Example no. 1
import sys

# MysqlClient, Crawler and POOL_UPPER_THRESHOLD are assumed to come from the
# project's own database, crawler and settings modules.

class Getter:
    def __init__(self):
        self.mysql = MysqlClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Determine whether the proxy pool limit has been reached
        """
        return self.mysql.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        print('Getter starting')
        if not self.is_over_threshold():
            for callback_label in range(self.crawler.__CrawlFuncCount__):
                callback = self.crawler.__CrawlFunc__[callback_label]
                # Fetch proxies from this crawl function
                proxies = self.crawler.get_proxies(callback)
                sys.stdout.flush()
                for proxy in proxies:
                    # Only store proxies that are not already in the pool
                    if not self.mysql.exists(proxy):
                        print(proxy)
                        self.mysql.add(proxy)
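
The Getter above performs a single crawl pass and relies on the project's own MysqlClient, Crawler and POOL_UPPER_THRESHOLD. A minimal sketch of how such a getter is typically driven, assuming a hypothetical GETTER_CYCLE interval and that the names above are importable, might look like:

import time

GETTER_CYCLE = 300  # hypothetical interval (seconds) between crawl rounds

def schedule_getter(cycle=GETTER_CYCLE):
    # Periodically top up the pool; Getter.run() skips crawling once the
    # pool count is at or above POOL_UPPER_THRESHOLD
    getter = Getter()
    while True:
        getter.run()
        time.sleep(cycle)

if __name__ == '__main__':
    schedule_getter()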
Example no. 2
# MysqlClient, Spider and MAX_POOL_COUNT are assumed to come from the
# project's own database, spider and settings modules.

class Getter:
    def __init__(self):
        self.mysql = MysqlClient()
        self.spider = Spider()

    def is_over_max(self):
        # True once the pool already holds MAX_POOL_COUNT proxies or more
        return self.mysql.count() >= MAX_POOL_COUNT

    def run(self):
        print('Crawler starting')
        if not self.is_over_max():
            for callback_label in range(self.spider.__SpiderFuncCount__):
                callback = self.spider.__SpiderFunc__[callback_label]
                proxies = self.spider.get_proxies(callback)
                for proxy in proxies:
                    self.mysql.add(proxy)
        self.mysql.close()
Example no. 3
def save_to_mysql(self, query, title, url):
    # Persist one search result (query, title, url) through the project's
    # MysqlClient wrapper
    m = MysqlClient()
    m.add(query, title, url)
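
The MysqlClient used by save_to_mysql() is not shown in these examples. A minimal sketch of an add() method that would support this call, assuming a pymysql connection and a hypothetical results table, could be:

import pymysql

class MysqlClient:
    # Illustrative sketch only: the table name, columns and connection
    # settings are assumptions, not taken from the original project.
    def __init__(self, host='localhost', user='root', password='', db='spider'):
        self.conn = pymysql.connect(host=host, user=user, password=password,
                                    db=db, charset='utf8mb4')

    def add(self, query, title, url):
        sql = 'INSERT INTO results (query, title, url) VALUES (%s, %s, %s)'
        with self.conn.cursor() as cursor:
            cursor.execute(sql, (query, title, url))
        self.conn.commit()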