Exemple #1
0
class ProxyManager(object):
    """
    ProxyManager
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                proxy_set = set()
                # fetch raw proxy
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy:
                        self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                        proxy_set.add(proxy.strip())

                # store raw proxy
                for proxy in proxy_set:
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(proxy)
            except Exception, e:
                print e
                continue
Exemple #2
0
class ProxyManager(object):

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        抓取代理地址存入DB中
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()

            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                proxy_iter = [_ for _ in getattr(GetFreeProxy, proxyGetter.strip())()]
            except Exception as e:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.info("{func}:fetch proxy {proxy} error".format(func=proxyGetter, proxy=proxy))

            # 存储到DB
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        返回一个有用的代理
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None

    def delete(self, proxy):
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        if EnvUtil.PY3:
            return list(items.keys()) if items else list()
        return items.key() if items else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy.strip())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    ProxyManager
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy.strip())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
Exemple #5
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                proxy_iter = [
                    _ for _ in getattr(GetFreeProxy, proxyGetter.strip())()
                ]
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.error('{func}: fetch proxy {proxy} error'.format(
                        func=proxyGetter, proxy=proxy))

            # store
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def validateProxy(self, proxy):

        return validUsefulProxy(proxy)

    def referProxy(self, proxy, city):

        print(proxy, city)
        if proxy != None:
            if self.validateProxy(proxy):
                return proxy

            old_proxy_info = GetProxyLocInfo(proxy)
            old_proxy_loc = old_proxy_info.get_proxy_loc_info()
            city = old_proxy_loc[1]

        count = 1
        while count < 15:
            tmp_proxy = self.get()
            print(tmp_proxy)
            if self.validateProxy(tmp_proxy):
                try:
                    tmp_proxy_info = GetProxyLocInfo(tmp_proxy)
                    tmp_proxy_loc = tmp_proxy_info.get_proxy_loc_info()
                    if tmp_proxy_loc[1] == city:
                        return tmp_proxy
                except:
                    pass
            count += 1

        return None

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemple #6
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy['ip']))
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy['ip']):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    proxy['country'] = self.get_ip_country(proxy['ip'])
                    self.db.put(proxy)

    def get_ip_country(self, ip):
        match = geolite2.lookup(ip)
        return match.country if match else None

    def get(self, filters):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.random_one(filters)

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()

    def clean(self):
        self.db.clean()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }