예제 #1
0
class LoginCore:
    def __init__(self):
        self.db = DbClient()

    def cookie_login(self,driver,worker,account_dict):
        """
        网站登录并获取cookies
        """
        cookies = worker.login(driver,account_dict)
        return cookies


    def check_cookies(self,driver,worker,cookies):
        result = worker.check_cookies(driver,cookies)
        return result

    def save_cookies(self,cookies,account_dict,cookie_queue):
        """
        保存cookie到cookie池里面
        """
        try:
            self.db.put(account_dict['account'],data=cookies)
        except Exception as e:
            raise e

    def run(self,driver,worker,account_dict):
        cookies = self.cookie_login(driver,worker,account_dict)
        time.sleep(2)
        result = self.check_cookies(driver,worker,cookies)
        if result:
            self.save_cookies(cookies,account_dict,worker.cookie_queue)
        driver.quit()
예제 #2
0
 def __init__(self, host="localhost", client=None):
     self.config = GetConfig()
     self.client = MongoClient('mongodb://localhost:27017/') if client is None else client
     self.ssdb_client = DbClient()
     self.db = self.client.spider
     if self.db.lazada.count() is 0:
         self.db.lazada.create_index([("status", ASCENDING),
                                      ("pr", DESCENDING)])
예제 #3
0
class PayProxyTest(object):
    def __init__(self):
        self.db = DbClient()
        self.db.changeTable('useful_proxy')

    def GetProxy(self):
        try:
            time1 = time.time()
            url = 'https://api.2808proxy.com/proxy/unify/get?token=Y3AEO9WES4U3WKQAJXZO8DYM7LAZFOQN&amount=1&proxy_type=http&format=json&splitter=rn&expire=300'
            resp = requests.get(url)
            ip = resp.json().get('data')[0].get('ip')
            http_port = resp.json().get('data')[0].get('http_port')
            proxy = '%s:%s' % (ip, http_port)
            print(proxy)
            time2 = time.time()
            print(resp.json())
            print('总耗时:', time2 - time1)
        except Exception as e:
            print(e)

    def InsertProxy(self, proxy):
        self.db.put(proxy)
예제 #4
0
class ProxyManager(object):
    """
    ProxyManager
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                proxy_set = set()
                # fetch raw proxy
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy:
                        self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                        proxy_set.add(proxy.strip())

                # store raw proxy
                for proxy in proxy_set:
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(proxy)
            except Exception, e:
                print e
                continue
예제 #5
0
 def __init__(self):
     self.asdl_proxy_queue = "asdl_proxy"
     self.db = DbClient()
예제 #6
0
 def __init__(self):
     self.db = DbClient()
예제 #7
0
 def __init__(self):
     self.db = DbClient()
     self.raw_proxy_queue = 'raw_proxy'
     self.log = LogHandler('proxy_manager')
     self.useful_proxy_queue = 'useful_proxy'
     self.adsl_queue = 'adsl'
예제 #8
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy_queue'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                proxy_set.add(proxy)

            # store raw proxy
            self.db.changeTable(self.raw_proxy_queue)
            for proxy in proxy_set:
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()
예제 #9
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            # proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                # proxy_iter = [_ for _ in getattr(GetFreeProxy, proxyGetter.strip())()]
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #10
0
class ProxyManager(object):

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        抓取代理地址存入DB中
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()

            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                proxy_iter = [_ for _ in getattr(GetFreeProxy, proxyGetter.strip())()]
            except Exception as e:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.info("{func}:fetch proxy {proxy} error".format(func=proxyGetter, proxy=proxy))

            # 存储到DB
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        返回一个有用的代理
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None

    def delete(self, proxy):
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        if EnvUtil.PY3:
            return list(items.keys()) if items else list()
        return items.key() if items else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
예제 #11
0
 def __init__(self):
     ProxyManager.__init__(self)
     self.db = DbClient()
     self.log = LogHandler('valid_schedule')
예제 #12
0
class ProxyManager(object):
    """
    ProxyManager
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy.strip())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
예제 #13
0
 def __init__(self):
     self.db = DbClient()
     self.raw_proxy_queue = 'raw_proxy'
     self.log = LogHandler('proxy_manager')
     # self.log.error("new a ProxyManager instance")
     self.useful_proxy_queue = 'useful_proxy'
예제 #14
0
class AsdlProxyManager():
    def __init__(self):
        self.asdl_proxy_queue = "asdl_proxy"
        self.db = DbClient()

    def add_asdl_proxy(self, proxy_str):
        proxy = Proxy(proxy_str,
                      last_time=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        self.db.changeTable(self.asdl_proxy_queue)
        self.db.put(proxy)

    def delete_asdl_proxy(self, proxy_str):
        self.db.changeTable(self.asdl_proxy_queue)
        self.db.delete(proxy_str)

    def get_all_proxy(self):
        self.db.changeTable(self.asdl_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list]
예제 #15
0
 def __init__(self):
     self.db = DbClient()
     self.db.changeTable('useful_proxy')
예제 #16
0
 def __init__(self, mode):
     self.mode = mode
     self.db = DbClient(mode)
     self.raw_proxy_queue = 'raw_proxy'
     self.log = LogHandler('proxy_manager')
     self.useful_proxy_queue = 'useful_proxy'
예제 #17
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self, mode):
        self.mode = mode
        self.db = DbClient(mode)
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter/getFreeProxy.py
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info(
                            'Mode:{mode} {func}: fetch proxy {proxy}'.format(
                                mode=self.mode, func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            'Mode:{mode} {func}: fetch proxy {proxy} error'.
                            format(mode=self.mode,
                                   func=proxyGetter,
                                   proxy=proxy))
            except Exception as e:
                self.log.error("Mode:{mode} {func}: fetch proxy fail".format(
                    mode=self.mode, func=proxyGetter))
                continue

    def get_http(self):
        """
        return a useful proxy (http)
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def get_https(self):
        """
        return a useful proxy (https)
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #18
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy['ip']))
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy['ip']):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    proxy['country'] = self.get_ip_country(proxy['ip'])
                    self.db.put(proxy)

    def get_ip_country(self, ip):
        match = geolite2.lookup(ip)
        return match.country if match else None

    def get(self, filters):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.random_one(filters)

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()

    def clean(self):
        self.db.clean()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #19
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))
                        continue
                    elif proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        continue
                    else:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} success'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        self.db.put(Proxy(proxy, source=proxyGetter))
                        proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            random_choice = random.choice(item_list)
            return Proxy.newProxyFromJson(random_choice)
        return None

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def getAllByName(self, name):
        all_proxies = self.getAll()

        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        fail_list = self.db.getAll()
        fail_proxies = [Proxy.newProxyFromJson(_) for _ in fail_list]

        # todo: 优化
        filter_proxies = []
        for proxy in all_proxies:
            isFailed = False
            for failed in fail_proxies:
                if failed.proxy == proxy.proxy:
                    failed_date = datetime.strptime(failed.last_time,
                                                    "%Y-%m-%d %H:%M:%S")
                    if failed_date + timedelta(hours=24) > datetime.now():
                        isFailed = True
                    break
            if not isFailed:
                filter_proxies.append(proxy)

        return filter_proxies

    def deleteByName(self, name, proxy):
        failed_proxy = Proxy(
            proxy=proxy,
            last_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        self.db.put(failed_proxy)

    def getByName(self, name):
        proxies = self.getAllByName(name)
        if proxies:
            return random.choice(proxies)
        return None
예제 #20
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy_test'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy_test'

    def refresh(self):
        """从已有站点上抓取proxy,并存放到redis raw_proxy
        fetch proxy into Db by ProxyGetter/getFreeProxy.py
        :return:
        """
        max_conn = 100
        meta: dict = {}
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        host, port = proxy.split(":")
                        meta["host"] = host
                        meta["port"] = port
                        meta["max_conn"] = max_conn
                        self.db.put(proxy, json.dumps(meta))
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(e)
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def add_proxy(proxy, meta):
        # 向proxy-center中增加proxy节点,同时更新redis
        host, port = proxy.split(":")
        url = f'http://10.143.55.90:9381/api/proxies/{host}%3A{port}/'
        jmeta = json.dumps(meta)
        r = requests.post(url, data=jmeta)
        # print(r.status_code)
        print(r.text)

    def delete_proxy(proxy):
        # 从proxy-center中删除proxy节点,同时更新redis
        host, port = proxy.split(":")
        url = f'http://10.143.55.90:9381/api/proxies/{host}%3A{port}/'
        r = requests.delete(url)
예제 #21
0
class ProxyManager(object):
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = Loghandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy is not False:
                        if proxy and verifyProxyFormat(proxy):
                            self.log.info("{func}: fetch proxy {proxy}".format(
                                func=proxyGetter, proxy=proxy))
                            self.db.put(proxy)
                        else:
                            self.log.error(
                                "{func}: fetch proxy {proxy} error".format(
                                    func=proxyGetter, proxy=proxy))
            except Exception as s:
                self.log.error("refresh: {}".format(s))
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None

    def get_new(self):
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            return random.choice(item_dict)

    def delete(self, proxy):
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()
        # if EnvUtil.PY3:
        #     return list(item_dict.keys()) if item_dict else list()
        # return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }

    def getNumber_new(self):
        # self.db.changeTable(self.raw_proxy_queue)
        raw, useful = self.db.getNumber(self.raw_proxy_queue,
                                        self.useful_proxy_queue)
        # self.db.changeTable(self.useful_proxy_queue)
        # total_useful_proxy = self.db.getNumber(self.useful_proxy_queue)
        print('{}---,{}'.format(raw, useful))
        return raw, useful
예제 #22
0
class ProxyManager(object):
    """
    ProxyManager
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy_queue'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy.strip():
                    proxy_set.add(proxy.strip())

            # store raw proxy
            self.db.changeTable(self.raw_proxy_queue)
            for proxy in proxy_set:
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.get()
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()

    def get_status(self):
        self.db.changeTable(self.raw_proxy_queue)
        quan_raw_proxy = self.db.get_status()
        self.db.changeTable(self.useful_proxy_queue)
        quan_useful_queue = self.db.get_status()
        return {'raw_proxy': quan_raw_proxy, 'useful_proxy_queue': quan_useful_queue}
예제 #23
0
 def __init__(self):
     self.db = DbClient()
     self.config = GetConfig()
     self.raw_proxy_queue = 'raw_proxy'
     self.log = LogHandler('proxy_manager')
     self.useful_proxy_queue = 'useful_proxy'
예제 #24
0
 def __init__(self):
     self.db = DbClient()
     self.config = GetConfig()
     self.raw_proxy_queue = 'raw_proxy'
     self.useful_proxy_queue = 'useful_proxy_queue'
예제 #25
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy.strip())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #26
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                proxy_iter = [
                    _ for _ in getattr(GetFreeProxy, proxyGetter.strip())()
                ]
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.error('{func}: fetch proxy {proxy} error'.format(
                        func=proxyGetter, proxy=proxy))

            # store
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def validateProxy(self, proxy):

        return validUsefulProxy(proxy)

    def referProxy(self, proxy, city):

        print(proxy, city)
        if proxy != None:
            if self.validateProxy(proxy):
                return proxy

            old_proxy_info = GetProxyLocInfo(proxy)
            old_proxy_loc = old_proxy_info.get_proxy_loc_info()
            city = old_proxy_loc[1]

        count = 1
        while count < 15:
            tmp_proxy = self.get()
            print(tmp_proxy)
            if self.validateProxy(tmp_proxy):
                try:
                    tmp_proxy_info = GetProxyLocInfo(tmp_proxy)
                    tmp_proxy_loc = tmp_proxy_info.get_proxy_loc_info()
                    if tmp_proxy_loc[1] == city:
                        return tmp_proxy
                except:
                    pass
            count += 1

        return None

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #27
0
class ProxyValidSchedule(ProxyManager):
    def __init__(self):
        ProxyManager.__init__(self)
        self.db = DbClient()
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        验证代理
        :return:
        """
        time.sleep(60 * 0 * random.random())
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for each_proxy in self.db.getAll():
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy) == True:
                    # 成功计数器加1
                    self.db.inckey(each_proxy, 1)
                    self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # 失败计数器减一
                    print "原有value  " + str(self.db.getvalue(each_proxy))
                    if self.db.getvalue(each_proxy) >= 0:
                        self.db.inckey(each_proxy, -1*int(self.db.getvalue(each_proxy)))
                    else:
                        self.db.inckey(each_proxy, -1)
                    # self.db.delete(each_proxy)
                    self.log.info('validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                print  value
                if None != value and int(value) < 0:
                    # 计数器小于-5删除该代理
                    print "删除" + each_proxy
                    self.db.delete(each_proxy)
        self.log.info('validProxy_a running normal')

    def main(self):
        self.__validProxy()
예제 #28
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    @staticmethod
    def __dynamic_import__(name):
        components = name.split('.')
        mod = __import__(components[0])
        for comp in components[1:]:
            mod = getattr(mod, comp)
        return mod

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter user defined proxy getter class
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        try:
            proxy_getter_class = self.__dynamic_import__(
                config.proxy_getter_lib)
        except Exception as e:
            raise Exception('%s not found in ProxyGetter' %
                            config.proxy_getter_lib)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(proxy_getter_class,
                                     proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #29
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")

        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))

                        continue
                    elif proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))

                        continue
                    else:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} success'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        self.db.put(Proxy(proxy, source=proxyGetter))
                        proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        if item_list:
            random_choice = random.choice(item_list)

            return Proxy.newProxyFromJson(random_choice)

        return None

    def get_http(self):
        """
        return a http proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        if item_list:
            for _ in item_list:
                random_choice = random.choice(item_list)
                proxy_type = json.loads(random_choice)['proxy'].split("://")[0]

                if proxy_type == 'http':
                    return Proxy.newProxyFromJson(random_choice)

        return None

    def get_socks(self):
        """
        return a useful socks proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        if item_list:
            for _ in item_list:
                random_choice = random.choice(item_list)
                proxy_type = json.loads(random_choice)['proxy'].split("://")[0]

                if proxy_type == 'socks4':
                    return Proxy.newProxyFromJson(random_choice)

        return None

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()

        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #30
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy.strip())

            # store raw proxy
            self.db.changeTable(self.raw_proxy_queue)
            for proxy in proxy_set:
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.get()
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()

    def get_status(self):
        # TODO rename get_count..
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.get_status()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.get_status()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
예제 #31
0
 def __init__(self):
     self.db = DbClient()
     self.config = GetConfig()
     self.raw_proxy_queue = 'raw_proxy'
     self.useful_proxy_queue = 'useful_proxy_queue'
예제 #32
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'
        self.adsl_queue = 'adsl'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter/getFreeProxy.py
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def initProxyPool(self):
        """
        第一次启动时调用这个方法
        :return:
        """
        self.deleteAll()
        self.db.changeTable(self.adsl_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.values()) if item_dict else list()
        return item_dict.values() if item_dict else list()

    def deleteAll(self):
        """
        清空代理池
        :param proxy:
        :return:
        """
        # 删除所有
        proxies = self.getAll()
        for proxy in proxies:
            self.delete(proxy)

    def refreshADSL(self, proxy):
        """
        重新拨号
        :param proxy:
        :return:
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        ip = proxy.split(':')[0]
        try:
            # 调用接口重新拨号
            refreshApi = "http://{ip}:8000/refresh".format(ip=ip)
            r = requests.get(refreshApi, timeout=5, verify=False)
            if r.status_code == 200:
                print('{proxy} refres done')
        except Exception as e:
            print(str(e))
예제 #33
0
 def __init__(self):
     self.db = DbClient()
     self.config = GetConfig()
     self.raw_proxy_queue = 'raw_proxy'
     self.log = LogHandler('proxy_manager')
     self.useful_proxy_queue = 'useful_proxy'
예제 #34
0
class MongoSsdbUrlManager(object):
    def __init__(self, host="localhost", client=None):
        self.config = GetConfig()
        self.client = MongoClient('mongodb://localhost:27017/') if client is None else client
        self.ssdb_client = DbClient()
        self.db = self.client.spider
        if self.db.lazada.count() is 0:
            self.db.lazada.create_index([("status", ASCENDING),
                                         ("pr", DESCENDING)])

    def enqueue_url(self, url, status, depth):
        md5 = hashlib.md5(url).hexdigest()
        i = 0
        while i < 2:
            try:
                num = self.ssdb_client.get(md5)
                if num is not None:
                    self.ssdb_client.update(key=md5, value=int(num) + 1)
                    return
                self.ssdb_client.put(md5)
                i = 2
            except Exception as error:
                print error
                i += 1
        self.db.lazada.save({
            '_id': md5,
            'url': url,
            'status': status,
            'queue_time': datetime.utcnow(),
            'depth': depth,
            'pr': 0
        })

    def dequeue_url(self, depth):
        # record = self.db.lazada.find_one_and_update(
        #     {'status': 'downloading', 'depth': depth},
        #     {'$set': {'status': 'downloading'}},
        #     upsert=False,
        #     sort=[('pr', DESCENDING)],  # sort by pr in descending
        #     returnNewDocument=False
        # )

        record = self.db.lazada.find_one(
            {'status': 'downloading', 'depth': depth}
        )

        if record:
            return record
        else:
            return None

    def finish_url(self, url):
        record = {'status': 'done', 'done_time': datetime.utcnow()}
        self.db.lazada.update({'_id': hashlib.md5(url).hexdigest()}, {'$set': record}, upsert=False)

    def clear(self):
        self.ssdb_client.clear()

    def save_error(self, url, status, depth):
        md5 = hashlib.md5(url).hexdigest()
        self.db.lazada_error.save({
            '_id': md5,
            'url': url,
            'status': status,
            'queue_time': datetime.utcnow(),
            'depth': depth,
            'pr': 0
        })

    def update(self):
        self.db.lazada.update({'status': 'downloading'}, {'$set': {'status': 'new'}}, multi=True)


    def save(self, record):
        self.db.record.save(record)