Esempio n. 1
0
def build_btree(speeches):
    """Build a binary tree that organizes speeches by date.

    Each speech is stored under the key ``(speech.date, speech.speech_id)``.
    The id makes the key unique per speech, and putting the date first means
    keys compare in date order.

    Args:
        speeches: Iterable of objects exposing ``date`` and ``speech_id``.

    Returns:
        A ``BinaryTree`` mapping ``(date, speech_id)`` tuples to the speech
        objects. Callers that want the earliest or latest entry can call
        ``min_item()`` / ``max_item()`` on the returned tree.
    """
    speech_tree = BinaryTree()
    for speech in speeches:
        speech_tree.insert((speech.date, speech.speech_id), speech)
    # The original looked up min_item()/max_item() after this return; that
    # code was unreachable and its results unused, so it has been removed.
    return speech_tree
Esempio n. 2
0
def build_btree(speeches):
    """Organize speeches by date in a binary tree.

    The tree is keyed by ``(speech.date, speech.speech_id)`` tuples: the
    date comes first so keys compare in date order, and the id keeps keys
    unique for speeches sharing a date. The value stored under each key is
    the speech object itself.

    Args:
        speeches: Iterable of objects exposing ``date`` and ``speech_id``.

    Returns:
        The populated ``BinaryTree``. Use ``min_item()`` / ``max_item()``
        on it to obtain the earliest / latest entry.
    """
    speech_tree = BinaryTree()
    for speech in speeches:
        date_id_key = (speech.date, speech.speech_id)
        speech_tree.insert(date_id_key, speech)
    # Dead statements that used to follow the return (unused min/max
    # lookups) were dropped; they could never execute.
    return speech_tree
Esempio n. 3
0
class AppProxy(object):
    """Keep a proxy-IP queue topped up from a set of weighted providers.

    Providers are stored in a ``BinaryTree`` keyed by the running total of
    their ratios. Drawing a random integer in ``[1, total]`` and taking the
    ceiling key therefore picks a provider with probability proportional to
    its ratio.
    """

    def __init__(self, queue_key=None):
        """Create an idle instance; call ``start_proxys`` before ``run``.

        :param queue_key: key passed to ``RedisProxyQueue`` in
            ``start_proxys`` (for example "scrapy:ip_proxy_queue").
        """
        # cumulative ratio -> provider object
        self.proxys = BinaryTree()
        # set by start_proxys()
        self.proxy_queue_handle = None

        self.proxy_queue_key = queue_key
        # minimum length the proxy IP pool should be kept at
        self.proxy_queue_min_lenght = cfg.PROXY_MIN_QUEUE_LENGHT
        # maximum number of proxies requested in one refill round
        self.proxy_increment_num = 20
        # pause between refill checks in run(), in seconds
        self.sleep_time = 0.2

    def _create_proxy(self, proxycls, ratio):
        """Create one provider and register it under its cumulative ratio.

        :param proxycls: a ``Proxy`` instance, or the dotted path of an
            object exposing ``create`` (resolved through ``load_object``).
        :param ratio: relative weight; coerced with ``int()`` if needed.
        :raises ValueError: if ``proxycls`` is neither of the above.
        """
        if not isinstance(ratio, int):
            ratio = int(ratio)
        # NOTE(review): a Proxy *subclass* passed directly fails the
        # isinstance check and ends in ValueError -- confirm whether
        # issubclass() was intended here.
        if isinstance(proxycls, Proxy):
            factory = proxycls
        elif isinstance(proxycls, six.string_types):
            factory = AppProxy.load_object(proxycls)
        else:
            raise ValueError("Not a valid value(%s)" % str(proxycls))
        obj = factory.create(self.proxy_queue_handle, ratio,
                             cfg.PROXY_SERVER_REQUEST_TIMEOUT)

        # Keys are running totals of the ratios registered so far.
        # NOTE(review): a ratio <= 0 would not yield a new, larger key;
        # presumably ratios are always positive -- verify against callers.
        last_val = ratio
        if self.proxys.count > 0:
            last_val = self.proxys.max_item()[0] + last_val
        self.proxys.insert(last_val, obj)

    def _get_proxy(self, need_num):
        """Ask one randomly chosen provider for up to ``need_num`` proxies.

        The provider is chosen by drawing a weight in ``[1, max key]`` and
        taking the tree entry at the ceiling of that weight. The request is
        capped at the provider's ``request_max_num``.

        :param need_num: how many proxies the pool is short of.
        :return: whatever ``proxy.get`` returns (logged with ``%d``), or 0
            when nothing was requested.
        """
        max_weight = self.proxys.max_item()[0]
        random_weight = random.randint(1, max_weight)
        proxy = self.proxys.ceiling_item(random_weight)[1]
        num = int(min(proxy.request_max_num, need_num))
        result = 0
        if num > 0:
            result = proxy.get(num)
        log.debug("_on_proxy: (%s)ratio=%d|request_max_num=%d|result=%d" %
                  (proxy.name, proxy.ratio, proxy.request_max_num, result))
        return result

    def _on_procrssed(self):
        """Run one refill check: top the queue up if it is below minimum."""
        # current length of the proxy IP pool
        llen = self.proxy_queue_handle.len()
        # Shortfall against the configured minimum. The instance attribute
        # (initialised from cfg.PROXY_MIN_QUEUE_LENGHT) is used so that the
        # value set in __init__ is the one actually honoured.
        need_num = self.proxy_queue_min_lenght - llen
        result = 0
        if need_num > 0:
            # never request more than the per-round cap
            need_num = min(need_num, self.proxy_increment_num)
            result = self._get_proxy(need_num)
        log.debug("_on_procrssed -->: llen=%d|need_num=%d|result=%d" %
                  (llen, need_num, result))

    @staticmethod
    def load_object(path):
        """
        Load an object given its absolute object path, and return it.
        :param path: ie, 'proxy.proxy.Proxy'
        :return: the attribute named by the last path component
        :raises ValueError: if ``path`` contains no dot
        :raises NameError: if the module lacks the named attribute
        """
        try:
            dot = path.rindex('.')
        except ValueError:
            raise ValueError("Error loading object '%s': not a full path" %
                             path)

        module, name = path[:dot], path[dot + 1:]
        mod = importlib.import_module(module)

        try:
            obj = getattr(mod, name)
        except AttributeError:
            raise NameError(
                "Module '%s' doesn't define any object named '%s'" %
                (module, name))

        return obj

    def start_proxys(self, redis_cfg, proxy_class):
        """Initialise redis, reset the queue and register every provider.

        :param redis_cfg: configuration passed to ``redis_pool.init_redis``.
        :param proxy_class: mapping of provider (``Proxy`` instance or
            dotted path) to its ratio.
        """
        redis_pool.init_redis(redis_cfg)

        self.proxy_queue_handle = RedisProxyQueue(self.proxy_queue_key)
        self.proxy_queue_handle.clear()
        for proxycls in proxy_class:
            ratio = proxy_class[proxycls]
            # create and register the provider for this entry
            self._create_proxy(proxycls, ratio)
            # int(): _create_proxy accepts non-int ratios (e.g. "3"), which
            # the %d placeholder would otherwise reject with TypeError.
            log.info("start_proxy: %s --> %d" % (str(proxycls), int(ratio)))

    def run(self):
        """Refill the queue forever, pausing ``sleep_time`` between checks.

        Errors from a single check are logged and the loop continues.
        Ctrl-C (``KeyboardInterrupt``) ends the loop and returns.
        """
        log.info('###############################################')
        log.info('Now begin.......')
        try:
            while 1:
                try:
                    self._on_procrssed()
                except Exception as e:
                    # Format eagerly: passing str(e) as a separate argument
                    # without a placeholder loses the message.
                    log.error('run error: %s' % str(e))
                    log.error("run traceback:" + traceback.format_exc())
                # Sleep after failures too, so a persistent error does not
                # turn into a busy loop.
                time.sleep(self.sleep_time)
        except KeyboardInterrupt:
            # Previously swallowed inside the loop, which made the process
            # impossible to stop with Ctrl-C.
            log.info('run: interrupted, stopping')
Esempio n. 4
0
from bintrees import BinaryTree

def displayKeyValue(key, value):
    """Print one tree entry; passed as the callback to ``tree.foreach``."""
    print('Key: ', key, ' Value: ', value)


# Seed entries, listed in the order they are handed to the tree.
data = {
    3: 'White',
    2: 'Red',
    1: 'Green',
    5: 'Orange',
    4: 'Yellow',
    7: 'Purple',
    0: 'Magenta',
}

# Build the tree from the seed mapping, then add one more entry.
tree = BinaryTree(data)
tree.update({6: 'Teal'})

# Show every entry, then a single lookup and the largest item.
tree.foreach(displayKeyValue)
print('Item 3 contains: ', tree.get(3))
print('The maximum item is: ', tree.max_item())