def build_btree(speeches):
    """Build a binary tree that organizes speeches by date.

    Every speech is stored under the key ``(speech.date, speech.speech_id)``.
    The leading date keeps the tree in chronological order, and the speech id
    is expected to make the key unique for each speech.

    :param speeches: iterable of objects exposing ``date`` and ``speech_id``
        attributes.
    :return: the populated ``BinaryTree`` mapping ``(date, speech_id)`` to the
        speech object. Call ``min_item()`` / ``max_item()`` on the result to
        find the earliest and latest entries.
    """
    speech_tree = BinaryTree()
    for speech in speeches:
        # (date, id) tuple: sorts by date first, id breaks ties between
        # speeches given on the same day.
        date_id_key = speech.date, speech.speech_id
        speech_tree.insert(date_id_key, speech)
    return speech_tree
# NOTE(review): this file already defines build_btree once above; this later
# definition is the one that stays bound to the name. Confirm whether the
# duplicate is intentional.
def build_btree(speeches):
    """Build a binary tree that organizes speeches by date.

    Keys are ``(date, speech_id)`` tuples, so entries are ordered by date and
    speeches sharing a date are told apart by their id. Values are the speech
    objects themselves.

    :param speeches: iterable of objects with ``date`` and ``speech_id``
        attributes.
    :return: the populated ``BinaryTree``. Its ``min_item()`` and
        ``max_item()`` give the earliest and latest ``(key, speech)`` pairs.
    """
    speech_tree = BinaryTree()
    for speech in speeches:
        # The tuple is expected to be unique per speech and sorts by date.
        date_id_key = (speech.date, speech.speech_id)
        speech_tree.insert(date_id_key, speech)
    return speech_tree
class AppProxy(object):
    """Keep a proxy IP queue topped up from a set of weighted providers.

    Providers are stored in a ``BinaryTree`` keyed by the running total of
    their ratios. Drawing a random integer in ``[1, total]`` and taking the
    ceiling entry therefore picks a provider with a chance proportional to
    its ratio.
    """

    def __init__(self, queue_key=None):
        """Set up an idle instance; ``start_proxys`` must run before ``run``.

        :param queue_key: key handed to ``RedisProxyQueue`` in
            ``start_proxys`` (e.g. "scrapy:ip_proxy_queue").
        """
        self.proxys = BinaryTree()
        self.proxy_queue_handle = None
        self.proxy_queue_key = queue_key
        # Minimum length of the proxy IP pool.
        # NOTE(review): _on_procrssed reads cfg.PROXY_MIN_QUEUE_LENGHT
        # directly rather than this attribute.
        self.proxy_queue_min_lenght = cfg.PROXY_MIN_QUEUE_LENGHT
        # Maximum number of proxies requested per refill.
        self.proxy_increment_num = 20
        # Pause (seconds) between two refill checks in run().
        self.sleep_time = 0.2

    def _create_proxy(self, proxycls, ratio):
        """Create a provider and register it under its cumulative weight.

        :param proxycls: a ``Proxy`` instance, or a dotted-path string that
            is resolved with ``load_object``.
        :param ratio: relative weight of the provider; coerced to ``int``.
        :raises ValueError: if ``proxycls`` is neither of the accepted kinds.
        """
        if not isinstance(ratio, int):
            ratio = int(ratio)

        # NOTE(review): the parameter name suggests a class, but isinstance()
        # only matches Proxy *instances* - confirm issubclass was not meant.
        if isinstance(proxycls, Proxy):
            factory = proxycls
        elif isinstance(proxycls, six.string_types):
            factory = AppProxy.load_object(proxycls)
        else:
            raise ValueError("Not a valid value(%s)" % str(proxycls))
        obj = factory.create(self.proxy_queue_handle, ratio,
                             cfg.PROXY_SERVER_REQUEST_TIMEOUT)

        # Key = sum of all ratios registered so far, including this one.
        last_val = ratio
        if self.proxys.count > 0:
            last_val = self.proxys.max_item()[0] + last_val
        # NOTE(review): a ratio of 0 produces the same key as the current
        # maximum; presumably insert() then replaces that entry - confirm
        # ratios are always positive.
        self.proxys.insert(last_val, obj)

    def _get_proxy(self, need_num):
        """Ask one randomly chosen provider for up to ``need_num`` proxies.

        :param need_num: number of proxies wanted.
        :return: whatever the provider's ``get`` returns, or 0 when nothing
            was requested.
        """
        # Pick a provider by weight, then call its get() so that it adds the
        # requested number of IPs to the proxy pool.
        max_weight = self.proxys.max_item()[0]
        random_weight = random.randint(1, max_weight)
        proxy = self.proxys.ceiling_item(random_weight)[1]

        # Never ask a provider for more than it allows per request.
        num = int(min(proxy.request_max_num, need_num))
        result = 0
        if num > 0:
            result = proxy.get(num)
        log.debug("_on_proxy: (%s)ratio=%d|request_max_num=%d|result=%d" %
                  (proxy.name, proxy.ratio, proxy.request_max_num, result))
        return result

    def _on_procrssed(self):
        """Run one refill check: top up the queue if it is below the minimum."""
        # Current length of the proxy IP pool.
        llen = self.proxy_queue_handle.len()
        # Shortfall against the configured minimum pool size.
        need_num = cfg.PROXY_MIN_QUEUE_LENGHT - llen
        result = 0
        if need_num > 0:
            # Cap a single refill at proxy_increment_num.
            need_num = min(need_num, self.proxy_increment_num)
            result = self._get_proxy(need_num)
        log.debug("_on_procrssed -->: llen=%d|need_num=%d|result=%d" %
                  (llen, need_num, result))

    @staticmethod
    def load_object(path):
        """
        Load an object given its absolute object path, and return it.

        :param path: dotted path, e.g. 'proxy.proxy.Proxy'
        :return: the attribute named by the last path component of the
            imported module
        :raises ValueError: if ``path`` contains no dot
        :raises NameError: if the module does not define the requested name
        """
        try:
            dot = path.rindex('.')
        except ValueError:
            raise ValueError("Error loading object '%s': not a full path" % path)

        module, name = path[:dot], path[dot + 1:]
        mod = importlib.import_module(module)

        try:
            obj = getattr(mod, name)
        except AttributeError:
            raise NameError(
                "Module '%s' doesn't define any object named '%s'" % (module, name))

        return obj

    def start_proxys(self, redis_cfg, proxy_class):
        """Initialise Redis, reset the queue and register every provider.

        :param redis_cfg: configuration passed to ``redis_pool.init_redis``.
        :param proxy_class: mapping of provider (``Proxy`` instance or dotted
            path string) to its ratio.
        """
        redis_pool.init_redis(redis_cfg)
        self.proxy_queue_handle = RedisProxyQueue(self.proxy_queue_key)
        self.proxy_queue_handle.clear()

        for proxycls in proxy_class:
            ratio = proxy_class[proxycls]
            # Creates one instance per proxy IP provider class.
            self._create_proxy(proxycls, ratio)
            # _create_proxy accepts non-int ratios, so convert before using
            # %d; a raw string ratio would raise TypeError here.
            log.info("start_proxy: %s --> %d" % (str(proxycls), int(ratio)))

    def run(self):
        """Loop forever, running a refill check every ``sleep_time`` seconds.

        Exceptions raised by a check are logged and the loop continues.
        """
        log.info('###############################################')
        log.info('Now begin.......')
        while True:
            try:
                self._on_procrssed()
                time.sleep(self.sleep_time)
            except KeyboardInterrupt:
                # NOTE(review): Ctrl-C is swallowed and the loop keeps
                # running - confirm this is intended.
                pass
            except Exception as e:
                # Format eagerly, like the other log calls in this class, so
                # the exception text is part of the message.
                log.error("run error: %s" % str(e))
                log.error("run traceback:" + traceback.format_exc())
from bintrees import BinaryTree


def displayKeyValue(key, value):
    """Print one tree entry; used as the callback for ``tree.foreach``."""
    print('Key: ', key, ' Value: ', value)


# Sample colour table; the keys are deliberately not in sorted order.
data = {
    3: 'White',
    2: 'Red',
    1: 'Green',
    5: 'Orange',
    4: 'Yellow',
    7: 'Purple',
    0: 'Magenta',
}

# Build the tree from the dict, then add one more entry through update().
tree = BinaryTree(data)
tree.update({6: 'Teal'})

# Visit every entry with the callback.
tree.foreach(displayKeyValue)

# Look up a single key, then fetch the entry with the largest key.
print('Item 3 contains: ', tree.get(3))
print('The maximum item is: ', tree.max_item())