def testGetScopeSize_badScopeName(self):
    pool = Pool(self.pool)
    try:
        scopeSize = pool.GetScopeSize('BadScope')
        self.fail("Expected exception was not thrown")
    except Pool.Exception, e:
        self.assertEqual("Scope 'BadScope' not found", e.what)

def testRemoveAttribute(self):
    pool1 = Pool(self.pool)
    pool1.RemoveAttribute('S1', 'Simple')
    result = pool1.SelectAttributes('S1', 'Simple')
    self.assertEqual(0, len(result))

def testDefaultInit(self):
    pool = Pool()
    file = cStringIO.StringIO()
    pool.Dump(file)
    self.assertEqual(
        "<?xml version='1.0' encoding='UTF-8'?>\n<DescriptorsPool/>\n",
        file.getvalue())

def __init__(self):
    # Build the initial objects: receive the project's spiders
    self.spiders = self._auto_import_module_cls(SPIDERS, True)
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    # self.pipeline = Pipeline()
    # Extend the framework to support multiple pipelines
    self.pipelines = self._auto_import_module_cls(PIPELINES)
    # Middleware initialisation
    # self.spider_middlewares = SpiderMiddlewares()
    # self.downloader_middlewares = DownloaderMiddlewares()
    # Load the project's overridden spider and downloader middlewares
    self.spider_mids = self._auto_import_module_cls(SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_module_cls(
        DOWNLOADER_MIDDLEWARES)
    # Create the thread pool; requests are handled asynchronously
    self.pool = Pool()
    # Counter for processed responses
    self.total_response = 0
    # Running state of the main thread
    self.is_running = True

def testInserAttributeAsNodes(self):
    pool1 = Pool(self.pool)
    pool2 = Pool(self.pool2)
    nodesToMove = pool2.SelectAttributes('S2', 'AdditionalAttribute')
    pool1._InsertNode(nodesToMove, 'S1')
    nodes = pool1.SelectAttributes('S1', 'AdditionalAttribute')
    self.assertEquals(self.serializeXml(nodesToMove), self.serializeXml(nodes))

def real_getRW(key="default"):
    global g_dbh_pool
    if g_dbh_pool.get(key, None) is None:
        import MySQLdb
        g_dbh_pool[key] = Pool.Pool(
            Pool.Constructor(MySQLdb.connect, **getConnectInfo(key)),
            getConnectInfo(key).get("connections", 30))
    return g_dbh_pool[key].get()

def testInserAttribute_withNonExistingAttribute(self):
    pool1 = Pool(self.pool)
    pool2 = Pool(self.pool2)
    try:
        pool1.InsertAttribute(pool2, 'S2', 'BadAttribute', 'S1')
        self.fail("Expected exception was not thrown")
    except Pool.Exception, e:
        self.assertEqual("Attribute 'S2::BadAttribute' not found", e.what)

def testAssureScopeWithPopulation_existingScopeDifferentSizes(self):
    pool = Pool(self.pool)
    try:
        pool.AssureScopeWithPopulation("S1", 3)
        self.fail("Expected exception was not thrown")
    except Pool.Exception, e:
        self.assertEqual(
            "Requested size for scope 'S1' was 3 but it is actually 1",
            e.what)

def testRemoveAttribute_scopeDoesNotExists(self):
    pool1 = Pool(self.pool)
    try:
        pool1.RemoveAttribute('BadScope', 'Simple')
        self.fail("Expected exception was not thrown")
    except Pool.Exception, e:
        self.assertEqual(
            "Scope 'BadScope' not found while removing 'BadScope::Simple'",
            e.what)

def __init__(self):
    conv1 = ConvLayer(28, 28, 1, 6, 5, 1, 2)
    sigmoid1 = Sigmoid()
    pool1 = Pool(2)
    conv2 = ConvLayer(14, 14, 6, 16, 5, 1, 0)
    sigmoid2 = Sigmoid()
    pool2 = Pool(2)
    fc = Perceptron([400, 600, 10])
    self.layers = [conv1, sigmoid1, pool1, conv2, sigmoid2, pool2, fc]

def testRemoveAttribute_attributeDoesNotExists(self):
    pool1 = Pool(self.pool)
    try:
        pool1.RemoveAttribute('S1', 'BadAttribute')
        self.fail("Expected exception was not thrown")
    except Pool.Exception, e:
        self.assertEqual(
            "Attribute 'BadAttribute' not found while removing 'S1::BadAttribute'",
            e.what)

def _init(self, namespace=None, pool_size=10, decode_responses=False,
          **kwargs):
    self.protocol_factory = partial(RedisStoreConnection, Consumer)
    self._decode_responses = decode_responses
    if namespace:
        self._urlparams['namespace'] = namespace
    self._pool = Pool(self.connect, pool_size=pool_size, loop=self._loop)
    if self._database is None:
        self._database = 0
    self._database = int(self._database)
    self.loaded_scripts = set()

def testInsertAttribute_differentScopeSizes(self):
    pool = Pool(self.pool)
    poolDifferentSize = Pool(self.poolDifferentSize)
    try:
        pool.InsertAttribute(poolDifferentSize, 'S2', 'AdditionalAttribute', 'S1')
        self.fail("Expected exception was not thrown")
    except Pool.Exception, e:
        self.assertEqual(
            "Requested size for scope 'S1' was 3 but it is actually 1",
            e.what)

def __init__(self, spiders, spider_mids=[], downloader_mids=[]):
    self.spiders = spiders                   # spiders
    self.scheduler = Scheduler()             # scheduler
    self.downloader = Downloader()           # downloader
    self.pipline = Pipeline()                # pipeline
    self.spider_mids = spider_mids           # spider middlewares
    self.downloader_mids = downloader_mids   # downloader middlewares
    self.pool = Pool()
    self.response_number = 0                 # number of responses handled
    self.max_async = settings.MAX_ASYNC      # maximum concurrency
    self.running = False

def pooling_test():
    """Compare the numpy max-pooling layer against PyTorch's F.max_pool2d."""
    # Input sample of shape 1x5x8x8
    x = torch.tensor(np.random.randn(1, 5, 8, 8).astype(np.float32),
                     requires_grad=True)
    x_numpy = x.detach().numpy()
    # Upstream gradient of shape 1x5x4x4
    dy = torch.tensor(np.random.randn(1, 5, 4, 4).astype(np.float32),
                      requires_grad=True)
    dy_numpy = dy.detach().numpy()

    # pytorch: pool=(2,2), stride=2
    pool_out = F.max_pool2d(x, kernel_size=2, stride=2)
    pool_out.backward(dy)
    print('pool_out: \n', pool_out)
    print('pool_out.shape: \n', pool_out.shape)

    # numpy
    pool1 = Pool.MaxPooling(pool_shape=(2, 2), stride=(2, 2))
    pool_out_numpy = pool1.forward(x_numpy)
    pool_eta = pool1.gradient(dy_numpy)
    print('pool_out_numpy: \n', pool_out_numpy)
    print('pool_out_numpy.shape: \n', pool_out_numpy.shape)

    # Compare the backward-pass gradients
    print('pool_out_grad: \n', x.grad)
    print('pool_out_grad.shape: \n', x.grad.shape)
    print('pool_out_numpy_grad: \n', pool_eta)
    print('pool_out_numpy_grad.shape: \n', pool_eta.shape)
    print('pool_out_numpy_grad error: \n', pool_eta - x.grad.detach().numpy())

def __init__(self):
    self.spiders = self._auto_import_instances(path=SPIDERS, isspider=True)  # dict of spiders
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    self.pipelines = self._auto_import_instances(path=PIPELINES)
    self.spider_mids = self._auto_import_instances(path=SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_instances(
        path=DOWNLOADER_MIDDLEWARES)
    self.total_response_nums = 0
    self.total_request_nums = 0
    self.pool = Pool(5)  # os.cpu_count() or 1
    self.is_running = True

def __init__(self):
    self.spiders = self._auto_import_instances(path=SPIDERS, isspider=True)  # dict of spiders
    self.pipelines = self._auto_import_instances(path=PIPELINES)
    self.spider_mids = self._auto_import_instances(path=SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_instances(path=DOWNLOADER_MIDDLEWARES)
    if SCHEDULER_PERSIST:
        self.collector = ReidsStatsCollector()
    else:
        self.collector = NormalStatsCollector()
    # self.total_response_nums = 0
    # self.total_request_nums = 0
    self.scheduler = Scheduler(self.collector)
    self.downloader = Downloader()
    self.pool = Pool(5)  # os.cpu_count() or 1
    self.is_running = True

def testInserAttributeAsNodes(self):
    pool1 = Pool(self.pool)
    pool2 = Pool(self.pool2)
    nodesToMove = pool2.SelectAttributes('S2', 'AdditionalAttribute')
    pool1._InsertNode(nodesToMove, 'S1')
    nodes = pool1.SelectAttributes('S1', 'AdditionalAttribute')
    self.assertEquals(self.serializeXml(nodesToMove), self.serializeXml(nodes))

def __init__(self):
    # self.spiders = spiders
    self.spiders = self._auto_import_module_cls(SPIDERS, True)
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    # self.pipeline = Pipeline()
    # self.pipelines = pipelines
    self.pipelines = self._auto_import_module_cls(PIPELINES)
    # self.spider_middlewares = SpiderMiddlewares()
    # self.downloader_middlewares = DownloaderMiddlewares()
    # self.spider_mids = spider_mids
    self.spider_mids = self._auto_import_module_cls(SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_module_cls(DOWNLOADER_MIDDLEWARES)
    # self.downloader_mids = downloader_mids
    # Create the thread/coroutine pool
    self.pool = Pool()
    self.total_response = 0
    self.is_running = True

def __init__(self):
    # spiders
    self.spiders = self.auto_import_module(SPIDERS)
    # scheduler
    self.scheduler = Scheduler()
    # downloader
    self.download = Download()
    # pipelines
    self.pipelines = self.auto_import_module(PIPELINES)
    # spider / downloader middlewares
    self.spider_mids = self.auto_import_module(SPIDER_MIDDLEWARES)
    self.download_mids = self.auto_import_module(DOWNLOAD_MINDDLEWARES)
    # coroutine pool
    self.pool = Pool(ASYNC_COUNT)
    # response counter
    self.response_count = 0
    # whether there are still pending requests
    self.has_request = True

def testInserAttribute_onANewScope(self):
    pool1 = Pool(self.pool)
    pool2 = Pool(self.pool2)
    pool1.InsertAttribute(pool2, 'S2', 'AdditionalAttribute', 'NewScope')
    result = pool1.SelectAttributes('NewScope', 'AdditionalAttribute')
    expect = pool2.SelectAttributes('S2', 'AdditionalAttribute')
    self.assertEquals(self.serializeXml(expect), self.serializeXml(result))

def QueryDescriptors(self, id, ignoreCache=False, computeIfNotCached=False,
                     keepCache=True):
    print "Computing", self.extractor, "for", id
    if not ignoreCache and False:  # TODO: Use properly the ignoreCache flag
        try:
            result = Pool(file(self._poolPath(id)))
            print "Using cached data"
            return result
        except IOError, e:
            pass  # Not found

def UpdateDescriptors(self, id, pool, descriptors=None):
    if descriptors == None:
        descriptors = pool.PresentAttributes()
    scripts = self._DisgregatorScripts(descriptors)
    for source, script in scripts.items():
        if script == "":
            continue
        disgregator = Aggregator(cStringIO.StringIO(script))
        try:
            result = self.sources[source].QueryDescriptors(id)
        except:
            result = Pool()
        disgregator.run(result, [pool])
        self.sources[source].UpdateDescriptors(id, result)

def QueryDescriptors(self, id, descriptors):
    if self.verbose:
        print "++ Building aggregation script..."
    (aggregatorScript, sourceIds) = self._AggregatorScriptFor(descriptors)
    aggregator = Aggregator(cStringIO.StringIO(aggregatorScript))
    result = Pool()
    sourcesPools = []
    for sourceId in sourceIds:
        if self.verbose:
            print "++ Querying descriptors from %s..." % sourceId
        sourcePool = self.sources[sourceId].QueryDescriptors(id)
        sourcesPools.append(sourcePool)
    if self.verbose:
        print "++ Aggregating..."
    aggregator.run(result, sourcesPools)
    return result

def TestMnistConv():
    # Learn
    Images, Labels = LoadMnistData('MNIST\\t10k-images-idx3-ubyte.gz',
                                   'MNIST\\t10k-labels-idx1-ubyte.gz')
    Images = np.divide(Images, 255)

    W1 = 1e-2 * np.random.randn(9, 9, 20)
    W5 = np.random.uniform(-1, 1, (100, 2000)) * np.sqrt(6) / np.sqrt(360 + 2000)
    Wo = np.random.uniform(-1, 1, (10, 100)) * np.sqrt(6) / np.sqrt(10 + 100)

    X = Images[0:8000, :, :]
    D = Labels[0:8000]
    for _epoch in range(3):
        print(_epoch)
        W1, W5, Wo = MnistConv(W1, W5, Wo, X, D)

    # Test
    X = Images[8000:10000, :, :]
    D = Labels[8000:10000]
    acc = 0
    N = len(D)
    for k in range(N):
        x = X[k, :, :]
        y1 = Conv(x, W1)
        y2 = ReLU(y1)
        y3 = Pool(y2)
        y4 = np.reshape(y3, (-1, 1))
        v5 = np.matmul(W5, y4)
        y5 = ReLU(v5)
        v = np.matmul(Wo, y5)
        y = Softmax(v)
        i = np.argmax(y)
        if i == D[k][0]:
            acc = acc + 1
    acc = acc / N
    print("Accuracy is : ", acc)

def map(func, params, multiprocess=False, processes=psutil.cpu_count() - 1):
    """Distribute map/starmap on # of processes (default to cores - 1)"""
    if not multiprocess:
        processes = 1
    print("cores: %d" % psutil.cpu_count())
    print("processes: %d" % processes)
    starmap = isinstance(params[0], tuple)
    print("starmap: %s" % starmap)
    t = timer()
    if starmap:
        func(*params[0])
    else:
        func(params[0])
    print("calcs: %d (~%.2fs) .." % (len(params),
                                     (timer() - t) * len(params) / processes))

    # Calculation
    t = timer()
    if processes > 1:
        with Pool(processes=processes) as pool:
            try:
                if starmap:
                    results = pool.starmap(func, params)
                else:
                    results = pool.map(func, params)
            except Exception as e:
                pool.close()
                pool.join()
                raise e
    else:
        if starmap:
            results = [func(*p) for p in params]
        else:
            results = [func(p) for p in params]
    print("done. %.2fs" % (timer() - t))
    return results

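# A minimal usage sketch for the map() helper above. The worker function
# `square` and the parameter list are hypothetical illustrations, not part of
# the original project. Plain values go through pool.map(); tuples would be
# unpacked via pool.starmap().
def square(x):
    return x * x

if __name__ == "__main__":
    # With multiprocess=True the work is spread over cpu_count() - 1 processes.
    squares = map(square, [1, 2, 3, 4], multiprocess=True)  # -> [1, 4, 9, 16]
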
def create_block_volume(volume_name, pool, size, description='', format=128,
                        performance_priority=0, qos_enabled=False,
                        max_total_iops=0, max_total_bw=0, burst_total_iops=0,
                        burst_total_iobw=0, host=None):
    """
    volume_name: volume name
    pool: pool id or name
    size: volume size, like 100M, 100G, or 100000. If the unit is omitted,
          the size is in bytes.
    """
    retval = 0
    if not isinstance(pool, int):
        pool = Pool.get_pool_id(pool, host)
        if pool == -1:
            print "[Error] The pool id is invalid."
            return -1
    cmd = utils.XMS_CLI_HEADER + "-f json block-volume create -p {poolid} -s {volsize} -f {fmt} --pp {pp} {volname}".format(
        poolid=pool, volsize=size, fmt=format, pp=performance_priority,
        volname=volume_name)
    print cmd
    ret = utils.execute_cmd_in_host(cmd, host)
    if ret[2] != 0:
        print "[Error] Failed to create block volume " + str(
            volume_name) + ". Error message: [{err}]".format(err=ret[1])
        retval = -1
    return retval

def main():
    # Ask for the name of the folder to copy
    old_folder_name = input("请输入要copy的文件夹名:")
    # Create the destination folder
    new_folder_name = old_folder_name + "-[复件]"
    os.mkdir(new_folder_name)
    # Collect every file name in the source folder
    old_foldir_list = os.listdir(old_folder_name)
    # Copy all files to the new folder using a process pool
    pool = Pool(5)
    for file_name in old_foldir_list:
        pool.apply_async(copy_file_task,
                         args=(file_name, old_folder_name, new_folder_name))
    pool.close()  # no more tasks; the pool must be closed before join()
    pool.join()

class Engine(object): """引擎, 中心调度""" def __init__(self): # 爬虫 self.spiders = self.auto_import_module(SPIDERS) # 调度 self.scheduler = Scheduler() # 响应 self.download = Download() # 保存 self.pipelines = self.auto_import_module(PIPELINES) # 爬虫中间件/下载中间件 self.spider_mids = self.auto_import_module(SPIDER_MIDDLEWARES) self.download_mids = self.auto_import_module(DOWNLOAD_MINDDLEWARES) # 创建对象 self.pool = Pool(ASYNC_COUNT) # 响应计数 self.response_count = 0 # 判断是否含有请求 self.has_request = True def main(self): if ROLE == "mater" or ROLE is None: self._execute_start_requests() # for _ in range(ASYNC_COUNT): # self.pool.apply_async(self._execute_request_response_item, callback=self._callback) while True: if self.scheduler.request_count == self.response_count and self.scheduler.request_count != 0: self.has_request = False break if ROLE == "slave" or ROLE is None: self.pool.apply_async(self._execute_request_response_item, callback=self._callback) time.sleep(TIME_SLEEP) self.pool.close() self.pool.join() def _execute_start_requests(self): # 遍历爬虫列表 for spider_name, spider in self.spiders.items(): # 获取获取请求 for start_request in spider.start_requests(): # url入队列之前预处理, spider中间件 start_request.name = spider_name for spider_mid in self.spider_mids: start_request = spider_mid.process_request(start_request) # 请求入列, 判重 self.scheduler.add_request(start_request) def _execute_request_response_item(self): # 取出队列中的url request = self.scheduler.get_request() if request is None: return True # 请求之前预处理, download中间件 for download in self.download_mids: request = download.process_request(request) # 发送请求获取响应 response = self.download.send_request(request) # 响应之后处理, download中间件 for download in self.download_mids: response = download.process_response(response) # 解析数据 spider = self.spiders[request.name] # 使用生成器获取多个数据解析结果 parse_func = getattr(spider, request.callback) for result in parse_func(response): # 判断解析后的数据是url, 还是data: if isinstance(result, Request): # url入队列之前预处理, spider中间件 result.spider_name = request.spider_name for spider_mid in self.spider_mids: result = spider_mid.process_request(result) # 继续解析url self.scheduler.add_request(result) elif isinstance(result, Item): # 保存之前预处理 for spider_mid in self.spider_mids: result = spider_mid.process_item(result) # 保存response for pipeline in self.pipelines: pipeline.process_item(result, spider) self.response_count += 1 def _callback(self, foo): if foo: return True elif self.has_request is True: self.pool.apply_async(self._execute_request_response_item, callback=self._callback) def start(self): start = datetime.now() logger.info("Start time is [ {} ]".format(start)) self.main() end = datetime.now() logger.info("End time is [ {} ]".format(end)) print("[INFO]: run time is {}s".format((end - start).total_seconds())) @staticmethod def auto_import_module(module_list): instance = {} instance1 = [] for module in module_list: index = module.rfind(".") # 遍历并拆分变量名, path_name: 路径, var_name: 变量名 path_name = module[:index] var_name = module[index + 1:] # 动态导入模块 import_module = __import__(path_name) # 动态导入变量 var = getattr(import_module, var_name) # 判断是否有tag属性, 并且属性为spider, 则为spider, 进行实例化 if hasattr(var, "tag") and var.tag == "spider": instance[var.name] = var() else: instance1.append(var()) return instance or instance1
def just_for_test_block(volume_num=1000, volume_name_prefix="volume-",
                        volume_size_min=100, volume_size_max=500,
                        snapshot_num=1000, snapshot_name_prefix="snapshot-",
                        client_group_num=1000,
                        client_group_name_prefix="client-group",
                        access_path_num=2,
                        access_path_name_prefix="access_path", host=None):
    if access_path_num < 2 or volume_num < 2 or snapshot_num < 2 or client_group_num < 2:
        print "[Error] The num of access path, volume, snapshot, client group should be larger than 2."
        return
    if volume_size_min < 0 or volume_size_max < 0 or volume_size_min > volume_size_max:
        print "[Error] Invalid parameters."
        return

    # create block volumes
    ret, pool_ids = Pool.get_pool_ids(host=host)
    if ret != 0 or len(pool_ids) < 1:
        print "[Error] Failed to get pool info or pool not exists."
        return
    for i in range(1, volume_num + 1):
        size = random.randint(volume_size_min, volume_size_max)
        pool_id = pool_ids[random.randint(0, len(pool_ids) - 1)]
        print BlockVolume.create_block_volume(volume_name_prefix + str(i),
                                              pool_id, str(size) + "G",
                                              host=host)

    # create block snapshots
    ret, volume_ids = BlockVolume.get_block_volume_ids(host=host)
    if ret != 0 or len(volume_ids) < 1:
        print "[Error] Failed to get volume info or volumes not exist."
        return
    for i in range(1, snapshot_num + 1):
        idx = random.randint(0, len(volume_ids) - 1)
        print Snapshot.create_block_snapshot(
            snapshot_name_prefix + str(uuid.uuid1()), volume_ids[idx],
            host=host)

    for i in range(1, client_group_num + 1):
        iqn = generate_iqn()
        print ClientGroup.create_client_group(client_group_name_prefix + str(i),
                                              "iSCSI", iqn, host=host)

    # create access paths
    for i in range(1, access_path_num + 1):
        print AccessPath.create_access_path(access_path_name_prefix + str(i),
                                            aptype="iSCSI", host=host)

    # create mapping groups
    ret, client_group_ids = ClientGroup.get_client_group_ids(host=host)
    if ret != 0:
        print "[Error] Failed to get client group info."
        return
    volume_ids.sort()
    client_group_ids.sort()
    cgid_len = len(client_group_ids)
    vid_len = len(volume_ids)
    for i in range(0, cgid_len / 2):
        cgid = client_group_ids[i]
        print MappingGroup.create_mapping_group(1, volume_ids[:vid_len / 2],
                                                cgid, host=host)
    for i in range(cgid_len / 2, cgid_len):
        cgid = client_group_ids[i]
        print MappingGroup.create_mapping_group(2, volume_ids[vid_len / 2:],
                                                cgid, host=host)

class Engine(object):

    def __init__(self):
        # Build the initial objects: receive the project's spiders
        self.spiders = self._auto_import_module_cls(SPIDERS, True)
        self.scheduler = Scheduler()
        self.downloader = Downloader()
        # self.pipeline = Pipeline()
        # Extend the framework to support multiple pipelines
        self.pipelines = self._auto_import_module_cls(PIPELINES)
        # Middleware initialisation
        # self.spider_middlewares = SpiderMiddlewares()
        # self.downloader_middlewares = DownloaderMiddlewares()
        # Load the project's overridden spider and downloader middlewares
        self.spider_mids = self._auto_import_module_cls(SPIDER_MIDDLEWARES)
        self.downloader_mids = self._auto_import_module_cls(
            DOWNLOADER_MIDDLEWARES)
        # Create the thread pool; two methods below handle requests asynchronously
        self.pool = Pool()
        # Counter for processed responses
        self.total_response = 0
        # Running state of the main thread
        self.is_running = True

    def _auto_import_module_cls(self, paths=[], isspider=False):
        import importlib
        if isspider:
            result = {}  # spiders are returned to __init__ as a dict keyed by name
        else:
            result = []  # everything else is returned to __init__ as a list
        for path in paths:
            module_name = path[:path.rfind(".")]
            ret = importlib.import_module(module_name)
            cls_name = path[path.rfind(".") + 1:]
            # Resolve the named class object from the module given by the dotted path
            cls = getattr(ret, cls_name)
            if isspider:
                result[cls.name] = cls()
            else:
                result.append(cls())
        return result

    """To test this method, first add the paths to the settings module:
    1. Move the configuration out of main into settings.
    2. Improve the engine so that the configured pipelines and middlewares
       are loaded through _auto_import_module_cls.
    """

    def start(self):
        # Log entries recording the program's running time
        start = datetime.now()
        logger.info("start time{}:".format(start))
        # Launch the engine
        self._start_engine()
        stop = datetime.now()
        logger.info("stop time{}:".format(stop))
        # total_seconds() gives the elapsed time between the two datetimes
        logger.info("total time{}:".format((stop - start).total_seconds()))

    def _callback(self, _):
        # apply_async requires a one-argument callback; the value is unused here
        if self.is_running == True:
            self.pool.apply_async(self._excute_request_response_item,
                                  callback=self._callback)  # recursive re-scheduling

    def _start_engine(self):
        if ROLE == "master" or ROLE is None:
            # The master issues the start requests (a non-distributed run does too)
            # self._start_requests()
            """1. Asynchronous, non-blocking: send the start requests"""
            self.pool.apply_async(self._start_requests)
            # Handling the scheduler's requests with a bare loop would not
            # bound the concurrency:
            # while True:
            #     self.pool.apply_async(self._excute_request_response_item())
        if ROLE == 'slave' or ROLE is None:
            # The slave side executes requests; master and slave are split.
            # The loop below bounds the concurrency.
            for i in range(ASNYC_MAX_COUNT):
                logger.info(u'worker task running...')
                """2. Asynchronous, non-blocking: execute the requests"""
                self.pool.apply_async(self._excute_request_response_item,
                                      callback=self._callback)
        while True:
            # Soften the busy-wait: with slow responses (around 2 seconds each)
            # the CPU would otherwise spin; the short sleep lowers the load
            time.sleep(0.001)
            if self.total_response == self.scheduler.total_request and self.total_response != 0:
                # total_response != 0 guards against exiting before any work;
                # stop once the response count matches the request count
                self.is_running = False
                break
        self.pool.close()  # no more tasks may be submitted to the pool
        self.pool.join()   # let the main thread wait for every worker to finish
        logger.info(u"main thread finished")

    """Multi-spider support: _start_engine is refactored into the helpers below."""

    def _start_requests(self):
        for spider_name, spider in self.spiders.items():
            # 1. Collect the list of start requests of each spider
            start_request_list = spider.start_requests()
            for start_request in start_request_list:
                # Handle the several requests a spider may emit
                start_request.spider_name = spider.name
                ### 1.1 Pass the request through the spider middlewares
                for spider_middleware in self.spider_mids:
                    start_request = spider_middleware.process_request(
                        start_request)
                # 2. Enqueue the request in the scheduler
                self.scheduler.add_request(start_request)

    def _excute_request_response_item(self):
        # Handle one request / response / item cycle
        # 3. Pull a request from the scheduler (already deduplicated there)
        request = self.scheduler.get_request()
        # The scheduler queue uses get(False): once every queued URL has been
        # handled it returns None, and this worker exits
        if request is None:
            return
        ### 3.1 Pre-process the request through the downloader middlewares
        for downloader_middleware in self.downloader_mids:
            request = downloader_middleware.process_request(request)
        # 4. Hand the request to the downloader
        response = self.downloader.get_response(request)
        ### 4.1 Post-process the response through the downloader middlewares
        for downloader_middleware in self.downloader_mids:
            response = downloader_middleware.process_response(response)
        # 5. Hand the response to the spider for parsing
        # results = self.spider.parse(response)
        # 1. request.parse names the parse method to call
        spider = self.spiders[request.spider_name]
        parse_func = getattr(spider, request.parse)
        # 2. Parse the response
        results = parse_func(response)
        for result in results:
            # 6. Dispatch on the type of each parsed result
            if isinstance(result, Request):
                result.spider_name = request.spider_name  # tag with the spider's name
                ### 6.1 Requests go back through the spider middlewares
                ### and into the scheduler
                for spider_middleware in self.spider_mids:
                    result = spider_middleware.process_request(result)
                self.scheduler.add_request(result)
            elif isinstance(result, Item):
                ### 6.2 Items go through the spider middlewares and then the pipelines
                for spider_middleware in self.spider_mids:
                    result = spider_middleware.process_item(result)
                # Hand the parsed data to the pipelines for storage;
                # the framework supports multiple pipelines
                for pipeline in self.pipelines:
                    result = pipeline.process_item(result, spider)  # result is whatever the pipeline returns
            else:
                raise Exception("Error: parse returned data that cannot be handled")
        # Whichever worker finishes a response increments the counter
        self.total_response += 1

morrisPool.getRealDistance2(w.getPathName())
mousePath = morrisPool.getMousePath()
generateReport(mousePath, morrisPool.getRealDistance(),
               morrisPool.getVideoTimeInS(), morrisPool.getConvFactor())
print 'Total Swimming Time[s]: ' + str(morrisPool.getVideoTimeInS())
op = raw_input('Transfer Experiment? [y/n]: ')
op = op.lower()
if op == 'y':
    print 'Continue with Transference Analysis'
    print '------------------------------------'
    (centre, rad) = morrisPool.getPoolPosition()
    p = Pool(centre, convListToTuple(mousePath), w.getPathName())
    p.analyseMousePath()
    p.showPath()
    d = DrawOutput(morrisPool.poolRadio, morrisPool.poolCenter,
                   p.targetPosition, morrisPool.mousePathA)
    p.generateReport()
    print 'Analysis Finished'
    print '------------------------------------'
    print 'Done!'
else:
    print 'Creating Base Image...'
    print '------------------------------------'
    createBasePhoto(w.getPathName(), videoSequence[0])
    (centre, rad) = morrisPool.getPoolPosition()
    p = Pool(centre, convListToTuple(mousePath), w.getPathName())

    obj = pool.acquire()
    print 'thread3 acquired', obj.name
    print 'removing two', pool.remove(c2, now=True)
    print 'release3', pool.release(obj)

def thread4(pool):
    obj = pool.acquire()
    print 'thread4 acquired', obj.name
    obj.increment()
    print 'release4', pool.release(obj)

c1 = Counter('one')
c2 = Counter('two')
c3 = Counter('three')
p = Pool([c1])
print 'adding one', p.add(c1)
print 'adding two', p.add(c2)
p.open()
for r in p.resource_list:
    print r[0].name, r[0].count
t1 = Thread(target=thread1, args=(p,))
t1.start()
t2 = Thread(target=thread2, args=(p,))
t2.start()

import json
from logging import log, INFO, WARN, ERROR

IntegrityError = psycopg2.IntegrityError

# Global dbpool
import OaConfig
import Pool

# 3 connections. Lets us keep going if one is slow but
# doesn't overload the server if there're a lot of us
dbpool = Pool.DbPool(OaConfig.oasisdbconnectstring, 3)

# Cache stuff on local drives to save our poor database
fileCache = Pool.fileCache(OaConfig.cachedir)

from Pool import MCPool
# Get a pool of memcache connections to use
MC = MCPool('127.0.0.1:11211', 3)


def run_sql(sql, params=None, quiet=False):
    """ Execute SQL commands using the dbpool"""
    conn = dbpool.begin()
    res = conn.run_sql(sql, params, quiet=quiet)
    dbpool.commit(conn)
    return res

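# A hedged usage sketch for run_sql() above: the table and column names are
# hypothetical, and the row layout (tuples) is an assumption about what the
# pooled connection returns. Parameters are passed separately so the driver
# handles the quoting.
rows = run_sql("SELECT id, name FROM users WHERE id = %s;", (42,))
if rows:
    log(INFO, "found user: %s" % rows[0][1])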