Example #1
0
 def testGetScopeSize_badScopeName(self):
     pool = Pool(self.pool)
     try:
         scopeSize = pool.GetScopeSize('BadScope')
         self.fail("Expected exception was not thrown")
     except Pool.Exception, e:
         self.assertEqual("Scope 'BadScope' not found", e.what)
Example #2
0
    def testRemoveAttribute(self):
        pool1 = Pool(self.pool)

        pool1.RemoveAttribute('S1', 'Simple')

        result = pool1.SelectAttributes('S1', 'Simple')
        self.assertEqual(0, len(result))
Example #3
0
 def testDefaultInit(self):
     pool = Pool()
     file = cStringIO.StringIO()
     pool.Dump(file)
     self.assertEqual(
         "<?xml version='1.0' encoding='UTF-8'?>\n<DescriptorsPool/>\n",
         file.getvalue())
Example #4
0
    def __init__(self):
        # Initialize the engine components.
        # Load the spider classes defined by the project.
        self.spiders = self._auto_import_module_cls(SPIDERS, True)

        self.scheduler = Scheduler()
        self.downloader = Downloader()
        # self.pipeline = Pipeline()
        # Support multiple pipelines.
        self.pipelines = self._auto_import_module_cls(PIPELINES)

        # Middleware initialization.
        # self.spider_middlewares = SpiderMiddlewares()
        # self.downloader_middlewares = DownloaderMiddlewares()

        # Load the spider and downloader middlewares overridden by the project.
        self.spider_mids = self._auto_import_module_cls(SPIDER_MIDDLEWARES)
        self.downloader_mids = self._auto_import_module_cls(
            DOWNLOADER_MIDDLEWARES)

        # Create the thread pool object.
        self.pool = Pool()  # used wherever requests are handled; two methods run asynchronously

        # Counters.
        self.total_response = 0  # response counter

        # Running state of the main thread: is it still executing?
        self.is_running = True
Example #5
0
	def testInserAttributeAsNodes(self) :
		pool1 = Pool(self.pool)
		pool2 = Pool(self.pool2)
		nodesToMove = pool2.SelectAttributes('S2','AdditionalAttribute')

		pool1._InsertNode(nodesToMove,'S1')

		nodes = pool1.SelectAttributes('S1', 'AdditionalAttribute')
		self.assertEquals(self.serializeXml(nodesToMove), self.serializeXml(nodes))
Example #6
0
def real_getRW(key="default"):
    global g_dbh_pool
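    # Lazily build a connection pool for this key on first use, then hand out a connection from it.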
    if g_dbh_pool.get(key, None) is None:
        import MySQLdb
        g_dbh_pool[key] = Pool.Pool(
            Pool.Constructor(MySQLdb.connect, **getConnectInfo(key)),
            getConnectInfo(key).get("connections", 30))

    return g_dbh_pool[key].get()
Example #7
0
    def testInserAttribute_withNonExistingAttribute(self):
        pool1 = Pool(self.pool)
        pool2 = Pool(self.pool2)

        try:
            pool1.InsertAttribute(pool2, 'S2', 'BadAttribute', 'S1')
            self.fail("Expected exception was not thrown")
        except Pool.Exception, e:
            self.assertEqual("Attribute 'S2::BadAttribute' not found", e.what)
Example #8
0
 def testAssureScopeWithPopulation_existingScopeDifferentSizes(self):
     pool = Pool(self.pool)
     try:
         pool.AssureScopeWithPopulation("S1", 3)
         self.fail("Expected exception was not thrown")
     except Pool.Exception, e:
         self.assertEqual(
             "Requested size for scope 'S1' was 3 but it is actually 1",
             e.what)
Example #9
0
    def testRemoveAttribute_scopeDoesNotExists(self):
        pool1 = Pool(self.pool)

        try:
            pool1.RemoveAttribute('BadScope', 'Simple')
            self.fail("Expected exception was not thrown")
        except Pool.Exception, e:
            self.assertEqual(
                "Scope 'BadScope' not found while removing 'BadScope::Simple'",
                e.what)
Example #10
0
    def __init__(self):
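        # LeNet-style stack: two conv -> sigmoid -> pool blocks followed by a fully connected head.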
        conv1 = ConvLayer(28, 28, 1, 6, 5, 1, 2)
        sigmoid1 = Sigmoid()
        pool1 = Pool(2)
        conv2 = ConvLayer(14, 14, 6, 16, 5, 1, 0)
        sigmoid2 = Sigmoid()
        pool2 = Pool(2)
        fc = Perceptron([400, 600, 10])

        self.layers = [conv1, sigmoid1, pool1, conv2, sigmoid2, pool2, fc]
Example #11
0
    def testRemoveAttribute_attributeDoesNotExists(self):
        pool1 = Pool(self.pool)

        try:
            pool1.RemoveAttribute('S1', 'BadAttribute')
            self.fail("Expected exception was not thrown")
        except Pool.Exception, e:
            self.assertEqual(
                "Attribute 'BadAttribute' not found while removing 'S1::BadAttribute'",
                e.what)
Example #12
0
 def _init(self, namespace=None, pool_size=10,
           decode_responses=False, **kwargs):
     self.protocol_factory = partial(RedisStoreConnection, Consumer)
     self._decode_responses = decode_responses
     if namespace:
         self._urlparams['namespace'] = namespace
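     # Connection pool built around this store's connect() factory, capped at pool_size and bound to the event loop.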
     self._pool = Pool(self.connect, pool_size=pool_size, loop=self._loop)
     if self._database is None:
         self._database = 0
     self._database = int(self._database)
     self.loaded_scripts = set()
Example #13
0
    def testInsertAttribute_differentScopeSizes(self):
        pool = Pool(self.pool)
        poolDifferentSize = Pool(self.poolDifferentSize)

        try:
            pool.InsertAttribute(poolDifferentSize, 'S2',
                                 'AdditionalAttribute', 'S1')
            self.fail("Expected exception was not thrown")
        except Pool.Exception, e:
            self.assertEqual(
                "Requested size for scope 'S1' was 3 but it is actually 1",
                e.what)
Example #14
0
    def __init__(self, spiders, spider_mids=[], downloader_mids=[]):
        self.spiders = spiders  # spiders
        self.scheduler = Scheduler()  # scheduler
        self.downloader = Downloader()  # downloader
        self.pipline = Pipeline()  # pipeline
        self.spider_mids = spider_mids  # spider middlewares
        self.downloader_mids = downloader_mids  # downloader middlewares

        self.pool = Pool()

        self.response_number = 0  # number of responses

        self.max_async = settings.MAX_ASYNC  # maximum concurrency

        self.running = False
Example #15
0
def pooling_test():
    """Define the input sample"""
    # Input of size 1x5x8x8
    x = torch.tensor(np.random.randn(1,5,8,8).astype(np.float32), requires_grad=True)
    x_numpy = x.detach().numpy()

    """Define the upstream error"""
    dy = torch.tensor(np.random.randn(1,5,4,4).astype(np.float32), requires_grad=True)
    dy_numpy = dy.detach().numpy()

    # PyTorch: pool=(2,2), stride=2
    pool_out = F.max_pool2d(x, kernel_size=2, stride=2)
    pool_out.backward(dy)
    print('pool_out: \n', pool_out)
    print('pool_out.shape: \n', pool_out.shape)

    # NumPy
    pool1 = Pool.MaxPooling(pool_shape=(2,2), stride=(2,2))
    pool_out_numpy = pool1.forward(x_numpy)
    pool_eta = pool1.gradient(dy_numpy)
    print('pool_out_numpy: \n', pool_out_numpy)
    print('pool_out_numpy.shape: \n', pool_out_numpy.shape)

    # Compare the backpropagated errors
    print('pool_out_grad: \n', x.grad)
    print('pool_out_grad.shape: \n', x.grad.shape)

    print('pool_out_numpy_grad: \n', pool_eta)
    print('pool_out_numpy_grad.shape: \n', pool_eta.shape)

    print('pool_out_numpy_grad error: \n', pool_eta - x.grad.detach().numpy())
Example #16
0
    def __init__(self):
        self.spiders = self._auto_import_instances(path=SPIDERS,
                                                   isspider=True)  # spider dict
        self.scheduler = Scheduler()
        self.downloader = Downloader()

        self.pipelines = self._auto_import_instances(path=PIPELINES)
        self.spider_mids = self._auto_import_instances(path=SPIDER_MIDDLEWARES)
        self.downloader_mids = self._auto_import_instances(
            path=DOWNLOADER_MIDDLEWARES)

        self.total_response_nums = 0
        self.total_request_nums = 0

        self.pool = Pool(5)  # os.cpu_count() or 1
        self.is_running = True
Example #17
0
    def __init__(self):
        self.spiders = self._auto_import_instances(path=SPIDERS, isspider=True)  # spider dict
        self.pipelines = self._auto_import_instances(path=PIPELINES)
        self.spider_mids = self._auto_import_instances(path=SPIDER_MIDDLEWARES)
        self.downloader_mids = self._auto_import_instances(path=DOWNLOADER_MIDDLEWARES)

        if SCHEDULER_PERSIST:
            self.collector = ReidsStatsCollector()
        else:
            self.collector = NormalStatsCollector()
        # self.total_response_nums = 0
        # self.total_request_nums = 0

        self.scheduler = Scheduler(self.collector)
        self.downloader = Downloader()
        self.pool = Pool(5) # os.cpu_count() or 1
        self.is_running = True
Example #18
0
    def testInserAttributeAsNodes(self):
        pool1 = Pool(self.pool)
        pool2 = Pool(self.pool2)
        nodesToMove = pool2.SelectAttributes('S2', 'AdditionalAttribute')

        pool1._InsertNode(nodesToMove, 'S1')

        nodes = pool1.SelectAttributes('S1', 'AdditionalAttribute')
        self.assertEquals(self.serializeXml(nodesToMove),
                          self.serializeXml(nodes))
Example #19
0
 def __init__(self):
     #self.spiders =  spiders
     self.spiders = self._auto_import_module_cls(SPIDERS, True)
     self.scheduler = Scheduler()
     self.downloader = Downloader()
     #self.pipeline = Pipeline()
     #self.pipelines = pipelines
     self.pipelines = self._auto_import_module_cls(PIPELINES)
     #self.spider_middlewares = SpiderMiddlewares()
     #self.downloader_middlewares = DownloaderMiddlewares()
     #self.spider_mids = spider_mids
     self.spider_mids = self._auto_import_module_cls(SPIDER_MIDDLEWARES)
     self.downloader_mids = self._auto_import_module_cls(DOWNLOADER_MIDDLEWARES)
     #self.downloader_mids = downloader_mids
     # Create the thread/coroutine pool object
     self.pool = Pool()
     self.total_response = 0
     self.is_running = True
Example #20
0
    def __init__(self):
        # Spiders
        self.spiders = self.auto_import_module(SPIDERS)
        # Scheduling
        self.scheduler = Scheduler()
        # Responses (downloader)
        self.download = Download()
        # Saving (pipelines)
        self.pipelines = self.auto_import_module(PIPELINES)

        # Spider middlewares / downloader middlewares
        self.spider_mids = self.auto_import_module(SPIDER_MIDDLEWARES)
        self.download_mids = self.auto_import_module(DOWNLOAD_MINDDLEWARES)
        # Create the pool object
        self.pool = Pool(ASYNC_COUNT)
        # Response counter
        self.response_count = 0
        # Whether there are still pending requests
        self.has_request = True
Example #21
0
    def testInserAttribute_onANewScope(self):
        pool1 = Pool(self.pool)
        pool2 = Pool(self.pool2)

        pool1.InsertAttribute(pool2, 'S2', 'AdditionalAttribute', 'NewScope')

        result = pool1.SelectAttributes('NewScope', 'AdditionalAttribute')
        expect = pool2.SelectAttributes('S2', 'AdditionalAttribute')
        self.assertEquals(self.serializeXml(expect), self.serializeXml(result))
Example #22
0
 def QueryDescriptors(self,
                      id,
                      ignoreCache=False,
                      computeIfNotCached=False,
                      keepCache=True):
     print "Computing", self.extractor, "for", id
     if not ignoreCache and False:  # TODO: Use properly the ignoreCache flag
         try:
             result = Pool(file(self._poolPath(id)))
             print "Using cached data"
             return result
         except IOError, e:
             pass  # Not found
Example #23
0
 def UpdateDescriptors(self, id, pool, descriptors=None):
     if descriptors == None:
         descriptors = pool.PresentAttributes()
     scripts = self._DisgregatorScripts(descriptors)
     for source, script in scripts.items():
         if script == "": continue
         disgregator = Aggregator(cStringIO.StringIO(script))
         try:
             result = self.sources[source].QueryDescriptors(id)
         except:
             result = Pool()
         disgregator.run(result, [pool])
         self.sources[source].UpdateDescriptors(id, result)
Example #24
0
 def QueryDescriptors(self, id, descriptors):
     if self.verbose: print "++ Building aggregation script..."
     (aggregatorScript, sourceIds) = self._AggregatorScriptFor(descriptors)
     aggregator = Aggregator(cStringIO.StringIO(aggregatorScript))
     result = Pool()
     sourcesPools = []
     for sourceId in sourceIds:
         if self.verbose:
             print "++ Querying descriptors from %s..." % sourceId
         sourcePool = self.sources[sourceId].QueryDescriptors(id)
         sourcesPools.append(sourcePool)
     if self.verbose: print "++ Aggregating..."
     aggregator.run(result, sourcesPools)
     return result
Example #25
0
def TestMnistConv():
    # Learn
    #
    Images, Labels = LoadMnistData('MNIST\\t10k-images-idx3-ubyte.gz',
                                   'MNIST\\t10k-labels-idx1-ubyte.gz')
    Images = np.divide(Images, 255)

    W1 = 1e-2 * np.random.randn(9, 9, 20)
    W5 = np.random.uniform(-1, 1,
                           (100, 2000)) * np.sqrt(6) / np.sqrt(360 + 2000)
    Wo = np.random.uniform(-1, 1, (10, 100)) * np.sqrt(6) / np.sqrt(10 + 100)

    X = Images[0:8000, :, :]
    D = Labels[0:8000]

    for _epoch in range(3):
        print(_epoch)
        W1, W5, Wo = MnistConv(W1, W5, Wo, X, D)

    # Test
    #
    X = Images[8000:10000, :, :]
    D = Labels[8000:10000]

    acc = 0
    N = len(D)
    for k in range(N):
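        # Forward pass: conv -> ReLU -> pool -> flatten -> two dense layers -> softmax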
        x = X[k, :, :]

        y1 = Conv(x, W1)
        y2 = ReLU(y1)
        y3 = Pool(y2)
        y4 = np.reshape(y3, (-1, 1))
        v5 = np.matmul(W5, y4)
        y5 = ReLU(v5)
        v = np.matmul(Wo, y5)
        y = Softmax(v)

        i = np.argmax(y)
        if i == D[k][0]:
            acc = acc + 1

    acc = acc / N
    print("Accuracy is : ", acc)
Example #26
0
def map(func, params, multiprocess=False, processes=psutil.cpu_count() - 1):
    """Distribute map/starmap on # of processes (default to cores - 1)"""
    if not multiprocess:
        processes = 1
    print("cores: %d" % psutil.cpu_count())
    print("processes: %d" % processes)
    starmap = isinstance(params[0], tuple)
    print("starmap: %s" % starmap)
    t = timer()
    if starmap:
        func(*params[0])
    else:
        func(params[0])
    print("calcs: %d (~%.2fs) .." % (len(params),
                                     (timer() - t) * len(params) / processes))

    # Calculation
    t = timer()
    if processes > 1:
        with Pool(processes=processes) as pool:
            try:
                if starmap:
                    results = pool.starmap(func, params)
                else:
                    results = pool.map(func, params)
            except Exception as e:
                pool.close()
                pool.join()
                raise e
    else:
        if starmap:
            results = [func(*p) for p in params]
        else:
            results = [func(p) for p in params]

    print("done. %.2fs" % (timer() - t))
    return results
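
A minimal invocation of this helper, for illustration only; the worker function _square and its parameter list are hypothetical and not part of the original module:

def _square(x):
    return x * x

# Runs sequentially here; passing multiprocess=True would spread the calls
# across cpu_count() - 1 worker processes via the Pool context shown above.
squares = map(_square, [1, 2, 3, 4])  # -> [1, 4, 9, 16]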
Example #27
0
def create_block_volume(volume_name,
                        pool,
                        size,
                        description='',
                        format=128,
                        performance_priority=0,
                        qos_enabled=False,
                        max_total_iops=0,
                        max_total_bw=0,
                        burst_total_iops=0,
                        burst_total_iobw=0,
                        host=None):
    """
    volume_name: volume name
    pool: pool id or name
    size: volume size, like 100M, 100G, or 100000. If the unit is ignored, the unit is byte.

    """
    retval = 0
    if not isinstance(pool, int):
        pool = Pool.get_pool_id(pool, host)
    if pool == -1:
        print "[Error] The pool id is invalid."
        return -1
    cmd = utils.XMS_CLI_HEADER + "-f json block-volume create -p {poolid} -s {volsize} -f {fmt} --pp {pp} {volname}".format(
        poolid=pool,
        volsize=size,
        fmt=format,
        pp=performance_priority,
        volname=volume_name)
    print cmd
    ret = utils.execute_cmd_in_host(cmd, host)
    if ret[2] != 0:
        print "[Error] Failed to create block volume " + str(
            volume_name) + ". Error message: [{err}]".format(err=ret[1])
        retval = -1
    return retval
Example #28
0
def main():
    # Ask for the name of the folder to copy
    old_folder_name = input("Enter the name of the folder to copy: ")

    # Create the destination folder
    new_folder_name = old_folder_name + "-[copy]"
    os.mkdir(new_folder_name)

    # Get the names of all files in the source folder
    old_folder_list = os.listdir(old_folder_name)

    # Copy every file from the source folder to the new folder using a process pool
    pool = Pool(5)

    for file_name in old_folder_list:
        pool.apply_async(copy_file_task, args=(file_name, old_folder_name, new_folder_name))

    pool.close()  # no more tasks will be submitted; required before join()
    pool.join()
Example #29
0
class Engine(object):
    """Engine: central dispatcher."""
    def __init__(self):
        # Spiders
        self.spiders = self.auto_import_module(SPIDERS)
        # Scheduling
        self.scheduler = Scheduler()
        # Responses (downloader)
        self.download = Download()
        # Saving (pipelines)
        self.pipelines = self.auto_import_module(PIPELINES)

        # Spider middlewares / downloader middlewares
        self.spider_mids = self.auto_import_module(SPIDER_MIDDLEWARES)
        self.download_mids = self.auto_import_module(DOWNLOAD_MINDDLEWARES)
        # Create the pool object
        self.pool = Pool(ASYNC_COUNT)
        # Response counter
        self.response_count = 0
        # Whether there are still pending requests
        self.has_request = True

    def main(self):
        if ROLE == "mater" or ROLE is None:
            self._execute_start_requests()
        # for _ in range(ASYNC_COUNT):
        #     self.pool.apply_async(self._execute_request_response_item, callback=self._callback)

        while True:
            if self.scheduler.request_count == self.response_count and self.scheduler.request_count != 0:
                self.has_request = False
                break
            if ROLE == "slave" or ROLE is None:
                self.pool.apply_async(self._execute_request_response_item,
                                      callback=self._callback)

            time.sleep(TIME_SLEEP)

        self.pool.close()

        self.pool.join()

    def _execute_start_requests(self):
        # Iterate over the spiders
        for spider_name, spider in self.spiders.items():
            # Get the start requests
            for start_request in spider.start_requests():
                # Pre-process through the spider middlewares before the URL is enqueued
                start_request.name = spider_name
                for spider_mid in self.spider_mids:
                    start_request = spider_mid.process_request(start_request)
                # Enqueue the request, de-duplicating
                self.scheduler.add_request(start_request)

    def _execute_request_response_item(self):
        # Pop a request from the queue
        request = self.scheduler.get_request()
        if request is None:
            return True
        # Pre-process the request through the downloader middlewares
        for download in self.download_mids:
            request = download.process_request(request)
        # Send the request and get the response
        response = self.download.send_request(request)
        # Post-process the response through the downloader middlewares
        for download in self.download_mids:
            response = download.process_response(response)
        # Parse the data
        spider = self.spiders[request.name]
        # The parse callback is a generator yielding multiple results
        parse_func = getattr(spider, request.callback)
        for result in parse_func(response):
            # Decide whether the parsed result is a new request or an item:
            if isinstance(result, Request):
                # Pre-process through the spider middlewares before enqueueing
                result.name = request.name
                for spider_mid in self.spider_mids:
                    result = spider_mid.process_request(result)
                # Keep crawling: enqueue the new request
                self.scheduler.add_request(result)
            elif isinstance(result, Item):
                # Pre-process before saving
                for spider_mid in self.spider_mids:
                    result = spider_mid.process_item(result)
                # Hand the item to every pipeline
                for pipeline in self.pipelines:
                    pipeline.process_item(result, spider)
        self.response_count += 1

    def _callback(self, foo):
        if foo:
            return True
        elif self.has_request is True:
            self.pool.apply_async(self._execute_request_response_item,
                                  callback=self._callback)

    def start(self):
        start = datetime.now()
        logger.info("Start time is [ {} ]".format(start))
        self.main()
        end = datetime.now()
        logger.info("End time is [ {} ]".format(end))
        print("[INFO]: run time is {}s".format((end - start).total_seconds()))

    @staticmethod
    def auto_import_module(module_list):
        instance = {}
        instance1 = []
        for module in module_list:
            index = module.rfind(".")
            # Split into module path (path_name) and variable name (var_name)
            path_name = module[:index]
            var_name = module[index + 1:]
            # Import the module dynamically (fromlist makes dotted paths return the leaf module)
            import_module = __import__(path_name, fromlist=[var_name])
            # Look up the variable dynamically
            var = getattr(import_module, var_name)
            # A class tagged as a spider is instantiated into the dict; everything else goes into the list
            if hasattr(var, "tag") and var.tag == "spider":
                instance[var.name] = var()
            else:
                instance1.append(var())
        return instance or instance1
Example #30
0
def just_for_test_block(volume_num=1000,
                        volume_name_prefix="volume-",
                        volume_size_min=100,
                        volume_size_max=500,
                        snapshot_num=1000,
                        snapshot_name_prefix="snapshot-",
                        client_group_num=1000,
                        client_group_name_prefix="client-group",
                        access_path_num=2,
                        access_path_name_prefix="access_path",
                        host=None):
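    # Basic sanity checks on the requested object counts and the volume size range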
    if access_path_num < 2 or volume_num < 2 or snapshot_num < 2 or client_group_num < 2:
        print "[Error] The num of access path, volume, snapshot, client group should be larger than 2."
        return
    if volume_size_min < 0 or volume_size_max < 0 or volume_size_min > volume_size_max:
        print "[Error] Invalid parameters."
        return

    # create block volumes
    ret, pool_ids = Pool.get_pool_ids(host=host)
    if ret != 0 or len(pool_ids) < 1:
        print "[Error] Failed to get pool info or pool not exists."
        return
    for i in range(1, volume_num + 1):
        size = random.randint(volume_size_min, volume_size_max)
        pool_id = pool_ids[random.randint(0, len(pool_ids) - 1)]
        print BlockVolume.create_block_volume(volume_name_prefix + str(i),
                                              pool_id,
                                              str(size) + "G",
                                              host=host)

    # create block snapshots
    ret, volume_ids = BlockVolume.get_block_volume_ids(host=host)
    if ret != 0 or len(volume_ids) < 1:
        print "[Error] Failed to get volume info or volumes not exist."
        return
    for i in range(1, snapshot_num + 1):
        idx = random.randint(0, len(volume_ids) - 1)
        print Snapshot.create_block_snapshot(snapshot_name_prefix +
                                             str(uuid.uuid1()),
                                             volume_ids[idx],
                                             host=host)

    for i in range(1, client_group_num + 1):
        iqn = generate_iqn()
        print ClientGroup.create_client_group(client_group_name_prefix +
                                              str(i),
                                              "iSCSI",
                                              iqn,
                                              host=host)

    # create access paths
    for i in range(1, access_path_num + 1):
        print AccessPath.create_access_path(access_path_name_prefix + str(i),
                                            aptype="iSCSI",
                                            host=host)

    # create mapping groups
    ret, client_group_ids = ClientGroup.get_client_group_ids(host=host)
    if ret != 0:
        print "[Error] Failed to get client group info."
        return

    volume_ids.sort()
    client_group_ids.sort()

    cgid_len = len(client_group_ids)
    vid_len = len(volume_ids)

    for i in range(0, cgid_len / 2):
        cgid = client_group_ids[i]
        print MappingGroup.create_mapping_group(1,
                                                volume_ids[:vid_len / 2],
                                                cgid,
                                                host=host)

    for i in range(cgid_len / 2, cgid_len):
        cgid = client_group_ids[i]
        print MappingGroup.create_mapping_group(2,
                                                volume_ids[vid_len / 2:],
                                                cgid,
                                                host=host)
Example #31
0
class Engine(object):
    def __init__(self):
        # Initialize the engine components.
        # Load the spider classes defined by the project.
        self.spiders = self._auto_import_module_cls(SPIDERS, True)

        self.scheduler = Scheduler()
        self.downloader = Downloader()
        # self.pipeline = Pipeline()
        # Support multiple pipelines.
        self.pipelines = self._auto_import_module_cls(PIPELINES)

        # Middleware initialization.
        # self.spider_middlewares = SpiderMiddlewares()
        # self.downloader_middlewares = DownloaderMiddlewares()

        # Load the spider and downloader middlewares overridden by the project.
        self.spider_mids = self._auto_import_module_cls(SPIDER_MIDDLEWARES)
        self.downloader_mids = self._auto_import_module_cls(
            DOWNLOADER_MIDDLEWARES)

        # Create the thread pool object.
        self.pool = Pool()  # used wherever requests are handled; two methods run asynchronously

        # Counters.
        self.total_response = 0  # response counter

        # Running state of the main thread: is it still executing?
        self.is_running = True

    def _auto_import_module_cls(self, paths=[], isspider=False):
        import importlib
        if isspider:
            result = {}  # for spiders, __init__ gets back a dict
        else:
            result = []  # for everything else, __init__ gets back a list

        for path in paths:
            module_name = path[:path.rfind(".")]
            ret = importlib.import_module(module_name)

            cls_name = path[path.rfind(".") + 1:]

            cls = getattr(ret, cls_name)  # resolve the class object named by the dotted path

            if isspider:
                result[cls.name] = cls()
            else:
                result.append(cls())

        return result

    """For this method, first add the configuration in settings, then test _auto_import_module_cls:
    1. Move the configuration out of main into settings.
    2. Let the engine obtain the project's pipelines and middlewares through _auto_import_module_cls.
    """

    def start(self):
        # Log the program's running time
        start = datetime.now()
        logger.info("start time: {}".format(start))

        # Start the engine
        self._start_engine()

        stop = datetime.now()
        logger.info("stop time: {}".format(stop))

        # Record the total running time
        # total_seconds() gives the difference between the two timestamps
        logger.info("total time: {}".format((stop - start).total_seconds()))

    def _callback(self, _):  # the callback must accept one argument; it is unused here, so it is named "_"
        if self.is_running == True:
            self.pool.apply_async(self._excute_request_response_item,
                                  callback=self._callback)  # re-submits itself recursively

    def _start_engine(self):
        if ROLE == "master" or ROLE is None:  # the master issues the start requests (also needed when not distributed)
            # Handle the start requests
            # self._start_requests()  # ---> send the requests
            """__*** 1. Asynchronous, non-blocking ---> send the requests ***__"""
            self.pool.apply_async(self._start_requests)

        # Handle requests from the scheduler
        # while True:
        # ----> execute requests, but with no way to bound the concurrency
        # self.pool.apply_async(self._excute_request_response_item())
        if ROLE == 'slave' or ROLE is None:  # the slave executes requests (master/slave separation)
            # How do we bound the concurrency?
            for i in range(ASNYC_MAX_COUNT):
                logger.info(u'worker thread running...')
                """__*** 2. Asynchronous, non-blocking ---> execute the requests ***__"""
                self.pool.apply_async(self._excute_request_response_item,
                                      callback=self._callback)

        while True:
            # Soften the busy-wait: a slow response can take a couple of seconds,
            # during which the CPU would otherwise just spin; testing shows this sleep reduces CPU load.
            time.sleep(0.001)

            if self.total_response == self.scheduler.total_request and self.total_response != 0:
                self.is_running = False
                # The != 0 guard keeps the engine from exiting before anything has run
                # (both counters start at 0); stop once requests == responses.
                break
        self.pool.close()  # no more tasks will be added to the pool
        self.pool.join()  # the main thread waits for all worker threads to finish

        logger.info(u"main thread finished")

    """处理多爬虫, 对_start_engine方法进行重构"""

    def _start_requests(self):
        for spider_name, spider in self.spiders.items():
            # 1.获取spider中的url请求list
            start_request_list = spider.start_requests()

            for start_request in start_request_list:  # 处理spider发送的多个请求
                start_request.spider_name = spider.name

                ### 1.1请求经过爬虫中间件
                for spider_middleware in self.spider_mids:
                    start_request = spider_middleware.process_request(
                        start_request)
                # 2. 请求入调度器
                self.scheduler.add_request(start_request)

    def _excute_request_response_item(self):
        # Execute request -> response -> item

        # 3. Get a request from the scheduler (already de-duplicated there)
        request = self.scheduler.get_request()
        # Note: requests are added with add_request and fetched with get_request.
        # When the queue has been drained, the scheduler's get(False) returns None.

        # When there are no requests left in the queue, exit
        if request is None:
            # break
            return  # no request came in, so this worker returns

        ### 3.1 Pre-process the request through the downloader middlewares
        for downloader_middleware in self.downloader_mids:
            request = downloader_middleware.process_request(request)

        # 4. Hand the request to the downloader
        response = self.downloader.get_response(request)

        ### 4.1 Post-process the response through the downloader middlewares
        for downloader_middleware in self.downloader_mids:
            response = downloader_middleware.process_response(response)

        # 5. Give the response to the spider for parsing
        # results = self.spider.parse(response)
        # 1. request.parse names the parse method to call
        spider = self.spiders[request.spider_name]
        parse_func = getattr(spider, request.parse)

        # 2. Use parse_func to handle the response
        results = parse_func(response)

        for result in results:
            # 6. Dispatch on the type of each parsed result
            if isinstance(result, Request):
                result.spider_name = request.spider_name  # tag the request with its spider's name
                ### 6.1 A request: pre-process through the spider middlewares, then enqueue it
                for spider_middleware in self.spider_mids:
                    result = spider_middleware.process_request(result)

                # Keep crawling: enqueue the new request
                self.scheduler.add_request(result)

            elif isinstance(result, Item):

                ### 6.2 An item: pre-process through the spider middlewares, then hand it to the pipelines
                for spider_middleware in self.spider_mids:
                    result = spider_middleware.process_item(result)

                # Persist the parsed data via the pipelines
                # Framework improvement ---> multi-pipeline support
                for pipeline in self.pipelines:
                    result = pipeline.process_item(result, spider)

                    # result receives whatever the pipeline returns

            else:
                raise Exception("Error: data returned by parse cannot be handled")

        # No matter how many workers are running, increment once per fully handled response
        self.total_response += 1
Example #32
0
morrisPool.getRealDistance2(w.getPathName())

mousePath = morrisPool.getMousePath()

generateReport(mousePath, morrisPool.getRealDistance(),  morrisPool.getVideoTimeInS(), morrisPool.getConvFactor())

print 'Total Swimming Time[s]: ' + str(morrisPool.getVideoTimeInS())

op = raw_input('Transfer Experiment? [y/n]: ')
op = op.lower()

if op == 'y':
    print 'Continue with Transference Analysis'
    print '------------------------------------'
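    # Build a Pool from the detected pool centre and the tracked mouse path, then analyse and plot it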
    (centre, rad) = morrisPool.getPoolPosition()
    p = Pool(centre, convListToTuple(mousePath), w.getPathName())
    p.analyseMousePath()
    p.showPath()
    d = DrawOutput(morrisPool.poolRadio, morrisPool.poolCenter,
        p.targetPosition,morrisPool.mousePathA)
    p.generateReport()
    print 'Analysis Finished'
    print '------------------------------------'
    print 'Done!'
    
else:
    print 'Creating Base Image...'
    print '------------------------------------'
    createBasePhoto(w.getPathName(), videoSequence[0])
    (centre, rad) = morrisPool.getPoolPosition()
    p = Pool(centre, convListToTuple(mousePath), w.getPathName())
Example #33
0
def thread3(pool):
    obj=pool.acquire()
    print 'thread3 acquired',obj.name
    print 'removing two',pool.remove(c2,now=True) 
    print 'release3',pool.release(obj)

def thread4(pool):
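    # Acquire a resource from the pool, mutate it, then release it back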
    obj=pool.acquire()
    print 'thread4 acquired',obj.name
    obj.increment()
    print 'release4',pool.release(obj)

c1=Counter('one')
c2=Counter('two')
c3=Counter('three')

p=Pool([c1])

print 'adding one',p.add(c1)
print 'adding two',p.add(c2)


p.open()

for r in p.resource_list:
    print r[0].name,r[0].count

t1 = Thread(target=thread1, args=(p,))
t1.start()

t2 = Thread(target=thread2, args=(p,))
t2.start()
Example #34
0
import json

from logging import log, INFO, WARN, ERROR

IntegrityError = psycopg2.IntegrityError

# Global dbpool
import OaConfig
import Pool

# 3 connections: lets us keep going if one is slow, but
# doesn't overload the server if there are a lot of us
dbpool = Pool.DbPool(OaConfig.oasisdbconnectstring, 3)

# Cache stuff on local drives to save our poor database
fileCache = Pool.fileCache(OaConfig.cachedir)

from Pool import MCPool

# Get a pool of memcache connections to use
MC = MCPool('127.0.0.1:11211', 3)


def run_sql(sql, params=None, quiet=False):
    """ Execute SQL commands using the dbpool"""
    conn = dbpool.begin()
    res = conn.run_sql(sql, params, quiet=quiet)
    dbpool.commit(conn)
    return res