Example 1
    def testDiskReadAndWritePacked(self, *_):
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor,
                              test_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

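            # run a write/read round trip once per supported compression type;
            # assigning to handler._compress selects the codec used below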
            for handler._compress in self._get_compress_types():
                data_key1 = str(uuid.uuid4())

                storage_client.delete(session_id, [data_key1])
                self.rm_spill_dirs()

                block_data1 = dataserializer.dumps(data1,
                                                   compress=handler._compress)

                def _write_data(ser, writer):
                    with writer:
                        writer.write(ser)
                    return writer.filename

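                # write the compressed block through a promised packed writer and
                # hand the resulting file name to the test actor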
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes,
                                            packed=True, _promise=True) \
                    .then(functools.partial(_write_data, block_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])
                self.assertTrue(os.path.exists(file_name))

                def _read_data(reader):
                    with reader:
                        return dataserializer.loads(reader.read())

                handler.create_bytes_reader(session_id, data_key1, packed=True, _promise=True) \
                    .then(_read_data) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data1)
Example 2
    def _start_calc_pool(self):
        mock_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, backend='gevent',
                              address=mock_addr) as pool:
            pool.create_actor(SchedulerClusterInfoActor, [mock_addr],
                              uid=SchedulerClusterInfoActor.default_uid())
            pool.create_actor(WorkerClusterInfoActor, [mock_addr],
                              uid=WorkerClusterInfoActor.default_uid())

            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(StatusActor,
                              mock_addr,
                              uid=StatusActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())
            pool.create_actor(IORunnerActor)
            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            shared_holder_ref = pool.create_actor(
                SharedHolderActor, uid=SharedHolderActor.default_uid())
            pool.create_actor(InProcHolderActor)
            pool.create_actor(CpuCalcActor, uid=CpuCalcActor.default_uid())

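            # yield the pool and test actor to the caller and make sure the
            # shared holder is destroyed once the test body finishes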
            with self.run_actor_test(pool) as test_actor:
                try:
                    yield pool, test_actor
                finally:
                    shared_holder_ref.destroy()
Example 3
    def testDaemon(self):
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=2, backend='gevent', distributor=MarsDistributor(2, 'w:0:'),
                               address=mock_scheduler_addr) as pool:
            daemon_ref = pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor,
                                                  uid='w:1:DaemonSleeperActor')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
            test_actor = pool.create_actor(DaemonTestActor)
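            # have the daemon notify the test actor whenever a worker process dies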
            daemon_ref.register_actor_callback(
                test_actor, DaemonTestActor.handle_process_down_for_actors.__name__)

            test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
            self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.sleep(0.5)

            daemon_ref.kill_actor_process(sleeper_ref)
            # repeated kill shall not produce errors
            daemon_ref.kill_actor_process(sleeper_ref)
            self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.restart_process(1)
            daemon_ref.handle_process_down([1])
            pool.sleep(1)
            self.assertTrue(pool.has_actor(sleeper_ref))
            with self.assertRaises(WorkerProcessStopped):
                test_actor.get_result()

            test_actor.run_test_sleep(sleeper_ref, 1)
            pool.sleep(1.5)
            test_actor.get_result()
Example 4
    def testCalcProcessFailure(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=2, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_status=False)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
            dispatch_ref = pool.actor_ref(DispatchActor.default_uid())
            calc_ref = daemon_ref.create_actor(
                MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')

            test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
            test_actor.run_simple_calc(session_id, _tell=True)

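            # let the calculation start, then kill and restart the process hosting the calc actor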
            pool.sleep(2)
            proc_id = pool.distributor.distribute(calc_ref.uid)
            daemon_ref.kill_actor_process(calc_ref)
            self.assertFalse(daemon_ref.is_actor_process_alive(calc_ref))
            pool.restart_process(proc_id)
            daemon_ref.handle_process_down([proc_id])

            with self.assertRaises(WorkerProcessStopped):
                self.wait_for_result(pool, test_actor)
            self.assertEqual(len(dispatch_ref.get_slots('cpu')), 1)
Example 5
    def testStopGraphCalc(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=2, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_status=False)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
            execution_ref = pool.actor_ref(ExecutionActor.default_uid())

            calc_ref = daemon_ref.create_actor(
                MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')

            test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
            test_actor.run_simple_calc(session_id, _tell=True)

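            # stop the executing graph and wait until the calc process is gone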
            pool.sleep(2)
            proc_id = pool.distributor.distribute(calc_ref.uid)
            execution_ref.stop_execution(session_id, test_actor.get_graph_key(), _tell=True)
            while daemon_ref.is_actor_process_alive(calc_ref):
                pool.sleep(0.1)
            pool.restart_process(proc_id)
            daemon_ref.handle_process_down([proc_id])

            with self.assertRaises(ExecutionInterrupted):
                self.wait_for_result(pool, test_actor)
Example 6
    def testSharedLoad(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            # load from bytes io
            disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
            with disk_handler.create_bytes_writer(
                    session_id, data_key1, ser_data1.total_bytes) as writer:
                ser_data1.write_to(writer)

            handler.load_from_bytes_io(session_id, data_key1, disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, data_key1)),
                             [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            disk_handler.delete(session_id, data_key1)
            handler.delete(session_id, data_key1)

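            # keep a weak reference to verify that data2 is released after deletion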
            ref_data2 = weakref.ref(data2)

            # load from object io
            proc_handler = storage_client.get_storage_handler((0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_object(session_id, data_key2, data2)
            del data2

            handler.load_from_object_io(session_id, data_key2, proc_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, data_key2)),
                             [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.SHARED_MEMORY)])

            proc_handler.delete(session_id, data_key2)
            self.assertIsNone(ref_data2())
            handler.delete(session_id, data_key2)
Example 7
    def testStorageManager(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(StorageManagerActor,
                                            uid=StorageManagerActor.default_uid())

            self.assertEqual(list(manager_ref.get_data_locations(session_id, ['NON_EXIST'])[0]), [])

            manager_ref.register_data(session_id, [data_key1],
                                      (0, DataStorageDevice.SHARED_MEMORY), [1024], shapes=[(16, 8)])
            manager_ref.register_data(session_id, [data_key1],
                                      (1, DataStorageDevice.PROC_MEMORY), [1024], shapes=[(16, 8)])
            manager_ref.register_data(session_id, [data_key1],
                                      (0, DataStorageDevice.DISK), [2048])
            self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK),
                              (1, DataStorageDevice.PROC_MEMORY)],
                             sorted(manager_ref.get_data_locations(session_id, [data_key1])[0]))
            self.assertEqual(2048, manager_ref.get_data_sizes(session_id, [data_key1])[0])
            self.assertEqual((16, 8), manager_ref.get_data_shapes(session_id, [data_key1])[0])

            manager_ref.register_data(session_id, [data_key2],
                                      (0, DataStorageDevice.SHARED_MEMORY), [1024])
            manager_ref.register_data(session_id, [data_key2],
                                      (1, DataStorageDevice.PROC_MEMORY), [1024])
            self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY), (1, DataStorageDevice.PROC_MEMORY)],
                             sorted(manager_ref.get_data_locations(session_id, [data_key2])[0]))

            manager_ref.unregister_data(session_id, [data_key2],
                                        (0, DataStorageDevice.SHARED_MEMORY))
            self.assertEqual([(1, DataStorageDevice.PROC_MEMORY)],
                             sorted(manager_ref.get_data_locations(session_id, [data_key2])[0]))
            self.assertEqual(1024, manager_ref.get_data_sizes(session_id, [data_key2])[0])
            self.assertEqual([data_key1],
                             list(manager_ref.filter_exist_keys(session_id, [data_key1, data_key2, 'non-exist'],
                                                                [(0, DataStorageDevice.SHARED_MEMORY)])))

            manager_ref.unregister_data(session_id, [data_key2],
                                        (1, DataStorageDevice.PROC_MEMORY))
            self.assertEqual(list(manager_ref.get_data_locations(session_id, [data_key2])[0]), [])
            self.assertIsNone(manager_ref.get_data_sizes(session_id, [data_key2])[0])
            self.assertIsNone(manager_ref.get_data_shapes(session_id, [data_key2])[0])

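            # simulate the loss of process 1: its PROC_MEMORY records should be dropped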
            manager_ref.register_data(session_id, [data_key2],
                                      (1, DataStorageDevice.PROC_MEMORY), [1024])
            manager_ref.handle_process_down([1])
            self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)],
                             sorted(manager_ref.get_data_locations(session_id, [data_key1])[0]))
            self.assertEqual(list(manager_ref.get_data_locations(session_id, [data_key2])[0]), [])
            self.assertIsNone(manager_ref.get_data_sizes(session_id, [data_key2])[0])
            self.assertIsNone(manager_ref.get_data_shapes(session_id, [data_key2])[0])
Example 8
    def testSharedPutAndGet(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)
            bytes_data2 = ser_data2.to_buffer()

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            handler.put_objects(session_id, [data_key1], [data1])
            self.assertEqual(
                sorted(
                    storage_manager_ref.get_data_locations(
                        session_id, [data_key1])[0]),
                [(0, DataStorageDevice.SHARED_MEMORY)])
            assert_allclose(data1,
                            handler.get_objects(session_id, [data_key1])[0])

            handler.delete(session_id, [data_key1])
            self.assertEqual(
                list(
                    storage_manager_ref.get_data_locations(
                        session_id, [data_key1])[0]), [])
            with self.assertRaises(KeyError):
                handler.get_objects(session_id, [data_key1])

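            # objects can also be put from already-serialized data or raw buffers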
            handler.put_objects(session_id, [data_key2], [ser_data2],
                                serialize=True)
            assert_allclose(data2,
                            handler.get_objects(session_id, [data_key2])[0])
            handler.delete(session_id, [data_key2])

            handler.put_objects(session_id, [data_key2], [bytes_data2],
                                serialize=True)
            assert_allclose(data2,
                            handler.get_objects(session_id, [data_key2])[0])
            handler.delete(session_id, [data_key2])
Example 9
    def testProcMemPutAndGet(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)
            bytes_data2 = ser_data2.to_buffer()

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                DataStorageDevice.PROC_MEMORY)

            handler.put_object(session_id, data_key1, data1)
            self.assertEqual(
                sorted(
                    storage_manager_ref.get_data_locations(
                        session_id, data_key1)),
                [(0, DataStorageDevice.PROC_MEMORY)])
            assert_allclose(data1, handler.get_object(session_id, data_key1))

            handler.delete(session_id, data_key1)
            self.assertIsNone(
                storage_manager_ref.get_data_locations(session_id, data_key1))
            with self.assertRaises(KeyError):
                handler.get_object(session_id, data_key1)

            handler.put_object(session_id,
                               data_key2,
                               ser_data2,
                               serialized=True)
            assert_allclose(data2, handler.get_object(session_id, data_key2))
            handler.delete(session_id, data_key2)

            handler.put_object(session_id,
                               data_key2,
                               bytes_data2,
                               serialized=True)
            assert_allclose(data2, handler.get_object(session_id, data_key2))
            handler.delete(session_id, data_key2)
Example 10
    def testLoadStoreInOtherProcess(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=3,
                              address=test_addr,
                              distributor=MarsDistributor(3)) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1')
            pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2')
            pool.create_actor(IORunnerActor,
                              lock_free=True,
                              dispatched=False,
                              uid=IORunnerActor.gen_uid(1))

            test_ref = pool.create_actor(OtherProcessTestActor,
                                         uid='w:0:OtherProcTest')

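            # poll the test actor for a result, failing after a 10-second timeout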
            def _get_result():
                start_time = time.time()
                while test_ref.get_result() is None:
                    pool.sleep(0.5)
                    if time.time() - start_time > 10:
                        raise TimeoutError

            test_ref.run_copy_test((0, DataStorageDevice.SHARED_MEMORY),
                                   (1, DataStorageDevice.PROC_MEMORY),
                                   _tell=True)
            _get_result()

            test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                                   (0, DataStorageDevice.SHARED_MEMORY),
                                   _tell=True)
            _get_result()

            test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                                   (2, DataStorageDevice.PROC_MEMORY),
                                   _tell=True)
            _get_result()
Example 11
    def testWorkerProcessRestart(self):
        with self._start_worker_process() as (pool, worker_endpoint):
            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid(), address=worker_endpoint)
            dispatch_ref = pool.actor_ref(DispatchActor.default_uid(), address=worker_endpoint)
            cpu_slots = dispatch_ref.get_slots('cpu')
            calc_ref = pool.actor_ref(cpu_slots[0], address=worker_endpoint)
            daemon_ref.kill_actor_process(calc_ref)

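            # the daemon is expected to bring the killed process back within 10 seconds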
            check_start = time.time()
            while not daemon_ref.is_actor_process_alive(calc_ref):
                gevent.sleep(0.1)
                if time.time() - check_start > 10:
                    raise TimeoutError('Check process restart timeout')
Example 12
    def testWorkerProcessRestart(self):
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  [mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_uid())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_uid())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_uid())

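                # launch a real worker process that registers with the mock scheduler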
                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid(),
                                            address=worker_endpoint)
                dispatch_ref = pool.actor_ref(DispatchActor.default_uid(),
                                              address=worker_endpoint)
                cpu_slots = dispatch_ref.get_slots('cpu')
                calc_ref = pool.actor_ref(cpu_slots[0],
                                          address=worker_endpoint)
                daemon_ref.kill_actor_process(calc_ref)

                check_start = time.time()
                while not daemon_ref.is_actor_process_alive(calc_ref):
                    gevent.sleep(0.1)
                    if time.time() - check_start > 10:
                        raise TimeoutError('Check process restart timeout')
        finally:
            if proc.poll() is None:
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Example 13
    def testSharedLoadFromObjects(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            # load from object io
            ref_data1 = weakref.ref(data1)

            proc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_objects(session_id, [data_key1], [data1])
            del data1

            handler.load_from_object_io(session_id, [data_key1], proc_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(
                sorted(
                    storage_manager_ref.get_data_locations(
                        session_id, [data_key1])[0]),
                [(0, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)])

            proc_handler.delete(session_id, [data_key1])
            self.assertIsNone(ref_data1())
            handler.delete(session_id, [data_key1])
Example 14
    def _start_shared_holder_pool(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor, test_addr, uid=StatusActor.default_uid())

            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            yield pool, test_actor
Example 15
    def testCudaMemPutAndGet(self):
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(CudaHolderActor)

            test_data = np.random.random((10, 10))
            test_suites = [
                (test_data, cp.ndarray, cp.asnumpy, assert_allclose),
                (pd.Series(test_data.flatten()), cudf.Series,
                 lambda o: o.to_pandas(), pd.testing.assert_series_equal),
                (pd.DataFrame(dict(col=test_data.flatten())), cudf.DataFrame,
                 lambda o: o.to_pandas(), pd.testing.assert_frame_equal),
            ]

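            # run the same put/get checks for ndarray, Series and DataFrame payloads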
            for data, cuda_type, move_to_mem, assert_obj_equal in test_suites:
                ser_data = dataserializer.serialize(data)

                session_id = str(uuid.uuid4())
                data_key1 = str(uuid.uuid4())
                data_key2 = str(uuid.uuid4())

                storage_client = test_actor.storage_client
                handler = storage_client.get_storage_handler((0, DataStorageDevice.CUDA))

                handler.put_objects(session_id, [data_key1], [data])
                self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                                 [(0, DataStorageDevice.CUDA)])
                self.assertIsInstance(handler.get_objects(session_id, [data_key1])[0], cuda_type)
                assert_obj_equal(data, move_to_mem(handler.get_objects(session_id, [data_key1])[0]))

                handler.delete(session_id, [data_key1])
                self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]), [])
                with self.assertRaises(KeyError):
                    handler.get_objects(session_id, [data_key1])

                handler.put_objects(session_id, [data_key2], [ser_data], serialize=True)
                self.assertIsInstance(handler.get_objects(session_id, [data_key2])[0], cuda_type)
                assert_obj_equal(data, move_to_mem(handler.get_objects(session_id, [data_key2])[0]))
                handler.delete(session_id, [data_key2])
Example 16
    def create_standard_actors(cls,
                               pool,
                               address,
                               quota_size=None,
                               with_daemon=True,
                               with_status=True,
                               with_resource=False):
        quota_size = quota_size or (1024 * 1024)

        pool.create_actor(SchedulerClusterInfoActor, [address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(StorageManagerActor,
                          uid=StorageManagerActor.default_uid())
        if with_resource:
            pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
        if with_daemon:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
        if with_status:
            pool.create_actor(StatusActor,
                              address,
                              uid=StatusActor.default_uid())

        pool.create_actor(SharedHolderActor,
                          cls.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor,
                          quota_size,
                          uid=MemQuotaActor.default_uid())
        pool.create_actor(ExecutionActor, uid=ExecutionActor.default_uid())
Example 17
    def testClientReadAndWrite(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            options.worker.lock_free_fileio = True
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                file_names = []

                def _write_data(ser, writer):
                    file_names.append(writer.filename)
                    self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                    with writer:
                        ser.write_to(writer)

                # test creating non-promised writer and write
                with storage_client.create_writer(session_id,
                                                  data_key1,
                                                  ser_data1.total_bytes,
                                                  (DataStorageDevice.DISK, ),
                                                  _promise=False) as writer:
                    _write_data(ser_data1, writer)
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key1])

                # test creating promised writer and write
                file_names[:] = []
                self.waitp(
                    storage_client.create_writer(
                        session_id, data_key2, ser_data1.total_bytes,
                        (DataStorageDevice.DISK, )).then(
                            functools.partial(_write_data, ser_data1)))
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.DISK)])

                def _read_data(reader):
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test creating reader when data exist in location
                result = self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.DISK, )).then(_read_data))[0]
                assert_allclose(result, data1)

                # test creating reader when no data in location (should raise)
                with self.assertRaises(IOError):
                    storage_client.create_reader(
                        session_id,
                        data_key2, (DataStorageDevice.SHARED_MEMORY, ),
                        _promise=False)

                # test creating reader when copy needed
                self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.SHARED_MEMORY, )).then(_read_data))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY),
                     (0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key2])
                while os.path.exists(file_names[0]):
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(file_names[0]))
Example 18
    def testClientSpill(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            session_id = str(uuid.uuid4())
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client
                idx = 0

                shared_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.SHARED_MEMORY))
                proc_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.PROC_MEMORY))

                def _fill_data():
                    i = 0
                    for i, (key, data) in enumerate(
                            zip(data_keys[idx:], data_list)):
                        try:
                            shared_handler.put_objects(session_id, [key],
                                                       [data])
                        except StorageFull:
                            break
                    return i + idx

                idx = _fill_data()

                # test copying non-existing keys
                storage_client.copy_to(session_id, ['non-exist-key'], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(KeyError):
                    self.get_result(5)

                # test copying into containing locations
                storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_keys[0]])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY)])

                # test unsuccessful copy when no data at target
                def _mock_load_from(*_, **__):
                    return promise.finished(*build_exc_info(SystemError),
                                            _accept=False)

                with patch_method(StorageHandler.load_from, _mock_load_from), \
                        self.assertRaises(SystemError):
                    storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.DISK]) \
                        .then(lambda *_: test_actor.set_result(None),
                              lambda *exc: test_actor.set_result(exc, accept=False))
                    self.get_result(5)

                # test successful copy for multiple objects
                storage_client.delete(session_id, [data_keys[idx - 1]])
                ref_data = weakref.ref(data_list[idx])
                ref_data2 = weakref.ref(data_list[idx + 1])
                proc_handler.put_objects(session_id, data_keys[idx:idx + 2],
                                         data_list[idx:idx + 2])
                data_list[idx:idx + 2] = [None, None]

                storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                       [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                proc_handler.delete(session_id, data_keys[idx:idx + 2])

                self.assertEqual(
                    storage_manager_ref.get_data_locations(
                        session_id, data_keys[idx:idx + 2]),
                    [{(0, DataStorageDevice.SHARED_MEMORY)},
                     {(0, DataStorageDevice.DISK)}])
                self.assertIsNone(ref_data())
                self.assertIsNone(ref_data2())

                # test copy with spill
                idx += 2
                proc_handler.put_objects(session_id, [data_keys[idx]],
                                         [data_list[idx]])

                storage_client.copy_to(session_id, [data_keys[idx]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_keys[idx]])[0]),
                    [(0, DataStorageDevice.PROC_MEMORY),
                     (0, DataStorageDevice.SHARED_MEMORY)])
Example 19
    def testClientPutAndGet(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())
            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')

            session_id = str(uuid.uuid4())
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            data_dict = dict(zip(data_keys, data_list))

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                # check batch object put with size exceeds
                storage_client.put_objects(session_id, data_keys, data_list,
                                           [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.PROC_MEMORY]) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                locations = storage_client.get_data_locations(
                    session_id, data_keys)
                loc_to_keys = defaultdict(list)
                for key, location in zip(data_keys, locations):
                    self.assertEqual(len(location), 1)
                    loc_to_keys[list(location)[0][-1]].append(key)
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.PROC_MEMORY]), 1)
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.SHARED_MEMORY]), 1)

                # check get object with all cases
                with self.assertRaises(IOError):
                    first_shared_key = loc_to_keys[
                        DataStorageDevice.SHARED_MEMORY][0]
                    storage_client.get_object(session_id,
                                              first_shared_key,
                                              [DataStorageDevice.PROC_MEMORY],
                                              _promise=False)

                shared_objs = storage_client.get_objects(
                    session_id, [first_shared_key],
                    [DataStorageDevice.SHARED_MEMORY],
                    _promise=False)
                self.assertEqual(len(shared_objs), 1)
                assert_allclose(shared_objs[0], data_dict[first_shared_key])

                storage_client.get_object(session_id, first_shared_key,
                                          [DataStorageDevice.PROC_MEMORY], _promise=True) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5),
                                data_dict[first_shared_key])

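                # putting an object into shared memory must not keep a local
                # reference alive once the original containers are cleared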
                storage_client.delete(session_id, data_keys)
                time.sleep(0.5)
                ref = weakref.ref(data_dict[data_keys[0]])
                storage_client.put_objects(session_id, data_keys[:1], [ref()],
                                           [DataStorageDevice.SHARED_MEMORY])
                data_list[:] = []
                data_dict.clear()
                self.assertIsNone(ref())
Example 20
    def testSharedSpill(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            holder_ref = pool.create_actor(
                SharedHolderActor, self.plasma_storage_size,
                uid=SharedHolderActor.default_uid())

            session_id = str(uuid.uuid4())
            data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                         for _ in range(20)]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))
            idx = 0

            def _fill_data():
                i = 0
                for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                    try:
                        handler.put_objects(session_id, [key], [data])
                    except StorageFull:
                        break
                return i + idx

            def _do_spill():
                data_size = storage_manager_ref.get_data_sizes(session_id, [data_keys[0]])[0]
                handler.spill_size(2 * data_size) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

            # test lift data key
            idx = _fill_data()
            handler.lift_data_keys(session_id, [data_keys[0]])
            _do_spill()

            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[1]])[0]),
                             [(0, DataStorageDevice.DISK)])

            handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[idx]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            idx += 1

            # test pin data key
            idx = _fill_data()
            holder_ref.lift_data_keys(session_id, [data_keys[0]], last=False)
            pin_token = str(uuid.uuid4())
            pinned_keys = handler.pin_data_keys(session_id, (data_keys[0],), pin_token)
            self.assertIn(data_keys[0], pinned_keys)
            _do_spill()

            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[1]])[0]),
                             [(0, DataStorageDevice.DISK)])

            handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[idx]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            idx += 1

            # test unpin data key
            idx = _fill_data()
            handler.unpin_data_keys(session_id, (data_keys[0],), pin_token)
            _do_spill()

            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                             [(0, DataStorageDevice.DISK)])
Example 21
    def testDiskReadAndWriteMerger(self):
        import logging
        logging.basicConfig(level=logging.DEBUG)

        test_addr = f'127.0.0.1:{get_next_port()}'
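        # use a small merged-file size limit and 16-way merger concurrency for this test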
        options.worker.filemerger.max_file_size = 2400
        options.worker.filemerger.concurrency = 16

        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor,
                              test_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            disk_file_merger_ref = pool.create_actor(
                DiskFileMergerActor, uid=DiskFileMergerActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            data_count = 30
            data = [
                np.random.rand(random.randint(10, 30), random.randint(10, 30))
                for _ in range(data_count)
            ]
            ser_data = [dataserializer.serialize(d) for d in data]

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            for handler._compress in self._get_compress_types():
                data_keys = [str(uuid.uuid4()) for _ in range(data_count)]

                promises = []
                for idx in range(data_count):
                    block_data = dataserializer.dumps(
                        data[idx], compress=handler._compress)

                    def _write_data(ser, writer):
                        with writer:
                            writer.write(ser)
                        return writer.filename

                    promises.append(
                        handler.create_bytes_writer(session_id,
                                                    data_keys[idx],
                                                    ser_data[idx].total_bytes,
                                                    packed=True,
                                                    with_merger_lock=True,
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _write_data,
                                                            block_data)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)

                for key in data_keys:
                    self.assertEqual(
                        sorted(
                            storage_manager_ref.get_data_locations(
                                session_id, [key])[0]),
                        [(0, DataStorageDevice.DISK)])

                dump_result = disk_file_merger_ref.dump_info()
                written_files = list(dump_result[2])
                for fn in written_files:
                    self.assertTrue(os.path.exists(fn))

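                # read every key back through packed readers under the merger lock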
                data_store = [None] * len(data)
                promises = []
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.loads(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(session_id,
                                                    data_keys[idx],
                                                    with_merger_lock=True,
                                                    packed=True,
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _read_data,
                                                            idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                data_store = [None] * len(data)
                promises = []
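                # read the same keys again without packing and deserialize the raw stream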
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.deserialize(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(session_id,
                                                    data_keys[idx],
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _read_data,
                                                            idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                storage_client.delete(session_id, data_keys)
                pool.sleep(0.1)
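                # after deletion the merged files should no longer exist on disk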
                for fn in written_files:
                    self.assertFalse(os.path.exists(fn))
Example no. 22
0
    def post_create(self):
        super(DaemonSleeperActor, self).post_create()
        self._daemon_ref = self.promise_ref(WorkerDaemonActor.default_uid())
        self._daemon_ref.register_process(self.ref(), os.getpid(), _tell=True)
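
The fragment above only shows the post_create hook. As a rough sketch of the kind of actor it might belong to (the WorkerActor base class and the test_sleep helper are illustrative assumptions, not taken from the example):

import os

class DaemonSleeperActor(WorkerActor):  # base class assumed for illustration
    def post_create(self):
        super().post_create()
        # register this actor's process with the worker daemon so it can be supervised
        self._daemon_ref = self.promise_ref(WorkerDaemonActor.default_uid())
        self._daemon_ref.register_process(self.ref(), os.getpid(), _tell=True)

    def test_sleep(self, seconds):  # hypothetical helper used by daemon tests
        self.ctx.sleep(seconds)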
Example no. 23
0
    def testSharedReadAndWrite(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        io_size = dataserializer.HEADER_LENGTH * 2
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((100, 100))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

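            # write the whole serialized object to shared memory in a single call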
            def _write_data(ser, writer):
                self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                with writer:
                    ser.write_to(writer)

            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1)) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            handler.delete(session_id, [data_key1])

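            # rewrite the same key, this time streaming the buffer in io_size-sized chunks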
            def _write_data(ser, writer):
                with writer:
                    for start in range(0, len(ser), io_size):
                        writer.write(ser[start:start + io_size])

            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1.to_buffer())) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])

            def _read_data_all(reader):
                with reader:
                    return dataserializer.deserialize(reader.read())

            handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                .then(_read_data_all) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)

            def _read_data_batch(reader):
                bio = BytesIO()
                with reader:
                    while True:
                        buf = reader.read(io_size)
                        if buf:
                            bio.write(buf)
                        else:
                            break
                return dataserializer.deserialize(bio.getvalue())

            handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                .then(_read_data_batch) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)
            handler.delete(session_id, [data_key1])
Example no. 24
0
    def testDiskReadAndWrite(self, *_):
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)

            session_id = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            for handler._compress in self._get_compress_types():
                data_key1 = str(uuid.uuid4())
                data_key2 = (str(uuid.uuid4()), 'subkey')

                storage_client.delete(session_id, [data_key1])
                storage_client.delete(session_id, [data_key2])
                self.rm_spill_dirs()

                def _write_data(ser, writer):
                    self.assertEqual(writer.nbytes, ser.total_bytes)
                    with writer:
                        ser.write_to(writer)
                    return writer.filename

                def _read_data(reader):
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test normal file write
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertTrue(os.path.exists(file_name))
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                # test write existing (this should produce an error)
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(StorageDataExists):
                    self.get_result(5)

                # test writing with unreferenced file
                storage_manager_ref.unregister_data(
                    session_id, [data_key1], (0, DataStorageDevice.DISK))
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertTrue(os.path.exists(file_name))
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                # test reading and verifying written data
                handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                    .then(_read_data) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data1)

                # test unregistering data
                handler.delete(session_id, [data_key1])
                while os.path.exists(file_name):
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(file_name))

                # test reading and writing with tuple keys
                handler.create_bytes_writer(session_id, data_key2, ser_data2.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data2)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.DISK)])

                handler.create_bytes_reader(session_id, data_key2, _promise=True) \
                    .then(_read_data) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data2)
Example no. 25
0
    def testSharedLoadFromBytes(self, *_):
        import logging
        logging.basicConfig(level=logging.DEBUG)
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            # load from bytes io
            disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
            with disk_handler.create_bytes_writer(
                    session_id, data_key1, ser_data1.total_bytes) as writer:
                ser_data1.write_to(writer)

            handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            disk_handler.delete(session_id, [data_key1])
            handler.delete(session_id, [data_key1])

            # load from bytes io till no capacity
            data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                         for _ in range(20)]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            for key, data in zip(data_keys, data_list):
                ser_data = dataserializer.serialize(data)
                with disk_handler.create_bytes_writer(
                        session_id, key, ser_data.total_bytes) as writer:
                    ser_data.write_to(writer)

            handler.load_from_bytes_io(session_id, data_keys, disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

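            # StorageFull reports the keys that could not be loaded into shared memory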
            affected_keys = set()
            try:
                self.get_result(5)
            except StorageFull as ex:
                affected_keys.update(ex.affected_keys)

            storage_client.delete(session_id, data_keys, [DataStorageDevice.DISK])

            self.assertLess(len(affected_keys), len(data_keys))
            self.assertGreater(len(affected_keys), 1)
            for k, size in zip(data_keys, storage_client.get_data_sizes(session_id, data_keys)):
                if k in affected_keys:
                    self.assertIsNone(size)
                else:
                    self.assertIsNotNone(size)