Example #1
0
def start_transfer_test_pool(**kwargs):
    """Start a single-process actor pool populated with the actors the
    transfer tests need, yield it, then tear the chunk holder down.

    Required keyword arguments (popped before pool creation):
        address: endpoint the pool listens on; also used as scheduler address.
        plasma_size: plasma store size handed to ChunkHolderActor.
    Any remaining keywords are forwarded to ``create_actor_pool``.
    """
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1,
                           backend='gevent',
                           address=address,
                           **kwargs) as pool:
        # cluster info actors are created first; later actors look them up
        pool.create_actor(SchedulerClusterInfoActor,
                          schedulers=[address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor,
                          schedulers=[address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor,
                          1024 * 1024 * 20,
                          uid=MemQuotaActor.default_uid())
        chunk_holder_ref = pool.create_actor(
            ChunkHolderActor, plasma_size, uid=ChunkHolderActor.default_uid())
        pool.create_actor(SpillActor)
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

        # FIX: wrap the yield so the chunk holder is destroyed even when the
        # consuming test raises — previously destroy() was silently skipped
        try:
            yield pool
        finally:
            chunk_holder_ref.destroy()
Example #2
0
def start_transfer_test_pool(**kwargs):
    """Spin up a single-process gevent pool carrying every actor the
    transfer test suite relies on, yield it to the caller, and guarantee
    the shared holder is destroyed on the way out.

    ``address`` and ``plasma_size`` are mandatory keyword arguments; all
    other keywords go straight to ``create_actor_pool``.
    """
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1, backend='gevent', address=address,
                           **kwargs) as pool:
        # cluster info actors come first so later actors can resolve them
        pool.create_actor(SchedulerClusterInfoActor, [address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(StorageManagerActor,
                          uid=StorageManagerActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, 1024 * 1024 * 20,
                          uid=MemQuotaActor.default_uid())
        holder_ref = pool.create_actor(SharedHolderActor, plasma_size,
                                       uid=SharedHolderActor.default_uid())
        pool.create_actor(StatusActor, address,
                          uid=StatusActor.default_uid())
        pool.create_actor(IORunnerActor)
        pool.create_actor(StorageClientActor,
                          uid=StorageClientActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(ReceiverManagerActor,
                          uid=ReceiverManagerActor.default_uid())

        try:
            yield pool
        finally:
            # always release the shared holder, even on test failure
            holder_ref.destroy()
Example #3
0
    def testDaemon(self):
        """Exercise WorkerDaemonActor: kill a supervised actor's process,
        restart it, and check that registered callbacks see the failure."""
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=2,
                               backend='gevent',
                               distributor=MarsDistributor(2, 'w:0:'),
                               address=mock_scheduler_addr) as pool:
            daemon_ref = pool.create_actor(
                WorkerDaemonActor, uid=WorkerDaemonActor.default_name())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            # the sleeper is placed in process 1 (uid prefix 'w:1:') so its
            # process can be killed without touching actors in process 0
            sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor,
                                                  uid='w:1:DaemonSleeperActor')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
            test_actor = pool.create_actor(DaemonTestActor)
            # daemon will invoke test_actor.handle_process_down on failures
            daemon_ref.register_callback(test_actor, 'handle_process_down')

            # start a 10-second sleep asynchronously (_tell=True: fire-and-forget)
            test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
            self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.sleep(0.5)

            daemon_ref.kill_actor_process(sleeper_ref)
            # repeated kill shall not produce errors
            daemon_ref.kill_actor_process(sleeper_ref)
            self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.restart_process(1)
            daemon_ref.handle_process_down([1])
            pool.sleep(1)
            # the sleeper actor must exist again after the restart
            self.assertTrue(pool.has_actor(sleeper_ref))
            # the interrupted sleep surfaces as WorkerProcessStopped
            with self.assertRaises(WorkerProcessStopped):
                test_actor.get_result()

            # a fresh sleep on the restarted actor must complete normally
            test_actor.run_test_sleep(sleeper_ref, 1)
            pool.sleep(1.5)
            test_actor.get_result()
Example #4
0
    def testCalcProcessFailure(self):
        """Verify that killing a calc actor's process mid-computation raises
        WorkerProcessStopped for the pending job and frees the cpu slot."""
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=2, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_status=False)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
            dispatch_ref = pool.actor_ref(DispatchActor.default_uid())
            # calc actor deliberately placed in process 1 so it can be killed
            # without affecting the test actor in process 0
            calc_ref = daemon_ref.create_actor(
                MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')

            test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
            test_actor.run_simple_calc(session_id, _tell=True)

            pool.sleep(2)
            proc_id = pool.distributor.distribute(calc_ref.uid)
            daemon_ref.kill_actor_process(calc_ref)
            # FIX: use self.assertFalse instead of a bare assert, consistent
            # with the unittest assertion style used elsewhere in this file
            self.assertFalse(daemon_ref.is_actor_process_alive(calc_ref))
            pool.restart_process(proc_id)
            daemon_ref.handle_process_down([proc_id])

            # the interrupted calculation must surface as WorkerProcessStopped
            with self.assertRaises(WorkerProcessStopped):
                self.wait_for_result(pool, test_actor)
            # the cpu slot should be back in the dispatcher's free list
            self.assertEqual(len(dispatch_ref.get_slots('cpu')), 1)
Example #5
0
 def post_create(self):
     """Register this actor as a free 'receiver' slot and resolve the
     optional receiver manager reference (left as None when absent)."""
     super().post_create()
     dispatch_ref = self.ctx.actor_ref(DispatchActor.default_uid())
     dispatch_ref.register_free_slot(self.uid, 'receiver')
     self._dispatch_ref = dispatch_ref
     # the receiver manager is optional; keep the ref only if it exists
     manager_ref = self.ctx.actor_ref(ReceiverManagerActor.default_uid())
     self._receiver_manager_ref = \
         manager_ref if self.ctx.has_actor(manager_ref) else None
Example #6
0
    def _start_calc_pool(self):
        """Start a single-process pool with every actor a CpuCalcActor needs,
        then yield ``(pool, test_actor)``; the shared holder is destroyed on
        exit even if the enclosed test body raises."""
        mock_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, backend='gevent',
                              address=mock_addr) as pool:
            # cluster info actors first; the pool address doubles as scheduler
            pool.create_actor(SchedulerClusterInfoActor, [mock_addr],
                              uid=SchedulerClusterInfoActor.default_uid())
            pool.create_actor(WorkerClusterInfoActor, [mock_addr],
                              uid=WorkerClusterInfoActor.default_uid())

            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(StatusActor,
                              mock_addr,
                              uid=StatusActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())
            pool.create_actor(IORunnerActor)
            # 1 MiB memory quota for the calc actor's allocations
            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            shared_holder_ref = pool.create_actor(
                SharedHolderActor, uid=SharedHolderActor.default_uid())
            pool.create_actor(InProcHolderActor)
            pool.create_actor(CpuCalcActor, uid=CpuCalcActor.default_uid())

            with self.run_actor_test(pool) as test_actor:
                try:
                    yield pool, test_actor
                finally:
                    # always release the shared holder's plasma resources
                    shared_holder_ref.destroy()
Example #7
0
    def testLoadStoreInOtherProcess(self):
        """Copy data between storage devices owned by different processes
        (shared memory vs per-process memory) and wait for each direction
        to finish."""
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=3,
                              address=test_addr,
                              distributor=MarsDistributor(3)) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

            # 1 MiB memory quota
            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            # one in-process holder per worker process (1 and 2)
            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1')
            pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2')
            pool.create_actor(IORunnerActor,
                              lock_free=True,
                              dispatched=False,
                              uid=IORunnerActor.gen_uid(1))

            # the actual copy logic lives in OtherProcessTestActor (process 0)
            test_ref = pool.create_actor(OtherProcessTestActor,
                                         uid='w:0:OtherProcTest')

            def _get_result():
                # poll until the test actor publishes a result, 10 s cap
                start_time = time.time()
                while test_ref.get_result() is None:
                    pool.sleep(0.5)
                    if time.time() - start_time > 10:
                        raise TimeoutError

            # shared memory (proc 0) -> proc memory (proc 1)
            test_ref.run_copy_test((0, DataStorageDevice.SHARED_MEMORY),
                                   (1, DataStorageDevice.PROC_MEMORY),
                                   _tell=True)
            _get_result()

            # proc memory (proc 1) -> shared memory (proc 0)
            test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                                   (0, DataStorageDevice.SHARED_MEMORY),
                                   _tell=True)
            _get_result()

            # proc memory (proc 1) -> proc memory (proc 2)
            test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                                   (2, DataStorageDevice.PROC_MEMORY),
                                   _tell=True)
            _get_result()
Example #8
0
    def testWorkerProcessRestart(self):
        """After killing a cpu calc actor's process through the daemon, the
        worker must restart it within ten seconds."""
        with self._start_worker_process() as (pool, worker_endpoint):
            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid(),
                                        address=worker_endpoint)
            dispatch_ref = pool.actor_ref(DispatchActor.default_uid(),
                                          address=worker_endpoint)
            first_cpu_slot = dispatch_ref.get_slots('cpu')[0]
            calc_ref = pool.actor_ref(first_cpu_slot, address=worker_endpoint)
            daemon_ref.kill_actor_process(calc_ref)

            # poll until the daemon reports the process alive again
            deadline = time.time() + 10
            while not daemon_ref.is_actor_process_alive(calc_ref):
                gevent.sleep(0.1)
                if time.time() > deadline:
                    raise TimeoutError('Check process restart timeout')
Example #9
0
    def testWorkerProcessRestart(self):
        """End-to-end restart check against a worker spawned as a real
        subprocess.

        A mock scheduler pool runs in-process; a worker is launched via
        ``python -m mars.worker``. Once ready, the worker's calc actor
        process is killed through the daemon and we wait for the daemon to
        report it alive again. The subprocess is always shut down in the
        ``finally`` block.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # FIX: pre-initialize so the finally block cannot hit a NameError
        # when pool/actor setup raises before Popen ever runs
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_name())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(),
                                            address=worker_endpoint)
                dispatch_ref = pool.actor_ref(DispatchActor.default_name(),
                                              address=worker_endpoint)
                cpu_slots = dispatch_ref.get_slots('cpu')
                calc_ref = pool.actor_ref(cpu_slots[0],
                                          address=worker_endpoint)
                daemon_ref.kill_actor_process(calc_ref)

                # poll until the process is reported alive again, 10 s cap
                check_start = time.time()
                while not daemon_ref.is_actor_process_alive(calc_ref):
                    gevent.sleep(0.1)
                    if time.time() - check_start > 10:
                        raise TimeoutError('Check process restart timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # graceful shutdown first (SIGINT), force-kill after 5 s
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None \
                            or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Example #10
0
    def testMemQuotaAllocation(self):
        """A quota request blocked by low system memory must be granted
        automatically once the (mocked) memory statistics improve."""
        from mars import resource
        from mars.utils import AttributeDict

        # fake memory stats: only 50 of 300 bytes available initially
        mock_mem_stat = AttributeDict(
            dict(total=300, available=50, used=0, free=50))
        local_pool_addr = 'localhost:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
                patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
            pool.create_actor(WorkerClusterInfoActor,
                              schedulers=[local_pool_addr],
                              uid=WorkerClusterInfoActor.default_name())
            pool.create_actor(StatusActor,
                              local_pool_addr,
                              uid=StatusActor.default_name())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            pool.create_actor(ProcessHelperActor,
                              uid=ProcessHelperActor.default_name())
            # refresh_time=0.1 makes the quota actor re-check memory quickly
            quota_ref = pool.create_actor(MemQuotaActor,
                                          300,
                                          refresh_time=0.1,
                                          uid=MemQuotaActor.default_name())

            # time_recs[0] = request time, time_recs[1] = grant time
            time_recs = []
            with self.run_actor_test(pool) as test_actor:
                ref = test_actor.promise_ref(quota_ref)
                time_recs.append(time.time())

                def actual_exec(x):
                    # runs only once the pending request is finally granted
                    ref.release_quota(x)
                    time_recs.append(time.time())
                    test_actor.set_result(None)

                ref.request_quota('req', 100, _promise=True) \
                    .then(functools.partial(actual_exec, 'req'))

                pool.sleep(0.5)
                # raise available memory so the pending request can proceed
                mock_mem_stat['available'] = 150
                mock_mem_stat['free'] = 150

                self.get_result(2)

            # the grant must have been delayed by the memory shortage
            self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
Example #11
0
    def create_standard_actors(cls,
                               pool,
                               address,
                               quota_size=None,
                               with_daemon=True,
                               with_status=True,
                               with_resource=False):
        """Populate *pool* with the standard set of worker actors used by
        the tests.

        Optional actors (resource, daemon, status) are toggled through the
        ``with_*`` flags; ``quota_size`` defaults to 1 MiB when not given.
        """
        effective_quota = quota_size or (1024 * 1024)

        # cluster info actors go first so later actors can locate them
        pool.create_actor(SchedulerClusterInfoActor, [address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(StorageManagerActor,
                          uid=StorageManagerActor.default_uid())
        if with_resource:
            pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
        if with_daemon:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
        if with_status:
            pool.create_actor(StatusActor, address,
                              uid=StatusActor.default_uid())

        pool.create_actor(SharedHolderActor, cls.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, effective_quota,
                          uid=MemQuotaActor.default_uid())
        pool.create_actor(ExecutionActor, uid=ExecutionActor.default_uid())
Example #12
0
 def post_create(self):
     """Advertise this actor as a free 'sender' slot on the dispatcher."""
     super().post_create()
     dispatch_ref = self.promise_ref(DispatchActor.default_uid())
     dispatch_ref.register_free_slot(self.uid, 'sender')
     self._dispatch_ref = dispatch_ref
Example #13
0
 def post_create(self):
     """Register this actor as a free 'cpu' slot on the dispatcher."""
     # FIX: invoke the base-class hook — every other post_create override
     # in this file calls super().post_create(), and skipping it here
     # silently drops any base-class initialization
     super().post_create()
     self._dispatch_ref = self.promise_ref(DispatchActor.default_uid())
     self._dispatch_ref.register_free_slot(self.uid, 'cpu')
Example #14
0
    def testSimpleTransfer(self):
        """Transfer chunks from a remote worker process to the local pool
        and verify the received data matches the remote copy.

        A remote transfer worker is launched as a subprocess; for two chunk
        keys, a sender is obtained through the remote dispatcher, data is
        pushed to the local pool, then both copies are fetched and compared.
        Cleanup (actors, subprocess, spill dirs) happens in ``finally``.
        """
        session_id = str(uuid.uuid4())

        local_pool_addr = 'localhost:%d' % get_next_port()
        remote_pool_addr = 'localhost:%d' % get_next_port()
        remote_chunk_keys = [str(uuid.uuid4()) for _ in range(9)]
        msg_queue = multiprocessing.Queue()

        remote_spill_dir = tempfile.mkdtemp(
            prefix='mars_test_simple_transfer_')

        proc = multiprocessing.Process(target=run_transfer_worker,
                                       args=(remote_pool_addr, session_id,
                                             remote_chunk_keys,
                                             remote_spill_dir, msg_queue))
        proc.start()
        try:
            # the worker reports its plasma socket path once it is ready
            remote_plasma_socket = msg_queue.get(timeout=30)
        except Empty:
            if proc.is_alive():
                proc.terminate()
            raise

        with start_transfer_test_pool(
                address=local_pool_addr,
                plasma_size=self.plasma_storage_size) as pool:
            sender_refs, receiver_refs = [], []
            for _ in range(2):
                sender_refs.append(
                    pool.create_actor(SenderActor, uid=str(uuid.uuid4())))
                receiver_refs.append(
                    pool.create_actor(ReceiverActor, uid=str(uuid.uuid4())))

            try:
                for data_id in (-1, 0):
                    chunk_key = remote_chunk_keys[data_id]

                    with self.run_actor_test(pool) as test_actor:
                        remote_dispatch_ref = test_actor.promise_ref(
                            DispatchActor.default_uid(),
                            address=remote_pool_addr)

                        def _call_send_data(sender_uid):
                            # push the chunk from the remote worker to us
                            sender_ref = test_actor.promise_ref(
                                sender_uid, address=remote_pool_addr)
                            return sender_ref.send_data(session_id,
                                                        chunk_key,
                                                        local_pool_addr,
                                                        _promise=True)

                        def _test_data_exist(*_):
                            # fetch both copies and compare element-wise
                            local_client_ref = test_actor.promise_ref(
                                StorageClientActor.default_uid())
                            remote_client_ref = test_actor.promise_ref(
                                StorageClientActor.default_uid(),
                                address=remote_pool_addr)

                            targets = [DataStorageDevice.PROC_MEMORY]
                            # FIX: dropped the stray trailing line-continuation
                            # backslash that previously followed this statement
                            return local_client_ref.get_object(session_id, chunk_key, targets, _promise=True) \
                                .then(lambda local_data: remote_client_ref.get_object(
                                    session_id, chunk_key, targets, _promise=True)
                                      .then(lambda remote_data: assert_array_equal(local_data, remote_data)))

                        remote_dispatch_ref.get_free_slot('sender', _promise=True) \
                            .then(_call_send_data) \
                            .then(_test_data_exist) \
                            .then(
                            lambda *_: test_actor.set_result(chunk_key),
                            lambda *exc: test_actor.set_result(exc, False),
                        )
                        self.assertEqual(self.get_result(60), chunk_key)

                # signal the remote worker that the test has finished
                msg_queue.put(1)
            finally:
                [
                    pool.destroy_actor(ref)
                    for ref in sender_refs + receiver_refs
                ]

                os.unlink(remote_plasma_socket)
                os.kill(proc.pid, signal.SIGINT)

                # allow up to ~2 s for a graceful exit, then terminate
                t = time.time()
                while proc.is_alive() and time.time() < t + 2:
                    time.sleep(1)
                if proc.is_alive():
                    proc.terminate()

                self.rm_spill_dirs(remote_spill_dir)
Example #15
0
 def post_create(self):
     """Register this mock receiver as a free 'receiver' slot."""
     # FIX: modernized legacy two-argument super() to the zero-argument
     # Python 3 form, consistent with the other overrides in this file
     super().post_create()
     self._dispatch_ref = self.ctx.actor_ref(DispatchActor.default_name())
     self._dispatch_ref.register_free_slot(self.uid, 'receiver')
Example #16
0
    def testSimpleTransfer(self):
        """Transfer chunks from a remote worker subprocess to the local pool
        and compare both copies, falling back to spill files when a chunk is
        no longer in plasma."""
        session_id = str(uuid.uuid4())

        local_pool_addr = 'localhost:%d' % get_next_port()
        remote_pool_addr = 'localhost:%d' % get_next_port()
        remote_chunk_keys = [str(uuid.uuid4()) for _ in range(9)]
        msg_queue = multiprocessing.Queue()

        remote_spill_dir = os.path.join(tempfile.gettempdir(),
                                        'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))

        proc = multiprocessing.Process(
            target=run_transfer_worker,
            args=(remote_pool_addr, session_id, remote_chunk_keys, remote_spill_dir, msg_queue)
        )
        proc.start()
        try:
            # the worker reports its plasma socket path once it is ready
            remote_plasma_socket = msg_queue.get(timeout=30)
        except Empty:
            if proc.is_alive():
                proc.terminate()
            raise

        with start_transfer_test_pool(address=local_pool_addr, plasma_size=self.plasma_storage_size) as pool:
            sender_refs, receiver_refs = [], []
            for _ in range(2):
                sender_refs.append(pool.create_actor(SenderActor, uid=str(uuid.uuid4())))
                receiver_refs.append(pool.create_actor(ReceiverActor, uid=str(uuid.uuid4())))

            try:
                for data_id in (-1, 0):
                    chunk_key = remote_chunk_keys[data_id]

                    with self.run_actor_test(pool) as test_actor:
                        remote_dispatch_ref = test_actor.promise_ref(
                            DispatchActor.default_name(), address=remote_pool_addr)
                        remote_mapper_ref = pool.actor_ref(
                            PlasmaKeyMapActor.default_name(), address=remote_pool_addr)
                        # NOTE(review): this plasma client is never explicitly
                        # disconnected — verify the connection is reclaimed
                        remote_plasma_client = plasma.connect(remote_plasma_socket, '', 0)
                        remote_store = PlasmaChunkStore(remote_plasma_client, remote_mapper_ref)

                        def _call_send_data(sender_uid):
                            # push the chunk from the remote worker to us
                            sender_ref = test_actor.promise_ref(sender_uid, address=remote_pool_addr)
                            return sender_ref.send_data(session_id, chunk_key, local_pool_addr, _promise=True)

                        def _test_data_exist(*_):
                            # local copy: plasma first, spill file as fallback
                            try:
                                local_data = test_actor._chunk_store.get(session_id, chunk_key)
                            except KeyError:
                                with open(build_spill_file_name(chunk_key), 'rb') as spill_file:
                                    local_data = dataserializer.load(spill_file)

                            # remote copy: same plasma-then-spill fallback
                            try:
                                remote_data = remote_store.get(session_id, chunk_key)
                            except KeyError:
                                with open(build_spill_file_name(chunk_key, remote_spill_dir), 'rb') as spill_file:
                                    remote_data = dataserializer.load(spill_file)
                            assert_array_equal(local_data, remote_data)

                            del local_data, remote_data

                        remote_dispatch_ref.get_free_slot('sender', _promise=True) \
                            .then(_call_send_data) \
                            .then(_test_data_exist) \
                            .then(
                            lambda *_: test_actor.set_result(chunk_key),
                            lambda *exc: test_actor.set_result(exc, False),
                        )
                    self.assertEqual(self.get_result(60), chunk_key)

                # signal the remote worker that the test has finished
                msg_queue.put(1)
            finally:
                [pool.destroy_actor(ref) for ref in sender_refs + receiver_refs]

                os.unlink(remote_plasma_socket)
                os.kill(proc.pid, signal.SIGINT)

                # allow up to ~2 s for a graceful exit, then terminate
                t = time.time()
                while proc.is_alive() and time.time() < t + 2:
                    time.sleep(1)
                if proc.is_alive():
                    proc.terminate()
Example #17
0
    def testClientReadAndWrite(self):
        """Exercise StorageClient reader/writer creation: plain and promised
        writers to disk, reads from disk, failed reads from an empty
        location, and copy-on-read into shared memory."""
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            # NOTE(review): mutates global options without restoring them —
            # confirm later tests do not depend on the previous value
            options.worker.lock_free_fileio = True
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                # records the on-disk file names produced by writers
                file_names = []

                def _write_data(ser, writer):
                    file_names.append(writer.filename)
                    self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                    with writer:
                        ser.write_to(writer)

                # test creating non-promised writer and write
                with storage_client.create_writer(session_id,
                                                  data_key1,
                                                  ser_data1.total_bytes,
                                                  (DataStorageDevice.DISK, ),
                                                  _promise=False) as writer:
                    _write_data(ser_data1, writer)
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key1])

                # test creating promised writer and write
                file_names[:] = []
                self.waitp(
                    storage_client.create_writer(
                        session_id, data_key2, ser_data1.total_bytes,
                        (DataStorageDevice.DISK, )).then(
                            functools.partial(_write_data, ser_data1)))
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.DISK)])

                def _read_data(reader):
                    # deserialize whole content under the reader's context
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test creating reader when data exist in location
                result = self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.DISK, )).then(_read_data))[0]
                assert_allclose(result, data1)

                # test creating reader when no data in location (should raise)
                with self.assertRaises(IOError):
                    storage_client.create_reader(
                        session_id,
                        data_key2, (DataStorageDevice.SHARED_MEMORY, ),
                        _promise=False)

                # test creating reader when copy needed
                self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.SHARED_MEMORY, )).then(_read_data))
                # after the copy the data lives on both devices
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY),
                     (0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key2])
                # deletion is asynchronous; wait for the file to disappear
                while os.path.exists(file_names[0]):
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(file_names[0]))
Example #18
0
    def testClientSpill(self, *_):
        """Test ``storage_client.copy_to()``, including spill behavior.

        Covers: copying a non-existent key (KeyError), copying into a
        location that already holds the data, error propagation when the
        underlying ``load_from`` fails, batch copies across multiple
        target devices, and a copy that must spill shared memory.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            session_id = str(uuid.uuid4())
            # 20 chunks of 655360 int16 (~1.25MB each), intended to
            # overflow the shared store so spills can be exercised
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client
                idx = 0

                shared_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.SHARED_MEMORY))
                proc_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.PROC_MEMORY))

                def _fill_data():
                    # put chunks into shared memory until StorageFull is
                    # raised; returns the index of the first key NOT stored
                    i = 0
                    for i, (key,
                            data) in enumerate(zip(data_keys[idx:],
                                                   data_list)):
                        try:
                            shared_handler.put_objects(session_id, [key],
                                                       [data])
                        except StorageFull:
                            break
                    return i + idx

                idx = _fill_data()

                # test copying non-existing keys
                storage_client.copy_to(session_id, ['non-exist-key'], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(KeyError):
                    self.get_result(5)

                # test copying into containing locations
                storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                # the data stays where it already is, no duplicate location
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_keys[0]])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY)])

                # test unsuccessful copy when no data at target
                def _mock_load_from(*_, **__):
                    # reject unconditionally to simulate a failed load
                    return promise.finished(*build_exc_info(SystemError),
                                            _accept=False)

                with patch_method(StorageHandler.load_from, _mock_load_from), \
                        self.assertRaises(SystemError):
                    storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.DISK]) \
                        .then(lambda *_: test_actor.set_result(None),
                              lambda *exc: test_actor.set_result(exc, accept=False))
                    self.get_result(5)

                # test successful copy for multiple objects
                storage_client.delete(session_id, [data_keys[idx - 1]])
                # weakrefs verify the source objects are released once
                # the copy completes and the proc-memory copies are deleted
                ref_data = weakref.ref(data_list[idx])
                ref_data2 = weakref.ref(data_list[idx + 1])
                proc_handler.put_objects(session_id, data_keys[idx:idx + 2],
                                         data_list[idx:idx + 2])
                data_list[idx:idx + 2] = [None, None]

                storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                       [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                proc_handler.delete(session_id, data_keys[idx:idx + 2])

                # first object lands in shared memory, second falls to disk
                self.assertEqual(
                    storage_manager_ref.get_data_locations(
                        session_id, data_keys[idx:idx + 2]),
                    [{(0, DataStorageDevice.SHARED_MEMORY)},
                     {(0, DataStorageDevice.DISK)}])
                self.assertIsNone(ref_data())
                self.assertIsNone(ref_data2())

                # test copy with spill
                idx += 2
                proc_handler.put_objects(session_id, [data_keys[idx]],
                                         [data_list[idx]])

                storage_client.copy_to(session_id, [data_keys[idx]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                # after spilling made room, the data resides in both
                # proc memory (source) and shared memory (target)
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_keys[idx]])[0]),
                    [(0, DataStorageDevice.PROC_MEMORY),
                     (0, DataStorageDevice.SHARED_MEMORY)])
Example #19
0
    def testClientPutAndGet(self):
        """Test batch ``put_objects`` / ``get_object(s)`` on the client.

        Verifies that a batch put exceeding shared-memory capacity spreads
        data across shared and in-process memory, that non-promise gets
        fail for mismatched locations and succeed for matching ones, and
        that promise-based gets convert between devices.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())
            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')

            session_id = str(uuid.uuid4())
            # 20 chunks of 655360 int16 (~1.25MB each), intended to
            # overflow shared memory so some land in proc memory
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            data_dict = dict(zip(data_keys, data_list))

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                # check batch object put with size exceeds
                storage_client.put_objects(session_id, data_keys, data_list,
                                           [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.PROC_MEMORY]) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                locations = storage_client.get_data_locations(
                    session_id, data_keys)
                loc_to_keys = defaultdict(list)
                for key, location in zip(data_keys, locations):
                    # every key lives in exactly one location
                    self.assertEqual(len(location), 1)
                    loc_to_keys[list(location)[0][-1]].append(key)
                # data should have overflowed into both device types
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.PROC_MEMORY]), 1)
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.SHARED_MEMORY]), 1)

                # pick the key outside the assertRaises block, so the
                # assertion only covers the get_object() call itself
                first_shared_key = loc_to_keys[
                    DataStorageDevice.SHARED_MEMORY][0]

                # check get object with all cases: requesting a shared-memory
                # key from proc memory without a promise cannot convert
                with self.assertRaises(IOError):
                    storage_client.get_object(session_id,
                                              first_shared_key,
                                              [DataStorageDevice.PROC_MEMORY],
                                              _promise=False)

                # non-promise get succeeds when the location matches
                shared_objs = storage_client.get_objects(
                    session_id, [first_shared_key],
                    [DataStorageDevice.SHARED_MEMORY],
                    _promise=False)
                self.assertEqual(len(shared_objs), 1)
                assert_allclose(shared_objs[0], data_dict[first_shared_key])

                # promise-based get may convert between devices
                storage_client.get_object(session_id, first_shared_key,
                                          [DataStorageDevice.PROC_MEMORY], _promise=True) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5),
                                data_dict[first_shared_key])

                storage_client.delete(session_id, data_keys)
                time.sleep(0.5)
                # putting into shared memory must not pin the original
                # ndarray: once local references are dropped, it is freed
                ref = weakref.ref(data_dict[data_keys[0]])
                storage_client.put_objects(session_id, data_keys[:1], [ref()],
                                           [DataStorageDevice.SHARED_MEMORY])
                data_list[:] = []
                data_dict.clear()
                self.assertIsNone(ref())
Example #20
0
    def testDispatch(self, *_):
        """Test slot registration, hashing and acquisition on DispatchActor.

        Creates two groups of slot actors, then chains promise-based slot
        acquisitions; the first ``group_size`` tasks should run nearly
        simultaneously while the extra one is delayed until a slot frees up.
        """
        # maps task key -> start timestamp (or 'NoneUID' sentinel)
        call_records = dict()
        group_size = 4

        mock_scheduler_addr = f'127.0.0.1:{get_next_port()}'
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=mock_scheduler_addr) as pool:
            dispatch_ref = pool.create_actor(DispatchActor,
                                             uid=DispatchActor.default_uid())
            # actors of g1
            [
                pool.create_actor(TaskActor, 'g1', call_records)
                for _ in range(group_size)
            ]
            [
                pool.create_actor(TaskActor, 'g2', call_records)
                for _ in range(group_size)
            ]

            self.assertEqual(len(dispatch_ref.get_slots('g1')), group_size)
            self.assertEqual(len(dispatch_ref.get_slots('g2')), group_size)
            # unknown group has no slots
            self.assertEqual(len(dispatch_ref.get_slots('g3')), 0)

            # hashing the same key must be deterministic
            self.assertEqual(dispatch_ref.get_hash_slot('g1', 'hash_str'),
                             dispatch_ref.get_hash_slot('g1', 'hash_str'))

            # acquiring with a dead callback target must not leak the slot
            dispatch_ref.acquire_free_slot('g1',
                                           callback=(('NonExist',
                                                      mock_scheduler_addr),
                                                     '_non_exist', {}))
            self.assertEqual(dispatch_ref.get_free_slots_num().get('g1'),
                             group_size)

            # tasks within [0, group_size - 1] will run almost simultaneously,
            # while the last one will be delayed due to lack of slots

            delay = 1

            with self.run_actor_test(pool) as test_actor:
                p = promise.finished()
                _dispatch_ref = test_actor.promise_ref(
                    DispatchActor.default_uid())

                def _call_on_dispatched(uid, key=None):
                    # uid is None when the group has no slots at all
                    if uid is None:
                        call_records[key] = 'NoneUID'
                    else:
                        test_actor.promise_ref(uid).queued_call(key,
                                                                delay,
                                                                _tell=True,
                                                                _wait=False)

                # chain group_size + 1 acquisitions on each group;
                # the key is bound eagerly via partial, per iteration
                for idx in range(group_size + 1):
                    p = p.then(lambda *_: _dispatch_ref.acquire_free_slot('g1', _promise=True)) \
                        .then(partial(_call_on_dispatched, key=f'{idx}_1')) \
                        .then(lambda *_: _dispatch_ref.acquire_free_slot('g2', _promise=True)) \
                        .then(partial(_call_on_dispatched, key=f'{idx}_2'))

                p.then(lambda *_: _dispatch_ref.acquire_free_slot('g3', _promise=True)) \
                    .then(partial(_call_on_dispatched, key='N_1')) \
                    .then(lambda *_: test_actor.set_result(None))

            self.get_result(20)

            # slot-less group resolved with None
            self.assertEqual(call_records['N_1'], 'NoneUID')
            # first group_size tasks started close together...
            self.assertLess(
                sum(
                    abs(call_records[f'{idx}_1'] - call_records['0_1'])
                    for idx in range(group_size)), delay * 0.5)
            # ...while the overflow task waited roughly one `delay`
            self.assertGreater(
                call_records[f'{group_size}_1'] - call_records['0_1'],
                delay * 0.5)
            self.assertLess(
                call_records[f'{group_size}_1'] - call_records['0_1'],
                delay * 1.5)

            dispatch_ref.destroy()
Example #21
0
    def testProcMemLoad(self):
        """Test loading data into proc memory from bytes and object IO.

        Writes serialized data to disk and loads it into proc memory via
        ``load_from_bytes_io``, then puts an object into shared memory and
        loads it via ``load_from_object_io``; in both cases verifies the
        resulting locations and that deletion releases the object.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))

            # load from bytes io: first write serialized bytes to disk
            disk_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))
            with disk_handler.create_bytes_writer(
                    session_id, data_key1, ser_data1.total_bytes) as writer:
                ser_data1.write_to(writer)

            handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            # data now exists both on disk (source) and in proc memory
            self.assertEqual(
                sorted(
                    storage_manager_ref.get_data_locations(
                        session_id,
                        [data_key1])[0]), [(0, DataStorageDevice.PROC_MEMORY),
                                           (0, DataStorageDevice.DISK)])

            disk_handler.delete(session_id, [data_key1])

            # deleting from the handler must release the loaded object
            data_load = handler.get_objects(session_id, [data_key1])[0]
            ref_data = weakref.ref(data_load)
            del data_load
            handler.delete(session_id, [data_key1])
            self.assertIsNone(ref_data())

            # load from object io: put into shared memory first
            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))
            shared_handler.put_objects(session_id, [data_key2], [data2])

            handler.load_from_object_io(session_id, [data_key2], shared_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            # data now exists both in shared memory and proc memory
            self.assertEqual(
                sorted(
                    storage_manager_ref.get_data_locations(
                        session_id, [data_key2])[0]),
                [(0, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)])

            shared_handler.delete(session_id, [data_key2])

            data_load = handler.get_objects(session_id, [data_key2])[0]
            ref_data = weakref.ref(data_load)
            del data_load
            handler.delete(session_id, [data_key2])
            self.assertIsNone(ref_data())
Example #22
0
    def testSharedHolderSpill(self):
        """Test ``spill_size()`` on the shared holder with a mocked IO runner.

        Fills the shared store, then checks: oversized spill requests raise,
        pinned keys cannot be spilled, LRU lifting protects keys, spilling
        already-satisfiable sizes returns immediately, fully-pinned stores
        raise NoDataToSpill, and spill errors propagate to the caller.
        """
        with self._start_shared_holder_pool() as (pool, test_actor):
            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(MockIORunnerActor,
                              uid=MockIORunnerActor.default_uid())

            manager_ref = pool.actor_ref(StorageManagerActor.default_uid())
            shared_holder_ref = pool.actor_ref(SharedHolderActor.default_uid())
            mock_runner_ref = pool.actor_ref(MockIORunnerActor.default_uid())
            status_ref = pool.actor_ref(StatusActor.default_uid())

            storage_client = test_actor.storage_client
            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            cache_allocations = status_ref.get_cache_allocations()
            self.assertGreater(cache_allocations['total'], 0)

            session_id = str(uuid.uuid4())
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            key_list = [str(uuid.uuid4()) for _ in range(20)]

            self._fill_shared_storage(session_id, key_list, data_list)
            data_size = manager_ref.get_data_sizes(session_id,
                                                   [key_list[0]])[0]

            # spill huge sizes
            with self.assertRaises(SpillSizeExceeded):
                self.waitp(
                    shared_handler.spill_size(self.plasma_storage_size * 2), )

            # spill size of two data chunks
            keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]
            pin_token = str(uuid.uuid4())
            # pin key 1 so it is skipped by the spill
            shared_holder_ref.pin_data_keys(session_id, key_list[1:2],
                                            pin_token)

            # key 0 is lifted to most-recently-used and key 1 is pinned,
            # so keys 2 and 3 are the spill candidates
            expect_spills = key_list[2:4]

            shared_holder_ref.lift_data_keys(session_id, [key_list[0]])
            shared_handler.spill_size(data_size * 2) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            pool.sleep(0.5)
            # when the key is in spill (here we trigger it manually in mock),
            # it cannot be spilled
            with self.assertRaises(PinDataKeyFailed):
                shared_holder_ref.pin_data_keys(session_id, key_list[2:3],
                                                str(uuid.uuid4()))

            # let the mocked runner finish the pending spill requests
            for k in key_list[2:6]:
                mock_runner_ref.submit_item(session_id, k)
            self.get_result(5)

            shared_holder_ref.unpin_data_keys(session_id, key_list[1:2],
                                              pin_token)
            keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
            # exactly the expected candidates were evicted
            self.assertSetEqual(
                set(keys_before) - set(keys_after), set(expect_spills))

            # spill size of a single chunk, should return immediately
            keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]

            shared_handler.spill_size(data_size) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            # nothing was evicted since enough space was already free
            keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
            self.assertSetEqual(set(keys_before), set(keys_after))

            # when all pinned, nothing can be spilled
            # and spill_size() should raises an error
            pin_token = str(uuid.uuid4())
            shared_holder_ref.pin_data_keys(session_id, key_list, pin_token)

            shared_handler.spill_size(data_size * 3) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            with self.assertRaises(NoDataToSpill):
                self.get_result(5)

            shared_holder_ref.unpin_data_keys(session_id, key_list, pin_token)

            # when some errors raise when spilling,
            # spill_size() should report it

            mock_runner_ref.clear_submissions()
            shared_handler.spill_size(data_size * 3) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            pool.sleep(0.5)
            spill_keys = mock_runner_ref.get_request_keys()
            # fail the first spill item, succeed the rest
            mock_runner_ref.submit_item(session_id, spill_keys[0],
                                        build_exc_info(SystemError))
            for k in spill_keys[1:]:
                mock_runner_ref.submit_item(session_id, k)

            with self.assertRaises(SystemError):
                self.get_result(5)
Example #23
0
    def post_create(self):
        """Register this actor as a free 'iorunner' slot after creation."""
        super().post_create()

        ref = self.ctx.actor_ref(DispatchActor.default_uid())
        ref.register_free_slot(self.uid, 'iorunner')