Esempio n. 1
0
    def testDiskReadAndWritePacked(self, *_):
        """Write packed (pre-compressed) bytes into disk storage and read
        them back, once per supported compression type."""
        pool_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=pool_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [pool_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor, pool_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            src_array = np.random.random((10, 10))
            ser_src = dataserializer.serialize(src_array)

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            def _write_data(content, writer):
                # dump the already-packed payload and report the file name
                with writer:
                    writer.write(content)
                return writer.filename

            def _read_data(reader):
                with reader:
                    return dataserializer.loads(reader.read())

            for compress in self._get_compress_types():
                handler._compress = compress
                data_key = str(uuid.uuid4())

                # start from a clean slate for every compression type
                storage_client.delete(session_id, [data_key])
                self.rm_spill_dirs()

                packed_bytes = dataserializer.dumps(src_array,
                                                    compress=compress)

                handler.create_bytes_writer(session_id, data_key,
                                            ser_src.total_bytes,
                                            packed=True, _promise=True) \
                    .then(functools.partial(_write_data, packed_bytes)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertEqual(
                    sorted(manager_ref.get_data_locations(
                        session_id, [data_key])[0]),
                    [(0, DataStorageDevice.DISK)])
                self.assertTrue(os.path.exists(file_name))

                handler.create_bytes_reader(session_id, data_key,
                                            packed=True, _promise=True) \
                    .then(_read_data) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), src_array)
Esempio n. 2
0
    def testDiskLoad(self, *_):
        """Load data into disk storage from a bytes reader and from an
        object-IO source, verifying resulting locations and that the
        proc-memory source object is released on delete."""
        pool_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=pool_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            array1 = np.random.random((10, 10))
            array2 = np.random.random((10, 10))
            ser_array1 = dataserializer.serialize(array1)

            session_id = str(uuid.uuid4())
            key1 = str(uuid.uuid4())
            key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            disk_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            # part 1: load from a bytes reader backed by shared memory
            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))
            with shared_handler.create_bytes_writer(
                    session_id, key1, ser_array1.total_bytes) as writer:
                ser_array1.write_to(writer)

            disk_handler.load_from_bytes_io(session_id, [key1], shared_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            key1_locations = sorted(
                manager_ref.get_data_locations(session_id, [key1])[0])
            self.assertEqual(key1_locations,
                             [(0, DataStorageDevice.SHARED_MEMORY),
                              (0, DataStorageDevice.DISK)])

            shared_handler.delete(session_id, [key1])
            disk_handler.delete(session_id, [key1])

            # part 2: load from object IO held in process memory; track the
            # source ndarray so we can prove it is freed afterwards
            array2_ref = weakref.ref(array2)
            proc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_objects(session_id, [key2], [array2])
            del array2

            disk_handler.load_from_object_io(session_id, [key2], proc_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            key2_locations = sorted(
                manager_ref.get_data_locations(session_id, [key2])[0])
            self.assertEqual(key2_locations,
                             [(0, DataStorageDevice.PROC_MEMORY),
                              (0, DataStorageDevice.DISK)])

            # dropping the proc-memory copy must release the original array
            proc_handler.delete(session_id, [key2])
            self.assertIsNone(array2_ref())
            disk_handler.delete(session_id, [key2])
Esempio n. 3
0
    def testSharedPutAndGet(self, *_):
        """Exercise put, get and delete of objects in shared-memory storage,
        including pre-serialized and raw-buffer inputs."""
        pool_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=pool_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              uid=SharedHolderActor.default_uid())

            array1 = np.random.random((10, 10))
            array2 = np.random.random((10, 10))
            ser_array2 = dataserializer.serialize(array2)
            buf_array2 = ser_array2.to_buffer()

            session_id = str(uuid.uuid4())
            key1 = str(uuid.uuid4())
            key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            # plain object put: data should land in shared memory only
            handler.put_objects(session_id, [key1], [array1])
            key1_locations = sorted(
                manager_ref.get_data_locations(session_id, [key1])[0])
            self.assertEqual(key1_locations,
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            assert_allclose(array1,
                            handler.get_objects(session_id, [key1])[0])

            # after delete the key has no locations and get raises KeyError
            handler.delete(session_id, [key1])
            self.assertEqual(
                list(manager_ref.get_data_locations(session_id, [key1])[0]),
                [])
            with self.assertRaises(KeyError):
                handler.get_objects(session_id, [key1])

            # put a pre-serialized object
            handler.put_objects(session_id, [key2], [ser_array2],
                                serialize=True)
            assert_allclose(array2,
                            handler.get_objects(session_id, [key2])[0])
            handler.delete(session_id, [key2])

            # put a raw serialized buffer
            handler.put_objects(session_id, [key2], [buf_array2],
                                serialize=True)
            assert_allclose(array2,
                            handler.get_objects(session_id, [key2])[0])
            handler.delete(session_id, [key2])
Esempio n. 4
0
    def testLoadStoreInOtherProcess(self):
        """Drive cross-process data copies through OtherProcessTestActor,
        covering shared-memory <-> proc-memory routes across workers."""
        pool_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=3,
                              address=pool_addr,
                              distributor=MarsDistributor(3)) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2,
                              uid=MemQuotaActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1')
            pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2')
            pool.create_actor(IORunnerActor,
                              lock_free=True,
                              dispatched=False,
                              uid=IORunnerActor.gen_uid(1))

            test_ref = pool.create_actor(OtherProcessTestActor,
                                         uid='w:0:OtherProcTest')

            def _wait_result():
                # poll until the test actor reports; give up after 10s
                deadline = time.time() + 10
                while test_ref.get_result() is None:
                    pool.sleep(0.5)
                    if time.time() > deadline:
                        raise TimeoutError

            # (source location, destination location) routes to exercise
            copy_routes = [
                ((0, DataStorageDevice.SHARED_MEMORY),
                 (1, DataStorageDevice.PROC_MEMORY)),
                ((1, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)),
                ((1, DataStorageDevice.PROC_MEMORY),
                 (2, DataStorageDevice.PROC_MEMORY)),
            ]
            for src_loc, dst_loc in copy_routes:
                test_ref.run_copy_test(src_loc, dst_loc, _tell=True)
                _wait_result()
Esempio n. 5
0
    def testSharedLoadFromObjects(self, *_):
        """Load data into shared memory from a proc-memory object source and
        verify the source ndarray is released once its copy is deleted."""
        pool_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=pool_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              uid=SharedHolderActor.default_uid())

            src_array = np.random.random((10, 10))

            session_id = str(uuid.uuid4())
            data_key = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            # keep a weak reference so we can prove the source gets freed
            src_ref = weakref.ref(src_array)

            proc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_objects(session_id, [data_key], [src_array])
            del src_array

            shared_handler.load_from_object_io(session_id, [data_key], proc_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(
                sorted(manager_ref.get_data_locations(
                    session_id, [data_key])[0]),
                [(0, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)])

            # dropping the proc-memory copy must free the original ndarray
            proc_handler.delete(session_id, [data_key])
            self.assertIsNone(src_ref())
            shared_handler.delete(session_id, [data_key])
Esempio n. 6
0
    def _start_shared_holder_pool(self):
        """Generator helper: start an actor pool with shared-holder plumbing
        (cluster info, status, daemon, storage manager, plasma key map and
        shared holder) and yield ``(pool, test_actor)`` to the caller."""
        pool_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=pool_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [pool_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor, pool_addr,
                              uid=StatusActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())
            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            yield pool, test_actor
Esempio n. 7
0
    def testCudaMemPutAndGet(self):
        """Put/get ndarray, Series and DataFrame data in CUDA storage and
        check the fetched objects are device-typed and round-trip intact."""
        pool_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=pool_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(CudaHolderActor)

            raw = np.random.random((10, 10))
            # each suite: (host data, expected device type,
            #              device->host converter, comparison function)
            suites = [
                (raw, cp.ndarray, cp.asnumpy, assert_allclose),
                (pd.Series(raw.flatten()), cudf.Series,
                 lambda o: o.to_pandas(), pd.testing.assert_series_equal),
                (pd.DataFrame(dict(col=raw.flatten())), cudf.DataFrame,
                 lambda o: o.to_pandas(), pd.testing.assert_frame_equal),
            ]

            for data, device_type, to_host, assert_equal in suites:
                ser_data = dataserializer.serialize(data)

                session_id = str(uuid.uuid4())
                key1 = str(uuid.uuid4())
                key2 = str(uuid.uuid4())

                storage_client = test_actor.storage_client
                handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.CUDA))

                # raw object put: data should be registered on the CUDA device
                handler.put_objects(session_id, [key1], [data])
                self.assertEqual(
                    sorted(manager_ref.get_data_locations(
                        session_id, [key1])[0]),
                    [(0, DataStorageDevice.CUDA)])
                self.assertIsInstance(
                    handler.get_objects(session_id, [key1])[0], device_type)
                assert_equal(
                    data, to_host(handler.get_objects(session_id, [key1])[0]))

                # deletion clears locations and makes the key unknown
                handler.delete(session_id, [key1])
                self.assertEqual(
                    sorted(manager_ref.get_data_locations(
                        session_id, [key1])[0]),
                    [])
                with self.assertRaises(KeyError):
                    handler.get_objects(session_id, [key1])

                # put via a pre-serialized payload
                handler.put_objects(session_id, [key2], [ser_data],
                                    serialize=True)
                self.assertIsInstance(
                    handler.get_objects(session_id, [key2])[0], device_type)
                assert_equal(
                    data, to_host(handler.get_objects(session_id, [key2])[0]))
                handler.delete(session_id, [key2])
Esempio n. 8
0
    def testClientSpill(self, *_):
        """Fill shared memory to capacity, then verify ``copy_to`` behavior:
        non-existing keys, targets already containing the data, failing
        loads, multi-object copies that spill to disk, and a copy under
        shared-memory pressure.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            session_id = str(uuid.uuid4())
            # 20 int16 arrays of 655360 elements each -- presumably sized to
            # exceed plasma_storage_size so that _fill_data hits StorageFull
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client
                idx = 0

                shared_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.SHARED_MEMORY))
                proc_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.PROC_MEMORY))

                def _fill_data():
                    # put objects into shared memory until StorageFull is
                    # raised; returns the index of the first key NOT stored
                    i = 0
                    for i, (key,
                            data) in enumerate(zip(data_keys[idx:],
                                                   data_list)):
                        try:
                            shared_handler.put_objects(session_id, [key],
                                                       [data])
                        except StorageFull:
                            break
                    return i + idx

                idx = _fill_data()

                # test copying non-existing keys
                storage_client.copy_to(session_id, ['non-exist-key'], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(KeyError):
                    self.get_result(5)

                # test copying into containing locations
                storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_keys[0]])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY)])

                # test unsuccessful copy when no data at target
                def _mock_load_from(*_, **__):
                    # always reject the load promise with a SystemError
                    return promise.finished(*build_exc_info(SystemError),
                                            _accept=False)

                with patch_method(StorageHandler.load_from, _mock_load_from), \
                        self.assertRaises(SystemError):
                    storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.DISK]) \
                        .then(lambda *_: test_actor.set_result(None),
                              lambda *exc: test_actor.set_result(exc, accept=False))
                    self.get_result(5)

                # test successful copy for multiple objects
                storage_client.delete(session_id, [data_keys[idx - 1]])
                # weak refs on the two source arrays to check they get freed
                ref_data = weakref.ref(data_list[idx])
                ref_data2 = weakref.ref(data_list[idx + 1])
                proc_handler.put_objects(session_id, data_keys[idx:idx + 2],
                                         data_list[idx:idx + 2])
                data_list[idx:idx + 2] = [None, None]

                storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                       [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                proc_handler.delete(session_id, data_keys[idx:idx + 2])

                # first object lands in shared memory, second on disk
                self.assertEqual(
                    storage_manager_ref.get_data_locations(
                        session_id, data_keys[idx:idx + 2]),
                    [{(0, DataStorageDevice.SHARED_MEMORY)},
                     {(0, DataStorageDevice.DISK)}])
                # the source objects must be released after copy + delete
                self.assertIsNone(ref_data())
                self.assertIsNone(ref_data2())

                # test copy with spill
                idx += 2
                proc_handler.put_objects(session_id, [data_keys[idx]],
                                         [data_list[idx]])

                storage_client.copy_to(session_id, [data_keys[idx]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_keys[idx]])[0]),
                    [(0, DataStorageDevice.PROC_MEMORY),
                     (0, DataStorageDevice.SHARED_MEMORY)])
Esempio n. 9
0
    def testClientPutAndGet(self):
        """Batch-put data across shared and proc memory, get from each
        location (promise and non-promise paths), and verify that deleting
        all keys releases the stored objects.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())
            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')

            session_id = str(uuid.uuid4())
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            data_dict = dict(zip(data_keys, data_list))

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                # check batch object put with size exceeds: data that does
                # not fit in shared memory should end up in proc memory
                storage_client.put_objects(session_id, data_keys, data_list,
                                           [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.PROC_MEMORY]) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                locations = storage_client.get_data_locations(
                    session_id, data_keys)
                # group keys by the device that ended up holding them
                loc_to_keys = defaultdict(list)
                for key, location in zip(data_keys, locations):
                    self.assertEqual(len(location), 1)
                    loc_to_keys[list(location)[0][-1]].append(key)
                # both devices must have received a share of the data
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.PROC_MEMORY]), 1)
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.SHARED_MEMORY]), 1)

                # pick the probe key OUTSIDE the assertRaises block, so the
                # expected IOError can only come from get_object itself and
                # the name is safely available after the block
                first_shared_key = loc_to_keys[
                    DataStorageDevice.SHARED_MEMORY][0]

                # non-promise get from a device that does not hold the key
                with self.assertRaises(IOError):
                    storage_client.get_object(session_id,
                                              first_shared_key,
                                              [DataStorageDevice.PROC_MEMORY],
                                              _promise=False)

                # non-promise get from the holding device succeeds
                shared_objs = storage_client.get_objects(
                    session_id, [first_shared_key],
                    [DataStorageDevice.SHARED_MEMORY],
                    _promise=False)
                self.assertEqual(len(shared_objs), 1)
                assert_allclose(shared_objs[0], data_dict[first_shared_key])

                # promise-based get can serve the key into proc memory
                storage_client.get_object(session_id, first_shared_key,
                                          [DataStorageDevice.PROC_MEMORY], _promise=True) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5),
                                data_dict[first_shared_key])

                # after deleting all keys and dropping our own references,
                # the stored ndarray must be garbage-collected (shared
                # memory does not pin the original host object)
                storage_client.delete(session_id, data_keys)
                time.sleep(0.5)
                ref = weakref.ref(data_dict[data_keys[0]])
                storage_client.put_objects(session_id, data_keys[:1], [ref()],
                                           [DataStorageDevice.SHARED_MEMORY])
                data_list[:] = []
                data_dict.clear()
                self.assertIsNone(ref())
Esempio n. 10
0
    def testWebApi(self):
        """End-to-end smoke test of the web API: run tensor graphs through a
        web session, exercise fallback paths (missing LZ4 decompressors,
        pickle serialization), probe the web UI endpoints, and verify worker
        storage is empty after the session closes.
        """
        service_ep = 'http://127.0.0.1:' + self.web_port
        # cap run time at 120s on CI; -1 presumably means no timeout locally
        timeout = 120 if 'CI' in os.environ else -1
        with new_session(service_ep) as sess:
            self.assertEqual(sess.count_workers(), 1)

            a = mt.ones((100, 100), chunk_size=30)
            b = mt.ones((100, 100), chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            assert_array_equal(value, np.ones((100, 100)) * 100)

            # check resubmission
            value2 = sess.run(c, timeout=timeout)
            assert_array_equal(value, value2)

            # check when local compression libs are missing
            from mars.serialize import dataserializer
            try:
                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                assert_array_equal(value, np.ones((10, 10)) * 10)

                # disable all LZ4 entry points so fetch must fall back
                dataserializer.decompressors[dataserializer.CompressType.LZ4] = None
                dataserializer.decompressobjs[dataserializer.CompressType.LZ4] = None
                dataserializer.compress_openers[dataserializer.CompressType.LZ4] = None

                assert_array_equal(sess.fetch(c), np.ones((10, 10)) * 10)
            finally:
                # always restore the module-level LZ4 handlers
                dataserializer.decompressors[dataserializer.CompressType.LZ4] = dataserializer.lz4_decompress
                dataserializer.decompressobjs[dataserializer.CompressType.LZ4] = dataserializer.lz4_decompressobj
                dataserializer.compress_openers[dataserializer.CompressType.LZ4] = dataserializer.lz4_open

            # check serialization by pickle
            try:
                sess._sess._serial_type = SerialType.PICKLE

                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                assert_array_equal(value, np.ones((10, 10)) * 10)
            finally:
                sess._sess._serial_type = SerialType.ARROW

            va = np.random.randint(0, 10000, (100, 100))
            vb = np.random.randint(0, 10000, (100, 100))
            a = mt.array(va, chunk_size=30)
            b = mt.array(vb, chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            assert_array_equal(value, va.dot(vb))

            # collected before the session ends; task ids used further down
            graphs = sess.get_graph_states()

            # make sure status got uploaded
            time.sleep(1.5)

            # check web UI requests
            res = requests.get(service_ep)
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/scheduler' % (service_ep,))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/scheduler/127.0.0.1:%s' % (service_ep, self.scheduler_port))
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/worker' % (service_ep,))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/worker/127.0.0.1:%s' % (service_ep, self.worker_port))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/worker/127.0.0.1:%s/timeline' % (service_ep, self.worker_port))
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/session' % (service_ep,))
            self.assertEqual(res.status_code, 200)
            task_id = next(iter(graphs.keys()))
            res = requests.get('%s/session/%s/graph/%s' % (service_ep, sess._session_id, task_id))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/session/%s/graph/%s/running_nodes' % (service_ep, sess._session_id, task_id))
            self.assertEqual(res.status_code, 200)

            from mars.web.task_pages import PROGRESS_APP_NAME
            res = requests.get('%s/%s?session_id=%s&task_id=%s'
                               % (service_ep, PROGRESS_APP_NAME, sess._session_id, task_id))
            self.assertEqual(res.status_code, 200)

            from mars.web.worker_pages import TIMELINE_APP_NAME
            res = requests.get('%s/%s?endpoint=127.0.0.1:%s'
                               % (service_ep, TIMELINE_APP_NAME, self.worker_port))
            self.assertEqual(res.status_code, 200)

        # make sure all chunks freed when session quits
        from mars.worker.storage import StorageManagerActor
        actor_client = new_client()
        storage_manager_ref = actor_client.actor_ref(StorageManagerActor.default_uid(),
                                                     address='127.0.0.1:' + str(self.worker_port))
        self.assertSetEqual(set(storage_manager_ref.dump_keys()), set())
Esempio n. 11
0
    def testDiskReadAndWrite(self, *_):
        """Exercise disk-based byte writers/readers of the storage client.

        For every compression type from ``self._get_compress_types()``, this
        covers: a normal write, rejection of a duplicate write, re-writing
        after the key is unregistered, read-back verification, asynchronous
        deletion, and writes/reads addressed by tuple-form data keys.
        """
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            # two independent random payloads and their serialized forms
            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)

            session_id = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            # NOTE: the loop target assigns directly onto the handler's
            # private ``_compress`` attribute so the same scenario runs once
            # per supported compression type.
            for handler._compress in self._get_compress_types():
                data_key1 = str(uuid.uuid4())
                data_key2 = (str(uuid.uuid4()), 'subkey')  # tuple-form key

                # start each iteration from a clean slate
                storage_client.delete(session_id, [data_key1])
                storage_client.delete(session_id, [data_key2])
                self.rm_spill_dirs()

                def _write_data(ser, writer):
                    # the writer must be sized exactly for the payload
                    self.assertEqual(writer.nbytes, ser.total_bytes)
                    with writer:
                        ser.write_to(writer)
                    return writer.filename

                def _read_data(reader):
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test normal file write
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertTrue(os.path.exists(file_name))
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                # test write existing (this should produce an error)
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(StorageDataExists):
                    self.get_result(5)

                # test writing with unreferenced file
                storage_manager_ref.unregister_data(
                    session_id, [data_key1], (0, DataStorageDevice.DISK))
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertTrue(os.path.exists(file_name))
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                # test reading and verifying written data
                handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                    .then(_read_data) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data1)

                # test unregistering data
                handler.delete(session_id, [data_key1])
                # deletion happens asynchronously; poll until the file is gone
                while os.path.exists(file_name):
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(file_name))

                # test reading and writing with tuple keys
                handler.create_bytes_writer(session_id, data_key2, ser_data2.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data2)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.DISK)])

                handler.create_bytes_reader(session_id, data_key2, _promise=True) \
                    .then(_read_data) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data2)
Esempio n. 12
0
    def testWebApi(self):
        """End-to-end smoke test of the web API and UI endpoints.

        Runs tensor/dataframe computations through a web session (including
        resubmission, a fallback path with LZ4 decompressors removed, and
        pickle serialization), checks log fetching, then issues HTTP GETs
        against the scheduler/worker/session pages and finally verifies all
        stored chunks are freed once the session closes.
        """
        service_ep = 'http://127.0.0.1:' + self.web_port
        # generous timeout on CI; -1 presumably means "no timeout" locally
        timeout = 120 if 'CI' in os.environ else -1
        with new_session(service_ep) as sess:
            session_id = sess._session_id
            self.assertEqual(sess.count_workers(), 1)

            a = mt.ones((100, 100), chunk_size=30)
            b = mt.ones((100, 100), chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            np.testing.assert_array_equal(value, np.ones((100, 100)) * 100)

            # check resubmission
            value2 = sess.run(c, timeout=timeout)
            np.testing.assert_array_equal(value, value2)

            # check when local compression libs are missing
            from mars.serialize import dataserializer
            try:
                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                np.testing.assert_array_equal(value, np.ones((10, 10)) * 10)

                # simulate a client without LZ4 support by nulling the
                # registered decompressors/openers for that codec
                dataserializer.decompressors[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.decompressobjs[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.compress_openers[
                    dataserializer.CompressType.LZ4] = None

                np.testing.assert_array_equal(sess.fetch(c),
                                              np.ones((10, 10)) * 10)
            finally:
                # restore the LZ4 codec hooks for the rest of the suite
                dataserializer.decompressors[
                    dataserializer.CompressType.
                    LZ4] = dataserializer.lz4_decompress
                dataserializer.decompressobjs[
                    dataserializer.CompressType.
                    LZ4] = dataserializer.lz4_decompressobj
                dataserializer.compress_openers[
                    dataserializer.CompressType.LZ4] = dataserializer.lz4_open

            # check serialization by pickle
            try:
                sess._sess._serial_type = SerialType.PICKLE

                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                np.testing.assert_array_equal(value, np.ones((10, 10)) * 10)

                raw = pd.DataFrame(np.random.rand(10, 5),
                                   columns=list('ABCDE'),
                                   index=pd.RangeIndex(10, 0, -1))
                data = md.DataFrame(raw).astype({'E': 'arrow_string'})
                ret_data = data.execute(session=sess).fetch(session=sess)
                self.assertEqual(ret_data.dtypes['E'], np.dtype('O'))
                pd.testing.assert_frame_equal(ret_data.astype({'E': 'float'}),
                                              raw,
                                              check_less_precise=True)

                raw = pd.Series(np.random.rand(10),
                                index=pd.RangeIndex(10, 0, -1),
                                name='r')
                data = md.Series(raw).astype('Arrow[string]')
                ret_data = data.execute(session=sess).fetch(session=sess)
                self.assertEqual(ret_data.dtype, np.dtype('O'))
                pd.testing.assert_series_equal(ret_data.astype('float'), raw)
            finally:
                sess._sess._serial_type = SerialType.ARROW

            va = np.random.randint(0, 10000, (100, 100))
            vb = np.random.randint(0, 10000, (100, 100))
            a = mt.array(va, chunk_size=30)
            b = mt.array(vb, chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            np.testing.assert_array_equal(value, va.dot(vb))

            # test fetch log
            def f():
                print('test')

            r = mr.spawn(f).execute(session=sess, timeout=timeout)
            self.assertEqual(str(r.fetch_log()).strip(), 'test')
            self.assertEqual(str(r.fetch_log(offsets=0)).strip(), 'test')
            # the second default-offset fetch returns nothing new
            self.assertEqual(str(r.fetch_log()).strip(), '')
            self.assertEqual(
                str(r.fetch_log(offsets='-0.003k', sizes=2)).strip(), 'st')

            graphs = sess.get_graph_states()

            # make sure status got uploaded
            time.sleep(1.5)

            # check web UI requests
            res = requests.get(service_ep)
            self.assertEqual(res.status_code, 200)

            res = requests.get(f'{service_ep}/scheduler')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/scheduler/127.0.0.1:{self.scheduler_port}')
            self.assertEqual(res.status_code, 200)

            res = requests.get(f'{service_ep}/worker')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/worker/127.0.0.1:{self.worker_port}')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/worker/127.0.0.1:{self.worker_port}/timeline')
            self.assertEqual(res.status_code, 200)

            res = requests.get(f'{service_ep}/session')
            self.assertEqual(res.status_code, 200)
            task_id = next(iter(graphs.keys()))
            res = requests.get(
                f'{service_ep}/session/{session_id}/graph/{task_id}')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/session/{session_id}/graph/{task_id}/running_nodes'
            )
            self.assertEqual(res.status_code, 200)

            from mars.web.task_pages import PROGRESS_APP_NAME
            res = requests.get(
                f'{service_ep}/{PROGRESS_APP_NAME}?session_id={session_id}&task_id={task_id}'
            )
            self.assertEqual(res.status_code, 200)

            from mars.web.worker_pages import TIMELINE_APP_NAME
            res = requests.get(
                f'{service_ep}/{TIMELINE_APP_NAME}?endpoint=127.0.0.1:{self.worker_port}'
            )
            self.assertEqual(res.status_code, 200)

        # make sure all chunks freed when session quits
        from mars.worker.storage import StorageManagerActor
        actor_client = new_client()
        storage_manager_ref = actor_client.actor_ref(
            StorageManagerActor.default_uid(),
            address='127.0.0.1:' + str(self.worker_port))
        self.assertSetEqual(set(storage_manager_ref.dump_keys()), set())
Esempio n. 13
0
    def testSharedHolderSpill(self):
        """Exercise spill behavior of the shared holder via a mock IO runner.

        Verifies: oversized spill requests raise ``SpillSizeExceeded``; pinned
        and lifted keys are protected from spilling; keys currently being
        spilled cannot be pinned; a spill request already satisfied returns
        without evicting; fully-pinned storage raises ``NoDataToSpill``; and
        an error raised during spilling propagates from ``spill_size()``.
        """
        with self._start_shared_holder_pool() as (pool, test_actor):
            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(MockIORunnerActor,
                              uid=MockIORunnerActor.default_uid())

            manager_ref = pool.actor_ref(StorageManagerActor.default_uid())
            shared_holder_ref = pool.actor_ref(SharedHolderActor.default_uid())
            mock_runner_ref = pool.actor_ref(MockIORunnerActor.default_uid())
            status_ref = pool.actor_ref(StatusActor.default_uid())

            storage_client = test_actor.storage_client
            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            cache_allocations = status_ref.get_cache_allocations()
            self.assertGreater(cache_allocations['total'], 0)

            session_id = str(uuid.uuid4())
            # 20 equally-sized random int16 chunks to fill shared storage
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            key_list = [str(uuid.uuid4()) for _ in range(20)]

            self._fill_shared_storage(session_id, key_list, data_list)
            data_size = manager_ref.get_data_sizes(session_id,
                                                   [key_list[0]])[0]

            # spill huge sizes
            with self.assertRaises(SpillSizeExceeded):
                self.waitp(
                    shared_handler.spill_size(self.plasma_storage_size * 2), )

            # spill size of two data chunks
            keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]
            pin_token = str(uuid.uuid4())
            # pin key_list[1] so it cannot be selected for spilling
            shared_holder_ref.pin_data_keys(session_id, key_list[1:2],
                                            pin_token)

            expect_spills = key_list[2:4]

            # lifting key_list[0] should also keep it out of the spill set
            shared_holder_ref.lift_data_keys(session_id, [key_list[0]])
            shared_handler.spill_size(data_size * 2) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            pool.sleep(0.5)
            # when the key is in spill (here we trigger it manually in mock),
            # it cannot be spilled
            with self.assertRaises(PinDataKeyFailed):
                shared_holder_ref.pin_data_keys(session_id, key_list[2:3],
                                                str(uuid.uuid4()))

            # let the mock runner complete the pending spill submissions
            for k in key_list[2:6]:
                mock_runner_ref.submit_item(session_id, k)
            self.get_result(5)

            shared_holder_ref.unpin_data_keys(session_id, key_list[1:2],
                                              pin_token)
            keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
            self.assertSetEqual(
                set(keys_before) - set(keys_after), set(expect_spills))

            # spill size of a single chunk, should return immediately
            keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]

            shared_handler.spill_size(data_size) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
            self.assertSetEqual(set(keys_before), set(keys_after))

            # when all pinned, nothing can be spilled
            # and spill_size() should raises an error
            pin_token = str(uuid.uuid4())
            shared_holder_ref.pin_data_keys(session_id, key_list, pin_token)

            shared_handler.spill_size(data_size * 3) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            with self.assertRaises(NoDataToSpill):
                self.get_result(5)

            shared_holder_ref.unpin_data_keys(session_id, key_list, pin_token)

            # when some errors raise when spilling,
            # spill_size() should report it

            mock_runner_ref.clear_submissions()
            shared_handler.spill_size(data_size * 3) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            pool.sleep(0.5)
            spill_keys = mock_runner_ref.get_request_keys()
            # fail the first key, complete the rest; the failure should win
            mock_runner_ref.submit_item(session_id, spill_keys[0],
                                        build_exc_info(SystemError))
            for k in spill_keys[1:]:
                mock_runner_ref.submit_item(session_id, k)

            with self.assertRaises(SystemError):
                self.get_result(5)
Esempio n. 14
0
    def testSharedReadAndWrite(self, *_):
        """Exercise shared-memory byte writers/readers of the storage client.

        Writes a serialized array both in one shot and in fixed-size chunks,
        then reads it back in one shot and in fixed-size chunks, verifying
        the recorded storage location and the round-tripped contents.

        Fixes vs. previous revision: the one-shot write helper now checks
        ``writer.nbytes`` against its own ``ser`` parameter instead of the
        closed-over ``ser_data1`` (matching the helper in
        ``testDiskReadAndWrite``), and the chunked write helper gets its own
        name instead of shadowing the first definition.  Behavior for the
        actual calls is unchanged.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        io_size = dataserializer.HEADER_LENGTH * 2  # chunk size for batched IO
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((100, 100))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            def _write_data(ser, writer):
                # validate against the parameter, not the enclosing ser_data1,
                # so the helper is correct for any serialized object passed in
                self.assertEqual(writer.nbytes, ser.total_bytes)
                with writer:
                    ser.write_to(writer)

            # one-shot write
            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1)) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            handler.delete(session_id, [data_key1])

            def _write_data_chunked(buf, writer):
                # write the raw serialized buffer in io_size-sized slices
                with writer:
                    for start in range(0, len(buf), io_size):
                        writer.write(buf[start:start + io_size])

            # chunked write of the same payload
            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data_chunked, ser_data1.to_buffer())) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])

            def _read_data_all(reader):
                with reader:
                    return dataserializer.deserialize(reader.read())

            # one-shot read-back
            handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                .then(_read_data_all) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)

            def _read_data_batch(reader):
                # accumulate io_size-sized reads until EOF (empty read)
                bio = BytesIO()
                with reader:
                    while True:
                        buf = reader.read(io_size)
                        if buf:
                            bio.write(buf)
                        else:
                            break
                return dataserializer.deserialize(bio.getvalue())

            # chunked read-back
            handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                .then(_read_data_batch) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)
            handler.delete(session_id, [data_key1])
Esempio n. 15
0
    def testSharedSpill(self, *_):
        """Exercise spilling from shared memory to disk.

        Fills shared storage until ``StorageFull``, then checks that lifting
        keeps a key in shared memory while its neighbor is spilled to disk,
        that pinned keys survive spilling, and that unpinned keys become
        spillable again.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            holder_ref = pool.create_actor(
                SharedHolderActor, self.plasma_storage_size,
                uid=SharedHolderActor.default_uid())

            session_id = str(uuid.uuid4())
            # equally-sized chunks so spill sizes map to whole chunks
            data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                         for _ in range(20)]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))
            # index of the first key not yet stored; shared with _fill_data
            idx = 0

            def _fill_data():
                # put objects until shared storage is full, returning the
                # index of the first key that did not fit.
                # NOTE(review): data_list is not offset by idx, so refill
                # rounds pair data_keys[idx + i] with data_list[i] — confirm
                # this pairing is intentional (sizes are identical, so spill
                # accounting is unaffected).
                i = 0
                for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                    try:
                        handler.put_objects(session_id, [key], [data])
                    except StorageFull:
                        break
                return i + idx

            def _do_spill():
                # request a spill of two chunks' worth of bytes and wait
                data_size = storage_manager_ref.get_data_sizes(session_id, [data_keys[0]])[0]
                handler.spill_size(2 * data_size) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

            # test lift data key
            idx = _fill_data()
            handler.lift_data_keys(session_id, [data_keys[0]])
            _do_spill()

            # the lifted key stays in shared memory; the next one was spilled
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[1]])[0]),
                             [(0, DataStorageDevice.DISK)])

            # freed space admits one more object
            handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[idx]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            idx += 1

            # test pin data key
            idx = _fill_data()
            holder_ref.lift_data_keys(session_id, [data_keys[0]], last=False)
            pin_token = str(uuid.uuid4())
            pinned_keys = handler.pin_data_keys(session_id, (data_keys[0],), pin_token)
            self.assertIn(data_keys[0], pinned_keys)
            _do_spill()

            # the pinned key survives the spill
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[1]])[0]),
                             [(0, DataStorageDevice.DISK)])

            handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[idx]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            idx += 1

            # test unpin data key
            idx = _fill_data()
            handler.unpin_data_keys(session_id, (data_keys[0],), pin_token)
            _do_spill()

            # once unpinned, the key becomes spillable and lands on disk
            self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                             [(0, DataStorageDevice.DISK)])
Esempio n. 16
0
    def testSharedLoadFromBytes(self, *_):
        """Exercise loading data into shared memory from a disk bytes handler.

        Loads one key successfully, then loads many keys until capacity is
        exhausted and checks that ``StorageFull`` reports the affected keys,
        whose sizes are subsequently reported as ``None``.
        """
        # NOTE(review): debug logging here looks like a leftover debugging
        # aid — consider removing once the test is stable.
        import logging
        logging.basicConfig(level=logging.DEBUG)
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            # NOTE(review): a QuotaActor is registered under MemQuotaActor's
            # uid — presumably a deliberate stand-in; confirm.
            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            # load from bytes io
            disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
            with disk_handler.create_bytes_writer(
                    session_id, data_key1, ser_data1.total_bytes) as writer:
                ser_data1.write_to(writer)

            handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            # the key is now present on both devices
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            disk_handler.delete(session_id, [data_key1])
            handler.delete(session_id, [data_key1])

            # load from bytes io till no capacity
            data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                         for _ in range(20)]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            for key, data in zip(data_keys, data_list):
                ser_data = dataserializer.serialize(data)
                with disk_handler.create_bytes_writer(
                        session_id, key, ser_data.total_bytes) as writer:
                    ser_data.write_to(writer)

            handler.load_from_bytes_io(session_id, data_keys, disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            # collect keys that failed to load because shared memory was full
            affected_keys = set()
            try:
                self.get_result(5)
            except StorageFull as ex:
                affected_keys.update(ex.affected_keys)

            storage_client.delete(session_id, data_keys, [DataStorageDevice.DISK])

            # some but not all keys should have been affected
            self.assertLess(len(affected_keys), len(data_keys))
            self.assertGreater(len(affected_keys), 1)
            for k, size in zip(data_keys, storage_client.get_data_sizes(session_id, data_keys)):
                if k in affected_keys:
                    self.assertIsNone(size)
                else:
                    self.assertIsNotNone(size)
Esempio n. 17
0
 def post_create(self):
     """Run base-class post-creation steps, then cache a reference to the
     storage manager actor for later use."""
     super().post_create()
     self._manager_ref = self.ctx.actor_ref(
         StorageManagerActor.default_uid())
Esempio n. 18
0
    def testDiskReadAndWriteMerger(self):
        """Exercise packed disk IO through the file merger.

        With small ``max_file_size`` so many keys share merged files, writes
        30 packed blocks concurrently under the merger lock, verifies their
        locations and the merger's written files, reads them back both packed
        (with the lock) and unpacked (without), and finally checks the merged
        files disappear after deletion.
        """
        # NOTE(review): debug logging here looks like a leftover debugging
        # aid — consider removing once the test is stable.
        import logging
        logging.basicConfig(level=logging.DEBUG)

        test_addr = f'127.0.0.1:{get_next_port()}'
        # tiny merged-file limit forces multiple keys per file
        options.worker.filemerger.max_file_size = 2400
        options.worker.filemerger.concurrency = 16

        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor,
                              test_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            disk_file_merger_ref = pool.create_actor(
                DiskFileMergerActor, uid=DiskFileMergerActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            data_count = 30
            # random shapes so serialized sizes differ across blocks
            data = [
                np.random.rand(random.randint(10, 30), random.randint(10, 30))
                for _ in range(data_count)
            ]
            ser_data = [dataserializer.serialize(d) for d in data]

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            # NOTE: the loop target assigns onto the handler's private
            # ``_compress`` attribute to repeat the scenario per compression
            for handler._compress in self._get_compress_types():
                data_keys = [str(uuid.uuid4()) for _ in range(data_count)]

                # kick off all packed writes concurrently under merger lock
                promises = []
                for idx in range(data_count):
                    block_data = dataserializer.dumps(
                        data[idx], compress=handler._compress)

                    def _write_data(ser, writer):
                        with writer:
                            writer.write(ser)
                        return writer.filename

                    promises.append(
                        handler.create_bytes_writer(session_id,
                                                    data_keys[idx],
                                                    ser_data[idx].total_bytes,
                                                    packed=True,
                                                    with_merger_lock=True,
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _write_data,
                                                            block_data)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)

                # all keys should be registered as disk-resident
                for key in data_keys:
                    self.assertEqual(
                        sorted(
                            storage_manager_ref.get_data_locations(
                                session_id, [key])[0]),
                        [(0, DataStorageDevice.DISK)])

                # merged files reported by the merger must exist on disk
                dump_result = disk_file_merger_ref.dump_info()
                written_files = list(dump_result[2])
                for fn in written_files:
                    self.assertTrue(os.path.exists(fn))

                # packed read-back (with merger lock), checked element-wise
                data_store = [None] * len(data)
                promises = []
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.loads(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(session_id,
                                                    data_keys[idx],
                                                    with_merger_lock=True,
                                                    packed=True,
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _read_data,
                                                            idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                # unpacked read-back (no merger lock)
                data_store = [None] * len(data)
                promises = []
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.deserialize(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(session_id,
                                                    data_keys[idx],
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _read_data,
                                                            idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                # deleting the keys should remove the merged files
                storage_client.delete(session_id, data_keys)
                pool.sleep(0.1)
                for fn in written_files:
                    self.assertFalse(os.path.exists(fn))
Esempio n. 19
0
    def testClientReadAndWrite(self):
        """Exercise StorageClient writer/reader creation, both promised and
        non-promised, against disk and shared-memory devices."""
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            options.worker.lock_free_fileio = True

            # bring up the worker-side actors the storage client relies on
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            raw_data = np.random.random((10, 10))
            serialized = dataserializer.serialize(raw_data)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                written_paths = []

                def _do_write(ser, writer):
                    # record the backing file so we can check its lifecycle later
                    written_paths.append(writer.filename)
                    self.assertEqual(writer.nbytes, serialized.total_bytes)
                    with writer:
                        ser.write_to(writer)

                # test creating non-promised writer and write
                with storage_client.create_writer(session_id,
                                                  data_key1,
                                                  serialized.total_bytes,
                                                  (DataStorageDevice.DISK, ),
                                                  _promise=False) as writer:
                    _do_write(serialized, writer)
                self.assertTrue(os.path.exists(written_paths[0]))
                locations = storage_client.get_data_locations(
                    session_id, [data_key1])[0]
                self.assertEqual(sorted(locations),
                                 [(0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key1])

                # test creating promised writer and write
                written_paths.clear()
                self.waitp(
                    storage_client.create_writer(
                        session_id, data_key2, serialized.total_bytes,
                        (DataStorageDevice.DISK, )).then(
                            functools.partial(_do_write, serialized)))
                self.assertTrue(os.path.exists(written_paths[0]))
                locations = storage_client.get_data_locations(
                    session_id, [data_key2])[0]
                self.assertEqual(sorted(locations),
                                 [(0, DataStorageDevice.DISK)])

                def _do_read(reader):
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test creating reader when data exist in location
                result = self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.DISK, )).then(_do_read))[0]
                assert_allclose(result, raw_data)

                # test creating reader when no data in location (should raise)
                with self.assertRaises(IOError):
                    storage_client.create_reader(
                        session_id,
                        data_key2, (DataStorageDevice.SHARED_MEMORY, ),
                        _promise=False)

                # test creating reader when copy needed: reading from shared
                # memory should transparently load the data there from disk
                self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.SHARED_MEMORY, )).then(_do_read))
                locations = storage_client.get_data_locations(
                    session_id, [data_key2])[0]
                self.assertEqual(sorted(locations),
                                 [(0, DataStorageDevice.SHARED_MEMORY),
                                  (0, DataStorageDevice.DISK)])

                # deletion should eventually remove the backing disk file
                storage_client.delete(session_id, [data_key2])
                while os.path.exists(written_paths[0]):
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(written_paths[0]))
Esempio n. 20
0
    def testStorageManager(self):
        """Test StorageManagerActor bookkeeping: registering data on devices,
        querying locations/sizes/shapes, unregistering, and cleanup when a
        worker process goes down."""
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            # unknown keys resolve to no locations
            self.assertEqual(
                list(
                    manager_ref.get_data_locations(session_id,
                                                   ['NON_EXIST'])[0]), [])

            # register data_key1 on three (proc, device) slots; shape is only
            # supplied for the in-memory copies
            manager_ref.register_data(session_id, [data_key1],
                                      (0, DataStorageDevice.SHARED_MEMORY),
                                      [1024],
                                      shapes=[(16, 8)])
            manager_ref.register_data(session_id, [data_key1],
                                      (1, DataStorageDevice.PROC_MEMORY),
                                      [1024],
                                      shapes=[(16, 8)])
            manager_ref.register_data(session_id, [data_key1],
                                      (0, DataStorageDevice.DISK), [2048])
            self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY),
                              (0, DataStorageDevice.DISK),
                              (1, DataStorageDevice.PROC_MEMORY)],
                             sorted(
                                 manager_ref.get_data_locations(
                                     session_id, [data_key1])[0]))
            # the most recent registration (disk, 2048) wins for size
            self.assertEqual(
                2048,
                manager_ref.get_data_sizes(session_id, [data_key1])[0])
            self.assertEqual(
                (16, 8),
                manager_ref.get_data_shapes(session_id, [data_key1])[0])

            manager_ref.register_data(session_id, [data_key2],
                                      (0, DataStorageDevice.SHARED_MEMORY),
                                      [1024])
            manager_ref.register_data(session_id, [data_key2],
                                      (1, DataStorageDevice.PROC_MEMORY),
                                      [1024])
            self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY),
                              (1, DataStorageDevice.PROC_MEMORY)],
                             sorted(
                                 manager_ref.get_data_locations(
                                     session_id, [data_key2])[0]))

            # unregistering one location leaves the other intact
            manager_ref.unregister_data(session_id, [data_key2],
                                        (0, DataStorageDevice.SHARED_MEMORY))
            self.assertEqual([(1, DataStorageDevice.PROC_MEMORY)],
                             sorted(
                                 manager_ref.get_data_locations(
                                     session_id, [data_key2])[0]))
            self.assertEqual(
                1024,
                manager_ref.get_data_sizes(session_id, [data_key2])[0])
            # only data_key1 still lives in shared memory of proc 0
            self.assertEqual([data_key1],
                             list(
                                 manager_ref.filter_exist_keys(
                                     session_id,
                                     [data_key1, data_key2, 'non-exist'],
                                     [(0, DataStorageDevice.SHARED_MEMORY)])))

            # removing the last location wipes all metadata for the key
            manager_ref.unregister_data(session_id, [data_key2],
                                        (1, DataStorageDevice.PROC_MEMORY))
            self.assertEqual(
                list(
                    manager_ref.get_data_locations(session_id,
                                                   [data_key2])[0]), [])
            self.assertIsNone(
                manager_ref.get_data_sizes(session_id, [data_key2])[0])
            # FIX: the key must be wrapped in a list, consistent with every
            # other get_data_shapes call — a bare str would be iterated as
            # characters by the list-of-keys API
            self.assertIsNone(
                manager_ref.get_data_shapes(session_id, [data_key2])[0])

            # a process going down drops every location hosted by that process
            manager_ref.register_data(session_id, [data_key2],
                                      (1, DataStorageDevice.PROC_MEMORY),
                                      [1024])
            manager_ref.handle_process_down([1])
            self.assertEqual([(0, DataStorageDevice.SHARED_MEMORY),
                              (0, DataStorageDevice.DISK)],
                             sorted(
                                 manager_ref.get_data_locations(
                                     session_id, [data_key1])[0]))
            self.assertEqual(
                list(
                    manager_ref.get_data_locations(session_id,
                                                   [data_key2])[0]), [])
            self.assertIsNone(
                manager_ref.get_data_sizes(session_id, [data_key2])[0])
            self.assertIsNone(
                manager_ref.get_data_shapes(session_id, [data_key2])[0])