Example #1
    def testArrowSerialize(self):
        try:
            import numpy as np
            from numpy.testing import assert_array_equal
        except ImportError:
            np = None

        try:
            import scipy.sparse as sps
        except ImportError:
            sps = None

        if np:
            array = np.random.rand(1000, 100)
            assert_array_equal(
                array,
                dataserializer.deserialize(
                    dataserializer.serialize(array).to_buffer()))

        if sps:
            mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
            des_mat = dataserializer.deserialize(
                dataserializer.serialize(mat).to_buffer())
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        if np and sps:
            array = np.random.rand(1000, 100)
            mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
            tp = (array, mat)
            des_tp = dataserializer.deserialize(
                dataserializer.serialize(tp).to_buffer())
            assert_array_equal(tp[0], des_tp[0])
            self.assertTrue((tp[1].spmatrix != des_tp[1].spmatrix).nnz == 0)
Example #2
    def testArrowSerialize(self):
        array = np.random.rand(1000, 100)
        assert_array_equal(array, dataserializer.deserialize(dataserializer.serialize(array).to_buffer()))

        if sps:
            mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
            des_mat = dataserializer.deserialize(dataserializer.serialize(mat).to_buffer())
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            array = np.random.rand(1000, 100)
            mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
            tp = (array, mat)
            des_tp = dataserializer.deserialize(dataserializer.serialize(tp).to_buffer())
            assert_array_equal(tp[0], des_tp[0])
            self.assertTrue((tp[1].spmatrix != des_tp[1].spmatrix).nnz == 0)
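
Both examples above exercise the same round trip: dataserializer.serialize(obj) returns a serialized object whose to_buffer() output can be passed back to dataserializer.deserialize(). Below is a minimal standalone sketch of that pattern, assuming mars and numpy are installed and that the import path matches the "from mars.serialize import dataserializer" line used in Example #7.

import numpy as np
from mars.serialize import dataserializer

data = np.random.rand(1000, 100)

# serialize() exposes to_buffer() and total_bytes; deserialize() accepts
# the resulting buffer and reconstructs the original value.
buf = dataserializer.serialize(data).to_buffer()
np.testing.assert_array_equal(data, dataserializer.deserialize(buf))
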
Example #3
    def testDiskReadAndWritePacked(self, *_):
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor,
                              test_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            for handler._compress in self._get_compress_types():
                data_key1 = str(uuid.uuid4())

                storage_client.delete(session_id, [data_key1])
                self.rm_spill_dirs()

                block_data1 = dataserializer.dumps(data1,
                                                   compress=handler._compress)

                def _write_data(ser, writer):
                    with writer:
                        writer.write(ser)
                    return writer.filename

                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes,
                                            packed=True, _promise=True) \
                    .then(functools.partial(_write_data, block_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])
                self.assertTrue(os.path.exists(file_name))

                def _read_data(reader):
                    with reader:
                        return dataserializer.loads(reader.read())

                handler.create_bytes_reader(session_id, data_key1, packed=True, _promise=True) \
                    .then(_read_data) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data1)
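
The packed read/write path in Example #3 uses dataserializer.dumps() to produce the bytes written through the packed=True writer and dataserializer.loads() to decode what the packed=True reader returns, while the non-packed readers in later examples use deserialize() instead. A minimal sketch of the dumps/loads pair with default compression, since the concrete values returned by self._get_compress_types() are not shown here (assumes mars and numpy are installed):

import numpy as np
from mars.serialize import dataserializer

data = np.random.random((10, 10))

# dumps() yields the packed byte stream that _write_data feeds to
# writer.write(); loads() inverts it, as _read_data does above.
packed = dataserializer.dumps(data)
np.testing.assert_allclose(dataserializer.loads(packed), data)
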
Example #4
    def testSharedLoad(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            # load from bytes io
            disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
            with disk_handler.create_bytes_writer(
                    session_id, data_key1, ser_data1.total_bytes) as writer:
                ser_data1.write_to(writer)

            handler.load_from_bytes_io(session_id, data_key1, disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, data_key1)),
                             [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            disk_handler.delete(session_id, data_key1)
            handler.delete(session_id, data_key1)

            ref_data2 = weakref.ref(data2)

            # load from object io
            proc_handler = storage_client.get_storage_handler((0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_object(session_id, data_key2, data2)
            del data2

            handler.load_from_object_io(session_id, data_key2, proc_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, data_key2)),
                             [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.SHARED_MEMORY)])

            proc_handler.delete(session_id, data_key2)
            self.assertIsNone(ref_data2())
            handler.delete(session_id, data_key2)
Example #5
    def testSharedPutAndGet(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)
            bytes_data2 = ser_data2.to_buffer()

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            handler.put_objects(session_id, [data_key1], [data1])
            self.assertEqual(
                sorted(
                    storage_manager_ref.get_data_locations(
                        session_id, [data_key1])[0]),
                [(0, DataStorageDevice.SHARED_MEMORY)])
            assert_allclose(data1,
                            handler.get_objects(session_id, [data_key1])[0])

            handler.delete(session_id, [data_key1])
            self.assertEqual(
                list(
                    storage_manager_ref.get_data_locations(
                        session_id, [data_key1])[0]), [])
            with self.assertRaises(KeyError):
                handler.get_objects(session_id, [data_key1])

            handler.put_objects(session_id, [data_key2], [ser_data2],
                                serialize=True)
            assert_allclose(data2,
                            handler.get_objects(session_id, [data_key2])[0])
            handler.delete(session_id, [data_key2])

            handler.put_objects(session_id, [data_key2], [bytes_data2],
                                serialize=True)
            assert_allclose(data2,
                            handler.get_objects(session_id, [data_key2])[0])
            handler.delete(session_id, [data_key2])
Example #6
    def testProcMemPutAndGet(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)
            bytes_data2 = ser_data2.to_buffer()

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                DataStorageDevice.PROC_MEMORY)

            handler.put_object(session_id, data_key1, data1)
            self.assertEqual(
                sorted(
                    storage_manager_ref.get_data_locations(
                        session_id, data_key1)),
                [(0, DataStorageDevice.PROC_MEMORY)])
            assert_allclose(data1, handler.get_object(session_id, data_key1))

            handler.delete(session_id, data_key1)
            self.assertIsNone(
                storage_manager_ref.get_data_locations(session_id, data_key1))
            with self.assertRaises(KeyError):
                handler.get_object(session_id, data_key1)

            handler.put_object(session_id,
                               data_key2,
                               ser_data2,
                               serialized=True)
            assert_allclose(data2, handler.get_object(session_id, data_key2))
            handler.delete(session_id, data_key2)

            handler.put_object(session_id,
                               data_key2,
                               bytes_data2,
                               serialized=True)
            assert_allclose(data2, handler.get_object(session_id, data_key2))
            handler.delete(session_id, data_key2)
Example #7
    def testTileContextInLocalCluster(self):
        from mars.serialize import dataserializer
        with new_cluster(scheduler_n_process=2, worker_n_process=2,
                         shared_memory='20M', modules=[__name__], web=True) as cluster:
            session = cluster.session

            raw = np.random.rand(10, 20)
            data_bytes = dataserializer.serialize(raw).total_bytes
            data = mt.tensor(raw)

            session.run(data)

            data2 = TileWithContextOperand().new_tensor([data], shape=data.shape)

            result = session.run(data2)
            np.testing.assert_array_equal(raw * data_bytes, result)
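
Example #7 only needs dataserializer.serialize(raw).total_bytes to learn the serialized payload size before running the tensor; the buffer itself is never used. A tiny sketch of that measurement in isolation (assumes mars and numpy are installed):

import numpy as np
from mars.serialize import dataserializer

raw = np.random.rand(10, 20)
# total_bytes reports the size of the serialized payload.
data_bytes = dataserializer.serialize(raw).total_bytes
print('serialized size:', data_bytes, 'bytes')
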
Example #8
    def testCudaMemPutAndGet(self):
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(CudaHolderActor)

            test_data = np.random.random((10, 10))
            test_suites = [
                (test_data, cp.ndarray, cp.asnumpy, assert_allclose),
                (pd.Series(test_data.flatten()), cudf.Series,
                 lambda o: o.to_pandas(), pd.testing.assert_series_equal),
                (pd.DataFrame(dict(col=test_data.flatten())), cudf.DataFrame,
                 lambda o: o.to_pandas(), pd.testing.assert_frame_equal),
            ]

            for data, cuda_type, move_to_mem, assert_obj_equal in test_suites:
                ser_data = dataserializer.serialize(data)

                session_id = str(uuid.uuid4())
                data_key1 = str(uuid.uuid4())
                data_key2 = str(uuid.uuid4())

                storage_client = test_actor.storage_client
                handler = storage_client.get_storage_handler((0, DataStorageDevice.CUDA))

                handler.put_objects(session_id, [data_key1], [data])
                self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                                 [(0, DataStorageDevice.CUDA)])
                self.assertIsInstance(handler.get_objects(session_id, [data_key1])[0], cuda_type)
                assert_obj_equal(data, move_to_mem(handler.get_objects(session_id, [data_key1])[0]))

                handler.delete(session_id, [data_key1])
                self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]), [])
                with self.assertRaises(KeyError):
                    handler.get_objects(session_id, [data_key1])

                handler.put_objects(session_id, [data_key2], [ser_data], serialize=True)
                self.assertIsInstance(handler.get_objects(session_id, [data_key2])[0], cuda_type)
                assert_obj_equal(data, move_to_mem(handler.get_objects(session_id, [data_key2])[0]))
                handler.delete(session_id, [data_key2])
Example #9
    def testDiskReadAndWrite(self, *_):
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)

            session_id = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            for handler._compress in self._get_compress_types():
                data_key1 = str(uuid.uuid4())
                data_key2 = (str(uuid.uuid4()), 'subkey')

                storage_client.delete(session_id, [data_key1])
                storage_client.delete(session_id, [data_key2])
                self.rm_spill_dirs()

                def _write_data(ser, writer):
                    self.assertEqual(writer.nbytes, ser.total_bytes)
                    with writer:
                        ser.write_to(writer)
                    return writer.filename

                def _read_data(reader):
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test normal file write
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertTrue(os.path.exists(file_name))
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                # test writing to an existing key (this should produce an error)
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(StorageDataExists):
                    self.get_result(5)

                # test writing again after unregistering the data (leaving an unreferenced file)
                storage_manager_ref.unregister_data(
                    session_id, [data_key1], (0, DataStorageDevice.DISK))
                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertTrue(os.path.exists(file_name))
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                # test reading and verifying written data
                handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                    .then(_read_data) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data1)

                # test deleting data and waiting for the backing file to be removed
                handler.delete(session_id, [data_key1])
                while os.path.exists(file_name):
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(file_name))

                # test reading and writing with tuple keys
                handler.create_bytes_writer(session_id, data_key2, ser_data2.total_bytes, _promise=True) \
                    .then(functools.partial(_write_data, ser_data2)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.DISK)])

                handler.create_bytes_reader(session_id, data_key2, _promise=True) \
                    .then(_read_data) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data2)
Example #10
    def testDiskReadAndWriteMerger(self):
        import logging
        logging.basicConfig(level=logging.DEBUG)

        test_addr = f'127.0.0.1:{get_next_port()}'
        options.worker.filemerger.max_file_size = 2400
        options.worker.filemerger.concurrency = 16

        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor,
                              test_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            disk_file_merger_ref = pool.create_actor(
                DiskFileMergerActor, uid=DiskFileMergerActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            data_count = 30
            data = [
                np.random.rand(random.randint(10, 30), random.randint(10, 30))
                for _ in range(data_count)
            ]
            ser_data = [dataserializer.serialize(d) for d in data]

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            for handler._compress in self._get_compress_types():
                data_keys = [str(uuid.uuid4()) for _ in range(data_count)]

                promises = []
                for idx in range(data_count):
                    block_data = dataserializer.dumps(
                        data[idx], compress=handler._compress)

                    def _write_data(ser, writer):
                        with writer:
                            writer.write(ser)
                        return writer.filename

                    promises.append(
                        handler.create_bytes_writer(
                            session_id, data_keys[idx], ser_data[idx].total_bytes,
                            packed=True, with_merger_lock=True, _promise=True)
                        .then(functools.partial(_write_data, block_data)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)

                for key in data_keys:
                    self.assertEqual(
                        sorted(
                            storage_manager_ref.get_data_locations(
                                session_id, [key])[0]),
                        [(0, DataStorageDevice.DISK)])

                dump_result = disk_file_merger_ref.dump_info()
                written_files = list(dump_result[2])
                for fn in written_files:
                    self.assertTrue(os.path.exists(fn))

                data_store = [None] * len(data)
                promises = []
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.loads(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(
                            session_id, data_keys[idx],
                            with_merger_lock=True, packed=True, _promise=True)
                        .then(functools.partial(_read_data, idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                data_store = [None] * len(data)
                promises = []
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.deserialize(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(
                            session_id, data_keys[idx], _promise=True)
                        .then(functools.partial(_read_data, idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                storage_client.delete(session_id, data_keys)
                pool.sleep(0.1)
                for fn in written_files:
                    self.assertFalse(os.path.exists(fn))
Example #11
    def testSender(self):
        send_pool_addr = 'localhost:%d' % get_next_port()
        recv_pool_addr = 'localhost:%d' % get_next_port()
        recv_pool_addr2 = 'localhost:%d' % get_next_port()

        options.worker.spill_directory = tempfile.mkdtemp(
            prefix='mars_test_sender_')
        session_id = str(uuid.uuid4())

        mock_data = np.array([1, 2, 3, 4])
        chunk_key1 = str(uuid.uuid4())
        chunk_key2 = str(uuid.uuid4())

        @contextlib.contextmanager
        def start_send_recv_pool():
            with start_transfer_test_pool(
                    address=send_pool_addr,
                    plasma_size=self.plasma_storage_size) as sp:
                sp.create_actor(SenderActor, uid=SenderActor.default_uid())
                with start_transfer_test_pool(
                        address=recv_pool_addr,
                        plasma_size=self.plasma_storage_size) as rp:
                    rp.create_actor(MockReceiverActor,
                                    uid=ReceiverActor.default_uid())
                    yield sp, rp

        with start_send_recv_pool() as (send_pool, recv_pool):
            sender_ref = send_pool.actor_ref(SenderActor.default_uid())
            receiver_ref = recv_pool.actor_ref(ReceiverActor.default_uid())

            with self.run_actor_test(send_pool) as test_actor:
                storage_client = test_actor.storage_client

                # send when the data is missing
                sender_ref_p = test_actor.promise_ref(sender_ref)
                sender_ref_p.send_data(session_id, str(uuid.uuid4()), recv_pool_addr, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(DependencyMissing):
                    self.get_result(5)

                # send data in spill
                serialized = dataserializer.serialize(mock_data)
                self.waitp(
                    storage_client.create_writer(
                        session_id, chunk_key1, serialized.total_bytes,
                        [DataStorageDevice.DISK])
                    .then(lambda writer: promise.finished()
                          .then(lambda *_: writer.write(serialized))
                          .then(lambda *_: writer.close())))

                sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                assert_array_equal(
                    mock_data,
                    receiver_ref.get_result_data(session_id, chunk_key1))
                storage_client.delete(session_id, chunk_key1)

                # send data in plasma store
                self.waitp(
                    storage_client.put_object(
                        session_id, chunk_key1, mock_data,
                        [DataStorageDevice.SHARED_MEMORY]))

                sender_ref_p.send_data(session_id, chunk_key1, recv_pool_addr, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                assert_array_equal(
                    mock_data,
                    receiver_ref.get_result_data(session_id, chunk_key1))

                # send data to multiple targets
                with start_transfer_test_pool(
                        address=recv_pool_addr2,
                        plasma_size=self.plasma_storage_size) as rp2:
                    recv_ref2 = rp2.create_actor(
                        MockReceiverActor, uid=ReceiverActor.default_uid())

                    self.waitp(
                        sender_ref_p.send_data(
                            session_id,
                            chunk_key1, [recv_pool_addr, recv_pool_addr2],
                            _promise=True))
                    # send data that has already been transferred or is being transferred
                    sender_ref_p.send_data(session_id, chunk_key1,
                                           [recv_pool_addr, recv_pool_addr2], _promise=True) \
                        .then(lambda *s: test_actor.set_result(s)) \
                        .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                    self.get_result(5)
                    assert_array_equal(
                        mock_data,
                        recv_ref2.get_result_data(session_id, chunk_key1))

                # send data to a non-existent endpoint, which causes an error
                self.waitp(
                    storage_client.put_object(
                        session_id, chunk_key2, mock_data,
                        [DataStorageDevice.SHARED_MEMORY]))

                sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr2, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(BrokenPipeError):
                    self.get_result(5)

                def mocked_receive_data_part(*_):
                    raise ChecksumMismatch

                with patch_method(MockReceiverActor.receive_data_part,
                                  new=mocked_receive_data_part):
                    sender_ref_p.send_data(session_id, chunk_key2, recv_pool_addr, _promise=True) \
                        .then(lambda *s: test_actor.set_result(s)) \
                        .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                    with self.assertRaises(ChecksumMismatch):
                        self.get_result(5)
Example #12
    def testSharedReadAndWrite(self, *_):
        test_addr = '127.0.0.1:%d' % get_next_port()
        io_size = dataserializer.HEADER_LENGTH * 2
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((100, 100))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            def _write_data(ser, writer):
                self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                with writer:
                    ser.write_to(writer)

            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1)) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            handler.delete(session_id, [data_key1])

            def _write_data(ser, writer):
                with writer:
                    for start in range(0, len(ser), io_size):
                        writer.write(ser[start:start + io_size])

            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
                .then(functools.partial(_write_data, ser_data1.to_buffer())) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])

            def _read_data_all(reader):
                with reader:
                    return dataserializer.deserialize(reader.read())

            handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                .then(_read_data_all) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)

            def _read_data_batch(reader):
                bio = BytesIO()
                with reader:
                    while True:
                        buf = reader.read(io_size)
                        if buf:
                            bio.write(buf)
                        else:
                            break
                return dataserializer.deserialize(bio.getvalue())

            handler.create_bytes_reader(session_id, data_key1, _promise=True) \
                .then(_read_data_batch) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)
            handler.delete(session_id, [data_key1])
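
The batched reader in Example #12 pulls the shared-memory buffer back in pieces of two header lengths and reassembles them before deserializing. The same loop works against any byte source; below is a self-contained sketch that swaps the storage reader for an in-memory BytesIO (assumes mars and numpy are installed, and that the object returned by to_buffer() supports the buffer protocol so bytes() can copy it).

from io import BytesIO

import numpy as np
from mars.serialize import dataserializer

data = np.random.random((100, 100))
source = BytesIO(bytes(dataserializer.serialize(data).to_buffer()))

# read in fixed-size chunks and reassemble, mirroring _read_data_batch
io_size = dataserializer.HEADER_LENGTH * 2
bio = BytesIO()
while True:
    buf = source.read(io_size)
    if not buf:
        break
    bio.write(buf)

np.testing.assert_allclose(dataserializer.deserialize(bio.getvalue()), data)
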
Example #13
    def testSharedLoadFromBytes(self, *_):
        import logging
        logging.basicConfig(level=logging.DEBUG)
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            # load from bytes io
            disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
            with disk_handler.create_bytes_writer(
                    session_id, data_key1, ser_data1.total_bytes) as writer:
                ser_data1.write_to(writer)

            handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            disk_handler.delete(session_id, [data_key1])
            handler.delete(session_id, [data_key1])

            # load from bytes io until capacity is exhausted
            data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                         for _ in range(20)]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            for key, data in zip(data_keys, data_list):
                ser_data = dataserializer.serialize(data)
                with disk_handler.create_bytes_writer(
                        session_id, key, ser_data.total_bytes) as writer:
                    ser_data.write_to(writer)

            handler.load_from_bytes_io(session_id, data_keys, disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            affected_keys = set()
            try:
                self.get_result(5)
            except StorageFull as ex:
                affected_keys.update(ex.affected_keys)

            storage_client.delete(session_id, data_keys, [DataStorageDevice.DISK])

            self.assertLess(len(affected_keys), len(data_keys))
            self.assertGreater(len(affected_keys), 1)
            for k, size in zip(data_keys, storage_client.get_data_sizes(session_id, data_keys)):
                if k in affected_keys:
                    self.assertIsNone(size)
                else:
                    self.assertIsNotNone(size)
Example #14
    def testReceiverWorker(self):
        pool_addr = f'localhost:{get_next_port()}'
        options.worker.spill_directory = tempfile.mkdtemp(
            prefix='mars_test_receiver_')
        session_id = str(uuid.uuid4())

        mock_data = np.array([1, 2, 3, 4])
        serialized_arrow_data = dataserializer.serialize(mock_data)
        data_size = serialized_arrow_data.total_bytes
        dumped_mock_data = dataserializer.dumps(mock_data)

        chunk_key1 = str(uuid.uuid4())
        chunk_key2 = str(uuid.uuid4())
        chunk_key3 = str(uuid.uuid4())
        chunk_key4 = str(uuid.uuid4())
        chunk_key5 = str(uuid.uuid4())
        chunk_key6 = str(uuid.uuid4())
        chunk_key7 = str(uuid.uuid4())
        chunk_key8 = str(uuid.uuid4())
        chunk_key9 = str(uuid.uuid4())

        with start_transfer_test_pool(address=pool_addr, plasma_size=self.plasma_storage_size) as pool, \
                self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client
            receiver_ref = test_actor.promise_ref(
                pool.create_actor(ReceiverWorkerActor, uid=str(uuid.uuid4())))
            receiver_manager_ref = test_actor.promise_ref(
                ReceiverManagerActor.default_uid())

            # SCENARIO 1: create two writers and write data in chunks
            self.waitp(
                receiver_ref.create_data_writers(session_id,
                                                 [chunk_key1, chunk_key2],
                                                 [data_size] * 2,
                                                 test_actor,
                                                 _promise=True))
            receiver_ref.receive_data_part(
                session_id, [chunk_key1, chunk_key2], [True, False],
                dumped_mock_data,
                dumped_mock_data[:len(dumped_mock_data) // 2])
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key1),
                             ReceiveStatus.RECEIVED)
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key2),
                             ReceiveStatus.RECEIVING)
            receiver_ref.receive_data_part(
                session_id, [chunk_key2], [True],
                dumped_mock_data[len(dumped_mock_data) // 2:])
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key2),
                             ReceiveStatus.RECEIVED)
            assert_array_equal(
                storage_client.get_object(session_id,
                                          chunk_key1,
                                          [DataStorageDevice.SHARED_MEMORY],
                                          _promise=False), mock_data)
            assert_array_equal(
                storage_client.get_object(session_id,
                                          chunk_key2,
                                          [DataStorageDevice.SHARED_MEMORY],
                                          _promise=False), mock_data)

            # SCENARIO 2: one of the writers fails to be created;
            # test both existing and non-existing keys
            old_create_writer = StorageClient.create_writer

            def _create_writer_with_fail(self, session_id, chunk_key, *args,
                                         **kwargs):
                if chunk_key == fail_key:
                    if kwargs.get('_promise', True):
                        return promise.finished(*build_exc_info(ValueError),
                                                **dict(_accept=False))
                    else:
                        raise ValueError
                return old_create_writer(self, session_id, chunk_key, *args,
                                         **kwargs)

            with patch_method(StorageClient.create_writer, new=_create_writer_with_fail), \
                    self.assertRaises(ValueError):
                fail_key = chunk_key4
                self.waitp(
                    receiver_ref.create_data_writers(
                        session_id, [chunk_key3, chunk_key4, chunk_key5],
                        [data_size] * 3,
                        test_actor,
                        ensure_cached=False,
                        _promise=True))
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key3),
                             ReceiveStatus.NOT_STARTED)
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key4),
                             ReceiveStatus.NOT_STARTED)
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key5),
                             ReceiveStatus.NOT_STARTED)

            with patch_method(StorageClient.create_writer,
                              new=_create_writer_with_fail):
                fail_key = chunk_key2
                self.waitp(
                    receiver_ref.create_data_writers(session_id,
                                                     [chunk_key2, chunk_key3],
                                                     [data_size] * 2,
                                                     test_actor,
                                                     ensure_cached=False,
                                                     _promise=True))

            # SCENARIO 3: transfer timeout
            receiver_manager_ref.register_pending_keys(session_id,
                                                       [chunk_key6])
            self.waitp(
                receiver_ref.create_data_writers(session_id, [chunk_key6],
                                                 [data_size],
                                                 test_actor,
                                                 timeout=1,
                                                 _promise=True))
            with self.assertRaises(TimeoutError):
                self.waitp(
                    receiver_manager_ref.add_keys_callback(session_id,
                                                           [chunk_key6],
                                                           _promise=True))

            # SCENARIO 4: cancelled transfer (both before and during transfer)
            receiver_manager_ref.register_pending_keys(session_id,
                                                       [chunk_key7])
            self.waitp(
                receiver_ref.create_data_writers(session_id, [chunk_key7],
                                                 [data_size],
                                                 test_actor,
                                                 timeout=1,
                                                 _promise=True))
            receiver_ref.cancel_receive(session_id, [chunk_key2, chunk_key7])
            with self.assertRaises(KeyError):
                receiver_ref.receive_data_part(
                    session_id, [chunk_key7], [False],
                    dumped_mock_data[:len(dumped_mock_data) // 2])
            with self.assertRaises(KeyError):
                self.waitp(
                    receiver_manager_ref.add_keys_callback(session_id,
                                                           [chunk_key7],
                                                           _promise=True))

            # SCENARIO 5: sender halts and receiver is notified (reusing the previously unsuccessful key)
            receiver_manager_ref.register_pending_keys(session_id,
                                                       [chunk_key7])
            mock_ref = pool.actor_ref(test_actor.uid, address='MOCK_ADDR')
            self.waitp(
                receiver_ref.create_data_writers(session_id, [chunk_key7],
                                                 [data_size],
                                                 mock_ref,
                                                 timeout=1,
                                                 _promise=True))
            receiver_ref.notify_dead_senders(['MOCK_ADDR'])
            with self.assertRaises(WorkerDead):
                self.waitp(
                    receiver_manager_ref.add_keys_callback(session_id,
                                                           [chunk_key7],
                                                           _promise=True))

            # SCENARIO 6: successful transfer without promise
            receiver_ref.create_data_writers(session_id, [chunk_key8],
                                             [data_size],
                                             mock_ref,
                                             use_promise=False)
            receiver_ref.receive_data_part(session_id, [chunk_key8], [True],
                                           dumped_mock_data)
            self.assertEqual(receiver_ref.check_status(session_id, chunk_key8),
                             ReceiveStatus.RECEIVED)
            assert_array_equal(
                storage_client.get_object(session_id,
                                          chunk_key8,
                                          [DataStorageDevice.SHARED_MEMORY],
                                          _promise=False), mock_data)

            # SCENARIO 7: failed transfer without promise
            with patch_method(StorageClient.create_writer, new=_create_writer_with_fail), \
                    self.assertRaises(ValueError):
                fail_key = chunk_key9
                receiver_ref.create_data_writers(session_id, [chunk_key9],
                                                 [data_size],
                                                 mock_ref,
                                                 use_promise=False)
Example #15
    def testReceiverManager(self):
        pool_addr = 'localhost:%d' % get_next_port()
        session_id = str(uuid.uuid4())

        mock_data = np.array([1, 2, 3, 4])
        serialized_arrow_data = dataserializer.serialize(mock_data)
        data_size = serialized_arrow_data.total_bytes
        serialized_mock_data = serialized_arrow_data.to_buffer()

        chunk_key1 = str(uuid.uuid4())
        chunk_key2 = str(uuid.uuid4())
        chunk_key3 = str(uuid.uuid4())
        chunk_key4 = str(uuid.uuid4())
        chunk_key5 = str(uuid.uuid4())
        chunk_key6 = str(uuid.uuid4())
        chunk_key7 = str(uuid.uuid4())

        with start_transfer_test_pool(address=pool_addr, plasma_size=self.plasma_storage_size) as pool, \
                self.run_actor_test(pool) as test_actor:
            mock_receiver_ref = pool.create_actor(MockReceiverWorkerActor,
                                                  uid=str(uuid.uuid4()))
            storage_client = test_actor.storage_client
            receiver_manager_ref = test_actor.promise_ref(
                ReceiverManagerActor.default_uid())

            # SCENARIO 1: test transferring existing keys
            self.waitp(
                storage_client.create_writer(
                    session_id, chunk_key1, serialized_arrow_data.total_bytes,
                    [DataStorageDevice.DISK])
                .then(lambda writer: promise.finished()
                      .then(lambda *_: writer.write(serialized_arrow_data))
                      .then(lambda *_: writer.close())))
            result = self.waitp(
                receiver_manager_ref.create_data_writers(session_id,
                                                         [chunk_key1],
                                                         [data_size],
                                                         test_actor,
                                                         _promise=True))
            self.assertEqual(result[0].uid, mock_receiver_ref.uid)
            self.assertEqual(result[1][0], ReceiveStatus.RECEIVED)

            # test adding callback for transferred key (should return immediately)
            result = self.waitp(
                receiver_manager_ref.add_keys_callback(session_id,
                                                       [chunk_key1],
                                                       _promise=True))
            self.assertTupleEqual(result, ())

            receiver_manager_ref.register_pending_keys(
                session_id, [chunk_key1, chunk_key2])
            self.assertEqual(
                receiver_manager_ref.filter_receiving_keys(
                    session_id, [chunk_key1, chunk_key2, 'non_exist']),
                [chunk_key2])

            # SCENARIO 2: test transferring new keys and wait on listeners
            result = self.waitp(
                receiver_manager_ref.create_data_writers(
                    session_id, [chunk_key2, chunk_key3], [data_size] * 2,
                    test_actor,
                    _promise=True))
            self.assertEqual(result[0].uid, mock_receiver_ref.uid)
            self.assertIsNone(result[1][0])

            # transferring keys that are already being transferred reports RECEIVING
            result = self.waitp(
                receiver_manager_ref.create_data_writers(session_id,
                                                         [chunk_key2],
                                                         [data_size],
                                                         test_actor,
                                                         _promise=True))
            self.assertEqual(result[1][0], ReceiveStatus.RECEIVING)

            # add listener and finish transfer
            receiver_manager_ref.add_keys_callback(session_id, [chunk_key1, chunk_key2], _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            mock_receiver_ref.receive_data_part(session_id, [chunk_key2],
                                                [True], serialized_mock_data)
            mock_receiver_ref.receive_data_part(session_id, [chunk_key3],
                                                [True], serialized_mock_data)
            self.get_result(5)

            # SCENARIO 3: test listening on multiple transfers
            receiver_manager_ref.create_data_writers(
                session_id, [chunk_key4, chunk_key5], [data_size] * 2, test_actor, _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            self.get_result(5)
            # add listener
            receiver_manager_ref.add_keys_callback(session_id, [chunk_key4, chunk_key5], _promise=True) \
                .then(lambda *s: test_actor.set_result(s))
            mock_receiver_ref.receive_data_part(session_id, [chunk_key4],
                                                [True], serialized_mock_data)
            # while some chunks remain untransferred, the promise will not resolve
            with self.assertRaises(TimeoutError):
                self.get_result(0.5)
            mock_receiver_ref.receive_data_part(session_id, [chunk_key5],
                                                [True], serialized_mock_data)
            self.get_result(5)

            # SCENARIO 4: test listening on transfer with errors
            self.waitp(
                receiver_manager_ref.create_data_writers(session_id,
                                                         [chunk_key6],
                                                         [data_size],
                                                         test_actor,
                                                         _promise=True))
            receiver_manager_ref.add_keys_callback(session_id, [chunk_key6], _promise=True) \
                .then(lambda *s: test_actor.set_result(s)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))
            mock_receiver_ref.cancel_receive(session_id, [chunk_key6])
            with self.assertRaises(ExecutionInterrupted):
                self.get_result(5)

            # SCENARIO 5: test creating writers without promise
            ref, statuses = receiver_manager_ref.create_data_writers(
                session_id, [chunk_key7], [data_size],
                test_actor,
                use_promise=False)
            self.assertIsNone(statuses[0])
            self.assertEqual(ref.uid, mock_receiver_ref.uid)
Example #16
    def testClientReadAndWrite(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            options.worker.lock_free_fileio = True
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                file_names = []

                def _write_data(ser, writer):
                    file_names.append(writer.filename)
                    self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                    with writer:
                        ser.write_to(writer)

                # test creating non-promised writer and write
                with storage_client.create_writer(session_id,
                                                  data_key1,
                                                  ser_data1.total_bytes,
                                                  (DataStorageDevice.DISK, ),
                                                  _promise=False) as writer:
                    _write_data(ser_data1, writer)
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key1])

                # test creating promised writer and write
                file_names[:] = []
                self.waitp(
                    storage_client.create_writer(
                        session_id, data_key2, ser_data1.total_bytes,
                        (DataStorageDevice.DISK, )).then(
                            functools.partial(_write_data, ser_data1)))
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.DISK)])

                def _read_data(reader):
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test creating reader when data exist in location
                result = self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.DISK, )).then(_read_data))[0]
                assert_allclose(result, data1)

                # test creating reader when no data in location (should raise)
                with self.assertRaises(IOError):
                    storage_client.create_reader(
                        session_id,
                        data_key2, (DataStorageDevice.SHARED_MEMORY, ),
                        _promise=False)

                # test creating reader when copy needed
                self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.SHARED_MEMORY, )).then(_read_data))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY),
                     (0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key2])
                while os.path.exists(file_names[0]):
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(file_names[0]))
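
Stripped of the actor machinery, the client read/write test above is a write-then-read round trip: serialize an array, push the bytes through a writer, pull them back through a reader and compare. Below is a self-contained sketch of that round trip, assuming pickle and a temporary file as stand-ins for the Mars serializer and storage handlers; _write_blob and _read_blob are hypothetical helpers.

import os
import pickle
import tempfile

import numpy as np
from numpy.testing import assert_allclose


def _write_blob(payload, path):
    # serialize and write, playing the part of "ser.write_to(writer)"
    with open(path, 'wb') as f:
        f.write(pickle.dumps(payload))


def _read_blob(path):
    # read and deserialize, playing the part of
    # "dataserializer.deserialize(reader.read())"
    with open(path, 'rb') as f:
        return pickle.loads(f.read())


if __name__ == '__main__':
    data = np.random.random((10, 10))
    fd, path = tempfile.mkstemp()
    os.close(fd)
    try:
        _write_blob(data, path)
        assert_allclose(_read_blob(path), data)
        print('round trip ok:', path)
    finally:
        os.remove(path)
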
Example #17
    def testReceiver(self):
        pool_addr = 'localhost:%d' % get_next_port()
        options.worker.spill_directory = tempfile.mkdtemp(
            prefix='mars_test_receiver_')
        session_id = str(uuid.uuid4())

        mock_data = np.array([1, 2, 3, 4])
        serialized_arrow_data = dataserializer.serialize(mock_data)
        data_size = serialized_arrow_data.total_bytes
        serialized_mock_data = serialized_arrow_data.to_buffer()
        serialized_crc32 = zlib.crc32(serialized_arrow_data.to_buffer())
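        # crc32 of the full serialized buffer; the scenarios below pass it as the
        # expected checksum to receive_data_part / finish_receive, and pass 0
        # instead when a checksum mismatch should be provoked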

        chunk_key1 = str(uuid.uuid4())
        chunk_key2 = str(uuid.uuid4())
        chunk_key3 = str(uuid.uuid4())
        chunk_key4 = str(uuid.uuid4())
        chunk_key5 = str(uuid.uuid4())
        chunk_key6 = str(uuid.uuid4())
        chunk_key7 = str(uuid.uuid4())
        chunk_key8 = str(uuid.uuid4())

        with start_transfer_test_pool(
                address=pool_addr,
                plasma_size=self.plasma_storage_size) as pool:
            receiver_ref = pool.create_actor(ReceiverActor,
                                             uid=str(uuid.uuid4()))

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                # check_status before any data exists and after it has been stored
                self.assertEqual(
                    receiver_ref.check_status(session_id, chunk_key1),
                    ReceiveStatus.NOT_STARTED)

                self.waitp(
                    storage_client.create_writer(
                        session_id, chunk_key1, serialized_arrow_data.total_bytes,
                        [DataStorageDevice.DISK])
                    .then(lambda writer: promise.finished()
                          .then(lambda *_: writer.write(serialized_arrow_data))
                          .then(lambda *_: writer.close())))
                self.assertEqual(
                    receiver_ref.check_status(session_id, chunk_key1),
                    ReceiveStatus.RECEIVED)
                storage_client.delete(session_id, chunk_key1)

                self.waitp(
                    storage_client.put_object(
                        session_id, chunk_key1, mock_data,
                        [DataStorageDevice.SHARED_MEMORY]))

                self.assertEqual(
                    receiver_ref.check_status(session_id, chunk_key1),
                    ReceiveStatus.RECEIVED)

                receiver_ref_p = test_actor.promise_ref(receiver_ref)

                # cancelling a transfer that never started (or is missing) is a no-op
                receiver_ref_p.cancel_receive(session_id, chunk_key2)

                # start creating writer
                receiver_ref_p.create_data_writer(session_id, chunk_key1, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(
                    self.get_result(5),
                    (receiver_ref.address, ReceiveStatus.RECEIVED))

                result = receiver_ref_p.create_data_writer(session_id,
                                                           chunk_key1,
                                                           data_size,
                                                           test_actor,
                                                           use_promise=False)
                self.assertTupleEqual(
                    result, (receiver_ref.address, ReceiveStatus.RECEIVED))

                receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5),
                                      (receiver_ref.address, None))

                result = receiver_ref_p.create_data_writer(session_id,
                                                           chunk_key2,
                                                           data_size,
                                                           test_actor,
                                                           use_promise=False)
                self.assertTupleEqual(
                    result, (receiver_ref.address, ReceiveStatus.RECEIVING))

                receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(
                    self.get_result(5),
                    (receiver_ref.address, ReceiveStatus.RECEIVING))

                receiver_ref_p.cancel_receive(session_id, chunk_key2)
                self.assertEqual(
                    receiver_ref.check_status(session_id, chunk_key2),
                    ReceiveStatus.NOT_STARTED)

                # test checksum error on receive_data_part
                receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.get_result(5)

                receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                                 serialized_mock_data, 0)

                with self.assertRaises(ChecksumMismatch):
                    self.get_result(5)

                # test checksum error on finish_receive
                receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5),
                                      (receiver_ref.address, None))

                receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                                 serialized_mock_data,
                                                 serialized_crc32)
                receiver_ref_p.finish_receive(session_id, chunk_key2, 0)

                receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                with self.assertRaises(ChecksumMismatch):
                    self.get_result(5)

                receiver_ref_p.cancel_receive(session_id, chunk_key2)

                # test intermediate cancellation
                receiver_ref_p.create_data_writer(session_id, chunk_key2, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5),
                                      (receiver_ref.address, None))

                receiver_ref_p.register_finish_callback(session_id, chunk_key2, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                receiver_ref_p.receive_data_part(
                    session_id, chunk_key2, serialized_mock_data[:64],
                    zlib.crc32(serialized_mock_data[:64]))
                receiver_ref_p.cancel_receive(session_id, chunk_key2)
                receiver_ref_p.receive_data_part(session_id, chunk_key2,
                                                 serialized_mock_data[64:],
                                                 serialized_crc32)
                with self.assertRaises(ExecutionInterrupted):
                    self.get_result(5)

                # test transfer in memory
                receiver_ref_p.register_finish_callback(session_id, chunk_key3, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5),
                                      (receiver_ref.address, None))

                receiver_ref_p.receive_data_part(
                    session_id, chunk_key3, serialized_mock_data[:64],
                    zlib.crc32(serialized_mock_data[:64]))
                receiver_ref_p.receive_data_part(session_id, chunk_key3,
                                                 serialized_mock_data[64:],
                                                 serialized_crc32)
                receiver_ref_p.finish_receive(session_id, chunk_key3,
                                              serialized_crc32)

                self.assertTupleEqual((), self.get_result(5))

                receiver_ref_p.create_data_writer(session_id, chunk_key3, data_size, test_actor, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(
                    self.get_result(5),
                    (receiver_ref.address, ReceiveStatus.RECEIVED))

                # test transfer in spill file
                def mocked_store_create(*_):
                    raise StorageFull

                with patch_method(PlasmaSharedStore.create,
                                  new=mocked_store_create):
                    with self.assertRaises(StorageFull):
                        receiver_ref_p.create_data_writer(session_id,
                                                          chunk_key4,
                                                          data_size,
                                                          test_actor,
                                                          ensure_cached=True,
                                                          use_promise=False)
                    # test receive aborted
                    receiver_ref_p.create_data_writer(
                        session_id, chunk_key4, data_size, test_actor, ensure_cached=False, _promise=True) \
                        .then(lambda *s: test_actor.set_result(s))
                    self.assertTupleEqual(self.get_result(5),
                                          (receiver_ref.address, None))

                    receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                        .then(lambda *s: test_actor.set_result(s)) \
                        .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                    receiver_ref_p.receive_data_part(
                        session_id, chunk_key4, serialized_mock_data[:64],
                        zlib.crc32(serialized_mock_data[:64]))
                    receiver_ref_p.cancel_receive(session_id, chunk_key4)
                    with self.assertRaises(ExecutionInterrupted):
                        self.get_result(5)

                    # test receive into spill
                    receiver_ref_p.create_data_writer(
                        session_id, chunk_key4, data_size, test_actor, ensure_cached=False, _promise=True) \
                        .then(lambda *s: test_actor.set_result(s))
                    self.assertTupleEqual(self.get_result(5),
                                          (receiver_ref.address, None))

                    receiver_ref_p.register_finish_callback(session_id, chunk_key4, _promise=True) \
                        .then(lambda *s: test_actor.set_result(s)) \
                        .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                    receiver_ref_p.receive_data_part(session_id, chunk_key4,
                                                     serialized_mock_data,
                                                     serialized_crc32)
                    receiver_ref_p.finish_receive(session_id, chunk_key4,
                                                  serialized_crc32)

                    self.assertTupleEqual((), self.get_result(5))

                # test intermediate error
                def mocked_store_create(*_):
                    raise SpillNotConfigured

                with patch_method(PlasmaSharedStore.create,
                                  new=mocked_store_create):
                    receiver_ref_p.create_data_writer(
                        session_id, chunk_key5, data_size, test_actor, ensure_cached=False, _promise=True) \
                        .then(lambda *s: test_actor.set_result(s),
                              lambda *s: test_actor.set_result(s, accept=False))

                    with self.assertRaises(SpillNotConfigured):
                        self.get_result(5)

                # test receive timeout
                receiver_ref_p.register_finish_callback(session_id, chunk_key6, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                receiver_ref_p.create_data_writer(session_id, chunk_key6, data_size, test_actor,
                                                  timeout=2, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5),
                                      (receiver_ref.address, None))
                receiver_ref_p.receive_data_part(
                    session_id, chunk_key6, serialized_mock_data[:64],
                    zlib.crc32(serialized_mock_data[:64]))

                with self.assertRaises(TimeoutError):
                    self.get_result(5)

                # test sender halt
                receiver_ref_p.register_finish_callback(session_id, chunk_key7, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s)) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))

                mock_ref = pool.actor_ref(test_actor.uid, address='MOCK_ADDR')
                receiver_ref_p.create_data_writer(
                    session_id, chunk_key7, data_size, mock_ref, _promise=True) \
                    .then(lambda *s: test_actor.set_result(s))
                self.assertTupleEqual(self.get_result(5),
                                      (receiver_ref.address, None))
                receiver_ref_p.receive_data_part(
                    session_id, chunk_key7, serialized_mock_data[:64],
                    zlib.crc32(serialized_mock_data[:64]))
                receiver_ref_p.notify_dead_senders(['MOCK_ADDR'])

                with self.assertRaises(WorkerDead):
                    self.get_result(5)

                # test checksum error on finish_receive with a non-promise writer
                result = receiver_ref_p.create_data_writer(session_id,
                                                           chunk_key8,
                                                           data_size,
                                                           test_actor,
                                                           use_promise=False)
                self.assertTupleEqual(result, (receiver_ref.address, None))

                receiver_ref_p.receive_data_part(session_id, chunk_key8,
                                                 serialized_mock_data,
                                                 serialized_crc32)
                receiver_ref_p.finish_receive(session_id, chunk_key8, 0)