Example #1
0
    def testDiskReadAndWritePacked(self, *_):
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor,
                              test_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            for handler._compress in self._get_compress_types():
                data_key1 = str(uuid.uuid4())

                storage_client.delete(session_id, [data_key1])
                self.rm_spill_dirs()

                block_data1 = dataserializer.dumps(data1,
                                                   compress=handler._compress)

                def _write_data(ser, writer):
                    with writer:
                        writer.write(ser)
                    return writer.filename

                handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes,
                                            packed=True, _promise=True) \
                    .then(functools.partial(_write_data, block_data1)) \
                    .then(test_actor.set_result,
                          lambda *exc: test_actor.set_result(exc, accept=False))
                file_name = self.get_result(5)
                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])
                self.assertTrue(os.path.exists(file_name))

                def _read_data(reader):
                    with reader:
                        return dataserializer.loads(reader.read())

                handler.create_bytes_reader(session_id, data_key1, packed=True, _promise=True) \
                    .then(_read_data) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5), data1)
Example #2
0
    def testDiskReadAndWriteMerger(self):
        import logging
        logging.basicConfig(level=logging.DEBUG)

        test_addr = f'127.0.0.1:{get_next_port()}'
        options.worker.filemerger.max_file_size = 2400
        options.worker.filemerger.concurrency = 16

        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor,
                              test_addr,
                              uid=StatusActor.default_uid())
            pool.create_actor(EventsActor, uid=EventsActor.default_uid())

            disk_file_merger_ref = pool.create_actor(
                DiskFileMergerActor, uid=DiskFileMergerActor.default_uid())

            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            session_id = str(uuid.uuid4())
            data_count = 30
            data = [
                np.random.rand(random.randint(10, 30), random.randint(10, 30))
                for _ in range(data_count)
            ]
            ser_data = [dataserializer.serialize(d) for d in data]

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.DISK))

            for handler._compress in self._get_compress_types():
                data_keys = [str(uuid.uuid4()) for _ in range(data_count)]

                promises = []
                for idx in range(data_count):
                    block_data = dataserializer.dumps(
                        data[idx], compress=handler._compress)

                    def _write_data(ser, writer):
                        with writer:
                            writer.write(ser)
                        return writer.filename

                    promises.append(
                        handler.create_bytes_writer(session_id,
                                                    data_keys[idx],
                                                    ser_data[idx].total_bytes,
                                                    packed=True,
                                                    with_merger_lock=True,
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _write_data,
                                                            block_data)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)

                for key in data_keys:
                    self.assertEqual(
                        sorted(
                            storage_manager_ref.get_data_locations(
                                session_id, [key])[0]),
                        [(0, DataStorageDevice.DISK)])

                dump_result = disk_file_merger_ref.dump_info()
                written_files = list(dump_result[2])
                for fn in written_files:
                    self.assertTrue(os.path.exists(fn))

                data_store = [None] * len(data)
                promises = []
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.loads(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(session_id,
                                                    data_keys[idx],
                                                    with_merger_lock=True,
                                                    packed=True,
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _read_data,
                                                            idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                data_store = [None] * len(data)
                promises = []
                for idx in range(data_count):

                    def _read_data(reader, idx):
                        with reader:
                            data_store[idx] = dataserializer.deserialize(
                                reader.read())

                    promises.append(
                        handler.create_bytes_reader(session_id,
                                                    data_keys[idx],
                                                    _promise=True).then(
                                                        functools.partial(
                                                            _read_data,
                                                            idx=idx)))
                promise.all_(promises).then(
                    lambda *_: test_actor.set_result(0),
                    lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(50)
                for true_data, read_data in zip(data, data_store):
                    assert_allclose(true_data, read_data)

                storage_client.delete(session_id, data_keys)
                pool.sleep(0.1)
                for fn in written_files:
                    self.assertFalse(os.path.exists(fn))