def benchmark_plasma():
    """Interactively benchmark memory and timing of a plasma put/get cycle.

    Allocates a large zero array, stores it in the plasma store at
    ``/tmp/plasma``, reads it back, and prints the memory delta (as reported
    by ``calc_mem``) and elapsed time after each step.  Drops into an
    ``ipdb`` debugger session before the read-back.
    """
    arr = np.zeros((int(1e8), 3))
    time.sleep(2)
    baseline_mem = calc_mem()
    t0 = time.time()

    def report(label):
        # Memory is sampled before the clock, matching the output layout.
        used = calc_mem() - baseline_mem
        elapsed = time.time() - t0
        print(f'{label}: {used} MB. t={elapsed:.2f}')

    store = plasma.connect('/tmp/plasma')
    report('have client')
    oid = store.put(arr)
    store.disconnect()
    report('done put')

    store = plasma.connect('/tmp/plasma')
    gc.collect()
    # NOTE(review): `arr` is still referenced at this point, so the message
    # below does not match what the code does -- confirm intent.
    report('deleted original array')

    import ipdb
    ipdb.set_trace()

    full_copy = store.get(oid)
    report('read full array')
    print(type(full_copy))
    del full_copy
    gc.collect()
    report('del array')

    a3 = store.get(oid)[0]
    report('read one entry')
    gc.collect()
    report('collect, done')
def start_plasma_store(path=DEFAULT_PLASMA_PATH, nbytes: int = GB200) -> subprocess.Popen:
    """Launch a ``plasma_store`` process and wait until it accepts connections.

    Args:
        path: Socket path passed to ``plasma_store -s``.
        nbytes: Store size passed to ``plasma_store -m``.

    Returns:
        The ``subprocess.Popen`` handle of the running store.

    Raises:
        Whatever ``plasma.connect`` raises when the store cannot be reached;
        in that case the spawned process is terminated first so it does not
        outlive the failed call.
    """
    # best practice is to allocate more space than we need. The limitation seems to be the size of /dev/shm
    _server = subprocess.Popen(["plasma_store", "-m", str(nbytes), "-s", path])
    try:
        # Probe connection only: if we can't connect we fail immediately.
        plasma.connect(path, num_retries=200)
    except BaseException:
        # Do not leak the child process when the probe fails or is interrupted.
        _server.kill()
        _server.wait()
        raise
    return _server
def start(path=DEFAULT_PLASMA_PATH, nbytes: int = GB100) -> subprocess.Popen:
    """Launch a ``plasma_store`` process and wait until it accepts connections.

    Args:
        path: Socket path passed to ``plasma_store -s``.
        nbytes: Store size passed to ``plasma_store -m``.

    Returns:
        The ``subprocess.Popen`` handle of the running store.

    Raises:
        ImportError: If ``PYARROW_AVAILABLE`` is false.
        Whatever ``plasma.connect`` raises when the store cannot be reached;
        in that case the spawned process is terminated first so it does not
        outlive the failed call.
    """
    if not PYARROW_AVAILABLE:
        raise ImportError("please run pip install pyarrow to use --use_plasma_view")
    # best practice is to allocate more space than we need. The limitation seems to be the size of /dev/shm
    _server = subprocess.Popen(["plasma_store", "-m", str(nbytes), "-s", path])
    try:
        # Probe connection only: if we can't connect we fail immediately.
        plasma.connect(path, num_retries=200)
    except BaseException:
        # Do not leak the child process when the probe fails or is interrupted.
        _server.kill()
        _server.wait()
        raise
    return _server
def initialize(self, plasma_store_name, use_exist_plasma_server):
    """Connect this object to a plasma store, starting one if appropriate.

    ``os.environ["LOCAL_RANK"]`` is checked to make sure only one Plasma
    Store Server is running on each local machine: the process whose local
    rank is 0 (or unset) starts the server unless
    ``use_exist_plasma_server`` is true; every other process only connects.

    Args:
        plasma_store_name: Name of the store, or ``None`` to derive one from
            the current working directory in the connect-only branch.
        use_exist_plasma_server: When true, never start a server here.

    Raises:
        ValueError: If ``self.connected`` is already true.
    """
    if self.connected:
        raise ValueError("Plasma has already been initialized!")

    local_rank = int(os.environ.get("LOCAL_RANK", 0))

    if local_rank == 0 and not use_exist_plasma_server:
        memory = psutil.virtual_memory()
        plasma_store_memory = int(memory.available * self.use_mem_percent)
        self.plasma_store_name, self.plasma_store_path, self.plasma_store_proc = _start_plasma_store(
            plasma_store_memory, plasma_store_name
        )
        self.connected = True
        logger.info(
            f"Initializing Plasma with {plasma_store_memory // 1e9} GB Memory\n"
            f" Plasma Location on {self.plasma_store_name}"
        )
        self.client = plasma.connect(self.plasma_store_path)
    else:
        time.sleep(1)
        # init plasma name; honour an explicitly supplied name instead of
        # silently keeping whatever self.plasma_store_name held before
        if plasma_store_name is None:
            self.plasma_store_name = _hash(os.getcwd())
        else:
            self.plasma_store_name = plasma_store_name
        # assume plasma server is running
        self.plasma_store_path = f"/tmp/torchfly/plasma/{self.plasma_store_name}/plasma.sock"
        logger.info(f"Plasma Store on {local_rank} is connected without starting server!")
        self.client = plasma.connect(self.plasma_store_path)
        # NOTE(review): self.connected is not set in this branch, so the
        # guard above never fires for connect-only processes -- confirm.

    logger.info("Plasma Client Connected!")
def setup_method(self, test_method):
    """Start a plasma store and attach two clients to it before a test."""
    import pyarrow.plasma as plasma

    # Valgrind is enabled only when PLASMA_VALGRIND is exactly "1".
    want_valgrind = os.getenv("PLASMA_VALGRIND") == "1"
    store_name, self.p = start_plasma_store(use_valgrind=want_valgrind)

    # Primary client.
    self.plasma_client = plasma.connect(store_name, "", 64)
    # Second client, for the eviction test.
    self.plasma_client2 = plasma.connect(store_name, "", 0)
def setup_method(self, test_method):
    """Enter a plasma store context and connect two clients before a test."""
    import pyarrow.plasma as plasma

    # Keep the context manager on the instance; it is entered manually here.
    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
        use_valgrind=USE_VALGRIND,
    )
    self.plasma_store_ctx = store_ctx
    self.plasma_store_name, self.p = store_ctx.__enter__()

    # Two independent connections to the same store.
    self.plasma_client = plasma.connect(self.plasma_store_name)
    self.plasma_client2 = plasma.connect(self.plasma_store_name)
def _init_shared_store(self):
    """Connect to the worker's plasma socket and build the shared store."""
    import pyarrow.plasma as plasma
    from .storage.sharedstore import PlasmaSharedStore, PlasmaKeyMapActor

    key_mapper = self.ctx.actor_ref(uid=PlasmaKeyMapActor.default_uid())

    try:
        client = plasma.connect(options.worker.plasma_socket)
    except TypeError:  # pragma: no cover
        # Fall back to the three-argument call form when the one-argument
        # form is rejected.
        client = plasma.connect(options.worker.plasma_socket, '', 0)

    self._plasma_client = client
    self._shared_store = PlasmaSharedStore(self._plasma_client, key_mapper)
def setup_method(self, test_method):
    """Enter a plasma store context and connect two clients before a test.

    The store is started with a single memory-mapped file only for
    ``test_use_one_memory_mapped_file``.
    """
    single_mmap = test_method == self.test_use_one_memory_mapped_file

    import pyarrow.plasma as plasma

    # Keep the context manager on the instance; it is entered manually here.
    self.plasma_store_ctx = start_plasma_store(
        use_valgrind=USE_VALGRIND,
        use_one_memory_mapped_file=single_mmap,
    )
    store_name, self.p = self.plasma_store_ctx.__enter__()

    self.plasma_client = plasma.connect(store_name, "", 64)
    self.plasma_client2 = plasma.connect(store_name, "", 0)
def setup_method(self, test_method):
    """Start a plasma store and attach two clients to it before a test.

    The store is started with a single memory-mapped file only for
    ``test_use_one_memory_mapped_file``.
    """
    single_mmap = test_method == self.test_use_one_memory_mapped_file

    import pyarrow.plasma as plasma

    # Valgrind is enabled only when PLASMA_VALGRIND is exactly "1".
    want_valgrind = os.getenv("PLASMA_VALGRIND") == "1"
    store_name, self.p = start_plasma_store(
        use_valgrind=want_valgrind,
        use_one_memory_mapped_file=single_mmap,
    )

    # Primary client.
    self.plasma_client = plasma.connect(store_name, "", 64)
    # Second client, for the eviction test.
    self.plasma_client2 = plasma.connect(store_name, "", 0)
def setup_method(self, test_method):
    """Launch a plasma store for the test and connect two clients.

    ``use_one_memory_mapped_file`` is switched on only when the test being
    set up is ``test_use_one_memory_mapped_file``.
    """
    is_single_mmap_test = (
        test_method == self.test_use_one_memory_mapped_file)

    import pyarrow.plasma as plasma

    store_options = {
        # Valgrind is enabled only when PLASMA_VALGRIND is exactly "1".
        "use_valgrind": os.getenv("PLASMA_VALGRIND") == "1",
        "use_one_memory_mapped_file": is_single_mmap_test,
    }
    socket_name, self.p = start_plasma_store(**store_options)

    # Primary client.
    self.plasma_client = plasma.connect(socket_name, "", 64)
    # Second client, for the eviction test.
    self.plasma_client2 = plasma.connect(socket_name, "", 0)
def setUpClass(cls):
    """Start a plasma store for the test class and wire it into ``options``."""
    import pyarrow.plasma as plasma
    from mars import kvstore

    cls._plasma_store = plasma.start_plasma_store(cls.plasma_storage_size)
    # The first element yielded by the store context is used as the socket.
    socket_path = cls._plasma_store.__enter__()[0]
    cls.plasma_socket = socket_path
    options.worker.plasma_socket = socket_path
    options.worker.spill_directory = cls.spill_dir

    try:
        client = plasma.connect(options.worker.plasma_socket)
    except TypeError:
        # Fall back to the three-argument call form when the one-argument
        # form is rejected.
        client = plasma.connect(options.worker.plasma_socket, '', 0)
    cls._plasma_client = client

    cls._kv_store = kvstore.get(options.kv_store)
def start(self):
    """Start plasma server.

    Probes ``self.path`` for a running store first; a new ``plasma_store``
    process (sized by ``self.size_shared_mem``) is spawned only when the
    probe fails.
    """
    try:
        # The keyword is ``num_retries``; the previous ``int_num_retries``
        # raised TypeError, which the handler below swallowed, so a new store
        # was spawned on every call even when one was already running.
        plasma.connect(self.path, num_retries=2)
    except Exception:
        Popen(
            "plasma_store -m {} -s {}".format(self.size_shared_mem, self.path),
            shell=True,
            stderr=PIPE,
        )
        print("plasma_store -m {} -s {} is acitvated!".format(
            self.size_shared_mem, self.path))
        # Short pause after spawning the store process.
        # NOTE(review): the original nesting of this sleep is not visible in
        # the collapsed source -- confirm it belongs inside the handler.
        time.sleep(0.1)
def test_use_huge_pages():
    """Create a large object in a store backed by ``/mnt/hugepages``.

    The store process is killed in a ``finally`` block so that a failing
    connect or allocation does not leave it running.
    """
    import pyarrow.plasma as plasma

    plasma_store_name, p = start_plasma_store(
        plasma_directory="/mnt/hugepages", use_hugepages=True)
    try:
        plasma_client = plasma.connect(plasma_store_name, "", 64)
        create_object(plasma_client, 100000000)
    finally:
        p.kill()
def __init__(self, namespace="default", path="/tmp/plasma"):
    """Connect to the plasma store at *path* and select *namespace*.

    Besides the client, the instance records the value returned by
    ``self.size()`` as ``bytes`` and a rounded megabyte label as ``mb``.
    """
    self.path = path
    self.namespace = namespace
    self.client = plasma.connect(self.path, num_retries=5)

    self.bytes = self.size()
    megabytes = round(self.bytes / 1000000)
    self.mb = f"{megabytes} MB"

    self.set_namespace(namespace)
def transfer(df):
    """Write *df* into the plasma store and hand its object id to a worker.

    The DataFrame is serialised as a PyArrow record batch into a plasma
    buffer; the hex id of the sealed object is then passed to
    ``read.delay`` and the resulting task is awaited.
    ``df`` is passed in by the measuring wrapper.
    """
    client = plasma.connect("/tmp/sock/plasma.sock")

    # Convert the Pandas DataFrame into a PyArrow RecordBatch
    record_batch = pa.RecordBatch.from_pandas(df)

    # Create the Plasma object from the PyArrow RecordBatch. Most of the work here
    # is done to determine the size of buffer to request from the object store.
    object_id = plasma.ObjectID(np.random.bytes(20))
    mock_sink = pa.MockOutputStream()
    stream_writer = pa.RecordBatchStreamWriter(mock_sink, record_batch.schema)
    stream_writer.write_batch(record_batch)
    stream_writer.close()
    data_size = mock_sink.size()
    buf = client.create(object_id, data_size)

    # Write the PyArrow RecordBatch to Plasma
    stream = pa.FixedSizeBufferWriter(buf)
    stream_writer = pa.RecordBatchStreamWriter(stream, record_batch.schema)
    stream_writer.write_batch(record_batch)
    stream_writer.close()

    # Seal the Plasma object
    client.seal(object_id)

    # Raw string: "\(" in a normal literal is an invalid escape sequence.
    id_match = re.search(r'ObjectID\((.*)\)', str(object_id))
    task = read.delay(id_match.group(1))
    task.wait()
def worker(worker_args):
    """Apply a rolling function to one chunk of groups read from plasma.

    *worker_args* is an 8-tuple: store name, DataFrame object id, groups
    object id, rolling keyword arguments, chunk selector, the function to
    apply, and its extra positional / keyword arguments.  The concatenated
    result is put back into the store and its object id returned.
    """
    (
        plasma_store_name,
        object_id,
        groups_id,
        attribute2value,
        chunk,
        func,
        args,
        kwargs,
    ) = worker_args

    store = plasma.connect(plasma_store_name)
    frame = store.get(object_id)
    chunk_groups = store.get(groups_id)[chunk]

    def rolled(name, indexes):
        # Roll over this group's rows and prefix the index with the group name.
        piece = frame.iloc[indexes].rolling(**attribute2value).apply(func, *args, **kwargs)
        piece.index = pd.MultiIndex.from_product([[name], piece.index])
        return piece

    pieces = [rolled(name, indexes) for name, indexes in chunk_groups]
    return store.put(pd.concat(pieces))
def upload_partitions_from_plasma(self, partition_id, plasma_object_id, object_store_address):
    """Copy a partition from a plasma store into Ray and register its ref.

    Returns 0 after the meta store has acknowledged the new partition ref.
    """
    import pyarrow.plasma as plasma

    store = plasma.connect(object_store_address)
    ref = ray.put(store.get(plasma_object_id))

    # Block until the meta store has recorded the reference.
    pending = self.meta_store_handle.set_partition_ref(partition_id, [ref])
    ray.get(pending)
    return 0
def _delete_plasma_object(self, plasma_client, object_id):
    """Delete an object from the plasma store and wait for it to disappear.

    Original author's rationale: the internal ``_delete`` function hangs for
    no apparent reason, hence this helper.  Deleting an object sometimes has
    no effect (no error, but the object is still there) because it may still
    be referenced, in particular by the current ``plasma_client``.  This
    function therefore disconnects the given client first to drop its
    references, reconnects, issues the delete, and polls until the object is
    gone or the retry budget is used up.

    Parameters
    ----------
    plasma_client:
        The current plasma client (it may hold references to the object).
        May be falsy, in which case nothing is disconnected.
    object_id:
        Id of the object to delete.

    Returns
    -------
    plasma_client
        The freshly connected client that replaces the one passed in.
    """
    if plasma_client:
        # Disconnect and reconnect to release this client's refs on the object.
        plasma_client.disconnect()
    fresh_client = plasma.connect(self.plasma_store_name)
    fresh_client.delete([object_id])

    # Original author's note: plasma's delete appears to be handled by a
    # background thread and does not take effect immediately, so poll until
    # the object is really gone.
    remaining = 120
    while fresh_client.contains(object_id):
        self.stop_event.wait(1)
        if remaining < 0:
            break
        remaining -= 1

    return fresh_client
def test_plasma(self):
    """Round-trip a measurement set through plasma and compare the copies.

    Builds the drop chain FileDROP -> MSPlasmaWriter -> PlasmaDROP ->
    MSPlasmaReader -> FileDROP, runs it, compares input and output, and
    finally checks that the PlasmaDROP's ``dataURL`` can be turned back into
    a plasma object id.
    """
    import binascii

    in_file = '/tmp/test.ms'
    out_file = '/tmp/copy.ms'

    with tarfile.open('./data/test_ms.tar.gz', 'r') as ref:
        ref.extractall('/tmp/')

    a = FileDROP('a', 'a', filepath=in_file)
    b = MSPlasmaWriter('b', 'b')
    c = PlasmaDROP('c', 'c')
    d = MSPlasmaReader('d', 'd')
    e = FileDROP('e', 'e', filepath=out_file)

    b.addInput(a)
    b.addOutput(c)
    d.addInput(c)
    d.addOutput(e)

    # Check the MS DATA content is the same as original
    with droputils.DROPWaiterCtx(self, e, 5):
        a.setCompleted()

    # NOTE(review): placed after the waiter context on the assumption that
    # the context waits for `e` on exit -- confirm against the original file.
    self.compare_ms(in_file, out_file)

    # check we can go from dataURL to plasma ID
    client = plasma.connect("/tmp/plasma")
    # str.decode("hex") only exists on Python 2; unhexlify is the portable
    # way to turn the hex part of the URL into raw id bytes.
    object_id_bytes = binascii.unhexlify(c.dataURL.split('//')[1])
    client.get(plasma.ObjectID(object_id_bytes))
def producer(data, batch_max_rows=BATCH_MAX_ROWS):
    """Store *data* in the plasma store as consecutive, randomly sized batches.

    Args:
        data (numpy): The input data saved into the plasma store.
        batch_max_rows (int): Upper bound (inclusive) for the number of rows
            drawn for each batch.

    Returns:
        float: The checksum accumulated over all stored rows.
    """
    logging.info("Producer: connecting to the plasma store")
    store = plasma.connect(PLASMA_STORE_LOCATION)
    logging.info(
        "Producer: starting to load data (%i rows) onto the plasma store",
        len(data))

    checksum = 0.0
    offset = 0
    batch_index = 0
    while offset < len(data):
        # Each batch takes between 1 and batch_max_rows rows.
        batch_rows = random.randint(1, batch_max_rows)
        batch = data[offset:offset + batch_rows]
        checksum = get_data_checksum(checksum, batch)
        logging.debug("Producer: storing batch number: %i", batch_index)
        put_df(store, batch_index, batch)
        offset += batch_rows
        batch_index += 1

    store.disconnect()
    logging.info("Producer: total %i rows distributed (checksum: %f)",
                 len(data), checksum)
    return checksum
def _init_chunk_store(self):
    """Connect to the worker's plasma socket and build the chunk store."""
    import pyarrow.plasma as plasma
    from .chunkstore import PlasmaChunkStore, PlasmaKeyMapActor

    key_mapper = self.ctx.actor_ref(uid=PlasmaKeyMapActor.default_name())
    socket_path = options.worker.plasma_socket

    self._plasma_client = plasma.connect(socket_path, '', 0)
    self._chunk_store = PlasmaChunkStore(self._plasma_client, key_mapper)
def init_plasma(mem=1000):
    """Initializes a Plasma object store.

    On the first call a ``plasma_store`` process is started on a randomly
    named socket under ``/tmp`` and a client is connected to it; later calls
    return the client created by the first call.

    Args:
        mem (int, optional): The argument specifies the size of the store in
            megabytes. Defaults to 1000.

    Returns:
        (PlasmaClient): Plasma client object
    """
    import os

    global plasma_info

    if plasma_info.init:
        print("Plasma has already been initialized before.")
        return plasma_info.plasma_client

    import pyarrow.plasma as plasma
    import string

    plasma_info.plasma = plasma

    # get random string which will make it unlikely that two instances of plasma are trying to use the same file
    characters = list(string.ascii_uppercase + string.ascii_lowercase + string.digits)
    rstr = "".join(np.random.choice(characters, 10))
    plasma_info.plasma_client_file_name = "/tmp/plasma_" + rstr

    # Look for plasma_store next to the running interpreter.  Slicing a fixed
    # six characters off sys.executable only worked when the interpreter was
    # named exactly "python" (not "python3", "python3.8", ...).
    plasma_store_executable = os.path.join(
        os.path.dirname(sys.executable), "plasma_store")

    # Run Plasma
    system_run(f"{plasma_store_executable} -m {int(mem * 1000000)} -s {plasma_info.plasma_client_file_name}")
    plasma_info.plasma_client = plasma.connect(plasma_info.plasma_client_file_name)
    plasma_info.init = True
    return plasma_info.plasma_client
def client(self):
    """Return the plasma client, connecting on first use.

    The connection is created once with ``Config.STORE_SOCKET_NAME`` and
    ``Config.CONN_NUM_RETRIES`` and cached on the instance.

    Returns:
        A plasma client.

    Raises:
        OrchestNetworkError: If connecting to ``Config.STORE_SOCKET_NAME``
            raised ``OSError``.
    """
    if self._client is None:
        try:
            self._client = plasma.connect(Config.STORE_SOCKET_NAME,
                                          num_retries=Config.CONN_NUM_RETRIES)
        except OSError:
            raise error.OrchestNetworkError(
                "Failed to connect to in-memory object store.")

    return self._client
def worker_apply(worker_args):
    """Apply a function to one chunk of a Series read from plasma.

    *worker_args* is a 9-tuple: store name, Series object id, chunk
    selector, the function, a progress-bar flag, a queue for progress
    messages, this worker's index, and extra positional / keyword arguments.
    The result is put back into the store and its object id returned.
    """
    (plasma_store_name, object_id, chunk, func, progress_bar, queue, index,
     args, kwargs) = worker_args

    store = plasma.connect(plasma_store_name)
    series = store.get(object_id)

    calls = c_uint64(0)
    last_push_time = c_double(time())

    def with_progress(func):
        def wrapper(*args, **kwargs):
            calls.value += 1
            now = time()
            # Push an update only after the refresh interval has elapsed.
            if now - last_push_time.value >= REFRESH_PROGRESS_TIME:
                queue.put_nowait((index, calls.value, False))
                last_push_time.value = now
            return func(*args, **kwargs)

        return wrapper

    if progress_bar:
        func_to_apply = with_progress(func)
    else:
        func_to_apply = func

    res = series[chunk].apply(func_to_apply, *args, **kwargs)

    if progress_bar:
        # Final message carries the total count and the "done" flag.
        queue.put((index, calls.value, True))

    return store.put(res)
def __init__(self,
             fetch_indexes: List[Tuple[str, int]],
             block_sizes: List[int],
             block_holder_mapping: Dict[str, BlockHolderActorHandlerWrapper],
             plasma_store_socket_name: str = None):
    """Record the blocks to fetch and choose how their data is read.

    Data is read with ``ray.get`` when Ray is initialized in this process;
    otherwise a plasma client is connected to *plasma_store_socket_name* and
    buffers are read through it directly.
    """
    assert len(fetch_indexes) == len(block_sizes), \
        "The length of fetch_indexes and block_sizes should be equalled"

    self._fetch_indexes: List[Tuple[str, int]] = fetch_indexes
    self._block_sizes = block_sizes
    self._total_size = sum(self._block_sizes)
    self._block_holder_mapping = block_holder_mapping

    self._resolved = False
    self._resolved_block: Dict[int, ray.ObjectID] = {}

    self._plasma_store_socket_name = plasma_store_socket_name

    in_ray_worker: bool = ray.is_initialized()
    self._get_data_func = ray.get
    if in_ray_worker:
        return

    # if the current process is not a Ray worker, the
    # plasma_store_socket_name must be set
    assert plasma_store_socket_name is not None, "plasma_store_socket_name must be set"
    plasma_client: Optional[PlasmaClient] = plasma.connect(
        plasma_store_socket_name)

    def get_by_plasma(object_id: ray.ObjectID):
        # Per the original author, reading the buffers straight from plasma
        # should be faster because it avoids a copy.
        plasma_object_id = plasma.ObjectID(object_id.binary())
        buffers = plasma_client.get_buffers([plasma_object_id])
        return buffers[0]

    self._get_data_func = get_by_plasma
def write_to_plasma(df, name):
    """Store *df* in plasma and record its object id under *name*.

    The DataFrame is serialised as a PyArrow record batch into a plasma
    buffer at ``/tmp/plasma``; the new object id is then saved in the
    ``plasma_state.pkl`` mapping, which must already exist.
    """
    print("Connecting to Plasma store...")
    store = plasma.connect("/tmp/plasma")

    # Convert the Pandas DataFrame into a PyArrow RecordBatch
    print("Converting df to recordbatch...")
    batch = pa.RecordBatch.from_pandas(df)

    def stream_batch_into(sink):
        # Serialise the batch into *sink* using the stream format.
        writer = pa.RecordBatchStreamWriter(sink, batch.schema)
        writer.write_batch(batch)
        writer.close()

    # A first pass into a mock sink determines the buffer size to request
    # from the object store.
    print("Determine size of buffer to request etc...")
    object_id = plasma.ObjectID(np.random.bytes(20))
    size_probe = pa.MockOutputStream()
    stream_batch_into(size_probe)
    buf = store.create(object_id, size_probe.size())

    # Write the PyArrow RecordBatch to Plasma
    print("Write the recordbatch to Plasma...")
    stream_batch_into(pa.FixedSizeBufferWriter(buf))

    # Seal the Plasma object
    print("Sealing the plasma object in store")
    store.seal(object_id)

    # end the client
    print("Disconnecting from plasma store")
    store.disconnect()

    # Write the new object ID
    print("Storing the object_id to plasma_store")
    # NOTE(review): pickle.load is only safe if plasma_state.pkl is trusted.
    with open("plasma_state.pkl", "rb") as state_file:
        plasma_state = pickle.load(state_file)

    plasma_state[name] = object_id

    with open("plasma_state.pkl", "wb") as state_file:
        pickle.dump(plasma_state, state_file)
def run_detector(detection_queue, avg_speed, start):
    """Run object detection forever on frames announced via *detection_queue*.

    Each queue item is a string key.  The input frame is looked up in plasma
    under the SHA-1 of the key and the detection result is stored under the
    SHA-1 of ``"out-" + key``.  ``start.value`` holds the timestamp of the
    detection in progress (0.0 when idle) and ``avg_speed.value`` a running
    average that weights the previous value 9:1 against the newest duration.
    """
    print(f"Starting detection process: {os.getpid()}")
    listen()
    store = plasma.connect("/tmp/plasma")
    detector = ObjectDetector()

    def plasma_id_for(key):
        # Derive a plasma object id from the SHA-1 digest of *key*.
        return plasma.ObjectID(hashlib.sha1(str.encode(key)).digest())

    while True:
        object_id_str = detection_queue.get()
        frame_id = plasma_id_for(object_id_str)
        result_id = plasma_id_for(f"out-{object_id_str}")

        frame = store.get(frame_id, timeout_ms=0)
        if frame is plasma.ObjectNotAvailable:
            # Skip keys whose frame is not available in the store.
            continue

        # detect and put the output in the plasma store
        start.value = datetime.datetime.now().timestamp()
        store.put(detector.detect_raw(frame), result_id)
        duration = datetime.datetime.now().timestamp() - start.value
        start.value = 0.0

        avg_speed.value = (avg_speed.value * 9 + duration) / 10
def test_delayed_start(self):
    """A plasma manager started late must learn about existing objects."""
    num_objects = 10

    # Create some objects using one client.
    object_ids = [random_object_id() for _ in range(num_objects)]
    for oid in object_ids:
        create_object_with_id(self.client, oid, 2000, 2000)

    # Wait until the objects have been sealed in the store.
    ready, waiting = self.client.wait(object_ids, num_returns=num_objects)
    self.assertEqual(set(ready), set(object_ids))
    self.assertEqual(waiting, [])

    # Start a second plasma manager attached to the same store.
    manager_name, self.p5, self.port2 = ray.plasma.start_plasma_manager(
        self.store_name,
        self.redis_address,
        use_valgrind=USE_VALGRIND)
    self.processes_to_kill = [self.p5] + self.processes_to_kill

    # Check that the second manager knows about existing objects.
    second_client = plasma.connect(self.store_name, manager_name, 64)
    ready, waiting = [], object_ids
    all_ready = False
    while not all_ready:
        # Poll without blocking until every object is reported ready.
        ready, waiting = second_client.wait(object_ids,
                                            num_returns=num_objects,
                                            timeout=0)
        all_ready = len(ready) == len(object_ids)

    self.assertEqual(set(ready), set(object_ids))
    self.assertEqual(waiting, [])
def post_create(self):
    """Connect to the remote plasma socket and wrap it in a chunk store."""
    super(TransferTestActor, self).post_create()

    remote_client = plasma.connect(self._remote_plasma_socket, '', 0)
    self._remote_plasma_client = remote_client

    kv_ref = self.ctx.actor_ref(KVStoreActor.default_name())
    self._remote_store = PlasmaChunkStore(self._remote_plasma_client, kv_ref)
def run_transfer_worker(pool_address, session_id, chunk_keys, spill_dir, msg_queue):
    """Run a transfer-test worker holding *chunk_keys* in spill files and plasma.

    Starts a 10 MiB plasma store and a transfer test pool at *pool_address*,
    creates two sender and two receiver actors, writes all but the last seven
    chunks to spill files and puts the last seven into the plasma store, then
    publishes the plasma socket name on *msg_queue* and keeps polling the
    queue.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost -- confirm against the original file.
    """
    options.worker.spill_directory = spill_dir
    plasma_size = 1024 * 1024 * 10

    # don't use multiple with-statement as we need the options be forked
    with plasma.start_plasma_store(plasma_size) as store_args:
        options.worker.plasma_socket = plasma_socket = store_args[0]
        plasma_client = plasma.connect(plasma_socket, '', 0)

        with start_transfer_test_pool(address=pool_address, plasma_size=plasma_size) as pool:
            chunk_holder_ref = pool.actor_ref(ChunkHolderActor.default_name())
            mapper_ref = pool.actor_ref(PlasmaKeyMapActor.default_name())
            plasma_store = PlasmaChunkStore(plasma_client, mapper_ref)

            # Two sender/receiver pairs, each under a fresh random uid.
            for _ in range(2):
                pool.create_actor(SenderActor, uid='%s' % str(uuid.uuid4()))
                pool.create_actor(ReceiverActor, uid='%s' % str(uuid.uuid4()))

            # All chunks except the last seven go to spill files ...
            for idx in range(0, len(chunk_keys) - 7):
                data = np.ones((640 * 1024,), dtype=np.int16) * idx
                write_spill_file(chunk_keys[idx], data)
            # ... and the last seven are put into plasma and registered.
            for idx in range(len(chunk_keys) - 7, len(chunk_keys)):
                data = np.ones((640 * 1024,), dtype=np.int16) * idx
                plasma_store.put(session_id, chunk_keys[idx], data)
                chunk_holder_ref.register_chunk(session_id, chunk_keys[idx])

            # Publish the socket name on the queue.
            msg_queue.put(plasma_socket)
            t = time.time()
            # Poll the queue; give up once it has been found empty more than
            # 60 seconds after the socket name was published.
            while True:
                try:
                    msg_queue.get_nowait()
                except Empty:
                    if time.time() > t + 60:
                        raise SystemError('Transfer finish timed out.')
                    pool.sleep(0.1)
def setup(self, size):
    """Start a plasma store, connect a client, and prepare random data.

    ``self.data`` holds ``size // 8`` normally distributed values.
    """
    store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
    # Keep the context manager on the instance; it is entered manually here.
    self.plasma_store_ctx = store_ctx
    store_name, _proc = store_ctx.__enter__()

    self.plasma_client = plasma.connect(store_name, "", 64)
    self.data = np.random.randn(size // 8)
def test_use_huge_pages():
    """Create a large object in a store backed by ``/mnt/hugepages``."""
    import pyarrow.plasma as plasma

    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
        plasma_directory="/mnt/hugepages",
        use_hugepages=True,
    )
    with store_ctx as (store_name, _proc):
        client = plasma.connect(store_name, "", 64)
        create_object(client, 100000000)
def test_use_huge_pages():
    """Create a 10**8-sized object in a 2 * 10**9 store on ``/mnt/hugepages``."""
    import pyarrow.plasma as plasma

    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=2*10**9,
        plasma_directory="/mnt/hugepages",
        use_hugepages=True,
    )
    with store_ctx as (store_name, _proc):
        client = plasma.connect(store_name)
        create_object(client, 10**8)
def setup_method(self, test_method):
    """Enter a small plasma store with an external store and connect to it."""
    import pyarrow.plasma as plasma

    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=1000 * 1024,
        use_valgrind=USE_VALGRIND,
        external_store=EXTERNAL_STORE,
    )
    # Keep the context manager on the instance; it is entered manually here.
    self.plasma_store_ctx = store_ctx
    self.plasma_store_name, self.p = store_ctx.__enter__()

    self.plasma_client = plasma.connect(self.plasma_store_name)
def test_plasma_client_sharing():
    """A fetched buffer must stay usable after its client is deleted.

    Statement order is the point of this test: the client is dropped before
    the buffer is read and released.
    """
    import pyarrow.plasma as plasma

    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY)
    with store_ctx as (store_name, _proc):
        client = plasma.connect(store_name)
        oid = client.put(np.zeros(3))
        fetched = client.get(oid)
        del client
        assert (fetched == np.zeros(3)).all()
        del fetched  # This segfaulted pre ARROW-2448.
def test_plasma_tf_op(use_gpu=False):
    """Run the plasma TensorFlow op test for several numeric dtypes.

    Skipped when the TensorFlow op is not available after building it.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()
    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes = [np.float32, np.float64,
              np.int8, np.int16, np.int32, np.int64]

    with plasma.start_plasma_store(10**8) as (store_name, _proc):
        client = plasma.connect(store_name, "", 0)
        for dtype in dtypes:
            run_tensorflow_test_with_dtype(tf, plasma, store_name,
                                           client, use_gpu, dtype)
def test_plasma_list():
    """Check the per-object fields reported by ``PlasmaClient.list()``.

    Covers data/metadata sizes, the reference count while a fetched value is
    held, the created -> sealed state transition, and the creation
    timestamp / construct duration (compared with a slack of 1.5 seconds).
    """
    import pyarrow.plasma as plasma
    with plasma.start_plasma_store(
            plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY) \
            as (plasma_store_name, p):
        plasma_client = plasma.connect(plasma_store_name)

        # Test sizes
        u, _, _ = create_object(plasma_client, 11, metadata_size=7, seal=False)
        l1 = plasma_client.list()
        assert l1[u]["data_size"] == 11
        assert l1[u]["metadata_size"] == 7

        # Test ref_count
        v = plasma_client.put(np.zeros(3))
        # Ref count has already been released
        # XXX flaky test, disabled (ARROW-3344)
        # l2 = plasma_client.list()
        # assert l2[v]["ref_count"] == 0
        a = plasma_client.get(v)
        l3 = plasma_client.list()
        # Holding the fetched value `a` is expected to count as one reference.
        assert l3[v]["ref_count"] == 1
        del a

        # Test state
        w, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        l4 = plasma_client.list()
        assert l4[w]["state"] == "created"
        plasma_client.seal(w)
        l5 = plasma_client.list()
        assert l5[w]["state"] == "sealed"

        # Test timestamps
        slack = 1.5  # seconds
        t1 = time.time()
        x, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        t2 = time.time()
        l6 = plasma_client.list()
        assert t1 - slack <= l6[x]["create_time"] <= t2 + slack

        # Leave a measurable gap between creation and sealing.
        time.sleep(2.0)
        t3 = time.time()
        plasma_client.seal(x)
        t4 = time.time()
        l7 = plasma_client.list()
        assert t3 - t2 - slack <= l7[x]["construct_duration"]
        assert l7[x]["construct_duration"] <= t4 - t1 + slack
def test_plasma_list():
    """Check the per-object fields reported by ``PlasmaClient.list()``.

    Covers data/metadata sizes, the reference count before and while a
    fetched value is held, the created -> sealed state transition, and the
    creation timestamp / construct duration (compared after rounding the
    wall-clock bounds outward to whole seconds).
    """
    import pyarrow.plasma as plasma
    with plasma.start_plasma_store(
            plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY) \
            as (plasma_store_name, p):
        plasma_client = plasma.connect(plasma_store_name, "", 0)

        # Test sizes
        u, _, _ = create_object(plasma_client, 11, metadata_size=7, seal=False)
        l1 = plasma_client.list()
        assert l1[u]["data_size"] == 11
        assert l1[u]["metadata_size"] == 7

        # Test ref_count
        v = plasma_client.put(np.zeros(3))
        l2 = plasma_client.list()
        # Ref count has already been released
        assert l2[v]["ref_count"] == 0
        a = plasma_client.get(v)
        l3 = plasma_client.list()
        # Holding the fetched value `a` is expected to count as one reference.
        assert l3[v]["ref_count"] == 1
        del a

        # Test state
        w, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        l4 = plasma_client.list()
        assert l4[w]["state"] == "created"
        plasma_client.seal(w)
        l5 = plasma_client.list()
        assert l5[w]["state"] == "sealed"

        # Test timestamps
        t1 = time.time()
        x, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        t2 = time.time()
        l6 = plasma_client.list()
        assert math.floor(t1) <= l6[x]["create_time"] <= math.ceil(t2)

        # Leave a measurable gap between creation and sealing.
        time.sleep(2.0)
        t3 = time.time()
        plasma_client.seal(x)
        t4 = time.time()
        l7 = plasma_client.list()
        assert math.floor(t3 - t2) <= l7[x]["construct_duration"]
        assert l7[x]["construct_duration"] <= math.ceil(t4 - t1)
def test_plasma_tf_op(use_gpu=False):
    """Run the plasma TensorFlow op test for several numeric dtypes.

    Skipped when the TensorFlow op is not available after building it.
    Afterwards every object listed by the client must have a zero ref count.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()
    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes = [np.float32, np.float64,
              np.int8, np.int16, np.int32, np.int64]

    with plasma.start_plasma_store(10**8) as (store_name, _proc):
        client = plasma.connect(store_name)
        for dtype in dtypes:
            run_tensorflow_test_with_dtype(tf, plasma, store_name,
                                           client, use_gpu, dtype)

        # Make sure the objects have been released.
        for _, entry in client.list().items():
            assert entry['ref_count'] == 0
def setup(self):
    """Start a 10**9-sized plasma store and connect a client to it."""
    store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
    # Keep the context manager on the instance; it is entered manually here.
    self.plasma_store_ctx = store_ctx
    store_name, _proc = store_ctx.__enter__()

    self.plasma_client = plasma.connect(store_name, "", 64)
def connect():
    """Bind the module-level ``client`` to ``/tmp/store`` and reseed NumPy.

    The seed is derived from the current time, reduced modulo 10000000.
    """
    global client
    client = plasma.connect('/tmp/store', '', 0)

    time_based_seed = int(time.time() * 10e7) % 10000000
    np.random.seed(time_based_seed)
def client_get_multiple(plasma_store_name):
    """Connect to *plasma_store_name* and block fetching ``object_ids``.

    ``object_ids`` comes from the enclosing scope.  The store name is taken
    from the parameter rather than from ``self`` in the enclosing scope, so
    the argument passed by the caller is actually used.
    """
    client = plasma.connect(plasma_store_name)
    # Try to get an object ID that doesn't exist. This should block.
    client.get(object_ids)
def client_blocked_in_get(plasma_store_name):
    """Connect to *plasma_store_name* and block fetching ``object_id``.

    ``object_id`` comes from the enclosing scope.  The store name is taken
    from the parameter rather than from ``self`` in the enclosing scope, so
    the argument passed by the caller is actually used.
    """
    client = plasma.connect(plasma_store_name)
    # Try to get an object ID that doesn't exist. This should block.
    client.get([object_id])
def test_connection_failure_raises_exception(self):
    """Connecting to a non-existent store must raise ``IOError`` (ARROW-1264)."""
    import pyarrow.plasma as plasma

    missing_store = 'unknown-store-name'
    with pytest.raises(IOError):
        plasma.connect(missing_store, num_retries=1)
def test_store_capacity():
    """The client must report the capacity the store was started with."""
    import pyarrow.plasma as plasma

    requested_capacity = 10000
    store_ctx = plasma.start_plasma_store(plasma_store_memory=requested_capacity)
    with store_ctx as (store_name, _proc):
        client = plasma.connect(store_name)
        assert client.store_capacity() == 10000