Ejemplo n.º 1
0
def benchmark_plasma():
    """Print memory and timing deltas while round-tripping a large array through plasma.

    A ``(1e8, 3)`` zero array is put into the store at ``/tmp/plasma``, the
    local copy is dropped, and the object is read back in full and then by a
    single entry.  After each step the change in ``calc_mem()`` since the
    start and the elapsed wall-clock time are printed.
    """
    arr = np.zeros((int(1e8), 3))
    time.sleep(2)
    start = calc_mem()
    tstart = time.time()

    def print_msg(msg):
        # Report memory relative to the baseline taken after allocating ``arr``.
        print(f'{msg}: {calc_mem() - start } MB. t={time.time()-tstart:.2f}')

    client = plasma.connect('/tmp/plasma')
    print_msg('have client')
    oid = client.put(arr)
    client.disconnect()
    print_msg('done put')
    client = plasma.connect('/tmp/plasma')
    # Drop the local array before collecting; without this the
    # 'deleted original array' measurement still includes it.
    del arr
    gc.collect()
    print_msg('deleted original array')
    a2 = client.get(oid)
    print_msg('read full array')
    print(type(a2))
    del a2
    gc.collect()
    print_msg('del array')
    # Bound to a name so the fetched entry stays referenced while measuring.
    a3 = client.get(oid)[0]
    print_msg('read one entry')
    gc.collect()
    print_msg('collect, done')
Ejemplo n.º 2
0
def start_plasma_store(path=DEFAULT_PLASMA_PATH,
                       nbytes: int = GB200) -> subprocess.Popen:
    """Launch a ``plasma_store`` process and check that it accepts connections.

    Args:
        path: Socket path passed to ``plasma_store -s``.
        nbytes: Store size passed to ``plasma_store -m``.

    Returns:
        The ``subprocess.Popen`` handle of the spawned store.

    Raises:
        Whatever ``plasma.connect`` raises when the store cannot be reached;
        the spawned process is killed before the error propagates.
    """
    # best practice is to allocate more space than we need. The limitation seems to be the size of /dev/shm
    _server = subprocess.Popen(["plasma_store", "-m", str(nbytes), "-s", path])
    try:
        # Probe connection only; the retries give the store time to come up.
        plasma.connect(path, num_retries=200)
    except BaseException:
        # Do not leave an orphaned store behind when the probe fails or is
        # interrupted.
        _server.kill()
        _server.wait()
        raise
    return _server
Ejemplo n.º 3
0
 def start(path=DEFAULT_PLASMA_PATH, nbytes: int = GB100) -> subprocess.Popen:
     """Launch a ``plasma_store`` process and check that it accepts connections.

     Args:
         path: Socket path passed to ``plasma_store -s``.
         nbytes: Store size passed to ``plasma_store -m``.

     Returns:
         The ``subprocess.Popen`` handle of the spawned store.

     Raises:
         ImportError: If ``PYARROW_AVAILABLE`` is false.
         Whatever ``plasma.connect`` raises when the store cannot be reached;
         the spawned process is killed before the error propagates.
     """
     if not PYARROW_AVAILABLE:
         raise ImportError("please run pip install pyarrow to use --use_plasma_view")
     # best practice is to allocate more space than we need. The limitation seems to be the size of /dev/shm
     _server = subprocess.Popen(["plasma_store", "-m", str(nbytes), "-s", path])
     try:
         # Probe connection only; the retries give the store time to come up.
         plasma.connect(path, num_retries=200)
     except BaseException:
         # Do not leave an orphaned store behind when the probe fails or is
         # interrupted.
         _server.kill()
         _server.wait()
         raise
     return _server
Ejemplo n.º 4
0
    def initialize(self, plasma_store_name, use_exist_plasma_server):
        """Start (on local rank 0) or attach to a Plasma store, then connect a client.

        ``os.environ["LOCAL_RANK"]`` is checked so that only one Plasma store
        server is started on each local machine.

        Args:
            plasma_store_name: Name of the store.  On the attach path, ``None``
                falls back to ``_hash(os.getcwd())``.
            use_exist_plasma_server: When true, never start a server; attach to
                one that is assumed to be running already.

        Raises:
            ValueError: If ``self.connected`` is already set.
        """
        if self.connected:
            raise ValueError("Plasma has already been initialized!")

        local_rank = int(os.environ.get("LOCAL_RANK", 0))

        if local_rank == 0 and not use_exist_plasma_server:
            memory = psutil.virtual_memory()
            plasma_store_memory = int(memory.available * self.use_mem_percent)

            self.plasma_store_name, self.plasma_store_path, self.plasma_store_proc = _start_plasma_store(
                plasma_store_memory, plasma_store_name
            )

            self.connected = True
            logger.info(
                f"Initializing Plasma with {plasma_store_memory // 1e9} GB Memory\n"
                f"    Plasma Location on {self.plasma_store_name}"
            )
            self.client = plasma.connect(self.plasma_store_path)
        else:
            # NOTE(review): presumably gives the rank-0 process time to start
            # the store - confirm.
            time.sleep(1)
            # Resolve the store name.  An explicitly supplied name must be
            # honoured here as well, otherwise the socket path below is built
            # from a stale or missing attribute.
            if plasma_store_name is None:
                self.plasma_store_name = _hash(os.getcwd())
            else:
                self.plasma_store_name = plasma_store_name

            # assume plasma server is running
            self.plasma_store_path = f"/tmp/torchfly/plasma/{self.plasma_store_name}/plasma.sock"
            logger.info(f"Plasma Store on {local_rank} is connected without starting server!")
            # NOTE(review): ``self.connected`` is only set on the server-start
            # path above - confirm whether that is intentional.
            self.client = plasma.connect(self.plasma_store_path)

        logger.info("Plasma Client Connected!")
0
 def setup_method(self, test_method):
     """Start a Plasma store and connect the two clients stored on the instance."""
     import pyarrow.plasma as plasma

     # Valgrind is enabled only when PLASMA_VALGRIND is exactly "1".
     valgrind_requested = os.getenv("PLASMA_VALGRIND") == "1"
     store_socket, self.p = start_plasma_store(use_valgrind=valgrind_requested)

     # Primary client.
     self.plasma_client = plasma.connect(store_socket, "", 64)
     # Second client, for the eviction test.
     self.plasma_client2 = plasma.connect(store_socket, "", 0)
Ejemplo n.º 6
0
 def setup_method(self, test_method):
     """Launch a Plasma store and attach two clients to it for the tests."""
     import pyarrow.plasma as plasma

     # The PLASMA_VALGRIND environment variable ("1") switches valgrind on.
     run_under_valgrind = os.getenv("PLASMA_VALGRIND") == "1"
     socket_name, self.p = start_plasma_store(use_valgrind=run_under_valgrind)

     # Main client.
     self.plasma_client = plasma.connect(socket_name, "", 64)
     # Extra client used by the eviction test.
     self.plasma_client2 = plasma.connect(socket_name, "", 0)
Ejemplo n.º 7
0
 def setup_method(self, test_method):
     """Enter a Plasma store context and connect two clients to the store."""
     import pyarrow.plasma as plasma

     # Keep the context manager on the instance so it can be exited later.
     store_ctx = plasma.start_plasma_store(
         plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
         use_valgrind=USE_VALGRIND)
     self.plasma_store_ctx = store_ctx

     store_socket, store_proc = store_ctx.__enter__()
     self.plasma_store_name = store_socket
     self.p = store_proc

     # Two independent connections to the same store.
     self.plasma_client = plasma.connect(store_socket)
     self.plasma_client2 = plasma.connect(store_socket)
Ejemplo n.º 8
0
    def _init_shared_store(self):
        """Connect to the worker's plasma socket and build the shared store wrapper."""
        import pyarrow.plasma as plasma
        from .storage.sharedstore import PlasmaSharedStore, PlasmaKeyMapActor

        mapper_ref = self.ctx.actor_ref(uid=PlasmaKeyMapActor.default_uid())

        socket_path = options.worker.plasma_socket
        try:
            client = plasma.connect(socket_path)
        except TypeError:  # pragma: no cover
            # Fall back to the three-argument call form of ``connect``.
            client = plasma.connect(socket_path, '', 0)

        self._plasma_client = client
        self._shared_store = PlasmaSharedStore(client, mapper_ref)
Ejemplo n.º 9
0
    def setup_method(self, test_method):
        """Start a Plasma store for ``test_method`` and connect two clients to it."""
        # Only the dedicated test asks for a single memory-mapped file.
        single_mmap = test_method == self.test_use_one_memory_mapped_file

        import pyarrow.plasma as plasma

        # Keep the context manager on the instance so it can be exited later.
        store_ctx = start_plasma_store(
            use_valgrind=USE_VALGRIND,
            use_one_memory_mapped_file=single_mmap)
        self.plasma_store_ctx = store_ctx
        store_socket, self.p = store_ctx.__enter__()

        self.plasma_client = plasma.connect(store_socket, "", 64)
        self.plasma_client2 = plasma.connect(store_socket, "", 0)
    def setup_method(self, test_method):
        """Start a Plasma store for ``test_method`` and connect two clients to it."""
        # Only the dedicated test asks for a single memory-mapped file.
        single_mmap = test_method == self.test_use_one_memory_mapped_file

        import pyarrow.plasma as plasma

        # Valgrind is enabled only when PLASMA_VALGRIND is exactly "1".
        valgrind_requested = os.getenv("PLASMA_VALGRIND") == "1"
        store_socket, self.p = start_plasma_store(
            use_valgrind=valgrind_requested,
            use_one_memory_mapped_file=single_mmap)

        # Primary client.
        self.plasma_client = plasma.connect(store_socket, "", 64)
        # Second client, for the eviction test.
        self.plasma_client2 = plasma.connect(store_socket, "", 0)
Ejemplo n.º 11
0
    def setup_method(self, test_method):
        """Launch a Plasma store configured for ``test_method`` and attach two clients."""
        # The one-memory-mapped-file mode is requested by a single test only.
        wants_single_mmap = (
            test_method == self.test_use_one_memory_mapped_file)

        import pyarrow.plasma as plasma

        # The PLASMA_VALGRIND environment variable ("1") switches valgrind on.
        run_under_valgrind = os.getenv("PLASMA_VALGRIND") == "1"
        socket_name, self.p = start_plasma_store(
            use_valgrind=run_under_valgrind,
            use_one_memory_mapped_file=wants_single_mmap)

        # Main client.
        self.plasma_client = plasma.connect(socket_name, "", 64)
        # Extra client used by the eviction test.
        self.plasma_client2 = plasma.connect(socket_name, "", 0)
Ejemplo n.º 12
0
    def setup_method(self, test_method):
        """Enter a Plasma store context for ``test_method`` and attach two clients."""
        # The one-memory-mapped-file mode is requested by a single test only.
        wants_single_mmap = (
            test_method == self.test_use_one_memory_mapped_file)

        import pyarrow.plasma as plasma

        # The context manager is stored so that it can be exited later.
        ctx = start_plasma_store(
            use_valgrind=USE_VALGRIND,
            use_one_memory_mapped_file=wants_single_mmap)
        self.plasma_store_ctx = ctx
        socket_name, self.p = ctx.__enter__()

        self.plasma_client = plasma.connect(socket_name, "", 64)
        self.plasma_client2 = plasma.connect(socket_name, "", 0)
Ejemplo n.º 13
0
    def setUpClass(cls):
        """Start a class-wide Plasma store, connect a client and open the KV store."""
        import pyarrow.plasma as plasma
        from mars import kvstore

        cls._plasma_store = plasma.start_plasma_store(cls.plasma_storage_size)
        # The first element yielded by the store context is the socket path.
        socket_path = cls._plasma_store.__enter__()[0]
        cls.plasma_socket = socket_path
        options.worker.plasma_socket = socket_path

        options.worker.spill_directory = cls.spill_dir

        try:
            client = plasma.connect(options.worker.plasma_socket)
        except TypeError:
            # Fall back to the three-argument call form of ``connect``.
            client = plasma.connect(options.worker.plasma_socket, '', 0)
        cls._plasma_client = client
        cls._kv_store = kvstore.get(options.kv_store)
Ejemplo n.º 14
0
 def start(self):
     """Start plasma server.

     Probes ``self.path`` for a running store first and launches
     ``plasma_store`` with ``self.size_shared_mem`` only when the probe fails.
     """
     try:
         # ``num_retries`` is the keyword ``plasma.connect`` accepts; the
         # previous ``int_num_retries`` raised TypeError on every call, so a
         # new store was spawned even when one was already running.
         plasma.connect(self.path, num_retries=2)
     except Exception:
         # Argument list instead of a shell string: no shell parsing of the
         # path or the size.
         Popen(
             ["plasma_store", "-m", str(self.size_shared_mem),
              "-s", str(self.path)],
             stderr=PIPE,
         )
         print("plasma_store -m {} -s {} is acitvated!".format(
             self.size_shared_mem, self.path))
         # NOTE(review): presumably a short grace period for the store to
         # create its socket - confirm that 0.1 s is enough.
         time.sleep(0.1)
Ejemplo n.º 15
0
def test_use_huge_pages():
    """Create a 100000000-byte object in a store started with huge pages enabled."""
    import pyarrow.plasma as plasma
    plasma_store_name, p = start_plasma_store(
        plasma_directory="/mnt/hugepages", use_hugepages=True)
    try:
        plasma_client = plasma.connect(plasma_store_name, "", 64)
        create_object(plasma_client, 100000000)
    finally:
        # Always stop the store, even when connecting or creating fails.
        p.kill()
Ejemplo n.º 16
0
 def __init__(self, namespace="default", path="/tmp/plasma"):
     """Connect to the plasma store at ``path`` and select ``namespace``.

     Also records the value of ``self.size()`` in ``self.bytes`` and a
     rounded megabyte label in ``self.mb``.
     """
     self.path = path
     self.namespace = namespace
     self.client = plasma.connect(self.path, num_retries=5)

     size_in_bytes = self.size()
     self.bytes = size_in_bytes
     # Human-readable size, using 1 MB = 1000000 bytes.
     self.mb = f"{round(self.bytes / 1000000)} MB"

     self.set_namespace(namespace)
def transfer(df):
    """
    Function that makes the transfer to the worker

    df is passed in the meassure wrapper

    The DataFrame is written into the plasma store at
    ``/tmp/sock/plasma.sock`` as a record-batch stream, and the hex part of
    the new object id is handed to ``read.delay``; the call returns once
    ``task.wait()`` does.
    """

    client = plasma.connect("/tmp/sock/plasma.sock")

    # Convert the Pandas DataFrame into a PyArrow RecordBatch
    record_batch = pa.RecordBatch.from_pandas(df)

    # Create the Plasma object from the PyArrow RecordBatch. Most of the work here
    # is done to determine the size of buffer to request from the object store.
    object_id = plasma.ObjectID(np.random.bytes(20))
    mock_sink = pa.MockOutputStream()
    stream_writer = pa.RecordBatchStreamWriter(mock_sink, record_batch.schema)
    stream_writer.write_batch(record_batch)
    stream_writer.close()
    data_size = mock_sink.size()
    buf = client.create(object_id, data_size)

    # Write the PyArrow RecordBatch to Plasma
    stream = pa.FixedSizeBufferWriter(buf)
    stream_writer = pa.RecordBatchStreamWriter(stream, record_batch.schema)
    stream_writer.write_batch(record_batch)
    stream_writer.close()

    # Seal the Plasma object
    client.seal(object_id)

    # Raw string: ``\(`` and ``\)`` are regex escapes, not string escapes, and
    # are rejected as invalid escape sequences in a plain literal by newer
    # Python versions.
    object_id_str = re.search(r'ObjectID\((.*)\)', str(object_id))

    task = read.delay(object_id_str.group(1))
    task.wait()
Ejemplo n.º 18
0
    def worker(worker_args):
        """Apply a rolling ``func`` to each group of one chunk and store the result.

        ``worker_args`` is an 8-tuple: store socket name, object id of the
        data frame, object id of the chunked groups, rolling keyword
        arguments, chunk index, the function and its extra positional and
        keyword arguments.  Returns the object id of the concatenated result.
        """
        (
            plasma_store_name,
            object_id,
            groups_id,
            attribute2value,
            chunk,
            func,
            args,
            kwargs,
        ) = worker_args

        store_client = plasma.connect(plasma_store_name)
        frame = store_client.get(object_id)
        chunk_groups = store_client.get(groups_id)[chunk]

        def roll_group(group_name, row_positions):
            # Rolling apply on the group's rows, re-indexed as (group name, original index).
            rolled = (
                frame.iloc[row_positions]
                .rolling(**attribute2value)
                .apply(func, *args, **kwargs)
            )
            rolled.index = pd.MultiIndex.from_product([[group_name], rolled.index])
            return rolled

        pieces = [roll_group(name, indexes) for name, indexes in chunk_groups]

        return store_client.put(pd.concat(pieces))
Ejemplo n.º 19
0
 def upload_partitions_from_plasma(self, partition_id, plasma_object_id, object_store_address):
     """Copy one partition from a plasma store into Ray and register its reference.

     Returns 0 once ``set_partition_ref`` on the meta store handle has completed.
     """
     import pyarrow.plasma as plasma

     store_client = plasma.connect(object_store_address)
     partition_data = store_client.get(plasma_object_id)

     ray_ref = ray.put(partition_data)
     # Block until the meta store has recorded the reference.
     pending = self.meta_store_handle.set_partition_ref(partition_id, [ray_ref])
     ray.get(pending)
     return 0
Ejemplo n.º 20
0
    def _delete_plasma_object(self, plasma_client, object_id):
        """Safely delete an object from the plasma store.

        Written because the internal ``_delete`` function inexplicably hangs.
        Deleting an object sometimes has no effect (no error is raised, but
        the object still exists) because it may still be referenced, in
        particular by the current ``plasma_client``.  This function therefore
        disconnects the given client to drop its references, reconnects,
        issues the delete and then waits for the object to disappear before
        returning the new client.

        Parameters
        ----------
        plasma_client: the current plasma client; it must be passed in
            because it may itself hold references to the object
        object_id: id of the object to delete

        Returns
        -------
        plasma_client
        """
        if plasma_client:
            # Disconnect and reconnect to release references to the object.
            plasma_client.disconnect()
        fresh_client = plasma.connect(self.plasma_store_name)
        fresh_client.delete([object_id])

        # Plasma's delete appears to be handled by a background thread, so it
        # does not take effect immediately.  Poll once per ``stop_event`` wait
        # until the object is gone, giving up after the retry budget is spent,
        # so callers do not find the object still present.
        attempts_left = 120
        while fresh_client.contains(object_id):
            self.stop_event.wait(1)
            if attempts_left < 0:
                break
            attempts_left -= 1
        return fresh_client
Ejemplo n.º 21
0
    def test_plasma(self):
        """Round-trip a measurement set through plasma drops and compare the copy.

        Also checks that the plasma object can be fetched directly using the
        id encoded in the plasma drop's ``dataURL``.
        """
        import binascii

        in_file = '/tmp/test.ms'
        out_file = '/tmp/copy.ms'

        with tarfile.open('./data/test_ms.tar.gz', 'r') as ref:
            ref.extractall('/tmp/')

        a = FileDROP('a', 'a', filepath=in_file)
        b = MSPlasmaWriter('b', 'b')
        c = PlasmaDROP('c', 'c')
        d = MSPlasmaReader('d', 'd')
        e = FileDROP('e', 'e', filepath=out_file)

        b.addInput(a)
        b.addOutput(c)
        d.addInput(c)
        d.addOutput(e)

        # Check the MS DATA content is the same as original
        with droputils.DROPWaiterCtx(self, e, 5):
            a.setCompleted()

        self.compare_ms(in_file, out_file)

        # check we can go from dataURL to plasma ID
        client = plasma.connect("/tmp/plasma")
        # ``str.decode("hex")`` exists only on Python 2; ``unhexlify`` decodes
        # the hex id on both Python 2 and Python 3.
        object_id_bytes = binascii.unhexlify(c.dataURL.split('//')[1])
        client.get(plasma.ObjectID(object_id_bytes))
def producer(data, batch_max_rows=BATCH_MAX_ROWS):
    """Saves the input data as a set of batches (of random size) into the plasma store.

    Args:
        data (numpy): The input data saved into the plasma store.
        batch_max_rows (int): The maximum size allowed for each batch.

    Returns:
        float: The checksum value for all the input data.
    """
    logging.info("Producer: connecting to the plasma store")
    client = plasma.connect(PLASMA_STORE_LOCATION)

    try:
        logging.info(
            "Producer: starting to load data (%i rows) onto the plasma store",
            len(data))
        row_num = 0
        batch_num = 0
        checksum = 0.0
        while row_num < len(data):
            # Each batch takes between 1 and ``batch_max_rows`` rows; the
            # final slice may be shorter than ``k``.
            k = random.randint(1, batch_max_rows)
            rows = data[row_num:row_num + k]
            checksum = get_data_checksum(checksum, rows)

            logging.debug("Producer: storing batch number: %i", batch_num)
            put_df(client, batch_num, rows)

            row_num += k
            batch_num += 1
    finally:
        # Release the connection even when storing a batch fails.
        client.disconnect()

    logging.info("Producer: total %i rows distributed (checksum: %f)",
                 len(data), checksum)
    return checksum
Ejemplo n.º 23
0
    def _init_chunk_store(self):
        """Connect to the worker's plasma socket and build the chunk store wrapper."""
        import pyarrow.plasma as plasma
        from .chunkstore import PlasmaChunkStore, PlasmaKeyMapActor

        key_mapper = self.ctx.actor_ref(uid=PlasmaKeyMapActor.default_name())

        client = plasma.connect(options.worker.plasma_socket, '', 0)
        self._plasma_client = client
        self._chunk_store = PlasmaChunkStore(client, key_mapper)
Ejemplo n.º 24
0
def init_plasma(mem=1000):
    """Initializes a Plasma object store.

    On the first call a ``plasma_store`` process is started on a randomly
    named socket under ``/tmp`` and a client is connected to it; later calls
    return the client created by the first call.

    Args:
        mem (int, optional): The argument specifies the size of the store in megabytes. Defaults to 1000.

    Returns:
        (PlasmaClient): Plasma client object
    """
    import os
    import string
    global plasma_info

    if plasma_info.init:
        print("Plasma has already been initialized before.")
        return plasma_info.plasma_client

    import pyarrow.plasma as plasma
    plasma_info.plasma = plasma

    # get random string which will make it unlikely that two instances of plasma are trying to use the same file
    characters = list(string.ascii_uppercase + string.ascii_lowercase + string.digits)
    rstr = "".join(np.random.choice(characters, 10))

    plasma_info.plasma_client_file_name = "/tmp/plasma_" + rstr

    # Look for plasma_store next to the running interpreter.  Taking the
    # directory of ``sys.executable`` also works when the interpreter is not
    # named exactly "python" (e.g. "python3"), unlike slicing off six
    # characters.
    plasma_store_executable = os.path.join(
        os.path.dirname(sys.executable), "plasma_store")

    # Run Plasma
    system_run(f"{plasma_store_executable} -m {int(mem * 1000000)} -s {plasma_info.plasma_client_file_name}")
    plasma_info.plasma_client = plasma.connect(plasma_info.plasma_client_file_name)
    plasma_info.init = True
    return plasma_info.plasma_client
Ejemplo n.º 25
0
    def client(self):
        """Connects to the plasma store if not already connected.

        The client is created on first use and cached on the instance.

        Returns:
            A plasma client.

        Raises:
            OrchestNetworkError: Could not connect to the
                ``Config.STORE_SOCKET_NAME``, because it does not exist.
                Which might be because the specified value was wrong or
                the store died.
        """
        if self._client is not None:
            return self._client

        try:
            self._client = plasma.connect(Config.STORE_SOCKET_NAME,
                                          num_retries=Config.CONN_NUM_RETRIES)
        except OSError as err:
            # Chain the original error so the underlying cause stays visible.
            raise error.OrchestNetworkError(
                "Failed to connect to in-memory object store.") from err

        return self._client
Ejemplo n.º 26
0
    def worker_apply(worker_args):
        """Apply ``func`` to one chunk of a series from plasma and store the result.

        ``worker_args`` is a 9-tuple: store socket name, object id of the
        series, chunk selector, the function, a progress-bar flag, the
        progress queue, this worker's index and the extra positional and
        keyword arguments for ``apply``.  Returns the object id of the result.
        """
        (plasma_store_name, object_id, chunk, func, progress_bar, queue, index,
         args, kwargs) = worker_args

        store_client = plasma.connect(plasma_store_name)
        series = store_client.get(object_id)

        calls_done = c_uint64(0)
        last_report = c_double(time())

        def counted(*call_args, **call_kwargs):
            # Count the call and push progress at most once per refresh interval.
            calls_done.value += 1

            now = time()

            if now - last_report.value >= REFRESH_PROGRESS_TIME:
                queue.put_nowait((index, calls_done.value, False))
                last_report.value = now

            return func(*call_args, **call_kwargs)

        if progress_bar:
            applied = counted
        else:
            applied = func

        res = series[chunk].apply(applied, *args, **kwargs)

        if progress_bar:
            # Final message: the last element marks this worker as finished.
            queue.put((index, calls_done.value, True))

        return store_client.put(res)
Ejemplo n.º 27
0
    def __init__(self,
                 fetch_indexes: List[Tuple[str, int]],
                 block_sizes: List[int],
                 block_holder_mapping: Dict[str,
                                            BlockHolderActorHandlerWrapper],
                 plasma_store_socket_name: str = None):
        """Record the block layout and choose how block data will be fetched.

        ``ray.get`` is used when Ray is initialized in this process; otherwise
        the data is read through a plasma client connected to
        ``plasma_store_socket_name``, which must then be given.
        """
        assert len(fetch_indexes) == len(block_sizes), \
            "The length of fetch_indexes and block_sizes should be equalled"
        self._fetch_indexes: List[Tuple[str, int]] = fetch_indexes
        self._block_sizes = block_sizes
        self._total_size = sum(self._block_sizes)
        self._block_holder_mapping = block_holder_mapping

        self._resolved = False
        self._resolved_block: Dict[int, ray.ObjectID] = {}

        self._plasma_store_socket_name = plasma_store_socket_name
        in_ray_worker: bool = ray.is_initialized()
        self._get_data_func = ray.get
        if in_ray_worker:
            return

        # if the current process is not a Ray worker, the
        # plasma_store_socket_name must be set
        assert plasma_store_socket_name is not None, "plasma_store_socket_name must be set"
        plasma_client: Optional[PlasmaClient] = plasma.connect(
            plasma_store_socket_name)

        def get_by_plasma(object_id: ray.ObjectID):
            # this should be really faster becuase of zero copy
            plasma_object_id = plasma.ObjectID(object_id.binary())
            return plasma_client.get_buffers([plasma_object_id])[0]

        self._get_data_func = get_by_plasma
Ejemplo n.º 28
0
def write_to_plasma(df, name):
    """Store ``df`` in the plasma store and record its object id under ``name``.

    The DataFrame is written as a record-batch stream into the store at
    ``/tmp/plasma``; the new object id is then added to the dict pickled in
    ``plasma_state.pkl``, which must already exist.

    Args:
        df: Pandas DataFrame to store.
        name: Key under which the object id is saved in the state file.
    """
    print("Connecting to Plasma store...")
    client = plasma.connect("/tmp/plasma")
    try:
        # Convert the Pandas DataFrame into a PyArrow RecordBatch
        print("Converting df to recordbatch...")
        record_batch = pa.RecordBatch.from_pandas(df)
        # Create the Plasma object from the PyArrow RecordBatch. Most of the work here
        # is done to determine the size of buffer to request from the object store.
        print("Determine size of buffer to request etc...")
        object_id = plasma.ObjectID(np.random.bytes(20))
        mock_sink = pa.MockOutputStream()
        stream_writer = pa.RecordBatchStreamWriter(mock_sink, record_batch.schema)
        stream_writer.write_batch(record_batch)
        stream_writer.close()
        data_size = mock_sink.size()
        buf = client.create(object_id, data_size)
        # Write the PyArrow RecordBatch to Plasma
        print("Write the recordbatch to Plasma...")
        stream = pa.FixedSizeBufferWriter(buf)
        stream_writer = pa.RecordBatchStreamWriter(stream, record_batch.schema)
        stream_writer.write_batch(record_batch)
        stream_writer.close()
        # Seal the Plasma object
        print("Sealing the plasma object in store")
        client.seal(object_id)
    finally:
        # end the client, also when one of the steps above fails
        print("Disconnecting from plasma store")
        client.disconnect()
    # Write the new object ID
    print("Storing the object_id to plasma_store")
    # NOTE(review): pickle.load must only be used on a trusted state file.
    with open("plasma_state.pkl", "rb") as f:
        plasma_state = pickle.load(f)
    plasma_state[name] = object_id
    with open("plasma_state.pkl", "wb") as f:
        pickle.dump(plasma_state, f)
Ejemplo n.º 29
0
def run_detector(detection_queue, avg_speed, start):
    """Run the detection loop: read frames from plasma and write detections back.

    Frame keys arrive on ``detection_queue``.  The input object id is the
    SHA-1 of the key and the output id the SHA-1 of ``out-<key>``.
    ``start.value`` holds the timestamp of the detection in progress (0.0
    when idle) and ``avg_speed.value`` a running average of its duration.
    """
    print(f"Starting detection process: {os.getpid()}")
    listen()
    plasma_client = plasma.connect("/tmp/plasma")
    object_detector = ObjectDetector()

    def to_object_id(text):
        # Plasma object ids are derived from the SHA-1 digest of the text.
        return plasma.ObjectID(hashlib.sha1(str.encode(text)).digest())

    while True:
        frame_key = detection_queue.get()
        frame_id = to_object_id(frame_key)
        result_id = to_object_id(f"out-{frame_key}")

        # timeout_ms=0: do not wait for a frame that is not in the store.
        frame = plasma_client.get(frame_id, timeout_ms=0)
        if frame is plasma.ObjectNotAvailable:
            continue

        # detect and put the output in the plasma store
        start.value = datetime.datetime.now().timestamp()
        detections = object_detector.detect_raw(frame)
        plasma_client.put(detections, result_id)
        duration = datetime.datetime.now().timestamp() - start.value
        start.value = 0.0

        # Weighted running average: nine parts history, one part new sample.
        avg_speed.value = (avg_speed.value * 9 + duration) / 10
Ejemplo n.º 30
0
    def test_delayed_start(self):
        """A plasma manager started late must learn about already-sealed objects."""
        num_objects = 10
        # Create some objects using one client.
        object_ids = [random_object_id() for _ in range(num_objects)]
        for object_id in object_ids:
            create_object_with_id(self.client, object_id, 2000, 2000)

        # Wait until the objects have been sealed in the store.
        ready, waiting = self.client.wait(object_ids, num_returns=num_objects)
        self.assertEqual(set(ready), set(object_ids))
        self.assertEqual(waiting, [])

        # Start a second plasma manager attached to the same store.
        manager_name, self.p5, self.port2 = ray.plasma.start_plasma_manager(
            self.store_name, self.redis_address, use_valgrind=USE_VALGRIND)
        self.processes_to_kill = [self.p5] + self.processes_to_kill

        # Check that the second manager knows about existing objects.
        client2 = plasma.connect(self.store_name, manager_name, 64)
        expected_count = len(object_ids)
        while True:
            # Poll without blocking until every object is reported ready.
            ready, waiting = client2.wait(object_ids,
                                          num_returns=num_objects,
                                          timeout=0)
            if len(ready) == expected_count:
                break

        self.assertEqual(set(ready), set(object_ids))
        self.assertEqual(waiting, [])
Ejemplo n.º 31
0
 def post_create(self):
     """Connect to the remote plasma socket and wrap it in a chunk store."""
     super(TransferTestActor, self).post_create()

     remote_client = plasma.connect(self._remote_plasma_socket, '', 0)
     self._remote_plasma_client = remote_client

     kv_store_ref = self.ctx.actor_ref(KVStoreActor.default_name())
     self._remote_store = PlasmaChunkStore(remote_client, kv_store_ref)
Ejemplo n.º 32
0
def run_transfer_worker(pool_address, session_id, chunk_keys, spill_dir, msg_queue):
    """Run a transfer test worker backed by its own plasma store and actor pool.

    All but the last seven ``chunk_keys`` are written as spill files; the last
    seven are put into the plasma store and registered with the chunk holder.
    The plasma socket path is then published on ``msg_queue`` and the function
    polls that queue.

    Raises:
        SystemError: If the queue is found empty more than 60 seconds after
            the socket path was published.
    """
    options.worker.spill_directory = spill_dir
    plasma_size = 1024 * 1024 * 10

    # don't use multiple with-statement as we need the options be forked
    with plasma.start_plasma_store(plasma_size) as store_args:
        # The first element of store_args is used as the socket path.
        options.worker.plasma_socket = plasma_socket = store_args[0]
        plasma_client = plasma.connect(plasma_socket, '', 0)

        with start_transfer_test_pool(address=pool_address, plasma_size=plasma_size) as pool:
            chunk_holder_ref = pool.actor_ref(ChunkHolderActor.default_name())
            mapper_ref = pool.actor_ref(PlasmaKeyMapActor.default_name())
            plasma_store = PlasmaChunkStore(plasma_client, mapper_ref)

            # Two sender and two receiver actors, each with a random uid.
            for _ in range(2):
                pool.create_actor(SenderActor, uid='%s' % str(uuid.uuid4()))
                pool.create_actor(ReceiverActor, uid='%s' % str(uuid.uuid4()))

            # Every chunk is an int16 array of 640 * 1024 elements filled with
            # its index: all but the last seven go to spill files ...
            for idx in range(0, len(chunk_keys) - 7):
                data = np.ones((640 * 1024,), dtype=np.int16) * idx
                write_spill_file(chunk_keys[idx], data)
            # ... and the last seven go into plasma and are registered.
            for idx in range(len(chunk_keys) - 7, len(chunk_keys)):
                data = np.ones((640 * 1024,), dtype=np.int16) * idx
                plasma_store.put(session_id, chunk_keys[idx], data)
                chunk_holder_ref.register_chunk(session_id, chunk_keys[idx])

            # Publish the socket path so the other side can connect.
            msg_queue.put(plasma_socket)
            t = time.time()
            # NOTE(review): no exit other than the timeout is visible here -
            # presumably the process is terminated from outside; confirm.
            while True:
                try:
                    msg_queue.get_nowait()
                except Empty:
                    if time.time() > t + 60:
                        raise SystemError('Transfer finish timed out.')
                    pool.sleep(0.1)
Ejemplo n.º 33
0
    def setup(self, size):
        """Start a 10**9-byte Plasma store, connect a client and create random data."""
        store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
        self.plasma_store_ctx = store_ctx
        store_socket, _store_proc = store_ctx.__enter__()
        self.plasma_client = plasma.connect(store_socket, "", 64)

        # ``size // 8`` samples are drawn for the benchmark data.
        sample_count = size // 8
        self.data = np.random.randn(sample_count)
Ejemplo n.º 34
0
def test_use_huge_pages():
    """Create a 100000000-byte object in a store backed by ``/mnt/hugepages``."""
    import pyarrow.plasma as plasma

    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
        plasma_directory="/mnt/hugepages",
        use_hugepages=True)
    with store_ctx as store_info:
        store_socket, _proc = store_info
        huge_page_client = plasma.connect(store_socket, "", 64)
        create_object(huge_page_client, 100000000)
Ejemplo n.º 35
0
def test_use_huge_pages():
    """Create a 10**8-byte object in a 2*10**9-byte store backed by ``/mnt/hugepages``."""
    import pyarrow.plasma as plasma

    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=2*10**9,
        plasma_directory="/mnt/hugepages",
        use_hugepages=True)
    with store_ctx as store_info:
        store_socket, _proc = store_info
        huge_page_client = plasma.connect(store_socket)
        create_object(huge_page_client, 10**8)
Ejemplo n.º 36
0
 def setup_method(self, test_method):
     """Enter a Plasma store context configured with an external store and connect."""
     import pyarrow.plasma as plasma

     # Keep the context manager on the instance so it can be exited later.
     store_ctx = plasma.start_plasma_store(
         plasma_store_memory=1000 * 1024,
         use_valgrind=USE_VALGRIND,
         external_store=EXTERNAL_STORE)
     self.plasma_store_ctx = store_ctx

     store_socket, store_proc = store_ctx.__enter__()
     self.plasma_store_name = store_socket
     self.p = store_proc

     self.plasma_client = plasma.connect(store_socket)
Ejemplo n.º 37
0
def test_plasma_client_sharing():
    """Check that a fetched buffer stays usable after its client is deleted."""
    import pyarrow.plasma as plasma

    with plasma.start_plasma_store(
            plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY) \
            as (plasma_store_name, p):
        plasma_client = plasma.connect(plasma_store_name)
        object_id = plasma_client.put(np.zeros(3))
        buf = plasma_client.get(object_id)
        # Delete the client first: the buffer must still be readable and must
        # be safely released afterwards.
        del plasma_client
        assert (buf == np.zeros(3)).all()
        del buf  # This segfaulted pre ARROW-2448.
Ejemplo n.º 38
0
def test_plasma_tf_op(use_gpu=False):
    """Run the TensorFlow plasma op test for each supported dtype.

    The test is skipped when the TensorFlow op is not available after
    building.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()

    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes_under_test = [np.float32, np.float64,
                         np.int8, np.int16, np.int32, np.int64]

    with plasma.start_plasma_store(10**8) as store_info:
        plasma_store_name, _proc = store_info
        client = plasma.connect(plasma_store_name, "", 0)
        for dtype in dtypes_under_test:
            run_tensorflow_test_with_dtype(tf, plasma, plasma_store_name,
                                           client, use_gpu, dtype)
Ejemplo n.º 39
0
def test_plasma_list():
    """Check the sizes, ref count, state and timestamps reported by ``client.list()``."""
    import pyarrow.plasma as plasma

    with plasma.start_plasma_store(
            plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY) \
            as (plasma_store_name, p):
        plasma_client = plasma.connect(plasma_store_name)

        # Test sizes
        u, _, _ = create_object(plasma_client, 11, metadata_size=7, seal=False)
        l1 = plasma_client.list()
        assert l1[u]["data_size"] == 11
        assert l1[u]["metadata_size"] == 7

        # Test ref_count
        v = plasma_client.put(np.zeros(3))
        # Ref count has already been released
        # XXX flaky test, disabled (ARROW-3344)
        # l2 = plasma_client.list()
        # assert l2[v]["ref_count"] == 0
        a = plasma_client.get(v)
        l3 = plasma_client.list()
        assert l3[v]["ref_count"] == 1
        del a

        # Test state
        w, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        l4 = plasma_client.list()
        assert l4[w]["state"] == "created"
        plasma_client.seal(w)
        l5 = plasma_client.list()
        assert l5[w]["state"] == "sealed"

        # Test timestamps
        # ``slack`` widens every bound below by a fixed tolerance.
        slack = 1.5  # seconds
        t1 = time.time()
        x, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        t2 = time.time()
        l6 = plasma_client.list()
        assert t1 - slack <= l6[x]["create_time"] <= t2 + slack
        # Leave a measurable gap between creating and sealing the object.
        time.sleep(2.0)
        t3 = time.time()
        plasma_client.seal(x)
        t4 = time.time()
        l7 = plasma_client.list()
        # The duration is bounded below by (seal start - create end) and
        # above by (seal end - create start).
        assert t3 - t2 - slack <= l7[x]["construct_duration"]
        assert l7[x]["construct_duration"] <= t4 - t1 + slack
Ejemplo n.º 40
0
def test_plasma_list():
    """Check the sizes, ref count, state and timestamps reported by ``client.list()``."""
    import pyarrow.plasma as plasma

    with plasma.start_plasma_store(
            plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY) \
            as (plasma_store_name, p):
        plasma_client = plasma.connect(plasma_store_name, "", 0)

        # Test sizes
        u, _, _ = create_object(plasma_client, 11, metadata_size=7, seal=False)
        l1 = plasma_client.list()
        assert l1[u]["data_size"] == 11
        assert l1[u]["metadata_size"] == 7

        # Test ref_count
        v = plasma_client.put(np.zeros(3))
        l2 = plasma_client.list()
        # Ref count has already been released
        assert l2[v]["ref_count"] == 0
        a = plasma_client.get(v)
        l3 = plasma_client.list()
        assert l3[v]["ref_count"] == 1
        del a

        # Test state
        w, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        l4 = plasma_client.list()
        assert l4[w]["state"] == "created"
        plasma_client.seal(w)
        l5 = plasma_client.list()
        assert l5[w]["state"] == "sealed"

        # Test timestamps
        # Bounds are rounded outwards with floor/ceil before comparing.
        t1 = time.time()
        x, _, _ = create_object(plasma_client, 3, metadata_size=0, seal=False)
        t2 = time.time()
        l6 = plasma_client.list()
        assert math.floor(t1) <= l6[x]["create_time"] <= math.ceil(t2)
        # Leave a measurable gap between creating and sealing the object.
        time.sleep(2.0)
        t3 = time.time()
        plasma_client.seal(x)
        t4 = time.time()
        l7 = plasma_client.list()
        # The duration is bounded below by (seal start - create end) and
        # above by (seal end - create start).
        assert math.floor(t3 - t2) <= l7[x]["construct_duration"]
        assert l7[x]["construct_duration"] <= math.ceil(t4 - t1)
Ejemplo n.º 41
0
def test_plasma_tf_op(use_gpu=False):
    """Run the TensorFlow plasma op test per dtype, then check all refs are released.

    The test is skipped when the TensorFlow op is not available after
    building.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()

    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes_under_test = [np.float32, np.float64,
                         np.int8, np.int16, np.int32, np.int64]

    with plasma.start_plasma_store(10**8) as store_info:
        plasma_store_name, _proc = store_info
        client = plasma.connect(plasma_store_name)
        for dtype in dtypes_under_test:
            run_tensorflow_test_with_dtype(tf, plasma, plasma_store_name,
                                           client, use_gpu, dtype)

        # Make sure the objects have been released.
        for info in client.list().values():
            assert info['ref_count'] == 0
Ejemplo n.º 42
0
 def setup(self):
     """Start a 10**9-byte Plasma store and connect a client to it."""
     store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
     self.plasma_store_ctx = store_ctx
     store_socket, _store_proc = store_ctx.__enter__()
     self.plasma_client = plasma.connect(store_socket, "", 64)
Ejemplo n.º 43
0
def connect():
    """Bind the module-level ``client`` to ``/tmp/store`` and reseed NumPy from the clock."""
    global client
    client = plasma.connect('/tmp/store', '', 0)
    # Seed derived from the current time, reduced to the range 0..9999999.
    clock_seed = int(time.time() * 10e7) % 10000000
    np.random.seed(clock_seed)
Ejemplo n.º 44
0
 def client_get_multiple(plasma_store_name):
     """Connect a fresh client and issue a ``get`` for ``object_ids``."""
     # NOTE(review): the ``plasma_store_name`` argument is unused; the socket
     # name is read from the enclosing ``self`` instead - confirm intent.
     blocking_client = plasma.connect(self.plasma_store_name)
     # Try to get an object ID that doesn't exist. This should block.
     blocking_client.get(object_ids)
Ejemplo n.º 45
0
 def client_blocked_in_get(plasma_store_name):
     """Connect a fresh client and issue a ``get`` for the single ``object_id``."""
     # NOTE(review): the ``plasma_store_name`` argument is unused; the socket
     # name is read from the enclosing ``self`` instead - confirm intent.
     blocking_client = plasma.connect(self.plasma_store_name)
     # Try to get an object ID that doesn't exist. This should block.
     blocking_client.get([object_id])
Ejemplo n.º 46
0
 def test_connection_failure_raises_exception(self):
     """Connecting to an unknown store name must raise ``IOError`` (ARROW-1264)."""
     import pyarrow.plasma as plasma

     # ARROW-1264
     expect_io_error = pytest.raises(IOError)
     with expect_io_error:
         plasma.connect('unknown-store-name', num_retries=1)
Ejemplo n.º 47
0
def test_store_capacity():
    """The client must report the capacity the store was started with."""
    import pyarrow.plasma as plasma

    with plasma.start_plasma_store(plasma_store_memory=10000) as store_info:
        name, _proc = store_info
        capacity = plasma.connect(name).store_capacity()
        assert capacity == 10000