def put(self, name, obj):
    """Store ``obj`` in plasma under an object ID derived from ``name``.

    The ID is built from the SHA-1 digest of the encoded name. When the put
    raises, the error is printed, ``self.connect()`` is called and the put
    is retried after a one-second pause. The loop only ends once a put
    succeeds.
    """
    name_digest = hashlib.sha1(str.encode(name)).digest()
    object_id = plasma.ObjectID(name_digest)

    stored = False
    while not stored:
        try:
            self.plasma_client.put(obj, object_id)
        except Exception as e:
            print(f"Failed to put in plasma: {e}")
            self.connect()
            time.sleep(1)
        else:
            stored = True
def remove_namespace(self, namespace=None):
    """Delete a namespace together with every variable stored in it.

    Args:
        namespace: name of the namespace to remove. When ``None``, the
            current namespace (``self.namespace``) is removed.

    Returns:
        A message naming the deleted namespace and the namespace in use
        afterwards.

    Raises:
        BaseException: if the target is ``'default'`` or is not listed by
            ``self.show_namespaces()``. The exception type is kept as-is
            for backward compatibility with existing callers.
    """
    # if no namespace is given, remove the current one
    # (this used to be a no-op comparison, ``namespace == self.namespace``)
    if namespace is None:
        namespace = self.namespace

    # cannot delete the default namespace
    if namespace == 'default':
        raise BaseException('BrainError: cannot remove default namespace')

    # cannot delete a namespace that doesn't exist
    if namespace not in self.show_namespaces():
        raise BaseException(
            'BrainError: namespace "{}" does not exist'.format(namespace))

    registry_id = plasma.ObjectID(b'brain_namespaces_set')

    # Switch to the target namespace while its variables are deleted;
    # presumably names()/forget() act on the active namespace.
    current_namespace = self.namespace
    self.namespace = namespace
    try:
        # delete all the variables in <namespace>
        for name in self.names():
            self.forget(name)

        # rebuild the stored set of namespaces without <namespace>
        namespaces = self.client.get(registry_id).union(['default'])
        namespaces = namespaces - {namespace}
        self.client.delete([registry_id])
        self.client.put(namespaces, registry_id)
    except BaseException:
        # do not leave the caller stranded in the half-deleted namespace
        self.namespace = current_namespace
        raise

    # if we cleared the current namespace, fall back to default;
    # otherwise go back to the namespace that was active before
    if current_namespace == namespace:
        self.namespace = 'default'
    else:
        self.namespace = current_namespace

    return 'Deleted namespace {}. Using namespace {}.'.format(
        namespace, self.namespace)
def get(self, name, timeout_ms=0):
    """Fetch the object stored under ``name`` from plasma.

    The object ID is built from the SHA-1 digest of the encoded name. When
    the fetch raises, the client is reconnected via ``self.connect()`` and
    the fetch is retried after a one-second pause.

    Args:
        name: key the object was stored under.
        timeout_ms: forwarded unchanged to ``plasma_client.get``.

    Returns:
        Whatever ``plasma_client.get`` returns, or ``None`` if
        ``self.stop_event`` is set before a fetch succeeds.
    """
    object_id = plasma.ObjectID(hashlib.sha1(str.encode(name)).digest())
    while True:
        if self.stop_event is not None and self.stop_event.is_set():
            return None
        try:
            return self.plasma_client.get(object_id, timeout_ms=timeout_ms)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit can still break out of the retry loop.
            self.connect()
            time.sleep(1)
def get_seal():
    """Read the object with ID ``20 * b'b'`` via a second client and print
    its first 200 bytes, one per line."""
    # A separate client; it could just as well live in another, concurrent
    # Python session.
    reader = plasma.connect("/tmp/plasma", "", 0)

    # Fetching the buffer blocks until the object has been sealed.
    sealed_id = plasma.ObjectID(20 * b'b')
    [sealed_buffer] = reader.get_buffers([sealed_id])

    contents = memoryview(sealed_buffer)
    for offset in range(200):
        print(contents[offset])
def object_id(self, name: str) -> plasma.ObjectID:
    """
    Look up the ObjectID of the value stored under ``name``.

    The ID is built from the ``"value_id"`` entry of the name's metadata.
    Returns None when ``self.exists(name)`` is false.
    """
    if self.exists(name):
        value_hash = self.metadata(name)["value_id"]
        return plasma.ObjectID(value_hash)
    return None
def brain_new_ids_or_existing_ids(name,client):
    '''Return ``(thing_id, name_id)`` for ``name``.

    If the name already exists, its stored name and value objects are
    deleted and their ids are returned for reuse. Otherwise a new name id is
    created and a random value id is generated.
    '''
    if not brain_name_exists(name,client):
        # unknown name: create a new name id and a random thing id
        name_id = brain_create_named_object(name)
        thing_id = plasma.ObjectID.from_random()
        return thing_id,name_id

    # find the stored record for this name; if none matches, keep the whole
    # collection, exactly as the loop-and-rebind original did
    entries = brain_names_objects(client)
    record = next((entry for entry in entries if entry['name']==name), entries)

    name_id = plasma.ObjectID(record['name_id'])
    thing_id = plasma.ObjectID(record['id'])

    # delete the old name and thing objects before handing their ids back
    client.delete([name_id,thing_id])
    return thing_id,name_id
def test_learn_exists(brain):
    """learn() stores the value and records metadata that points at it."""
    brain.learn("this", "that")

    # the name is known and recalls the stored value
    assert brain.exists("this")
    assert brain.recall("this") == "that"

    # metadata was created and its value_id resolves to the stored value
    meta = brain.metadata("this")
    assert "name" in meta
    stored_value = brain.client.get(plasma.ObjectID(meta["value_id"]))
    assert stored_value == "that"
def putStr(size):
    """Store generated input in plasma as a fixed-width byte-string array.

    Two objects with random 20-byte ids are created and sealed: an 8-byte
    object holding the length of the longest item, and the data itself as an
    array of ``|S<len_max>`` elements. The elapsed time is printed and both
    object ids are written to files so another process can find them.

    Args:
        size: forwarded to ``generateInput`` to produce the data.
    """
    data = generateInput(size)

    client = plasma.connect("/tmp/plasma", "", 0)
    start = time.time()

    # Put longest item's length
    len_max = [len(max(data, key=len))]
    random_bytes_lenmax = np.random.bytes(20)
    object_id_lenmax = plasma.ObjectID(random_bytes_lenmax)
    membuf_lenmax = client.create(object_id_lenmax, 8)
    dt = np.dtype(int)
    array = np.frombuffer(membuf_lenmax, dtype=dt)
    array.setfield(len_max, dtype=dt)
    client.seal(object_id_lenmax)

    # Put Data Array
    random_bytes = np.random.bytes(20)
    object_id = plasma.ObjectID(random_bytes)
    membuf = client.create(object_id, len(data) * len_max[0])
    dt = np.dtype("|S" + str(len_max[0]))
    array = np.frombuffer(membuf, dtype=dt)
    array.setfield(data, dtype=dt)
    client.seal(object_id)

    end = time.time() - start
    print("Total time ( %d ): %f"%(len(array),end))

    # Persist both ids; ``with`` closes the files even if a write fails.
    with open("<path_for_object_id_file_of_your_data_in_plasma>", "wb") as f:
        f.write(object_id.binary())
    with open("<path_for_object_id_file_of_your_lenmax_in_plasma>", "wb") as f:
        f.write(object_id_lenmax.binary())
def forget(self, name):
    '''Delete the value object and the name object stored for a Brain name.'''
    ids_by_name = self._brain_names_ids(self.client)
    entries = self._brain_names_objects(self.client)

    # pick the record for this name; if none matches, keep the whole
    # collection, exactly as the loop-and-rebind original did
    record = next((entry for entry in entries if entry['name'] == name), entries)

    value_id = ids_by_name[name]
    name_id = plasma.ObjectID(record['name_id'])
    self.client.delete([value_id, name_id])
def delete(self, name):
    """Delete the object stored under ``name`` from plasma.

    The object ID is built from the SHA-1 digest of the encoded name. When
    the delete raises, the client is reconnected via ``self.connect()`` and
    the delete is retried after a one-second pause. The loop stops without
    deleting if ``self.stop_event`` is set.

    Args:
        name: key the object was stored under.
    """
    object_id = plasma.ObjectID(hashlib.sha1(str.encode(name)).digest())
    while True:
        if self.stop_event is not None and self.stop_event.is_set():
            return
        try:
            self.plasma_client.delete([object_id])
            return
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit can still break out of the retry loop.
            self.connect()
            time.sleep(1)
def get_object_id(batch_num):
    """Build the plasma object id for a batch number.

    The id is the batch number encoded big-endian into
    ``PLASMA_OBJID_SIZE_BYTES`` bytes.

    Args:
        batch_num (int): The batch number.

    Returns:
        The plasma object id.
    """
    raw_id = batch_num.to_bytes(PLASMA_OBJID_SIZE_BYTES, byteorder="big")
    return plasma.ObjectID(raw_id)
def _convert_uuid_to_object_id(step_uuid: str) -> plasma.ObjectID:
    """Derive a plasma.ObjectID from a step UUID.

    Args:
        step_uuid: UUID of a step.

    Returns:
        An ObjectID built from the first 20 bytes of the encoded
        `step_uuid`.
    """
    return plasma.ObjectID(str.encode(step_uuid)[:20])
def seal():
    """Create a 1000-byte object with ID ``20 * b'b'``, fill it and seal it."""
    object_size = 1000
    object_id = plasma.ObjectID(20 * b'b')
    writable = memoryview(client.create(object_id, object_size))

    # Fill the buffer with the repeating pattern 0..127.
    for offset in range(object_size):
        writable[offset] = offset % 128

    # Sealing makes the object immutable and available to other clients.
    client.seal(object_id)
def put(self, name, frame):
    """Store ``frame`` in plasma under an object ID derived from ``name``.

    The ID is built from the SHA-1 digest of the encoded name. When the put
    raises, the error is printed, ``self.connect()`` is called and the put
    is retried after a one-second pause. The loop stops without storing if
    ``self.stop_event`` is set.
    """
    name_digest = hashlib.sha1(str.encode(name)).digest()
    object_id = plasma.ObjectID(name_digest)

    while True:
        # give up as soon as a stop has been requested
        if self.stop_event is not None and self.stop_event.is_set():
            return
        try:
            self.plasma_client.put(frame, object_id)
        except Exception as e:
            print(f"Failed to put in plasma: {e}")
            self.connect()
            time.sleep(1)
        else:
            return
def load_data_frame(self, key):
    """Load the DataFrame cached under ``key``.

    Returns None when the store does not contain the object. Otherwise the
    first record batch of the stored stream is converted to pandas.
    """
    object_id = plasma.ObjectID(self.get_cache_key(key))
    if self.client.contains(object_id):
        [data] = self.client.get_buffers([object_id])
        reader = pa.RecordBatchStreamReader(pa.BufferReader(data))
        return reader.read_next_batch().to_pandas()
    return None
def get_all(self, object_ids, update = False):
    """Fetch several objects from the plasma store and parse each one.

    Args:
        object_ids: sequence of ObjectIDs and/or raw values accepted by
            ``plasma.ObjectID``. Each element is converted on its own, so
            mixed sequences work.
        update: forwarded to ``DataHead.from_buffer``.

    Returns:
        A list with one parsed result per id, in input order. An empty
        input yields an empty list instead of raising ``IndexError``.
    """
    if len(object_ids) == 0:
        return []

    object_ids = [
        oid if isinstance(oid, pyarrow._plasma.ObjectID) else plasma.ObjectID(oid)
        for oid in object_ids
    ]
    buffers = self.plasma_client.get_buffers(object_ids)

    def get_res(buffer):
        # a fresh DataHead per buffer, filled from the buffer's memory
        data_head = DataHead()
        data_head.from_buffer(memoryview(buffer), update)
        return data_head.parse_data()

    return [get_res(buffer) for buffer in buffers]
def payload_to_single(  # pylint: disable=arguments-differ
    cls,
    payload: Payload,
    plasma_db: "ext.PlasmaClient" = Provide[DeploymentContainer.plasma_db],
):
    """Turn a payload back into a single object.

    If ``payload.meta`` has a truthy ``"plasma"`` entry, ``payload.data`` is
    used as a plasma object id and the object is fetched from ``plasma_db``.
    Otherwise ``payload.data`` is unpickled directly.
    """
    if not payload.meta.get("plasma"):
        # NOTE(review): pickle.loads runs arbitrary code for hostile input;
        # confirm payloads only come from trusted peers.
        return pickle.loads(payload.data)

    import pyarrow.plasma as plasma

    assert plasma_db
    object_id = plasma.ObjectID(payload.data)
    return plasma_db.get(object_id)
def set_namespace(self, namespace=None):
    """
    Return the current namespace, or switch to a new one.

    Args:
        namespace: name of the namespace to switch to. It must be 5 to 15
            characters long (inclusive). When ``None``, nothing changes.

    Returns:
        The namespace in use after the call.

    Raises:
        BrainNamespaceNameError: if the name is shorter than 5 or longer
            than 15 characters.
    """
    if namespace is None:
        return self.namespace

    # MUST BE BETWEEN 5 AND 15 CHARACTERS, INCLUSIVE
    # (the message used to state the inverted bound "5 >= namespace >= 15")
    if not 5 <= len(namespace) <= 15:
        raise BrainNamespaceNameError(
            f"Namespace wrong length; 5 <= length <= 15; name {namespace} is {len(namespace)}"
        )

    # CHANGE THE NAMESPACE AND ACKNOWLEDGE THE CHANGE
    self.namespace = namespace

    registry_id = plasma.ObjectID(b"brain_namespaces_set")
    if registry_id in self.client.list().keys():
        # THE NAMESPACES OBJECT EXISTS ALREADY: ADD THE NEW NAMESPACE TO IT.
        # The old object is deleted before the updated set is put back.
        namespaces = self.client.get(registry_id).union(
            [self.namespace, "default"])
        self.client.delete([registry_id])
        self.client.put(namespaces, registry_id)
    else:
        # OTHERWISE, CREATE THE NAMESPACES OBJECT AND ADD TO PLASMA
        self.client.put({self.namespace, "default"}, registry_id)

    # RETURN THE CURRENT NAMESPACE
    return self.namespace
def getInt():
    """Read an object id from a file, fetch that object from plasma as a
    ``uint32`` array and print the element count and the elapsed time."""
    start = time.time()

    # ``with`` closes the id file; the original never closed it.
    with open("<path_to_object_id_file>", "rb") as f:
        byte = f.read()  # bytes for object_id

    client = plasma.connect("/tmp/plasma", "", 0)
    object_id = plasma.ObjectID(byte)
    [buffer2] = client.get_buffers([object_id])

    # the array from c++. Per the original author, this line takes 97% of
    # the time.
    data = np.frombuffer(buffer2, dtype="uint32")

    end = time.time() - start
    print("total time for ( %d ): %f" %(len(data),end))
def _brain_new_ids_or_existing_ids(self, name, client):
    """Return ``(thing_id, name_id)`` for ``name``.

    If the name already exists, its stored name and value objects are
    deleted and their ids are returned for reuse. Otherwise a new name id is
    created and a random value id is generated.
    """
    if not self._brain_name_exists(name, client):
        # unknown name: create a new name id and a random thing id
        name_id = self._brain_create_named_object(name)
        thing_id = plasma.ObjectID.from_random()
        return thing_id, name_id

    # find the stored record for this name; if none matches, keep the whole
    # collection, exactly as the loop-and-rebind original did
    entries = self._brain_names_objects(client)
    record = next((entry for entry in entries if entry["name"] == name), entries)

    name_id = plasma.ObjectID(record["name_id"])
    thing_id = plasma.ObjectID(record["id"])

    # delete the old name and thing objects before handing their ids back
    client.delete([name_id, thing_id])
    return thing_id, name_id
def run_detector(detection_queue, avg_speed, start, tf_device):
    """Detection worker loop.

    Takes frame keys from ``detection_queue`` forever. For each key it
    fetches the frame from plasma, runs the detector and stores the result
    under the ``out-<key>`` id. ``start.value`` holds the start timestamp of
    the detection in progress and is reset to 0.0 afterwards.
    ``avg_speed.value`` is a running average weighted 9:1 towards the
    previous value.
    """
    print(f"Starting detection process: {os.getpid()}")
    listen()
    plasma_client = plasma.connect("/tmp/plasma")
    object_detector = LocalObjectDetector(tf_device=tf_device)

    while True:
        frame_key = detection_queue.get()
        input_id = plasma.ObjectID(
            hashlib.sha1(str.encode(frame_key)).digest())
        output_id = plasma.ObjectID(
            hashlib.sha1(str.encode(f"out-{frame_key}")).digest())

        input_frame = plasma_client.get(input_id, timeout_ms=0)
        if input_frame is not plasma.ObjectNotAvailable:
            # detect and put the output in the plasma store
            start.value = datetime.datetime.now().timestamp()
            detections = object_detector.detect_raw(input_frame)
            plasma_client.put(detections, output_id)
            elapsed = datetime.datetime.now().timestamp() - start.value
            start.value = 0.0

            avg_speed.value = (avg_speed.value*9 + elapsed)/10
def f5():
    """Create and seal 100 plasma objects with random ids, each sized for a
    100000-element int64 array. Nothing is written into the buffers."""
    client = plasma.connect("/tmp/store","",0)
    created_ids = []
    for _ in range(100):
        values = np.arange(100000, dtype="int64")
        arrow_values = pa.array(values)  # built as in the original; unused
        object_id = plasma.ObjectID(np.random.bytes(20))
        byte_count = values.nbytes
        created_ids.append(object_id)
        reserved = client.create(object_id, byte_count)  # left unfilled
        client.seal(object_id)
def set_namespace(self, namespace=None):
    """Return the current namespace, or switch to a new one.

    Args:
        namespace: name of the namespace to switch to. It must be 5 to 15
            characters long (inclusive). When ``None``, nothing changes.

    Returns:
        The namespace in use after the call.

    Raises:
        BaseException: if the name is shorter than 5 or longer than 15
            characters. The exception type is kept as-is for backward
            compatibility with existing callers.
    """
    if namespace is None:
        return self.namespace

    # must be at least 5 and at most 15 characters; the old comment and
    # message ("at least four", "fewer than 15") did not match these checks
    if len(namespace) < 5:
        raise BaseException(
            'BrainError: namespace "{}" must be at least 5 characters'.
            format(namespace))
    if len(namespace) > 15:
        raise BaseException(
            'BrainError: namespace "{}" must be at most 15 characters'.
            format(namespace))

    # change the namespace and acknowledge the change
    self.namespace = namespace

    registry_id = plasma.ObjectID(b"brain_namespaces_set")
    if registry_id in self.client.list().keys():
        # the namespaces object exists already: add the new namespace to it.
        # The old object is deleted before the updated set is put back.
        namespaces = self.client.get(registry_id).union(
            [self.namespace, "default"])
        self.client.delete([registry_id])
        self.client.put(namespaces, registry_id)
    else:
        # otherwise, create the namespaces object and add it to plasma
        self.client.put({self.namespace, "default"}, registry_id)

    # return the current namespace
    return self.namespace
def store_msg(self, msg):
    """Unpickle a received message and store it in plasma.

    Everything except the last 19 bytes of ``msg`` is the pickled payload.
    Bytes ``[-19:-9]`` are a 10-byte tag, used both as the log name and,
    prefixed with ten ``b'0'`` bytes, as the object id. The put is repeated
    until the store reports that it contains the object.
    """
    client = plasma.connect(config['common']['plasma_path'])

    # NOTE(review): pickle.loads runs arbitrary code for hostile input;
    # confirm messages only come from trusted peers.
    data = pickle.loads(msg[:-19])

    # W01_123|02, W01: W of first layer, 123: iteration, 02: the second part
    tag = msg[-19:-9]
    name = tag.decode()
    object_id = plasma.ObjectID(10 * b'0' + tag)
    logger.info("Server logs: Received %s", name)

    while True:
        client.put(data, object_id)
        if client.contains(object_id):
            logger.info("Server logs: Store %s success!", name)
            break
        logger.info("Put fail, start again!")

    client.disconnect()
def read(object_id_str):
    """Read a DataFrame back from plasma.

    Args:
        object_id_str: hex string of the object id.

    Returns:
        The pandas DataFrame converted from the first record batch of the
        stored stream. The original computed this value and then discarded
        it, so the function returned ``None``.
    """
    print("Reading df from Plasma")
    client = plasma.connect("/tmp/sock/plasma.sock")

    # Fetch the Plasma object
    object_id = plasma.ObjectID(bytes.fromhex(object_id_str))
    [data] = client.get_buffers([object_id])  # Get PlasmaBuffer from ObjectID
    buffer = pa.BufferReader(data)

    # Convert object back into an Arrow RecordBatch
    reader = pa.RecordBatchStreamReader(buffer)
    record_batch = reader.read_next_batch()
    return record_batch.to_pandas()
def set_all(self, object_ids, data, update):
    """Write each ``data[i]`` into a newly created plasma object
    ``object_ids[i]``, mapping the work over a pool of 5 processes.

    If the first element of ``object_ids`` is not already an ObjectID, every
    element is converted with ``plasma.ObjectID``.
    """
    # NOTE(review): ``set_data`` is a nested function and multiprocessing
    # has to pickle the callable it maps; confirm this runs in practice.
    # NOTE(review): nothing here seals the created objects or closes the
    # pool; confirm callers handle that.
    from multiprocessing import Pool

    first_is_id = isinstance(object_ids[0], pyarrow._plasma.ObjectID)
    if not first_is_id:
        object_ids = [plasma.ObjectID(raw) for raw in object_ids]

    def set_data(idx):
        data_head = DataHead()
        data_head.from_data(data[idx], update)
        target = memoryview(
            self.plasma_client.create(object_ids[idx], data_head.nbytes))
        data_head.write_to_buffer(target)

    p = Pool(5)
    p.map(set_data, list(range(len(object_ids))))
async def aset(self, object_id, data, update=False):
    """Asynchronously store ``data`` in plasma under ``object_id``.

    A plasma buffer sized from the DataHead's ``nbytes`` is created and
    attached to the DataHead. ``_set_data`` then runs in a thread-pool
    executor, and the object is sealed once that call has finished.
    """
    tmpt = time.time()  # timestamp kept from the original; not used below

    if not isinstance(object_id, pyarrow._plasma.ObjectID):
        object_id = plasma.ObjectID(object_id)

    data_head = DataHead()
    data_head.from_data(data, update)
    data_head.buffer = memoryview(
        self.plasma_client.create(object_id, data_head.nbytes))

    # maybe
    with concurrent.futures.ThreadPoolExecutor() as pool:
        write_job = functools.partial(_set_data, data_head)
        await self._loop.run_in_executor(pool, write_job)

    self.plasma_client.seal(object_id)
def getStr():
    """Read an object id from a file, fetch that object from plasma, decode
    it as UTF-8 and split it into lines.

    Prints the line count with the elapsed time, then the first five and the
    last five lines.
    """
    start = time.time()

    # ``with`` closes the id file; the original never closed it.
    with open("/home/osman/Desktop/pyjava/objid_fromJava", "rb") as f:
        byte = f.read()

    client = plasma.connect("/tmp/plasma", "", 0)
    object_id_data = plasma.ObjectID(byte)
    [buffer1] = client.get_buffers([object_id_data])

    data = buffer1.to_pybytes().decode('utf-8').split("\n")
    del data[-1]  # since we split by "\n", last item might be empty string.

    end = time.time() - start
    print("Time for (%d): %f" % (len(data), end))
    print(data[:5])
    # data[-5:] is also right for fewer than five lines; the old
    # data[len(data) - 5:len(data)] wrapped to a negative start there.
    print(data[-5:])
def store_dataframe(self, data):
    """Serialize a pandas DataFrame as an Arrow record-batch stream into a
    new plasma object with a random 20-byte id, and return that id."""
    batch = RecordBatch.from_pandas(data)
    object_id = plasma.ObjectID(np.random.bytes(20))

    def write_stream(sink):
        # write the single record batch as a stream into ``sink``
        with RecordBatchStreamWriter(sink, batch.schema) as stream_writer:
            stream_writer.write_batch(batch)

    # First pass: write into a mock sink just to learn the stream size.
    size_probe = MockOutputStream()
    write_stream(size_probe)
    data_size = size_probe.size()

    # Second pass: write into a plasma buffer of exactly that size.
    buf = self.client.create(object_id, data_size)
    write_stream(FixedSizeBufferWriter(buf))

    self.client.seal(object_id)
    return object_id
def recall(self, name):
    """
    Return the value stored under a Brain name.

    The name's metadata object is fetched first; its "value_id" entry gives
    the id of the value object. Both fetches use a 100 ms timeout.

    Errors:
        KeyError: if ``self.exists(name)`` is false
    """
    if self.exists(name):
        metadata = self.client.get(
            self._name_to_namespace_hash(name), timeout_ms=100)
        value_id = plasma.ObjectID(metadata["value_id"])
        return self.client.get(value_id, timeout_ms=100)
    raise KeyError(f"Name {name} does not exist.")