def test_numpy_subclass_serialization():
    # Check that we can properly serialize subclasses of np.ndarray.
    class CustomNDArray(np.ndarray):
        def __new__(cls, input_array):
            array = np.asarray(input_array).view(cls)
            return array

    def serializer(obj):
        return {'numpy': obj.view(np.ndarray)}

    def deserializer(data):
        array = data['numpy'].view(CustomNDArray)
        return array

    context = pa.default_serialization_context()
    context.register_type(CustomNDArray, 'CustomNDArray',
                          custom_serializer=serializer,
                          custom_deserializer=deserializer)

    x = CustomNDArray(np.zeros(3))
    serialized = pa.serialize(x, context=context).to_buffer()
    new_x = pa.deserialize(serialized, context=context)
    assert type(new_x) == CustomNDArray
    assert np.alltrue(new_x.view(np.ndarray) == np.zeros(3))
def deserialize_or_output(data_tuple):
    if data_tuple[0] == use_meta:
        return data_tuple[1].to_pybytes()
    else:
        if data_tuple[1] is None:
            return pa.plasma.ObjectNotAvailable
        else:
            return pa.deserialize(data_tuple[1])
def test_tensor_alignment():
    # Deserialized numpy arrays should be 64-byte aligned.
    x = np.random.normal(size=(10, 20, 30))
    y = pa.deserialize(pa.serialize(x).to_buffer())
    assert y.ctypes.data % 64 == 0

    xs = [np.random.normal(size=i) for i in range(100)]
    ys = pa.deserialize(pa.serialize(xs).to_buffer())
    for y in ys:
        assert y.ctypes.data % 64 == 0

    xs = [np.random.normal(size=i * (1,)) for i in range(20)]
    ys = pa.deserialize(pa.serialize(xs).to_buffer())
    for y in ys:
        assert y.ctypes.data % 64 == 0

    xs = [np.random.normal(size=i * (5,)) for i in range(1, 8)]
    xs = [xs[i][(i + 1) * (slice(1, 3),)] for i in range(len(xs))]
    ys = pa.deserialize(pa.serialize(xs).to_buffer())
    for y in ys:
        assert y.ctypes.data % 64 == 0
def test_serialize_with_pandas_objects():
    df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])

    data = {
        'a_series': df['a'],
        'a_frame': df
    }

    serialized = pa.serialize(data).to_buffer()
    deserialized = pa.deserialize(serialized)
    assert_frame_equal(deserialized['a_frame'], df)
    assert_series_equal(deserialized['a_series'], df['a'])
def test_set_pickle():
    # Use a custom type to trigger pickling.
    class Foo(object):
        pass

    context = pa.SerializationContext()
    context.register_type(Foo, 'Foo', pickle=True)
    test_object = Foo()

    # Define a custom serializer and deserializer to use in place of pickle.
    def dumps1(obj):
        return b'custom'

    def loads1(serialized_obj):
        return serialized_obj + b' serialization 1'

    # Test that setting a custom pickler changes the behavior.
    context.set_pickle(dumps1, loads1)
    serialized = pa.serialize(test_object, context=context).to_buffer()
    deserialized = pa.deserialize(serialized.to_pybytes(), context=context)
    assert deserialized == b'custom serialization 1'

    # Define another custom serializer and deserializer.
    def dumps2(obj):
        return b'custom'

    def loads2(serialized_obj):
        return serialized_obj + b' serialization 2'

    # Test that setting another custom pickler changes the behavior again.
    context.set_pickle(dumps2, loads2)
    serialized = pa.serialize(test_object, context=context).to_buffer()
    deserialized = pa.deserialize(serialized.to_pybytes(), context=context)
    assert deserialized == b'custom serialization 2'
def test_buffer_serialization():
    class BufferClass(object):
        pass

    def serialize_buffer_class(obj):
        return pa.frombuffer(b"hello")

    def deserialize_buffer_class(serialized_obj):
        return serialized_obj

    pa._default_serialization_context.register_type(
        BufferClass, "BufferClass", pickle=False,
        custom_serializer=serialize_buffer_class,
        custom_deserializer=deserialize_buffer_class)

    b = pa.serialize(BufferClass()).to_buffer()
    assert pa.deserialize(b).to_pybytes() == b"hello"
def test_buffer_serialization():
    class BufferClass(object):
        pass

    def serialize_buffer_class(obj):
        return pa.py_buffer(b"hello")

    def deserialize_buffer_class(serialized_obj):
        return serialized_obj

    context = pa.default_serialization_context()
    context.register_type(
        BufferClass, "BufferClass",
        custom_serializer=serialize_buffer_class,
        custom_deserializer=deserialize_buffer_class)

    b = pa.serialize(BufferClass(), context=context).to_buffer()
    assert pa.deserialize(b, context=context).to_pybytes() == b"hello"
def test_serialize_with_pandas_objects():
    df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])
    s = pd.Series([1, 2, 3, 4])

    data = {
        'a_series': df['a'],
        'a_frame': df,
        's_series': s
    }

    serialized = pa.serialize(data).to_buffer()
    deserialized = pa.deserialize(serialized)
    assert_frame_equal(deserialized['a_frame'], df)

    assert_series_equal(deserialized['a_series'], df['a'])
    assert deserialized['a_series'].name == 'a'

    assert_series_equal(deserialized['s_series'], s)
    assert deserialized['s_series'].name is None
def unpack(data):
    if LZ4_ENABLED:
        data = base64.b64decode(data)
        data = lz4.frame.decompress(data)
        data = pyarrow.deserialize(data)
    return data
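# A minimal sketch of a matching pack() for the unpack() above, assuming the
# same LZ4_ENABLED flag and the base64/lz4/pyarrow imports of that module;
# this is an illustration, not the original project's implementation.
def pack(data):
    if LZ4_ENABLED:
        data = pyarrow.serialize(data).to_buffer().to_pybytes()
        data = lz4.frame.compress(data)
        data = base64.b64encode(data)
    return data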
def unpack(data):
    if SNAPPY_ENABLED:
        data = base64.b64decode(data)
        return pyarrow.deserialize(snappy.decompress(data))
    else:
        return data
def test_lmdb_get_put_with_variant_id():
    ddir = get_data_dir()
    with open(ddir + "/raw/v1.3/training_data/sample_variant_ids.pkl", 'rb') as f:
        varids = pickle.load(f)
    inputfile = \
        get_data_dir() + "/raw/v1.3/training_data/training_data.imputed.csv"
    separator = ','
    choose = 8
    lmdbpath = ddir + "/tests/lmdb_2"

    rows = {"variant_ids": [], "row_infos": []}
    with open(inputfile) as input_file:
        _ = next(input_file)  # skip header line
        row_number = 0
        row_example = None
        for row in tqdm(input_file):
            row = np.array(row.split(separator), dtype=np.float16)
            rows["variant_ids"].append(varids.iloc[row_number, 0])
            rows["row_infos"].append(row)
            row_number += 1
            if row_number > 10:
                row_example = (row, varids.iloc[row_number, 0])
                break

    map_size = cadd_serialize_string_row(row_example[0], row_example[1],
                                         separator, np.float16, 0).to_buffer().size
    map_size = map_size * varids.shape[0] * 1.2

    env = lmdb.Environment(lmdbpath, map_size=map_size, max_dbs=0, lock=False)
    with env.begin(write=True, buffers=True) as txn:
        with open(inputfile) as input_file:
            _ = next(input_file)  # skip header line
            row_number = 0
            for row in tqdm(input_file):
                variant_id = varids.iloc[row_number, 0]
                ser_data = cadd_serialize_string_row(row, variant_id,
                                                     separator, np.float16, 0)
                buf = ser_data.to_buffer()
                print(buf.size)
                txn.put(variant_id.encode('ascii'), buf)
                row_number += 1
                if row_number > 10:
                    break

    find_variant = varids.iloc[choose, 0]
    print("Find variant", find_variant)
    with env.begin(write=False, buffers=True) as txn:
        buf = bytes(txn.get(find_variant.encode('ascii')))
        variant_info = pa.deserialize(buf)['inputs']

    check_variant_info = np.array(rows["row_infos"][choose][1:])

    # if os.path.exists(lmdbpath):
    #     shutil.rmtree(lmdbpath, ignore_errors=True)
    #     os.rmdir(lmdbpath)

    assert np.array_equal(variant_info, check_variant_info)
def deserialize(data):
    return pyarrow.deserialize(data, mars_serialize_context())
def deserialize(data):
    try:
        return pa.deserialize(data)
    except (pa.lib.ArrowInvalid, OSError):
        return pickle.loads(data)
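# A minimal sketch of the serializer the deserialize() above pairs with,
# assuming the same `pa` and `pickle` imports: objects pyarrow cannot handle
# fall back to pickle, which is why deserialization tries pyarrow first.
def serialize(data):
    try:
        return pa.serialize(data).to_buffer().to_pybytes()
    except Exception:
        # the exact exception depends on the pyarrow version, so catch broadly here
        return pickle.dumps(data)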
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
def from_bytes(cls, pyarrow_bytes):
    return cls(**pa.deserialize(pyarrow_bytes))
def load(self, msg):
    return pa.deserialize(msg)
import numpy as np
import pyarrow as pa

x = np.random.standard_normal(100)
buf = pa.serialize(x).to_buffer()
y = pa.deserialize(buf)

print(x)
print(y)
print(np.allclose(x, y))
def deserialize(data):
    result = pyarrow.deserialize(data, mars_serialize_context())
    return _patch_pandas_mgr(result)
def load_pyarrow(buf):
    assert buf is not None, 'buf should not be None.'
    return pyarrow.deserialize(buf)
def recv_replay_data(self):
    """Receive replay data from the global buffer."""
    replay_data_id = self.rep_socket.recv()
    replay_data = pa.deserialize(replay_data_id)
    return replay_data
def testPlasmaSharedStore(self):
    import pyarrow
    from pyarrow import plasma

    store_size = 10 * 1024**2
    test_addr = f'127.0.0.1:{get_next_port()}'
    with plasma.start_plasma_store(store_size) as (sckt, _), \
            create_actor_pool(n_process=1, address=test_addr) as pool:
        km_ref = pool.create_actor(PlasmaKeyMapActor,
                                   uid=PlasmaKeyMapActor.default_uid())
        try:
            plasma_client = plasma.connect(sckt)
        except TypeError:
            plasma_client = plasma.connect(sckt, '', 0)
        store = PlasmaSharedStore(plasma_client, km_ref)

        self.assertGreater(store.get_actual_capacity(store_size), store_size / 2)

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        key_list = [str(uuid.uuid4()) for _ in range(20)]

        self.assertFalse(store.contains(session_id, str(uuid.uuid4())))
        with self.assertRaises(KeyError):
            store.get(session_id, str(uuid.uuid4()))
        with self.assertRaises(KeyError):
            store.get_actual_size(session_id, str(uuid.uuid4()))
        with self.assertRaises(KeyError):
            store.seal(session_id, str(uuid.uuid4()))

        fake_data_key = str(uuid.uuid4())
        km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
        self.assertFalse(store.contains(session_id, fake_data_key))
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.seal(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get_actual_size(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get_buffer(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        store.delete(session_id, fake_data_key)

        with self.assertRaises(SerializationFailed):
            non_serial = type('non_serial', (object,), dict(nbytes=10))
            store.put(session_id, fake_data_key, non_serial())
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        with self.assertRaises(Exception):
            store.create(session_id, fake_data_key, 'abcd')
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        with self.assertRaises(StorageFull):
            store.create(session_id, fake_data_key, store_size * 2)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        arrow_ser = pyarrow.serialize(data_list[0])
        buf = store.create(session_id, key_list[0], arrow_ser.total_bytes)
        writer = pyarrow.FixedSizeBufferWriter(buf)
        arrow_ser.write_to(writer)
        writer.close()
        store.seal(session_id, key_list[0])
        self.assertTrue(store.contains(session_id, key_list[0]))
        self.assertEqual(store.get_actual_size(session_id, key_list[0]),
                         arrow_ser.total_bytes)
        assert_allclose(store.get(session_id, key_list[0]), data_list[0])
        assert_allclose(
            pyarrow.deserialize(store.get_buffer(session_id, key_list[0])),
            data_list[0])

        with self.assertRaises(StorageDataExists):
            store.create(session_id, key_list[0], arrow_ser.total_bytes)
        self.assertIsNotNone(km_ref.get(session_id, key_list[0]))
        store.delete(session_id, key_list[0])
        del buf

        bufs = []
        for key, data in zip(key_list, data_list):
            try:
                bufs.append(store.put(session_id, key, data))
            except StorageFull:
                break
        del bufs
def deserialize_regex(serialized, q):
    import pyarrow as pa
    q.put(pa.deserialize(serialized))
def loads(o):
    return pyarrow.deserialize(o)
def recv(self, name=None, block=True):
    msg = self.socket.recv()
    data = pyarrow.deserialize(msg)
    # data = pickle.loads(msg)
    return data
def load_pyarrow(buf):
    return pyarrow.deserialize(buf)
def deserialize_from_file(path):
    with open(path, 'rb') as file:
        data = pa.deserialize(file.read())
    return data
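# A minimal sketch of the matching writer for deserialize_from_file() above,
# assuming `data` is pyarrow-serializable; the helper name is illustrative.
def serialize_to_file(data, path):
    with open(path, 'wb') as file:
        file.write(pa.serialize(data).to_buffer().to_pybytes())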
def deserialize(buf):
    data = pa.deserialize(buf)
    return data
def __init__(self, noused, db_path, num_groups=16, frames_per_group=1,
             sample_offset=0, num_clips=1, modality='rgb', dense_sampling=False,
             fixed_offset=True, image_tmpl='{:05d}.jpg', transform=None,
             is_train=True, test_mode=False, seperator=' ', filter_video=0,
             num_classes=None):
    """
    Arguments have different meaning when dense_sampling is True:
        - num_groups ==> number of frames
        - frames_per_group ==> sample every K frame
        - sample_offset ==> number of clips used in validation or test mode

    Args:
        db_path (str): the file path to the root of video folder
        num_groups (int): number of frames per data sample
        frames_per_group (int): number of frames within one group
        sample_offset (int): used in validation/test, the offset when sampling frames
            from a group
        modality (str): rgb or flow
        dense_sampling (bool): dense sampling in I3D
        fixed_offset (bool): used for generating the same videos used in TSM
        image_tmpl (str): template of image ids
        transform: the transformer for preprocessing
        is_train (bool): shuffle the video but keep the causality
        test_mode (bool): testing mode, no label
    """
    # TODO: handle multi-label?
    # TODO: flow data?
    if modality not in ['flow', 'rgb']:
        raise ValueError("modality should be 'flow' or 'rgb'.")

    self.db_path = db_path
    self.num_groups = num_groups
    self.num_frames = num_groups
    self.frames_per_group = frames_per_group
    self.sample_freq = frames_per_group
    self.num_clips = num_clips
    self.sample_offset = sample_offset
    self.fixed_offset = fixed_offset
    self.dense_sampling = dense_sampling
    self.modality = modality.lower()
    self.image_tmpl = image_tmpl
    self.transform = transform
    self.is_train = is_train
    self.test_mode = test_mode
    self.seperator = seperator
    self.filter_video = filter_video

    if self.modality == 'flow':
        self.num_consecutive_frames = 5
    else:
        self.num_consecutive_frames = 1

    self.multi_label = None

    self.db = None
    db = lmdb.open(self.db_path, max_readers=1, subdir=os.path.isdir(self.db_path),
                   readonly=True, lock=False, readahead=False, meminit=False)
    with db.begin(write=False) as txn:
        self.length = pa.deserialize(txn.get(b'__len__'))
        self.keys = pa.deserialize(txn.get(b'__keys__'))
    db.close()

    # TODO: a hack way to filter video
    self.list_file = db_path.replace(".lmdb", ".txt")
    valid_video_numbers = 0
    invalid_video_ids = []
    for x in open(self.list_file):
        elements = x.strip().split(self.seperator)
        start_frame = int(elements[1])
        end_frame = int(elements[2])
        total_frame = end_frame - start_frame + 1
        if self.test_mode:
            valid_video_numbers += 1
        else:
            if total_frame >= self.filter_video:
                valid_video_numbers += 1
            else:
                name = u'{}'.format(elements[0].split("/")[-1]).encode('ascii')
                invalid_video_ids.append(name)
    print("The number of videos is {} (with more than {} frames) "
          "(original: {})".format(valid_video_numbers, self.filter_video, self.length),
          flush=True)

    # remove keys and update length
    self.length = valid_video_numbers
    self.keys = [k for k in self.keys if k not in invalid_video_ids]
    if self.length != len(self.keys):
        raise ValueError("Do not filter video correctly.")

    self.num_classes = num_classes
def __next__(self):
    """
    Create one batch of `t_task` tasks of data.
    """
    env = self.env
    total_support_x = []
    total_query_x = []
    total_support_y = []
    total_query_y = []
    for t in range(self.t_task):
        # create a task (n_way*k_shot + n_way*k_query)
        support_x = []
        query_x = []
        support_y = []
        query_y = []
        support_imgs = []
        query_imgs = []

        # select n_way classes randomly
        selected_classes = np.random.choice(self.total_cls, self.n_way)
        # select k_shot + k_query for each class
        for selected_class in selected_classes:
            selected_imgs = np.random.choice(
                self.dic_img_label[self.num2label[selected_class]],
                self.k_shot + self.k_query, False)
            support_imgs += selected_imgs[:self.k_shot].tolist()
            query_imgs += selected_imgs[self.k_shot:].tolist()

        with env.begin(write=False) as txn:
            for i, img_id in enumerate(support_imgs):
                res = pyarrow.deserialize(txn.get(u'{}'.format(img_id).encode('ascii')))
                support_x.append(np.frombuffer(res[0], np.uint8))
                support_y.append(np.array([self.label2num[res[1]]]))
            for i, img_id in enumerate(query_imgs):
                res = pyarrow.deserialize(txn.get(u'{}'.format(img_id).encode('ascii')))
                query_x.append(np.frombuffer(res[0], np.uint8))
                query_y.append(np.array([self.label2num[res[1]]]))

        support_x = np.array(support_x)
        query_x = np.array(query_x)
        support_y = np.array(support_y)
        query_y = np.array(query_y)

        # shuffle:
        index = np.random.permutation(len(support_y))
        support_x = support_x[index]
        if not self.fet_global:
            support_y = np.array([i for i in range(self.n_way)
                                  for j in range(self.k_shot)])
        support_y = support_y[index]

        index = np.random.permutation(len(query_y))
        query_x = query_x[index]
        if not self.fet_global:
            query_y = np.array([i for i in range(self.n_way)
                                for j in range(self.k_query)])
        query_y = query_y[index]

        # a batch
        total_query_x.append(query_x)
        total_query_y.append(query_y)
        total_support_x.append(support_x)
        total_support_y.append(support_y)

    total_query_x = np.hstack(total_query_x)
    total_query_y = np.hstack(total_query_y)
    total_support_x = np.hstack(total_support_x)
    total_support_y = np.hstack(total_support_y)

    return np.hstack([total_support_x, total_query_x]).tolist(), \
        np.hstack([total_support_y, total_query_y]).reshape([-1, 1])
def time_deserialize_from_buffer(self):
    pa.deserialize(self.as_buffer)
def __init__(self, path_root, n_way, k_shot, k_query, x_dim, split, augment='0',
             test=None, shuffle=True, fetch_global=False):
    self.n_way = n_way
    self.k_shot = k_shot
    self.k_query = k_query
    self.x_dim = list(map(int, x_dim.split(',')))
    self.split = split
    self.shuffle = shuffle
    self.path_root = path_root
    self.fet_global = fetch_global

    if augment == '0':
        self.transform = transforms.Compose([
            transforms.Lambda(f1),
            transforms.Resize(self.x_dim[:2]),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
    elif augment == '1':
        if self.split == 'train':
            self.transform = transforms.Compose([
                # lambda x: Image.open(x).convert('RGB'),
                transforms.Lambda(f1),
                transforms.Resize((self.x_dim[0] + 20, self.x_dim[1] + 20)),
                transforms.RandomCrop(self.x_dim[:2]),
                transforms.RandomHorizontalFlip(),
                transforms.ColorJitter(brightness=.1, contrast=.1,
                                       saturation=.1, hue=.1),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
            ])
        else:
            self.transform = transforms.Compose([
                # lambda x: Image.open(x).convert('RGB'),
                transforms.Lambda(f1),
                transforms.Resize((self.x_dim[0] + 20, self.x_dim[1] + 20)),
                transforms.RandomCrop(self.x_dim[:2]),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
            ])

    self.path = os.path.join(path_root, 'images')
    self.lmdb_file = os.path.join(path_root, "lmdb_data", "%s.lmdb" % self.split)
    if not os.path.exists(self.lmdb_file):
        print("lmdb_file is not found, start to generate %s" % self.lmdb_file)
        self._generate_lmdb()

    # read lmdb_file
    self.env = lmdb.open(self.lmdb_file, subdir=False, readonly=True, lock=False,
                         readahead=False, meminit=False)
    with self.env.begin(write=False) as txn:
        self.total_sample = pyarrow.deserialize(txn.get(b'__len__'))
        self.keys = pyarrow.deserialize(txn.get(b'__keys__'))
        self.label2num = pyarrow.deserialize(txn.get(b'__label2num__'))
        self.num2label = pyarrow.deserialize(txn.get(b'__num2label__'))
        self.image_labels = [i.decode() for i in self.keys]

    self.total_cls = len(self.num2label)
    self.dic_img_label = defaultdict(list)
    for i in self.image_labels:
        self.dic_img_label[i[:9]].append(i)

    self.support_set_size = self.n_way * self.k_shot  # num of samples per support set
    self.query_set_size = self.n_way * self.k_query
    self.episode = self.total_sample // (
        self.support_set_size + self.query_set_size)  # how many episode

    # create episodes
    self.episode_sets = []
    for i in tqdm(range(self.episode),
                  desc="preparing episodes for %s" % self.split):
        support_imgs, query_imgs = [], []
        # select n_way classes randomly
        selected_classes = np.random.choice(self.total_cls, self.n_way)
        # select k_shot + k_query for each class
        for selected_class in selected_classes:
            selected_imgs = np.random.choice(
                self.dic_img_label[self.num2label[selected_class]],
                self.k_shot + self.k_query, False)
            support_imgs += selected_imgs[:self.k_shot].tolist()
            query_imgs += selected_imgs[self.k_shot:].tolist()
        self.episode_sets.append({
            "support_set": support_imgs,
            "query_set": query_imgs
        })

    del self.env
if __name__ == '__main__':
    model = torch.load(
        '/home/tmr-algorithms/catkin_ws/src/deep-road-segnet/models/unet/model.pt',
        map_location='cpu')
    model.eval()

    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("ipc:///tmp/deep")

    while True:
        message = socket.recv()
        print(len(message))
        image = pa.deserialize(message)
        classified_img = predict(model, image)
        message = pa.serialize(classified_img).to_buffer().to_pybytes()
        socket.send(message)

    # X = cv2.imread('/home/bml/images_from_salinas/left0000.jpg')
    # start = time.time()
    # for i in range(1):
    #     y = predict(model, X)
    # end = time.time()
def deserialize_data(data):
    """deserialize_data"""
    return pyarrow.deserialize(data, context=context)
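# A hedged sketch of the counterpart to deserialize_data() above, assuming the
# same module-level `context` and `pyarrow` import; the name is illustrative.
def serialize_data(data):
    """serialize_data"""
    return pyarrow.serialize(data, context=context).to_buffer().to_pybytes()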
def test_serialize_to_buffer():
    for nthreads in [1, 4]:
        for value in COMPLEX_OBJECTS:
            buf = pa.serialize(value).to_buffer(nthreads=nthreads)
            result = pa.deserialize(buf)
            assert_equal(value, result)
def load(key='test_df'):
    df = pa.deserialize(r.get(key))
    return df
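# A minimal sketch of the store() side assumed by load() above, where `r` is
# the same Redis client; the helper name and default key are illustrative.
def store(df, key='test_df'):
    r.set(key, pa.serialize(df).to_buffer().to_pybytes())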
def deserialize(self, obj):
    return pyarrow.deserialize(obj)
def ray_decompress(data):
    if isinstance(data, bytes):
        data = base64.b64decode(data)
        data = lz4.frame.decompress(data)
        data = pyarrow.deserialize(data)
    return data
def recv_msg(sock):
    # Read message length and unpack it into an integer
    raw_msglen = recvall(sock, 4)
    if not raw_msglen:
        return None
    msglen = struct.unpack('>I', raw_msglen)[0]
    # Read the message data
    return recvall(sock, msglen)


def recvall(sock, n):
    # Helper function to recv n bytes or return None if EOF is hit
    data = bytearray()
    while len(data) < n:
        packet = sock.recv(n - len(data))
        if not packet:
            return None
        data.extend(packet)
    return data


host = 'localhost'
port = 12345

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((host, port))
data = recv_msg(s)
s.close()

add = pa.deserialize(memoryview(data))
print(add(1, 0))
def _load_numpy(self, msg):
    if not msg:
        return msg
    return pa.deserialize(msg)
def __init__(self, opt, is_training=True, is_testing=False, live_test=False):
    self.node_dim = opt.node_dim
    self.state_dim = opt.state_dim
    self.is_training = is_training
    self.is_testing = is_testing

    if live_test:
        all_data_node_id, all_data_node_type = load_single_program(opt.test_graph_path)
        all_data_node_id = np.array(all_data_node_id)[0:len(all_data_node_id)]
        all_data_node_type = np.array(all_data_node_type)[0:len(all_data_node_type)]
    else:
        base_name = os.path.basename(opt.path)
        if is_training:
            saved_input_filename = "%s/%s-%d-train.pkl" % (
                opt.path, base_name, opt.n_classes)
        if is_testing:
            saved_input_filename = "%s/%s-%d-test.pkl" % (
                opt.path, base_name, opt.n_classes)
        if os.path.exists(saved_input_filename):
            input_file = open(saved_input_filename, 'rb')
            buf = input_file.read()
            all_data_node_id, all_data_node_type = pyarrow.deserialize(buf)
            input_file.close()
        else:
            all_data_node_id, all_data_node_type = load_program_graphs_from_directory(
                opt.path, is_training, is_testing, opt.n_classes)
            all_data_node_id = np.array(all_data_node_id)[0:len(all_data_node_id)]
            all_data_node_type = np.array(all_data_node_type)[0:len(all_data_node_type)]
            buf = pyarrow.serialize(
                (all_data_node_id, all_data_node_type)).to_buffer()
            out = pyarrow.OSFile(saved_input_filename, 'wb')
            out.write(buf)
            out.close()

    self.pretrained_embeddings = opt.pretrained_embeddings
    self.batch_size = opt.train_batch_size

    label_lookup = {
        label: _onehot(label, opt.n_classes)
        for label in range(0, opt.n_classes)
    }
    self.label_lookup = label_lookup

    # if is_train == True:
    print("Number of all data : " + str(len(all_data_node_id)))
    # else:
    #     print("Number of all testing data : " + str(len(all_data_node_id)))

    # self.n_edge_types = find_max_edge_id(all_data_node_id)
    self.n_edge_types = 7
    # print("Edge types : " + str(self.n_edge_types))

    max_node_id = find_max_node_id(all_data_node_id)
    min_node_id = find_min_node_id(all_data_node_id)
    print("Max node id in data : " + str(max_node_id))
    print("Min node id in data : " + str(min_node_id))

    max_node_type = find_max_node_id(all_data_node_type)
    min_node_type = find_min_node_id(all_data_node_type)
    print("Max node type in data : " + str(max_node_type))
    print("Min node type in data : " + str(min_node_type))

    # print("Max node id : " + str(max_node_id))
    # print("Max node type : " + str(max_node_type))

    self.n_node_by_id = max_node_id
    self.n_node_by_type = max_node_type

    all_data_node_id = convert_program_data(all_data_node_id)
    all_data_node_type = convert_program_data(all_data_node_type)

    self.all_data_node_id = all_data_node_id
    self.all_data_node_type = all_data_node_type

    self.data = self.process_raw_graphs()
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# This file is called from a test in test_serialization.py.

import sys

import pyarrow as pa

with open(sys.argv[1], 'rb') as f:
    data = f.read()
    pa.deserialize(data)