# Common imports for the snippets in this file. The Serialbox Python module
# must be importable (e.g., on PYTHONPATH).
import shutil
import time

import numpy as np
import serialbox as ser


def main():
    #
    # Initialize the serializer. At the moment sliced loading is only supported by the Binary
    # archive.
    #
    serializer_write = ser.Serializer(ser.OpenModeKind.Write, "./slice", "field", "Binary")

    #
    # Allocate 3D numpy arrays
    #
    field_in = np.random.rand(512, 512, 80)
    field_out = np.zeros((512, 512, 80))

    #
    # Write the numpy array to disk at savepoint `sp`
    #
    start = time.time()
    savepoint = ser.Savepoint('sp')
    serializer_write.write('field', savepoint, field_in)
    print("Serializer.write      : %8.2f s" % (time.time() - start))

    #
    # Initialize a serializer for reading.
    #
    serializer_read = ser.Serializer(ser.OpenModeKind.Read, "./slice", "field", "Binary")

    #
    # Assuming we are only interested in a certain layer of the data (k = 50), we can use the
    # slice object (ser.Slice) to encode this information and instruct the serializer to load
    # only the desired data. Note that you still need to allocate memory for the whole field!
    #
    start = time.time()
    serializer_read.read_slice('field', savepoint, ser.Slice[:, :, 50], field_out)
    print("Serializer.read_slice : %8.2f s" % (time.time() - start))

    assert np.allclose(field_in[:, :, 50], field_out[:, :, 50])

    #
    # You can of course load the full data and slice it afterwards with numpy, which yields the
    # same result but is most likely slower.
    #
    start = time.time()
    serializer_read.read('field', savepoint, field_out)
    print("Serializer.read       : %8.2f s" % (time.time() - start))

    assert np.allclose(field_in[:, :, 50], field_out[:, :, 50])

    #
    # Remove the directory
    #
    shutil.rmtree("./slice")

def serialize(self, intent, stage_id, stage_name, fields):
    sp = ser.Savepoint(self.name + "__" + intent)
    sp.metainfo.insert("stage_id", stage_id)
    sp.metainfo.insert("stage_name", stage_name)
    sp.metainfo.insert("invocation_count", self.invocation_count)
    for name, field in fields.items():
        self.serializer.write(name, sp, field)

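# `serialize` references self.name, self.invocation_count, and self.serializer,
# so it presumably lives on a wrapper class that this excerpt omits. A minimal
# sketch of such a class; the class name, constructor arguments, and the usage
# below are assumptions, not from the source:
class StageSerializer:
    def __init__(self, name, path, prefix):
        self.name = name                   # used to build the savepoint name
        self.invocation_count = 0          # expected to be bumped once per stage call
        self.serializer = ser.Serializer(ser.OpenModeKind.Write, path, prefix)

    serialize = serialize  # reuse the function defined above as a method


# Hypothetical usage: capture a stage's input fields before it runs.
stage_ser = StageSerializer("physics", "./stage_dump", "Fields")
stage_ser.serialize("in", stage_id=0, stage_name="microphysics",
                    fields={"phi": np.random.rand(8, 8)})
stage_ser.invocation_count += 1
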
def read_data(path, tile, ser_count, is_in):
    mode_str = "in" if is_in else "out"
    var_list = IN_VARS if is_in else OUT_VARS
    if is_in:
        serializer = ser.Serializer(ser.OpenModeKind.Read, path, "Generator_rank" + str(tile))
        savepoint = ser.Savepoint(f"cloud_mp-{mode_str}-{ser_count:0>6d}")
    else:
        serializer = ser.Serializer(ser.OpenModeKind.Read, path, "Serialized_rank" + str(tile))
        savepoint = ser.Savepoint(f"cloud_mp-{mode_str}-x-{ser_count:0>6d}")
    return data_dict_from_var_list(var_list, serializer, savepoint)

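# The data readers in this file depend on module-level names (IN_VARS, OUT_VARS,
# scalar_vars) and on a helper `data_dict_from_var_list` that this excerpt does
# not define. A minimal sketch of the helper, assuming every listed name was
# written as a field at the given savepoint (illustrative, not the project's
# actual code):
def data_dict_from_var_list(var_list, serializer, savepoint):
    # Read each variable at the savepoint into a dict of numpy arrays
    # keyed by variable name. Serializer.read allocates the output array,
    # as shown by the async example below.
    return {name: serializer.read(name, savepoint) for name in var_list}
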
def read_data(tile, is_in, path, ser_count=0):
    """
    Read serialbox2-format data with prefix `Generator_rank{tile}` from `path`.

    :param tile: number of the tile in the data
    :type tile: int
    :param is_in: True reads the input variables, False the output variables
    :type is_in: bool
    :param path: directory containing the serialized data
    :type path: str
    :param ser_count: invocation counter encoded in the savepoint name (defaults to 0)
    :type ser_count: int
    """
    # TODO: read_async and readbuffer
    serializer = ser.Serializer(ser.OpenModeKind.Read, path, "Generator_rank" + str(tile))
    inoutstr = "in" if is_in else "out"
    sp = ser.Savepoint(f"samfshalcnv-{inoutstr}-{ser_count:0>6d}")
    var_list = IN_VARS if is_in else OUT_VARS
    data = data_dict_from_var_list(var_list, serializer, sp)
    return data

def read_input_x_index(tile, ser_count, indices, path):
    serializer = ser.Serializer(ser.OpenModeKind.Read, path, "Generator_rank" + str(tile))
    sp = ser.Savepoint(f"samfshalcnv-in-{ser_count:0>6d}")
    var_list = set(IN_VARS) - set(scalar_vars)
    data = data_dict_from_var_list(var_list, serializer, sp)
    # Keep only the requested indices along the first dimension of each array;
    # `fscav` is the one 1D field that is not subset.
    for key in data:
        arr = data[key]
        ndim = arr.ndim
        if ndim == 1 and key != "fscav":
            data[key] = arr[indices]
        elif ndim == 2:
            data[key] = arr[indices, :]
        elif ndim == 3:
            data[key] = arr[indices, :, :]
    return data

def main():
    N = 512
    M = 512
    K = 80

    savepoint = ser.Savepoint('sp')

    #
    # First, we write some data to disk ...
    #
    serializer_write = ser.Serializer(ser.OpenModeKind.Write, "./async", "Field", "Binary")

    field_1 = np.random.rand(N, M, K)
    field_2 = np.random.rand(N, M, K)
    field_3 = np.random.rand(N, M, K)
    field_4 = np.random.rand(N, M, K)
    field_5 = np.random.rand(N, M, K)
    field_6 = np.random.rand(N, M, K)

    serializer_write.write('field_1', savepoint, field_1)
    serializer_write.write('field_2', savepoint, field_2)
    serializer_write.write('field_3', savepoint, field_3)
    serializer_write.write('field_4', savepoint, field_4)
    serializer_write.write('field_5', savepoint, field_5)
    serializer_write.write('field_6', savepoint, field_6)

    #
    # ... and read it again.
    #
    serializer_read = ser.Serializer(ser.OpenModeKind.Read, "./async", "Field", "Binary")

    start = time.time()
    field_1_rd = serializer_read.read('field_1', savepoint)
    field_2_rd = serializer_read.read('field_2', savepoint)
    field_3_rd = serializer_read.read('field_3', savepoint)
    field_4_rd = serializer_read.read('field_4', savepoint)
    field_5_rd = serializer_read.read('field_5', savepoint)
    field_6_rd = serializer_read.read('field_6', savepoint)
    print("Serializer.read       : %8.2f s" % (time.time() - start))

    #
    # Read operations are usually embarrassingly parallel, and we can leverage this parallelism
    # by launching the operations asynchronously. If the archive is not thread-safe, or if the
    # library was not configured with `SERIALBOX_ASYNC_API`, the method falls back to synchronous
    # execution. To synchronize the tasks at the end, we add a blocking
    # Serializer.wait_for_all().
    #
    start = time.time()
    field_1_rd_async = serializer_read.read_async('field_1', savepoint)
    field_2_rd_async = serializer_read.read_async('field_2', savepoint)
    field_3_rd_async = serializer_read.read_async('field_3', savepoint)
    field_4_rd_async = serializer_read.read_async('field_4', savepoint)
    field_5_rd_async = serializer_read.read_async('field_5', savepoint)
    field_6_rd_async = serializer_read.read_async('field_6', savepoint)
    serializer_read.wait_for_all()
    print("Serializer.read_async : %8.2f s" % (time.time() - start))

    #
    # Finally, we verify that the two read paths actually yield the same data.
    #
    assert np.allclose(field_1_rd, field_1_rd_async)
    assert np.allclose(field_2_rd, field_2_rd_async)
    assert np.allclose(field_3_rd, field_3_rd_async)
    assert np.allclose(field_4_rd, field_4_rd_async)
    assert np.allclose(field_5_rd, field_5_rd_async)
    assert np.allclose(field_6_rd, field_6_rd_async)

    #
    # Remove the directory (shutil is imported at the top of this file)
    #
    shutil.rmtree("./async")

def read_serialization_partx(var_list, part, tile=0, path=DATAPATH):
    serializer = ser.Serializer(ser.OpenModeKind.Read, path, "Serialized_rank" + str(tile))
    sp = ser.Savepoint(f"samfshalcnv-part{part}-input")
    data = data_dict_from_var_list(var_list, serializer, sp)
    return numpy_dict_to_gt4py_dict(data)

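# `DATAPATH` and `numpy_dict_to_gt4py_dict` are assumed to be defined at module
# level. A rough sketch of the conversion helper using gt4py's storage factory;
# the exact `from_array` signature varies between gt4py versions, so treat this
# as illustrative only:
import gt4py.storage as gt_storage

GT4PY_BACKEND = "numpy"  # hypothetical backend choice


def numpy_dict_to_gt4py_dict(data):
    # Wrap each numpy array in a gt4py storage; scalars pass through unchanged.
    out = {}
    for name, value in data.items():
        if isinstance(value, np.ndarray):
            out[name] = gt_storage.from_array(
                value, backend=GT4PY_BACKEND, default_origin=(0,) * value.ndim
            )
        else:
            out[name] = value
    return out
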
def write():
    #
    # Create a Serializer for writing. Besides the open-policy, we have to specify the
    # `directory` in which the Serializer is created and the `prefix` of all files. If the
    # directory does not exist, it will be created. In addition, if the directory is not empty,
    # all fields with the same `prefix` will be erased (this behaviour can be inhibited using
    # the Append mode).
    #
    serializer = ser.Serializer(ser.OpenModeKind.Write, "./laplacian/", "field")

    #
    # Allocate a 2D numpy array and fill it with some random numbers
    #
    phi = np.random.rand(10, 10)

    #
    # Register the field within the Serializer. Note that for the Python interface this step is
    # not strictly necessary, as it can be done implicitly in the write method (see below).
    #
    fieldmetainfo = ser.FieldMetainfo(ser.TypeID.Float64, phi.shape)
    serializer.register_field('phi', fieldmetainfo)

    #
    # Add some global meta-information to the serializer. Besides the usual `key = value` pair,
    # you can also add `key = {value1, ..., valueN}` pairs.
    #
    serializer.global_metainfo.insert('answer', 42)
    serializer.global_metainfo.insert('halos', [1, 1, 1, 1])

    #
    # Up to this point nothing has been written to disk. Using update_meta_data() forces a write
    # of all meta-information to the corresponding JSON files. Note that the meta-data is updated
    # after each call, so a manual update is seldom required. If you are curious, you can inspect
    # the files './laplacian/MetaData-field.json' and './laplacian/ArchiveMetaData-field.json'.
    #
    serializer.update_meta_data()

    #
    # We now apply the `laplacian_stencil` three times to phi. In each iteration we create an
    # input and an output savepoint at which we save the current `phi` field (input) and the
    # `lap` field (output).
    #
    for t in range(3):
        #
        # Create a Savepoint. Savepoints can have the same name as long as they have different
        # meta-information. In our case we always store the current time step `t` as
        # meta-information, thus making each savepoint unique.
        #
        savepoint_in = ser.Savepoint('laplacian-in')
        savepoint_in.metainfo.insert('time', t)

        #
        # Register the Savepoint.
        #
        serializer.register_savepoint(savepoint_in)

        #
        # Write phi to disk at our input savepoint. This creates the file `field_phi.dat` upon
        # first invocation; afterwards the data is appended.
        #
        serializer.write('phi', savepoint_in, phi)

        #
        # Apply the laplacian_stencil to phi
        #
        lap = laplacian_stencil(phi)

        #
        # Create the output savepoint. This time we directly initialize the meta-information of
        # the savepoint.
        #
        savepoint_out = ser.Savepoint('laplacian-out', {'time': t})

        #
        # Write lap to disk. Note that here we implicitly register the field `lap` upon first
        # invocation. The same goes for the output savepoint.
        #
        serializer.write('lap', savepoint_out, lap)

        #
        # Finally, we swap phi and lap
        #
        phi = lap

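# `laplacian_stencil` is called in write() but not defined in this excerpt.
# A minimal sketch, assuming a standard 5-point Laplacian on the interior
# points (boundary values stay zero):
def laplacian_stencil(phi):
    # lap[i, j] = -4*phi[i, j] + phi[i+1, j] + phi[i-1, j] + phi[i, j+1] + phi[i, j-1]
    lap = np.zeros_like(phi)
    lap[1:-1, 1:-1] = (
        -4.0 * phi[1:-1, 1:-1]
        + phi[2:, 1:-1]
        + phi[:-2, 1:-1]
        + phi[1:-1, 2:]
        + phi[1:-1, :-2]
    )
    return lap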