def clone(self):
    """
    Return a copy of this vector that is backed by its own temporary files.

    The object is deep-copied first. Then:

    * if the copy has no header file (``vecfile is None``), a new vector file
      with the same shape is generated in ``sep.datapath`` by running
      ``Spike ... | Add scale=0.0`` through ``RunShellCmd``;
    * otherwise the header and binary files of this vector are copied to new
      time-stamped files in ``sep.datapath`` and an ``in=`` entry pointing to
      the copied binary is appended to the copied header.

    Returns:
        the cloned vector, with ``remove_file`` set to True so that its
        temporary files are flagged for removal together with the clone
    """
    # First performing a deep copy of the vector
    vec_clone = deepcopy(self)
    # Computer time (in microseconds) used to build a unique temporary name
    stamp = str(int(time() * 1000000))
    if vec_clone.vecfile is None:
        # Creating header and binary files from vector space
        # Placing temporary file into datapath folder
        tmp_vec = sep.datapath + "clone_tmp_vector" + stamp + ".H"
        # Axes are numbered n1, n2, ... over the reversed shape
        axis_file = "".join(
            "n%s=%s " % (iaxis + 1, naxis)
            for iaxis, naxis in enumerate(reversed(vec_clone.shape)))
        # Creating temporary vector file
        cmd = "Spike %s | Add scale=0.0 > %s" % (axis_file, tmp_vec)
        RunShellCmd(cmd, get_stat=False, get_output=False)
        vec_clone.vecfile = tmp_vec
        vec_clone.binfile = sep.get_binary(vec_clone.vecfile)
    else:
        # Creating a temporary file with similar name but computer time at the end
        base_name = self.vecfile.split(".H")[0].split("/")[-1]  # filename only
        # Placing temporary file into datapath folder
        tmp_vec = sep.datapath + base_name + "_clone_" + stamp + ".H"
        tmp_bin = tmp_vec + "@"
        # Copying header and binary files and setting pointers to new files
        copyfile(self.vecfile, tmp_vec)
        copyfile(self.binfile, tmp_bin)
        vec_clone.vecfile = tmp_vec
        vec_clone.binfile = tmp_bin
        # "Fixing" header file: the entry goes on its own line and the newline
        # must follow the closing quote, otherwise it becomes part of the path
        with open(vec_clone.vecfile, "a") as fid:
            fid.write("\nin='%s'\n" % tmp_bin)
    # By default the clone file is going to be removed once the vector is deleted
    vec_clone.remove_file = True
    return vec_clone
def __init__(self, in_content):
    """
    VectorOC constructor

    Args:
        in_content: numpy array, header file, Vector instance

    Raises:
        ValueError: if in_content is none of the supported types
    """
    # Running the parent constructor. The one-argument form
    # super(VectorOC).__init__() only creates an unbound super object and
    # never reaches the parent class.
    super(VectorOC, self).__init__()
    # Verify that input is a numpy array or header file or vectorOC
    if isinstance(in_content, Vector):
        # VectorIC passed to constructor
        # Placing temporary file into datapath folder
        tmp_vec = sep.datapath + "tmp_vectorOC" + str(int(time() * 1000000)) + ".H"
        sep.write_file(tmp_vec, in_content.getNdArray(), in_content.ax_info)
        self.vecfile = tmp_vec  # Assigning internal vector array
        # Default behavior is to remove the temporary file
        self.remove_file = True
    elif isinstance(in_content, np.ndarray):
        # Numpy array passed to constructor
        tmp_vec = sep.datapath + "tmp_vectorOC" + str(int(time() * 1000000)) + ".H"
        sep.write_file(tmp_vec, in_content)
        self.vecfile = tmp_vec  # Assigning internal vector array
        # Default behavior is to remove the temporary file
        self.remove_file = True
    elif isinstance(in_content, str):
        # Header file passed to constructor
        self.vecfile = in_content  # Assigning internal vector array
        # Default behavior is to preserve the user file
        self.remove_file = False
    else:
        # Not supported type
        raise ValueError("ERROR! Input variable not currently supported!")
    # Assigning binary file pointer
    self.binfile = sep.get_binary(self.vecfile)
    # Number of axes reported for the header file
    n_axes = sep.get_num_axes(self.vecfile)
    # Number of elements per axis (first entry of each axis record)
    axes_info = sep.get_axes(self.vecfile)
    axis_elements = tuple(ax[0] for ax in axes_info[:n_axes])
    # The shape is stored in reversed axis order
    self.shape = tuple(reversed(axis_elements))
    # np.product was removed in NumPy 2.0; np.prod is the supported spelling
    self.size = np.prod(self.shape)
    self.ndim = len(self.shape)
def writeVec(self, filename, mode='w'):
    """
    Function to write vector to file

    The header of this vector is copied to `filename` (or, when appending to
    an existing file, a new axis entry is added to it) and the binary data
    are copied or appended to the binary file associated with `filename`.

    Args:
        filename: path of the header file to write
        mode: 'w' to overwrite the file, 'a' to append to it

    Raises:
        ValueError: if mode is neither 'w' nor 'a'
    """
    # Check writing mode. An exact comparison is required: the substring
    # test `mode in 'wa'` also let '' and 'wa' through.
    if mode not in ('w', 'a'):
        raise ValueError("Mode must be appending 'a' or writing 'w' ")
    append = mode == 'a'
    if not (os.path.isfile(filename) and append):
        # Writing header/pointer file if not present or not in append mode
        binfile = sep.datapath + filename.split('/')[-1] + '@'
        # Copying SEPlib header file
        copyfile(self.vecfile, filename)
        # Substituting binary file
        with open(filename, 'a') as fid:
            fid.write("\nin='%s'\n" % binfile)
    else:
        # Appending to an existing header file
        binfile = sep.get_binary(filename)
        axes = sep.get_axes(filename)
        # Number of vectors already present in the file
        if self.shape == (1, ):
            n_vec = axes[0][0]
            append_dim = self.ndim
        else:
            n_vec = axes[self.ndim][0]
            append_dim = self.ndim + 1
        with open(filename, mode) as fid:
            fid.write("n%s=%s o%s=0.0 d%s=1.0 \n" %
                      (append_dim, n_vec + 1, append_dim, append_dim))
    # Writing or copying binary file
    if not (os.path.isfile(binfile) and append):
        copyfile(self.binfile, binfile)
    else:
        # Appending the binary data in blocks of BUF_SIZE bytes
        with open(binfile, mode + 'b') as fid, \
                open(self.binfile, 'rb') as fid_toread:
            for data in iter(lambda: fid_toread.read(BUF_SIZE), b""):
                fid.write(data)
    return
def _get_binaries(**kwargs): """ Function to obtain associated binary files to each file name :param filenames: list; List/Array containing file names to read :return: binfiles: list; List containing binary files associated to each file Nbytes: list; List containing the number of bytes within binary files """ binfiles = list() Nbytes = list() filenames = kwargs.get("filenames") for filename in filenames: _, ext = os.path.splitext(filename) # Getting file extension if ext == ".H": # SEPlib file binfiles.append(sep.get_binary(filename)) Nbytes.append(os.path.getsize(binfiles[-1])) elif ext == ".h5": raise NotImplementedError("ERROR! h5 files not supported yet.") else: raise ValueError("ERROR! Unknown format for file %s" % filename) return binfiles, Nbytes
def writeVec(self, filename, mode='w', multi_file=False):
    """
    Function to write vector to file:

    Every chunk in self.vecDask is first written to its own
    "<filename without extension>_chunkN.H" file through the Dask client.
    Unless multi_file is True, the chunk binaries are then concatenated into
    a single file and the chunk files are removed. If the chunks turn out to
    be incompatible, a warning is printed and the chunk files are left in
    place.

    :param filename   : string - Filename to write the vector to
    :param mode       : string - Writing mode 'w'=overwrite file or 'a'=append to file ['w']
    :param multi_file : boolean - If True multiple files will be written with suffix _chunk1,2,3,...;
                        otherwise, a single will be written [False]
    :raises ValueError: if mode is neither 'w' nor 'a'
    """
    # Check writing mode. An exact comparison is required: the substring
    # test `mode in 'wa'` also let '' and 'wa' through.
    if mode not in ('w', 'a'):
        raise ValueError("Mode must be appending 'a' or writing 'w' ")
    # Multi-node writing mode
    Nvecs = len(self.vecDask)
    # Creating vector-chunk names. splitext removes only the extension, so
    # dots elsewhere in the path (e.g. "../out.H", "a.b.H") are preserved,
    # and os.path.join keeps absolute paths intact.
    root = os.path.splitext(filename)[0]
    vec_names = [
        os.path.join(os.getcwd(), root + "_chunk%s.H" % (ii + 1))
        for ii in range(Nvecs)
    ]
    futures = self.client.map(_call_writeVec,
                              self.vecDask,
                              vec_names, [mode] * Nvecs,
                              pure=False)
    daskD.wait(futures)
    # Single-file writing mode (concatenating all binary files)
    if not multi_file:
        # Getting binary-file locations
        bin_files = [sep.get_binary(vec_name) for vec_name in vec_names]
        # Getting all-axis information
        ax_info = [
            sep.get_axes(vec_name)[:sep.get_num_axes(vec_name)]
            for vec_name in vec_names
        ]
        binfile = sep.datapath + filename.split('/')[-1] + '@'
        # Checks for writing header file
        len_ax = [len(ax) for ax in ax_info]
        max_len_idx = np.argmax(len_ax)
        # Axis on which files are concatenated
        cat_axis_multi = len_ax[max_len_idx]
        # Getting largest-vector-axis information
        main_axes = ax_info[max_len_idx]
        # Number of elements on the concatenation axis of multifiles
        N_elements_multi = 0
        # Getting number of elements if appending mode is requested
        last_axis = [[1, 1.0, 1.0, "Undefined"]]
        if os.path.isfile(filename) and mode == 'a':
            file_axes = sep.get_axes(filename)
            last_axis[0][0] += file_axes[cat_axis_multi][0]
        # Checking compatibility of vectors
        for axes2check in ax_info:
            # First checking for len of given axis
            Naxes = len(axes2check)
            if Naxes < cat_axis_multi - 1:
                print(
                    "WARNING! Cannot write single file with given vector chunks: "
                    "number of axes not compatible. Wrote chunks!")
                return
            # All axes but the concatenation one must have matching lengths
            for idx, ax in enumerate(axes2check):
                if ax[0] != main_axes[idx][0] and idx != cat_axis_multi - 1:
                    print(
                        "WARNING! Cannot write single file with given vector chunks: "
                        "elements on axis number %s not compatible. Wrote chunks!"
                        % (idx + 1))
                    return
            if Naxes == cat_axis_multi:
                # Adding number of elements on the given concatenation axis
                N_elements_multi += axes2check[cat_axis_multi - 1][0]
            else:
                # Only one element present
                N_elements_multi += 1
        # Changing number of elements on last axis
        main_axes[-1][0] = N_elements_multi
        # Adding last appending axes if file existed
        main_axes += last_axis
        # Writing header file
        with open(filename, mode) as fid:
            for ii, ax in enumerate(main_axes):
                ax_id = ii + 1
                fid.write("n%s=%s o%s=%s d%s=%s label%s='%s'\n" %
                          (ax_id, ax[0], ax_id, ax[1], ax_id, ax[2], ax_id,
                           ax[3]))
            fid.write("in='%s'\n" % binfile)
            fid.write("esize=4\n")
            fid.write("data_format=\"native_float\"\n")
        # Writing binary file (reading each chunk binary in blocks of BUF_SIZE)
        with open(binfile, mode + 'b') as fid:
            for binfile_ii in bin_files:
                with open(binfile_ii, 'rb') as fid_toread:
                    for data in iter(lambda: fid_toread.read(BUF_SIZE), b""):
                        fid.write(data)
        # Removing header and binary files associated to chunks
        for vec_name, bin_name in zip(vec_names, bin_files):
            os.remove(vec_name)
            os.remove(bin_name)
    return
def writeVec(self, filename, mode='w'):
    """
    Write vector to file

    Args:
        filename: path/to/file.ext. '.H' writes a SEP header to filename and
            the binary data to DATAPATH/filename.H@; '.npy' writes a numpy file
        mode: 'a' for append, 'w' for overwriting. With '.npy' files nothing
            is written in append mode.

    Raises:
        ValueError: if mode is neither 'w' nor 'a'
        NotImplementedError: if the file extension is not supported
    """
    # Check writing mode. An exact comparison is required: the substring
    # test `mode not in 'wa'` also let '' and 'wa' through.
    if mode not in ('w', 'a'):
        raise ValueError("Mode must be appending (a) or writing (w)")
    append = mode == 'a'

    # Construct ax_info if the object has getHyper
    if hasattr(self, "getHyper"):
        hyper = self.getHyper()
        self.ax_info = []
        for iaxis in range(hyper.getNdim()):
            axis = hyper.getAxis(iaxis + 1)
            # The sampling is read from `d`, as in the list-based writeVec
            self.ax_info.append(AxInfo(axis.n, axis.o, axis.d, axis.label))

    # check output file type
    _, ext = os.path.splitext(filename)  # file extension

    # SEP vector with header in filename.H and binary in DATAPATH/filename.H@
    if ext == ".H":
        if not (os.path.isfile(filename) and append):
            # writing header/pointer file if not present or not in append mode
            binfile = sep.datapath + filename.split('/')[-1] + '@'
            with open(filename, mode) as f:
                # Writing axis info
                if self.ax_info:
                    for ii, ax_info in enumerate(self.ax_info):
                        f.write(ax_info.to_string(ii + 1))
                else:
                    for ii, n_axis in enumerate(reversed(self.shape)):
                        f.write(AxInfo(n=n_axis).to_string(ii + 1))
                # Writing last axis for allowing appending
                # (unless we are dealing with a scalar)
                if self.shape != (1,):
                    f.write(AxInfo(n=1).to_string(self.ndim + 1))
                f.write("in='%s'\n" % binfile)
                esize = "esize=4\n"
                if self.getNdArray().dtype == np.complex64:
                    esize = "esize=8\n"
                f.write(esize)
                f.write("data_format=\"native_float\"\n")
        else:
            # Appending to an existing header file
            binfile = sep.get_binary(filename)
            axes = sep.get_axes(filename)
            # Number of vectors already present in the file
            if self.shape == (1,):
                n_vec = axes[0][0]
                append_dim = self.ndim
            else:
                n_vec = axes[self.ndim][0]
                append_dim = self.ndim + 1
            with open(filename, mode) as f:
                f.write(AxInfo(n_vec + 1).to_string(append_dim))

        # Writing binary file
        data = self.getNdArray()
        fmt = '>f'
        if data.dtype == np.complex64 or data.dtype == np.complex128:
            fmt = '>c8'
        # NOTE(review): numpy's tofile documents `format` as a text-output
        # option, so with the default sep="" the bytes written should be the
        # array's own dtype and byte order, not big-endian - confirm before
        # relying on `fmt`.
        with open(binfile, mode + 'b') as f:
            if np.isfortran(data):
                # Forcing column-wise binary writing
                data.flatten('F').tofile(f, format=fmt)
            else:
                data.tofile(f, format=fmt)

    # numpy dictionary
    elif ext == '.npy':
        if not append:
            if self.ax_info:
                np.save(file=filename,
                        arr=dict(arr=self.getNdArray(), ax_info=self.ax_info),
                        allow_pickle=True)
            else:
                np.save(file=filename, arr=self.getNdArray(), allow_pickle=False)

    else:
        raise NotImplementedError("Extension %s not implemented yet" % ext)

    return
def writeVec(self, filename, mode='w'):
    """
    Function to write vector to file

    Args:
        filename: path/to/file.ext. '.H' writes a SEP header to filename and
            the binary data to DATAPATH/filename.H@; '.npy' writes a numpy
            file; '.h5' writes a dataset named "vec" to an HDF5 file
        mode: 'a' for append, 'w' for overwriting. With '.npy' files nothing
            is written in append mode.

    Raises:
        ValueError: if mode is neither 'w' nor 'a', or if the file extension
            is not one of .H, .npy, .h5
        NotImplementedError: if appending to an '.h5' file is requested
    """
    # Check writing mode. An exact comparison is required: the substring
    # test `mode not in 'wa'` also let '' and 'wa' through.
    if mode not in ('w', 'a'):
        raise ValueError("Mode must be appending 'a' or writing 'w' ")
    append = mode == 'a'

    # Construct ax_info if the object has getHyper
    if hasattr(self, "getHyper"):
        hyper = self.getHyper()
        self.ax_info = []
        for iaxis in range(hyper.getNdim()):
            axis = hyper.getAxis(iaxis + 1)
            self.ax_info.append([axis.n, axis.o, axis.d, axis.label])

    # check output file type
    _, ext = os.path.splitext(filename)  # file extension

    # SEP vector with header in filename.H and binary in DATAPATH/filename.H@
    if ext == ".H":
        if not (os.path.isfile(filename) and append):
            # writing header/pointer file if not present or not in append mode
            binfile = sep.datapath + filename.split('/')[-1] + '@'
            with open(filename, mode) as f:
                # Writing axis info
                if self.ax_info:
                    for ii, ax_info in enumerate(self.ax_info):
                        ax_id = ii + 1
                        f.write("n%s=%s o%s=%s d%s=%s label%s='%s'\n" % (
                            ax_id, ax_info[0], ax_id, ax_info[1],
                            ax_id, ax_info[2], ax_id, ax_info[3]))
                else:
                    for ii, n_axis in enumerate(reversed(self.shape)):
                        ax_id = ii + 1
                        f.write("n%s=%s o%s=0.0 d%s=1.0 \n" %
                                (ax_id, n_axis, ax_id, ax_id))
                # Writing last axis for allowing appending
                # (unless we are dealing with a scalar)
                if self.shape != (1,):
                    ax_id = self.ndim + 1
                    f.write("n%s=%s o%s=0.0 d%s=1.0 \n" % (ax_id, 1, ax_id, ax_id))
                f.write("in='%s'\n" % binfile)
                esize = "esize=4\n"
                if self.getNdArray().dtype == np.complex64:
                    esize = "esize=8\n"
                f.write(esize)
                f.write("data_format=\"native_float\"\n")
        else:
            # Appending to an existing header file
            binfile = sep.get_binary(filename)
            axes = sep.get_axes(filename)
            # Number of vectors already present in the file
            if self.shape == (1,):
                n_vec = axes[0][0]
                append_dim = self.ndim
            else:
                n_vec = axes[self.ndim][0]
                append_dim = self.ndim + 1
            with open(filename, mode) as f:
                f.write("n%s=%s o%s=0.0 d%s=1.0 \n" %
                        (append_dim, n_vec + 1, append_dim, append_dim))

        # Writing binary file
        data = self.getNdArray()
        fmt = '>f'
        if data.dtype == np.complex64 or data.dtype == np.complex128:
            # Previously assigned to a stray `format` variable, so fmt never changed
            fmt = '>c8'
        # NOTE(review): numpy's tofile documents `format` as a text-output
        # option, so with the default sep="" the bytes written should be the
        # array's own dtype and byte order, not big-endian - confirm before
        # relying on `fmt`.
        with open(binfile, mode + 'b') as f:
            if np.isfortran(data):
                # Forcing column-wise binary writing
                data.flatten('F').tofile(f, format=fmt)
            else:
                data.tofile(f, format=fmt)

    # numpy dictionary
    elif ext == '.npy':
        if not append:
            if self.ax_info:
                # Axes are stored under the keys '1', '2', ... The parentheses
                # matter: '%s' % ii + 1 tries to add an int to a str.
                axes = dict()
                for ii, ax_info in enumerate(self.ax_info):
                    axes['%s' % (ii + 1)] = dict(n=ax_info[0], o=ax_info[1],
                                                 d=ax_info[2], label=ax_info[3])
                np.save(file=filename,
                        arr=dict(arr=self.getNdArray(), ax_info=axes),
                        allow_pickle=True)
            else:
                np.save(file=filename, arr=self.getNdArray(), allow_pickle=False)

    elif ext == '.h5':
        # TODO implement appending and saving of ax_info to hdf5
        if not append:
            # h5py.File accepts 'r', 'r+', 'w', 'w-'/'x' and 'a'; 'wb' is rejected
            with h5py.File(filename, 'w') as f:
                f.create_dataset("vec", data=self.getNdArray())
        else:
            raise NotImplementedError

    else:
        raise ValueError("ERROR! Output format has to be H, npy, or h5")

    return