def to_h5(metAry, dest, debug=False):
    """
    Write the metAry into the given file path in the HDF5 format.

    dest can be a string or a h5py.Group object.

    If dest is a string, it is interpreted as the destination file path.

    If dest is a h5py.Group object, the content will be stored under the
    given group.
    """
    # Writing to an existing group
    if isinstance(dest, h5py.Group):
        dest.create_dataset('ndarray', data=metAry.data)        # Write the array
        __dict_loop(metAry.info, dest.create_group('info'))     # Write the meta info
        return

    # Writing to a file path
    path = filePath(dest)

    if not path.write:
        raise ValueError("Unable to write to: " + str(path.full))

    with h5py.File(path.full, 'w') as f:
        f.create_dataset('ndarray', data=metAry.data)           # Write the array
        __dict_loop(metAry.info, f.create_group('info'))        # Write the meta info

    return
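
# A hedged sketch of the h5py.Group branch: storing several metaArrays
# under named groups in a single file. 'trace_a' and 'trace_b' stand for
# existing metaArray objects, and the file/group names are made up.
#
#     import h5py
#
#     with h5py.File('bundle.h5', 'w') as f:
#         to_h5(trace_a, f.create_group('channel_A'))
#         to_h5(trace_b, f.create_group('channel_B'))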
def from_h5(src, debug=False):
    """
    Read the HDF5 file at the given path, and build the metAry accordingly.

    src can be a string or a h5py.Group object.

    If src is a string, it is interpreted as the source file path.

    If src is a h5py.Group object, the content will be read from the
    given group.
    """
    # Reading from an existing group
    if isinstance(src, h5py.Group):
        ary = src['ndarray'][()]                # Read the array
        info = __read_info(src['info'])         # Read the meta info
        return metaArray(ary, info=info)        # Done, do not treat src as a path

    # Reading from a file path
    path = filePath(src)

    if not path.read:
        raise ValueError("Unable to read from: " + str(path.full))

    with h5py.File(path.full, 'r') as f:
        ary = f['ndarray'][()]                  # Read the array
        info = __read_info(f['info'])           # Read the meta info

    return metaArray(ary, info=info)
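
# A minimal round-trip sketch of to_h5()/from_h5(); 'demo.h5' is a
# hypothetical file name, and the constructor call assumes a plain
# numpy array is acceptable metaArray input.
#
#     import numpy as np
#
#     ary = metaArray(np.linspace(0, 1, 100))
#     to_h5(ary, 'demo.h5')           # Write the array and its meta info
#     restored = from_h5('demo.h5')   # Rebuild the metaArray from the file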
def __init__(self, path, debug=False):
    self.file_path = filePath(path)
    self.unpack_str = ''
    self.header_len = 0
    self.endian = '<'
    self.record_pos = 0
    self.record_num = 0
    self.record_index = []
    self.flg_debug = debug

    # Try to guess parameters such as byte order and header length.
    if self._parameterChk_():
        self._index_()      # Index the file
    else:
        # No idea how to unpack the records
        msg = "Can not guess how to unpack the records. " + \
              "Please try to specify the following parameters manually: " + linesep + \
              "'.header_len'" + linesep + \
              "'.unpack_str'" + linesep + \
              "'.endian'" + linesep + \
              "then index the file manually using '._index_()'"
        print(msg)

    if debug:
        print('file_path: ' + str(self.file_path.full))
        print('unpack_str: ' + str(self.unpack_str))
        print('header_len: ' + str(self.header_len))
        print('record_pos: ' + str(self.record_pos))
        print('record_num: ' + str(self.record_num))

    return
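
# A hypothetical sketch of setting the unpack parameters by hand when the
# automatic guess fails. 'data_file' stands for this class, and the layout
# values ('records.bin', 512-byte header, int32 + two float64s per record)
# are made-up examples.
#
#     import struct
#
#     reader = data_file('records.bin')
#     reader.endian = '<'         # Little-endian byte order
#     reader.header_len = 512     # File header size, in bytes
#     reader.unpack_str = '<i2d'  # An int32 followed by two float64s
#     reader._index_()            # Re-index the file with the new settings
#
#     record_size = struct.calcsize(reader.unpack_str)    # 20 bytes here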
def __init__(self, path, debug=False, buffer_size=4096):
    """
    4kB buffer size
    """
    self.file_path = filePath(path)
    self.debug = debug

    # f.read(buffer_size), in case of very large files
    self.buffer_size = buffer_size = buffer_size * 1024

    # Index of the records in the file, each item should have the format:
    # [hdr_pos, hdr_len, data_pos, data_len, header_dict, unpack_str]
    self.idx = idx = []

    # metainfo = self.metainfo = {}       # File header representations

    # Known header fields and value types. This is only used for data type
    # conversion from the byte stream. String values are left as is.
    hdr_format = self.hdr_format = []
    hdr_format.append([':WFMP:NR_P', int])
    hdr_format.append([':WFMPRE:NR_PT', int])
    hdr_format.append([':WFMP:BYT_N', int])
    hdr_format.append([':WFMPRE:BYT_NR', int])
    hdr_format.append(['BIT_N', int])
    hdr_format.append(['BIT_NR', int])
    # ENC or ENCDG -> BIN
    # BN_F or BN_FMT -> RI
    # BYT_O or BYT_OR -> LSB or MSB
    # CH2:WFID "Ch2, AC coupling, 20mVolts/div, 50ns/div, 50000 points, Sample mode";
    # WFI "Ch2, DC coupling, 1.000mV/div, 40.00us/div, 1000000 points, Average mode";
    # PT_F or PT_FMT -> Y
    # XUN or XUNIT -> "s"
    hdr_format.append(['NR_P', int])
    hdr_format.append(['NR_PT', int])
    hdr_format.append(['PT_O', int])
    hdr_format.append(['PT_OFF', int])
    hdr_format.append(['XIN', float])
    hdr_format.append(['XINCR', float])
    hdr_format.append(['XZE', float])
    # YUN or YUNIT -> "V" or "Volts"
    hdr_format.append(['YMU', float])
    hdr_format.append(['YMULT', float])
    hdr_format.append(['YOF', float])
    hdr_format.append(['YOFF', float])
    hdr_format.append(['YZE', float])
    hdr_format.append(['YZERO', float])
    hdr_format.append(['VSCALE', float])
    hdr_format.append(['HSCALE', float])
    hdr_format.append(['VPOS', float])
    hdr_format.append(['VOFFSET', float])
    hdr_format.append(['HDELAY', float])
    # COMP -> COMPOSITE_YT
    hdr_format.append(['FILTERF', int])
    hdr_format.append(['CENTERFREQUENCY', float])
    # DOMAIN -> TIME
    hdr_format.append(['REFLEVEL', float])
    hdr_format.append(['SPAN', float])
    # WFMTYPE -> ANALOG
    # :CURV -> #72000000

    debug_str = ''

    # Identify the header and data byte positions in the byte stream.
    #
    # The data string can take the following forms:
    #
    # ":WFMPRE:" --- Header info ---- ":CURVE #" ------ Binary Data -----
    # ":WFMP:" ----- Header info ---- ":CURV #" ------- Binary Data -----

    f = self.open()
    f_pos = 0

    # Identify key locations in the data stream; there may be multiple
    # header-data streams in a single file.
    while True:
        # Find the beginning of the header
        hdr_start = buffered_search(f, ':WFMP:', start=f_pos, buffer_size=buffer_size)
        if hdr_start == -1:
            hdr_start = buffered_search(f, ':WFMPRE:', start=f_pos, buffer_size=buffer_size)
            if hdr_start == -1:
                if debug > 0:
                    debug_str += 'Neither of the header descriptors (":WFMP:" or ":WFMPRE:") is found.'
                    print(debug_str)
                break       # No more headers found
            elif debug > 0:
                debug_str += 'Header descriptor ":WFMPRE:" found at ' + str(hdr_start) + linesep
        elif debug > 0:
            debug_str += 'Header descriptor ":WFMP:" found at ' + str(hdr_start) + linesep

        # Find the following data stream
        data_start = buffered_search(f, ':CURV #', start=hdr_start, buffer_size=buffer_size)
        if data_start == -1:
            data_start = buffered_search(f, ':CURVE #', start=hdr_start, buffer_size=buffer_size)
            if data_start == -1:
                # Very bad, found a header but no data!
                # Show debug info if requested, ignore otherwise
                if debug > 0:
                    debug_str += 'Failed to find the following data stream!'
                    print(debug_str)
                f_pos += 6
                continue
            elif debug > 0:
                debug_str += 'Data descriptor ":CURVE #" found at ' + str(data_start) + linesep
        elif debug > 0:
            debug_str += 'Data descriptor ":CURV #" found at ' + str(data_start) + linesep

        # Work out the length of the data stream:
        #   :CURVE #<x><yyy><data><newline>
        #   :CURV #<x><yyy><data><newline>
        f.seek(data_start)
        buf = f.read(20)
        pos_x = buf.find('#') + 1
        pos_yyy = pos_x + 1
        desc_len = int(buf[pos_x:pos_yyy])              # <x>
        pos_data = pos_yyy + desc_len
        data_byte_len = int(buf[pos_yyy:pos_data])      # <yyy>

        # Parse the headers, including ":CURV #<x><yyy>" in the header
        data_start += pos_data
        hdr_len = data_start - hdr_start

        # Read the header byte stream
        f.seek(hdr_start)
        hdr_dict = self.proc_header(f.read(hdr_len))

        # Have a quick guess at the unpack string; this is the most
        # rudimentary information necessary to decode the binary data.
        # unpack_str == None if unable to work it out from here.
        unpack_str = self.get_unpackstr(hdr_dict)

        if debug > 0:
            if unpack_str is None:
                debug_str += 'Unable to guess how to unpack the binary data.' + linesep
                debug_str += 'Requires knowledge of at least the following header fields:' + linesep
                debug_str += '\t BYT_O(R)' + linesep
                debug_str += '\t NR_P(T)' + linesep
                debug_str += '\t BIT_N(R)' + linesep
                debug_str += '\t BN_F(MT)' + linesep
            else:
                debug_str += 'Binary data is thought to be packed as: ' + unpack_str + linesep

        # Assemble into the index list:
        # idx [hdr_pos, hdr_len, data_pos, data_len, hdr_dict, unpack_str]
        idx.append([hdr_start, hdr_len, data_start, data_byte_len, hdr_dict, unpack_str])

        # Advance to the end of the data stream, ready for the next search loop
        f_pos = data_start + data_byte_len

        if debug > 0:
            debug_str += 'Continue searching for the next record from ' + str(f_pos) + ' byte......'
            print(debug_str)
            debug_str = ''

    f.close()
    return
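
# A standalone sketch of the '#<x><yyy>' length-descriptor logic used
# above, on a made-up example block: '#' is followed by one digit <x>
# giving how many digits the byte count <yyy> occupies.
#
#     buf = '#3500' + 'B' * 500           # <x> = 3, <yyy> = 500
#
#     pos_x = buf.find('#') + 1           # Position of <x>
#     pos_yyy = pos_x + 1
#     desc_len = int(buf[pos_x:pos_yyy])              # <x> -> 3 digits
#     pos_data = pos_yyy + desc_len
#     data_byte_len = int(buf[pos_yyy:pos_data])      # <yyy> -> 500 bytes
#
#     assert data_byte_len == 500
#     assert len(buf[pos_data:]) == data_byte_len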
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

from os.path import join
from os import linesep
from textwrap import TextWrapper

import cStringIO

from misc import filePath, dirPath

# Environmental variables
demo_dir = join(filePath(__file__).baseDir, 'example')
tty_width = 72
partition = '-' * tty_width
prompt = '>>> '

# Current dir
# current_dir = dirPath('./')

wrapper = TextWrapper()
wrapper.width = tty_width
wrapper.replace_whitespace = False
# wrapper.drop_whitespace = False
wrapper.initial_indent = "- "
wrapper.subsequent_indent = '- '
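
# A quick sketch of the wrapper in use: long demo text is wrapped to
# tty_width columns, with every output line prefixed by '- '. The sample
# sentence is made up for illustration.
#
#     sample = 'This wrapper formats the demo descriptions printed to the terminal.'
#     print(wrapper.fill(sample))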
def to_csv(metAry, path, debug=False,
           field_delimiter=',', text_delimiter='"', linesep="\r\n"):
    """
    Write the metAry into the given file path in the CSV format.

    path is interpreted as the destination file path.
    """
    path = filePath(path)

    FD = field_delimiter
    TD = text_delimiter
    LS = linesep

    if not path.write:
        raise IOError("Given file path is not writable: " + path.full)

    info = metAry.copy_info()
    data = metAry.data

    nfo_keys = info.keys()
    nfo_keys.sort()

    with open(path.full, 'wb') as f:
        # Write out the meta info first
        for key in nfo_keys:
            val = info.pop(key)

            if isinstance(val, (int, long, float, complex)):
                val = str(val)
            else:
                val = TD + str(val) + TD

            f.write(key + FD + val + LS)

        # Write out the content; the format depends on the number of
        # data dimensions
        if data.ndim == 1:
            # One-dimensional data, write out the index - value pairs
            content = zip(metAry.get_axis(), data)

            for idx, val in content:
                f.write(str(idx) + FD + str(val) + LS)

        elif data.ndim == 2:
            # Two-dimensional data, write out the x-y grid
            x, y = data.shape

            for x_idx in range(x):
                row = FD.join(map(str, data[x_idx]))
                f.write(row + LS)

        else:
            # N-dimensional data, flatten the array, then dump
            f.write(LS.join(map(str, data.flatten())))
            f.write(LS)

    return
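
# A minimal usage sketch of to_csv(); the file names are hypothetical,
# and the constructor call assumes a plain 1-D numpy array is acceptable
# metaArray input. Meta info rows are written first, then index - value
# pairs.
#
#     import numpy as np
#
#     ary = metaArray(np.sin(np.linspace(0, 10, 100)))
#     to_csv(ary, 'trace.csv')
#     to_csv(ary, 'trace.txt', field_delimiter='\t')  # Tab-separated variant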
def __init__(self, path=None, debug=False, analyse=True,
             buffer_size=10485760, field_delimiter=',', text_delimiter='"'):
    """
    Given the file path, prepare the csv file.

    Options:

    analyse     If true, will try to scan the file to guess where the
                numerical data starts. This can take a long time for
                large files of unusual layouts.
    """
    self.file_path = path
    self.name = filePath(path).name
    self.analyse = analyse
    self.debug = debug

    # self.f_handler = None
    self.rows = None            # Number of rows
    self.cols = None            # Number of columns
    self.idx = None             # File seek index, linking the file byte
                                # position with each non-blank row

    self.metainfo = {}          # File header representations

    self.buffer_size = buffer_size      # Read the file in chunks of this size

    self.field_delimiter = field_delimiter
    self.text_delimiter = text_delimiter

    self.label_row = 0          # Row number for column labels
    self.data_start = 0         # Row number for where data starts

    # Check if the file is readable
    f = self.open()

    cols = 0
    f.seek(0)
    idx = [0]           # Byte position index of all non-blank rows

    ###########################################################################
    # "for line in f" doesn't work here, because f.tell() would be stuck at
    # certain multiples of the buffer length.
    ###########################################################################
    # for line in f:
    #     line = line.strip()
    #
    #     # Skip blank lines
    #     if ''.join(line.split(field_delimiter)) == '':
    #         continue
    #     else:
    #         idx.append(f.tell())
    #
    #     items = len(line.split(field_delimiter))
    #     if items > cols:
    #         cols = items
    ###########################################################################

    while True:
        # Read the CSV file line by line; readlines() is not used because
        # the file can potentially be very large.
        line = f.readline()

        if line == '':
            del idx[-1]     # Remove the last index entry
            break           # Reached the end of the file

        line = line.strip()     # Strip off '\n'
        lst = line.split(field_delimiter)

        # if ''.join(lst) == '':
        #     continue                # In case of a blank row
        # else:
        #     idx.append(f.tell())    # Write down the next byte position

        idx.append(f.tell())    # Do not ignore blank rows

        items = len(lst)
        if items > cols:
            cols = items

    self.rows = len(idx)
    self.cols = cols
    self.idx = idx

    f.close()

    if analyse:
        self.chk_data_start()

        # Try to read the header if it exists (i.e. the data does not
        # start on the first row)
        if self.data_start != 0:
            self.getmetainfo()

    return
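
# A hypothetical sketch of what the byte-position index enables: random
# access to any row without re-reading the whole file. 'parser' stands
# for an instance of this class; 'get_row' is not part of the class.
#
#     def get_row(parser, n):
#         f = parser.open()
#         f.seek(parser.idx[n])       # Jump straight to the row's byte offset
#         line = f.readline().strip()
#         f.close()
#         return line.split(parser.field_delimiter)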