Example #1
def to_h5(metAry, dest, debug=False):
    """
    Write the metAry into the given file path in the HDF5 format
    
    dest can be a string or an h5py.Group object.
    
    If dest is a string, it is interpreted as the destination file path.
    
    If dest is an h5py.Group object, the content will be stored under the given group.
    """
    
    # Writing to existing group
    if isinstance(dest, h5py.Group):
        dest.create_dataset('ndarray', data=metAry.data)    # Write the array
        __dict_loop(metAry.info, dest.create_group('info')) # Write the meta info
        return
    
    
    # Writing file to path
    path = filePath(dest)
    
    if not path.write:
        raise ValueError("Unable to write to: " + str(path.full))
    
    with h5py.File(path.full, 'w') as f:
        f.create_dataset('ndarray', data=metAry.data)     # Write the array
        __dict_loop(metAry.info, f.create_group('info'))  # Write the meta info
    
    return
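
A minimal usage sketch for to_h5 (the metaArray instance 'ary', the file paths, and the group name 'measurement' below are illustrative, not from the source):

import h5py

# Write straight to a file path
to_h5(ary, '/tmp/data.h5')

# Or write into a group inside an already open file
with h5py.File('/tmp/archive.h5', 'w') as f:
    to_h5(ary, f.create_group('measurement'))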
Example #2
def from_h5(src, debug=False):
    """
    Read the HDF5 file at the given path, and build the metAry accordingly.
    
    src can be a string or an h5py.Group object.
    
    If src is a string, it is interpreted as the source file path.
    
    If src is an h5py.Group object, the content will be read from the given group.
    
    """
    
    # Reading from existing group
    if isinstance(src, h5py.Group):
        ary = src['ndarray'][()]          # Read the array
        info = __read_info(src['info'])   # Read the meta info
        return metaArray(ary, info=info)  # Done; no file access needed
    
    # Reading from file path
    path = filePath(src)
    
    if not path.read:
        raise ValueError("Unable to read from: " + str(path.full))
    
    with h5py.File(path.full, 'r') as f:
        ary = f['ndarray'][()]          # Read the array
        info = __read_info(f['info'])   # Read the meta info
    
    return metaArray(ary, info=info)
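
A matching read sketch for from_h5 (the same illustrative paths and group name as above):

import h5py

# Read back from a file path
ary = from_h5('/tmp/data.h5')

# Or read from a group inside an already open file
with h5py.File('/tmp/archive.h5', 'r') as f:
    ary = from_h5(f['measurement'])

Example #3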
 def __init__(self, path, debug = False):
     
     self.file_path = filePath(path)
     
     self.unpack_str = ''
     self.header_len = 0
     self.endian = '<'
     
     self.record_pos = 0
     self.record_num = 0
     
     self.record_index = []
     
     self.flg_debug = debug
     
     # Try to guess parameters like byte Order and header length.
     if self._parameterChk_():
         self._index_()      # Index the file
     else:
         # No idea how to unpack the records
         msg = "Can not guess how to unpack the records. " + \
                 "Please try to specify the following parameters manually: " + sep + \
                 "'.header_len'" + sep + \
                 "'.unpack_str'"  + sep + \
                 "'.endian'"  + sep + \
                 "then index the file manually using '._index_()'"
         print(msg)
     
     if debug:
         print('file_path:  %s' % self.file_path.full)
         
         print('unpack_str: %s' % self.unpack_str)
         print('header_len: %s' % self.header_len)
         
         print('record_pos: %s' % self.record_pos)
         print('record_num: %s' % self.record_num)
     
     return
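
For reference, a sketch of how the guessed parameters would be used to decode one record with the standard struct module; 'reader' and all the field values below are illustrative assumptions, not taken from the source:

import struct

# Suppose the guesses were: little-endian byte order, a 16-byte record
# header, and 1000 int16 samples per record
reader.endian = '<'
reader.header_len = 16
reader.unpack_str = '<1000h'    # struct format: 1000 little-endian int16

with open(reader.file_path.full, 'rb') as f:
    f.seek(reader.header_len)                          # Skip the record header
    raw = f.read(struct.calcsize(reader.unpack_str))   # One record payload
    samples = struct.unpack(reader.unpack_str, raw)    # Tuple of 1000 ints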
Example #4
 def __init__(self, path, debug = False, buffer_size = 4096):
     """
     4kB buffer size
     """
     self.file_path = filePath(path)
     self.debug = debug
     self.buffer_size = buffer_size = buffer_size * 1024    # Convert KiB to bytes; f.read(buffer_size) chunks in case of very large files
     
     self.idx = idx = []     # Index of the records in the file, each item 
                             #      should have the format:
                             #      [hdr_pos, hdr_len, data_pos, data_len, header_dict, unpack_str]
     
     #metainfo = self.metainfo = {}                # File header representations
     
     
     # Known header fields and their value types.
     # This is only used for data type conversion from the byte stream.
     # String values are left as-is.
     hdr_format = self.hdr_format = []
     hdr_format.append([':WFMP:NR_P', int])
     hdr_format.append([':WFMPRE:NR_PT', int])
     hdr_format.append([':WFMP:BYT_N', int])
     hdr_format.append([':WFMPRE:BYT_NR', int])
     hdr_format.append(['BIT_N', int])
     hdr_format.append(['BIT_NR', int])
     # ENC or ENCDG -> BIN
     # BN_F or BN_FMT-> RI
     # BYT_O or BYT_OR -> LSB or MSB
     # CH2:WFID "Ch2, AC coupling, 20mVolts/div, 50ns/div, 50000 points, Sample mode";
     # WFI "Ch2, DC coupling, 1.000mV/div, 40.00us/div, 1000000 points, Average mode";
     # PT_F or PT_FMT -> Y
     # XUN or XUNIT -> "s"
     hdr_format.append(['NR_P', int])
     hdr_format.append(['NR_PT', int])
     hdr_format.append(['PT_O', int])
     hdr_format.append(['PT_OFF', int])
     hdr_format.append(['XIN', float])
     hdr_format.append(['XINCR', float])
     hdr_format.append(['XZE', float])
     # YUN or YUNIT -> "V" or  "Volts"
     hdr_format.append(['YMU', float])
     hdr_format.append(['YMULT', float])
     hdr_format.append(['YOF', float])
     hdr_format.append(['YOFF', float])
     hdr_format.append(['YZE', float])
     hdr_format.append(['YZERO', float])
     hdr_format.append(['VSCALE', float])
     hdr_format.append(['HSCALE', float])
     hdr_format.append(['VPOS', float])
     hdr_format.append(['VOFFSET', float])
     hdr_format.append(['HDELAY', float])
     # COMP -> COMPOSITE_YT
     hdr_format.append(['FILTERF', int])
     hdr_format.append(['CENTERFREQUENCY', float])
     # DOMAIN -> TIME
     hdr_format.append(['REFLEVEL', float])
     hdr_format.append(['SPAN', float])
     # WFMTYPE -> ANALOG
     # :CURV -> #72000000
     
     debug_str = ''
     
     # Identify the header and data byte positions in the byte streams
     #
     # The data string can take the following forms:
     #
     # ":WFMPRE:" --- Header info ---- ":CURVE #" ------ Binary Data ----- 
     # ":WFMP:" ----- Header info ---- ":CURV #" ------- Binary Data -----
     
     # END structure INIT
     
     f = self.open()
     f_pos = 0
     
     # Identify key locations in the data stream, write into the index list
     # There may be multiple header-data streams
     while True:
         
         # Find the beginning of the header
         hdr_start = buffered_search(f, ':WFMP:', start = f_pos, buffer_size = buffer_size)
         if hdr_start == -1:
             hdr_start = buffered_search(f, ':WFMPRE:', start = f_pos, buffer_size = buffer_size)
             if hdr_start == -1:
                 if debug > 0:
                     debug_str += 'Neither header descriptor (":WFMP:" nor ":WFMPRE:") was found.'
                     print(debug_str)
                 break # no more header found
             elif debug > 0:
                 debug_str += 'Header descriptor ":WFMPRE:" found at ' + str(hdr_start) + linesep
         elif debug > 0:
             debug_str += 'Header descriptor ":WFMP:" found at ' + str(hdr_start) + linesep
         
         # Find the following data stream
         data_start = buffered_search(f, ':CURV #', start = hdr_start, buffer_size = buffer_size)
         if data_start == -1:
             data_start = buffered_search(f, ':CURVE #', start = hdr_start, buffer_size = buffer_size)
             
             if data_start == -1:
                 # Very bad, found header but no data!
                 # Show debug info if requested, ignore otherwise
                 if debug > 0:
                     debug_str += 'Failed to find the following data stream!'
                     print(debug_str)
                 
                 f_pos += 6              # Advance the search start and try again
                 continue
             
             elif debug > 0:
                 debug_str += 'Data descriptor ":CURVE #" found at ' + str(data_start) + linesep
         elif debug > 0:
             debug_str += 'Data descriptor ":CURV #" found at ' + str(data_start) + linesep
         
         # Work out the length of the data stream
         # :CURVE #<x><yyy><data><newline>
         # :CURV #<x><yyy><data><newline>
         f.seek(data_start)
         buf =  f.read(20)
         
         pos_x = buf.find('#') + 1
         pos_yyy = pos_x + 1
         desc_len = int(buf[pos_x:pos_yyy])                  # <x>
         
         pos_data = pos_yyy + desc_len
         data_byte_len = int(buf[pos_yyy:pos_data])              # <yyy>
         
         # Parse the headers
         # Include ":CURV #<x><yyy>" into the header
         data_start += pos_data
         hdr_len = data_start-hdr_start
         
         # Read the header byte stream
         f.seek(hdr_start)
         hdr_dict = self.proc_header(f.read(hdr_len))
         
         # Have a quick guess on the unpack string, this is the most rudimentary
         # information necessary to decode the binary data
         # unpack_str == None if unable to work out from here
         unpack_str = self.get_unpackstr(hdr_dict)
         if debug > 0:
             if unpack_str is None:
                 debug_str += 'Unable to guess how to unpack the binary data.' + linesep
                 debug_str += 'Require the knowledge of at least the following header fields:' + linesep
                 debug_str += '\t BYT_O(R)' + linesep
                 debug_str += '\t NR_P(T)' + linesep
                 debug_str += '\t BIT_N(R)' + linesep
                 debug_str += '\t BN_F(MT)' + linesep
             else:
                 debug_str += 'Binary data is thought to be packed as: ' + unpack_str + linesep
         
         # Assemble into index list
         # idx [hdr_pos, hdr_len, data_pos, data_len, hdr_dict, unpack_str]
         idx.append([hdr_start, hdr_len, data_start, data_byte_len, hdr_dict, unpack_str])
         
         # Advance to the end of the data stream, prepare for the next search loop
         f_pos = data_start + data_byte_len
         
         if debug > 0:
             debug_str += 'Continue searching for the next record from ' + str(f_pos) + ' byte......'
             print(debug_str)
             debug_str = ''
     
     f.close()
     
     return
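
The ':CURV #<x><yyy>' prefix parsed above is an IEEE 488.2 style definite-length block header: <x> is a single digit giving how many length digits follow, and <yyy> are those digits, spelling out the payload byte count. A standalone sketch of that parse (the function name is hypothetical):

def parse_block_header(buf):
    """Parse a '#<x><yyy>' length prefix, e.g. ':CURV #72000000'.
    
    Returns (payload_offset, payload_length) relative to buf, so
    parse_block_header(':CURV #72000000') == (15, 2000000).
    """
    pos_x = buf.find('#') + 1
    desc_len = int(buf[pos_x])               # <x>: number of length digits
    pos_data = pos_x + 1 + desc_len
    data_len = int(buf[pos_x + 1:pos_data])  # <yyy>: payload byte count
    return pos_data, data_len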
Example #5
#       
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.

from os.path import join
from os import linesep

from textwrap import TextWrapper
import cStringIO

from misc import filePath, dirPath

# Environmental variables
demo_dir = join(filePath(__file__).baseDir, 'example')
tty_width = 72
partition = '-' * tty_width
prompt = '>>> '

# Current dir
# current_dir = dirPath('./')


wrapper = TextWrapper()
wrapper.width = tty_width
wrapper.replace_whitespace = False
# wrapper.drop_whitespace = False
wrapper.initial_indent = "- "
wrapper.subsequent_indent = '- '
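
A quick usage sketch for the wrapper configured above (the sample text is illustrative):

text = "This demo walks through reading, annotating and plotting a data file."
print(partition)
print(wrapper.fill(text))   # Wrapped to 72 columns, each line prefixed with '- '
print(partition)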
Example #6
def to_csv(metAry, path, debug=False, \
        field_delimiter = ',', text_delimiter = '"', linesep = "\r\n"):
    """
    Write the metAry into given file path in the CSV format
    
    dest can be string or h5py.Group object. 
    
    If dest is a string, it is interpret as the destination file path.
    
    If dest is a h5py.Group object, the content will be store under the given group.
    """
    
    path = filePath(path)
    
    FD = field_delimiter
    TD = text_delimiter
    LS = linesep
    
    if not path.write:
        print "Given file path is not writable." + path.full
        raise IOError
    
    info = metAry.copy_info()
    data = metAry.data
    
    nfo_keys = sorted(info.keys())
    
    with open(path.full, 'wb') as f:
        
        # Write out the meta info first
        for key in nfo_keys:
            
            val = info.pop(key)
            
            if isinstance(val, (int, long, float, complex)):
                val = str(val)
            else:
                val = TD + str(val) + TD
            
            f.write(key + FD + val + LS)
        
        # Write out the content, format depends on the number of data dimensions
        if data.ndim == 1:
            # One dimensional data, write out the index-value pairs
            
            content = zip(metAry.get_axis(), data)
            
            for idx, val in content:
                f.write(str(idx) + FD + str(val) + LS)
            
        elif data.ndim == 2:
            # Two dimensional data, write out the x-y grid
            x, y = data.shape
            
            for x_idx in range(x):
                row = FD.join(map(str, data[x_idx]))
                f.write(row + LS)
            
        else:
            # N-dimensional data, flatten the array, then dump
            f.write(LS.join(map(str, data.flatten())))
            f.write(LS)
    
    return
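
A minimal usage sketch for to_csv, assuming a metaArray instance named 'ary' (the paths are illustrative):

# Default dialect: comma-delimited fields, double-quoted text, CRLF line endings
to_csv(ary, '/tmp/data.csv')

# Tab-separated variant
to_csv(ary, '/tmp/data.tsv', field_delimiter='\t')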
Example #7
 def __init__(self, path = None, debug = False, analyse = True, \
     buffer_size = 10485760, field_delimiter = ',', text_delimiter = '"'):
     """
     Given the file path, prepare the csv file.
     
     Options:
     
     analyse     If True, the file will be scanned to guess where the
                    numerical data starts. This can take a long time
                    for large files with unusual layouts.
     """
     self.file_path = path
     self.name = filePath(path).name
     self.analyse = analyse
     self.debug = debug
     # self.f_handler = None
     self.rows = None                    # Number of rows
     self.cols = None                    # Number of columns
     self.idx = None                     # File seek index, linking the file
                                         #  byte position with each row
     self.metainfo = {}                # File header representations
     
     self.buffer_size = buffer_size      # Read file in this size chunks
     self.field_delimiter = field_delimiter
     self.text_delimiter = text_delimiter
     
     self.label_row = 0                  # Row number for column labels
     self.data_start = 0                 # Row number for where data starts
     
     # Check if the file is readable
     f = self.open()
     
     cols = 0
     f.seek(0)
     idx = [0]               # Byte position index of every row
     
     #######################################################################
     # "for line in f" doesnt work because f.tell() will be stuck at certain
     # multiples of buffer length.
     #######################################################################
     #for line in f:   
     #    
     #    line = line.strip()
     #    
     #    # Skip blank lines
     #    if ''.join(line.split(field_delimiter)) == '':
     #        continue
     #    else:
     #        idx.append(f.tell())
     #    
     #    items = len(line.split(field_delimiter))
     #    if items > cols:
     #        cols = items
     #######################################################################
     
     while True:
         line = f.readline()         # Read the CSV file line by line;
                                     #     readlines() is not used because
                                     #     the file can potentially be very large
         
         if line == '':
             del(idx[-1])            # Remove the last index entry
             break                   # Reached the end of the file
         
         line = line.strip()         # Strip off the trailing '\n'
         lst = line.split(field_delimiter)
         
         #if ''.join(lst) == '':
         #    continue                # In case of a blank row
         #else:
         #    idx.append(f.tell())    # Write down the next byte position
         
         idx.append(f.tell())         # Don't ignore blank rows
         
         items = len(lst)
         if items > cols:
             cols = items
         
     self.rows = len(idx)
     self.cols = cols
     self.idx = idx
     
     f.close()
     
     if analyse:
         self.chk_data_start()
         # Try to read the header if it exists (i.e. data doesn't start on the 1st row)
         if self.data_start != 0:
             self.getmetainfo()
     
     return
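
A standalone sketch of the readline()/tell() indexing technique used above (the file path and function name are illustrative). Iterating with "for line in f" reads ahead in buffered chunks, so f.tell() would report the buffer position rather than the start of the next line:

def index_lines(path):
    """Return the byte offset of the start of every line in the file."""
    offsets = [0]
    with open(path, 'rb') as f:
        while True:
            line = f.readline()      # readline() keeps f.tell() accurate
            if not line:
                offsets.pop()        # Drop the offset past the last line
                break
            offsets.append(f.tell())
    return offsets

# Random access: jump straight to the 42nd line without re-reading the file
idx = index_lines('/tmp/data.csv')
with open('/tmp/data.csv', 'rb') as f:
    f.seek(idx[41])
    line_42 = f.readline()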