Example #1
def to_h5(metAry, dest, debug=False):
    """
    Write the metAry into the given file path in the HDF5 format.

    dest can be a string or an h5py.Group object.

    If dest is a string, it is interpreted as the destination file path.

    If dest is an h5py.Group object, the content will be stored under the given group.
    """

    # Writing to existing group
    if isinstance(dest, h5py.Group):
        dest.create_dataset('ndarray', data=metAry.data)    # Write the array
        __dict_loop(metAry.info, dest.create_group('info')) # Write the meta info
        return


    # Writing file to path
    path = filePath(dest)

    if not path.write:
        raise ValueError("Unable to write to: " + str(path.full))

    with h5py.File(path.full, 'w') as f:
        f.create_dataset('ndarray', data=metAry.data)     # Write the array
        __dict_loop(metAry.info, f.create_group('info'))  # Write the meta info

    return
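
A minimal usage sketch for the writer above. The import paths are assumptions; only the metaArray(ary, info=...) constructor and the two accepted forms of dest come from the code on this page.

import numpy as np
import h5py

from metaArray import metaArray, to_h5   # assumed import paths

ary = metaArray(np.linspace(0.0, 1.0, 100), info={'name': 'demo trace'})

# dest as a file path string
to_h5(ary, 'demo.h5')

# dest as an h5py.Group inside an already-open file
with h5py.File('demo_grouped.h5', 'w') as f:
    to_h5(ary, f.create_group('trace_1'))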
Example #2
def from_h5(src, debug=False):
    """
    Read the HDF5 file in the given path, and build the metAry accordingly.

    src can be a string or an h5py.Group object.

    If src is a string, it is interpreted as the source file path.

    If src is an h5py.Group object, the content will be read from the given group.

    """

    # Reading from an existing group
    if isinstance(src, h5py.Group):
        ary = src['ndarray'][()]          # Read the array
        info = __read_info(src['info'])   # Read the meta info
        return metaArray(ary, info=info)

    # Reading from file path
    path = filePath(src)

    if not path.exist:
        raise IOError('File ' + str(path.full) + ' does not exist')
    
    if not path.read:
        raise IOError("Unable to read from: " + str(path.full))

    with h5py.File(path.full, 'r') as f:
        ary = f['ndarray'][()]          # Read the array
        info = __read_info(f['info'])   # Read the meta info

    return metaArray(ary, info=info)
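
A matching read-back sketch, under the same import-path assumptions as the sketch in Example #1:

import h5py

from metaArray import from_h5   # assumed import path

# src as a file path string
ary = from_h5('demo.h5')

# src as an h5py.Group inside an already-open file
with h5py.File('demo_grouped.h5', 'r') as f:
    ary = from_h5(f['trace_1'])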
Example #3
    def __init__(self, path, debug=False):

        self.file_path = filePath(path)

        self.unpack_str = ''
        self.header_len = 0
        self.endian = '<'

        self.record_pos = 0
        self.record_num = 0

        self.record_index = []

        self.flg_debug = debug

        # Try to guess parameters like byte order and header length.
        if self._parameterChk_():
            self._index_()      # Index the file
        else:
            # No idea how to unpack the records
            msg = "Cannot guess how to unpack the records. " + \
                    "Please try to specify the following parameters manually: " + sep + \
                    "'.header_len'" + sep + \
                    "'.unpack_str'" + sep + \
                    "'.endian'" + sep + \
                    "then index the file manually using '._index_()'"
            print(msg)

        if debug:
            print('file_path: ' + str(self.file_path.full))

            print('unpack_str: ' + str(self.unpack_str))
            print('header_len: ' + str(self.header_len))

            print('record_pos: ' + str(self.record_pos))
            print('record_num: ' + str(self.record_num))

        return
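
The error message above spells out the manual recovery path. A hedged sketch of it follows; RecordReader is a stand-in name for the class this __init__ belongs to, and the file path and parameter values are purely illustrative:

reader = RecordReader('capture.dat')   # hypothetical class name and path

# Illustrative values only; they must match the actual file layout.
reader.header_len = 512          # per-record header length, in bytes
reader.unpack_str = '<1024h'     # struct-style format string (assumed form)
reader.endian = '<'              # little-endian byte order
reader._index_()                 # then index the file manually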
Example #4
def to_csv(metAry, path, debug=False, \
        field_delimiter=',', text_delimiter='"', linesep="\r\n"):
    """
    Write the metAry into given file path in the CSV format

    dest can be string or h5py.Group object.

    If dest is a string, it is interpret as the destination file path.

    If dest is a h5py.Group object, the content will be store under the given group.
    """

    path = filePath(path)

    FD = field_delimiter
    TD = text_delimiter
    LS = linesep

    if not path.write:
        raise IOError("Given file path is not writable: " + path.full)

    info = metAry.copy_info()
    data = metAry.data

    nfo_keys = sorted(info.keys())

    with open(path.full, 'wb') as f:

        # Write out the meta info first
        for key in nfo_keys:

            val = info.pop(key)

            if isinstance(val, (int, long, float, complex)):
                val = str(val)
            else:
                val = TD + str(val) + TD

            f.write(key + FD + val + LS)

        # Write out the content, format depends on the number of data dimensions
        if data.ndim == 1:
            # One dimensional data: write out the index - value pairs

            content = zip(metAry.get_axis(), data)

            for idx, val in content:
                f.write(str(idx) + FD + str(val) + LS)

        elif data.ndim == 2:
            # Two dimensional data, write out the x-y grid
            x, y = data.shape

            for x_idx in range(x):
                row = FD.join(map(str, data[x_idx]))
                f.write(row + LS)

        else:
            # N-dimensional data: flatten the array, then dump
            f.write(LS.join(map(str, data.flatten())))
            f.write(LS)

    return
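
A short usage sketch for the CSV writer above; the import paths are assumptions, and the constructor follows the sketch in Example #1:

import numpy as np

from metaArray import metaArray, to_csv   # assumed import paths

ary = metaArray(np.arange(10), info={'name': 'ramp'})

to_csv(ary, 'ramp.csv')                       # default "\r\n" line endings
to_csv(ary, 'ramp_unix.csv', linesep='\n',
       field_delimiter=';')                   # delimiters can be overridden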
Example #5
    def __init__(self, path=None, debug=False, analyse=True, \
        buffer_size=10485760, field_delimiter=',', text_delimiter='"'):
        """
        Given the file path, prepare the csv file.

        Options:

        analyse     If True, the file will be scanned to guess where the
                       numerical data starts. This can take a long time for
                       large files of unusual layouts.
        """
        self.file_path = path
        self.name = filePath(path).name
        self.analyse = analyse
        self.debug = debug
        # self.f_handler = None
        self.rows = None                    # Number of rows
        self.cols = None                    # Number of columns
        self.idx = None                     # File seek index, linking each
                                            #  row to its byte position
                                            #  within the file.
        self.metainfo = {}                  # File header representations

        self.buffer_size = buffer_size      # Read file in this size chunks
        self.field_delimiter = field_delimiter
        self.text_delimiter = text_delimiter

        self.label_row = 0                  # Row number for column labels
        self.data_start = 0                 # Row number for where data starts

        # Check if the file is readable
        f = self.open()

        cols = 0
        f.seek(0)
        idx = [0]               # Byte position index of every row

        #######################################################################
        # "for line in f" doesnt work because f.tell() will be stuck at certain
        # multiples of buffer length.
        #######################################################################
        #for line in f:
        #
        #    line = line.strip()
        #
        #    # Skip blank lines
        #    if ''.join(line.split(field_delimiter)) == '':
        #        continue
        #    else:
        #        idx.append(f.tell())
        #
        #    items = len(line.split(field_delimiter))
        #    if items > cols:
        #        cols = items
        #######################################################################

        while True:
            line = f.readline()         # Read the CSV file line by line.
                                        # readlines() is not used because the
                                        #     file can potentially be very large

            if line == '':
                del idx[-1]             # Remove the last index entry
                break                   # Reached the end of the file

            line = line.strip()         # Strip off the '\n'
            lst = line.split(field_delimiter)

            #if ''.join(lst) == '':
            #    continue                # In case of a blank row
            #else:
            #    idx.append(f.tell())    # Write down the next byte position

            idx.append(f.tell())         # Don't ignore blank rows

            items = len(lst)
            if items > cols:
                cols = items

        self.rows = len(idx)
        self.cols = cols
        self.idx = idx

        f.close()

        if analyse:
            self.chk_data_start()
            # Try to read the header if it exists (i.e. data doesn't start on the 1st row)
            if self.data_start != 0:
                self.getmetainfo()

        return
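
The byte-position index built above is what lets later reads jump straight to a given row instead of re-scanning the file. A self-contained sketch of just that idea, independent of the class state:

def index_rows(path):
    """Record the byte position of the start of every row."""
    idx = [0]
    with open(path, 'rb') as f:
        while True:
            line = f.readline()
            if line == b'':
                del idx[-1]        # the last entry points past EOF
                break
            idx.append(f.tell())   # start position of the *next* row
    return idx

def read_row(path, idx, n):
    """Fetch row n with a single seek, without re-scanning."""
    with open(path, 'rb') as f:
        f.seek(idx[n])
        return f.readline().rstrip(b'\r\n')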
Example #6
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.

'''
Demo programme that shows the basics of using the metaArray project
'''
from os.path import join
from os import linesep

from textwrap import TextWrapper
import cStringIO

from metaArray.misc import filePath

# Environmental variables
demo_dir = join(filePath(__file__).baseDir, 'example')
tty_width = 72
partition = '-' * tty_width
prompt = '>>> '

# Current dir
# current_dir = dirPath('./')


wrapper = TextWrapper()
wrapper.width = tty_width
wrapper.replace_whitespace = False
# wrapper.drop_whitespace = False
wrapper.initial_indent = "- "
wrapper.subsequent_indent = '- '
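
A quick demonstration of the wrapper configured above, using only the objects defined in this snippet; every wrapped line is prefixed with "- " and folded at the tty width:

demo_text = 'This demo programme shows the basics of using the metaArray ' \
            'project, wrapping long description text to the 72-column width.'

print(partition)
print(wrapper.fill(demo_text))
print(partition)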
Example #7
    def __init__(self, path, debug=False, buffer_size=4096):
        """
        4kB buffer size
        """
        self.file_path = filePath(path)
        self.debug = debug
        self.buffer_size = buffer_size = buffer_size * 1024     # f.read(buffer_size) In case of very large files

        self.idx = idx = []     # Index of the records in the file, each item
                                #      should have the format:
                                #      [hdr_pos, hdr_len, data_pos, data_len, header_dict, unpack_str]

        #metainfo = self.metainfo = {}                # File header representations


        # Known header fields and value type.
        # This is only used for data type conversion from the byte stream
        # String values are left as is
        hdr_format = self.hdr_format = []
        hdr_format.append([':WFMP:NR_P', int])
        hdr_format.append([':WFMPRE:NR_PT', int])
        hdr_format.append([':WFMP:BYT_N', int])
        hdr_format.append([':WFMPRE:BYT_NR', int])
        hdr_format.append([':WFMO:BYT_N', int])
        hdr_format.append(['BIT_N', int])
        hdr_format.append(['BIT_NR', int])
        # ENC or ENCDG -> BIN
        # BN_F or BN_FMT-> RI
        # BYT_O or BYT_OR -> LSB or MSB
        # CH2:WFID "Ch2, AC coupling, 20mVolts/div, 50ns/div, 50000 points, Sample mode";
        # WFI "Ch2, DC coupling, 1.000mV/div, 40.00us/div, 1000000 points, Average mode";
        # PT_F or PT_FMT -> Y
        # XUN or XUNIT -> "s"
        hdr_format.append(['NR_P', int])
        hdr_format.append(['NR_PT', int])
        hdr_format.append(['PT_O', int])
        hdr_format.append(['PT_OFF', int])
        # PT_OR       LINEA                 # Always LINEAr
        hdr_format.append(['XIN', float])
        hdr_format.append(['XINCR', float])
        hdr_format.append(['XZE', float])
        # YUN or YUNIT -> "V" or  "Volts"
        hdr_format.append(['YMU', float])
        hdr_format.append(['YMULT', float])
        hdr_format.append(['YOF', float])
        hdr_format.append(['YOFF', float])
        hdr_format.append(['YZE', float])
        hdr_format.append(['YZERO', float])
        hdr_format.append(['VSCALE', float])
        hdr_format.append(['HSCALE', float])
        hdr_format.append(['VPOS', float])
        hdr_format.append(['VOFFSET', float])
        hdr_format.append(['HDELAY', float])
        # COMP -> COMPOSITE_YT
        hdr_format.append(['FILTERF', int])
        hdr_format.append(['CENTERFREQUENCY', float])
        # DOMAIN -> TIME
        hdr_format.append(['REFLEVEL', float])
        hdr_format.append(['SPAN', float])
        # WFMTYPE -> ANALOG
        # :CURV -> #72000000

        debug_str = ''

        # Identify the header and data byte positions in the byte streams
        #
        # The data string can take the following forms:
        #
        # ":WFMPRE:" --- Header info ---- ":CURVE #" ------ Binary Data -----
        # ":WFMP:" ----- Header info ---- ":CURV #" ------- Binary Data -----

        # END structure INIT

        f = self.open()
        f_pos = 0

        # Identify key locations in the data stream, write into header_rcd.
        # There may be multiple header-data streams.
        while True:

            # Find the beginning of the header
            hdr_start = buffered_search(f, ':WFMP:', start=f_pos, buffer_size=buffer_size)
            if hdr_start == -1:
                hdr_start = buffered_search(f, ':WFMPRE:', start=f_pos, buffer_size=buffer_size)
                if hdr_start == -1:
                    if debug > 0:
                        debug_str += 'Neither header descriptor (":WFMP:" nor ":WFMPRE:") was found.'
                        print(debug_str)
                    break # no more header found
                elif debug > 0:
                    debug_str += 'Header descriptor ":WFMPRE:" found at ' + str(hdr_start) + linesep
            elif debug > 0:
                debug_str += 'Header descriptor ":WFMP:" found at ' + str(hdr_start) + linesep

            # Find the following data stream
            data_start = buffered_search(f, ':CURV #', start=hdr_start, buffer_size=buffer_size)
            if data_start == -1:
                data_start = buffered_search(f, ':CURVE #', start=hdr_start, buffer_size=buffer_size)

                if data_start == -1:
                    # Very bad, found header but no data!
                    # Show debug info if requested, ignore otherwise
                    if debug > 0:
                        debug_str += 'Failed to find the following data stream!'
                        print(debug_str)

                    f_pos += 6
                    continue

                elif debug > 0:
                    debug_str += 'Data descriptor ":CURVE #" found at ' + str(data_start) + linesep
            elif debug > 0:
                debug_str += 'Data descriptor ":CURV #" found at ' + str(data_start) + linesep

            # Work out the length of the data stream; the prefix takes the form:
            # :CURVE #<x><yyy><data><newline>   or
            # :CURV #<x><yyy><data><newline>
            f.seek(data_start)
            buf = f.read(20)

            pos_x = buf.find('#') + 1
            pos_yyy = pos_x + 1
            desc_len = int(buf[pos_x:pos_yyy])                  # <x>

            pos_data = pos_yyy + desc_len
            data_byte_len = int(buf[pos_yyy:pos_data])              # <yyy>

            # Parse the headers
            # Include ":CURV #<x><yyy>" into the header
            data_start += pos_data
            hdr_len = data_start-hdr_start

            # Read the header byte stream
            f.seek(hdr_start)
            hdr_dict = self.proc_header(f.read(hdr_len))

            # Take a quick guess at the unpack string; this is the most
            # rudimentary information necessary to decode the binary data.
            # unpack_str is None if it cannot be worked out from here.
            unpack_str = self.get_unpackstr(hdr_dict)
            if debug > 0:
                if unpack_str is None:
                    debug_str += 'Unable to guess how to unpack the binary data.' + linesep
                    debug_str += 'Require the knowledge of at least the following header fields:' + linesep
                    debug_str += '\t BYT_O(R)' + linesep
                    debug_str += '\t NR_P(T)' + linesep
                    debug_str += '\t BIT_N(R)' + linesep
                    debug_str += '\t BN_F(MT)' + linesep
                else:
                    debug_str += 'Binary data is thought to be packed as: ' + unpack_str + linesep

            # Assemble into index list
            # idx [hdr_pos, hdr_len, data_pos, data_len, hdr_dict, unpack_str]
            idx.append([hdr_start, hdr_len, data_start, data_byte_len, hdr_dict, unpack_str])

            # Advance to the end of the data stream, prepare for the next search loop
            f_pos = data_start + data_byte_len

            if debug > 0:
                debug_str += 'Continue searching for the next record from byte ' + str(f_pos) + '......'
                print(debug_str)
                debug_str = ''

        f.close()

        return
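
The "#<x><yyy>" prefix decoded above is the definite-length binary block format used by IEEE 488.2 instruments: <x> is a single digit giving the number of digits in <yyy>, and <yyy> is the byte length of the data that follows. A self-contained sketch of just that step, using the sample value from the header notes above:

def parse_block_prefix(buf):
    """Decode '#<x><yyy>' and return (data offset, data byte length)."""
    pos_x = buf.find('#') + 1
    desc_len = int(buf[pos_x])                # <x>: how many digits in <yyy>
    pos_data = pos_x + 1 + desc_len
    data_len = int(buf[pos_x + 1:pos_data])   # <yyy>: data length in bytes
    return pos_data, data_len

# ':CURV -> #72000000' from the comments above: the single digit '7' says
# seven digits follow, and '2000000' is the byte length of the binary data.
print(parse_block_prefix(':CURV #72000000'))   # -> (15, 2000000)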