Example #1
0
def getData(url):
    """
    Get tabular data from either a text file or HDF5 table, and return it as a
    numpy array.

    The local path is obtained from ``getFromURL(url)``. If PyTables support
    is enabled (``use_hdf5``) and the path is an HDF5 file, the single table
    under the file root is copied into a float array with one row per table
    row and one column per table column. Otherwise, if the path is a regular
    file, it is read as whitespace-separated text; lines whose first
    character is '#' and blank lines are skipped.

    Raises:
        AssertionError: if the HDF5 file root does not hold exactly one table.
        Exception: if the path is neither an HDF5 file nor a regular file.
    """
    path = getFromURL(url)
    if use_hdf5 and tables.isHDF5File(path):
        h5file = tables.openFile(path, mode="r")
        try:
            # Query the node list once and reuse it for the check and lookup.
            table_nodes = h5file.listNodes(h5file.root, classname="Table")
            assert len(table_nodes) == 1, "File contains more than one table."
            table = table_nodes[0]
            arr = numpy.zeros((table.nrows, len(table.colnames)), "float")
            for i, row in enumerate(table):
                for j, colname in enumerate(table.colnames):
                    arr[i, j] = row[colname]
        finally:
            # The data has been copied into arr, so the handle can always be
            # released, including when the assertion or the copy fails.
            h5file.close()
    elif os.path.isfile(path):
        # NOTE(review): the text is read through srblib.urlopen(url) rather
        # than from `path`; kept as in the original - confirm this is intended.
        f = srblib.urlopen(url)
        try:
            lines = f.readlines()
        finally:
            f.close()
        # Blank lines would produce empty rows and break the float conversion,
        # so drop them together with the comment lines.
        arr = numpy.array([l.split() for l in lines
                           if l.strip() and l[0] != "#"]).astype("float")
    else:
        raise Exception("Not a valid file.")
    return arr
Example #2
0
def getData(url):
    """
    Get tabular data from either a text file or HDF5 table, and return it as a
    numpy array.

    The local path is obtained from ``getFromURL(url)``. If PyTables support
    is enabled (``use_hdf5``) and the path is an HDF5 file, the single table
    under the file root is copied into a float array with one row per table
    row and one column per table column. Otherwise, if the path is a regular
    file, it is read as whitespace-separated text; lines whose first
    character is '#' and blank lines are skipped.

    Raises:
        AssertionError: if the HDF5 file root does not hold exactly one table.
        Exception: if the path is neither an HDF5 file nor a regular file.
    """
    path = getFromURL(url)
    if use_hdf5 and tables.isHDF5File(path):
        h5file = tables.openFile(path, mode='r')
        try:
            # Query the node list once and reuse it for the check and lookup.
            table_nodes = h5file.listNodes(h5file.root, classname="Table")
            assert len(table_nodes) == 1, "File contains more than one table."
            table = table_nodes[0]
            arr = numpy.zeros((table.nrows, len(table.colnames)), 'float')
            for i, row in enumerate(table):
                for j, colname in enumerate(table.colnames):
                    arr[i, j] = row[colname]
        finally:
            # The data has been copied into arr, so the handle can always be
            # released, including when the assertion or the copy fails.
            h5file.close()
    elif os.path.isfile(path):
        # NOTE(review): the text is read through srblib.urlopen(url) rather
        # than from `path`; kept as in the original - confirm this is intended.
        f = srblib.urlopen(url)
        try:
            lines = f.readlines()
        finally:
            f.close()
        # Blank lines would produce empty rows and break the float conversion,
        # so drop them together with the comment lines.
        arr = numpy.array([l.split() for l in lines
                           if l.strip() and l[0] != '#']).astype('float')
    else:
        raise Exception("Not a valid file.")
    return arr
Example #3
0
def txt_to_hdf5(input_url, output_url):
    """
    Convert a data file in ascii format to a simple HDF5 file containing a
    single table. The data file should contain numerical data in columns
    separated by spaces and/or tabs.
    Lines beginning with '#' are taken to be comments; blank lines are ignored.
    Column headings can be specified as a comment on the first line of the file.
    A heading may carry units in parentheses, e.g. ``voltage (mV)``; headings
    containing spaces must be enclosed in double quotes. Units are stored as
    ``units_<column>`` attributes on the table. If the number of headings does
    not match the number of data columns, the columns are named ``col1``,
    ``col2``, ... and no unit attributes are written.

    Example:
    # time voltage
    0.0  -65.0
    0.1  -64.8

    The output is written to ``<path without extension>.h5``, where the path
    is taken from `output_url`, which must use the ``file://`` protocol.

    Raises:
        Exception: if PyTables support is not available (``use_hdf5`` false).
        ValueError: if the input is empty or contains no data rows, or if a
            data value cannot be converted to float.
        AssertionError: if `output_url` does not use the ``file`` protocol.
    """
    if not use_hdf5:
        raise Exception("PyTables not installed.")
    f = srblib.urlopen(input_url)
    try:
        contents = f.readlines()
    finally:
        f.close()
    if not contents:
        raise ValueError("Input file %s is empty." % input_url)

    # extract header information
    firstline = contents[0]
    headers = []
    udict = {}
    if firstline[:1] == "#":
        if '"' in firstline:
            headers = [h.strip() for h in firstline[1:].split('"') if h and h.strip()]
        else:
            headers = firstline[1:].split()
        for h, colname in enumerate(headers):
            # check for units
            p = colname.find("(")
            if p > 0:
                units = colname[p:]
                colname = colname[:p].strip().replace(" ", "_")
                udict[colname] = units
            headers[h] = colname.strip().replace(" ", "_")

    # Collect the data rows: every non-blank line that is not a comment. This
    # keeps the first row when the file has no header line and lets comments
    # appear anywhere in the file.
    data_lines = []
    for line in contents:
        line = line.strip()
        if line and not line.startswith("#"):
            data_lines.append(line)
    if not data_lines:
        raise ValueError("No data rows found in %s." % input_url)

    nfields = len(data_lines[0].split())
    if len(headers) != nfields:
        headers = ["col%d" % i for i in range(1, nfields + 1)]
        # Units parsed from a discarded header do not belong to any column.
        udict = {}

    # create row format
    # A description dictionary replaces exec() on a generated class body, so
    # header text read from the input file is never executed as code.
    description = dict((colname, tables.Float32Col(pos=pos))
                       for pos, colname in enumerate(headers))

    # create HDF5 file
    protocol, path = output_url.split("://")
    path = os.path.splitext(path)[0]
    filename = os.path.basename(path)
    assert protocol == "file", "For now, we only support writing to local files."
    h5file = tables.openFile("%s.h5" % path, mode="w", title=filename)
    try:
        # create and fill table
        table = h5file.createTable(h5file.root, filename, description,
                                   "Converted from %s" % input_url)
        for k, v in udict.items():
            setattr(table.attrs, "units_%s" % k, v)
        for line in data_lines:
            data = [float(x) for x in line.split()]
            row = table.row
            for col, val in zip(headers, data):
                row[col] = val
            row.append()
        table.flush()
    finally:
        # Always release the file handle, even if a row fails to convert.
        h5file.close()
Example #4
0
def txt_to_hdf5(input_url, output_url):
    """
    Convert a data file in ascii format to a simple HDF5 file containing a
    single table. The data file should contain numerical data in columns
    separated by spaces and/or tabs.
    Lines beginning with '#' are taken to be comments; blank lines are ignored.
    Column headings can be specified as a comment on the first line of the file.
    A heading may carry units in parentheses, e.g. ``voltage (mV)``; headings
    containing spaces must be enclosed in double quotes. Units are stored as
    ``units_<column>`` attributes on the table. If the number of headings does
    not match the number of data columns, the columns are named ``col1``,
    ``col2``, ... and no unit attributes are written.

    Example:
    # time voltage
    0.0  -65.0
    0.1  -64.8

    The output is written to ``<path without extension>.h5``, where the path
    is taken from `output_url`, which must use the ``file://`` protocol.

    Raises:
        Exception: if PyTables support is not available (``use_hdf5`` false).
        ValueError: if the input is empty or contains no data rows, or if a
            data value cannot be converted to float.
        AssertionError: if `output_url` does not use the ``file`` protocol.
    """
    if not use_hdf5:
        raise Exception("PyTables not installed.")
    f = srblib.urlopen(input_url)
    try:
        contents = f.readlines()
    finally:
        f.close()
    if not contents:
        raise ValueError("Input file %s is empty." % input_url)

    # extract header information
    firstline = contents[0]
    headers = []
    udict = {}
    if firstline[:1] == '#':
        if '"' in firstline:
            headers = [
                h.strip() for h in firstline[1:].split('"') if h and h.strip()
            ]
        else:
            headers = firstline[1:].split()
        for h, colname in enumerate(headers):
            # check for units
            p = colname.find("(")
            if p > 0:
                units = colname[p:]
                colname = colname[:p].strip().replace(" ", "_")
                udict[colname] = units
            headers[h] = colname.strip().replace(" ", "_")

    # Collect the data rows: every non-blank line that is not a comment. This
    # keeps the first row when the file has no header line and lets comments
    # appear anywhere in the file.
    data_lines = []
    for line in contents:
        line = line.strip()
        if line and not line.startswith('#'):
            data_lines.append(line)
    if not data_lines:
        raise ValueError("No data rows found in %s." % input_url)

    nfields = len(data_lines[0].split())
    if len(headers) != nfields:
        headers = ['col%d' % i for i in range(1, nfields + 1)]
        # Units parsed from a discarded header do not belong to any column.
        udict = {}

    # create row format
    # A description dictionary replaces exec() on a generated class body, so
    # header text read from the input file is never executed as code.
    description = dict((colname, tables.Float32Col(pos=pos))
                       for pos, colname in enumerate(headers))

    # create HDF5 file
    protocol, path = output_url.split('://')
    path = os.path.splitext(path)[0]
    filename = os.path.basename(path)
    assert protocol == 'file', "For now, we only support writing to local files."
    h5file = tables.openFile('%s.h5' % path, mode='w', title=filename)
    try:
        # create and fill table
        table = h5file.createTable(h5file.root, filename, description,
                                   "Converted from %s" % input_url)
        for k, v in udict.items():
            setattr(table.attrs, "units_%s" % k, v)
        for line in data_lines:
            data = [float(x) for x in line.split()]
            row = table.row
            for col, val in zip(headers, data):
                row[col] = val
            row.append()
        table.flush()
    finally:
        # Always release the file handle, even if a row fails to convert.
        h5file.close()