Beispiel #1
0
def read(fileobj, **keywords):
    """
    Name:
        io.read

    Usage:
        import esutil
        data = esutil.io.read(
            filename/fileobject,
            type=None,
            ext=0,
            rows=None, fields=None, columns=None,
            header=False,
            combine=False,
            view=None,
            lower=False, upper=False,
            noroot=True, seproot=False,
            verbose=False,
            ensure_native=False)

    Purpose:
        Provide a single interface to read from a variety of file types.
        Supports reading from a list of files.

    Inputs:
        filename/fileobject:
            File name or an open file object.  Can also be a list or tuple.
            If a sequence is input, the return value will, by default, be a
            list of results, one per element.  If the return types are numpy
            arrays, one can send the combine=True keyword to combine them
            into a single array as long as the data types match.

    Keywords:
        All keywords are forwarded unchanged to the reader that is selected
        for the file type.

        type:
            A string describing the file type, see below.  If this is not sent,
            then the file type is determined from the file extension.
            NOTE(review): the usage text used to spell this keyword `typ`;
            the lookup is done by _get_fname_ftype_from_inputs, which is not
            part of this function -- confirm the accepted spelling(s) there.
        ext:
            The file extension.  If multiple extensions are supported by the
            file type, such as for FITS, then use this keyword to select which
            is to be read. Default is the first extension with data.

        rows:
            For numpy record-type files such as FITS binary tables or simple
            REC files, setting this keyword will return a subset of the rows.
            For FITS, this requires reading the entire file and selecting a
            subset.  For REC files only the requested rows are read from disk
            by using the recfile package.  Default is all rows.

        fields=, columns=:
            For numpy record-type files such as FITS binary tables or simple
            REC files, return a subset of the columns or fields.  The keywords
            "fields" and "columns" are synonyms.  For FITS, this requires
            reading the entire file and selecting a subset.  For REC files only
            the requested rows are read from disk by using the recfile package.
            Default is all columns.

        header:
            If True, and the file type supports header+data, return a tuple
            (data, header).  Can also be 'only' in which case only the header
            is read and returned (rec and fits only for now).  Default is
            False.

        combine:
            If a list of filenames/fileobjects is sent, the default behavior
            is to return a list of data.  If combine=True:
                - an empty sequence returns an empty list,
                - a one-element sequence returns that element's data itself
                  (not wrapped in a list),
                - for longer sequences whose first element is of type 'fits'
                  or 'rec', the results are combined into a single array.
                  Only works if the data types match.  Other file types are
                  returned as a plain list.
        view:
            If the result is derived from a numpy array, set this to pick the
            view.  E.g. pyfits returns a special pyfits type for binary
            table.  You can request a simple numpy array with fields by
            setting view=numpy.ndarray, or a numpy recarray type with
            view=numpy.recarray

        lower,upper:
            For FITS files, if true convert the case of the fields to all
            lower or all upper.  Certain FITS writers tend to write all
            fields names as capitals which can result in annoyance.

        noroot:
            For XML files, do not return the root name as the base name in
            the dictionary.  Default is True
        seproot:
            For XML files, return a tuple (data, rootname) instead of just
            the data under the root.

        ensure_native:
            For numpy arrays, make sure data is in native byte ordering.

        verbose:
            If true, progress messages are written to stderr.

    Returns:
        The data produced by the reader for the detected file type, or a
        list of such results when a list/tuple was input (see combine).

    Raises:
        ValueError if the file type is not one of the supported types.

    Currently Supported File Types:
        fits
            Flexible Image Transport System
        rec
            Simple ascii header followed by data in binary or text form. These
            files can be written/read using the esutil.sfile module.  REC files
            support appending rows.  Also supports reading sub-selections of
            rows and columns.
        xml
            Extensible Markup Language
        json
            JavaScript Object Notation.  Less flexible than XML but more useful
            in most practical situations such as storing inhomogeneous data in
            a portable way.
        yaml
            A nice, human readable markup language, especially useful
            for configuration files.  YAML stands for
                YAML Ain't Markup Language
        pyobj
            A straight dump of an object to disk using its repr().  Files are
            written using pprint, read simply using eval(open(file).read()).

            This is not secure so use with caution.

    Revision History:
        Use **keywords for input and for sending to all called methods. Much
        more flexible when adding new keywords and file types.
        2010
    """

    verbose = keywords.get('verbose', False)

    # If input is a sequence, read every element and optionally combine.
    if isinstance(fileobj, (list, tuple)):
        combine = keywords.get('combine', False)

        # Every keyword is forwarded unchanged to the per-element call.
        alldata = [read(f, **keywords) for f in fileobj]

        if combine:
            if len(alldata) == 1:
                # A single result is returned bare rather than as a list.
                alldata = alldata[0]
            elif len(alldata) > 1:
                # The first element decides whether the results are arrays
                # that can be stacked.  The len() guard keeps an empty
                # sequence from indexing fileobj[0].
                _, _, ftype, _ = _get_fname_ftype_from_inputs(
                    fileobj[0], **keywords)
                if ftype in ('fits', 'rec'):
                    # this will only work if all the data have the same
                    # structure
                    if verbose:
                        stderr.write("Combining arrays\n")
                    alldata = numpy_util.combine_arrlist(alldata)
        return alldata

    # a scalar was input
    fname, fobj, ftype, fs = _get_fname_ftype_from_inputs(fileobj, **keywords)

    if fs == 'hdfs':
        # The HDFS wrapper is handed this function plus the keywords;
        # presumably it stages a local copy and calls read() on it --
        # confirm in hdfs.HDFSFile.
        with hdfs.HDFSFile(fname, verbose=verbose) as hdfs_file:
            data = hdfs_file.read(read, **keywords)
        return data

    if verbose:
        stderr.write("Reading: %s\n" % fname)

    # pick the right reader based on type
    if ftype == 'fits':
        return read_fits(fobj, **keywords)
    if ftype == 'json':
        return json_util.read(fobj, **keywords)
    if ftype == 'yaml':
        return read_yaml(fobj, **keywords)
    if ftype == 'rec':
        return read_rec(fobj, **keywords)
    if ftype == 'xml':
        return read_xml(fobj, **keywords)
    if ftype == 'pyobj':
        # SECURITY: per the docstring this format is read with eval();
        # only use it on trusted files.
        return read_pyobj(fobj, **keywords)

    raise ValueError("Don't know about file type '%s'" % ftype)
Beispiel #2
0
def write(fileobj, data, **keywords):
    """
    Name:
        io.write
    Purpose:
        Provide a single interface to write a variety of file types.

    Usage:
        import esutil
        esutil.io.write(fileobj, data, **keywords)

    Inputs:
        fileobj:
            File name or an open file object.  If type= is not sent, file
            type is determined from the name of the file.
        data:
            Data that can be written to indicated file type. E.g. for
            FITS files this should be a numpy array or a fits object.

    Optional Inputs:
        All keywords are forwarded unchanged to the writer that is selected
        for the file type.

        type:
            Indicator of the file type, e.g. 'fits', see below.  If None, the
            type is determined from the file name.
        header:
            If not None, write the header to the file if supported.
        verbose:
            Passed to the HDFS wrapper when the target is in hdfs.

    There are other keywords for the individual writers.

    Returns:
        None

    Raises:
        ValueError if the file type is not one of the supported types.

    Currently Supported File Types:
        fits
            Flexible Image Transport System

            extra write keywords (if using fitsio)
                extname: a name for the new extension
                units: units for each column in tables
                compress: compression scheme for images
                header: a header to write
                clobber: remove any existing file
        rec
            Simple ascii header followed by data in binary or text form. These
            files can be written/read using the esutil.sfile module.  REC files
            support appending rows.  Also supports reading sub-selections of
            rows and columns.

            extra write keywords
                header: a header to write
                append: append rows instead of clobbering
                delim: If not None, write ascii data with the specified
                    delimiter
                padnull:  When writing ascii, replace Null characters with spaces.
                ignorenull: When writing ascii, ignore Null characters. Note
                    you won't be able to read the data back in, but it is
                    useful for things like sqlite database input.

        xml
            Extensible Markup Language.  Extra keyword roottag= gives
            a root tag name.  If not sent, it is assumed the input
            is a dict and the first key found is the root.
        json
            JavaScript Object Notation.  Less flexible than XML but more useful
            in most practical situations such as storing inhomogeneous data in
            a portable way.
        yaml
            A nice, human readable markup language, especially useful
            for configuration files.  YAML stands for
                YAML Ain't Markup Language
        pyobj
            A straight dump of an object to disk using its repr().  Files are
            written using pprint, read simply using eval(open(file).read()).

            This is not secure so use with caution.

    """

    verbose = keywords.get('verbose', False)

    fname, fobj, ftype, fs = _get_fname_ftype_from_inputs(fileobj, **keywords)

    if fs == 'hdfs':
        # The HDFS wrapper is handed this function plus the data and
        # keywords; presumably it writes locally and then pushes the file
        # to hdfs -- confirm in hdfs.HDFSFile.  Nothing else is done here
        # for hdfs targets.
        with hdfs.HDFSFile(fname, verbose=verbose) as hdfs_file:
            hdfs_file.write(write, data, **keywords)
        return

    # pick the right writer based on type
    if ftype == 'fits':
        write_fits(fobj, data, **keywords)
    elif ftype == 'yaml':
        write_yaml(fobj, data, **keywords)
    elif ftype == 'xml':
        write_xml(fobj, data, **keywords)
    elif ftype == 'json':
        # note the argument order: json_util.write takes the data first
        json_util.write(data, fobj, **keywords)
    elif ftype == 'rec':
        write_rec(fobj, data, **keywords)
    elif ftype == 'pyobj':
        write_pyobj(fobj, data, **keywords)
    else:
        raise ValueError("Need to implement writing file type: %s\n" % ftype)