def read(fileobj, **keywords):
    """
    Name:
        io.read

    Usage:
        import esutil
        data = esutil.io.read(
            filename/fileobject,
            type=None,
            ext=0,
            rows=None, fields=None, columns=None,
            header=False,
            combine=False,
            view=None,
            lower=False, upper=False,
            noroot=True, seproot=False,
            verbose=False,
            ensure_native=False)

    Purpose:
        Provide a single interface to read from a variety of file types.
        Supports reading from a list of files.

    Inputs:
        filename/fileobject:
            File name or an open file object.  Can also be a sequence.
            If a sequence is input, the return value will, by default, be
            a list of results.  If the return types are numpy arrays, one
            can send the combine=True keyword to combine them into a
            single array as long as the data types match.

    Keywords:
        type:
            A string describing the file type, see below.  If this is not
            sent, then the file type is determined from the file extension.
        ext:
            The file extension.  If multiple extensions are supported by
            the file type, such as for FITS, then use this keyword to
            select which is to be read.  Default is the first extension
            with data.
        rows:
            For numpy record-type files such as FITS binary tables or
            simple REC files, setting this keyword will return a subset of
            the rows.  For FITS, this requires reading the entire file and
            selecting a subset.  For REC files only the requested rows are
            read from disk by using the recfile package.  Default is all
            rows.
        fields=, columns=:
            For numpy record-type files such as FITS binary tables or
            simple REC files, return a subset of the columns or fields.
            The keywords "fields" and "columns" are synonyms.  For FITS,
            this requires reading the entire file and selecting a subset.
            For REC files only the requested columns are read from disk by
            using the recfile package.  Default is all columns.
        header:
            If True, and the file type supports header+data, return a
            tuple (data, header).  Can also be 'only' in which case only
            the header is read and returned (rec and fits only for now).
            Default is False.
        combine:
            If a list of filenames/fileobjects is sent, the default
            behavior is to return a list of data.  If combine=True and the
            data are numpy arrays, attempt to combine them into a single
            array.  Only works if the data types match.
        view:
            If the result is derived from a numpy array, set this to pick
            the view.  E.g. pyfits returns a special pyfits type for
            binary tables.  You can request a simple numpy array with
            fields by setting view=numpy.ndarray, or a numpy recarray type
            with view=numpy.recarray.
        lower, upper:
            For FITS files, if true convert the case of the fields to all
            lower or all upper.  Certain FITS writers tend to write all
            field names as capitals which can result in annoyance.
        noroot:
            For XML files, do not return the root name as the base name
            in the dictionary.  Default is True.
        seproot:
            For XML files, return a tuple (data, rootname) instead of
            just the data under the root.
        ensure_native:
            For numpy arrays, make sure data is in native byte ordering.

    Currently Supported File Types:
        fits
            Flexible Image Transport System
        rec
            Simple ascii header followed by data in binary or text form.
            These files can be written/read using the esutil.sfile module.
            REC files support appending rows.  Also supports reading
            sub-selections of rows and columns.
        xml
            Extensible Markup Language
        json
            JavaScript Object Notation.  Less flexible than XML but more
            useful in most practical situations such as storing
            inhomogeneous data in a portable way.
        yaml
            A nice, human readable markup language, especially useful for
            configuration files.  YAML stands for YAML Ain't Markup
            Language.
        pyobj
            A straight dump of an object to disk using its repr().  Files
            are written using pprint, read simply using
            eval(open(file).read()).  This is not secure so use with
            caution.

    Revision History:
        Use **keywords for input and for sending to all called methods.
        Much more flexible when adding new keywords and file types.
        2010
    """
    verbose = keywords.get('verbose', False)

    # If input is a sequence, read them all and return a list (or a
    # combined array when combine=True and the types support it).
    if isinstance(fileobj, (list, tuple)):
        combine = keywords.get('combine', False)

        # note, only fields/columns is being passed on but not rows
        # also note seproot is not being passed on
        alldata = [read(f, **keywords) for f in fileobj]

        if combine:
            if len(fileobj) == 1:
                alldata = alldata[0]
            else:
                # determine the type from the first entry; combining is
                # only supported for numpy record-type results
                fname, fobj, ftype, fs = _get_fname_ftype_from_inputs(
                    fileobj[0], **keywords)
                if ftype in ('fits', 'rec'):
                    # this will only work if all the data have the
                    # same structure
                    if verbose:
                        stderr.write("Combining arrays\n")
                    alldata = numpy_util.combine_arrlist(alldata)

        return alldata

    # a scalar was input
    # NOTE: avoid naming the local 'type'; it would shadow the builtin
    fname, fobj, ftype, fs = _get_fname_ftype_from_inputs(fileobj, **keywords)

    if fs == 'hdfs':
        # stage the file out of hdfs, then recurse on the local copy
        with hdfs.HDFSFile(fname, verbose=verbose) as hdfs_file:
            data = hdfs_file.read(read, **keywords)
        return data

    if verbose:
        stderr.write("Reading: %s\n" % fname)

    # pick the right reader based on type
    if ftype == 'fits':
        data = read_fits(fobj, **keywords)
    elif ftype == 'json':
        data = json_util.read(fobj, **keywords)
    elif ftype == 'yaml':
        data = read_yaml(fobj, **keywords)
    elif ftype == 'rec':
        data = read_rec(fobj, **keywords)
    elif ftype == 'xml':
        data = read_xml(fobj, **keywords)
    elif ftype == 'pyobj':
        data = read_pyobj(fobj, **keywords)
    else:
        raise ValueError("Don't know about file type '%s'" % ftype)

    return data
def write(fileobj, data, **keywords):
    """
    Name:
        io.write

    Purpose:
        Provide a single interface to write a variety of file types.

    Usage:
        import esutil
        esutil.io.write(fileobj, data, **keywords)

    Inputs:
        filename/object:
            File name or an open file object.  If type= is not sent, file
            type is determined from the name of the file.
        data:
            Data that can be written to indicated file type.  E.g. for
            FITS files this should be a numpy array or a fits object.

    Optional Inputs:
        type:
            Indicator of the file type, e.g. 'fits', see below.  If None,
            the type is determined from the file name.
        header:
            If not None, write the header to the file if supported.

        There are other keywords for the individual writers.

    Currently Supported File Types:
        fits
            Flexible Image Transport System

            extra write keywords (if using fitsio)
                extname: a name for the new extension
                units: units for each column in tables
                compress: compression scheme for images
                header: a header to write
                clobber: remove any existing file
        rec
            Simple ascii header followed by data in binary or text form.
            These files can be written/read using the esutil.sfile module.
            REC files support appending rows.  Also supports reading
            sub-selections of rows and columns.

            extra write keywords
                header: a header to write
                append: append rows instead of clobbering
                delim: If not None, write ascii data with the specified
                    delimiter
                padnull: When writing ascii, replace Null characters with
                    spaces.
                ignorenull: When writing ascii, ignore Null characters.
                    Note you won't be able to read the data back in, but
                    it is useful for things like sqlite database input.
        xml
            Extensible Markup Language.  Extra keyword roottag= gives a
            root tag name.  If not sent, it is assumed the input is a dict
            and the first key found is the root.
        json
            JavaScript Object Notation.  Less flexible than XML but more
            useful in most practical situations such as storing
            inhomogeneous data in a portable way.
        yaml
            A nice, human readable markup language, especially useful for
            configuration files.  YAML stands for YAML Ain't Markup
            Language.
        pyobj
            A straight dump of an object to disk using its repr().  Files
            are written using pprint, read simply using
            eval(open(file).read()).  This is not secure so use with
            caution.
    """
    verbose = keywords.get('verbose', False)

    # a scalar was input
    # NOTE: avoid naming the local 'type'; it would shadow the builtin
    fname, fobj, ftype, fs = _get_fname_ftype_from_inputs(fileobj, **keywords)

    if fs == 'hdfs':
        # write locally then stage into hdfs; nothing more to do here
        with hdfs.HDFSFile(fname, verbose=verbose) as hdfs_file:
            hdfs_file.write(write, data, **keywords)
        return

    # pick the right writer based on type.
    # (a previous version had an unreachable "if fs == 'hdfs': hdfs_put(...)"
    # branch after this dispatch referencing an undefined name fname_hdfs;
    # the hdfs case already returned above, so it has been removed)
    if ftype == 'fits':
        write_fits(fobj, data, **keywords)
    elif ftype == 'yaml':
        write_yaml(fobj, data, **keywords)
    elif ftype == 'xml':
        write_xml(fobj, data, **keywords)
    elif ftype == 'json':
        json_util.write(data, fobj, **keywords)
    elif ftype == 'rec':
        write_rec(fobj, data, **keywords)
    elif ftype == 'pyobj':
        # writers return nothing useful; do not rebind the 'data' input
        write_pyobj(fobj, data, **keywords)
    else:
        raise ValueError("Need to implement writing file type: %s\n" % ftype)