def get_filename_from_dmsyntax(filename, blockname=None):
    """Return ``filename`` with extra Data Model qualifiers appended.

    Parameters
    ----------
    filename : str
        The file name, optionally followed by bracketed qualifiers
        (``name[...]``). Only the text before the first ``[`` is
        used as the path to inspect.
    blockname : optional
        When not ``None``, ``[BLOCKNAME]`` (upper-cased) is appended.

    Returns
    -------
    str
        The original name followed by the block specifier (if
        requested) and by ``[opt colnames=none]`` when the file is not
        binary, its last leading comment line does not have the same
        number of space-separated tokens as the first data line, and
        the first qualifier does not contain ``cols``.
    """
    arg = str(filename)
    isbinary = True
    colnames = True
    dmsyn = ''

    if '[' in filename and ']' in filename:
        parts = filename.split('[')
        filename = parts.pop(0)
        if parts:
            dmsyn = parts.pop(0).lower()

    if not is_binary_file(filename):
        isbinary = False

        # Skip the leading comment lines, remembering the last one,
        # and stop at the first non-comment (or empty) line.
        with open(filename, 'r') as fd:
            last = None
            line = fd.readline().strip()
            while len(line) > 0 and line[0] in '#%':
                last = line
                line = fd.readline().strip()

        if last is not None:
            # Drop the '#' marker before counting, otherwise a header
            # written as "# X Y" is counted as three tokens and never
            # matches a two-column data row.
            nheader = len(last.strip('#').strip().split(' '))
            ndata = len(line.strip().split(' '))
            if nheader != ndata:
                colnames = False

    if blockname is not None:
        arg += "[%s]" % str(blockname).upper()

    if (not isbinary) and (not colnames) and ('cols' not in dmsyn):
        arg += "[opt colnames=none]"

    return arg
def read_table_blocks(arg, make_copy=False):
    """Collect the columns and header keywords of every block.

    Parameters
    ----------
    arg : fits.HDUList or str
        Either an already-open HDU list or the name of a file for
        which ``is_binary_file`` returns True.
    make_copy : bool, optional
        Accepted but not used by this implementation.

    Returns
    -------
    (filename, cols, hdr)
        ``cols`` and ``hdr`` are dictionaries keyed by the 1-based
        block number; each value is itself a dictionary (column name
        to column data, and keyword name to value, respectively).

    Raises
    ------
    IOErr
        If ``arg`` is neither of the supported inputs.
    """
    if isinstance(arg, fits.HDUList):
        hdus = arg
        filename = arg[0]._file.name
    elif isinstance(arg, basestring) and is_binary_file(arg):
        hdus = fits.open(arg)
        filename = arg
    else:
        raise IOErr('badfile', arg,
                    "a binary FITS table or a BinTableHDU list")

    cols = {}
    hdr = {}

    for blockidx, hdu in enumerate(hdus, 1):
        keywords = {}
        header = hdu.header
        if header is not None:
            for key in header.keys():
                keywords[key] = header[key]
        hdr[blockidx] = keywords

        # A block without data (e.g. the primary block, where
        # hdu.data is None) gets an empty column dictionary.
        columns = {}
        table = hdu.data
        if table is not None:
            for colname in table.names:
                columns[colname] = table[colname]
        cols[blockidx] = columns

    return filename, cols, hdr
def get_ascii_data(filename, ncols=1, colkeys=None, sep=' ', dstype=Data1D,
                   comment='#', require_floats=True):
    """Read in columns from an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    ncols : int, optional
        The number of leading columns to return. Ignored when
        ``colkeys`` is given.
    colkeys : sequence of str, optional
        The names of the columns to return.
    sep : str, optional
        The separator, passed to ``read_file_data``.
    dstype : optional
        Data class passed to ``_check_args`` together with the number
        of columns being returned.
    comment : str, optional
        The comment character, passed to ``read_file_data``.
    require_floats : bool, optional
        Passed to ``read_file_data``.

    Returns
    -------
    (colnames, coldata, filename)
        Without ``colkeys`` the names are all those reported by
        ``read_file_data`` and the data are the first ``ncols``
        columns; with ``colkeys`` both follow the requested order.

    Raises
    ------
    IOErr
        If the file is binary, there are more column names than
        columns, or a requested column is missing.
    """
    if is_binary_file(filename):
        raise IOErr('notascii', filename)

    names, args = read_file_data(filename, sep, comment, require_floats)

    if colkeys is None:
        # The column-count check is skipped for the default ncols=1.
        if ncols != 1:
            _check_args(ncols, dstype)
        return (names, list(args[:ncols]), filename)

    kwargs = []
    colkeys = list(colkeys)

    if len(names) > len(args):
        raise IOErr('toomanycols')

    for key in colkeys:
        if key not in names:
            # numpy.bytes_ is the same type as the numpy.string_ alias,
            # which was removed in NumPy 2.0.
            raise IOErr('reqcol', key, numpy.asarray(names, numpy.bytes_))
        kwargs.append(args[names.index(key)])

    _check_args(len(kwargs), dstype)
    return (colkeys, kwargs, filename)
def get_filename_from_dmsyntax(filename, blockname=None):
    """Build the Data Model file specification for ``filename``.

    Parameters
    ----------
    filename : str
        The file name, optionally followed by bracketed qualifiers.
    blockname : optional
        When not ``None``, ``[BLOCKNAME]`` (upper-cased) is appended.

    Returns
    -------
    str
        The input name, plus the block specifier when requested, plus
        ``[opt colnames=none]`` when the file is not binary, the last
        leading comment line (with its ``#`` marker removed) and the
        first data line have different numbers of space-separated
        tokens, and the first qualifier does not mention ``cols``.
    """
    result = str(filename)
    qualifier = ''

    if '[' in filename and ']' in filename:
        pieces = filename.split('[')
        filename = pieces[0]
        if len(pieces) > 1:
            qualifier = pieces[1].lower()

    is_ascii = not is_binary_file(filename)
    header_matches = True

    if is_ascii:
        # TODO: set encoding='UTF-8' or maybe 'ascii'
        with open(filename, mode='r') as fh:
            header = None
            current = fh.readline().strip()
            # Walk past the comment lines, keeping the last one seen.
            while current and current[0] in '#%':
                header, current = current, fh.readline().strip()

            if header is not None:
                header_tokens = header.strip('#').strip().split(' ')
                data_tokens = current.strip().split(' ')
                header_matches = len(header_tokens) == len(data_tokens)

    if blockname is not None:
        result += f"[{str(blockname).upper()}]"

    if is_ascii and not header_matches and 'cols' not in qualifier:
        result += "[opt colnames=none]"

    return result
def get_ascii_data(filename, ncols=1, colkeys=None, sep=" ", dstype=Data1D,
                   comment="#", require_floats=True):
    """Read in columns from an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    ncols : int, optional
        The number of leading columns to return. Ignored when
        ``colkeys`` is given.
    colkeys : sequence of str, optional
        The names of the columns to return.
    sep : str, optional
        The separator, passed to ``read_file_data``.
    dstype : optional
        Data class passed to ``_check_args`` together with the number
        of columns being returned.
    comment : str, optional
        The comment character, passed to ``read_file_data``.
    require_floats : bool, optional
        Passed to ``read_file_data``.

    Returns
    -------
    (colnames, coldata, filename)
        Without ``colkeys`` the names are all those reported by
        ``read_file_data`` and the data are the first ``ncols``
        columns; with ``colkeys`` both follow the requested order.

    Raises
    ------
    IOErr
        If the file is binary, there are more column names than
        columns, or a requested column is missing.
    """
    if is_binary_file(filename):
        raise IOErr("notascii", filename)

    names, args = read_file_data(filename, sep, comment, require_floats)

    if colkeys is None:
        # The column-count check is skipped for the default ncols=1.
        if ncols != 1:
            _check_args(ncols, dstype)
        return (names, list(args[:ncols]), filename)

    kwargs = []
    colkeys = list(colkeys)

    if len(names) > len(args):
        raise IOErr("toomanycols")

    for key in colkeys:
        if key not in names:
            # numpy.bytes_ is the same type as the numpy.string_ alias,
            # which was removed in NumPy 2.0.
            raise IOErr("reqcol", key, numpy.asarray(names, numpy.bytes_))
        kwargs.append(args[names.index(key)])

    _check_args(len(kwargs), dstype)
    return (colkeys, kwargs, filename)
def get_filename_from_dmsyntax(filename, blockname=None):
    """Return ``filename`` with extra Data Model qualifiers appended.

    Parameters
    ----------
    filename : str
        The file name, optionally followed by bracketed qualifiers
        (``name[...]``). Only the text before the first ``[`` is
        used as the path to inspect.
    blockname : optional
        When not ``None``, ``[BLOCKNAME]`` (upper-cased) is appended.

    Returns
    -------
    str
        The original name followed by the block specifier (if
        requested) and by ``[opt colnames=none]`` when the file is not
        binary, the column-name check described below fails, and the
        first qualifier does not contain ``cols``.

    Notes
    -----
    For a non-binary file the leading lines starting with ``#`` or
    ``%`` are skipped. The last such line is taken as the candidate
    column-name line and its space-separated token count is compared
    with that of the first following line.
    """
    arg = str(filename)
    isbinary = True
    colnames = True
    dmsyn = ''

    if '[' in filename and ']' in filename:
        parts = filename.split('[')
        filename = parts.pop(0)
        if parts:
            dmsyn = parts.pop(0).lower()

    if not is_binary_file(filename):
        isbinary = False

        with open(filename, 'r') as fd:
            last = None
            line = fd.readline().strip()
            while len(line) > 0 and line[0] in '#%':
                last = line
                line = fd.readline().strip()

        if last is not None:
            # The '#' marker is not a column name: strip it so that
            # "# X Y" and "1 2" both count as two tokens.
            header_tokens = last.strip('#').strip().split(' ')
            data_tokens = line.strip().split(' ')
            if len(header_tokens) != len(data_tokens):
                colnames = False

    if blockname is not None:
        arg += "[%s]" % str(blockname).upper()

    if (not isbinary) and (not colnames) and ('cols' not in dmsyn):
        arg += "[opt colnames=none]"

    return arg
def get_ascii_data(filename, ncols=1, colkeys=None, sep=' ', dstype=Data1D,
                   comment='#'):
    """Read in columns from an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    ncols : int, optional
        The number of leading columns to return. Ignored when
        ``colkeys`` is given.
    colkeys : sequence of str, optional
        The names of the columns to return.
    sep : str, optional
        The separator, passed to ``read_file_data``.
    dstype : optional
        Data class passed to ``_check_args`` together with the number
        of columns being returned.
    comment : str, optional
        The comment character, passed to ``read_file_data``.

    Returns
    -------
    (colnames, coldata, filename)
        Without ``colkeys`` the names are all those reported by
        ``read_file_data`` and the data are the first ``ncols``
        columns; with ``colkeys`` both follow the requested order.

    Raises
    ------
    IOErr
        If the file is binary, there are more column names than
        columns, or a requested column is missing.
    """
    if is_binary_file(filename):
        raise IOErr('notascii', filename)

    names, args = read_file_data(filename, sep, comment)

    if colkeys is None:
        # The column-count check is skipped for the default ncols=1.
        if ncols != 1:
            _check_args(ncols, dstype)
        return (names, list(args[:ncols]), filename)

    kwargs = []
    colkeys = list(colkeys)

    if len(names) > len(args):
        raise IOErr('toomanycols')

    for key in colkeys:
        if key not in names:
            # numpy.bytes_ is the same type as the numpy.string_ alias,
            # which was removed in NumPy 2.0.
            raise IOErr('reqcol', key, numpy.asarray(names, numpy.bytes_))
        kwargs.append(args[names.index(key)])

    _check_args(len(kwargs), dstype)
    return (colkeys, kwargs, filename)
def read_table_blocks(arg, make_copy=False):
    """Collect the columns and header keywords of every block.

    Parameters
    ----------
    arg : pyfits.HDUList or str
        Either an HDU list (exact type match) or a file name (``str``,
        ``unicode`` or ``numpy.str_``) for which ``is_binary_file``
        returns True.
    make_copy : bool, optional
        Accepted but not used by this implementation.

    Returns
    -------
    (filename, cols, hdr)
        ``cols`` and ``hdr`` are dictionaries keyed by the 1-based
        block number; each value is itself a dictionary (column name
        to column data, and keyword name to value, respectively).

    Raises
    ------
    IOErr
        If ``arg`` is neither of the supported inputs.
    """
    argtype = type(arg)
    if argtype is pyfits.HDUList:
        hdus = arg
        filename = arg[0]._file.name
    elif argtype in (str, unicode, numpy.str_) and is_binary_file(arg):
        hdus = pyfits.open(arg)
        filename = arg
    else:
        raise IOErr('badfile', arg,
                    "a binary FITS table or a PyFITS.BinTableHDU list")

    cols = {}
    hdr = {}

    for blockidx, hdu in enumerate(hdus, 1):
        keywords = {}
        header = hdu.header
        if header is not None:
            for key in header.keys():
                keywords[key] = header[key]
        hdr[blockidx] = keywords

        # A block without data (e.g. the primary block, where
        # hdu.data is None) gets an empty column dictionary.
        columns = {}
        table = hdu.data
        if table is not None:
            for colname in table.names:
                columns[colname] = table[colname]
        cols[blockidx] = columns

    return filename, cols, hdr
def get_header_data( arg, blockname=None, hdrkeys=None ):
    """Open the crate named by ``arg`` (visible part of the routine).

    NOTE(review): no ``return`` statement, no use of ``hdrkeys`` and no
    handling of non-string input is visible here, so this definition
    appears to be cut off; only the statements shown are documented.
    """

    filename = ''
    # Only plain str input for which pycrates reports "not an image"
    # is handled by the visible code.
    if type(arg) == str and pycrates.Crate(arg).is_image()==0:

        filename = arg

        isbinary = True
        colnames = True
        dmsyn = ''

        # Separate the path from the first bracketed qualifier.
        if '[' in filename and ']' in filename:
            parts = filename.split('[')
            filename = parts.pop(0)
            if parts:
                dmsyn = parts.pop(0).lower()

        if not is_binary_file(filename):
            isbinary = False
            fd = open(filename, 'r')
            try:
                # Skip the leading comment lines ('#' or '%'), keeping
                # the last one, then compare its space-separated token
                # count with that of the first following line.
                last=None
                line = fd.readline().strip()
                while len(line) > 0 and line[0] in '#%':
                    last = line
                    line = fd.readline().strip()
                if (last is not None and
                    (len(last.split(' ')) != len(line.split(' ')) )):
                    colnames = False
            finally:
                fd.close()

        if blockname is not None:
            arg += "[%s]" % str(blockname).upper()

        # Only added for a non-binary file whose token counts differ
        # and whose qualifier does not mention 'cols'.
        if (not isbinary) and (not colnames) and (not 'cols' in dmsyn):
            arg += "[opt colnames=none]"

        # Try to open as a table first; if that fails try an image, and
        # if that also fails re-raise the original (table) error.
        try:
            tbl = _open_crate(pycrates.TABLECrate, [arg])
        except Exception, e:
            try:
                tbl = _open_crate(pycrates.IMAGECrate, [arg])
            except:
                raise e

        filename = tbl.get_filename()
def get_header_data( arg, blockname=None, hdrkeys=None ):
    """Return header keywords from one block of a FITS file.

    Parameters
    ----------
    arg : str or pyfits.HDUList
        A file name for which ``is_binary_file`` returns True, or a
        non-empty HDU list whose first element is a PrimaryHDU.
    blockname : optional
        Name of the block to use (compared case-insensitively after
        stripping white space). When ``None`` the first BinTableHDU is
        used.
    hdrkeys : sequence of str, optional
        The keywords to read. When ``None`` every keyword in the
        block's header is read.

    Returns
    -------
    dict
        Keyword name to the value returned by
        ``_require_key(hdu, key, dtype=str)``.

    Raises
    ------
    IOErr
        If ``arg`` is not a supported input or no block matches.

    Notes
    -----
    The HDU list is closed before returning, including when it was
    supplied by the caller.
    """
    if type(arg) == str and is_binary_file(arg):
        tbl = pyfits.open(arg)
        filename = arg
    elif ((type(arg) is pyfits.HDUList) and (len(arg) > 0) and
          (arg[0].__class__ is pyfits.PrimaryHDU)):
        tbl = arg
        filename = tbl[0]._file.name
    else:
        raise IOErr('badfile', arg,
                    "a binary FITS table or a PyFITS.BinTableHDU list")

    hdr = {}
    try:
        # Pick the first block satisfying the selection rule.
        selected = None
        for candidate in tbl:
            if blockname is None:
                found = candidate.__class__ is pyfits.BinTableHDU
            else:
                found = (candidate.name.lower() ==
                         str(blockname).strip().lower())
            if found:
                selected = candidate
                break

        if selected is None:
            raise IOErr('badext', filename)

        if hdrkeys is not None:
            wanted = hdrkeys
        else:
            wanted = selected.header.keys()

        for key in wanted:
            hdr[key] = _require_key(selected, key, dtype=str)
    finally:
        tbl.close()

    return hdr
def _get_file_contents(arg, exptype="PrimaryHDU", nobinary=False):
    """Return the HDU list and file name for ``arg``.

    Parameters
    ----------
    arg : str or fits.HDUList
        A file name, or a non-empty HDU list whose first element is a
        PrimaryHDU.
    exptype : str, optional
        Only used to build the error message.
    nobinary : bool, optional
        As written, a file name is opened without calling
        ``is_binary_file`` when this is False (the default), and is
        only opened if ``is_binary_file`` returns True when it is True.

    Returns
    -------
    (tbl, filename)

    Raises
    ------
    IOErr
        If ``arg`` is not a supported input.

    Notes
    -----
    NOTE(review): the previous description of ``nobinary`` said that
    True *avoids* the ``is_binary_file`` check, which is the opposite
    of what the condition does. The behaviour is left untouched here
    since callers may rely on it; confirm which one is intended.
    """
    is_name = isinstance(arg, basestring)
    if is_name and (not nobinary or is_binary_file(arg)):
        return (fits.open(arg), arg)

    if isinstance(arg, fits.HDUList) and len(arg) > 0 and \
       isinstance(arg[0], fits.PrimaryHDU):
        return (arg, arg[0]._file.name)

    msg = "a binary FITS table or a {} list".format(exptype)
    raise IOErr('badfile', arg, msg)
def get_table_data(arg, ncols=1, colkeys=None, make_copy=False, fix_type=False,
                   blockname = None, hdrkeys=None):
    """Read columns (and optionally keywords) from a binary table.

    Parameters
    ----------
    arg : str or pyfits.HDUList
        A file name for which ``is_binary_file`` returns True, or a
        non-empty HDU list whose first element is a PrimaryHDU.
    ncols : int, optional
        Number of leading columns to use when ``colkeys`` is not given
        and neither the CHANNEL/COUNTS nor the X/Y pair is present.
    colkeys : sequence of str, optional
        Column names to read; they are stripped and upper-cased.
    make_copy : bool, optional
        Accepted but not used by this implementation.
    fix_type : bool, optional
        Passed to ``_require_tbl_col``.
    blockname : optional
        Name of the BinTableHDU to use (case-insensitive, stripped).
        When ``None`` the first BinTableHDU is used.
    hdrkeys : sequence of str, optional
        Keywords to read with ``_require_key``.

    Returns
    -------
    (colkeys, cols, filename, hdr)

    Raises
    ------
    IOErr
        If ``arg`` is not a supported input or no block matches.

    Notes
    -----
    The HDU list is closed before returning, including when it was
    supplied by the caller.
    """
    if type(arg) == str and is_binary_file(arg):
        tbl = pyfits.open(arg)
        filename = arg
    elif ((type(arg) is pyfits.HDUList) and (len(arg) > 0) and
          (arg[0].__class__ is pyfits.PrimaryHDU)):
        tbl = arg
        filename = tbl[0]._file.name
    else:
        raise IOErr('badfile', arg,
                    "a binary FITS table or a PyFITS.BinTableHDU list")

    try:
        wanted = None
        if blockname is not None:
            wanted = str(blockname).strip().lower()

        # Use the first binary table extension that matches; complain
        # if there is none.
        hdu = None
        for candidate in tbl:
            if candidate.__class__ is not pyfits.BinTableHDU:
                continue
            if wanted is None or candidate.name.lower() == wanted:
                hdu = candidate
                break

        if hdu is None:
            raise IOErr('badext', filename)

        cnames = list(hdu.columns.names)

        if colkeys is not None:
            colkeys = [name.strip().upper() for name in list(colkeys)]
        else:
            # Try CHANNEL/COUNTS, then X/Y, before falling back to the
            # first ncols columns of the table.
            for pair in (['CHANNEL', 'COUNTS'], ['X', 'Y']):
                if pair[0] in cnames and pair[1] in cnames:
                    colkeys = pair
                    break
            else:
                colkeys = cnames[:ncols]

        cols = []
        for name in colkeys:
            cols.extend(_require_tbl_col(hdu, name, fix_type=fix_type))

        hdr = {}
        for key in (hdrkeys if hdrkeys is not None else ()):
            hdr[key] = _require_key(hdu, key)
    finally:
        tbl.close()

    return colkeys, cols, filename, hdr
def test_is_binary_file(make_data_path):
    """is_binary_file is true for the PHA file and false for the text file."""
    text_path = make_data_path("gauss2d.dat")
    pha_path = make_data_path("3c273.pi")

    assert is_binary_file(pha_path)
    assert not is_binary_file(text_path)
def get_pha_data(arg, make_copy=False, use_background=False):
    """Read the spectrum (or spectra) stored in a PHA file.

    get_pha_data( filename [, make_copy=False [, use_background=False]])

    get_pha_data( [PrimaryHDU, BinTableHDU] [, make_copy=False
                  [, use_background=False]])

    Parameters
    ----------
    arg : str or pyfits.HDUList
        A file name for which ``is_binary_file`` returns True, or a
        non-empty HDU list whose first element is a PrimaryHDU.
    make_copy : bool, optional
        Accepted but not used by this implementation.
    use_background : bool, optional
        When set, the last block whose HDUCLAS2 keyword is ``BKG``
        (if any) replaces the spectrum block.

    Returns
    -------
    (datasets, filename)
        ``datasets`` is a list of dictionaries, one per spectrum: a
        single entry when the block has no SPEC_NUM column, otherwise
        one entry per row.

    Raises
    ------
    IOErr
        If ``arg`` is not a supported input or no spectrum block is
        found.

    Notes
    -----
    The HDU list is closed before returning, including when it was
    supplied by the caller.
    """
    filename = ''
    if type(arg) == str and is_binary_file(arg):
        pha = pyfits.open(arg)
        filename = arg
    elif ((type(arg) is pyfits.HDUList) and (len(arg) > 0) and
          (arg[0].__class__ is pyfits.PrimaryHDU)):
        pha = arg
        filename = pha[0]._file.name
    else:
        raise IOErr('badfile', arg,
                    "a binary FITS spectrum or a PyFITS.BinTableHDU list")

    try:
        if _has_hdu(pha, 'SPECTRUM'):
            hdu = pha['SPECTRUM']
        elif (_has_hdu(pha, 1) and
              ((_try_key(pha[1], 'HDUCLAS1') == 'SPECTRUM') or
               (_try_key(pha[1], 'HDUCLAS2') == 'SPECTRUM'))):
            hdu = pha[1]
        else:
            raise IOErr('notrsp', filename, "a PHA spectrum")

        if use_background:
            for block in pha:
                if _try_key(block, 'HDUCLAS2') == 'BKG':
                    hdu = block

        # Keywords that are returned as separate fields and so are
        # removed from the copy of the header stored with each dataset.
        keys = ['BACKFILE', 'ANCRFILE', 'RESPFILE',
                'BACKSCAL', 'AREASCAL', 'EXPOSURE']

        datasets = []

        if _try_col(hdu, 'SPEC_NUM') is None:
            data = {}

            # Keywords
            data['exposure'] = _try_key(hdu, 'EXPOSURE', True, SherpaFloat)
            # data['poisserr'] = _try_key(hdu, 'POISSERR', True, bool)
            data['backfile'] = _try_key(hdu, 'BACKFILE')
            data['arffile'] = _try_key(hdu, 'ANCRFILE')
            data['rmffile'] = _try_key(hdu, 'RESPFILE')

            # Keywords or columns
            data['backscal'] = _try_col_or_key(hdu, 'BACKSCAL', fix_type=True)
            data['backscup'] = _try_col_or_key(hdu, 'BACKSCUP', fix_type=True)
            data['backscdn'] = _try_col_or_key(hdu, 'BACKSCDN', fix_type=True)
            data['areascal'] = _try_col_or_key(hdu, 'AREASCAL', fix_type=True)

            # Columns
            data['channel'] = _require_col(hdu, 'CHANNEL', fix_type=True)

            # Make sure channel numbers not indices
            chan = list(hdu.columns.names).index('CHANNEL') + 1
            tlmin = _try_key(hdu, 'TLMIN' + str(chan), True, SherpaUInt)
            if int(data['channel'][0]) == 0 or \
               ((tlmin is not None) and tlmin == 0):
                data['channel'] = data['channel'] + 1

            data['counts'] = _try_col(hdu, 'COUNTS', fix_type=True)
            if data['counts'] is None:
                data['counts'] = _require_col(hdu, 'RATE', fix_type=True) * \
                    data['exposure']

            data['staterror'] = _try_col(hdu, 'STAT_ERR')
            data['syserror'] = _try_col(hdu, 'SYS_ERR')
            data['background_up'] = _try_col(hdu, 'BACKGROUND_UP',
                                             fix_type=True)
            data['background_down'] = _try_col(hdu, 'BACKGROUND_DOWN',
                                               fix_type=True)
            data['bin_lo'] = _try_col(hdu, 'BIN_LO', fix_type=True)
            data['bin_hi'] = _try_col(hdu, 'BIN_HI', fix_type=True)
            data['grouping'] = _try_col(hdu, 'GROUPING', SherpaInt)
            data['quality'] = _try_col(hdu, 'QUALITY', SherpaInt)
            data['header'] = _get_meta_data(hdu)
            for key in keys:
                try:
                    data['header'].pop(key)
                except KeyError:
                    pass

            if data['syserror'] is not None:
                # SYS_ERR is the fractional systematic error
                data['syserror'] = data['syserror'] * data['counts']

            datasets.append(data)

        else:
            # Type 2 PHA file support
            specnum = _try_col_or_key(hdu, 'SPEC_NUM')
            num = len(specnum)

            # Keywords
            exposure = _try_key(hdu, 'EXPOSURE', True, SherpaFloat)
            # poisserr = _try_key(hdu, 'POISSERR', True, bool)
            backfile = _try_key(hdu, 'BACKFILE')
            arffile = _try_key(hdu, 'ANCRFILE')
            rmffile = _try_key(hdu, 'RESPFILE')

            # Keywords or columns
            backscal = _try_vec_or_key(hdu, 'BACKSCAL', num, fix_type=True)
            backscup = _try_vec_or_key(hdu, 'BACKSCUP', num, fix_type=True)
            backscdn = _try_vec_or_key(hdu, 'BACKSCDN', num, fix_type=True)
            areascal = _try_vec_or_key(hdu, 'AREASCAL', num, fix_type=True)

            # Columns
            channel = _require_vec(hdu, 'CHANNEL', num, fix_type=True)

            # Make sure channel numbers not indices. TLMIN is read as
            # in the single-spectrum branch, but the per-row check
            # below only looks at the first channel value of each row.
            chan = list(hdu.columns.names).index('CHANNEL') + 1
            tlmin = _try_key(hdu, 'TLMIN' + str(chan), True, SherpaUInt)

            for ii in range(num):
                if int(channel[ii][0]) == 0:
                    channel[ii] += 1

            counts = _try_vec(hdu, 'COUNTS', num, fix_type=True)
            if None in counts:
                # Scale by the EXPOSURE keyword read above. This used
                # to index an empty dictionary (data['exposure']),
                # which raised KeyError for any RATE-only file.
                counts = _require_vec(hdu, 'RATE', num, fix_type=True) * \
                    exposure

            staterror = _try_vec(hdu, 'STAT_ERR', num)
            syserror = _try_vec(hdu, 'SYS_ERR', num)
            background_up = _try_vec(hdu, 'BACKGROUND_UP', num,
                                     fix_type=True)
            background_down = _try_vec(hdu, 'BACKGROUND_DOWN', num,
                                       fix_type=True)
            bin_lo = _try_vec(hdu, 'BIN_LO', num, fix_type=True)
            bin_hi = _try_vec(hdu, 'BIN_HI', num, fix_type=True)
            grouping = _try_vec(hdu, 'GROUPING', num, SherpaInt)
            quality = _try_vec(hdu, 'QUALITY', num, SherpaInt)

            orders = _try_vec(hdu, 'TG_M', num, SherpaInt)
            parts = _try_vec(hdu, 'TG_PART', num, SherpaInt)
            specnums = _try_vec(hdu, 'SPEC_NUM', num, SherpaInt)
            srcids = _try_vec(hdu, 'TG_SRCID', num, SherpaInt)

            # Iterate over all rows of channels, counts, errors, etc
            # Populate a list of dictionaries containing
            # individual dataset info
            for (bscal, bscup, bscdn, arsc, chan, cnt, staterr, syserr,
                 backup, backdown, binlo, binhi, group, qual, ordr, prt,
                 specnum, srcid
                 ) in izip(backscal, backscup, backscdn, areascal, channel,
                           counts, staterror, syserror, background_up,
                           background_down, bin_lo, bin_hi, grouping, quality,
                           orders, parts, specnums, srcids):

                data = {}

                data['exposure'] = exposure
                # data['poisserr'] = poisserr
                data['backfile'] = backfile
                data['arffile'] = arffile
                data['rmffile'] = rmffile

                data['backscal'] = bscal
                data['backscup'] = bscup
                data['backscdn'] = bscdn
                data['areascal'] = arsc

                data['channel'] = chan
                data['counts'] = cnt
                data['staterror'] = staterr
                data['syserror'] = syserr
                data['background_up'] = backup
                data['background_down'] = backdown
                data['bin_lo'] = binlo
                data['bin_hi'] = binhi
                data['grouping'] = group
                data['quality'] = qual

                # Each row gets its own header with the row's grating
                # information added.
                data['header'] = _get_meta_data(hdu)
                data['header']['TG_M'] = ordr
                data['header']['TG_PART'] = prt
                data['header']['SPEC_NUM'] = specnum
                data['header']['TG_SRCID'] = srcid

                for key in keys:
                    try:
                        data['header'].pop(key)
                    except KeyError:
                        pass

                if syserr is not None:
                    # SYS_ERR is the fractional systematic error
                    data['syserror'] = syserr * cnt

                datasets.append(data)

    finally:
        pha.close()

    return datasets, filename
def get_table_data( arg, ncols=1, colkeys=None, make_copy=True, fix_type=True,
                    blockname=None, hdrkeys=None):
    """Read columns (and optionally keywords) from a table crate.

    get_table_data( filename , ncols=1 [, colkeys=None [, make_copy=True [,
                    fix_type=True [, blockname=None [, hdrkeys=None ]]]]])

    get_table_data( TABLECrate , ncols=1 [, colkeys=None [, make_copy=True [,
                    fix_type=True [, blockname=None [, hdrkeys=None ]]]]])

    Parameters
    ----------
    arg : str or pycrates.TABLECrate
        A file specification that pycrates does not report as an
        image, or an existing table crate.
    ncols : int, optional
        Number of leading columns used when no other column selection
        rule applies.
    colkeys : sequence, optional
        Column names to read; each is converted with ``str`` and
        stripped.
    make_copy : bool, optional
        Passed to ``_require_tbl_col``; forced to False when a crate
        is supplied.
    fix_type : bool, optional
        Passed to ``_require_tbl_col``.
    blockname : optional
        When not ``None``, ``[BLOCKNAME]`` (upper-cased) is appended to
        the file specification.
    hdrkeys : sequence of str, optional
        Keywords to read with ``_require_hdr_key``.

    Returns
    -------
    (colkeys, cols, filename, hdr)

    Raises
    ------
    IOErr
        If ``arg`` is neither of the supported inputs.
    """
    if type(arg) == str and pycrates.Crate(arg).is_image()==0:
        filename = arg
        dmsyn = ''

        # Separate the path from the first bracketed qualifier.
        if '[' in filename and ']' in filename:
            pieces = filename.split('[')
            filename = pieces[0]
            if len(pieces) > 1:
                dmsyn = pieces[1].lower()

        isbinary = True
        colnames = True
        if not is_binary_file(filename):
            isbinary = False
            with open(filename, 'r') as fd:
                # Skip the leading comment lines, keeping the last one.
                header = None
                current = fd.readline().strip()
                while current and current[0] in '#%':
                    header = current
                    current = fd.readline().strip()

            if header is not None and \
               len(header.split(' ')) != len(current.split(' ')):
                colnames = False

        if blockname is not None:
            arg += "[%s]" % str(blockname).upper()

        if not (isbinary or colnames or 'cols' in dmsyn):
            arg += "[opt colnames=none]"

        tbl = _open_crate(pycrates.TABLECrate, [arg])
        filename = tbl.get_filename()

        # Make a copy of the data, since we don't know that pycrates will
        # do something sensible wrt reference counting
    elif isinstance(arg, pycrates.TABLECrate):
        tbl = arg
        filename = arg.get_filename()
        make_copy = False
    else:
        raise IOErr('badfile', arg, 'TABLECrate obj')

    cnames = list(pycrates.get_col_names(tbl, vectors=False, rawonly=True))

    if colkeys is not None:
        colkeys = [str(name).strip() for name in list(colkeys)]
    elif (type(arg) == str and (not os.path.isfile(arg))
          and '[' in arg and ']' in arg):
        # The specification carries qualifiers and is not itself the
        # name of a file on disk: return every column.
        colkeys = cnames
    else:
        # Try CHANNEL/COUNTS, then X/Y, before falling back to the
        # first ncols columns of the table.
        for pair in (['CHANNEL', 'COUNTS'], ['X', 'Y']):
            if pair[0] in cnames and pair[1] in cnames:
                colkeys = pair
                break
        else:
            colkeys = cnames[:ncols]

    cols = []
    for name in colkeys:
        cols.extend(_require_tbl_col(tbl, name, cnames, make_copy, fix_type))

    hdr = {}
    for key in (hdrkeys if hdrkeys is not None else ()):
        hdr[key] = _require_hdr_key(tbl, key)

    return colkeys, cols, filename, hdr
def get_image_data(arg, make_copy=False):
    """Read an image and its coordinate transforms from a FITS file.

    get_image_data( filename [, make_copy=False ])

    get_image_data( [PrimaryHDU] [, make_copy=False ])

    Parameters
    ----------
    arg : str or pyfits.HDUList
        A file name for which ``is_binary_file`` returns True, or a
        non-empty HDU list whose first element is a PrimaryHDU.
    make_copy : bool, optional
        Accepted but not used by this implementation.

    Returns
    -------
    (data, filename)
        ``data`` is a dictionary with the keys ``y`` (the pixel
        values), ``sky`` and ``eqpos`` (WCS objects or ``None``) and
        ``header`` (the meta data with the WCS keywords removed).

    Raises
    ------
    IOErr
        If ``arg`` is not a supported input, or neither the first nor
        the second block contains image data.

    Notes
    -----
    The HDU list is closed before returning, including when it was
    supplied by the caller.
    """
    filename = ''
    if type(arg) == str and is_binary_file(arg):
        hdu = pyfits.open(arg)
        filename = arg
    elif ((type(arg) is pyfits.HDUList) and (len(arg) > 0) and
          (arg[0].__class__ is pyfits.PrimaryHDU)):
        hdu = arg
        filename = hdu[0]._file.name
    else:
        raise IOErr('badfile', arg,
                    "a binary FITS file or a PyFITS.PrimaryHDU list")

    #   FITS uses logical-to-world where we use physical-to-world.
    #   For all transforms, update their physical-to-world
    #   values from their logical-to-world values.
    #   Find the matching physical transform
    #      (same axis no, but sub = 'P' )
    #   and use it for the update.
    #   Physical tfms themselves do not get updated.
    #
    #  Fill the physical-to-world transform given the
    #  logical-to-world and the associated logical-to-physical.
    #      W = wv + wd * ( P - wp )
    #      P = pv + pd * ( L - pp )
    #      W = lv + ld * ( L - lp )
    # Then
    #      L = pp + ( P - pv ) / pd
    # so   W = lv + ld * ( pp + (P-pv)/pd - lp )
    #        = lv + ( ld / pd ) * ( P - [ pv +  (lp-pp)*pd ] )
    # Hence
    #      wv = lv
    #      wd = ld / pd
    #      wp = pv + ( lp - pp ) * pd
    #
    #  EG suppose phys-to-world is
    #         W =  1000 + 2.0 * ( P - 4.0 )
    #  and we bin and scale to generate a logical-to-phys of
    #         P =  20 + 4.0 * ( L - 10 )
    #  Then
    #         W = 1000 + 2.0 * ( (20-4) - 4 * 10 ) + 2 * 4 * L
    #

    try:
        data = {}

        # Use the primary block unless it has no data, in which case
        # fall back to the second block.
        img = hdu[0]
        if hdu[0].data is None:
            # Report the file name, and turn a missing second block
            # into the same error rather than a bare IndexError.
            if len(hdu) < 2 or hdu[1].data is None:
                raise IOErr('badimg', filename)
            img = hdu[1]

        data['y'] = numpy.asarray(img.data)

        cdeltp = _get_wcs_key(img, 'CDELT1P', 'CDELT2P')
        crpixp = _get_wcs_key(img, 'CRPIX1P', 'CRPIX2P')
        crvalp = _get_wcs_key(img, 'CRVAL1P', 'CRVAL2P')
        cdeltw = _get_wcs_key(img, 'CDELT1', 'CDELT2')
        crpixw = _get_wcs_key(img, 'CRPIX1', 'CRPIX2')
        crvalw = _get_wcs_key(img, 'CRVAL1', 'CRVAL2')

        # proper calculation of cdelt wrt PHYSICAL coords
        if ((cdeltw != ()) and (cdeltp != ())):
            cdeltw = cdeltw / cdeltp

        # proper calculation of crpix wrt PHYSICAL coords
        if ((crpixw != ()) and (crvalp != ()) and
                (cdeltp != ()) and (crpixp != ())):
            crpixw = crvalp + (crpixw - crpixp) * cdeltp

        sky = None
        if (cdeltp != () and crpixp != () and crvalp != ()
                and transformstatus):
            sky = WCS('physical', 'LINEAR', crvalp, crpixp, cdeltp)

        eqpos = None
        if (cdeltw != () and crpixw != () and crvalw != ()
                and transformstatus):
            eqpos = WCS('world', 'WCS', crvalw, crpixw, cdeltw)

        data['sky'] = sky
        data['eqpos'] = eqpos
        data['header'] = _get_meta_data(img)

        # The transform keywords are represented by sky/eqpos, so they
        # are dropped from the stored header.
        keys = ['MTYPE1', 'MFORM1', 'CTYPE1P', 'CTYPE2P', 'WCSNAMEP',
                'CDELT1P', 'CDELT2P', 'CRPIX1P', 'CRPIX2P',
                'CRVAL1P', 'CRVAL2P', 'MTYPE2', 'MFORM2', 'CTYPE1',
                'CTYPE2', 'CDELT1', 'CDELT2', 'CRPIX1', 'CRPIX2',
                'CRVAL1', 'CRVAL2', 'CUNIT1', 'CUNIT2', 'EQUINOX']

        for key in keys:
            try:
                data['header'].pop(key)
            except KeyError:
                pass

    finally:
        hdu.close()

    return data, filename
def get_ascii_data(filename, ncols=1, colkeys=None, sep=' ', dstype=Data1D,
                   comment='#', require_floats=True):
    r"""Read in columns from an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    ncols : int, optional
        The number of columns to read in (the first ``ncols`` columns
        in the file). This is ignored if ``colkeys`` is given.
    colkeys : array of str, optional
        An array of the column names to read in. The default is
        ``None``.
    sep : str, optional
        The separator character. The default is ``' '``.
    dstype : data class to use, optional
        Used to check that the data file contains enough columns.
    comment : str, optional
        The comment character. The default is ``'#'``.
    require_floats : bool, optional
        If ``True`` (the default), non-numeric data values will
        raise a `ValueError`.

    Returns
    -------
    (colnames, coldata, filename)
        The column names read in, the data for the columns
        as an array, with each element being the data for the column
        (the order matches ``colnames``), and the name of the file.

    Raises
    ------
    sherpa.utils.IOErr
        Raised if a requested column is missing or the file appears
        to be a binary file.
    ValueError
        If a column value can not be converted into a numeric value
        and the ``require_floats`` parameter is True.

    See Also
    --------
    read_arrays, read_data, write_arrays, write_data

    Notes
    -----
    The file is processed by reading in each line, stripping out any
    unsupported characters (replacing them by the ``sep`` argument),
    skipping empty lines, and then identifying comment and data lines.

    The list of unsupported characters are: ``\t``, ``\n``,
    ``\r``, comma, semi-colon, colon, space, and ``|``.

    The last comment line before the data is used to define the
    column names, splitting the line by the ``sep`` argument.
    If there are no comment lines then the columns are named
    starting at ``col1``, ``col2``, up to the number of columns.

    Data lines are separated into columns - splitting by the
    ``sep`` argument - and then converted to NumPy arrays.
    If the ``require_floats`` argument is ``True`` then the
    column will be converted to the `sherpa.utils.SherpaFloat`
    type, with an error raised if this fails.

    An error is raised if the number of columns per row
    is not constant.

    If the ``colkeys`` argument is used then a case-sensitive
    match is used to determine what columns to return.

    Examples
    --------

    Read in the first column from the file:

    >>> (colnames, coldata, fname) = get_ascii_data('src.dat')

    Read in the first three columns from the file:

    >>> colinfo = get_ascii_data('src.dat', ncols=3)

    Read in a histogram data set, using the columns XLO, XHI,
    and Y:

    >>> cols = ['XLO', 'XHI', 'Y']
    >>> res = get_ascii_data('hist.dat', colkeys=cols,
    ...                      dstype=sherpa.data.Data1DInt)

    Read in the first and third column from the file cols.dat,
    where the file has no header information:

    >>> res = get_ascii_data('cols.dat', colkeys=['col1', 'col3'])

    """
    if is_binary_file(filename):
        raise IOErr('notascii', filename)

    names, args = read_file_data(filename, sep, comment, require_floats)

    if colkeys is None:
        # The column-count check is skipped for the default ncols=1.
        if ncols != 1:
            _check_args(ncols, dstype)
        return (names, list(args[:ncols]), filename)

    kwargs = []
    colkeys = list(colkeys)

    if len(names) > len(args):
        raise IOErr('toomanycols')

    for key in colkeys:
        if key not in names:
            # numpy.bytes_ is the same type as the numpy.string_ alias,
            # which was removed in NumPy 2.0.
            raise IOErr('reqcol', key, numpy.asarray(names, numpy.bytes_))
        kwargs.append(args[names.index(key)])

    _check_args(len(kwargs), dstype)
    return (colkeys, kwargs, filename)
def get_ascii_data(filename, ncols=1, colkeys=None, sep=' ', dstype=Data1D,
                   comment='#', require_floats=True):
    r"""Read in columns from an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    ncols : int, optional
        The number of columns to read in (the first ``ncols`` columns
        in the file). This is ignored if ``colkeys`` is given.
    colkeys : array of str, optional
        An array of the column names to read in. The default is
        ``None``.
    sep : str, optional
        The separator character. The default is ``' '``.
    dstype : data class to use, optional
        Used to check that the data file contains enough columns.
    comment : str, optional
        The comment character. The default is ``'#'``.
    require_floats : bool, optional
        If ``True`` (the default), non-numeric data values will
        raise a `ValueError`.

    Returns
    -------
    (colnames, coldata, filename)
        The column names read in, the data for the columns
        as an array, with each element being the data for the column
        (the order matches ``colnames``), and the name of the file.

    Raises
    ------
    sherpa.utils.IOErr
        Raised if a requested column is missing or the file appears
        to be a binary file.
    ValueError
        If a column value can not be converted into a numeric value
        and the ``require_floats`` parameter is True.

    See Also
    --------
    read_arrays, read_data, write_arrays, write_data

    Notes
    -----
    The file is processed by reading in each line, stripping out any
    unsupported characters (replacing them by the ``sep`` argument),
    skipping empty lines, and then identifying comment and data lines.

    The list of unsupported characters are: ``\t``, ``\n``,
    ``\r``, comma, semi-colon, colon, space, and ``|``.

    The last comment line before the data is used to define the
    column names, splitting the line by the ``sep`` argument.
    If there are no comment lines then the columns are named
    starting at ``col1``, ``col2``, up to the number of columns.

    Data lines are separated into columns - splitting by the
    ``sep`` argument - and then converted to NumPy arrays.
    If the ``require_floats`` argument is ``True`` then the
    column will be converted to the `sherpa.utils.SherpaFloat`
    type, with an error raised if this fails.

    An error is raised if the number of columns per row
    is not constant.

    If the ``colkeys`` argument is used then a case-sensitive
    match is used to determine what columns to return.

    Examples
    --------

    Read in the first column from the file:

    >>> (colnames, coldata, fname) = get_ascii_data('src.dat')

    Read in the first three columns from the file:

    >>> colinfo = get_ascii_data('src.dat', ncols=3)

    Read in a histogram data set, using the columns XLO, XHI,
    and Y:

    >>> cols = ['XLO', 'XHI', 'Y']
    >>> res = get_ascii_data('hist.dat', colkeys=cols,
    ...                      dstype=sherpa.data.Data1DInt)

    Read in the first and third column from the file cols.dat,
    where the file has no header information:

    >>> res = get_ascii_data('cols.dat', colkeys=['col1', 'col3'])

    """
    if is_binary_file(filename):
        raise IOErr('notascii', filename)

    names, args = read_file_data(filename, sep, comment, require_floats)

    if colkeys is None:
        # The column-count check is skipped for the default ncols=1.
        if ncols != 1:
            _check_args(ncols, dstype)
        return (names, list(args[:ncols]), filename)

    kwargs = []
    colkeys = list(colkeys)

    if len(names) > len(args):
        raise IOErr('toomanycols')

    for key in colkeys:
        if key not in names:
            # numpy.bytes_ is the same type as the numpy.string_ alias,
            # which was removed in NumPy 2.0.
            raise IOErr('reqcol', key, numpy.asarray(names, numpy.bytes_))
        kwargs.append(args[names.index(key)])

    _check_args(len(kwargs), dstype)
    return (colkeys, kwargs, filename)