def read_resolutions(filename):
    """Load the ``bins`` and ``res`` tables stored in an HDF5 file.

    Parameters
    ----------
    filename: str
        File handed to ``read_table_hdf5``; it is read once per table.

    Returns
    -------
    bins, res: `astropy.table.Table, astropy.table.Table`
        The tables found at the ``bins`` and ``res`` paths, in that order.
    """
    # Read in a fixed order so the returned pair is always (bins, res).
    return tuple(
        read_table_hdf5(filename, path=table_path)
        for table_path in ('bins', 'res')
    )
def read_hdf5(path, in_memory=True):
    """Load a dataset from an HDF5 file.

    Parameters
    ----------
    path : str
        Path of the dataset
    in_memory : bool
        If True (the default), the whole ``/observations`` node is read and
        the result of ``from_observations`` is returned. If False, only the
        metadata is read and an ``HDF5Dataset`` built from the path and the
        metadata is returned, so the light curves can be loaded later.
    """
    from astropy.io.misc.hdf5 import read_table_hdf5
    import tables

    # The metadata table is needed whichever mode is requested.
    meta = read_table_hdf5(path, '/metadata')

    if not in_memory:
        # Lazy mode: leave the light curve data on disk.
        return HDF5Dataset(path, meta)

    # Eager mode: pull every observation into an astropy table.
    with tables.open_file(path, 'r') as h5_file:
        raw_observations = h5_file.get_node('/observations').read()
        observations = astropy.table.Table(raw_observations)

    return from_observations(meta, observations)
def ReadFromHDF(filename):
    """Read stock quote data from an HDF5 file into a ``DataHandler``.

    Every sub-group of the top-level ``stock`` group is loaded. Its ``quote``
    table is turned into a ``pandas.DataFrame`` whose index is the
    ``TIMESTAMP`` column (interpreted as milliseconds) and whose columns are
    ``quote_col``. The group's ``gics`` and ``name`` attributes are stored in
    ``stock_desp``.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to read.

    Returns
    -------
    DataHandler
        Handler with ``stock``, ``code``, ``stock_desp``, ``date`` and
        ``status_table`` populated, after ``compile_table()`` has been called.
    """
    pn = DataHandler()

    # The context manager closes the file even when a group fails to load;
    # the previous explicit f.close() was skipped on any exception.
    with h5py.File(filename, 'r') as f:
        # read stock data
        rgroup = f['stock']
        for key in rgroup.keys():
            grp = rgroup[key]
            attrs = grp.attrs
            astro_tb = read_table_hdf5(grp, path='quote')

            # Stack the timestamp column and every quote column side by side.
            m = np.matrix(astro_tb['TIMESTAMP'])
            for col in quote_col:
                m = np.hstack((m, np.matrix(astro_tb[col])))

            # First stacked column is the index, the remaining ones the data.
            timestamps = pandas.to_datetime(
                np.array(m[:, 0]).flatten(), unit='ms')
            data = np.array(m[:, 1:])

            pn.stock[key] = pandas.DataFrame(
                data, index=timestamps, columns=quote_col)
            pn.code.append(key)
            pn.stock_desp[key] = {
                'gics': attrs.get(name='gics'),
                'name': attrs.get(name='name'),
            }

    # NOTE(review): pandas.Panel was removed in pandas 0.25; this call only
    # works with older pandas releases.
    pn.stock = pandas.Panel(pn.stock)
    pn.date = pn.stock.major_axis
    pn.status_table = pandas.DataFrame(index=pn.date, columns=pn.code)
    pn.compile_table()
    return pn
def ReadFromHDF(filename):
    """Read stock quote data from an HDF5 file into a ``DataHandler``.

    Every sub-group of the top-level ``stock`` group is loaded. Its ``quote``
    table is turned into a ``pandas.DataFrame`` whose index is the
    ``TIMESTAMP`` column (interpreted as milliseconds) and whose columns are
    ``quote_col``. The group's ``gics`` and ``name`` attributes are stored in
    ``stock_desp``.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to read.

    Returns
    -------
    DataHandler
        Handler with ``stock``, ``code``, ``stock_desp``, ``date`` and
        ``status_table`` populated, after ``compile_table()`` has been called.
    """
    pn = DataHandler()

    # The context manager closes the file even when a group fails to load;
    # the previous explicit f.close() was skipped on any exception.
    with h5py.File(filename, 'r') as f:
        # read stock data
        rgroup = f['stock']
        for key in rgroup.keys():
            grp = rgroup[key]
            attrs = grp.attrs
            astro_tb = read_table_hdf5(grp, path='quote')

            # Stack the timestamp column and every quote column side by side.
            m = np.matrix(astro_tb['TIMESTAMP'])
            for col in quote_col:
                m = np.hstack((m, np.matrix(astro_tb[col])))

            # First stacked column is the index, the remaining ones the data.
            timestamps = pandas.to_datetime(
                np.array(m[:, 0]).flatten(), unit='ms')
            data = np.array(m[:, 1:])

            pn.stock[key] = pandas.DataFrame(
                data, index=timestamps, columns=quote_col)
            pn.code.append(key)
            pn.stock_desp[key] = {
                'gics': attrs.get(name='gics'),
                'name': attrs.get(name='name'),
            }

    # NOTE(review): pandas.Panel was removed in pandas 0.25; this call only
    # works with older pandas releases.
    pn.stock = pandas.Panel(pn.stock)
    pn.date = pn.stock.major_axis
    pn.status_table = pandas.DataFrame(index=pn.date, columns=pn.code)
    pn.compile_table()
    return pn
def _from_File(self, fname):
    """Load the content of a file into this object.

    Depending on what ``self._get_type(fname)`` reports:

    * ``"fits"``, new format (a ``seds`` extension exists): wavelengths come
      from the primary HDU, SEDs from ``seds``, the optional covariances from
      ``covdiag`` / ``covoffdiag`` (``None`` when absent), and the header and
      grid table from the ``grid`` extension.
    * ``"fits"``, old format: the last row of the primary HDU data is the
      wavelength array and the other rows are the SEDs; the header is taken
      from HDU 1 and the grid is read with ``Table.read``. The covariance
      attributes are not touched in this case.
    * ``"hdf"``: ``seds``, ``lamb`` and the optional ``covdiag`` /
      ``covoffdiag`` datasets are read, the grid table comes from the ``grid``
      group and its ``meta`` becomes the header. A ``filters`` header entry is
      passed through ``_decodebytestring``.

    Any other type leaves the object unchanged.

    Parameters
    ----------
    fname: str
        filename (incl. path) to read from
    """
    ftype = self._get_type(fname)

    if ftype == "fits":
        with fits.open(fname) as f:
            extnames = [
                f[k].header["EXTNAME"].lower() for k in range(1, len(f))
            ]
            if "seds" in extnames:  # new format
                self.lamb = f[0].data
                self.seds = f["seds"].data
                self.cov_diag = (
                    f["covdiag"].data if "covdiag" in extnames else None
                )
                self.cov_offdiag = (
                    f["covoffdiag"].data if "covoffdiag" in extnames else None
                )
                self._header = f["grid"].header
                self.grid = Table(f["grid"].data)
            else:
                # old format (used for stellar atmosphere grids, remove when
                # those are updated). The file is already open, so it is not
                # opened a second time.
                self.seds = f[0].data[:-1]
                self.lamb = f[0].data[-1]
                self._header = f[1].header
                self.grid = Table.read(fname)

    elif ftype == "hdf":
        with h5py.File(fname, "r") as s:
            self.seds = s["seds"][()]
            self.lamb = s["lamb"][()]
            self.cov_diag = s["covdiag"][()] if "covdiag" in s else None
            # Test for the dataset that is actually read. Checking "covdiag"
            # here raised KeyError when only the diagonal was stored and
            # dropped the off-diagonal terms when only they were stored.
            self.cov_offdiag = (
                s["covoffdiag"][()] if "covoffdiag" in s else None
            )
            self.grid = read_table_hdf5(s["grid"])
            self._header = self.grid.meta
            if "filters" in self._header:
                self._header["filters"] = _decodebytestring(
                    self._header["filters"])
def main():
    """Command-line entry point: plot the table of each selected target.

    Parses the command line, creates the output directory if needed, opens
    the input HDF5 file read-only and, for every selected entry of its
    ``targets`` group, reads ``table/table``, plots it with ``Plotter`` and
    saves ``<target>.png`` in the output directory.

    Raises
    ------
    ValueError
        If both a target number selection and a target name are given.
    """
    import argparse
    from exorad.__version__ import __version__
    from exorad.utils.util import parse_range

    parser = argparse.ArgumentParser(
        description='ExoRad {}'.format(__version__))
    parser.add_argument("-i", "--input", dest='input', type=str,
                        required=True, help="Input h5 file to pass")
    parser.add_argument("-o", "--out", dest='out', type=str, default='None',
                        required=True, help="Output directory")
    parser.add_argument("-n", "--target-number", dest='target_number',
                        type=str, default='all', required=False,
                        help="A list or range of targets to run")
    parser.add_argument("-t", "--target-name", dest='target_name', type=str,
                        default='None', required=False,
                        help="name of the target to plot")
    parser.add_argument("-d", "--debug", dest='debug', default=False,
                        required=False, help="log output on screen",
                        action='store_true')
    args = parser.parse_args()

    logger = logging.getLogger('exorad')
    from exorad.utils.ascii_art import ascii_plot
    logger.info(ascii_plot)
    logger.info('code version {}'.format(__version__))

    if args.debug:
        setLogLevel(logging.DEBUG)

    if not os.path.exists(args.out):
        os.makedirs(args.out)
        logger.info('output directory created')

    # Reject conflicting selections before touching the input file.
    if args.target_number != 'all' and args.target_name != 'None':
        message = 'you cannot use both target number and target name'
        logger.error(message)
        raise ValueError(message)

    logger.info('reading {}'.format(args.input))
    # Open read-only and close the file on every exit path; the handle used
    # to be left open and relied on the h5py default mode.
    with h5py.File(args.input, 'r') as file:
        targets_dir = file['targets']
        # Build the key list once instead of once per selected index.
        target_names = list(targets_dir.keys())
        targets_to_run_id = parse_range(args.target_number, len(target_names))
        targets_to_run = [target_names[n] for n in targets_to_run_id]
        if args.target_name != 'None':
            targets_to_run = [target for target in targets_to_run
                              if target == args.target_name]

        for target in targets_to_run:
            table_dir = targets_dir[target]['table']
            table = read_table_hdf5(table_dir, path='table')
            plotter = Plotter(input_table=table)
            plotter.plot_table()
            plotter.save_fig(os.path.join(args.out, '{}.png'.format(target)))
            plt.close()
def test_hdf5():
    """Check that a pipeline table survives a round trip through HDF5.

    A three-column table is generated, written to ``output.hdf5`` and read
    back; the result must equal the table held by the pipeline.
    """
    n_rows = 100
    letters = n_rows * 'a'

    # Two random columns (the second bounded below by the first) plus a
    # column of single characters.
    table_spec = {
        'column1': Call(np.random.uniform, [], {'size': n_rows}),
        'column2': Call(np.random.uniform, [],
                        {'low': Ref('test_table.column1')}),
        'column3': Call(list, [letters], {}),
    }
    config = {'tables': {'test_table': table_spec}}

    pipeline = Pipeline(config)
    pipeline.execute()
    pipeline.write('output.hdf5')

    round_tripped = read_table_hdf5(
        'output.hdf5', 'tables/test_table', character_as_bytes=False)

    assert np.all(round_tripped == pipeline['test_table'])
def execute(self):
    """Rebuild the payload and its channels from the task's ``input``.

    The ``payload`` entry of the ``input`` task parameter provides the
    payload description and a ``channels`` container. For each channel the
    instrument class named by ``channelClass`` is instantiated and loaded
    with the channel's table and its ``built_instr`` entry. The task output
    is set to ``[payload, channels]``.
    """
    payload_dir = self.get_task_param('input')['payload']
    payload = load(payload_dir['payload description'])
    channels_dir = payload_dir['channels']

    channels = {}
    for name in channels_dir.keys():
        node = channels_dir[name]
        description = load(node['description'])

        # The class name is stored as bytes; normalise it before the lookup.
        class_key = description['channelClass']['value'].lower().decode(
            "utf-8")
        instrument_cls = instruments[class_key]

        channel = instrument_cls(
            name=name,
            description=description,
            payload=payload,
        )
        channels[name] = channel

        table = read_table_hdf5(node, path=name)
        built_instr = load(node['built_instr'])
        channel.load(table, built_instr)

    self.debug('channels loaded: {}'.format(channels))
    self.set_output([payload, channels])
def zodiacal_fit_direction(self, coord):
    """Return the zodiacal coefficient of the map entry nearest to ``coord``.

    The ``data`` directory is searched for in the directory of this source
    file and then in up to ten of its parents. ``Zodi_map.hdf5`` is read from
    it, and the row that minimises the squared difference in ``ra_icrs`` and
    ``dec_icrs`` with respect to the input is selected.

    Parameters
    ----------
    coord : sequence
        ``coord[0]`` is the right ascension and ``coord[1]`` the declination.
        Both are subtracted from table values multiplied by ``u.deg``, so
        they are presumably angular quantities -- TODO confirm.

    Returns
    -------
    The ``zodi_coeff`` value of the selected row.

    Raises
    ------
    OSError
        If no ``data`` directory is found or the map file cannot be read.
    """
    import os
    from pathlib import Path
    from astropy.io.misc.hdf5 import read_table_hdf5
    import numpy as np

    ra_input = coord[0]
    dec_input = coord[1]

    # Walk up from this file's directory until a ``data`` directory shows up.
    # The search is bounded: the previous ``while ... or i > 10`` condition
    # never terminated when the directory did not exist, which also made the
    # error branch unreachable.
    max_parent_levels = 10
    dir_path = Path(os.path.dirname(os.path.realpath(__file__)))
    for _ in range(max_parent_levels + 1):
        if (dir_path / 'data').is_dir():
            break
        dir_path = dir_path.parent
    else:
        self.error('Zodi map file not found')
        raise OSError('Zodi map file not found')

    data_path = os.path.join(dir_path.absolute().as_posix(), 'data')
    zodi_map_file = os.path.join(data_path, 'Zodi_map.hdf5')
    self.debug('map data:{}'.format(zodi_map_file))

    # Only the read can fail with OSError, so keep the try body minimal.
    try:
        zodi_table = read_table_hdf5(zodi_map_file)
    except OSError as err:
        self.error('Zodi map file not found')
        raise OSError('Zodi map file not found') from err

    self.debug(zodi_table)
    distance = (zodi_table['ra_icrs'] * u.deg - ra_input)**2 + (
        zodi_table['dec_icrs'] * u.deg - dec_input)**2
    idx = np.argmin(distance)
    self.debug('selected line {}'.format(idx))
    self.debug(zodi_table[idx])
    return zodi_table['zodi_coeff'][idx]
def populate_table_clu(con, cur, tbl=None, max_dist=100.,
                       path_clu='CLU_20190708_marshalFormat.hdf5'):
    '''
    Crossmatch the candidates with the CLU galaxy catalog.

    ---
    Parameters

    con, cur
        connection and cursor for the psql database

    tbl astropy.table
        photometry table. If provided, every candidate named in the table is
        crossmatched, using the mean of its ra and dec values. If None, all
        the candidates in the db with clu_match NULL are crossmatched.

    max_dist float
        largest projected distance (in kpc) from CLU galaxies. This value is
        now honoured; it used to be ignored in favour of a hard-coded 120.

    path_clu str
        path to the CLU catalog filename

    ---
    Returns

    Nothing. It populates the crossmatch table with CLU galaxies and it
    updates clu_match in the candidate table (1 for matched candidates,
    0 for the others).
    '''
    if tbl is None:
        # Get candidates that have not been matched already
        cur.execute("select name, ra, dec from candidate "
                    "where clu_match is NULL")
        rows = cur.fetchall()
        names = [row[0] for row in rows]
        ra = [row[1] for row in rows]
        dec = [row[2] for row in rows]
    else:
        names = list(set(tbl['name']))
        ra = [np.mean(tbl['ra'][tbl['name'] == name]) for name in names]
        dec = [np.mean(tbl['dec'][tbl['name'] == name]) for name in names]
    coords = SkyCoord(ra=np.array(ra) * u.deg, dec=np.array(dec) * u.deg)

    # Marks for the ingestion: one placeholder per inserted column
    marks = ",".join(["%s"] * 24)
    insert_sql = (
        "INSERT INTO crossmatch (id, name, clu_id, "
        "clu_ra, clu_dec, clu_z, clu_zerr, clu_distmpc, "
        "clu_mstar, clu_sfr_fuv, clu_sfr_ha, "
        "clu_w1mpro, clu_w1sigmpro, clu_w2mpro, "
        "clu_w2sigmpro, clu_w3mpro, clu_w3sigmpro, "
        "clu_w4mpro, clu_w4sigmpro, clu_type_ned, "
        "clu_a, clu_b2a, clu_dist_kpc, clu_sep_arcsec) "
        f"VALUES ({marks})"
    )

    # New crossmatch rows continue from the largest id already present
    cur.execute("SELECT MAX(id) from crossmatch")
    maxid = cur.fetchall()[0][0]
    if maxid is None:
        maxid = 0

    # Read CLU, keeping only the galaxies farther than 4 Mpc
    clu = read_table_hdf5(path_clu)
    clu = clu[clu['distmpc'] > 4]
    clu_coords = SkyCoord(ra=clu['ra'] * u.deg, dec=clu['dec'] * u.deg)

    names_match = []
    names_no_match = []
    for name, coord in zip(names, coords):
        sep = clu_coords.separation(coord)
        # Projected distance: distance (kpc) times tan(separation)
        dist_kpc = clu['distmpc'] * (10**3) * np.sin(sep) / np.cos(sep)
        # Negative values are discarded before applying the cutoff
        nearby = (dist_kpc >= 0) & (dist_kpc < max_dist)
        clu_match = clu[nearby]
        sep_match = sep[nearby]
        dist_match = dist_kpc[nearby]

        if len(clu_match) == 0:
            names_no_match.append(name)
            continue

        names_match.append(name)
        for c, d, s in zip(clu_match, dist_match, sep_match):
            maxid += 1
            cur.execute(
                insert_sql,
                (maxid, name, int(c['cluid']), c['ra'], c['dec'], c['z'],
                 c['zerr'], c['distmpc'], c['mstar'], c['sfr_fuv'],
                 c['sfr_ha'], c['w1mpro'], c['w1sigmpro'], c['w2mpro'],
                 c['w2sigmpro'], c['w3mpro'], c['w3sigmpro'], c['w4mpro'],
                 c['w4sigmpro'], c['type_ned'], c['a'], c['b2a'], float(d),
                 s.arcsec))
        con.commit()

    # Update the candidate table. The names are passed as a query parameter
    # (a tuple is rendered as an IN list by psycopg2) instead of being pasted
    # into the SQL text, so a quote in a name can no longer break or alter
    # the statement.
    if names_match:
        cur.execute(
            "UPDATE candidate SET clu_match = 1 where name in %s",
            (tuple(str(n) for n in names_match),))
    if names_no_match:
        cur.execute(
            "UPDATE candidate SET clu_match = 0 where name in %s",
            (tuple(str(n) for n in names_no_match),))

    # Commit the changes
    con.commit()
# pzero_model[:,j] = (numpy.random.normal(loc=p1[j], # scale=p2[j], size=nwalkers)) #if p4[j] == 'pos': # pzero[:, j] = numpy.abs(pzero[:, j]) if pzero == []: pzero = pzero_model else: pzero = numpy.append(pzero, pzero_model, axis=1) # Use an intermediate posterior PDF to initialize the walkers if it exists posteriorloc = 'posteriorpdf.hdf5' if os.path.exists(posteriorloc): # read the latest posterior PDFs print "Found existing posterior PDF file: " + posteriorloc posteriordat = hdf5.read_table_hdf5(posteriorloc) if len(posteriordat) > 1: # assign values to pzero nlnprob = 1 pzero = numpy.zeros((nwalkers, nparams)) startindx = nlnprob #+ previousndim_model for j in range(nparams): namej = posteriordat.colnames[j + startindx] pzero[:, j] = posteriordat[namej][-nwalkers:] # output name is based on most recent burnin file name realpdf = True else: realpdf = False else:
#Y = histogram( im1[goodregion], bin=0.0006, locations=X ) #Result = GAUSSFIT( X, Y, A ) #rms = A[2] rms = im[goodregion].std() #print rms #npix_sma2 = math.pi * bmaj2/2 * bmin2/2 / celldata**2 / math.log(2) immin = -rms immax = im.max() #------------------------------------------------------------------------------ # Read best-fit results file bestfitloc = 'posteriorpdf.hdf5' # read the latest posterior PDFs print "Found latest posterior PDF file: " + bestfitloc fitresults = hdf5.read_table_hdf5(bestfitloc) #fitresults = Table.read(bestfitloc, format='ascii') # identify best-fit model minchi2 = fitresults['lnprob'].max() indx = fitresults['lnprob'] == minchi2#fitresults['lnprob'][1] bestfit = fitresults[indx][0] nmu = 2 * (numpy.array(nsource_regions).sum() + nregions) pzero_regions = list(bestfit.data)[1:-nmu] #if len(bestfit) > 1: #bestfit = bestfit[0] print objectname, bestfit.data #rint bestfit['shear']
rc('font',**{'family':'sans-serif','sans-serif':['Arial Narrow'],'size':'6'}) ## for Palatino and other serif fonts use: #rc('font',**{'family':'serif','serif':['New Century Schoolbook']}) #rc('text', usetex=True) nticks = 5 #deltachi2 = 100 posteriorloc = 'posteriorpdf.hdf5' # read posterior PDF print "Reading output from emcee" fitresults = hdf5.read_table_hdf5(posteriorloc) fitresults = fitresults[-5000:] print 'prior to pruning: ', fitresults['lnprob'].mean() # identify the good fits fitresultsgood = modifypdf.prune(fitresults) # determine dimensions of PDF plots nparams = len(fitresultsgood[0]) ncol = 4 nrow = nparams / ncol + 1 j = 1 fig = mpl.figure(figsize=(8.0, 1.0 * nrow)) # set up the plotting window
def test_sedgrid(cformat, cback, copygrid):
    """
    Exercise the SEDGrid class: in-memory construction, writing to disk and
    reading back with the requested backend, optionally through a copy.

    Parameters
    ----------
    cformat : str
        File suffix used for the temporary file (compared below against
        ".fits" and ".hdf").
    cback : str
        Backend used to read the file back.
    copygrid : bool
        Whether the checks run on a copy of the grid that was read.
    """
    filter_names = ["BAND1", "BAND2", "BAND3"]
    n_bands = 3
    n_models = 100
    lamb = [1.0, 2.0, 3.0]
    seds = np.zeros((n_models, n_bands))
    cov_diag = np.full((n_models, n_bands), 0.1)
    # number of distinct off-diagonal terms of an n_bands x n_bands matrix
    n_offdiag = ((n_bands**2) - n_bands) // 2
    cov_offdiag = np.full((n_models, n_offdiag), 1.0)
    cols = {"Av": [1.0, 1.1, 1.3], "Rv": [2.0, 3.0, 4.0]}
    header = {"Origin": "test_code"}
    gtable = Table(cols)
    gtable.meta = header

    tgrid = SEDGrid(
        lamb,
        seds=seds,
        grid=gtable,
        header=header,
        cov_diag=cov_diag,
        cov_offdiag=cov_offdiag,
        backend="memory",
    )
    tgrid.header["filters"] = " ".join(filter_names)

    # properties every grid must expose
    expected_props = [
        "lamb",
        "seds",
        "cov_diag",
        "cov_offdiag",
        "grid",
        "nbytes",
        "filters",
        "header",
        "keys",
    ]
    # (attribute, expected value, label for the in-memory check,
    #  label for the file check)
    array_checks = (
        ("lamb", lamb, "lambdas", "lambdas"),
        ("seds", seds, "seds", "seds"),
        ("cov_diag", cov_diag, "covdiag", "cov_diag"),
        ("cov_offdiag", cov_offdiag, "covoffdiag", "cov_offdiag"),
    )

    # ---- in-memory grid ----
    for cprop in expected_props:
        assert hasattr(tgrid, cprop), f"missing {cprop} property"

    for attr, expected, mem_label, _ in array_checks:
        np.testing.assert_allclose(getattr(tgrid, attr), expected,
                                   err_msg=f"{mem_label} not equal")
    assert isinstance(tgrid.nbytes,
                      (int, np.integer)), "grid nbytes property not integer"

    compare_tables(tgrid.grid, gtable)
    assert tgrid.grid.keys() == list(cols.keys()), "colnames of grid not equal"

    assert tgrid.filters == filter_names, "filters of grid not equal"

    # ---- write to disk and read back ----
    print(f"testing {cformat} file format")
    tfile = NamedTemporaryFile(suffix=cformat)

    # write the file
    tgrid.write(tfile.name)

    # read in the file using different backends
    if (cback == "disk") and (cformat == ".fits"):  # not supported
        return True

    print(f" testing {cback} backend")
    dgrid_in = SEDGrid(tfile.name, backend=cback)

    # optionally run the remaining checks on a copy
    print(f" testing copygrid={copygrid}")
    dgrid = dgrid_in.copy() if copygrid else dgrid_in
    print(dgrid)

    for cprop in expected_props:
        assert hasattr(dgrid, cprop), f"missing {cprop} property"

    # check that the grid has the expected values

    # The filter check is disabled: on the online Travis CI it somehow picks
    # up another file with HST filter names (cause unknown), although it
    # works fine offline.
    # assert dgrid.filters == filter_names, "{cformat} file filters not equal"

    assert len(dgrid) == n_bands, f"{cformat} file len not equal"

    for attr, expected, _, file_label in array_checks:
        np.testing.assert_allclose(
            getattr(dgrid, attr),
            expected,
            err_msg=f"{cformat} file grid {file_label} not equal",
        )
    assert isinstance(
        dgrid.nbytes,
        (int, np.integer)), f"{cformat} file grid nbytes property not integer"

    # with the disk backend on an HDF file the grid must be converted first
    dTable = dgrid.grid
    if (cback == "disk") and (cformat == ".hdf"):
        dTable = read_table_hdf5(dgrid.grid)
    compare_tables(dTable, gtable, otag=f"{cformat} file")

    assert dTable.keys() == list(
        cols.keys()), f"{cformat} file colnames of grid not equal"

    assert dgrid.keys() == tgrid.keys(
    ), f"{cformat} file colnames of grid not equal"

    # final copy - needed for disk backend to get the now defined variables
    print(dgrid)
    dgrid_fin = dgrid.copy()
    print(dgrid_fin)
Purpose: Plot convergence of lnprob

"""

from astropy.io.misc import hdf5
import matplotlib.pyplot as plt
from pylab import savefig
import os
import numpy

# Column of the posterior table that is plotted.
keyname = 'lnprob'

posteriorloc = 'posteriorpdf.hdf5'

print "Reading burnin results from " + posteriorloc
pdf = hdf5.read_table_hdf5(posteriorloc)

# Plot the distance of every sample from the best (maximum) lnprob, so the
# curve approaches zero as the samples approach the maximum.
lnprob = pdf[keyname]
lnprob = numpy.array(lnprob)
lnprob = lnprob.max() - lnprob
lnprob = numpy.abs(lnprob)

plt.clf()
plt.plot(lnprob, ',', alpha=0.5)
plt.xlabel('iteration')
plt.ylabel('max(lnprob) - lnprob')

# The title is a slice of the working directory path, taken between the
# 'ModelFits' and 'uvfit' markers.
# NOTE(review): str.find returns -1 when a marker is missing, which makes the
# slice meaningless -- confirm the script is always run from such a path.
tmpcwd = os.getcwd()
startindx = tmpcwd.find('ModelFits') + 10
endindx = tmpcwd.find('uvfit') + 7
objname = tmpcwd[startindx:endindx]
plt.title(objname)
def write_hdf5(self, path, append=False, overwrite=False, object_id_itemsize=0,
               band_itemsize=0):
    """Write the dataset to an HDF5 file

    The observations of all light curves are stored as one table at
    ``/observations`` and the metadata table at ``/metadata``.

    Parameters
    ----------
    path : str
        Output path to write to
    append : bool, optional
        Whether to append if there is an existing file, default False. If
        both `append` and `overwrite` are set, the data is appended.
    overwrite : bool, optional
        Whether to overwrite if there is an existing file, default False
    object_id_itemsize : int, optional
        Width to use for the object_id string column. Inferred from the longest
        string if not specified.
    band_itemsize : int, optional
        Width to use for the band string column. Inferred from the longest string
        if not specified.

    Raises
    ------
    OSError
        If the file exists and neither `append` nor `overwrite` is set.
    ValueError
        When appending, if a string column of the existing file is too
        narrow for the new data.
    """
    from astropy.io.misc.hdf5 import write_table_hdf5, read_table_hdf5
    import tables

    meta = self.meta

    # Figure out what we are doing.
    if os.path.exists(path):
        if append:
            # Append to an existing file (this takes precedence over
            # overwrite). We merge the metadata and overwrite what was
            # previously there since there can often be differences in the
            # columns/formats. The observations are in a consistent format,
            # so we can just append them.
            old_meta = read_table_hdf5(path, '/metadata')

            # Check that there is no overlap.
            verify_unique(old_meta['object_id'], meta['object_id'])

            # Stack the metadata and sort it by object_id. It is rewritten
            # as a whole at the end of this method.
            meta = astropy.table.vstack([old_meta, meta])
            meta = meta[np.argsort(meta['object_id'])]
        elif overwrite:
            os.remove(path)
        else:
            raise OSError(f"File exists: {path}")
    else:
        # No file there, so appending is the same as writing to a new file.
        append = False

    # Write out the LC data
    with tables.open_file(path, 'a') as f:
        # Figure out the dtype of our data. We need to use fixed length ASCII
        # strings in HDF5. Find the longest strings in each column to not waste
        # unnecessary space.
        for lc in self.light_curves:
            object_id_itemsize = max(object_id_itemsize,
                                     len(lc.meta['object_id']))
            band_itemsize = max(band_itemsize,
                                get_str_dtype_length(lc['band'].dtype))

        if append:
            # Make sure that the column sizes used in the file are at least as long
            # as what we want to append.
            obs_node = f.get_node('/observations')

            for key, itemsize in (('object_id', object_id_itemsize),
                                  ('band', band_itemsize)):
                file_itemsize = obs_node.col(key).itemsize
                if file_itemsize < itemsize:
                    # TODO: handle resizing the table automatically.
                    raise ValueError(
                        f"File column size too small for key '{key}' "
                        f"(file={file_itemsize}, new={itemsize}). Can't append. "
                        f"Specify a larger value for '{key}_itemsize' when "
                        f"initially creating the file.")

            # Reuse the layout of the existing table.
            dtype = obs_node.dtype
        else:
            # TODO: make this format configurable.
            dtype = [
                ('object_id', f'S{object_id_itemsize}'),
                ('time', 'f8'),
                ('flux', 'f4'),
                ('fluxerr', 'f4'),
                ('band', f'S{band_itemsize}'),
            ]

        # Setup an empty record array. The built-in sum keeps the length an
        # integer even when there are no light curves; np.sum of an empty
        # list is the float 0.0, which is not a valid array shape.
        length = sum(len(lc) for lc in self.light_curves)
        data = np.recarray((length, ), dtype=dtype)

        # Copy every light curve into its slice of the record array.
        start = 0
        for lc in self.light_curves:
            end = start + len(lc)

            data['object_id'][start:end] = lc.meta['object_id']
            data['time'][start:end] = lc['time']
            data['flux'][start:end] = lc['flux']
            data['fluxerr'][start:end] = lc['fluxerr']
            data['band'][start:end] = lc['band']

            start = end

        # Write out the observations.
        if append:
            f.get_node('/observations').append(data)
        else:
            filters = tables.Filters(complevel=5, complib='blosc',
                                     fletcher32=True)
            table = f.create_table('/', 'observations', data, filters=filters)
            table.cols.object_id.create_index()

    # Write out the metadata. This happens after the PyTables handle is
    # closed, and always replaces any '/metadata' table already in the file.
    write_table_hdf5(meta, path, '/metadata', overwrite=True, append=True,
                     serialize_meta=True)