import numpy as np
from astropy.table import Table
from astropy.io.misc.hdf5 import write_table_hdf5


def write_angular_resolutions(outfile, e_bins, res, overwrite=False, append=True):
    """
    Save the computed angular resolutions in HDF5 format.

    Parameters
    ----------
    outfile: str
        output file name
    e_bins: `numpy.ndarray`
        energy bin edges
    res: `numpy.ndarray`
        columns: angular resolution, lower error, upper error
    overwrite: bool
    append: bool
    """
    e_bins_t = Table(data=e_bins[..., np.newaxis], names=['energy_bins'])
    names = ['angular_res', 'angular_res_err_lo', 'angular_res_err_hi']
    res_t = Table(data=res, names=names)
    write_table_hdf5(e_bins_t, outfile, path='bins',
                     overwrite=overwrite, append=append, serialize_meta=True)
    write_table_hdf5(res_t, outfile, path='res', append=True)
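# A minimal usage sketch for write_angular_resolutions, with synthetic values;
# the file name 'resolutions.h5' is illustrative.
e_bins = np.logspace(-1, 2, 11)                   # 11 edges -> 10 energy bins
ang_res = np.random.uniform(0.05, 0.3, (10, 3))   # res, err_lo, err_hi per bin
write_angular_resolutions('resolutions.h5', e_bins, ang_res, overwrite=True)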
def write_energy_resolutions(outfile, e_bins, res, bias=None, overwrite=False, append=True):
    """
    Save the computed energy resolutions in HDF5 format.

    Parameters
    ----------
    outfile: str
        output file name
    e_bins: `numpy.ndarray`
        energy bin edges
    res: `numpy.ndarray`
        columns: energy resolution, lower error, upper error
    bias: `numpy.ndarray`, optional
        energy bias per bin; appended as an extra column when given
    overwrite: bool
    append: bool
    """
    e_bins_t = Table(data=e_bins[..., np.newaxis], names=['energy_bins'])
    data = res
    names = ['energy_res', 'energy_res_err_lo', 'energy_res_err_hi']
    if bias is not None:
        data = np.append(data, bias[..., np.newaxis], axis=1)
        names.append('energy_bias')
    res_t = Table(data=data, names=names)
    write_table_hdf5(e_bins_t, outfile, path='bins',
                     overwrite=overwrite, append=append, serialize_meta=True)
    write_table_hdf5(res_t, outfile, path='res', append=True)
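# Sketch: the same pattern with an energy bias column, then reading both
# tables back; read_table_hdf5 mirrors the writer, and the bin edges are
# reused from the sketch above.
from astropy.io.misc.hdf5 import read_table_hdf5

e_res = np.random.uniform(0.1, 0.4, (10, 3))
bias = np.random.uniform(-0.05, 0.05, 10)
write_energy_resolutions('energy_res.h5', e_bins, e_res, bias=bias, overwrite=True)

bins_t = read_table_hdf5('energy_res.h5', path='bins')
res_t = read_table_hdf5('energy_res.h5', path='res')  # has the 'energy_bias' column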
import datetime

import h5py
import numpy as np
from astropy.table import Table
from astropy.io.misc.hdf5 import write_table_hdf5


def WriteToHDF(datahandler, filename, datatype="all"):
    """WriteToHDF: write quote data to an HDF5 file, one group per stock."""
    code = ""
    f = h5py.File(filename, 'w')
    # attributes prepared for HDF
    if datatype == "all":
        panel = datahandler.stock
        mt = {
            "version": "3.0",
            "CLASS": "TABLE",
            "TITLE": "quote",
            "FIELD_0_NAME": "TIMESTAMP",
            "FIELD_1_NAME": "Open",
            "FIELD_2_NAME": "Close",
            "FIELD_3_NAME": "High",
            "FIELD_4_NAME": "Low",
            "FIELD_5_NAME": "Prev",
            "FIELD_6_NAME": "Vol",
            "FIELD_7_NAME": "Amt",
        }
        # loop to write stock by stock; 'quote_col' and 'stock_desp' are
        # module-level globals from the surrounding code
        for code, frame in panel.items():
            ind = frame.index.values
            # nanosecond timestamps -> milliseconds, as a column vector
            head = np.matrix((ind.astype('uint64') / 1e6).astype('uint64')).T
            belly = frame.to_numpy()  # was the deprecated DataFrame.as_matrix()
            dt = np.hstack((head, belly))
            col = np.append(['TIMESTAMP'], quote_col)
            # drop rows containing NaNs
            dt = dt[~np.isnan(dt).any(axis=1).A1]
            # ----this line consumes much time, needs optimization
            tb = Table(dt, names=col, meta=mt,
                       dtype=('i8', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4'))
            # ---- end -------------
            grp = f.create_group("/stock/" + code)
            write_table_hdf5(table=tb, output=grp, path='quote', overwrite=True)
            first_ts = int(head[0, 0])
            last_ts = int(head[-1, 0])
            grp.attrs['code'] = code
            grp.attrs['name'] = stock_desp[code]['name']
            grp.attrs['gics'] = stock_desp[code]['gics']
            grp.attrs['first'] = first_ts
            grp.attrs['last'] = last_ts
            grp.attrs['beg'] = datetime.datetime.utcfromtimestamp(
                first_ts / 1000).strftime('%Y%m%d.%H:%M:%S')
            grp.attrs['end'] = datetime.datetime.utcfromtimestamp(
                last_ts / 1000).strftime('%Y%m%d.%H:%M:%S')
            grp.attrs['nrow'] = ind.size
    f.close()
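# Read-back sketch for the layout produced by WriteToHDF; the stock code
# '000001' and file name are hypothetical.
import h5py
from astropy.io.misc.hdf5 import read_table_hdf5

with h5py.File('quotes.h5', 'r') as fin:
    grp = fin['/stock/000001']
    quote = read_table_hdf5(grp, path='quote')
    print(grp.attrs['name'], grp.attrs['nrow'], quote.colnames)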
import os

import tables
from astropy.io.misc.hdf5 import write_table_hdf5


def dump_plus_copy_node_to_create_new_table(
    input_filename,
    hfile_out,
    astropy_table_to_copy,
    newparent_pointer,
    newname_pointer,
    tmp_name,
    overwrite=False,
):
    """
    General function to write an astropy table to a temporary file, and
    immediately afterwards copy it to the output v0.6 hfile.

    Parameters
    ----------
    input_filename : str
        input hfile name
    hfile_out : tables.File
        output file pointer
    astropy_table_to_copy : astropy.table.Table
        table to be copied
    newparent_pointer : tables.Group
        `newparent` parameter of the copy_node method
    newname_pointer : str
        `newname` parameter of the copy_node method
    tmp_name : str
        flag to identify the temporary table and make it unique
        (necessary when simultaneous reorganizers are run in the same dir)
    overwrite : bool
        `overwrite` parameter of the copy_node method
    """
    input_filename = input_filename.split("___")[0]
    if tmp_name == "":
        flag_name = "UNKNOWN"
    else:
        flag_name = tmp_name

    temp_table_name = f"{input_filename}_tmp_table_reorganizer_{flag_name}.h5"
    write_table_hdf5(astropy_table_to_copy, temp_table_name, path="/root")

    temp_table = tables.open_file(temp_table_name, "r")
    hfile_out.copy_node(
        temp_table.root.root,
        newparent=newparent_pointer,
        newname=newname_pointer,
        overwrite=overwrite,
    )
    temp_table.close()
    os.remove(temp_table_name)
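# Hypothetical usage of the helper above: copy a small astropy table into an
# open PyTables file; every name here is illustrative.
import tables
from astropy.table import Table

t = Table({'x': [1, 2, 3]})
with tables.open_file('merged_v0.6.h5', 'w') as hfile_out:
    dump_plus_copy_node_to_create_new_table(
        'run101___chunk0', hfile_out, t,
        newparent_pointer=hfile_out.root,
        newname_pointer='params',
        tmp_name='job0',
    )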
import os

import emcee
import numpy
from astropy.io.misc import hdf5

# Excerpt from an MCMC driver: the sampler inputs (nwalkers, nparams, lnprob,
# pzero, the args list, pool, ...) and the posteriordat table come from the
# surrounding setup. NOTE: the guard below is not part of the excerpt; an
# 'mpi' flag is assumed from that setup.
if not mpi:
    # Single processor with Nthreads cores
    sampler = emcee.EnsembleSampler(
        nwalkers, nparams, lnprob,
        args=[p_u, p_l, fixindx, real, imag, wgt, uuu, vvv, pcd,
              lnlikemethod, x, y, modelheader, celldata, model_types,
              nregions, nlens_regions, nsource_regions],
        threads=Nthreads)
else:
    # Multiple processors using MPI
    sampler = emcee.EnsembleSampler(
        nwalkers, nparams, lnprob, pool=pool,
        args=[p_u, p_l, fixindx, real, imag, wgt, uuu, vvv, pcd,
              lnlikemethod, x, y, modelheader, celldata, model_types,
              nregions, nlens_regions, nsource_regions])

# Sample, outputting to a file
os.system('date')
for pos, prob, state, amp in sampler.sample(pzero, iterations=10000):
    print(numpy.mean(sampler.acceptance_fraction))
    os.system('date')
    # ff.write(str(prob))
    yesamp = amp > 0
    namp = len(amp[yesamp])
    superpos = numpy.zeros(1 + nparams + namp)
    for wi in range(nwalkers):
        superpos[0] = prob[wi]
        superpos[1:nparams + 1] = pos[wi]
        superpos[nparams + 1:nparams + namp + 1] = amp[wi]
        posteriordat.add_row(superpos)
    # rewrite the full posterior table once per sampling step
    hdf5.write_table_hdf5(posteriordat, 'posteriorpdf.hdf5',
                          path='/posteriorpdf', overwrite=True,
                          compression=True)
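# Sketch: loading the saved chain back for inspection; the path matches the
# write above.
posterior = hdf5.read_table_hdf5('posteriorpdf.hdf5', path='/posteriorpdf')
print(len(posterior), posterior.colnames)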
s_table['RP_ERR_NORM'] = rpArrErrNorm
s_table['J_NORM'] = JArrNorm
s_table['J_ERR_NORM'] = JArrErrNorm
s_table['H_NORM'] = HArrNorm
s_table['H_ERR_NORM'] = HArrErrNorm
s_table['K_NORM'] = KArrNorm
s_table['K_ERR_NORM'] = KArrErrNorm

# The targets
s_table['age'] = ageArr
s_table['ageErr'] = ageArrErr
s_table['logAge'] = logAgeArr
s_table['logAgeErr'] = logAgeArrErr
s_table['distKpc'] = distArr
s_table['distErrKpc'] = distArrErr
s_table['logDistKpc'] = logDistArr
s_table['logDistKpcErr'] = logDistArrErr

NameInput = input("Input name of the dataset > ")

# Write the same table out as CSV, FITS and HDF5.
s_table_pd = s_table.to_pandas()
s_table_pd.to_csv('../HBNN_train_data/AllTrainedNormAugShuffled_%s.csv' % NameInput)
s_table.write('../HBNN_train_data/AllTrainedNormAugShuffled_%s.fits' % NameInput,
              overwrite=True)
write_table_hdf5(s_table,
                 '../HBNN_train_data/AllTrainedNormAugShuffled_%s.hdf5' % NameInput,
                 path='updated_data', overwrite=True)
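# Sketch: reloading the three outputs written above, assuming the dataset was
# named 'demo' at the prompt.
from astropy.table import Table
from astropy.io.misc.hdf5 import read_table_hdf5

t_fits = Table.read('../HBNN_train_data/AllTrainedNormAugShuffled_demo.fits')
t_hdf5 = read_table_hdf5('../HBNN_train_data/AllTrainedNormAugShuffled_demo.hdf5',
                         path='updated_data')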
def write_hdf5(self, path, append=False, overwrite=False,
               object_id_itemsize=0, band_itemsize=0):
    """Write the dataset to an HDF5 file

    Parameters
    ----------
    path : str
        Output path to write to
    append : bool, optional
        Whether to append if there is an existing file, default False
    overwrite : bool, optional
        Whether to overwrite if there is an existing file, default False
    object_id_itemsize : int, optional
        Width to use for the object_id string column. Inferred from the
        longest string if not specified.
    band_itemsize : int, optional
        Width to use for the band string column. Inferred from the longest
        string if not specified.
    """
    from astropy.io.misc.hdf5 import write_table_hdf5, read_table_hdf5
    import tables

    meta = self.meta

    # Figure out what we are doing.
    if os.path.exists(path):
        if not append and not overwrite:
            raise OSError(f"File exists: {path}")
        elif append:
            # Append to an existing file. We merge the metadata and overwrite
            # what was previously there since there can often be differences
            # in the columns/formats. The observations are in a consistent
            # format, so we can just append them.
            old_meta = read_table_hdf5(path, '/metadata')
            current_meta = self.meta

            # Check that there is no overlap.
            verify_unique(old_meta['object_id'], current_meta['object_id'])

            # Stack the metadata. We rewrite it and overwrite whatever was
            # there before.
            meta = astropy.table.vstack([old_meta, self.meta])

            # Sort the metadata by the object_id.
            meta = meta[np.argsort(meta['object_id'])]

            overwrite = True
        elif overwrite:
            # If both append and overwrite are set, we append.
            os.remove(path)
    else:
        # No file there, so appending is the same as writing to a new file.
        append = False

    # Write out the LC data
    with tables.open_file(path, 'a') as f:
        # Figure out the dtype of our data. We need to use fixed length ASCII
        # strings in HDF5. Find the longest strings in each column to not
        # waste unnecessary space.
        for lc in self.light_curves:
            object_id_itemsize = max(object_id_itemsize,
                                     len(lc.meta['object_id']))
            band_itemsize = max(band_itemsize,
                                get_str_dtype_length(lc['band'].dtype))

        if append:
            # Make sure that the column sizes used in the file are at least
            # as long as what we want to append.
            obs_node = f.get_node('/observations')
            for key, itemsize in (('object_id', object_id_itemsize),
                                  ('band', band_itemsize)):
                file_itemsize = obs_node.col(key).itemsize
                if file_itemsize < itemsize:
                    # TODO: handle resizing the table automatically.
                    raise ValueError(
                        f"File column size too small for key '{key}' "
                        f"(file={file_itemsize}, new={itemsize}). Can't append. "
                        f"Specify a larger value for '{key}_itemsize' when "
                        f"initially creating the file.")

            dtype = obs_node.dtype
        else:
            # TODO: make this format configurable.
            dtype = [
                ('object_id', f'S{object_id_itemsize}'),
                ('time', 'f8'),
                ('flux', 'f4'),
                ('fluxerr', 'f4'),
                ('band', f'S{band_itemsize}'),
            ]

        # Setup an empty record array
        length = np.sum([len(i) for i in self.light_curves])
        data = np.recarray((length,), dtype=dtype)

        start = 0
        for lc in self.light_curves:
            end = start + len(lc)
            data['object_id'][start:end] = lc.meta['object_id']
            data['time'][start:end] = lc['time']
            data['flux'][start:end] = lc['flux']
            data['fluxerr'][start:end] = lc['fluxerr']
            data['band'][start:end] = lc['band']
            start = end

        # Write out the observations.
        if append:
            f.get_node('/observations').append(data)
        else:
            filters = tables.Filters(complevel=5, complib='blosc',
                                     fletcher32=True)
            table = f.create_table('/', 'observations', data, filters=filters)
            table.cols.object_id.create_index()

    # Write out the metadata
    write_table_hdf5(meta, path, '/metadata', overwrite=True, append=True,
                     serialize_meta=True)
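# Usage sketch, assuming 'dataset' and 'more_data' are objects exposing the
# write_hdf5 method above (e.g. light-curve dataset chunks; the names are
# illustrative).
dataset.write_hdf5('train.h5', overwrite=True)
# Later chunks append; the fixed-width string columns in the file must already
# be wide enough, otherwise pass larger *_itemsize values on the first write.
more_data.write_hdf5('train.h5', append=True)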