def create_output_containers(self):
    # open or create output file
    fout = tb.openFile(self.fname_out, 'w')
    filters = tb.Filters(complib='zlib', complevel=9)
    atom = tb.Atom.from_dtype(self.any_data.dtype)
    N, ny, nx = self.shape
    chunkshape = (1, ny, nx)  # chunk = slab to be saved
    title = ''
    self.fout = fout
    if self.node_name:
        g = fout.createGroup('/', self.node_name)
    else:
        g = '/'
    #self.table = fout.createTable(g, 'table', TimeSeriesGrid, title, filters)
    self.time1 = fout.createCArray(g, 'time1', atom, (N,), title, filters)
    self.time2 = fout.createCArray(g, 'time2', atom, (N,), title, filters)
    self.lon = fout.createCArray(g, 'lon', atom, (nx,), title, filters)
    self.lat = fout.createCArray(g, 'lat', atom, (ny,), title, filters)
    self.x_edges = fout.createCArray(g, 'x_edges', atom, (nx + 1,), title, filters)
    self.y_edges = fout.createCArray(g, 'y_edges', atom, (ny + 1,), title, filters)
    self.dh_mean = fout.createCArray(g, 'dh_mean', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
    self.dh_error = fout.createCArray(g, 'dh_error', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
    self.dh_error2 = fout.createCArray(g, 'dh_error2', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
    self.dg_mean = fout.createCArray(g, 'dg_mean', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
    self.dg_error = fout.createCArray(g, 'dg_error', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
    self.dg_error2 = fout.createCArray(g, 'dg_error2', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
    self.n_ad = fout.createCArray(g, 'n_ad', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
    self.n_da = fout.createCArray(g, 'n_da', atom, (N, ny, nx), title, filters, chunkshape=chunkshape)
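# The snippet above uses the PyTables 2.x camelCase API (openFile,
# createGroup, createCArray), which was removed in PyTables 3. A minimal
# sketch of the same container layout against the current API, assuming
# the same inputs (fname_out, any_data, shape) are available:
import numpy as np
import tables as tb

def create_output_containers_pt3(fname_out, any_data, shape, node_name=None):
    fout = tb.open_file(fname_out, 'w')
    filters = tb.Filters(complib='zlib', complevel=9)
    atom = tb.Atom.from_dtype(any_data.dtype)
    N, ny, nx = shape
    where = fout.create_group('/', node_name) if node_name else '/'
    # one chunk per time slab, as in the original
    dh_mean = fout.create_carray(where, 'dh_mean', atom, (N, ny, nx),
                                 filters=filters, chunkshape=(1, ny, nx))
    return fout, dh_mean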
from __future__ import print_function
import sys
from time import time

import numpy as np
import tables as tb
from numexpr.necompiler import (
    getContext, getExprNames, getType, NumExpr)

shape = (1000, 160000)
#shape = (10, 1600)
filters = tb.Filters(complevel=1, complib="blosc", shuffle=0)
ofilters = tb.Filters(complevel=1, complib="blosc", shuffle=0)
#filters = tb.Filters(complevel=1, complib="lzo", shuffle=0)
#ofilters = tb.Filters(complevel=1, complib="lzo", shuffle=0)

# TODO: Does it make sense to add an 's' (string) typecode here?
typecode_to_dtype = {'b': 'bool', 'i': 'int32', 'l': 'int64',
                     'f': 'float32', 'd': 'float64', 'c': 'complex128'}


def _compute(result, function, arguments, start=None, stop=None, step=None):
    """Compute the `function` over the `arguments` and put the outcome in
    `result`."""
    arg0 = arguments[0]
    if hasattr(arg0, 'maindim'):
        maindim = arg0.maindim
        (start, stop, step) = arg0._process_range_read(start, stop, step)
        nrowsinbuf = arg0.nrowsinbuf
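# For context, PyTables ships a high-level wrapper (tables.Expr) around the
# same numexpr machinery the snippet above drives by hand. A minimal sketch
# of an out-of-core expression over compressed CArrays (the file name is
# illustrative): the expression is evaluated blockwise, so neither operand
# is ever fully loaded into memory.
import numpy as np
import tables as tb

with tb.open_file('expr_demo.h5', 'w') as f:
    flt = tb.Filters(complevel=1, complib='blosc', shuffle=0)
    a = f.create_carray('/', 'a', obj=np.arange(1_000_000, dtype='float64'),
                        filters=flt)
    b = f.create_carray('/', 'b', obj=np.arange(1_000_000, dtype='float64'),
                        filters=flt)
    out = f.create_carray('/', 'out', tb.Float64Atom(), shape=a.shape,
                          filters=flt)
    expr = tb.Expr('2*a + b')  # picks up `a` and `b` from the namespace
    expr.set_output(out)       # write results straight to the CArray
    expr.eval()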
""" #PyTables提供基于文件的数据库格式 filename = path + 'tab.h5' h5 = tb.open_file(filename, 'w') #为了举例,我们生成一个200万行的数据表 rows = 2000000 row_des = { 'Date': tb.StringCol(26, pos=1), 'No1': tb.IntCol(pos=2), 'No2': tb.IntCol(pos=3), 'No3': tb.Float64Col(pos=4), 'No4': tb.Float64Col(pos=5) } #创建表格时,我们选择无压缩表格 filters = tb.Filters(complevel=0) # no compression tab = h5.create_table('/', 'ints_floats', row_des, title='Integers and Floats', expectedrows=rows, filters=filters) tab pointer = tab.row #生成样本数据 ran_int = np.random.randint(0, 10000, size=(rows, 2)) ran_flo = np.random.standard_normal((rows, 2)).round(5) #将样本数据集逐行写入表格 for i in range(rows): pointer['Date'] = dt.datetime.now() pointer['No1'] = ran_int[i, 0]
def save(self, filename, pSymmetric=True, pApplyCorrection=None):
    """
    Saves a matrix using hdf5 format
    :param filename: output file name; a ".h5" suffix is appended if missing
    :return: None
    """
    log.debug('Save in h5 format')

    # self.restoreMaskedBins()
    if not filename.endswith(".h5"):
        filename += ".h5"

    # if the file name already exists
    # try to find a new suitable name
    if os.path.isfile(filename):
        log.warning("*WARNING* File already exists {}\n "
                    "Overwriting ...\n".format(filename))
        unlink(filename)

    if self.nan_bins is None:
        self.nan_bins = np.array([])
    elif not isinstance(self.nan_bins, np.ndarray):
        self.nan_bins = np.array(self.nan_bins)

    if pSymmetric:
        # save only the upper triangle of the symmetric matrix
        matrix = triu(self.matrix, k=0, format='csr')
    else:
        matrix = self.matrix
    matrix.eliminate_zeros()

    filters = tables.Filters(complevel=5, complib='blosc')
    with tables.open_file(filename, mode="w", title="HiCExplorer matrix") as h5file:
        matrix_group = h5file.create_group("/", "matrix")

        # save the parts of the csr matrix
        for matrix_part in ('data', 'indices', 'indptr', 'shape'):
            arr = np.array(getattr(matrix, matrix_part))
            atom = tables.Atom.from_dtype(arr.dtype)
            ds = h5file.create_carray(matrix_group, matrix_part, atom,
                                      shape=arr.shape, filters=filters)
            ds[:] = arr

        # save the matrix intervals
        intervals_group = h5file.create_group("/", "intervals")
        chr_list, start_list, end_list, extra_list = zip(*self.cut_intervals)
        for interval_part in ('chr_list', 'start_list', 'end_list', 'extra_list'):
            arr = np.array(eval(interval_part))
            atom = tables.Atom.from_dtype(arr.dtype)
            ds = h5file.create_carray(intervals_group, interval_part, atom,
                                      shape=arr.shape, filters=filters)
            ds[:] = arr

        # save nan bins
        if len(self.nan_bins):
            atom = tables.Atom.from_dtype(self.nan_bins.dtype)
            ds = h5file.create_carray(h5file.root, 'nan_bins', atom,
                                      shape=self.nan_bins.shape,
                                      filters=filters)
            ds[:] = self.nan_bins

        # save corrections factors
        if self.correction_factors is not None and len(self.correction_factors):
            self.correction_factors = np.array(self.correction_factors)
            mask = np.isnan(self.correction_factors)
            self.correction_factors[mask] = 0
            atom = tables.Atom.from_dtype(self.correction_factors.dtype)
            ds = h5file.create_carray(h5file.root, 'correction_factors', atom,
                                      shape=self.correction_factors.shape,
                                      filters=filters)
            ds[:] = np.array(self.correction_factors)

        # save distance counts
        if self.distance_counts is not None and len(self.distance_counts):
            atom = tables.Atom.from_dtype(self.distance_counts.dtype)
            ds = h5file.create_carray(h5file.root, 'distance_counts', atom,
                                      shape=self.distance_counts.shape,
                                      filters=filters)
            ds[:] = np.array(self.distance_counts)
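# A matching loader (a sketch, not HiCExplorer's own reader): rebuilds the
# scipy CSR matrix from the four arrays that save() writes under /matrix.
import tables
from scipy.sparse import csr_matrix

def load_matrix(filename):
    with tables.open_file(filename, mode='r') as h5file:
        parts = {}
        for part in ('data', 'indices', 'indptr', 'shape'):
            parts[part] = getattr(h5file.root.matrix, part).read()
    return csr_matrix((parts['data'], parts['indices'], parts['indptr']),
                      shape=tuple(parts['shape']))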
def recon(fid, fout):
    global PMT_pos, coeff, event_pe, event_count
    '''
    reconstruction

    fid: root reference file
    fout: output file in this step
    '''
    # Create the output file and the group
    rootfile = ROOT.TFile(fid)
    #TruthChain = rootfile.Get('SimTriggerInfo')
    print(fid)

    '''
    class ChargeData(tables.IsDescription):
        ChannelID = tables.Float64Col(pos=0)
        Time = tables.Float16Col(pos=1)
        PE = tables.Float16Col(pos=2)
        Charge = tables.Float16Col(pos=2)
    '''

    class ReconData(tables.IsDescription):
        EventID = tables.Int64Col(pos=0)
        x = tables.Float16Col(pos=1)
        y = tables.Float16Col(pos=2)
        z = tables.Float16Col(pos=3)
        t0 = tables.Float16Col(pos=4)
        E = tables.Float16Col(pos=5)
        tau_d = tables.Float16Col(pos=6)
        success = tables.Int64Col(pos=7)

    # Create the output file and the group
    h5file = tables.open_file(fout, mode="w", title="OneTonDetector",
                              filters=tables.Filters(complevel=9))
    group = "/"

    # Create tables
    '''
    ChargeTable = h5file.create_table(group, "Charge", ChargeData, "Charge")
    Charge = ChargeTable.row
    '''
    ReconTable = h5file.create_table(group, "Recon", ReconData, "Recon")
    recondata = ReconTable.row

    '''
    # Loop for ROOT files.
    data = ROOT.TChain("SimpleAnalysis")
    data.Add(fid)
    '''
    # Loop for event
    '''
    for event in data:
        print(event)
        EventID = event.TriggerNo
        print(EventID)
    '''
    '''
    psr = argparse.ArgumentParser()
    psr.add_argument("-o", dest='opt', help="output")
    psr.add_argument('ipt', help="input")
    args = psr.parse_args()
    f = uproot.open(args.ipt)
    '''
    result_total = np.empty((1, 4))
    record = np.zeros((1, 4))

    h = h5py.File('../JP_python/version3/calib/coeff_corr.h5', 'r')
    coeff = h['coeff_corr'][...]

    f = uproot.open(fid)
    a = f['SimpleAnalysis']
    for tot, chl, PEl, Pkl, nPl in zip(a.array("TotalPE"),
                                       a.array("ChannelInfo.ChannelId"),
                                       a.array('ChannelInfo.PE'),
                                       a.array('ChannelInfo.PeakLoc'),
                                       a.array('ChannelInfo.nPeaks')):
        #print("=== TotalPE: {} ===".format(tot))
        #for ch, PE, pk, np in zip(chl, PEl, Pkl, nPl):
        #    print(ch, PE, pk, np)
        CH = np.zeros(np.size(PMT_pos[:, 1]))
        PE = np.zeros(np.size(PMT_pos[:, 1]))
        fired_PMT = np.zeros(0)
        TIME = np.zeros(0)
        for ch, pe, pk, npk in zip(chl, PEl, Pkl, nPl):
            PE[ch] = pe
            TIME = np.hstack((TIME, pk))
            fired_PMT = np.hstack((fired_PMT, ch * np.ones(np.size(pk))))
        # print(TIME, fired_PMT)
        fired_PMT = fired_PMT.astype(int)
        time_array = TIME
        '''
        for ChannelInfo in event.ChannelInfo:
            Charge['ChannelID'] = ChargeInfo.ChannelID
            Charge['Time'] = ChannelInfo.Peak
            Charge['PE'] = ChannelInfo.PE
            Charge['Charge'] = ChannelInfo.Charge
            Charge.append()
            PE = ChannelInfo.nPeaks
            Time = ChannelInfo.Peak
            ChannelID = ChargeInfo.ChannelID
        '''
        result_recon = np.empty((0, 6))
        result_drc = np.empty((0, 3))
        result_tdrc = np.empty((0, 3))

        # initial value
        x0 = np.zeros((1, 4))
        x0[0][0] = PE.sum() / 300
        x0[0][1] = np.sum(PE * PMT_pos[:, 0]) / np.sum(PE)
        x0[0][2] = np.sum(PE * PMT_pos[:, 1]) / np.sum(PE)
        x0[0][3] = np.sum(PE * PMT_pos[:, 2]) / np.sum(PE)

        # Constraints
        event_pe = PE
        # x0 = np.sum(PE*PMT_pos,axis=0)/np.sum(PE)
        theta0 = np.array([1, 0.1, 0.1, 0.1])
        theta0[0] = x0[0][0]
        theta0[1] = x0[0][1]
        theta0[2] = x0[0][2]
        theta0[3] = x0[0][3]

        cons = con()
        result = minimize(ReconSph(), theta0, method='SLSQP', constraints=cons)
        record[0, :] = np.array(result.x, dtype=float)
        result_total = np.vstack((result_total, record))

        # result
        print(event_count, result.x, result.success)
        event_count = event_count + 1

        recondata['EventID'] = event_count
        recondata['x'] = result.x[1]
        recondata['y'] = result.x[2]
        recondata['z'] = result.x[3]
        recondata['E'] = result.x[0]
        recondata['success'] = result.success
        recondata.append()
        # print(np.sum(result_drc*truth_px)/np.sqrt(np.sum(result_drc**2)*np.sum(truth_px**2)))

    # Flush into the output file
    # ChargeTable.flush()
    ReconTable.flush()
    h5file.close()
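# The Recon table written above can be pulled back as a structured numpy
# array in one call; a minimal sketch reading the same output file:
import tables
with tables.open_file(fout, mode='r') as h5:
    recon = h5.root.Recon.read()  # recarray with EventID, x, y, z, E, ...
    print(recon['x'][:5], recon['success'][:5])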
def ni96ch_process_spots(fname, out_path=None, chunksize=2**18,
                         num_timestamps=-1, debug=False, close=False,
                         inner_loop=None, comp_filter=None,
                         progrbar_widget=True):
    """Sort timestamps per-spot and correct overflow in NI-96ch data.

    This function auto-detects whether the file is saved by LabVIEW
    MultiCounterProject (so it has a 3-lines header and timestamps in
    big-endian order) or by LabVIEW FPGA_96ch project (no header,
    timestamps in little-endian order).

    Arguments:
        fname (string or pathlib.Path): name of the input data file.
        out_path (string or pathlib.Path or None): name of the output HDF5
            file. If None, use same name as input file changing the
            extension to hdf5.
        chunksize (int): input file is read in chunks (i.e. number of
            32-bit words) of this size.
        num_timestamps (int): read at most `num_timestamps`. If negative
            read the whole file.
        close (bool): whether to close the output pytables file
        debug (bool): perform additional consistency checks.
        inner_loop (function or None): function to use in the inner loop
            for overflow correction of each chunk of timestamps.
        comp_filter (tables.Filters): compression filter for the pytables file.
        progrbar_widget (bool): If true display progress bar as a Jupyter
            notebook widget.

    Returns:
        A tuple of:
        - h5file (pytables file): the handle for the pytables file
        - meta (dict): metadata extracted from the file
    """
    fname = Path(fname)
    if inner_loop is None:
        inner_loop = _inner_loop2
    dt, endianess, meta = detectformat(fname)
    # read the whole file when num_timestamps is negative or too large
    # (the original `num_timestamps < meta['num_timestamps']` test defeated
    # the "read at most" semantics documented above)
    if num_timestamps < 0 or num_timestamps > meta['num_timestamps']:
        num_timestamps = meta['num_timestamps']
    nbits = 24
    ts_max = 2**nbits
    spots = np.arange(48)
    #nch = 2 * spots.size
    ts_unit = 1 / meta['clock_frequency']
    if out_path is None:
        out_path = fname.with_suffix('.hdf5')
    out_path = Path(out_path)

    # Open file and position cursor after header
    f = open(fname, 'rb')
    f.seek(meta['offset'])

    # Output file
    if comp_filter is None:
        comp_filter = tables.Filters(complevel=6, complib='zlib')
    h5file = tables.open_file(str(out_path), mode="w", filters=comp_filter)
    for spot in spots:
        h5file.create_earray('/photon_data%d' % spot, 'timestamps',
                             createparents=True, chunkshape=(chunksize,),
                             obj=np.array([], dtype=np.int64))
        h5file.create_earray('/photon_data%d' % spot, 'detectors',
                             chunkshape=(chunksize,),
                             obj=np.array([], dtype=np.uint8))
    # List of empty timestamps arrays in HDF5 file
    timestamps_m, detectors_m = get_photon_data_arr(h5file, spots)

    # Separate channels and correct overflow
    t_start = np.zeros(2 * spots.size, dtype='int64')
    progressbar = tqdm_notebook if progrbar_widget else tqdm
    _iter = iter(progressbar(iter_chunksize(num_timestamps, chunksize),
                             total=np.ceil(num_timestamps / chunksize)))
    timestamps, det = _read_chunk(f, next(_iter), dt, endianess, nbits)
    prev_ts_chunks, prev_det_chunks = _inner_loop_spots(det, timestamps,
                                                        t_start, ts_max)
    ts_idx = chunksize
    for chunksize in _iter:
        timestamps, det = _read_chunk(f, chunksize, dt, endianess, nbits)
        ts_idx += chunksize
        ts_chunks, det_chunks = _inner_loop_spots(det, timestamps,
                                                  t_start, ts_max)
        last_ts_chunks, last_det_chunks = [], []
        for i, (ts, det) in enumerate(zip(timestamps_m, detectors_m)):
            last_two_ts_chunks = [prev_ts_chunks[i], ts_chunks[i]]
            last_two_det_chunks = [prev_det_chunks[i], det_chunks[i]]
            _fix_order(i, last_two_ts_chunks, last_two_det_chunks)
            ts.append(last_two_ts_chunks[0])
            det.append(last_two_det_chunks[0])
            prev_ts_chunks[i] = last_two_ts_chunks[1]
            prev_det_chunks[i] = last_two_det_chunks[1]
            last_ts_chunks.append(last_two_ts_chunks[1])
            last_det_chunks.append(last_two_det_chunks[1])
            if debug:
                assert (np.diff(ts_chunks[i]) > 0).all()

    # Save the last chunk for each spot
    for i, (ts, det) in enumerate(zip(timestamps_m, detectors_m)):
        ts.append(last_ts_chunks[i])
        det.append(last_det_chunks[i])

    # Compute acquisition duration
    meta['acquisition_duration'] = duration(timestamps_m, ts_unit)
    h5file.flush()
    if close:
        h5file.close()
    return h5file, meta
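# iter_chunksize() is referenced above but not shown; a plausible minimal
# implementation (an assumption, not the project's own code): yield full
# chunks, then the remainder, so the chunk sizes sum to num_samples.
def iter_chunksize(num_samples, chunksize):
    for _ in range(int(num_samples) // chunksize):
        yield chunksize
    last = int(num_samples) % chunksize
    if last > 0:
        yield last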
def create_hit_table(input_file_name, tdc_calibration_file,
                     plsr_dac_calibration_file, n_sub_files=8):
    # loops over all root files and merges the data into a hdf5 file
    # aligned at the event number
    print('Converting data from CERN ROOT TTree to hdf5 table')
    charge_calibration_values, tdc_calibration, tdc_error, tot_calibration, tot_error = get_charge_calibration(tdc_calibration_file, plsr_dac_calibration_file)

    # add all files that have the input_file_name prefix and load their data
    input_file_names = [input_file_name + '_t%d.root' % index
                        for index in range(n_sub_files)
                        if os.path.isfile(input_file_name + '_t%d.root' % index)]
    n_files = len(input_file_names)
    input_files_root = [r.TFile(file_name, 'read')
                        for file_name in input_file_names]
    pixel_digits = [input_file_root.Get('EventData').Get('Pixel Digits')
                    for input_file_root in input_files_root]
    n_hits = [pixel_digit.GetEntries() for pixel_digit in pixel_digits]
    n_total_hits = sum(n_hits)  # total pixel hits to analyze

    with tb.open_file(input_file_name + '_interpreted.h5', 'w') as out_file_h5:
        hit_table = out_file_h5.create_table(
            out_file_h5.root, name='Hits',
            description=data_struct.HitInfoTable, title='hit_data',
            filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))

        # tmp data structures to be filled by ROOT
        data = {}
        for index, pixel_digit in enumerate(pixel_digits):
            column_data = {}
            for branch in pixel_digit.GetListOfBranches():  # loop over the branches
                column_data[branch.GetName()] = np.zeros(shape=1, dtype=np.int32)
                branch.SetAddress(column_data[branch.GetName()].data)
            data[index] = column_data

        # result data structure to be filled in the following loop
        hits = np.zeros((n_total_hits,),
                        dtype=tb.dtype_from_descr(data_struct.HitInfoTable))

        # get file index with lowest event number
        for pixel_digit in pixel_digits:
            pixel_digit.GetEntry(0)
        min_event_number = min([data[index]['event'][0] for index in range(n_files)])
        actual_file_index = np.where(np.array([data[index]['event'][0] for index in range(n_files)]) == min_event_number)[0][0]

        indices = [0] * n_files
        table_index = 0
        actual_data = data[actual_file_index]
        actual_event_number = actual_data['event'][0]
        last_valid_event_number = 0
        last_tdc = 0
        expected_event_number = actual_event_number
        indices[actual_file_index] = 1

        progress_bar = progressbar.ProgressBar(
            widgets=['', progressbar.Percentage(), ' ',
                     progressbar.Bar(marker='*', left='|', right='|'), ' ',
                     progressbar.AdaptiveETA()],
            maxval=n_total_hits, term_width=80)
        progress_bar.start()

        def add_actual_data(actual_data, table_index):
            if actual_data['column'] >= 0 and actual_data['column'] < 80 and actual_data['row'] >= 0 and actual_data['row'] < 336:
                tdc_interpolation = interp1d(x=charge_calibration_values,
                                             y=tdc_calibration[actual_data['column'], actual_data['row']],
                                             kind='slinear', bounds_error=False, fill_value=0)
                tdc = tdc_interpolation(actual_data['charge'])
                tot_interpolation = interp1d(x=charge_calibration_values,
                                             y=tot_calibration[actual_data['column'], actual_data['row']],
                                             kind='slinear', bounds_error=False, fill_value=0)
                tot = tot_interpolation(actual_data['charge'])
                if math.isnan(tdc):  # do not add hits where tdc is nan, these pixels have a very high threshold or do not work
                    return table_index
                if tdc == 0 and actual_data['charge'] > 10000:  # no calibration for TDC due to high charge, thus mark as TDC overflow event
                    hits[table_index]['event_status'] |= 0b0000010000000000
                    tdc = 4095
                if tot == 0 and actual_data['charge'] > 10000:  # no calibration for TOT due to high charge, thus set max tot
                    tot = 13
                    hits[table_index]['event_status'] |= 0b0000000100000000
                hits[table_index]['event_number'] = actual_data['event'][0].astype(np.int64)
                hits[table_index]['column'] = (actual_data['column'] + 1).astype(np.uint8)
                hits[table_index]['row'] = (actual_data['row'] + 1).astype(np.uint16)
                hits[table_index]['TDC'] = int(actual_data['charge'] / 300.)
                hits[table_index]['tot'] = int(tot)
                table_index += 1
            return table_index

        while True:
            actual_event_number = actual_data['event'][0]
            if (actual_event_number == expected_event_number or
                    actual_event_number == expected_event_number - 1):  # check if event number increases
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                table_index = add_actual_data(actual_data, table_index)
            else:  # event number does not increase, thus the events are in another file --> switch file or the event number is missing
                file_event_numbers = [data[file_index]['event'][0] for file_index in range(n_files)]  # all files actual event number
                actual_file_index = np.where(file_event_numbers == min(file_event_numbers))[0][0]
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                actual_event_number = actual_data['event'][0]
                table_index = add_actual_data(actual_data, table_index)
            progress_bar.update(table_index)
            expected_event_number = actual_event_number + 1
            actual_digits.GetEntry(actual_index)
            if indices[actual_file_index] < n_hits[actual_file_index]:  # simply stop when the first file is fully iterated
                indices[actual_file_index] += 1
            else:
                break

        # Set missing data and store to file
        hits[:table_index]['LVL1ID'] = hits[:table_index]['event_number'] % 255
        hits[:table_index]['BCID'] = hits[:table_index]['LVL1ID']
        hits[:table_index]['relative_BCID'] = 6
        hit_table.append(hits[:table_index])
        progress_bar.finish()

    for input_file_root in input_files_root:
        input_file_root.Close()
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 23 10:17:34 2020

@author: Digvijay
"""

import tables as tb, pandas as pd, os

filters = tb.Filters(complevel=5, complib='zlib')


def convert_hdf_to_dict(file_name='es_res.h5', sim_dict={}, to_csv=False,
                        **kwargs):
    processed_res = {}
    with tb.open_file(filename=file_name, mode='r') as f:
        for sim_name, outputs in sim_dict.items():
            sim_data = list(getattr(f.root, sim_name))
            tmp = pd.DataFrame(data=sim_data, columns=['time'] + outputs)
            processed_res[sim_name] = tmp
    if to_csv:
        # look the key up in kwargs itself, not kwargs.values()
        if 'res_folder_name' in kwargs:
            f_name = kwargs['res_folder_name']
        else:
            f_name = 'res'
        return export_to_csv(processed_res, f_name=f_name)
    else:
        return processed_res
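# Example usage (file and simulation names are illustrative): sim_dict maps
# a node name under f.root to the list of output column names.
res = convert_hdf_to_dict(file_name='es_res.h5',
                          sim_dict={'sim1': ['load', 'pv', 'storage']})
print(res['sim1'].head())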
def _resave_old(self):
    filters = tables.Filters(complib='blosc', complevel=5)
    h5 = tables.open_file(self.fname, 'r+', filters=filters)
    attrs = h5.root._v_attrs
    attrs['positions'] = self.positions
    attrs['factory'] = self.factory
    h5.close()  # flush the updated attributes to disk
def manual_exe():
    WDIR = os.path.abspath("")
    DATA = dm.DataManagement(WDIR, "\\data\\test_data.xlsx", co2_price=6)
    # DATA.add_line("SON", "SHE", 150, 26.4)
    # DATA.add_line("BBR", "KAS", 150, 28)
    # DATA.add_data_centers()

    OPT_SETUP = {"opt": 'cbco_nodal',      # dispatch, ntc, nodal, cbco_nodal, cbco_zonal
                 "infeas_heat": True,      # Allow Infeasibilities in Heat EB
                 "infeas_el": True,        # Allow Infeasibilities in EL EB
                 "infeas_lines": True,     # Allow Infeasibilities on Lines
                 }

    add_cbco = {"cbco": [["l057", "l054"],
                         ["l137", "l112"],
                         ["l111", "l112"],
                         ["l113", "l112"],
                         ]}
    # add_cbco = {}

    GRID = grm.GridModel(DATA.nodes, DATA.lines)
    GRID_REP = GRID.gms_grid_rep(OPT_SETUP["opt"], DATA.ntc, add_cbco=None)
    # GRID.shift_phase_on_line({"l139": -5, "l304": -5})
    #
    JL = julia.JuliaInterface(WDIR, DATA, OPT_SETUP, GRID_REP,
                              model_horizon=range(200, 300))
    JL.data_to_json()
    JL.run()

    GMS = gms.GamsModel(WDIR, DATA, OPT_SETUP, GRID_REP,
                        model_horizon=range(200, 300))
    GMS.run()

    OBJ = {"julia": JL.results["Obj"],
           "GAMS": GMS.gams_symbol_to_df("COST").COST.values[0]}
    OBJ["abs_delta"] = abs(OBJ["julia"] - OBJ["GAMS"])
    OBJ["rel_delta"] = abs(OBJ["julia"] - OBJ["GAMS"]) / max(OBJ["julia"], OBJ["GAMS"])

    bokeh_plot = bokeh.BokehPlot(WDIR, DATA)
    bokeh_plot.add_market_result_from_gdx(GMS, "gams")
    bokeh_plot.add_market_result_from_julia(JL, "julia")
    bokeh_plot.add_grid_object(GRID)
    bokeh_plot.start_server()
    bokeh_plot.stop_server()

    # G = pd.merge(JL.results["G"], GMS.gams_symbol_to_df("G"), how="outer", on=["p", "t"]).fillna(0)
    # H = pd.merge(JL.results["H"], GMS.gams_symbol_to_df("H"), on=["p", "t"]).fillna(0)
    ## dd = pd.merge(JL.results["D_d"], GMS.gams_symbol_to_df("D_d"), on=["d", "t"]).fillna(0)
    # ph = pd.merge(JL.results["D_ph"], GMS.gams_symbol_to_df("D_ph"), on=["ph", "t"]).fillna(0)
    ## es = pd.merge(JL.results["D_es"], GMS.gams_symbol_to_df("D_es"), on=["es", "t"]).fillna(0)
    # hs = pd.merge(JL.results["D_hs"], GMS.gams_symbol_to_df("D_hs"), on=["hs", "t"]).fillna(0)
    #
    # INJ = pd.merge(JL.results["INJ"], GMS.gams_symbol_to_df("INJ"), on=["n", "t"]).fillna(0)
    # F_DC = pd.merge(JL.results["F_DC"], GMS.gams_symbol_to_df("F_DC"), on=["dc", "t"]).fillna(0)
    # EX = pd.merge(JL.results["EX"], GMS.gams_symbol_to_df("EX"), on=["z", "zz", "t"]).fillna(0)

    # prices_gms = GMS.nodal_prices()
    # prices_jl = JL.price()
    # prices = pd.merge(prices_jl, prices_gms, how="outer", on=["t", "n", "z"]).fillna(0)
    # prices["diff"] = prices.marginal_x - prices.marginal_y

    # TIMESLICE is needed by the call below, so it must stay uncommented
    TIMESLICE = ['t' + "{0:0>4}".format(x) for x in range(200, 300)]
    # ol = GRID.check_n_1_for_marketresult(GMS.gams_symbol_to_df("INJ"), TIMESLICE, threshold=1000)
    ol = GRID.check_n_1_for_marketresult(JL.results["INJ"], TIMESLICE, threshold=1000)
    #
    # GRID.lineloading_timeseries(GMS.gams_symbol_to_df("INJ"), "l117").plot()
    # GMS.plot_generation_area(option="fuel")
    # GRID.plot_fbmc(["DK-East"], ["DK-West"])


def update_net_injection(self, Nodes, Plants, time):
    # gamsdb = gamsdb
    # time = 't0001'
    # Update Net Injection
    Nodes.net_injection = Nodes.net_injection.astype(float)
    for n in Nodes.index:
        dem = gamsdb["d_el"].find_record([n, time])
        Nodes.set_value(n, 'net_injection', -dem.value)
        Nodes.net_injection[n] = -dem.value
    for p in gamsdb["co"]:
        gen = gamsdb["G"].find_record(keys=[p.get_keys()[0], time])
        Nodes.set_value(Plants.node[p.keys[0]], 'net_injection',
                        Nodes.net_injection[Plants.node[p.keys[0]]] + gen.level)
    for p in gamsdb["es"]:
        stor = gamsdb["D_es"].find_record(keys=[p.get_keys()[0], time])
        Nodes.set_value(Plants.node[p.keys[0]], 'net_injection',
                        Nodes.net_injection[Plants.node[p.keys[0]]] - stor.level)
    for p in gamsdb["hp"]:
        stor = gamsdb["D_hp"].find_record(keys=[p.get_keys()[0], time])
        Nodes.set_value(Plants.node[p.keys[0]], 'net_injection',
                        Nodes.net_injection[Plants.node[p.keys[0]]] - stor.level)


def update_gsk(self, Nodes, Plants, option, time):
    # Update GSK based on the marginal price per zone/node
    # -> marginal Plants provide increased Generation
    ref_price = {}
    if option in ['Nodal', 'CBCO']:
        for n in Nodes.index:
            price = -gamsdb["EB_Nodal"].find_record(keys=[n, time]).marginal
            ref_price[n] = -price
    else:
        for z in Nodes.zone:
            price = -gamsdb["EB_Dispatch_NTC"].find_record(keys=[z, time]).marginal
            ref_price[z] = -price
    Nodes.loc[Nodes.index, 'gsk'] = 1
    for g in Plants.index:
        if option in ['Nodal', 'CBCO'] \
                and Plants.mc[g] <= ref_price[Plants.node[g]] * 1.1:
            Nodes.loc[Nodes.index == Plants.node[g], 'gsk'] += 1
        elif option not in ['Nodal', 'CBCO'] \
                and Plants.mc[g] <= ref_price[Nodes.zone[Plants.node[g]]] * 1:
            Nodes.loc[Nodes.index == Plants.node[g], 'gsk'] += 1
    # Nodes.loc[Nodes.index == 'C3', 'gsk'] += 15
    #
    # print('Nodes Updated with net injections and GSK')

#nodes_tva_xls = pd.ExcelFile(wdir + "\\nodes_tva.xlsx")
#lines_tva_xls = pd.ExcelFile(wdir + "\\lines_tva.xlsx")
#
#Lines = lines_tva_xls.parse('Sheet1', index_col = 0)
#Nodes = nodes_tva_xls.parse('Sheet1', index_col = 1, parse_cols=tools.a2i("G"))
#
#naming_nodes = {'lat_anders': "node_lat",
#                "lon_anders": "node_lon",
#                }
#Nodes = Nodes.rename(columns = naming_nodes)
#
#for l in Lines.index:
#    if Lines.node_i[l] not in Nodes.index and \
#       Lines.node_j[l] not in Nodes.index:
#        print(1)
#        Lines = Lines.drop(l)

###### TEST FOR PYLIST
self = mato.grid
n_1 = self.create_all_n_1_ptdf()
A, b = self.contingency_Ab("nodal", contingency=n_1)

import tables

hdf5_path = "my_data.hdf5"
hdf5_file = tables.open_file(hdf5_path, mode='w')
A_storage = hdf5_file.create_array(hdf5_file.root, 'A', A)
b_storage = hdf5_file.create_array(hdf5_file.root, 'b', b)
hdf5_file.close()

read_hdf5_file = tables.open_file(hdf5_path, mode='r')
A1 = read_hdf5_file.root.A[:]
b1 = read_hdf5_file.root.b[:]
read_hdf5_file.close()
t1 = np.equal(A, A1).all()

hdf5_path = "my_compressed_data.hdf5"
hdf5_file = tables.open_file(hdf5_path, mode='w')
filters = tables.Filters(complevel=4, complib='zlib')
A_storage = hdf5_file.create_carray(hdf5_file.root, 'A',
                                    tables.Atom.from_dtype(A.dtype),
                                    shape=A.shape, filters=filters)
b_storage = hdf5_file.create_carray(hdf5_file.root, 'b',
                                    tables.Atom.from_dtype(b.dtype),
                                    shape=b.shape, filters=filters)
A_storage[:] = A
b_storage[:] = b
hdf5_file.close()

hdf5_path = "my_compressed_data.hdf5"
compressed_hdf5_file = tables.open_file(hdf5_path, mode='r')
# Here we slice [:] all the data back into memory, then operate on it
uncompressed_hdf5_A = compressed_hdf5_file.root.A[:]
uncompressed_hdf5_b = compressed_hdf5_file.root.b[:]
compressed_hdf5_file.close()
t2 = np.equal(A, uncompressed_hdf5_A).all()

hdf5_path = "my_extendable_compressed_data.hdf5"
hdf5_file = tables.open_file(hdf5_path, mode='w')
filters = tables.Filters(complevel=1, complib='zlib')
A_storage = hdf5_file.create_earray(hdf5_file.root, 'A',
                                    tables.Atom.from_dtype(A.dtype),
                                    shape=(0, A.shape[-1]),
                                    filters=filters, expectedrows=len(A))
b_storage = hdf5_file.create_earray(hdf5_file.root, 'b',
                                    tables.Atom.from_dtype(b.dtype),
                                    shape=(0,),
                                    filters=filters, expectedrows=len(b))
contingency = n_1
ram_array = self.update_ram(contingency[0], option="array")
A_storage.append(np.vstack([self.ptdf, -self.ptdf]))
b_storage.append(np.concatenate([ram_array[:, 0], -ram_array[:, 1]], axis=0))
for idx, line in enumerate(self.lines.index):
    ptdf = self.create_n_1_ptdf_outage(idx)
    A_storage.append(np.vstack([ptdf, -ptdf]))
    b_storage.append(np.concatenate([ram_array[:, 0], -ram_array[:, 1]], axis=0))
hdf5_file.close()

hdf5_path = "my_extendable_compressed_data.hdf5"
extendable_hdf5_file = tables.open_file(hdf5_path, mode='r')
extendable_hdf5_A = extendable_hdf5_file.root.A[:]
extendable_hdf5_b = extendable_hdf5_file.root.b[:]
extendable_hdf5_file.close()
t3 = np.equal(A, extendable_hdf5_A).all()

################################

def price(self):
    """Returns the nodal electricity price."""
    eb_nodal = self.results["EB_nodal"]
    eb_nodal = pd.merge(eb_nodal, self.nodes.zone.to_frame(),
                        how="left", left_on="n", right_index=True)
    eb_nodal.EB_nodal[abs(eb_nodal.EB_nodal) < 1E-3] = 0

    eb_zonal = self.results["EB_zonal"]
    eb_zonal.EB_zonal[abs(eb_zonal.EB_zonal) < 1E-3] = 0

    price = pd.merge(eb_nodal, eb_zonal, how="left",
                     left_on=["t", "zone"], right_on=["t", "z"])
    price["marginal"] = -(price.EB_zonal + price.EB_nodal)
    return price[["t", "n", "z", "marginal"]]

####

def plot_vertecies_of_inequalities(self, domain_x, domain_y, gsk_sink):
    """Plot the vertex representation of the FBMC domain."""
    self.nodes.net_injection = 0
    contingency = self.n_1_ptdf
    gsk_sink = gsk_sink or {}

    list_zonal_ptdf = self.create_zonal_ptdf(contingency)
    A, b = self.create_eq_list_zptdf(list_zonal_ptdf, domain_x, domain_y, gsk_sink)
    cbco_index = self.reduce_ptdf(A, b)

    full_indices = np.array([x for x in range(0, len(A))])
    # only plot a subset of linear inequalities that are not part of the
    # load flow domain if A is too large
    if len(A) > 1e3:
        relevant_subset = np.append(cbco_index,
                                    np.random.choice(full_indices, int(1e3),
                                                     replace=False))
    else:
        relevant_subset = full_indices
    # NOTE: this assignment overrides the subset selection above
    relevant_subset = cbco_index

    A = np.array(A)
    b = np.array(b).reshape(len(b), 1)
    vertecies_full = np.take(A, relevant_subset, axis=0) / np.take(b, relevant_subset, axis=0)
    vertecies_reduces = np.take(A, cbco_index, axis=0) / np.take(b, cbco_index, axis=0)

    xy_limits = tools.find_xy_limits([[vertecies_reduces[:, 0], vertecies_reduces[:, 1]]])
    fig = plt.figure()
    ax = plt.subplot()
    scale = 1.2
    ax.set_xlim(xy_limits["x_min"] * scale, xy_limits["x_max"] * scale)
    ax.set_ylim(xy_limits["y_min"] * scale, xy_limits["y_max"] * scale)
    for point in vertecies_full:
        ax.scatter(point[0], point[1], c='lightgrey')
    for point in vertecies_reduces:
        ax.scatter(point[0], point[1], c='r')
    return fig

#####
def main():
    fname_in = sys.argv[1]
    din = GetData(fname_in, 'a')
    time = ap.num2date(getattr(din, T_NAME)[:])
    lon = din.lon[:]
    lat = din.lat[:]
    nrows = len(time)
    nt, ny, nx = getattr(din, H_NAME).shape  # i,j,k = t,y,x
    RR = np.empty((nt, ny, nx), 'f8') * np.nan
    SS = np.empty((nt, ny, nx), 'f8') * np.nan

    if TINT:
        intervals = [ap.year2date(tt) for tt in INTERVALS]

    if TINT:
        print('using time-interval correlation')
    elif TVAR:
        print('using time-variable correlation')
    else:
        print('using constant correlation')

    print('processing time series:')
    isfirst = True
    # iterate over every grid cell (all times): i,j = y,x
    #-----------------------------------------------------------------
    for i in range(ny):
        for j in range(nx):
            print('time series of grid-cell:', i, j)
            dh = getattr(din, H_NAME)[:nrows, i, j]
            dg = getattr(din, G_NAME)[:nrows, i, j]
            if np.alltrue(np.isnan(dh)):
                continue
            #---------------------------------------------------------
            if TINT:    # satellite-dependent R and S
                dh_cor, RR[:, i, j], SS[:, i, j] = \
                    ap.backscatter_corr3(dh, dg, time, intervals,
                                         diff=DIFF, robust=True)
            elif TVAR:  # time-varying R and S
                dh_cor, RR[:, i, j], SS[:, i, j] = \
                    ap.backscatter_corr2(dh, dg, diff=DIFF,
                                         robust=True, npts=NPTS)
            else:       # constant R and S
                dh_cor, RR[:, i, j], SS[:, i, j] = \
                    ap.backscatter_corr(dh, dg, diff=DIFF, robust=True)
            #---------------------------------------------------------
            # plot figures
            if PLOT_TS:
                dh_cor = ap.referenced(dh_cor, to='first')
                dh = ap.referenced(dh, to='first')
                dg = ap.referenced(dg, to='first')
                k, = np.where(~np.isnan(RR[:, i, j]))
                r = np.mean(RR[k, i, j])
                s = np.mean(SS[k, i, j])
                fig = plot_rs(time, RR[:, i, j], SS[:, i, j])
                fig = plot_ts(time, lon[j], lat[i], dh_cor, dh, dg, r, s,
                              diff=DIFF)
                if fig is None:
                    continue
                plt.show()
            # save one TS per grid cell at a time
            #---------------------------------------------------------
            if not SAVE_TO_FILE:
                continue
            if isfirst:
                # open or create output file
                isfirst = False
                atom = tb.Atom.from_type('float64', dflt=np.nan)
                filters = tb.Filters(complib='zlib', complevel=9)
                c1 = din.file.createCArray('/', SAVE_AS_NAME, atom,
                                           (nt, ny, nx), '', filters)
                c2 = din.file.createCArray('/', R_NAME, atom,
                                           (nt, ny, nx), '', filters)
                c3 = din.file.createCArray('/', S_NAME, atom,
                                           (nt, ny, nx), '', filters)
            c1[:, i, j] = dh_cor

    if SAVE_TO_FILE:
        c2[:] = RR
        c3[:] = SS

    if PLOT_MAP:
        if TVAR:
            # 3D -> 2D: average over time ignoring NaNs
            # (boolean-mask indexing would flatten the array first)
            RR = np.nanmean(RR, axis=0)
            SS = np.nanmean(SS, axis=0)
        plot_map(lon, lat, np.abs(RR), BBOX, MFILE, mres=1, vmin=0, vmax=1)
        plt.title('Correlation Coefficient, R')
        plt.savefig('map_r.png')
        plot_map(lon, lat, SS, BBOX, MFILE, mres=1, vmin=-0.2, vmax=0.7)
        plt.title('Correlation Gradient, S')
        plt.savefig('map_s.png')
        plt.show()

    din.file.close()
    if SAVE_TO_FILE:
        print('out file -->', fname_in)
class GeneExpressionMatrix(object):
    FILE_EXTENSION = "gem"
    FILTERS = tables.Filters(complevel=1, complib='lzo')

    def __init__(self, name, data=None, path=None, minGenes=2, filters=None):
        self.name = name
        self.minGenes = minGenes  # stored, since allBiclusters() reports it
        if path is None:
            # default to PWD
            path = "./"
        fileName = path + name + "." + GeneExpressionMatrix.FILE_EXTENSION
        if filters is None:
            filters = self.FILTERS
        # if creating this GEM
        if data is not None:
            self.file = tables.openFile(fileName, mode="w", title=name,
                                        filters=filters)
            group = self.file.createGroup("/", "gem")
            # save raw version in case it's needed for algorithm enhancements
            self.file.createArray(group, "raw", data)
            self.data = packData(data)
            createBiclusters = True
        else:
            self.file = tables.openFile(fileName, mode="r+", filters=filters)
            group = self.file.getNode("/", "gem")
            raw = self.file.getNode(group, "raw")
            self.data = packData(raw[:])
            createBiclusters = False
        self.maxConditions = self.data.shape[1]
        self.maxGenes = self.data.shape[0]
        self.biclusters = Biclustering.Bicluster.Group(self.file, "/",
                                                       self.maxConditions,
                                                       self.maxGenes,
                                                       createBiclusters,
                                                       minGenes)

    def splitSubset(self, conditions):
        """Identifies all biclusters with a given subset of 2 conditions

        @param conditions subset of conditions in matrix to search for biclusters
        @return number of valid biclusters found
        """
        if conditions.size != 2:
            raise ValueError(
                "conditions subset can only have 2 conditions for split")
        increasing = numpy.where(
            self.data[:, conditions[0]] < self.data[:, conditions[1]])
        increasingGenes = Biclustering.BitSet.BitSet(self.data.shape[0],
                                                     increasing)
        count = 0
        # increasing set
        increasingConditions = Biclustering.Bit.OrderedBitSet(
            conditions, self.data.shape[1])
        if self.biclusters.pool(increasingConditions, increasingGenes):
            count += 1
        # decreasing set
        decreasingConditions = increasingConditions.reverse()
        if self.biclusters.pool(decreasingConditions, ~increasingGenes):
            count += 1
        return count

    def splitBiclusters(self):
        """Finds all biclusters with 2 conditions"""
        combinations = Biclustering.Combinatorics.xcombinations(
            self.maxConditions, 2)
        progressBar = \
            Biclustering.Timing.ProgressBar(combinations.len(), "Splitting")
        count = 0
        for conditions in combinations:
            progressBar.update()
            count += self.splitSubset(conditions)
        progressBar.finish()
        self.file.flush()
        return count

    def indexBiclusters(self, width):
        """Indexes all biclusters

        Indexes are used to speed up chain*() methods

        @param width width of biclusters to index. chain*(width) cannot be
            called before calling indexBiclusters(width)
        """
        self.biclusters.index(width)

    def chainBiclusters(self, tailWidth):
        """Chains biclusters into larger biclusters

        Chained biclusters are formed by chaining one bicluster of tailWidth
        with a bicluster of width 2.

        @param tailWidth number of conditions in first array of biclusters
        @return number of biclusters found
        """
        title = "(%d %d) => (%d)" % (2, tailWidth, tailWidth + 1)
        progressBar = \
            Biclustering.Timing.ProgressBar(self.maxConditions, title)
        count = 0
        for link in xrange(self.maxConditions):
            progressBar.update()
            count += self.biclusters.chain(tailWidth, link)
        progressBar.finish()
        self.file.flush()
        return count

    def chainBiclustersPreCrest(self, headWidth, doubling=False):
        """Chains biclusters into larger biclusters

        Chained biclusters are formed by chaining one bicluster of headWidth
        with a bicluster of width 2.

        @param headWidth number of conditions in first array of biclusters
        @param doubling
        @return number of biclusters found
        """
        if doubling:
            tailWidth = headWidth
        else:
            tailWidth = 2
        title = "(%d %d) => (%d)" % (headWidth, tailWidth,
                                     headWidth + tailWidth - 1)
        progressBar = \
            Biclustering.Timing.ProgressBar(self.maxConditions, title)
        count = 0
        for link in xrange(self.maxConditions):
            progressBar.update()
            count += self.biclusters.chainPreCrest(headWidth, link, doubling)
        progressBar.finish()
        self.file.flush()
        return count

    def pruneBiclusters(self, width):
        """Prunes biclusters of width conditions that are nested

        @param width number of conditions in biclusters to prune
        """
        indexes = xrange(self.biclusters.depth(width))
        title = "(%d not in %d)" % (width, width + 1)
        progressBar = Biclustering.Timing.ProgressBar(len(indexes), title)
        for index in indexes:
            progressBar.update()
            self.biclusters.isNested(width, index)
        progressBar.finish()
        self.file.flush()

    def biclusterCount(self, includeNested=True):
        """Returns total number of biclusters

        @param includeNested True to include nested biclusters. False otherwise.
        @return total number of biclusters
        """
        count = 0
        for i in xrange(2, self.maxConditions + 1):
            count += self.biclusters.depth(i, includeNested)
        return count

    def allBiclusters(self):
        """Finds all biclusters in the GEM"""
        totalStartTime = time.time()
        # seed clusters need 2 conditions so biclusters
        # can be grown by 1 condition if needed
        # 0 biclusters is unlikely, but may occur if minGenes is too high
        if self.splitBiclusters() == 0:
            logging.error("No seed biclusters found. "
                          "Perhaps minimum genes (%d) is too high?",
                          self.minGenes)
        logging.info("Chaining")
        # search for valid bicluster with most conditions
        maxConditions = self.maxConditions
        # only look for holes above minimum valid bicluster conditions and
        # smaller than the known maxConditions that may still yield genes
        progressBar = \
            Biclustering.Timing.ProgressBar(maxConditions - 2, "Chaining")
        for i in xrange(2, maxConditions + 1):
            progressBar.update()
            self.indexBiclusters(i)
            if self.chainBiclusters(i) == 0:
                maxConditions = i
                break
        progressBar.finish()
        logging.info("Chains constructed. Biclusters: %d Max Conditions: %d",
                     self.biclusterCount(), maxConditions)
        logging.info("Pruning nested Biclusters")
        progressBar = \
            Biclustering.Timing.ProgressBar(maxConditions - 2, "Pruning")
        for i in xrange(2, maxConditions):
            progressBar.update()
            self.pruneBiclusters(i)
        progressBar.finish()
        logging.info("Nested Biclusters pruned. Biclusters: %s ",
                     self.biclusterCount(False))
        logging.info("Total Time: %s",
                     datetime.timedelta(seconds=time.time() - totalStartTime))

    def stats(self):
        """Returns a formatted string with per-width bicluster counts"""
        lines = list()
        for i in xrange(2, self.maxConditions + 1):
            if i != 2:
                lines.append("\n")
            lines.append("(%d): %d T %d NSUB" %
                         (i, self.biclusters.depth(i),
                          self.biclusters.depth(i, False)))
        lines.append("\n")
        lines.append("Total: %d NSUB: %d" %
                     (self.biclusterCount(), self.biclusterCount(False)))
        return ''.join(lines)

    def annotate(self, annotation, axis="genes"):
        """ """
        pass
def store_alignment_parameters(alignment_file, alignment_parameters,
                               mode='absolute', select_duts=None):
    ''' Stores the alignment parameters (rotations, translations) into the
    alignment file. Absolute (overwriting) and relative mode (add angles,
    translations) are supported.

    Parameters
    ----------
    alignment_file : pytables file
        The pytables file with the alignment
    alignment_parameters : numpy recarray
        An array with the alignment values
    mode : string
        'relative' and 'absolute' supported
    select_duts : iterable
        In relative mode only change specified DUTs
    '''
    with tb.open_file(alignment_file, mode="r+") as out_file_h5:  # Open file with alignment data
        #FIXME: this does not make sense to be here:
        # alignment_parameters[:]['translation_z'] = out_file_h5.root.PreAlignment[:]['z']  # Set z from pre-alignment
        try:
            alignment_table = out_file_h5.create_table(
                out_file_h5.root, name='Alignment',
                title='Table containing the alignment geometry parameters (translations and rotations)',
                description=np.zeros((1,), dtype=alignment_parameters.dtype).dtype,
                filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
            alignment_table.append(alignment_parameters)
        except tb.NodeError:
            alignment_parameters = merge_alignment_parameters(
                old_alignment=out_file_h5.root.Alignment[:],
                new_alignment=alignment_parameters,
                mode=mode, select_duts=select_duts)
            logging.info('Overwrite existing alignment!')
            out_file_h5.root.Alignment._f_remove()  # Remove old node, is there a better way?
            alignment_table = out_file_h5.create_table(
                out_file_h5.root, name='Alignment',
                title='Table containing the alignment geometry parameters (translations and rotations)',
                description=np.zeros((1,), dtype=alignment_parameters.dtype).dtype,
                filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
            alignment_table.append(alignment_parameters)

    string = "\n".join(['DUT%d: alpha=%1.4f, beta=%1.4f, gamma=%1.4f Rad, x/y/z=%d/%d/%d um'
                        % (dut_values['DUT'], dut_values['alpha'],
                           dut_values['beta'], dut_values['gamma'],
                           dut_values['translation_x'],
                           dut_values['translation_y'],
                           dut_values['translation_z'])
                        for dut_values in alignment_parameters])
    logging.info('Set alignment parameters to:\n%s' % string)
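# Example usage (a sketch; the dtype below just mirrors the fields the
# logging string expects, it is not taken from the project's own schema):
import numpy as np
dtype = [('DUT', np.int32), ('alpha', np.float64), ('beta', np.float64),
         ('gamma', np.float64), ('translation_x', np.float64),
         ('translation_y', np.float64), ('translation_z', np.float64)]
parameters = np.zeros((2,), dtype=dtype)
parameters['DUT'] = [0, 1]
parameters['translation_z'] = [0.0, 20000.0]
store_alignment_parameters('alignment.h5', parameters, mode='absolute')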
# compute bounding box
maskidx = np.argwhere(fullmask)
minidx = maskidx.min(0) - 5
maxidx = maskidx.max(0) + 5
mask = fullmask[minidx[0]:maxidx[0], minidx[1]:maxidx[1], minidx[2]:maxidx[2]]  # indices to extract at first
print("Mask bounding box: ", minidx, maxidx)
print()

###########################################################################
# Build data array
#
# Read all data files, but only voxels in data mask.

FILTERS = tb.Filters(complevel=5, complib='zlib')
if os.path.exists(datafile):
    fileh = tb.open_file(datafile, mode='a', title="data", filters=FILTERS)
else:
    fileh = tb.open_file(datafile, mode='w', title="data", filters=FILTERS)

if not 'files' in fileh.root:
    _ = fileh.create_array(fileh.root, 'files', labelfiles)
if not 'fullmask' in fileh.root:
    _ = fileh.create_array(fileh.root, 'fullmask', fullmask)
if not 'mask' in fileh.root:
    _ = fileh.create_array(fileh.root, 'mask', mask)
if not 'cropbbox_min' in fileh.root:
    _ = fileh.create_array(fileh.root, 'cropbbox_min', minidx)
if not 'cropbbox_max' in fileh.root:
    _ = fileh.create_array(fileh.root, 'cropbbox_max', maxidx)
def open_db(url, myglobals=None, suffix_for_class_name='',
            use_global_session=True, object_number_in_cache=None,
            numpy_storage_engine='sqltable', compress=DEFAULT_COMPRESS_LIB,
            hdf5_filename=None, relationship_lazy='select',
            predefined_classes=None, max_binary_size=MAX_BINARY_SIZE,):
    """
    High-level function for playing with a database: this function creates
    the sqlalchemy engine, inspects the database, creates classes, maps them
    and creates caches.

    :param url: url in sqlalchemy style
    :param myglobals: your locals() or globals() dict if you want to put
        mapped classes directly in your namespace
    :param suffix_for_class_name: you can add a suffix to class names to
        avoid conflicts with neo for instance (AnalogSignal or _AnalogSignal)
    :param use_global_session: True by default. True is convenient for easy
        script mode: there is one global session for all objects. Do not do
        this for multiprocessing or GUI.
    :param object_number_in_cache: default=None. Use a basic memory cache
        with a fixed object count to avoid reloading objects from the db
        each time you need them. If None do not cache anything.
    :param numpy_storage_engine: 'sqltable' or 'hdf5'. All numpy.array (and
        pq.Quantity) can be stored directly in sql tables or in a separate
        hdf5 file. 'sqltable': great because your database is consistent,
        but slow, since SQL is not optimized for big binary objects.
        'hdf5': great because it is faster, but you need to provide a
        separate file (besides url) for storage.
    :param compress: 'lz4', 'snappy', 'blosc', 'zlib' or None.
        Compress all BLOBs for np.array (saves disk space and bandwidth).
        Note that compression adds a memory overhead (np.array buffer +
        compress buffer).
    :param hdf5_filename: if numpy_storage_engine is hdf5 you need to
        provide the filename.
    :param relationship_lazy: sqlalchemy option for relationship
        (default 'select'). Can be 'select', 'immediate', 'dynamic'.
        See http://docs.sqlalchemy.org/en/latest/orm/relationships.html?highlight=lazy
    :param predefined_classes: if None take OpenElectrophy.core.oeclasses
        for creating the schema. You can also provide something else.
    :param max_binary_size: max size for BLOB columns; depends on the engine
        (SQLite limited to 2Go, MySQL needs some configs, ...)

    :rtype: :py:class:`DataBaseConnectionInfo` object with url, mapped
        classes, metadata.

    Usage in script mode:
        >>> url = 'sqlite://mydatabase.sqlite'
        >>> open_db(url, myglobals = globals() )

    Advanced usage for GUI or multiprocessing:
        >>> url = 'sqlite://mydatabase.sqlite'
        >>> dbinfo = open_db(url, myglobals =None, use_global_session = False )
        >>> session = dbinfo.Session()
        >>> print dbinfo.mapped_classes

    """
    engine = create_engine(url, echo=False, convert_unicode=True,
                           encoding='utf8')  # client_encoding='utf8'

    if predefined_classes is None:
        from OpenElectrophy.core import oeclasses
        predefined_classes = oeclasses

    create_or_update_database_schema(engine, predefined_classes,
                                     max_binary_size=max_binary_size)
    generated_classes = create_classes_from_schema_sniffing(
        engine, predefined_classes,
        suffix_for_class_name=suffix_for_class_name, )

    # TODO check if hdf5_filename and numpy_storage_engine are consistent
    if numpy_storage_engine == 'sqltable':
        hfile = None
    elif numpy_storage_engine == 'hdf5':
        hfile = tables.openFile(hdf5_filename, mode="a",
                                filters=tables.Filters(complevel=9,
                                                       complib='blosc',))

    metadata = map_generated_classes(engine, generated_classes,
                                     relationship_lazy=relationship_lazy,
                                     numpy_storage_engine=numpy_storage_engine,
                                     compress=compress, hfile=hfile)

    if numpy_storage_engine == 'hdf5':
        for genclass in generated_classes:
            #~ event.listen(genclass, 'load', EventOnHdf5Load(hfile = hfile) )
            event.listen(genclass, 'after_insert',
                         EventOnHdf5AfterInsert(hfile=hfile))
            event.listen(genclass, 'after_update',
                         EventOnHdf5AfterUpdate(hfile=hfile))
            event.listen(genclass, 'after_delete',
                         EventOnHdf5AfterDelete(hfile=hfile))

    if object_number_in_cache:
        cache = MyBasicCache(maxsize=object_number_in_cache)
        l = EventLoadListennerForCache(cache=cache)
        for genclass in generated_classes:
            event.listen(genclass, 'load', l)
    else:
        cache = None

    if myglobals is not None:
        d = {}
        for genclass in generated_classes:
            d[genclass.__name__] = genclass
        myglobals.update(d)

    Session = orm.scoped_session(orm.sessionmaker(bind=metadata.bind,
                                                  autocommit=False,
                                                  autoflush=True))
    #~ Session = orm.sessionmaker(bind=metadata.bind , autocommit=False, autoflush=True)

    dbinfo = DataBaseConnectionInfo(url=url,
                                    mapped_classes=generated_classes,
                                    Session=Session,
                                    metadata=metadata,
                                    cache=cache,
                                    numpy_storage_engine=numpy_storage_engine,
                                    compress=compress)

    if use_global_session:
        global globalsession
        globalsession = Session()
        global globaldbinfo
        globaldbinfo = dbinfo

    return dbinfo
def main():
    fname_in = sys.argv[1]
    din = GetData(fname_in, 'a')
    time = ap.num2date(getattr(din, T_NAME)[:])
    lon = din.lon[:]
    lat = din.lat[:]
    sat = din.satname[:]
    nrows = len(time)
    nt, ny, nx = getattr(din, H_NAME).shape  # i,j,k = t,y,x
    RR = np.empty((nt, ny, nx), 'f8') * np.nan
    SS = np.empty((nt, ny, nx), 'f8') * np.nan

    print('processing time series:')
    isfirst = True
    # iterate over every grid cell (all times): i,j = y,x
    #-----------------------------------------------------------------
    for i in range(ny):
        for j in range(nx):
            print('time series of grid-cell:', i, j)
            dh = getattr(din, H_NAME)[:nrows, i, j]
            dg = getattr(din, G_NAME)[:nrows, i, j]
            dh_cor = np.zeros_like(dh)
            if np.alltrue(np.isnan(dh)):
                continue
            #---------------------------------------------------------
            # pull and correct a chunk of the array at a time
            for s in np.unique(sat):
                k = np.where(sat == s)
                dh_cor[k], R, S = ap.backscatter_corr(dh[k], dg[k],
                                                      diff=DIFF, robust=True)
                RR[k, i, j] = R
                SS[k, i, j] = S
            #---------------------------------------------------------
            if PLOT_TS:
                dh_cor = ap.referenced(dh_cor, to='first')
                dh = ap.referenced(dh, to='first')
                dg = ap.referenced(dg, to='first')
                fig = plot_rs(time, RR[:, i, j], SS[:, i, j])
                for s in np.unique(sat):
                    k = np.where(sat == s)
                    # use new names so the loop variable `s` is not clobbered
                    r_mean, s_mean = np.mean(RR[k, i, j]), np.mean(SS[k, i, j])
                    try:
                        fig = plot_ts(time[k], lon[j], lat[i], dh_cor[k],
                                      dh[k], dg[k], r_mean, s_mean, diff=DIFF)
                    except Exception:
                        print('something wrong with plotting!')
                        print('dh:', dh)
                        print('dg:', dg)
                if fig is None:
                    continue
                plt.show()
            # save one TS per grid cell at a time
            #---------------------------------------------------------
            if not SAVE_TO_FILE:
                continue
            if isfirst:
                # open or create output file
                isfirst = False
                atom = tb.Atom.from_type('float64', dflt=np.nan)
                filters = tb.Filters(complib='zlib', complevel=9)
                try:
                    c1 = din.file.create_carray('/', SAVE_AS_NAME, atom,
                                                (nt, ny, nx), '', filters)
                except Exception:
                    c1 = din.file.get_node('/', SAVE_AS_NAME)
                c2 = din.file.create_carray('/', R_NAME, atom,
                                            (nt, ny, nx), '', filters)
                c3 = din.file.create_carray('/', S_NAME, atom,
                                            (nt, ny, nx), '', filters)
            c1[:, i, j] = dh_cor

    if SAVE_TO_FILE:
        c2[:] = RR
        c3[:] = SS

    if PLOT_MAP:
        RR = RR[0]  # change accordingly
        SS = SS[0]
        plot_map(lon, lat, np.abs(RR), BBOX, MASK_FILE, mres=1, vmin=0, vmax=1)
        plt.title('Correlation Coefficient, R')
        plt.savefig('map_r.png')
        plot_map(lon, lat, SS, BBOX, MASK_FILE, mres=1, vmin=-0.2, vmax=0.7)
        plt.title('Correlation Gradient, S')
        plt.savefig('map_s.png')
        plt.show()

    din.file.close()
    if SAVE_TO_FILE:
        print('out file -->', fname_in)
def ni96ch_process(fname, out_path=None, chunksize=2**18, num_timestamps=-1,
                   debug=False, close=False, inner_loop=None,
                   comp_filter=None, progrbar_widget=True):
    """Sort timestamps per-ch and correct overflow in NI-96ch data.

    This function separates each single detector channel and corrects
    overflows. The 96 arrays are saved to a (hopefully temporary) HDF5 file.
    To create Photon-HDF5 files, channel pairs (i.e. Donor-Acceptor) need
    to be merged.

    This function auto-detects whether the file is saved by LabVIEW
    MultiCounterProject (so it has a 3-lines header and timestamps in
    big-endian order) or by LabVIEW FPGA_96ch project (no header,
    timestamps in little-endian order).

    Arguments:
        out_path (string or pathlib.Path or None): name of the output HDF5
            file. If None, use same name as input file appending
            '_raw_temp.hdf5'.
        chunksize (int): input file is read in chunks (i.e. number of
            32-bit words) of this size.
        num_timestamps (int): read at most `num_timestamps`. If negative
            read the whole file.
        close (bool): whether to close the output pytables file
        debug (bool): perform additional consistency checks.
        inner_loop (function or None): function to use in the inner loop
            for overflow correction of each chunk of timestamps.
        comp_filter (tables.Filters): compression filter for the pytables file.
        progrbar_widget (bool): If true display progress bar as a Jupyter
            notebook widget.

    Returns:
        A tuple of:
        - h5file (pytables file): the handle for the pytables file
        - meta (dict): metadata extracted from the file
    """
    fname = Path(fname)
    if inner_loop is None:
        inner_loop = _inner_loop2
    dt, endianess, meta = detectformat(fname)
    # read the whole file when num_timestamps is negative or too large
    # (as above, the original comparison defeated the "read at most" intent)
    if num_timestamps < 0 or num_timestamps > meta['num_timestamps']:
        num_timestamps = meta['num_timestamps']
    nbits = 24
    ts_max = 2**nbits
    nch = meta['nchannels']
    ts_unit = 1 / meta['clock_frequency']
    if out_path is None:
        out_path = Path(fname.parent, fname.stem + '_raw_temp.hdf5')
    out_path = Path(out_path)

    # Open file and position cursor after header
    f = open(fname, 'rb')
    f.seek(meta['offset'])

    # Output file
    if comp_filter is None:
        comp_filter = tables.Filters(complevel=6, complib='blosc')
    h5file = tables.open_file(str(out_path), mode="w", filters=comp_filter)
    for ch in range(nch):
        h5file.create_earray('/', 'timestamps%d' % ch,
                             chunkshape=(chunksize,),
                             obj=np.array([], dtype=np.int64))
    # List of empty timestamps arrays in HDF5 file
    timestamps_m = [h5file.get_node('/timestamps%d' % ch)
                    for ch in range(nch)]

    # Separate channels and correct overflow
    t_start = np.zeros(nch, dtype='int64')
    progressbar = tqdm_notebook if progrbar_widget else tqdm
    _iter = progressbar(iter_chunksize(num_timestamps, chunksize),
                        total=np.ceil(num_timestamps / chunksize))
    for chunksize in _iter:
        timestamps, det = _read_chunk(f, chunksize, dt, endianess, nbits)
        ts_chunks = inner_loop(det, timestamps, t_start, ts_max, nch)
        for ts, ts_chunk in zip(timestamps_m, ts_chunks):
            ts.append(ts_chunk)
            if debug:
                assert (np.diff(ts_chunk) > 0).all()

    # Compute acquisition duration
    meta['acquisition_duration'] = duration(timestamps_m, ts_unit)
    h5file.flush()
    if close:
        h5file.close()
    return h5file, meta
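# duration() is referenced above but not shown; a plausible sketch (an
# assumption, not the project's own code): the acquisition spans from the
# earliest first timestamp to the latest last timestamp over all channels,
# scaled by the clock period.
def duration(timestamps_m, ts_unit):
    t0 = min(ts[0] for ts in timestamps_m if len(ts) > 0)
    t1 = max(ts[-1] for ts in timestamps_m if len(ts) > 0)
    return (t1 - t0) * ts_unit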
def _comp_filter(lib='lz4', lvl=3):
    return tables.Filters(complib=f'blosc:{lib}', complevel=lvl)

# export
@patch
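# Usage: build a Blosc meta-compressor filter, e.g. blosc:zstd at level 5.
# (Which sub-codecs are available depends on the local Blosc build.)
filters = _comp_filter(lib='zstd', lvl=5)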
def create_db_file(
    mols_source: Union[str, IterableType],
    filename: str,
    fp_type: str,
    fp_params: dict = {},
    mol_id_prop: str = "mol_id",
    gen_ids: bool = False,
    sort_by_popcnt: bool = True,
) -> None:
    """Creates FPSim2 FPs db file from .smi, .sdf files or from an iterable.

    Parameters
    ----------
    mols_source : str
        .smi/.sdf filename or iterable.
    filename: str
        Fingerprint database filename.
    fp_type : str
        Fingerprint type used to create the fingerprints.
    fp_params : dict
        Parameters used to create the fingerprints.
    mol_id_prop : str
        Name of the .sdf property to read the molecule id.
    gen_ids : bool
        Autogenerate FP ids.
    sort_by_popcnt: bool
        Whether the FPs should be sorted or not.

    Returns
    -------
    None
    """
    # if params dict is empty use defaults
    if not fp_params:
        fp_params = FP_FUNC_DEFAULTS[fp_type]

    supplier = get_mol_suplier(mols_source)
    fp_length = get_fp_length(fp_type, fp_params)

    # set compression
    filters = tb.Filters(complib="blosc", complevel=5)

    # set the output file and fps table
    with tb.open_file(filename, mode="w") as fp_file:
        particle = create_schema(fp_length)
        fps_table = fp_file.create_table(fp_file.root, "fps", particle,
                                         "Table storing fps", filters=filters)

        # set config table; used fp function, parameters and rdkit version
        param_table = fp_file.create_vlarray(fp_file.root, "config",
                                             atom=tb.ObjectAtom())
        param_table.append(fp_type)
        param_table.append(fp_params)
        param_table.append(rdkit.__version__)

        fps = []
        for mol_id, rdmol in supplier(mols_source, gen_ids,
                                      mol_id_prop=mol_id_prop):
            efp = rdmol_to_efp(rdmol, fp_type, fp_params)
            popcnt = py_popcount(np.array(efp, dtype=np.uint64))
            efp.insert(0, mol_id)
            efp.append(popcnt)
            fps.append(tuple(efp))
            if len(fps) == BATCH_WRITE_SIZE:
                fps_table.append(fps)
                fps = []
        # append last batch, smaller than BATCH_WRITE_SIZE
        if fps:
            fps_table.append(fps)

        # create index so the table can be sorted
        fps_table.cols.popcnt.create_index(kind="full")

    if sort_by_popcnt:
        sort_db_file(filename)
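# Example call (a sketch; 'Morgan' and its parameters follow FPSim2's RDKit
# fingerprint naming, and the file names are illustrative):
create_db_file('molecules.smi', 'fps.h5', 'Morgan',
               fp_params={'radius': 2, 'nBits': 2048})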
def load_images(fnames, channel=1, dsname='default',
                tmppath='/scratch365/akuehlka/mcnns_tmp',
                segmentation_data=(), transform_vgg=False):
    print('-- loading raw images ...')
    sys.stdout.flush()

    dt = np.float32
    iris_data, iris_map = (np.array([]), np.array([]))
    if segmentation_data:
        iris_data = segmentation_data[0]
        iris_map = segmentation_data[1]

    fname = dsname
    if transform_vgg:
        fname += "_vgg"
        channel = 3

    # read one image as a template
    npimg = read_single_image(fnames[0], dtype=dt, channel=channel)

    # create a pytables file to contain all images
    # this way we'll be able to work with datasets that don't fit into the memory
    # based on https://kastnerkyle.github.io/posts/using-pytables-for-larger-than-ram-data-processing/
    hdf5_path = '{}/{}.hdf5'.format(tmppath, fname)
    hdf5_lock = '{}/{}.lock'.format(tmppath, fname)
    if not os.path.exists(hdf5_path):
        print("Cache not found, loading data...")
        os.system('echo "1" > ' + hdf5_lock)
        hdf5_file = tables.open_file(hdf5_path, mode='w')
        filters = tables.Filters(complevel=5, complib='blosc')
        data_storage = hdf5_file.create_earray(
            hdf5_file.root, 'imgs',
            tables.Atom.from_dtype(np.dtype(dt)),  # the atom carries only the dtype; the dims come from `shape`
            shape=tuple([0] + list(npimg.shape)),
            filters=filters,
            expectedrows=len(fnames))
        for i, fn in enumerate(fnames):  # renamed from `fname` to avoid clobbering the cache name
            # print('Reading ', fn, i)
            img = read_single_image(fn, channel=channel)
            # resize irregular images
            if img.shape[0] != data_storage.shape[1] or img.shape[1] != data_storage.shape[2]:
                if dsname == 'livdetiiitd':
                    print("Resizing image", fn, "with shape", img.shape,
                          "to", npimg.shape)
                    # resize the image to fit the current size,
                    # considering segmentation info
                    img, iris_data, iris_map = resizeProp(
                        img, data_storage.shape[1:4], fn,
                        segmentation_data=segmentation_data)
            data_storage.append(img[np.newaxis, ...])
        hdf5_file.close()
        os.system('rm ' + hdf5_lock)

    # check for lock file/wait
    while os.path.exists(hdf5_lock):
        print("Waiting for lock release...")
        time.sleep(5)

    data_storage = tables.open_file(hdf5_path, mode='r')
    return data_storage.root.imgs, iris_data, iris_map
from ..paths import parse_r0_filename, run_to_dl1_filename, r0_to_dl1_filename
from ..pointing import PointingPosition

logger = logging.getLogger(__name__)

__all__ = [
    'add_disp_to_parameters_table',
    'get_dl1',
    'r0_to_dl1',
]

cleaning_method = tailcuts_clean

filters = tables.Filters(
    complevel=5,            # enable compression, with level 0=disabled, 9=max
    complib='blosc:zstd',   # compression using blosc
    fletcher32=True,        # attach a checksum to each chunk for error detection
    bitshuffle=False,       # for BLOSC, shuffle bits for better compression
)


def get_dl1(
    calibrated_event,
    subarray,
    telescope_id,
    dl1_container=None,
    custom_config={},
    use_main_island=True,
):
    """
    Return a DL1ParametersContainer of extracted features from a calibrated
    event. The DL1ParametersContainer can be passed to be filled if created
    outside the function
def recon(fid, fout, *args):
    PMT_pos, event_count = args
    # global event_count,shell,PE,time_array,PMT_pos, fired_PMT
    '''
    reconstruction

    fid: root reference file converted to .h5
    fout: output file
    '''
    print(fid)  # filename

    class ReconData(tables.IsDescription):
        EventID = tables.Int64Col(pos=0)       # EventNo
        x = tables.Float16Col(pos=1)           # x position
        y = tables.Float16Col(pos=2)           # y position
        z = tables.Float16Col(pos=3)           # z position
        t0 = tables.Float16Col(pos=4)          # time offset
        E = tables.Float16Col(pos=5)           # energy
        tau_d = tables.Float16Col(pos=6)       # decay time constant
        success = tables.Int64Col(pos=7)       # recon success flag
        x_sph = tables.Float16Col(pos=8)       # x position
        y_sph = tables.Float16Col(pos=9)       # y position
        z_sph = tables.Float16Col(pos=10)      # z position
        E_sph = tables.Float16Col(pos=11)      # energy
        success_sph = tables.Int64Col(pos=12)  # recon success flag
        x_truth = tables.Float16Col(pos=13)    # x position
        y_truth = tables.Float16Col(pos=14)    # y position
        z_truth = tables.Float16Col(pos=15)    # z position
        E_truth = tables.Float16Col(pos=16)    # energy

    # Create the output file and the group
    h5file = tables.open_file(fout, mode="w", title="OneTonDetector",
                              filters=tables.Filters(complevel=9))
    group = "/"
    # Create tables
    ReconTable = h5file.create_table(group, "Recon", ReconData, "Recon")
    recondata = ReconTable.row

    # Loop over events
    h = tables.open_file(fid, 'r')
    rawdata = h.root.GroundTruth
    EventID = rawdata[:]['EventID']
    ChannelID = rawdata[:]['ChannelID']
    Time = rawdata[:]['PETime']
    h.close()

    for i in np.arange(np.max(EventID)):
        event_count = event_count + 1
        index = (EventID == event_count)
        pe_array = np.zeros(np.size(
            PMT_pos[:, 1]))  # Photons on each PMT (PMT size * 1 vector)
        fired_PMT = ChannelID[index]
        for j in np.arange(np.size(fired_PMT)):
            pe_array[fired_PMT[j]] = pe_array[fired_PMT[j]] + 1
        fired_PMT = fired_PMT.astype(int)
        time_array = Time[index]

        # filter
        index_1 = (time_array > np.mean(time_array) - 100) & \
                  (time_array < np.mean(time_array) + 100)
        time_array = time_array[index_1]
        fired_PMT = fired_PMT[index_1]

        PMT_No = np.unique(fired_PMT)
        time_final = np.zeros(np.size(PMT_No))
        fired_final = np.zeros(np.size(PMT_No))
        for j, k in enumerate(PMT_No):
            time_final[j] = np.min(time_array[fired_PMT == k])
            fired_final[j] = k
        time_array = time_final
        fired_PMT = fired_final.astype(int)

        x0 = np.zeros((1, 4))
        x0[0][0] = np.mean(time_array) - 26
        x0[0][1] = np.sum(pe_array * PMT_pos[:, 0]) / np.sum(pe_array)
        x0[0][2] = np.sum(pe_array * PMT_pos[:, 1]) / np.sum(pe_array)
        x0[0][3] = np.sum(pe_array * PMT_pos[:, 2]) / np.sum(pe_array)

        # Constraints
        E_min = 0.01
        E_max = 10
        tau_min = 0.01
        tau_max = 100
        t0_min = -300
        t0_max = 300
        con_args = E_min, E_max, tau_min, tau_max, t0_min, t0_max
        cons_sph = con_sph(con_args)
        record = np.zeros((1, 4))

        result = minimize(Likelihood_Time, x0, method='SLSQP',
                          constraints=cons_sph,
                          args=(PMT_pos, fired_PMT, time_array))
        recondata['x_sph'] = result.x[1]
        recondata['y_sph'] = result.x[2]
        recondata['z_sph'] = result.x[3]
        recondata['success_sph'] = result.success
        vertex = result.x[1:4]
        print(result.x, np.sqrt(np.sum(vertex**2)))
        recondata.append()

    # Flush into the output file
    ReconTable.flush()
    h5file.close()
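# A minimal sketch of querying the Recon table written above with a PyTables
# in-kernel condition; the file name 'recon.h5' is illustrative.
import numpy as np
import tables

with tables.open_file("recon.h5", mode="r") as f:
    ok = f.root.Recon.read_where("success_sph == 1")
    r = np.sqrt(ok["x_sph"]**2 + ok["y_sph"]**2 + ok["z_sph"]**2)
    print(len(ok), "successful fits")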
def load_fruitspeech(fruit_list=['apple', 'pineapple']):
    # Check if dataset is in the data directory.
    data_path = os.path.join(os.path.split(__file__)[0], "data")
    if not os.path.exists(data_path):
        os.makedirs(data_path)

    dataset = 'audio.tar.gz'
    data_file = os.path.join(data_path, dataset)
    if os.path.isfile(data_file):
        dataset = data_file

    if not os.path.isfile(data_file):
        try:
            import urllib
            urllib.urlretrieve('http://google.com')  # probe: raises AttributeError on Python 3
            url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
        except AttributeError:
            import urllib.request as urllib
            url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
        print('Downloading data from %s' % url)
        urllib.urlretrieve(url, data_file)

    print('... loading data')
    if not os.path.exists(os.path.join(data_path, "audio")):
        tar = tarfile.open(data_file)
        os.chdir(data_path)
        tar.extractall()
        tar.close()

    h5_file_path = os.path.join(data_path, "saved_all_fruit.h5")
    if not os.path.exists(h5_file_path):
        audio_matches = []
        data_path = os.path.join(data_path, "audio")
        for root, dirnames, filenames in os.walk(data_path):
            for fruit in fruit_list:
                for filename in fnmatch.filter(filenames, fruit + '*.wav'):
                    audio_matches.append(os.path.join(root, filename))

        random.seed(1999)
        random.shuffle(audio_matches)

        # http://mail.scipy.org/pipermail/numpy-discussion/2011-March/055219.html
        h5_file = tables.open_file(h5_file_path, mode='w')
        data_x = h5_file.create_vlarray(h5_file.root, 'data_x',
                                        tables.Float32Atom(shape=()),
                                        filters=tables.Filters(1))
        data_y = h5_file.create_vlarray(h5_file.root, 'data_y',
                                        tables.Int32Atom(shape=()),
                                        filters=tables.Filters(1))
        for wav_path in audio_matches:
            # Convert chars to int classes
            word = wav_path.split(os.sep)[-1][:-6]
            chars = [ord(c) - 97 for c in word]
            data_y.append(np.array(chars, dtype='int32'))
            fs, d = wavfile.read(wav_path)
            # Preprocessing from A. Graves "Towards End-to-End Speech
            # Recognition"
            data_x.append(d.astype('float32'))
        h5_file.close()

    h5_file = tables.open_file(h5_file_path, mode='r')
    data_x = h5_file.root.data_x
    data_y = h5_file.root.data_y

    # FIXME: HACKING
    train_x = data_x
    train_y = data_y
    valid_x = data_x
    valid_y = data_y
    test_x = data_x
    test_y = data_y
    rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]
    return rval
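# A minimal sketch of reading the ragged VLArrays back: each data_y row stores
# a word as ints (ord(c) - 97), so the label can be recovered by inverting the
# mapping. The path follows the snippet above.
import tables

with tables.open_file("data/saved_all_fruit.h5", mode="r") as f:
    for chars, wav in zip(f.root.data_y, f.root.data_x):
        word = "".join(chr(int(c) + 97) for c in chars)
        # `wav` is the float32 waveform for this utterance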
import warnings

import numpy as np
import tables

# Types that should be saved as pytables attributes
ATTR_TYPES = (int, float, bool, str,
              np.int8, np.int16, np.int32, np.int64,
              np.uint8, np.uint16, np.uint32, np.uint64,
              np.float16, np.float32, np.float64,
              np.bool_, np.complex64, np.complex128)

try:
    COMPRESSION = tables.Filters(complevel=9, complib='blosc', shuffle=True)
except Exception:
    warnings.warn("Missing BLOSC; no compression will be used.")
    COMPRESSION = tables.Filters()


def _save_level(handler, group, level, name=None):
    if isinstance(level, dict):
        # First create a new group
        new_group = handler.create_group(group, name,
                                         "dict:{}".format(len(level)))
        for k, v in level.items():
            _save_level(handler, new_group, v, name=k)
    elif isinstance(level, list):
        # Lists can contain other dictionaries and numpy arrays, so we can't
        # serialize them directly. Instead, store each entry as i0, i1, etc.
        new_group = handler.create_group(group, name,
                                         "list:{}".format(len(level)))
        for i, entry in enumerate(level):
            level_name = 'i{}'.format(i)
            _save_level(handler, new_group, entry, name=level_name)
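# A minimal sketch of driving _save_level(); the top-level call is an
# assumption, and leaf values (arrays, scalars) are presumed to be handled by
# branches of _save_level not shown above.
import numpy as np
import tables

data = {"params": {"lr": 0.01}, "layers": [np.zeros(3), np.ones(3)]}
with tables.open_file("state.h5", mode="w", filters=COMPRESSION) as handler:
    _save_level(handler, handler.root, data, name="data")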
def writePangenome(pangenome, filename, force, disable_bar=False):
    """
    Writes or updates a pangenome file.

    :param pangenome: the pangenome object to write
    :param filename: the h5 file to write to
    :param force: whether to force-rewrite parts that are already in the file
    :param disable_bar: whether to disable the progress bar
    """
    if pangenome.status["genomesAnnotated"] == "Computed":
        compressionFilter = tables.Filters(complevel=1, shuffle=True,
                                           bitshuffle=True, complib='blosc:zstd')
        h5f = tables.open_file(filename, "w", filters=compressionFilter)
        logging.getLogger().info("Writing genome annotations...")
        writeAnnotations(pangenome, h5f, disable_bar=disable_bar)
        pangenome.status["genomesAnnotated"] = "Loaded"
        h5f.close()
    elif pangenome.status["genomesAnnotated"] in ["Loaded", "inFile"]:
        pass
    else:
        # if the pangenome is neither Computed nor Loaded, it's probably not in a good state
        # (or something new was coded).
        raise NotImplementedError("Something REALLY unexpected and unplanned for happened here. "
                                  "Please post an issue on github with what you did to reach this error.")

    # from there on, appending to the existing file.
    h5f = tables.open_file(filename, "a")
    if pangenome.status["geneSequences"] == "Computed":
        logging.getLogger().info("Writing the protein coding gene dna sequences...")
        writeGeneSequences(pangenome, h5f, disable_bar=disable_bar)
        pangenome.status["geneSequences"] = "Loaded"
    if pangenome.status["genesClustered"] == "Computed":
        logging.getLogger().info("Writing gene families and gene associations...")
        writeGeneFamilies(pangenome, h5f, force, disable_bar=disable_bar)
        logging.getLogger().info("Writing gene families information...")
        writeGeneFamInfo(pangenome, h5f, force, disable_bar=disable_bar)
        if pangenome.status["genomesAnnotated"] in ["Loaded", "inFile"] and \
                pangenome.status["defragmented"] == "Computed":
            # if the annotations have not been computed in this run,
            # and there has been a clustering with defragmentation, then the annotations can be updated
            updateGeneFragments(pangenome, h5f, disable_bar=disable_bar)
        pangenome.status["genesClustered"] = "Loaded"
    if pangenome.status["neighborsGraph"] == "Computed":
        logging.getLogger().info("Writing the edges...")
        writeGraph(pangenome, h5f, force, disable_bar=disable_bar)
        pangenome.status["neighborsGraph"] = "Loaded"
    if pangenome.status["partitionned"] == "Computed" and \
            pangenome.status["genesClustered"] in ["Loaded", "inFile"]:
        # otherwise, it's been written already.
        updateGeneFamPartition(pangenome, h5f, disable_bar=disable_bar)
        pangenome.status["partitionned"] = "Loaded"
    if pangenome.status['predictedRGP'] == "Computed":
        logging.getLogger().info("Writing Regions of Genomic Plasticity...")
        writeRGP(pangenome, h5f, force, disable_bar=disable_bar)
        pangenome.status['predictedRGP'] = "Loaded"
    if pangenome.status["spots"] == "Computed":
        logging.getLogger().info("Writing Spots of Insertion...")
        writeSpots(pangenome, h5f, force, disable_bar=disable_bar)
        pangenome.status['spots'] = "Loaded"
    if pangenome.status["modules"] == "Computed":
        logging.getLogger().info("Writing Modules...")
        writeModules(pangenome, h5f, force, disable_bar=disable_bar)
        pangenome.status["modules"] = "Loaded"

    writeStatus(pangenome, h5f)
    writeInfo(pangenome, h5f)

    h5f.close()
    logging.getLogger().info(f"Done writing the pangenome. It is in file: {filename}")
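# A minimal sketch of the create-then-append pattern used above: write the
# first stage with the zstd filter set, close, and reopen in append mode for
# later stages. File and group names are illustrative.
import tables

filt = tables.Filters(complevel=1, shuffle=True, bitshuffle=True,
                      complib='blosc:zstd')
with tables.open_file("pangenome_example.h5", "w", filters=filt) as h5f:
    h5f.create_group("/", "annotations")
with tables.open_file("pangenome_example.h5", "a") as h5f:
    h5f.create_group("/", "geneFamilies")  # added by a later stage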
def __init__(self, options, model_dir="model/",
             rand=N.random.RandomState(123)):
    """
    :param options:
        rgbd: 0 for RGB, 1 for RGB + depth
        shrink: amount to shrink channels
        n_orient: number of orientations per gradient scale
        grd_smooth_rad: radius for image gradient smoothing
        grd_norm_rad: radius for gradient normalization
        reg_smooth_rad: radius for reg channel smoothing
        ss_smooth_rad: radius for sim channel smoothing
        p_size: size of image patches
        g_size: size of ground truth patches
        n_cell: number of self similarity cells

        n_pos: number of positive patches per tree
        n_neg: number of negative patches per tree
        fraction: fraction of features to use to train each tree
        n_tree: number of trees in forest to train
        n_class: number of classes (clusters) for binary splits
        min_count: minimum number of data points to allow split
        min_child: minimum number of data points allowed at child nodes
        max_depth: maximum depth of tree
        split: options include 'gini', 'entropy' and 'twoing'
        discretize: optional function mapping structured to class labels

        stride: stride at which to compute edges
        sharpen: sharpening amount (can only decrease after training)
        n_tree_eval: number of trees to evaluate per location
        nms: if true apply non-maximum suppression to edges

    :param model_dir: directory for model
        A trained model will contain
        thrs: threshold corresponding to each feature index
        fids: feature indices for each node
        cids: indices of children for each node
        edge_bnds: begin / end of edge points for each node
        edge_pts: edge points for each node
        n_seg: number of segmentations for each node
        segs: segmentation map for each node

    :param rand: random number generator
    """
    BaseStructuredForests.__init__(self, options)
    assert self.options["g_size"] % 2 == 0
    assert self.options["stride"] % self.options["shrink"] == 0

    self.model_dir = model_dir
    self.data_dir = os.path.join(self.model_dir, "data")
    self.tree_dir = os.path.join(self.model_dir, "trees")
    self.forest_dir = os.path.join(self.model_dir, "forests")
    self.data_prefix = "data_"
    self.tree_prefix = "tree_"
    self.forest_name = "forest.h5"
    self.comp_filt = tables.Filters(complib="zlib", complevel=1)

    self.trained = False
    try:
        self.load_model()
    except Exception:
        self.model = {}
        print("No model file found. Training is required.", file=sys.stderr)

    self.rand = rand
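# A minimal sketch, and only an assumption about the on-disk layout, of how a
# trained-model array such as `thrs` could be written to forest.h5 with the
# zlib filter configured above; load_model()'s actual format is not shown.
import numpy as np
import tables

comp_filt = tables.Filters(complib="zlib", complevel=1)
with tables.open_file("forest.h5", "w", filters=comp_filt) as f:
    f.create_carray(f.root, "thrs",
                    obj=np.random.rand(100).astype(np.float32))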
from __future__ import print_function import tables import numpy as np # Create a VLArray: fileh = tables.open_file('vlarray1.h5', mode='w') vlarray = fileh.create_vlarray(fileh.root, 'vlarray1', tables.Int32Atom(shape=()), "ragged array of ints", filters=tables.Filters(1)) # Append some (variable length) rows: vlarray.append(np.array([5, 6])) vlarray.append(np.array([5, 6, 7])) vlarray.append([5, 6, 9, 8]) # Now, read it through an iterator: print('-->', vlarray.title) for x in vlarray: print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, x)) # Now, do the same with native Python strings. vlarray2 = fileh.create_vlarray(fileh.root, 'vlarray2', tables.StringAtom(itemsize=2), "ragged array of strings", filters=tables.Filters(1)) vlarray2.flavor = 'python' # Append some (variable length) rows: print('-->', vlarray2.title) vlarray2.append(['5', '66'])
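# Continuing the sketch above: with flavor='python', reads come back as plain
# Python lists (byte strings on Python 3) instead of NumPy arrays.
vlarray2.append(['66', '5'])
for x in vlarray2:
    print('%s[%d]--> %s' % (vlarray2.name, vlarray2.nrow, x))
fileh.close()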
def getIntensityProfile(masked_image_file, skeletons_file, intensities_file,
                        width_resampling=15, length_resampling=131,
                        min_num_skel=100, smooth_win=11, pol_degree=3,
                        width_percentage=0.5, save_int_maps=False):
    assert smooth_win > pol_degree
    assert min_num_skel > 0
    assert 0 < width_percentage < 1

    # we want to use a symmetrical distance centered on the skeleton
    if length_resampling % 2 == 0:
        length_resampling += 1
    if width_resampling % 2 == 0:
        width_resampling += 1

    # get the limits to be averaged from the intensity map
    if save_int_maps:
        width_win_ind = getWidthWinLimits(width_resampling, width_percentage)
    else:
        width_win_ind = (0, width_resampling)

    # filters for the table structures
    table_filters = tables.Filters(complevel=5, complib='zlib',
                                   shuffle=True, fletcher32=True)

    # Get a reduced version of the trajectories_data table with only the valid skeletons.
    # The rows of this new table are going to be saved into skeletons_file
    trajectories_data_valid = setIntMapIndexes(skeletons_file, min_num_skel)

    # let's save this new table into the intensities file
    with tables.File(intensities_file, 'w') as fid:
        fid.create_table('/', 'trajectories_data_valid',
                         obj=trajectories_data_valid.to_records(index=False),
                         filters=table_filters)

    tot_rows = len(trajectories_data_valid)
    if tot_rows == 0:
        with tables.File(intensities_file, "r+") as int_file_id:
            # nothing to do here, let's save empty data and get out
            worm_int_avg_tab = int_file_id.create_array(
                "/", "straighten_worm_intensity_median", obj=np.zeros(0))
            worm_int_avg_tab._v_attrs['has_finished'] = 1
            return

    with tables.File(masked_image_file, 'r') as mask_fid, \
            tables.File(skeletons_file, 'r') as ske_file_id, \
            tables.File(intensities_file, "r+") as int_file_id:

        # pointer to the compressed videos
        mask_dataset = mask_fid.get_node("/mask")

        # pointer to skeletons
        skel_tab = ske_file_id.get_node('/skeleton')
        skel_width_tab = ske_file_id.get_node('/width_midbody')

        # we are using Float16 to save space, assuming the intensities fit in the uint8 range
        worm_int_avg_tab = int_file_id.create_carray(
            "/", "straighten_worm_intensity_median",
            tables.Float16Atom(dflt=np.nan),
            (tot_rows, length_resampling),
            chunkshape=(1, length_resampling),
            filters=table_filters)

        worm_int_avg_tab._v_attrs['has_finished'] = 0
        worm_int_avg_tab.attrs['width_win_ind'] = width_win_ind

        if save_int_maps:
            worm_int_tab = int_file_id.create_carray(
                "/", "straighten_worm_intensity",
                tables.Float16Atom(dflt=np.nan),
                (tot_rows, length_resampling, width_resampling),
                chunkshape=(1, length_resampling, width_resampling),
                filters=table_filters)

        # variables used to report progress
        base_name = skeletons_file.rpartition('.')[0].rpartition(
            os.sep)[-1].rpartition('_')[0]
        progressTime = timeCounterStr('Obtaining intensity maps.')

        for frame, frame_data in trajectories_data_valid.groupby('frame_number'):
            img = mask_dataset[frame, :, :]
            for ii, row_data in frame_data.iterrows():
                skeleton_id = int(row_data['skeleton_id'])
                worm_index = int(row_data['worm_index_joined'])
                int_map_id = int(row_data['int_map_id'])

                # read ROI and skeleton, and put them in the same coordinate map
                worm_img, roi_corner = getWormROI(img, row_data['coord_x'],
                                                  row_data['coord_y'],
                                                  row_data['roi_size'])
                skeleton = skel_tab[skeleton_id, :, :] - roi_corner
                half_width = skel_width_tab[skeleton_id] / 2
                assert not np.isnan(skeleton[0, 0])

                skel_smooth = smoothSkeletons(
                    skeleton, length_resampling=length_resampling,
                    smooth_win=smooth_win, pol_degree=pol_degree)
                straighten_worm, grid_x, grid_y = getStraightenWormInt(
                    worm_img, skel_smooth, half_width=half_width,
                    width_resampling=width_resampling)

                # if you use the mean, it is better not to use float16
                int_avg = np.median(
                    straighten_worm[width_win_ind[0]:width_win_ind[1], :],
                    axis=0)

                worm_int_avg_tab[int_map_id] = int_avg

                # only save the full map if it is specified by the user
                if save_int_maps:
                    worm_int_tab[int_map_id] = straighten_worm.T

            if frame % 500 == 0:
                progress_str = progressTime.getStr(frame)
                print_flush(base_name + ' ' + progress_str)

        worm_int_avg_tab._v_attrs['has_finished'] = 1
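# A minimal sketch of reading the results written above, including the
# attributes used as progress/window flags; the file name is illustrative.
import tables

with tables.open_file("intensities.h5", "r") as f:
    node = f.get_node("/straighten_worm_intensity_median")
    if node._v_attrs["has_finished"] == 1:
        profile = node[0]                     # one resampled intensity profile
        w0, w1 = node.attrs["width_win_ind"]  # window averaged over the width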
ind, = np.where(
    # (np.int8(data[:, column['fmode']]) == flag['fmode']) &
    (np.int8(data[:, column['fret']]) == flag['fret']) &
    (np.int8(data[:, column['fprob']]) == flag['fprob']) &
    (np.int8(data[:, column['fmask']]) != flag['fmask'])
    # & (np.int8(data[:, column['fbord']]) == flag['fbord'])
    # & (np.int8(data[:, column['fsep']]) == flag['fsep'])
)
nfiltered += data.shape[0] - ind.shape[0]
if ind.shape[0] > 0:
    fout = tb.open_file(os.path.splitext(f)[0] + ext, 'w')
    shape = data[ind, :].shape
    atom = tb.Atom.from_dtype(data.dtype)
    filters = tb.Filters(complib='blosc', complevel=9)
    dout = fout.create_carray(fout.root, 'data', atom=atom,
                              shape=shape, filters=filters)
    dout[:] = data[ind, :]
    fout.close()
fin.close()

perc = (float(nfiltered) / npoints) * 100
nleft = npoints - nfiltered
print('done!')
print('total points:', npoints)
print('filtered out: %d (%.1f%%)' % (nfiltered, perc))
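# A minimal sketch of reading one of the filtered files back; the node name
# 'data' matches the CArray created above, the file name is illustrative.
import tables as tb

with tb.open_file("points_filt.h5", "r") as f:
    head = f.root.data[:10, :]  # slices are decompressed chunk by chunk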
def scan(self, mask_steps=4, repeat_command=100, columns=[True] * 16,
         **kwargs):
    '''Scan loop

    Parameters
    ----------
    mask_steps : int
        Number of mask steps.
    repeat_command : int
        Number of injections.
    '''
    self.not_fired = []
    path = "/home/carlo/fe65_p2/firmware/ise/digital_scan_bits/"
    self.bitfiles = OrderedDict([(160, "fe65p2_mio_160.bit"),
                                 (144, "fe65p2_mio_144.bit"),
                                 (120, "fe65p2_mio_120.bit"),
                                 (96, "fe65p2_mio_96.bit"),
                                 (72, "fe65p2_mio_72.bit"),
                                 (64, "fe65p2_mio_64.bit"),
                                 (48, "fe65p2_mio_48.bit")])
    self.voltages = [2.0, 1.8, 1.6, 1.4, 1.2, 1.0, 0.9, 0.85]

    for freq in self.bitfiles.keys():
        logging.info("Loading " + self.bitfiles[freq])  # loading bitfile
        self.dut['intf']._sidev.DownloadXilinx(path + self.bitfiles[freq])

        for volt in self.voltages:
            # change the supply voltage
            self.dut['VDDA'].set_current_limit(200, unit='mA')
            self.dut['VDDA'].set_voltage(volt, unit='V')
            self.dut['VDDA'].set_enable(True)
            self.dut['VDDD'].set_voltage(volt, unit='V')
            self.dut['VDDD'].set_enable(True)
            self.dut['VAUX'].set_voltage(volt, unit='V')
            self.dut['VAUX'].set_enable(True)
            logging.info(self.dut.power_status())  # log the power supply status

            self.run_name = time.strftime("%Y%m%d_%H%M%S_") + "_" + \
                str(freq) + "MHz_" + str(volt) + "V"
            self.output_filename = os.path.join(self.working_dir, self.run_name)
            self._first_read = False
            self.scan_param_id = 0

            # .h5 output management
            filename = self.output_filename + '.h5'
            filter_raw_data = tb.Filters(complib='blosc', complevel=5,
                                         fletcher32=False)
            self.filter_tables = tb.Filters(complib='zlib', complevel=5,
                                            fletcher32=False)
            self.h5_file = tb.open_file(filename, mode='w', title=self.scan_id)
            self.raw_data_earray = self.h5_file.create_earray(
                self.h5_file.root, name='raw_data', atom=tb.UIntAtom(),
                shape=(0, ), title='raw_data', filters=filter_raw_data)
            self.meta_data_table = self.h5_file.create_table(
                self.h5_file.root, name='meta_data', description=MetaTable,
                title='meta_data', filters=self.filter_tables)
            self.meta_data_table.attrs.kwargs = yaml.dump(kwargs)

            self.dut['control']['RESET'] = 0b00
            self.dut['control'].write()
            time.sleep(0.1)

            self.fifo_readout = FifoReadout(self.dut)

            # write InjEnLd & PixConfLd to '1'
            self.dut['pixel_conf'].setall(True)
            self.dut.write_pixel_col()
            self.dut['global_conf']['SignLd'] = 1
            self.dut['global_conf']['InjEnLd'] = 1
            self.dut['global_conf']['TDacLd'] = 0b1111
            self.dut['global_conf']['PixConfLd'] = 0b11
            self.dut.write_global()

            # write SignLd & TDacLd to '0'
            self.dut['pixel_conf'].setall(False)
            self.dut.write_pixel_col()
            self.dut['global_conf']['SignLd'] = 0
            self.dut['global_conf']['InjEnLd'] = 0
            self.dut['global_conf']['TDacLd'] = 0b0000
            self.dut['global_conf']['PixConfLd'] = 0b00
            self.dut.write_global()

            # test hit
            self.dut['global_conf']['TestHit'] = 1
            self.dut['global_conf']['SignLd'] = 0
            self.dut['global_conf']['InjEnLd'] = 0
            self.dut['global_conf']['TDacLd'] = 0
            self.dut['global_conf']['PixConfLd'] = 0
            self.dut['global_conf']['OneSr'] = 0  # all multi columns in parallel
            self.dut['global_conf']['ColEn'][:] = bitarray.bitarray(columns)
            self.dut.write_global()

            self.dut['control']['RESET'] = 0b01
            self.dut['control']['DISABLE_LD'] = 1
            self.dut['control'].write()

            self.dut['control']['CLK_OUT_GATE'] = 1
            self.dut['control']['CLK_BX_GATE'] = 1
            self.dut['control'].write()
            time.sleep(0.1)

            self.dut['control']['RESET'] = 0b11
            self.dut['control'].write()

            # enable testhit pulse and trigger
            wait_for_read = (16 + columns.count(True) *
                             (4 * 64 // mask_steps) * 2) * (20 // 2) + 100
            # this should be based on mask and enabled columns
            self.dut['testhit'].set_delay(wait_for_read)
            self.dut['testhit'].set_width(3)
            self.dut['testhit'].set_repeat(repeat_command)
            self.dut['testhit'].set_en(False)

            self.dut['trigger'].set_delay(400 - 4)
            self.dut['trigger'].set_width(8)
            self.dut['trigger'].set_repeat(1)
            self.dut['trigger'].set_en(True)

            lmask = [1] + ([0] * (mask_steps - 1))
            lmask = lmask * ((64 * 64) // mask_steps + 1)
            lmask = lmask[:64 * 64]
            bv_mask = bitarray.bitarray(lmask)

            with self.readout():
                for i in range(mask_steps):
                    self.dut['pixel_conf'][:] = bv_mask
                    bv_mask[1:] = bv_mask[0:-1]
                    bv_mask[0] = 0
                    self.dut.write_pixel_col()
                    time.sleep(0.1)

                    self.dut['testhit'].start()

                    if os.environ.get('TRAVIS'):
                        logging.debug('.')

                    while not self.dut['testhit'].is_done():
                        pass

                    while not self.dut['trigger'].is_done():
                        pass

                # just some time for the last read
                self.dut['trigger'].set_en(False)
            self.fifo_readout.print_readout_status()

            self.meta_data_table.attrs.power_status = yaml.dump(
                self.dut.power_status())
            self.meta_data_table.attrs.dac_status = yaml.dump(
                self.dut.dac_status())
            self.h5_file.close()
            logging.info('Data Output Filename: %s',
                         self.output_filename + '.h5')
            self.analyze()
            self.shmoo_plotting()
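# A minimal sketch of opening one scan output file written above and reading
# back the raw FIFO words plus the kwargs stored as a yaml attribute; the
# file name is illustrative, and yaml.safe_load assumes plain kwargs values.
import tables as tb
import yaml

with tb.open_file("20180101_120000__160MHz_1.2V.h5", "r") as f:
    raw = f.root.raw_data[:]   # uint32 words from the FIFO
    meta = f.root.meta_data[:]
    kwargs = yaml.safe_load(f.root.meta_data.attrs.kwargs)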