def test02_CompareTable(self):
    "Comparing written time data with read data in a Table."

    wtime = 1234567890.123456

    # Create test Table with data.
    h5file = tables.openFile(
        self.h5fname, 'w', title="Test for comparing Time tables")
    tbl = h5file.createTable('/', 'test', self.MyTimeRow)
    row = tbl.row
    row['t32col'] = int(wtime)
    row['t64col'] = (wtime, wtime)
    row.append()
    h5file.close()

    # Check the written data.
    h5file = tables.openFile(self.h5fname)
    recarr = h5file.root.test.read(0)
    h5file.close()

    self.assertEqual(recarr['t32col'][0], int(wtime),
                     "Stored and retrieved values do not match.")

    comp = (recarr['t64col'][0] == numpy.array((wtime, wtime)))
    self.assertTrue(numpy.alltrue(comp),
                    "Stored and retrieved values do not match.")
def __init__(self, h5parmFile, readonly=True, complevel=5, complib='zlib'):
    """
    Keyword arguments:
    h5parmFile -- H5parm filename
    readonly -- if True the table is open in readonly mode (default=True)
    complevel -- compression level from 0 to 9 (default=5) when creating the file
    complib -- library for compression: lzo, zlib, bzip2 (default=zlib)
    """
    if os.path.isfile(h5parmFile):
        if tables.is_pytables_file(h5parmFile) == None:
            logging.critical('Wrong HDF5 format for ' + h5parmFile + '.')
            raise Exception('Wrong HDF5 format for ' + h5parmFile + '.')
        if readonly:
            logging.debug('Reading from ' + h5parmFile + '.')
            self.H = tables.openFile(h5parmFile, 'r')
        else:
            logging.debug('Appending to ' + h5parmFile + '.')
            self.H = tables.openFile(h5parmFile, 'r+')
    else:
        if readonly:
            raise Exception('Missing file ' + h5parmFile + '.')
        else:
            logging.debug('Creating ' + h5parmFile + '.')
            # add a compression filter
            f = tables.Filters(complevel=complevel, complib=complib)
            self.H = tables.openFile(h5parmFile, filters=f, mode='w')

    self.fileName = h5parmFile
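# A brief usage sketch for the constructor above. The enclosing class name
# H5parm is an assumption (the class statement is not shown in this snippet),
# and 'calibration.h5' is a hypothetical file name.
if __name__ == '__main__':
    hp = H5parm('calibration.h5', readonly=False)  # creates the file if it does not exist
    hp.H.close()
    hp = H5parm('calibration.h5')  # reopen read-only; a missing file raises Exception
    hp.H.close()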
def test03b_Compare64EArray(self):
    "Comparing several written and read 64-bit time values in an EArray."

    # Create test EArray with data.
    h5file = tables.openFile(
        self.h5fname, 'w', title="Test for comparing Time64 E arrays")
    ea = h5file.createEArray('/', 'test', tables.Time64Atom(), shape=(0, 2))

    # Size of the test.
    nrows = ea.nrowsinbuf + 34  # Add some more rows than buffer.
    # Only for home checks; the value above should check better
    # the I/O with multiple buffers.
    ##nrows = 10

    for i in xrange(nrows):
        j = i * 2
        ea.append(((j + 0.012, j + 1 + 0.012),))
    h5file.close()

    # Check the written data.
    h5file = tables.openFile(self.h5fname)
    arr = h5file.root.test.read()
    h5file.close()

    orig_val = numpy.arange(0, nrows * 2, dtype=numpy.int32) + 0.012
    orig_val.shape = (nrows, 2)
    if common.verbose:
        print "Original values:", orig_val
        print "Retrieved values:", arr
    self.assertTrue(allequal(arr, orig_val),
                    "Stored and retrieved values do not match.")
def main():
    import sys
    filename = sys.argv[1]
    from pyphant.core import KnowledgeManager
    km = KnowledgeManager.KnowledgeManager.getInstance()
    import os.path
    km.registerURL("file://" + os.path.realpath(filename))
    import tables
    h5 = tables.openFile(filename, 'r+')
    from pyphant.core import PyTablesPersister
    recipe = PyTablesPersister.loadRecipe(h5)
    executionOrders = PyTablesPersister.loadExecutionOrders(h5)
    h5.close()
    from pyphant.core.Emd5Src import Emd5Src
    for order in executionOrders:
        for socket, emd5 in order[0].iteritems():
            sSpec = socket.split('.')
            w = recipe.getWorker(sSpec[0])
            s = getattr(w, sSpec[-1])
            src = Emd5Src(recipe)
            src.paramEmd5.value = emd5
            if s.isFull():
                s.pullPlug()
            s.insert(src.plugGetDataContainer)
        pSpec = order[1][0].split('.')
        d = recipe.getWorker(pSpec[0])
        plug = getattr(d, pSpec[1])
        res = plug.getResult()
        res.seal()
        h5 = tables.openFile(filename, 'r+')
        PyTablesPersister.saveResult(res, h5)
        h5.close()
def _write_image_tables(self, images, setname, descr):
    """
    Write images to file
    """
    import tables
    images = num.array(images)
    fname = self._make_fname()
    if os.path.exists(fname):
        h = tables.openFile(fname, mode="a")
        if not hasattr(h.root, 'image_data'):
            h.createGroup(h.root, 'image_data', "Image Data")
    else:
        h = tables.openFile(fname, mode="w", title="Scan Data Archive")
        root = h.createGroup(h.root, "image_data", "Image Data")
    # if setname == None:
    #    look under '/images for 'SXXX'
    #    find the highest one and
    #    auto generate set name as next in the sequence
    if hasattr(h.root.image_data, setname):
        print "Warning: Image Archive File '%s'" % fname
        print "-->Setname '%s' already exists, data is not overwritten\n" % setname
    else:
        h.createGroup('/image_data', setname, "Image Data")
        grp = '/image_data/' + setname
        h.createArray(grp, 'images', images, descr)
    h.close()
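# A hedged counterpart sketch showing how arrays written by the method above
# could be read back; read_image_set is a hypothetical helper and not part of
# the original class.
import tables

def read_image_set(fname, setname):
    """Return the 'images' array stored under /image_data/<setname>."""
    h = tables.openFile(fname, mode='r')
    try:
        grp = getattr(h.root.image_data, setname)
        images = grp.images.read()
    finally:
        h.close()
    return images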
def connect(self, filename):
    """
    Opens / initialises new HDF5 file. We rely on PyTables and keep all
    session management stuff there.
    """
    if not self.connected:
        try:
            if tb.isHDF5File(filename):
                self._data = tb.openFile(filename, mode='a', title=filename)
                self.connected = True
            else:
                raise TypeError('"%s" is not an HDF5 file format.' % filename)
        except IOError:
            # create a new file if specified file not found
            self._data = tb.openFile(filename, mode='w', title=filename)
            self.connected = True
        except:
            raise NameError("Incorrect file path, couldn't find or "
                            "create a file.")
        self.objects_by_ref = {}
        self.name_indices = {}
    else:
        logger.info('Already connected.')
def eigFieldValues(self, type, modename, pts, ptsName=None, saveDir=None):
    if saveDir != None:
        if saveDir[-1] != "/":
            saveDir += "/"
    res = []
    # if user supplies name and directory, search for existing data first.
    # Existing data must have the same dimensions of 'pts'
    if saveDir != None and ptsName != None and havePyTables:
        filepath = saveDir + self._fieldPrefixes[type] + "_" + ptsName + \
            "_eigVecs_" + modename + ".h5"
        if os.path.exists(filepath):
            h5 = tables.openFile(filepath, 'r')
            data = h5.root.data.read()
            h5.close()
            if len(data) == len(pts):
                return data
    for pt in pts:
        res.append(self.eigFieldValue(type, modename, pt))
    res = numpy.asarray(res)
    # save if name and directory given
    if saveDir != None and ptsName != None and havePyTables:
        h5 = tables.openFile(filepath, 'w')
        h5.createArray("/", "data", res)
        h5.close()
    return res
def _create_hash_lookup_file(cls, name):
    """
    (Re)creates a hash lookup file for a results directory. This file
    contains all file hashes in the directory so that the correct file for a
    given parameter set can be found quickly.

    :param str name: The name of the results.
    """
    name = os.path.join(cls.data_dir, name)
    hashfile_name = os.path.join(name, 'hash.h5')
    hash_file = tables.openFile(hashfile_name, mode='w')
    table = hash_file.createTable('/', 'lookup_table', HashEntry,
                                  title='Hash lookup')

    # Loop through files and write hashes
    file_names = [os.path.join(name, f) for f in os.listdir(name)]
    entry = table.row
    for fn in file_names:
        if not fn.endswith('.h5') or fn.endswith('hash.h5'):
            continue
        with tables.openFile(fn, 'r') as h5:
            file_hash = h5.getNodeAttr('/', '_hash')
            entry['hash'] = file_hash
            entry['filename'] = fn
            entry.append()
    hash_file.close()
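# A minimal sketch of querying the lookup file written above, assuming the
# same HashEntry column layout; find_file_for_hash is a hypothetical helper,
# not part of the original class.
import tables

def find_file_for_hash(hashfile_name, wanted_hash):
    """Return the filename stored for wanted_hash, or None if absent."""
    with tables.openFile(hashfile_name, 'r') as hash_file:
        table = hash_file.getNode('/', 'lookup_table')
        for row in table.iterrows():
            if row['hash'] == wanted_hash:
                return row['filename']
    return None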
def ComputeTargetStateZhuRabitzExperiment(prop, numEigs=0, eigFile=None,
                                          eigDataSet="/eigenvector",
                                          outFileName="zhu_rabitz_final_state.h5"):
    """
    Zhu and Rabitz use a gaussian projection operator to characterize their
    target space,

        P = gamma / sqrt(pi) * exp[-gamma**2 * (x - x')**2]

    This function computes |ZR> = sum(<i|P|i>|i>, i), where |i> is the i'th
    eigenfunction of the Morse oscillator.
    """
    # Setup Zhu-Rabitz operator
    ZhuRabitzOperator = eval(prop.Config.ZhuRabitzOperator.classname + "_1()")
    ZhuRabitzOperator.ApplyConfigSection(prop.Config.ZhuRabitzOperator)

    # Get tmpPsi
    tmpPsi = prop.psi.Copy()

    # Create vector to hold Zhu-Rabitz state
    ZhuRabitzState = numpy.zeros(numEigs, dtype=complex)

    h5file = tables.openFile(eigFile, "r")
    for i in range(numEigs):
        tmpPsi.Clear()
        prop.psi.GetData()[:] = h5file.getNode("%s%03i" % (eigDataSet, i))
        ZhuRabitzOperator.MultiplyPotential(prop.psi, tmpPsi, 0, 0)
        ZhuRabitzState[i] = prop.psi.InnerProduct(tmpPsi)

    outFile = tables.openFile(outFileName, "w")
    try:
        outFile.createArray("/", "ZhuRabitzFinalState", ZhuRabitzState)
    finally:
        outFile.close()
def get_val(sessions=None, masked=True, repeats=False):
    """Retrieves training data for given sessions.
    Default: all sessions, mask applied"""
    if sessions is None:
        sessions = range(3)
    if repeats:
        if isinstance(masked, bool) and masked:
            return tables.openFile(val_file_repeats).getNode("/alldata").read()
        elif isinstance(masked, np.ndarray):
            return tables.openFile(val_file_repeats).getNode("/alldata").read().reshape(
                270, 30, 10, 100, 100).transpose(0, 1, 3, 4, 2)[:, masked, :]
        else:
            raise Exception("Repeats data is masked")
    else:
        if isinstance(masked, bool) and masked:
            #mask = get_mask()
            mask = cortex.get_cortical_mask("MLfs", "20121210ML_auto1", "thick")
            return np.concatenate(
                [tables.openFile(t).getNode('/data').read()[:, mask]
                 for t in [val_files[i] for i in sessions]])
        elif isinstance(masked, np.ndarray):
            mask = masked
            return np.concatenate(
                [tables.openFile(t).getNode('/data').read()[:, mask]
                 for t in [val_files[i] for i in sessions]])
        else:
            raise NotImplementedError("This will exceed 4G of RAM")
def __init__(self, file_name, file_mode, compression_level=1, compression_lib='zlib'):
    '''Constructor

    For compatibility it is recommended the compression values are left at
    their defaults.

    Arguments:
    file_name -- Path to file
    file_mode -- How file should be opened i.e. r, w, a, r+
    compression_level -- Level of compression to use from 1 to 9
    compression_lib -- Compression library to use, see PyTables docs for options.
    '''
    compression_filters = Filters(complevel=compression_level,
                                  complib=compression_lib)

    if file_mode == "w":
        self._file_handle = openFile(file_name, file_mode,
                                     filters=compression_filters)
        self._data_group = self._file_handle.createGroup("/", "data")
        self._parameters_group = self._file_handle.createGroup("/", "parameters")
        self._priors_group = self._file_handle.createGroup("/", "priors")
        self._file_handle.setNodeAttr('/', 'creation_date', time.ctime())
    else:
        self._file_handle = openFile(file_name, file_mode)
        self._data_group = self._file_handle.root.data
        self._parameters_group = self._file_handle.root.parameters
        self._priors_group = self._file_handle.root.priors

    self._init_entries()
    self._init_chr_tables()
def test_write_to_hdf5():
    test_files = ["mcnp_ptrac_i4_little.ptrac", "mcnp_ptrac_i8_little.ptrac"]

    for test_file in test_files:
        p = mcnp.PtracReader(test_file)
        h5file = tables.openFile("mcnp_ptrac_hdf5_file.h5", "w")
        tab = h5file.createTable("/", "t", mcnp.PtracEvent, "test")
        p.write_to_hdf5_table(tab)
        tab.flush()
        h5file.close()
        del h5file
        del tab
        del p

        # now check if the data was correctly written.
        # there should be 5 events of type 1000 (src)
        h5file = tables.openFile("mcnp_ptrac_hdf5_file.h5")
        tab = h5file.getNode("/t")
        selected = [1 for x in tab.iterrows() if x["event_type"] == 1000]
        assert_equal(len(selected), 5)
        h5file.close()
        del tab
        del h5file

        # clean up
        if os.path.exists("mcnp_ptrac_hdf5_file.h5"):
            os.unlink("mcnp_ptrac_hdf5_file.h5")
def main(infile, dec_fs=600, outfile_suffix='dec', force_overwrite=False):
    fh_in = tables.openFile(infile, 'r')
    if fh_in.root._g_getnchildren() == 1:
        print 'Processing {}'.format(infile)
        outfile = infile.replace('raw', outfile_suffix)
        if path.exists(outfile) and not force_overwrite:
            raise IOError, '{} already exists'.format(outfile)
        fh_out = tables.openFile(outfile, 'w')
        output_node = fh_out.root
        input_node = fh_in.root._f_listNodes()[0]
        decimate_waveform(input_node, output_node,
                          source_fs=input_node._v_attrs['fs'],
                          dec_fs=dec_fs,
                          progress_callback=update_progress)
        # Add some extra metadata to the output node to help us in tracking
        # where the data came from
        output_node._v_attrs['source_file'] = infile
        output_node._v_attrs['source_pathname'] = input_node._v_pathname
        fh_out.close()
        fh_in.close()
    else:
        mesg = "Unable to process {}".format(infile)
        raise ValueError, mesg
def open_file(self):
    """
    Checks if underlying snapshot file is open and available.
    If not, opens the file.

    Returns
    --------
    f_hdf : ``PyTables file``
        HDF file handle.
    """
    if self.is_open() is True:
        logger.user_information('%s already open' % self.snap.fn_base)
        return self.f_hdf
    else:
        if os.path.exists(self.snap.filename):
            if valid_file(self.snap.filename):
                self.f_hdf = pyT.openFile(self.snap.filename, 'a')
            else:
                self.logger.error('File %s not HDF5 file.' % self.snap.fn_base)
        else:
            self.logger.user_message('Creating %s' % self.snap.fn_base)
            self.f_hdf = pyT.openFile(self.snap.filename, 'a')
        # TODO: Do not return - need to abstract f_hdf out of Snap
        return self.f_hdf
def compute_rab(self):
    '''
    Uses the current set of ICA realizations (pytabled) to compute K*(K-1)/2
    cross-correlation matrices; they are indexed via tuples. R(a,b) is much
    smaller than the ICA realizations (all R(a,b) matrices are generally
    smaller than ONE realization), so R(a,b) is also retained in memory.
    Recomputation of the R(a,b) matrices is forced.
    '''
    if not os.path.exists(self.rabDirectory):
        try:
            os.mkdir(self.rabDirectory)
        except OSError:
            pass
    icaFiles = sorted(os.listdir(self.icaDirectory))
    if len(icaFiles) == 0:
        raise RAICARICAException
    for fi in icaFiles:
        fiPtr = tb.openFile(os.path.join(self.icaDirectory, fi), 'r')
        si = fiPtr.getNode('/decomps/sources').read()
        fiPtr.close()
        i = np.int(deconstruct_file_name(fi)[1])
        print 'Working on R(%d,b)' % i
        for fj in icaFiles:
            j = np.int(deconstruct_file_name(fj)[1])
            if j > i:
                # sources assumed to have unit std. dev. but nonzero mean -
                # will behave badly if not!
                fjPtr = tb.openFile(os.path.join(self.icaDirectory, fj), 'r')
                sj = fjPtr.getNode('/decomps/sources').read()
                fjPtr.close()
                self.RabDict[(i, j)] = np.abs(corrmatrix(si, sj))
    # pickle the result
    rabPtr = open(os.path.join(self.rabDirectory, 'rabmatrix.db'), 'wb')
    cPickle.dump(self.RabDict, rabPtr, protocol=-1)
    rabPtr.close()
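# A hedged sketch of loading the pickled R(a,b) dictionary written above;
# load_rab is a hypothetical helper, not part of the original class.
import os
import cPickle

def load_rab(rab_directory):
    """Return the {(i, j): |correlation| matrix} dict stored by compute_rab."""
    rab_ptr = open(os.path.join(rab_directory, 'rabmatrix.db'), 'rb')
    try:
        rab_dict = cPickle.load(rab_ptr)
    finally:
        rab_ptr.close()
    return rab_dict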
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", dest="one_name", help="track file 1")
    parser.add_argument("-b", dest="two_name", help="track file 2")
    parser.add_argument("--atrack", dest="atrack", help="track name 1")
    parser.add_argument("--btrack", dest="btrack", help="track name 2")
    parser.add_argument("-o", dest="out_name", help="out track file")
    parser.add_argument("--floor", required=False, default=False)
    args = parser.parse_args()

    one = tb.openFile(args.one_name)
    two = tb.openFile(args.two_name)
    out = tb.openFile(args.out_name, "a")
    atrack = args.atrack
    btrack = args.btrack

    two_track = two.getNode("/" + btrack)
    if atrack == "all":
        for one_track in one.iterNodes("/"):
            run(one_track, two_track, out, args.floor)
    else:
        one_track = one.getNode("/" + atrack)
        run(one_track, two_track, out, args.floor)

    out.flush()
    out.close()
def test_ErrorSeries_no_index(h5f=None):
    if not h5f:
        h5f_path = tempfile.NamedTemporaryFile().name
        h5f = tables.openFile(h5f_path, "w")

    validation_error = series.ErrorSeries(error_name="validation_error",
                                          table_name="validation_error",
                                          hdf5_file=h5f,
                                          # empty tuple
                                          index_names=tuple(),
                                          title="Validation error with no index")

    # (1,1), (1,2) etc. are (epoch, minibatch) index
    validation_error.append(tuple(), 32.0)
    validation_error.append(tuple(), 30.0)
    validation_error.append(tuple(), 28.0)
    validation_error.append(tuple(), 26.0)

    h5f.close()

    h5f = tables.openFile(h5f_path, "r")
    table = h5f.getNode('/', 'validation_error')

    assert compare_lists(table.cols.validation_error[:],
                         [32.0, 30.0, 28.0, 26.0])
    assert not ("epoch" in dir(table.cols))
def __init__(self, fnContigLengths, fnWssd, overwrite, openMode,
             groupsToCheck=[], compression=False):
    self.compress = compression
    assert os.path.exists(fnContigLengths)
    if openMode == 'r':
        assert not overwrite
        assert os.path.exists(fnWssd), fnWssd

    debug_output('WssdBase: reading contig lengths from file %s' % fnContigLengths)

    self.mContigNameLen = {}
    for l in open(fnContigLengths, 'r'):
        l = l.replace('\n', '').split('\t')
        self.mContigNameLen[l[0]] = int(l[1])

    debug_output('WSSD space: %d contigs totaling %d bp' % (
        len(self.mContigNameLen), sum(self.mContigNameLen.values())))

    if overwrite or not os.path.exists(fnWssd):
        self.tbl = tables.openFile(fnWssd, 'w')
    else:
        if openMode == 'r':
            self.tbl = tables.openFile(fnWssd, 'r')
        else:
            self.tbl = tables.openFile(fnWssd, 'a')
def test_BasicStatisticsSeries_common_case(h5f=None):
    if not h5f:
        h5f_path = tempfile.NamedTemporaryFile().name
        h5f = tables.openFile(h5f_path, "w")

    stats_series = BasicStatisticsSeries(
        table_name="b_vector_statistics",
        hdf5_file=h5f,
        index_names=('epoch', 'minibatch'),
        title="Basic statistics for b vector indexed by epoch and minibatch")

    # (1,1), (1,2) etc. are (epoch, minibatch) index
    stats_series.append((1, 1), [0.15, 0.20, 0.30])
    stats_series.append((1, 2), [-0.18, 0.30, 0.58])
    stats_series.append((2, 1), [0.18, -0.38, -0.68])
    stats_series.append((2, 2), [0.15, 0.02, 1.9])

    h5f.close()

    h5f = tables.openFile(h5f_path, "r")
    table = h5f.getNode('/', 'b_vector_statistics')

    assert compare_lists(table.cols.epoch[:], [1, 1, 2, 2])
    assert compare_lists(table.cols.minibatch[:], [1, 2, 1, 2])
    assert compare_lists(table.cols.mean[:],
                         [0.21666667, 0.23333333, -0.29333332, 0.69], floats=True)
    assert compare_lists(table.cols.min[:],
                         [0.15000001, -0.18000001, -0.68000001, 0.02], floats=True)
    assert compare_lists(table.cols.max[:],
                         [0.30, 0.58, 0.18, 1.9], floats=True)
    assert compare_lists(table.cols.std[:],
                         [0.06236095, 0.31382939, 0.35640177, 0.85724366], floats=True)
def test_SharedParamsStatisticsWrapper_notimestamp(h5f=None):
    import numpy.random

    if not h5f:
        h5f_path = tempfile.NamedTemporaryFile().name
        h5f = tables.openFile(h5f_path, "w")

    stats = SharedParamsStatisticsWrapper(new_group_name="params",
                                          base_group="/",
                                          arrays_names=('b1', 'b2', 'b3'),
                                          hdf5_file=h5f,
                                          index_names=('epoch', 'minibatch'),
                                          store_timestamp=False)

    b1 = DD({'value': numpy.random.rand(5)})
    b2 = DD({'value': numpy.random.rand(5)})
    b3 = DD({'value': numpy.random.rand(5)})

    stats.append((1, 1), [b1, b2, b3])

    h5f.close()

    h5f = tables.openFile(h5f_path, "r")

    b1_table = h5f.getNode('/params', 'b1')
    b3_table = h5f.getNode('/params', 'b3')

    assert b1_table.cols.mean[0] - numpy.mean(b1.value) < 1e-3
    assert b3_table.cols.mean[0] - numpy.mean(b3.value) < 1e-3
    assert b1_table.cols.min[0] - numpy.min(b1.value) < 1e-3
    assert b3_table.cols.min[0] - numpy.min(b3.value) < 1e-3

    assert not ('timestamp' in dir(b1_table.cols))
def main_load_disp(app):
    """
    Local replacement for CompareApp::main(). Load displacement fields.
    """
    field = "displacements"
    filename = "results/strikeslip_%s_%04dm.h5" % (shape, res)

    projection.open()

    # PyLith ---------------------------------
    app._info.log("Projecting PyLith solution...")
    solnfile = tables.openFile(filename, 'r')
    for tstep in [0]:
        projection.project(solnfile, "pylith_1_0", tstep, field)
    solnfile.close()

    # Analytic -------------------------------
    app._info.log("Copying analytic solution...")
    filename = "analytic/output/%s_%04dm.h5" % (shape, res)
    solnfile = tables.openFile(filename, 'r')
    for tstep in [0]:
        projection.copy_projection(solnfile, "analytic", tstep, field)
    solnfile.close()

    # ----------------------------------------
    projection.close()
    return
def test_AccumulatorSeriesWrapper_common_case(h5f=None):
    if not h5f:
        h5f_path = tempfile.NamedTemporaryFile().name
        h5f = tables.openFile(h5f_path, "w")

    validation_error = ErrorSeries(
        error_name="accumulated_validation_error",
        table_name="accumulated_validation_error",
        hdf5_file=h5f,
        index_names=('epoch', 'minibatch'),
        title="Validation error, summed every 3 minibatches, indexed by epoch and minibatch")

    accumulator = AccumulatorSeriesWrapper(base_series=validation_error,
                                           reduce_every=3,
                                           reduce_function=numpy.sum)

    # (1,1), (1,2) etc. are (epoch, minibatch) index
    accumulator.append((1, 1), 32.0)
    accumulator.append((1, 2), 30.0)
    accumulator.append((2, 1), 28.0)
    accumulator.append((2, 2), 26.0)
    accumulator.append((3, 1), 24.0)
    accumulator.append((3, 2), 22.0)

    h5f.close()

    h5f = tables.openFile(h5f_path, "r")
    table = h5f.getNode('/', 'accumulated_validation_error')

    assert compare_lists(table.cols.epoch[:], [2, 3])
    assert compare_lists(table.cols.minibatch[:], [1, 2])
    assert compare_lists(table.cols.accumulated_validation_error[:],
                         [90.0, 72.0], floats=True)
def get_node(org, mode):
    # get the parent group node in the h5 file.
    if mode == 'w':
        h5 = tables.openFile(H5, mode='a')
        if org in h5.root:
            action = raw_input(
                """%s copy counts exist in %s. what to do [d/a/u]?
'd': delete them and create new copy-counts
'a': abort
'u': use the existing copy-counts
you can use the existing counts if the blast is unchanged.""" % (org, H5))[0].lower()
            if action == 'd':
                getattr(h5.root, org)._f_remove(recursive=True)
                h5.flush()
            elif action == 'u':
                h5.close()
                return None, None
            else:
                print('ABORT: %s already exists in %s' % (org, H5))
                h5.close()
                sys.exit()
        return h5, h5.createGroup(h5.root, org, org)
    else:
        h5 = tables.openFile(H5, mode='r')
        return h5, getattr(h5.root, org)
def __init__(self, path_to_arc):
    """
    :Parameters:
        path_to_arc : str
            Path to the hdf5 archive in the local file system.
    """
    # get handle to archive
    try:
        self._arc = openFile(path_to_arc, 'r')
    except:
        self._arc = openFile(path_to_arc, 'w')
    self._grp_config = None
    self._grp_ndata = None
    self._grp_scene = None

    # establish main structure
    if '/__TYPE__' not in self._arc:
        self._arc.createArray(self._arc.root, '__TYPE__', 'SCENE_ARCHIVE')
    if self._has_main_grp('CONFIG'):
        self._grp_config = self._get_main_grp('CONFIG')
    else:
        self._grp_config = self._arc.createGroup(self._arc.root, 'CONFIG')
        self._arc.createArray(self._grp_config, '__TYPE__', 'CONFIG')
    if self._has_main_grp('NEURON_DATA'):
        self._grp_ndata = self._get_main_grp('NEURON_DATA')
    else:
        self._grp_ndata = self._arc.createGroup(self._arc.root, 'NEURON_DATA')
        self._arc.createArray(self._grp_ndata, '__TYPE__', 'NEURON_DATA')
    if self._has_main_grp('SCENE'):
        self._grp_scene = self._get_main_grp('SCENE')
    else:
        self._grp_scene = self._arc.createGroup(self._arc.root, 'SCENE')
        self._arc.createArray(self._grp_scene, '__TYPE__', 'SCENE')
def modeToVtk(resolution, modename, pathToH5Utils=""):
    import tables
    delta = 0.5 / float(resolution)
    ex = numpy.zeros(3 * (resolution, ), dtype='d')
    ey = numpy.zeros(3 * (resolution, ), dtype='d')
    ez = numpy.zeros(3 * (resolution, ), dtype='d')
    pol, n, l, m, phase = strToMode(modename)
    print "Saving " + modename + " field"
    #print "Saving TM%d%d%d field" % (n, l, m)
    fx = "elecField_" + modename + "_x.h5"
    fy = "elecField_" + modename + "_y.h5"
    fz = "elecField_" + modename + "_z.h5"
    fvtk = "elecField_" + modename + ".vtk"
    h5x = tables.openFile(fx, 'w')
    h5y = tables.openFile(fy, 'w')
    h5z = tables.openFile(fz, 'w')
    pts = numpy.mgrid[0:0.5:delta, 0:0.5:delta, 0:0.5:delta]
    pts = numpy.rollaxis(pts, 0, pts.ndim)
    field = efield(pol, n, l, m, phase, pts)
    h5x.createArray("/", "data", field[..., 0])
    h5y.createArray("/", "data", field[..., 1])
    h5z.createArray("/", "data", field[..., 2])
    h5x.close()
    h5y.close()
    h5z.close()
    # now use h5tovtk to get vtk file
    os.system(pathToH5Utils + "h5tovtk -o %s %s %s %s" % (fvtk, fx, fy, fz))
def merge(out, fnames):
    data = tables.openFile(out, mode='a')

    for fname in fnames:
        f = tables.openFile(fname, mode='r')
        raw_targets = f.root.denseFeat

        if 'denseFeat' in data.root:
            prev_data = data.root.denseFeat
            targets = data.createCArray(
                data.root, '_y', atom=tables.Float32Atom(),
                shape=((raw_targets.shape[0] + prev_data.shape[0], 436)))
            targets[:prev_data.shape[0], :] = prev_data[:, :]
            targets[prev_data.shape[0]:, :] = raw_targets[:, :]
            data.flush()
            data.removeNode(data.root, "denseFeat", 1)
        else:
            targets = data.createCArray(
                data.root, '_y', atom=tables.Float32Atom(),
                shape=((raw_targets.shape[0], 436)))
            targets[:, :] = raw_targets[:, :]
            data.flush()

        data.renameNode(data.root, "denseFeat", "_y")
        data.flush()
        f.close()

    data.close()
def compute_rms(ext_filename, force_overwrite=False):
    '''
    Add running measurement of RMS noise floor to the extracted spiketimes
    file. This metric is required for many of the spike processing routines;
    however, this is such a slow (possibly inefficient) algorithm that it was
    broken out into a separate function.
    '''
    processing = {}
    with tables.openFile(ext_filename, 'a') as fh:
        raw_filename = ext_filename.replace('extracted', 'raw')
        if 'rms' in fh.root:
            if not force_overwrite:
                raise IOError, 'Already contains RMS data'
            else:
                fh.root.rms._f_remove(recursive=True)

        processing['filter_freq_lp'] = fh.root.filter._v_attrs.fc_lowpass
        processing['filter_freq_hp'] = fh.root.filter._v_attrs.fc_highpass
        processing['filter_order'] = fh.root.filter._v_attrs.filter_order
        processing['filter_btype'] = fh.root.filter._v_attrs.filter_btype
        processing['bad_channels'] = fh.root.filter.bad_channels[:] - 1
        processing['diff_mode'] = fh.root.filter._v_attrs.diff_mode

        #channels = fh.root.event_data._v_attrs.extracted_channels[:]-1
        with tables.openFile(raw_filename, 'r') as fh_raw:
            input_node = h5.p_get_node(fh_raw.root, '*')
            output_node = fh.createGroup('/', 'rms')
            running_rms(input_node, output_node, 1, 0.25,
                        processing=processing, algorithm='median',
                        progress_callback=update_progress)
def __init__(self, output_dir, chrom_list):
    # combined allele-specific read counts
    as_count_filename = "%s/combined_as_count.h5" % output_dir
    self.as_count_h5 = tables.openFile(as_count_filename, "w")

    # combined mapped read counts
    read_count_filename = "%s/combined_read_count.h5" % output_dir
    self.read_count_h5 = tables.openFile(read_count_filename, "w")

    # counts of genotypes
    ref_count_filename = "%s/combined_ref_count.h5" % output_dir
    self.ref_count_h5 = tables.openFile(ref_count_filename, "w")

    alt_count_filename = "%s/combined_alt_count.h5" % output_dir
    self.alt_count_h5 = tables.openFile(alt_count_filename, "w")

    het_count_filename = "%s/combined_het_count.h5" % output_dir
    self.het_count_h5 = tables.openFile(het_count_filename, "w")

    self.filenames = [as_count_filename, read_count_filename,
                      ref_count_filename, alt_count_filename,
                      het_count_filename]

    self.h5_files = [self.as_count_h5, self.read_count_h5, self.ref_count_h5,
                     self.alt_count_h5, self.het_count_h5]

    # initialize all of these files
    atom = tables.UInt16Atom(dflt=0)

    for h5f in self.h5_files:
        for chrom in chrom_list:
            self.create_carray(h5f, chrom, atom)
def populate_R_data(self):
    print "Collecting R Data for " + str(self.driftFNs[0].run) + '...'
    self.r_blue = np.zeros((self.beammap.shape[0], self.beammap.shape[1],
                            len(self.driftFNs)))
    for i in range(len(self.driftFNs)):
        try:
            calFile = tables.openFile(self.driftFNs[i].calSoln(), mode='r')
            cal_row = calFile.root.wavecal.calsoln.cols.pixelrow[:]
            cal_col = calFile.root.wavecal.calsoln.cols.pixelcol[:]
            cal_params = calFile.root.wavecal.calsoln.cols.polyfit[:]
            cal_sigma = calFile.root.wavecal.calsoln.cols.sigma[:]
        except:
            print '\tUnable to open: ' + self.driftFNs[i].calSoln()
            return
        try:
            driftFile = tables.openFile(self.driftFNs[i].calDriftInfo(), mode='r')
            drift_row = driftFile.root.params_drift.driftparams.cols.pixelrow[:]
            drift_col = driftFile.root.params_drift.driftparams.cols.pixelcol[:]
            drift_params = driftFile.root.params_drift.driftparams.cols.gaussparams[:]
        except:
            print '\tUnable to open: ' + self.driftFNs[i].calDriftInfo()
            return
        for k in range(len(cal_sigma)):
            if cal_sigma[k] > 0:
                drift_ind = np.where((drift_row == cal_row[k]) *
                                     (drift_col == cal_col[k]))[0][0]
                peak_fit = drift_params[drift_ind]
                blue_energy = (parabola(cal_params[k],
                                        x=np.asarray([peak_fit[1]]),
                                        return_models=True))[0][0]
                self.r_blue[cal_row[k], cal_col[k], i] = \
                    blue_energy / (self.params['fwhm2sig'] * cal_sigma[k])
        calFile.close()
        driftFile.close()
    print "\tDone."
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('-a', dest='one_name', help='track file 1')
    parser.add_argument('-b', dest='two_name', help='track file 2')
    parser.add_argument('--atrack', dest='atrack', help="track name 1")
    parser.add_argument('--btrack', dest='btrack', help="track name 2")
    parser.add_argument('-o', dest='out_name', help='out track file')
    parser.add_argument('--floor', required=False, default=False)
    args = parser.parse_args()

    one = tb.openFile(args.one_name)
    two = tb.openFile(args.two_name)
    out = tb.openFile(args.out_name, 'a')
    atrack = args.atrack
    btrack = args.btrack

    two_track = two.getNode("/" + btrack)
    if atrack == "all":
        for one_track in one.iterNodes("/"):
            run(one_track, two_track, out, args.floor)
    else:
        one_track = one.getNode("/" + atrack)
        run(one_track, two_track, out, args.floor)

    out.flush()
    out.close()
plt.rc('font', **{
    'family': 'sans-serif',
    'sans-serif': ['Dejavu Sans'],
    'size': 10})
plt.rc('axes', grid=True)
plt.rc('lines', markeredgewidth=0)
ticks.set_extended_locator(1)
plotwidth = 12
plt.gcf().set_size_inches((plotwidth, plotwidth / np.sqrt(2)))
f = 0.09
plt.gca().set_position([f, f, 1 - 2 * f, 1 - 2 * f])

f = tables.openFile('data/x-dwd.h5', 'r')
zvalue = 'tm'

time = set()
zmin = np.inf
zmax = -np.inf
data_table = f.root.raw.weather
for row in data_table:
    time.add(row['time'])
    zmin, zmax = min(zmin, row[zvalue]), max(zmax, row[zvalue])
time = np.array(list(time))
time = np.sort(time)
print time

for t in time:
    lat = []
def __upgrade_file(input_file_name, output_file_name):
    """
    This method does any required processing in order to convert an input
    file stored in TVB 1.0 format into an output_file of TVB 2.0 format.

    NOTE: This should not be used directly since the simultaneous use of
    pyTables and h5py causes segmentation faults on some setups (Debian
    32/65, Fedora 64, Windows 64) on file open/close. (Probably caused by
    some GIL / C level incompatibilities). Instead of this use the
    `upgrade(file_name)` which will call this method in a separate Python
    process.

    :param input_file_name: the path to an input *.h5 file from TVB 1.0 using
        pyTables format for storage
    :param output_file_name: the path to an output *.h5 that will be written
        in h5py TVB 1.0.1 specific format
    """
    tables_h5_file = tables.openFile(input_file_name, 'r')
    if os.path.exists(output_file_name):
        os.remove(output_file_name)
    h5py_h5_file = h5py.File(output_file_name, 'a')
    # Iterate through all pyTables nodes
    for tables_node in tables_h5_file.walkNodes():
        node_path = tables_node._v_pathname.replace('/', '')
        node_metadata = {}
        # Get meta-data from the pyTables node. This does not change for
        # root/group/Carray nodes
        all_meta_keys = tables_node._v_attrs._f_list('user')
        for meta_key in all_meta_keys:
            new_key = meta_key
            value = tables_h5_file.getNodeAttr(tables_node, meta_key)
            node_metadata[new_key] = _deserialize_value(value)
        if tables_node.__class__ is tables.group.RootGroup:
            # For the root the node is already created in the h5py equivalent
            h5py_node = h5py_h5_file['/']
        elif tables_node.__class__ is tables.group.Group:
            # For groups just create an empty data-set since it's easier to
            # handle than sub-groups.
            h5py_node = h5py_h5_file.create_dataset(node_path, (1,))
        else:
            # We have a standard node (Carray), compute based on the shape if
            # it will fit in the DATA_BUFFER_SIZE we set or we need to
            # read/write by chunks.
            node_shape = tables_node.shape
            max_dimension = 0
            total_size = 1
            for idx, val in enumerate(node_shape):
                if val > node_shape[max_dimension]:
                    max_dimension = idx
                total_size = total_size * val
            if total_size <= DATA_BUFFER_SIZE:
                # We did not pass our buffer size, so it's safe to just
                # read/write the whole data at once
                node_data = tables_node.read()
                h5py_node = h5py_h5_file.create_dataset(node_path,
                                                        data=node_data,
                                                        shape=node_data.shape,
                                                        dtype=node_data.dtype)
            else:
                # We need to read in chunks. Set the dimension that is
                # growable to None
                node_shape_list = list(node_shape)
                node_shape_list[max_dimension] = None
                h5py_node = h5py_h5_file.create_dataset(
                    node_path, shape=node_shape,
                    maxshape=tuple(node_shape_list))
                slice_size = max(int(DATA_BUFFER_SIZE *
                                     node_shape[max_dimension] / total_size), 1)
                full_slice = slice(None, None, None)
                data_slice = [full_slice for _ in node_shape]
                for idx in range(0, node_shape[max_dimension], slice_size):
                    specific_slice = slice(idx, idx + slice_size, 1)
                    data_slice[max_dimension] = specific_slice
                    tables_data = tables_node[tuple(data_slice)]
                    h5py_node = h5py_h5_file[node_path]
                    h5py_node[tuple(data_slice)] = tables_data
        for meta_key in node_metadata:
            processed_value = _serialize_value(node_metadata[meta_key])
            h5py_node.attrs[meta_key] = processed_value
    h5py_h5_file['/'].attrs[TVB_ATTRIBUTE_PREFIX + DATA_VERSION_ATTRIBUTE] = 2
    tables_h5_file.close()
    # Reloading h5py seems to fix the segmentation fault that used to appear.
    importlib.reload(h5py)
    h5py_h5_file.close()
def open_h5_file_append(h5filename):
    """
    Open an existing H5 in append mode.
    """
    return tables.openFile(h5filename, mode='a')
def open_h5_file_read(h5filename):
    """
    Open an existing H5 in read mode.
    """
    return tables.openFile(h5filename, mode='r')
def create_aggregate_file(h5filename, title='H5 Aggregate File', force=False,
                          expectedrows=1000, complevel=1, summaryfile=False):
    """
    Create a new HDF5 file for all songs.
    It will contain everything that is in regular song files.
    Tables created empty.
    If force=False, refuse to overwrite an existing file and raise a
    ValueError if it's the case.
    If summaryfile=True, creates a summary file, i.e. no arrays.
    Other optional param is the H5 file.
    DETAILS
    - if you create a very large file, try to approximate correctly the
      number of data points (songs); it speeds things up with arrays (by
      setting the chunking correctly).
    - we set the compression level to 1 by default; it uses the ZLIB library.
      To disable compression, set it to 0.
    Sets up the groups, each containing a table 'songs' with one row:
    - metadata
    - analysis
    """
    # check if file exists
    if not force:
        if os.path.exists(h5filename):
            raise ValueError('file exists, can not create HDF5 song file')
    # summary file? change title
    if summaryfile:
        title = 'H5 Summary File'
    # create the H5 file
    h5 = tables.openFile(h5filename, mode='w', title='H5 Song File')
    # set filter level
    h5.filters = tables.Filters(complevel=complevel, complib='zlib')
    # setup the groups and tables
    # group metadata
    group = h5.createGroup("/", 'metadata', 'metadata about the song')
    table = h5.createTable(group, 'songs', DESC.SongMetaData,
                           'table of metadata for one song',
                           expectedrows=expectedrows)
    # group analysis
    group = h5.createGroup("/", 'analysis', 'Echo Nest analysis of the song')
    table = h5.createTable(group, 'songs', DESC.SongAnalysis,
                           'table of Echo Nest analysis for one song',
                           expectedrows=expectedrows)
    # group musicbrainz
    group = h5.createGroup("/", 'musicbrainz',
                           'data about the song coming from MusicBrainz')
    table = h5.createTable(group, 'songs', DESC.SongMusicBrainz,
                           'table of data coming from MusicBrainz',
                           expectedrows=expectedrows)
    # create arrays
    if not summaryfile:
        create_all_arrays(h5, expectedrows=expectedrows)
    # close it, done
    h5.close()
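# A short usage sketch for the aggregate-file helpers above, assuming
# create_aggregate_file, open_h5_file_append and open_h5_file_read live in the
# same module; 'msd_summary.h5' is a hypothetical file name.
if __name__ == '__main__':
    import os
    aggfile = 'msd_summary.h5'
    if not os.path.exists(aggfile):
        # empty summary file (tables only, no arrays) sized for ~10000 songs
        create_aggregate_file(aggfile, expectedrows=10000, summaryfile=True)
    h5 = open_h5_file_append(aggfile)
    print h5.root.metadata.songs.nrows
    h5.close()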
def getMeshGrid(grid):
    xl, yl = grid._v_attrs.vsLowerBounds
    xu, yu = grid._v_attrs.vsUpperBounds
    nx, ny = grid._v_attrs.vsNumCells
    dx = (xu - xl) / nx
    dy = (yu - yl) / ny
    X = linspace(xl + 0.5 * dx, xu - 0.5 * dx, nx)
    Y = linspace(yl + 0.5 * dy, yu - 0.5 * dy, ny)
    return meshgrid(X, Y)


def mkFig(fh, XX, YY, dat, nm):
    tm = fh.root.timeData._v_attrs.vsTime
    f = figure(1)
    im = pcolormesh(XX, YY, dat.transpose())
    title("T = %.4g" % tm)
    axis('image')
    colorbar_adj(im)
    savefig(nm, bbox_inches='tight')
    close()


for i in range(0, 101):
    print ("Working on %d .." % i)
    fh = tables.openFile("s441-euler-rt-2d_q_%d.h5" % i)
    q = fh.root.StructGridField
    X, Y = getMeshGrid(fh.root.StructGrid)
    mkFig(fh, X, Y, q[:, :, 0], 's441-euler-rt-rho_%05d.png' % i)
    fh.close()
                W += waves(period=1. / self.fi[ii],
                           waveHeight=2. * self.ai[ii, jj],
                           mwl=self.mwl, depth=self.d, g=self.g,
                           waveDir=self.dirs[ii, jj], wavelength=wi[ii],
                           phi0=self.phi[ii, jj]).w(x, y, z, t)
        return W


if __name__ == '__main__':
    from matplotlib import pyplot as plt
    import os as os
    import tables

    lineData = tables.openFile("20150927-0000-01.FRFNProp.line.data.mat", "r")
    z = lineData.root.lineGriddedFilteredData.waterGridFiltered[:]
    x = lineData.root.lineCoredat.downLineX[:]
    xfinite = x[:]
    xfinite[np.isnan(x)] = -1000.0
    i105 = np.where(xfinite > 105.0)[0][0]
    print i105
    rawtime = lineData.root.lineCoredat.tGPSVector[:500]
    raweta = z[i105, :500]
    rawtime = rawtime - rawtime[0]
    rawtime *= 86400

    plt.plot(rawtime, raweta, "ko")
    #from scipy.interpolate import interp1d
def setUp(self):
    self.h5file = NamedTemporaryFile()
    self.h5 = openFile(self.h5file.name, 'w')
parser.add_option("--blocksize", dest="blocksize", type=int, default=2048)

opts, args = parser.parse_args()

if len(args) < 2:
    parser.error("You must specify at least one output and one input file")

infiles, outfile = args[:-1], args[-1]

if os.path.exists(outfile):
    parser.error("%s already exists!" % outfile)

shutil.copy(infiles[0], outfile)

from collections import defaultdict
paths = defaultdict(list)
for fname in infiles[1:]:
    with tables.openFile(fname) as hdf:
        for group in hdf.walkNodes(where='/', classname='Group'):
            if 'ndim' in group._v_attrs:
                # a dashi histogram
                path = group._v_pathname
                paths[path].append(fname)


def histadd(sourceGroup, destGroup, blocksize=1):
    """
    Add dashi histograms stored in HDF5 groups

    :param blocksize: operate on blocksize I/O chunks at a time
    """
    for arr in '_h_bincontent', '_h_squaredweights':
        source = sourceGroup._v_children[arr]
        dest = destGroup._v_children[arr]
        chunksize = blocksize * reduce(operator.mul, dest.chunkshape)
def pandas_hdf_to_data_dict(filename):
    """ Explore the content of the pandas HDFStore (HDF5) and create a
    dictionary of timeseries (numpy arrays) found in it. The keys will be
    used as names for the curves. All indexes must be the same and are
    stored once with key "index".

    Note: This assumes that the file was created via the pandas' HDFStore
    interface: all pandas are stored inside a group containing the data and
    the array of indexes in each direction. Dataframes and panels are stored
    respectively as 2 and 3 dimensional nd-arrays.

    Returns:
    - content of all (1D) timeseries found in the hdf5 file including the
      index
    - whether the index represents dates. In that case, the index is stored
      with a kind keyword with value 'datetime' and its values are the times
      in seconds since Epoch.

    NOTE: The version 1 accesses the pandas by reconstructing them from the
    HDFStore. But this is inefficient as pandas stores all the pandas
    components in the form of numpy arrays even for DateRange instances.
    This is a deeper implementation that uses the numpy arrays directly
    inside the HDF file (2x gain). Each group and array is stored with a set
    of attributes accessible via its _v_attrs. For example each series
    contains an index with several attributes including 'kind', which stores
    whether the index was a DateRange in pandas.
    """
    h5file = tables.openFile(filename, "r")
    content = {}
    index_dict = {}
    # All pandas stored using the HDFStore interface are organized one per
    # group. DateRange indexes possess a 'kind' attribute that specifies
    # that it is an array of datetime objects.
    for key, _ in h5file.root._v_children.items():
        group = getattr(h5file.root, key)
        pandas_type = getattr(group._v_attrs, "pandas_type", "other")
        if pandas_type == 'series':
            # only the read method forces to load the content into memory.
            # Cast to an array of float because sometimes an object array
            # is returned.
            # FIXME: how to deal with nan?
            content[key] = np.asarray(group.values.read(), dtype=np.float)
            index_dict[key] = group.index
        elif pandas_type == 'frame':
            index_dict[key] = group.axis1
            data = group.block0_values.read()
            if isinstance(data, list):
                # FIXME: this is a hack: pandas sometimes stores a df
                # into a list with 1 array!!
                data = data[0]
            assert (data.ndim == 2)
            for i, col_name in enumerate(group.axis0):
                content[key + "_" + col_name] = np.asarray(data[i, :],
                                                           dtype=np.float)
        elif pandas_type == 'wide':
            index_dict[key] = group.axis1
            data = group.block0_values.read()
            assert (data.ndim == 3)
            for i, item_name in enumerate(group.axis0):
                for j, col_name in enumerate(group.axis2):
                    entry = key + "_" + item_name + "_" + col_name
                    content[entry] = np.asarray(data[i, :, j], dtype=np.float)
        else:
            raise ValueError("The group found in the file %s"
                             " is not a standard type." % filename)

    key0, index0 = index_dict.items()[0]
    arr_index0 = index0.read()
    content["index"] = arr_index0
    # Check indexes are all the same.
    # FIXME: do this by creating a 2D np array?
    for k, v in index_dict.items()[1:]:
        if not np.all(v.read() == arr_index0):
            warnings.warn("Error: the index of %s is not"
                          " equal to the index of %s" % (k, key0))

    index_is_dates = getattr(index0._v_attrs, 'kind', "numeric") == "datetime"
    h5file.close()
    return content, index_is_dates
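# A minimal usage sketch for pandas_hdf_to_data_dict, under the assumption
# that 'store.h5' (a hypothetical file name) was written through pandas'
# HDFStore interface.
if __name__ == '__main__':
    content, index_is_dates = pandas_hdf_to_data_dict('store.h5')
    for name, values in content.items():
        print name, values.shape
    if index_is_dates:
        # index values are seconds since the Epoch in this case
        print 'first timestamp:', content['index'][0]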
def runMCMCmodel(args):
    """
    Simulate the survey data and run the MCMC luminosity calibration model.

    Parameters
    ----------
    args - Command line arguments
    """
    mcmcParams = args['mcmcString']
    surveyParams = args['surveyString']
    priorParams = args['priorsString']

    maxIter, burnIter, thinFactor = [int(par) for par in mcmcParams]

    if surveyParams[5] == 'Inf':
        magLim = np.Inf
    else:
        magLim = float(surveyParams[5])
    S = U.UniformDistributionSingleLuminosity(int(surveyParams[0]),
                                              float(surveyParams[1]),
                                              float(surveyParams[2]),
                                              float(surveyParams[3]),
                                              float(surveyParams[4]),
                                              surveyLimit=magLim)
    #S.setRandomNumberSeed(53949896)
    S.generateObservations()
    lumCalModel = L.UniformSpaceDensityGaussianLFBook(S,
                                                      float(surveyParams[1]),
                                                      float(surveyParams[2]),
                                                      float(priorParams[0]),
                                                      float(priorParams[1]),
                                                      float(priorParams[2]),
                                                      float(priorParams[3]))

    class SurveyData(IsDescription):
        """
        Class that holds the data model for the data from the simulated
        parallax survey. Intended for use with the HDF5 files through the
        pytables package.
        """
        trueParallaxes = Float64Col(S.numberOfStarsInSurvey)
        absoluteMagnitudes = Float64Col(S.numberOfStarsInSurvey)
        apparentMagnitudes = Float64Col(S.numberOfStarsInSurvey)
        parallaxErrors = Float64Col(S.numberOfStarsInSurvey)
        magnitudeErrors = Float64Col(S.numberOfStarsInSurvey)
        observedParallaxes = Float64Col(S.numberOfStarsInSurvey)
        observedMagnitudes = Float64Col(S.numberOfStarsInSurvey)

    baseName = "LumCalSimSurvey-{0}".format(S.numberOfStars) + "-{0}".format(
        S.minParallax)
    baseName = baseName + "-{0}".format(S.maxParallax) + "-{0}".format(
        S.meanAbsoluteMagnitude)
    baseName = baseName + "-{0}".format(S.varianceAbsoluteMagnitude)

    h5file = openFile(baseName + ".h5", mode="w", title="Simulated Survey")
    group = h5file.createGroup("/", 'survey',
                               'Survey parameters, data, and MCMC parameters')
    parameterTable = h5file.createTable(group, 'parameters', SurveyParameters,
                                        "Survey parameters")
    dataTable = h5file.createTable(group, 'data', SurveyData, "Survey data")
    mcmcTable = h5file.createTable(group, 'mcmc', McmcParameters,
                                   "MCMC parameters")

    surveyParams = parameterTable.row
    surveyParams['kind'] = S.__class__.__name__
    surveyParams['numberOfStars'] = S.numberOfStars
    surveyParams['minParallax'] = S.minParallax
    surveyParams['maxParallax'] = S.maxParallax
    surveyParams['meanAbsoluteMagnitude'] = S.meanAbsoluteMagnitude
    surveyParams['varianceAbsoluteMagnitude'] = S.varianceAbsoluteMagnitude
    surveyParams['parallaxErrorNormalizationMagnitude'] = \
        S.parallaxErrorNormalizationMagnitude
    surveyParams['parallaxErrorSlope'] = S.parallaxErrorSlope
    surveyParams['parallaxErrorCalibrationFloor'] = \
        S.parallaxErrorCalibrationFloor
    surveyParams['magnitudeErrorNormalizationMagnitude'] = \
        S.magnitudeErrorNormalizationMagnitude
    surveyParams['magnitudeErrorSlope'] = S.magnitudeErrorSlope
    surveyParams['magnitudeErrorCalibrationFloor'] = \
        S.magnitudeErrorCalibrationFloor
    surveyParams['apparentMagnitudeLimit'] = S.apparentMagnitudeLimit
    surveyParams['numberOfStarsInSurvey'] = S.numberOfStarsInSurvey
    surveyParams.append()
    parameterTable.flush()

    surveyData = dataTable.row
    surveyData['trueParallaxes'] = S.trueParallaxes
    surveyData['absoluteMagnitudes'] = S.absoluteMagnitudes
    surveyData['apparentMagnitudes'] = S.apparentMagnitudes
    surveyData['parallaxErrors'] = S.parallaxErrors
    surveyData['magnitudeErrors'] = S.magnitudeErrors
    surveyData['observedParallaxes'] = S.observedParallaxes
    surveyData['observedMagnitudes'] = S.observedMagnitudes
    surveyData.append()
    dataTable.flush()

    mcmcParameters = mcmcTable.row
    mcmcParameters['iterations'] = maxIter
    mcmcParameters['burnIn'] = burnIter
    mcmcParameters['thin'] = thinFactor
    mcmcParameters['minMeanAbsoluteMagnitude'] = float(priorParams[0])
    mcmcParameters['maxMeanAbsoluteMagnitude'] = float(priorParams[1])
    mcmcParameters['priorTau'] = "OneOverX"
    mcmcParameters['tauLow'] = float(priorParams[2])
    mcmcParameters['tauHigh'] = float(priorParams[3])
    mcmcParameters.append()
    dataTable.flush()

    h5file.close()

    # Run MCMC and store in HDF5 database
    baseName = "LumCalResults-{0}".format(S.numberOfStars) + "-{0}".format(
        S.minParallax)
    baseName = baseName + "-{0}".format(S.maxParallax) + "-{0}".format(
        S.meanAbsoluteMagnitude)
    baseName = baseName + "-{0}".format(S.varianceAbsoluteMagnitude)

    M = MCMC(lumCalModel.pyMCModel, db='hdf5', dbname=baseName + ".h5",
             dbmode='w', dbcomplevel=9, dbcomplib='bzip2')
    M.use_step_method(Metropolis, M.priorParallaxes)
    M.use_step_method(Metropolis, M.priorAbsoluteMagnitudes)

    start = now()
    M.sample(iter=maxIter, burn=burnIter, thin=thinFactor)
    finish = now()
    print "Elapsed time in seconds: %f" % (finish - start)

    M.db.close()
def main():
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <chrom> [<start> <end>]\n" % sys.argv[0])
        exit(2)

    inChrom = sys.argv[1]
    if len(sys.argv) > 2:
        inStart = int(sys.argv[2])
        inEnd = int(sys.argv[3])
    else:
        start = ""
        end = ""

    sys.stderr.write("%s %d %d\n" % (inChrom, inStart, inEnd))
    inStrand = "+"

    # rename depending on input gene
    fastainput = open("Input_GATA3.fasta", 'w')

    # change path to hdf5 file you want to use
    seq_h5 = tables.openFile("/iblm/netapp/data1/external/GRC37/GRC37.h5", "r")
    inputRegion = get_seq(inChrom, inStart, inEnd, inStrand, seq_h5)
    #print inputRegion
    fastainput.write(">" + inChrom + '\n')
    fastainput.write(inputRegion)
    fastainput.close()

    # reopen the fasta input file from above
    f = open("Input_GATA3.fasta", "r")

    # declare chr start and end for region that you want to extract guides from
    file = f.readlines()
    #Sguide = []
    #guideList = []

    # create output file
    # rename depending on gene of interest
    outfile = open('GuideRNAs_GATA3', 'w')
    outfile2 = open('FQGuides_GATA3.fq', 'w')

    # can change seq and header to lists if there is more than one header in a
    # fasta file with several sequences
    # declare empty lists
    sequence = []
    header = []
    seq = ""
    header = ""
    GC = []

    # store all the headers in a list
    for f in file:
        if f.startswith('>'):
            header = header + f
            header = header.splitlines()[0]  # get rid of newline characters and spaces
        else:
            f = f.replace(" ", "")
            f = f.replace("\n", "")
            seq = seq + f
    #i = 0

    # make it all upper case, easier to parse
    seq = seq.upper()
    #print guideRna

    # call function to find all guide RNAs
    gRNA_length = 20
    guideRna1, guideRnaOutput1, location1, direction1 = finding_guides(
        seq, inStart, inEnd, gRNA_length)
    gRNA_length = 19
    guideRna2, guideRnaOutput2, location2, direction2 = finding_guides(
        seq, inStart, inEnd, gRNA_length)
    gRNA_length = 21
    guideRna3, guideRnaOutput3, location3, direction3 = finding_guides(
        seq, inStart, inEnd, gRNA_length)

    guideRna = guideRna1 + guideRna2 + guideRna3
    guideRnaOutput = guideRnaOutput1 + guideRnaOutput2 + guideRnaOutput3
    location = location1 + location2 + location3
    direction = direction1 + direction2 + direction3
    print guideRna
    print guideRnaOutput
    print len(guideRna)

    # create .fq file used for alignment
    j = 0
    outfile.write(header)
    while j < len(guideRna):
        outfile.write(guideRna[j] + '\n')
        outfile2.write('@' + str(j) + '\n')
        outfile2.write(guideRna[j] + '\n')
        outfile2.write('+' + '\n')
        score = "I" * len(guideRna[j])
        outfile2.write(score + '\n')
        j = j + 1

    # if we don't close and reopen FQGuides.fq we cannot execute the commands
    # below, since FQGuides.fq was only opened for writing
    outfile2.close()

    # reopen the .fq file created above
    open("FQGuides_GATA3.fq", "r")
    print "We created a .fq file with all the guides"

    # call bwa aligner from this code and get information from the sam file
    # bottleneck here
    # change input reference genome and output file name here
    cmd = 'bwa aln -o 0 -n 3 -N -t 5 hg37.fa FQGuides_GATA3.fq > aln_GATA3.sai'
    os.system(cmd)
    print "Alignment Step has finished"

    # change input reference genome and output file name here
    cmd2 = 'bwa samse -n 1000 hg37.fa aln_GATA3.sai FQGuides_GATA3.fq > aln_GATA3.sam'
    os.system(cmd2)
    print "Sam file has been created"

    import re
    seq_h5.close()
for l in range(0, len(mapping)):
    if (what.args[w] == data_list[l]):
        args.append(mapping[l])
        found = True

if (found == False):
    sys.exit(what.args[w] + ' does not exist')

if (what.clean_folder == True):
    if (my_rank == 0):
        if (os.path.exists(path) == False):
            os.makedirs(path)
        else:
            shutil.rmtree(path)
            os.makedirs(path)

f = tbl.openFile(dir)
data_h = []
for n in f.root:
    data_h.append(n.read())
data = array(data_h)

id_probes = []
if (what.probes == "all"):
    n_probes = len(data)
    for p in range(0, n_probes):
        id_probes.append(p)
else:
    n_probes = len(what.probes)
    for p in range(0, n_probes):
        id_probes.append(int(what.probes[p]))

if (what.colors != 'rgbcmykw'):
def loadFile(self, group, index, *args):
    '''Load configuration from a predefined list of .mat files'''
    from Coordinate import Coordinate
    from Window import Window
    from Medium import Medium
    from Signal import Signal
    from System import System
    from Data import Data
    from Array import Array
    from Image import Image
    import os

    PHDCODE_ROOT = os.environ['COS_ROOT']

    Ni = len(args)
    if type(index) == int:
        idx = copy(index)
        index = [idx]

    for i in index:
        if Ni > 0:
            origin = fileLUT(group, i, args)
        else:
            origin = fileLUT(group, i)

        if origin.type == 'ultrasound multi file':
            print 'Loading ' + PHDCODE_ROOT + origin.path + origin.file
            #print 'Loading ' + PHDCODE_ROOT + origin.path + origin.info_file
            info_file = io.loadmat(PHDCODE_ROOT + origin.path + origin.info_file)
            mat_file = io.loadmat(PHDCODE_ROOT + origin.path + origin.file)

            NFrames = info_file['NFrames'][0, 0]
            NElements = info_file['NElements'][0, 0]
            angles = info_file['angles']
            ranges = info_file['ranges']

            tmp = mat_file['frame%d' % origin.index].shape
            Xd = np.zeros((tmp[0], tmp[1], tmp[2]), dtype=np.complex64)
            Xd[:, :, :] = mat_file['frame%d' % i]

            s = System()
            s.data = Data()
            s.data.Xd = Xd
            s.data.angles = angles
            s.data.ranges = ranges
            s.data.M = NElements
            s.data.NFrames = NFrames
            return s

        elif origin.type == 'ultrasound_simulation':
            import tables as tb

            s = System()
            s.data = Data()

            print 'Loading ' + PHDCODE_ROOT + origin.path + origin.file
            file = tb.openFile((PHDCODE_ROOT + origin.path + origin.file), 'r')
            root = file.getNode(file.root)

            s.data.M = root.frame_1.N_rx.read()
            s.data.angles = root.frame_1.theta.read()[0]
            s.data.ranges = root.frame_1.range.read()

            d = root.frame_1.datacube.read().shape
            s.data.Xd = np.zeros((100, d[1], d[2], d[0]), dtype=np.complex64)
            for i in range(100):
                tmp = np.transpose(
                    getattr(root, 'frame_%d' % (i + 1)).datacube.read(),
                    (1, 2, 0))
                s.data.Xd[i, :, :, :] = tmp
            file.close()

            return s
        else:
            print 'WW DataInput() - No method implemented for ' + origin.type
files = args.file
x1, x2 = args.range
dx = args.step
buf = args.overlap
loncol = args.loncol
latcol = args.latcol

print 'processing files: %d ...' % len(files)
print 'lon,lat columns: %d,%d' % (loncol, latcol)

nfiles = 0
npts = 0
nvalidpts = 0
for f in files:
    # input
    fin = tb.openFile(f, 'r')
    data = fin.getNode('/data')
    lon = data[:, loncol]
    lat = data[:, latcol]
    npts += data.shape[0]

    # processing
    sectors = define_sectors(x1, x2, dx=dx, buf=buf)
    results = get_sectors(lon, sectors)
    if len(results) < 1:
        continue

    # output
    for r in results:
        ind, secnum = r
import numpy as np
import tables as tb
import pylab as pl
import sys

f = tb.openFile(sys.argv[1])
d = f.root.data

i, = np.where(d[:, -2] == int(sys.argv[2]))

if i.shape[0] > 0:
    pl.plot(d[i, 3], d[i, 2], '.')
    pl.show()
else:
    print 'no data points!'
                    dest='outfile', action='store', required=True)
parser.add_argument('infiles', nargs='+', action='store')

try:
    args = parser.parse_args()
except:
    parser.print_help()
    sys.exit()

print(("INPUT : ", args.infiles))
print(("OUTPUT: ", args.outfile))

filters = tables.Filters(1)
hfo = tables.openFile(args.outfile, 'a', filters=filters)
hfs = [tables.openFile(f, 'r') for f in args.infiles]

keys = [node.name for node in hfs[0].listNodes('/', classname='Array')]
for k in keys:
    print(k)
    if k in hfo.root:
        print('skipping...')
        continue

    shape = hfs[0].getNode('/', k).shape
    atom = hfs[0].getNode('/', k).atom
    X = np.zeros(shape, atom.dtype)
    Y = np.zeros(shape, atom.dtype)
def load(self, cache_phenotype=True):
    """load data file

    Args:
        cache_phenotype: load phenotypes fully into memory (default: True)
    """
    import tables
    self.f = tables.openFile(self.file_name, 'r')
    self.pheno = self.f.root.phenotype

    # parse out those we always need for convenience
    self.pheno_matrix = self.pheno.matrix
    self.sample_ID = self.pheno.row_header.sample_ID[:]
    self.phenotype_ID = self.pheno.col_header.phenotype_ID[:]

    # cache?
    if cache_phenotype:
        self.pheno_matrix = self.pheno_matrix[:]

    # Additional pheno col header
    headers = self.pheno.col_header

    # TODO: create pandas.MultiIndex from headers
    child_names = []  # headers._v_children.keys()
    child_arrays = []
    for child in headers:
        if child._v_name == "phenotype_ID":
            pass
            # TODO: OS, I removed this as the whole thing otherwise crashes
            # if there are no headers
            #continue
        child_names.append(child._v_name)
        child_arrays.append(child[:])
    multiindex = pd.MultiIndex.from_arrays(arrays=child_arrays,
                                           names=child_names)
    self.index_frame = pd.DataFrame(data=SP.arange(self.pheno_matrix.shape[1]),
                                    index=multiindex)
    self.headers_frame = pd.DataFrame(data=SP.array(child_arrays).T,
                                      index=self.phenotype_ID,
                                      columns=child_names)

    if 'gene_ID' in headers:
        self.eqtl = True
        self.gene_ID = self.pheno.col_header.gene_ID[:]
        self.gene_pos_start = SP.array([self.pheno.col_header.gene_chrom[:],
                                        self.pheno.col_header.gene_start[:]],
                                       dtype='int').T
        self.gene_pos_end = SP.array([self.pheno.col_header.gene_chrom[:],
                                      self.pheno.col_header.gene_end[:]],
                                     dtype='int').T
        self.gene_ID_list = list(set(self.gene_ID))
    else:
        self.eqtl = False

    if 'environment' in headers:
        self.environment = self.pheno.col_header.environment[:]
        self.environment_list = list(set(self.environment))
    else:
        self.environment = None

    # dimensions
    self.N = self.pheno_matrix.shape[0]
    self.P = self.pheno_matrix.shape[1]
def write_array(A, filename, mode='w', title='test', complevel=None,
                verbose=True):
    """
    write memory to a h5 file
    h5 file contains root.array (A real or complex)

    A: a ndarray, GPUArray or PitchArray
    filename: name of file to store
    mode: 'w' to start a new file
          'a' to append; the leading dimension of A must be the same as in
          the existing file

    file can be read by read_array in python
    """
    h5file = tables.openFile(filename, mode, title)
    if complevel is not None:
        filters = tables.Filters(complevel=complevel, complib='zlib')
    else:
        filters = None

    if (A.dtype == np.float32):
        tb = tables.Float32Atom
    elif (A.dtype == np.float64):
        tb = tables.Float64Atom
    elif (A.dtype == np.complex64) or (A.dtype == np.complex128):
        tb = tables.ComplexAtom
    elif A.dtype == np.int32:
        tb = tables.Int32Atom
    elif A.dtype == np.int64:
        tb = tables.Int64Atom
    else:
        raise TypeError("Write file error: unknown input dtype")

    if PYCUDA:
        if A.__class__.__name__ in ["GPUArray", "PitchArray"]:
            B = A.get()
        elif A.__class__.__name__ == "ndarray":
            B = A
        else:
            raise TypeError("Write file error: unknown input")
    else:
        if A.__class__.__name__ == "ndarray":
            B = A
        else:
            raise TypeError("Write file error: unknown input")

    shape = list(B.shape)
    shape[0] = 0
    if mode == 'w':
        if (A.dtype == np.complex64):
            h5file.createEArray("/", "array", tb(8), tuple(shape),
                                filters=filters)
        elif (A.dtype == np.complex128):
            h5file.createEArray("/", "array", tb(16), tuple(shape),
                                filters=filters)
        else:
            h5file.createEArray("/", "array", tb(), tuple(shape),
                                filters=filters)

    h5file.root.array.append(B)
    h5file.close()
    if verbose:
        if mode == 'w':
            print "file %s created" % (filename)
        else:
            print "file %s attached" % (filename)
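# The docstring above refers to a read_array counterpart; the original is not
# shown in this snippet, so the reader below is only a sketch of what it
# might look like.
import numpy as np
import tables

def read_array(filename):
    """Read back the array stored under /array by write_array."""
    h5file = tables.openFile(filename, 'r')
    try:
        A = h5file.root.array.read()
    finally:
        h5file.close()
    return np.asarray(A)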
from proteus import Comm, Domain, Isosurface

parser = argparse.ArgumentParser()
parser.add_argument("prefix", help="The prefix of the h5 files")
parser.add_argument("-L", type=float, default=[1.0, 1.0, 1.0], nargs='+',
                    help="extents of bounding box")
parser.add_argument("-x", type=float, default=[0.0, 0.0, 0.0], nargs='+',
                    help="lower left front corner")
parser.add_argument("-s", "--steps", type=int, default=0,
                    help="number of time steps to process")
args = parser.parse_args()

domain = Domain.RectangularDomain(L=args.L, x=args.x)
comm = Comm.init()
h5 = tables.openFile(args.prefix + ".h5", "r")
isosurface = Isosurface.Isosurface((('phi_t', (0.0, )), ), domain,
                                   writeBoundary=False)
for i in range(args.steps):
    isosurface.attachHDF5(h5, i)
    isosurface.calculate(checkTime=False)
h5.close()
def __init__(self, h5Filename, taskQueue=None):
    self.h5Filename = getFullFilename(h5Filename)  # convert relative path to full path
    self.h5File = tables.openFile(self.h5Filename)
def write_memory_to_file(A, filename, mode='w', title='test', complevel=None,
                         verbose=True):
    """
    write memory to a h5 file
    h5 file contains root.real and root.imag (if A complex)
    best for transferring data with Matlab

    A: a ndarray, GPUArray or PitchArray
    filename: name of file to store
    mode: 'w' to start a new file
          'a' to append; the leading dimension of A must be the same as in
          the existing file

    file can be read by read_file or in matlab using h5read.m
    """
    h5file = tables.openFile(filename, mode, title)
    if complevel is not None:
        filters = tables.Filters(complevel=complevel, complib='zlib')
    else:
        filters = None

    if (A.dtype == np.float32) or (A.dtype == np.complex64):
        tb = tables.Float32Atom
    elif (A.dtype == np.float64) or (A.dtype == np.complex128):
        tb = tables.Float64Atom
    elif A.dtype == np.int32:
        tb = tables.Int32Atom
    elif A.dtype == np.int64:
        tb = tables.Int64Atom
    else:
        raise TypeError("Write file error: unknown input dtype")

    if PYCUDA:
        if A.__class__.__name__ in ["GPUArray", "PitchArray"]:
            B = A.get()
        elif A.__class__.__name__ == "ndarray":
            B = A
        else:
            raise TypeError("Write file error: unknown input")
    else:
        if A.__class__.__name__ == "ndarray":
            B = A
        else:
            raise TypeError("Write file error: unknown input")

    shape = list(B.shape)
    shape[0] = 0
    if mode == 'w':
        if np.iscomplexobj(B):
            h5file.createEArray("/", "real", tb(), tuple(shape),
                                filters=filters)
            h5file.createEArray("/", "imag", tb(), tuple(shape),
                                filters=filters)
        else:
            h5file.createEArray("/", "real", tb(), tuple(shape),
                                filters=filters)

    if np.iscomplexobj(B):
        h5file.root.real.append(B.real)
        h5file.root.imag.append(B.imag)
    else:
        h5file.root.real.append(B)

    h5file.close()
    if verbose:
        if mode == 'w':
            print "file %s created" % (filename)
        else:
            print "file %s attached" % (filename)
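# The docstring above mentions a read_file counterpart; the original is not
# shown here, so this reader is only a sketch of what it might look like,
# rebuilding complex data from the /real and /imag arrays.
import numpy as np
import tables

def read_file(filename):
    """Read back data stored by write_memory_to_file."""
    h5file = tables.openFile(filename, 'r')
    try:
        real = h5file.root.real.read()
        if hasattr(h5file.root, 'imag'):
            data = real + 1j * h5file.root.imag.read()
        else:
            data = real
    finally:
        h5file.close()
    return data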
import tables


class MyClass(object):
    foo = 'bar'

# An object of my custom class.
myObject = MyClass()

h5f = tables.openFile('test.h5', 'w')
h5f.root._v_attrs.obj = myObject  # store the object
print h5f.root._v_attrs.obj.foo  # retrieve it
h5f.close()

# Delete class of stored object and reopen the file.
del MyClass, myObject

h5f = tables.openFile('test.h5', 'r')
print h5f.root._v_attrs.obj.foo

# Let us inspect the object to see what is happening.
print repr(h5f.root._v_attrs.obj)

# Maybe unpickling the string will yield more information:
import cPickle
cPickle.loads(h5f.root._v_attrs.obj)

# So the problem was not in the stored object,
# but in the *environment* where it was restored.
h5f.close()
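The failure above comes from pickle rather than PyTables: the attribute is only unpickled when it is read back, so the class has to be importable again at that point. A minimal sketch of the fix, reusing the hypothetical test.h5 file from the snippet above:

import tables

# Re-create the class under the same name (and in the same module) it was
# pickled from, so cPickle can resolve it when the attribute is loaded.
class MyClass(object):
    foo = 'bar'

h5f = tables.openFile('test.h5', 'r')
print h5f.root._v_attrs.obj.foo  # works again: prints 'bar'
h5f.close()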
def reloadData(self):
    self.h5File.close()
    self.h5File = tables.openFile(self.h5Filename)
import os.path
import sys
import textwrap

import tables

import clusters
from simulations import GroundParticlesSimulation, QSubSimulation


DATAFILE = 'data-e15.h5'


if __name__ == '__main__':
    try:
        data
    except NameError:
        data = tables.openFile(DATAFILE, 'a')

    if '/simulations' in data:
        print
        print textwrap.dedent("""\
            WARNING: previous simulations exist and will be overwritten
            Continue? (answer 'yes'; anything else will exit)""")
        try:
            inp = raw_input()
        except KeyboardInterrupt:
            inp = 'Ctrl-C'

        if inp.lower() == 'yes':
            data.removeNode('/simulations', recursive=True)
        else:
            print
def saveSequence(self, f, sequence=None, framesInd=None, filterLevel=5):
    if sequence is None:
        sequence = self
    data = sequence.getFrame(0)
    if framesInd is None:
        framesInd = range(sequence.frames)

    h5file = tb.openFile(f, mode='w')
    root = h5file.root
    atom = tb.UInt16Atom()  # tb.Atom.from_dtype(data.dtype)
    if filterLevel == 0:
        filters = None
    else:
        filters = tb.Filters(complevel=filterLevel, complib='zlib',
                             shuffle=True)
    x = h5file.createEArray(root, 'x', atom,
                            shape=(sequence.getHeight(),
                                   sequence.getWidth(), 0),
                            expectedrows=sequence.frames, filters=filters)
    if showProgress:
        for i in progress(framesInd, "Saving...", "Cancel"):
            data = sequence.getFrame(i)
            try:
                x.append(data.reshape((sequence.getHeight(),
                                       sequence.getWidth(), 1)))
            except ValueError:
                print("Can't add frame " + str(i) + ". Wrong dimensions?")
    else:
        for i in framesInd:
            data = sequence.getFrame(i)
            try:
                x.append(data.reshape((sequence.getHeight(),
                                       sequence.getWidth(), 1)))
            except ValueError:
                print("Can't add frame " + str(i) + ". Wrong dimensions?")

    tGroup = h5file.createGroup(root, 'times')
    tA = np.vstack((sequence.timesDict.frames(),
                    sequence.timesDict.times())).T
    _t = h5file.createCArray(tGroup, 'timesArray',
                             tb.Atom.from_dtype(tA.dtype), tA.shape,
                             filters=filters)
    _t[:] = tA
    bytes = np.fromstring(sequence.timesDict.label.encode('utf-8'), np.uint8)
    _tStr = h5file.createCArray(tGroup, 'timeLabel', tb.UInt8Atom(),
                                shape=(len(bytes),), filters=filters)
    _tStr[:] = bytes

    h5file.flush()
    h5file.close()
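A read-back sketch for the layout written by saveSequence above (/x for the frames, /times/timesArray and /times/timeLabel for the timing data); the file name seq.h5 is illustrative:

import tables as tb

h5file = tb.openFile('seq.h5', 'r')
frames = h5file.root.x.read()               # (height, width, nframes) uint16
tA = h5file.root.times.timesArray.read()    # columns: frame numbers, times
label = h5file.root.times.timeLabel.read().tostring().decode('utf-8')
h5file.close()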
scatter(x, y, c='r', alpha=1.)
scatter(0, 0, c='white', alpha=1.)
scatter(0, 0, c='r', alpha=.2)

# plot coordinates stored in 'observables' table
for event in test_output.observables.readWhere('id == %d' % event_id):
    scatter(event['x'], event['y'], c='lightgreen')

xlabel("[m]")
ylabel("[m]")


def plot_cluster(alpha):
    for station in cluster.stations:
        c = 'yellow'
        for detector in station.detectors:
            x, y = detector.get_xy_coordinates()
            scatter(x, y, c=c, alpha=alpha)
            if c == 'yellow':
                c = 'blue'


if __name__ == '__main__':
    try:
        data
    except NameError:
        data = tables.openFile('transform_test.h5')

    sim = data.root.simulations.E_100TeV.zenith_0
    test_output = data.root.test_output.E_100TeV.zenith_0
    cluster = test_output._v_attrs.cluster
    self.__updateMonitor('Copying %s to %s\n' % (climo_file, tmp_file))
    if os.path.exists(tmp_file):
        os.unlink(tmp_file)
    os.system('/bin/cp -f ' + climo_file + ' ' + tmp_file)
    os.chmod(tmp_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IWGRP |
             stat.S_IRGRP | stat.S_IROTH)
    self.stns[stn].fh = tables.openFile(tmp_file, 'a')
    table = self.stns[stn].fh.root.obs
    try:
        self.stns[stn].set_start_year(table[table.nrows - 1]['year'])
        self.stns[stn].set_ending_datetime(table[table.nrows - 1]['date_time'])
    except IndexError:
        self.stns[stn].set_start_year(DATA_LIMIT_YEAR)
    except Exception, e:
        self.__updateMonitor(str(e) + '\n')
elif os.path.exists(tmp_file) and self.append:
    os.chmod(tmp_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IWGRP |
             stat.S_IRGRP | stat.S_IROTH)
    self.stns[stn].fh = tables.openFile(tmp_file, 'a')
    try:
        table = self.stns[stn].fh.root.obs
    except:
        # sometimes the file exists from a user trying to create it but not
        # starting the process, only to want to come back and append to it
        # later; this will deal with errors arising from that situation
        os.unlink(tmp_file)
        os.system('/bin/cp -f ' + climo_file + ' ' + tmp_file)
        os.chmod(tmp_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IWGRP |
                 stat.S_IRGRP | stat.S_IROTH)
        self.stns[stn].fh = tables.openFile(tmp_file, 'a')
        table = self.stns[stn].fh.root.obs
    try:
        self.stns[stn].set_start_year(table[table.nrows - 1]['year'])
        self.stns[stn].set_ending_datetime(table[table.nrows - 1]['date_time'])
    except IndexError:
        self.stns[stn].set_start_year(DATA_LIMIT_YEAR)
ax.plot(n**(-4. / 7), r2 / n**(8. / 7), label=r'$\omega = \omega_{c,n}$')
ax.set_xlim(xmin=0)
ax.set_xlabel(r'$n^{-4/7}$')
ax.set_ylabel(r'$\langle R^2 \rangle / n^{8/7}$')
ax.set_title('{}: {}'.format(hf.title, title))
ax.legend(loc=0)
ax.text(0.95, 0, s, transform=ax.transAxes, ha='right')

basename = os.path.splitext(os.path.basename(hf.filename))[0]
fn = '%s.%s.pdf' % (basename, title.replace(' ', '_'))
print('saving figure in ' + fn)
plt.savefig(fn)
# if show:
#     plt.show()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', action='store')
    parser.add_argument('--show', action='store_true')
    try:
        args = parser.parse_args()
    except:
        parser.print_help()
        sys.exit()

    with closing(openFile(args.filename)) as hf:
        do_plot(hf)
#ylim(0, 100)
#legend(numpoints=1)
utils.saveplot()
artist.utils.save_graph(graph, dirname='plots')
print


if __name__ == '__main__':
    # invalid values in arcsin will be ignored (nan handles the situation
    # quite well)
    np.seterr(invalid='ignore', divide='ignore')

    try:
        data
    except NameError:
        data = tables.openFile('master-ch4v2.h5', 'r')

    if '/reconstructions' not in data:
        print "Reconstructing shower direction..."
        do_full_reconstruction(data)
    else:
        print "Skipping reconstruction!"

    utils.set_prefix("DIR-")
    artist.utils.set_prefix("DIR-")
    do_reconstruction_plots(data)

    # These currently don't work
    # utils.set_prefix("WIP-")
    # do_jos_plots(data)