def create_file(number):
    ## CREATE FILE TO SAVE RWFS AND EVENTS
    h5file = tb.open_file(out_dir + "/" + f"run_{run}_{number}_selected.h5",
                          mode="w", title="selected wfs")
    selrwfs_group = h5file.create_group("/", "RD")
    event_info_group = h5file.create_group("/", "Run")
    PMTRWFs_Array = h5file.create_earray(selrwfs_group, "pmtrwf",
                                         tb.Int16Atom(), shape=(0, 3, 32000))
    SIPMRWFs_Array = h5file.create_earray(selrwfs_group, "sipmrwf",
                                          tb.Int16Atom(), shape=(0, 256, 800))

    class Event_Info(tb.IsDescription):
        event = tb.Int32Col()
        time = tb.UInt64Col()

    Event_Info_table = h5file.create_table(event_info_group, "events",
                                           Event_Info, "selected events")
    #EI = Event_Info_table.row
    return h5file, PMTRWFs_Array, SIPMRWFs_Array, Event_Info_table

def create_VLInt16Array(self, name, array, group):
    """Stores a homogeneous variable-length int16 array in a group."""
    self.h5file.create_vlarray(group, name, tables.Int16Atom(),
                               "ragged array of int16", chunkshape=512)

def save_hdf5(qa_idxs, filename):
    '''Save the processed data into an HDF5 file.'''
    print("writing hdf5..")
    f = tables.open_file(filename, 'w')
    filters = tables.Filters(complib='blosc', complevel=5)
    earrays = f.create_earray(f.root, 'sentences', tables.Int16Atom(),
                              shape=(0, ), filters=filters)
    indices = f.create_table("/", 'indices', Index,
                             "a table of indices and lengths")
    count = 0
    pos = 0
    for qa in qa_idxs:
        q = qa[0]
        a = qa[1]
        earrays.append(np.array(q))
        earrays.append(np.array(a))
        ind = indices.row
        ind['pos'] = pos
        ind['q_len'] = len(q)
        ind['a_len'] = len(a)
        ind.append()
        pos += len(q) + len(a)
        count += 1
        if count % 1000000 == 0:
            print(count)
            sys.stdout.flush()
            indices.flush()
        elif count % 100000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
    f.close()

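# Minimal usage sketch for save_hdf5 above (not from the original source).
# The Index description is not shown in the snippet; the layout below is an
# assumption consistent with the 'pos', 'q_len' and 'a_len' columns it fills.
import sys
import numpy as np
import tables


class Index(tables.IsDescription):        # assumed column layout
    pos = tables.Int64Col()               # start offset into 'sentences'
    q_len = tables.Int32Col()             # question length in tokens
    a_len = tables.Int32Col()             # answer length in tokens


# Hypothetical (question, answer) pairs already converted to int16 token ids.
qa_idxs = [([1, 2, 3], [4, 5]),
           ([6, 7], [8, 9, 10])]
save_hdf5(qa_idxs, "qa_corpus.h5")
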
def add_recording_in_kwd(kwd, recording_id=0,
                         downsample_factor=None,
                         nchannels=None,
                         nsamples=None,
                         data=None):
    if isinstance(kwd, string_types):
        kwd = open_file(kwd, 'a')
        to_close = True
    else:
        to_close = False

    if data is not None:
        nsamples, nchannels = data.shape

    recording = kwd.createGroup('/recordings', str(recording_id))
    recording._f_setAttr('downsample_factor', downsample_factor)
    dataset = kwd.createEArray(recording, 'data', tb.Int16Atom(),
                               (0, nchannels), expectedrows=nsamples)

    # Add raw data.
    if data is not None:
        assert data.shape[1] == nchannels
        data_int16 = convert_dtype(data, np.int16)
        dataset.append(data_int16)

    kwd.createGroup(recording, 'filter')
    # TODO: filter

    if to_close:
        kwd.close()

    return kwd

def buildRelationalMatrixToH5File(seqList, listFeature,
                                  filename="..//outputfile//" + version + "//encodingFile.h5"):
    fileh = tables.open_file(filename, mode="w")
    # Get the root of the file
    root = fileh.root
    sizeOfSeqList = len(seqList)
    atom = tables.Int16Atom()
    featureRowMatrix = fileh.create_earray(root, 'featureRowMatrix', atom,
                                           (0, sizeOfSeqList), "featureRowMatrix")
    seqRowMatrix = fileh.create_earray(root, 'seqRowMatrix', atom,
                                       (sizeOfSeqList, 0), "seqRowMatrix")
    for curFeatureIndex in range(len(listFeature)):
        print(curFeatureIndex)
        curFeature = listFeature[curFeatureIndex]
        arr = np.zeros((sizeOfSeqList, ), np.uint16)
        # Count how many times the feature occurs in each sequence
        for indexOfSeq in xrange(sizeOfSeqList):
            arr[indexOfSeq] = seqList[indexOfSeq].count(curFeature)
        # Append the array to the HDF5 file
        featureRowMatrix.append([arr])
        seqRowMatrix.append(arr.reshape(sizeOfSeqList, 1))
    fileh.flush()
    fileh.close()

def segmentCollection(arrayFile, arrayOut):
    DBi = tables.open_file(arrayFile, mode='r')
    array = DBi.root.resampled
    DFi = pandas.read_csv(tsvFile, sep='\t')
    IMG_SHAPE = array[0].shape
    nVols = len(array)

    DBo = tables.open_file(arrayOut, mode='w')
    filters = tables.Filters(complevel=1, complib='blosc:snappy')
    # 7.7 sec / 1.2 GB (14 sec / 1015 MB if precision is reduced); 140 s / 3.7 GB
    images = DBo.create_carray(DBo.root, 'resampled',
                               atom=tables.Int16Atom(shape=IMG_SHAPE),
                               shape=(nVols, ), filters=filters)

    print nVols
    for index in trange(nVols):
        vol = array[index]
        seg = segmentVol(vol)
        print 'seg: ', seg.min(), seg.mean(), seg.max()
        #images.append([seg])
        images[index] = seg

def add_raw_ephys_data(self, max_load=1e9):
    """
    :param rec_h5_obj: h5 file object from the record.
    :param max_load: number of integers to load at a given time for RAM limitations (default uses 8 GB).
    :return:
    """
    filesize = os.path.getsize(self.bin_fn)
    expct_rows = filesize / self.nchannels / 2
    data = {}  # dictionary to hold all of the data array objects (neural and metadata streams).
    for k, v in self.chan_idxes.iteritems():
        if self.run_group.__contains__(k):
            # TODO: add overwritability!!!
            for k in self.chan_idxes.keys():
                try:
                    data[k] = self.run_group._f_get_child(k)
                    logging.info('Raw data for stream {0} exists for run'.format(k))
                except tables.NoSuchNodeError:
                    logging.error('No data exists for {0}'.format(k))
                    raise Exception('No data exists for {0}'.format(k))
            logging.info('Data already exists, using existing data.')
            return data
        data[k] = self.rec_h5_obj.create_earray(self.run_group, name=k,
                                                atom=tables.Int16Atom(),
                                                shape=(0, len(v)),
                                                title='raw %s edata' % k,
                                                expectedrows=expct_rows)
        data[k]._v_attrs['bin_filename'] = self.bin_fn
        data[k]._v_attrs['acquisition_system'] = self.prms['acquisition_system']
        data[k]._v_attrs['sampling_rate_Hz'] = self.prms['sample_rate']

    f = open(self.bin_fn, 'rb')
    ld_q = int(max_load) // int(self.nchannels)  # automatically floors this value. a ceil wouldn't be bad
    ld_iter = ld_q * self.nchannels  # number of values to read in each iteration
    ld_count = 0
    logging.info('\t\tAdding raw run recording data to kwd...')
    while ld_count < filesize:
        arr = np.fromfile(f, np.int16, ld_iter)
        ld_count += ld_iter
        larr = arr.size / self.nchannels
        arr.shape = (larr, self.nchannels)
        for k, v in data.iteritems():
            idx = self.chan_idxes[k]
            v.append(arr[:, idx])
            v.flush()
        pc = float(ld_count) / float(filesize) * 100.
        if pc > 100.:
            pc = 100.
        logging.info('\t\t\t... %0.1d %% complete' % pc)
    f.close()
    self.run_group._v_attrs['bin_filename'] = str(self.bin_fn)
    self.rec_h5_obj.flush()
    return data

def add_recording_in_kwd(kwd, recording_id=0,
                         downsample_factor=None,
                         nchannels=None,
                         nsamples=None,
                         data=None,
                         name=None,
                         sample_rate=None,
                         start_time=None,
                         start_sample=None,
                         bit_depth=None,
                         band_high=None,
                         band_low=None,
                         filter_name=''):
    if isinstance(kwd, string_types):
        kwd = open_file(kwd, 'a')
        to_close = True
    else:
        to_close = False

    if data is not None:
        nsamples, nchannels = data.shape

    try:
        recording = kwd.createGroup('/recordings', str(recording_id))
    except tb.NodeError:
        if to_close:
            kwd.close()
        return kwd
    recording._f_setAttr('downsample_factor', downsample_factor)
    dataset = kwd.createEArray(recording, 'data', tb.Int16Atom(),
                               (0, nchannels), expectedrows=nsamples)

    # Add raw data.
    if data is not None:
        assert data.shape[1] == nchannels
        data_int16 = convert_dtype(data, np.int16)
        dataset.append(data_int16)

    # Add filter info.
    fil = kwd.createGroup(recording, 'filter')
    fil._f_setAttr('name', filter_name)

    # Copy recording info from kwik to kwd.
    recording._f_setAttr('name', name)
    recording._f_setAttr('start_time', start_time)
    recording._f_setAttr('start_sample', start_sample)
    recording._f_setAttr('sample_rate', sample_rate)
    recording._f_setAttr('bit_depth', bit_depth)
    recording._f_setAttr('band_high', band_high)
    recording._f_setAttr('band_low', band_low)

    if to_close:
        kwd.close()

    return kwd

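# Hypothetical call sketch for add_recording_in_kwd above (not from the original
# source): append two seconds of 16-channel data sampled at 20 kHz to a .kwd
# file; the filename and metadata values are placeholders.
import numpy as np

data = np.random.randn(40000, 16).astype(np.float32)   # (nsamples, nchannels)
add_recording_in_kwd('experiment.raw.kwd',
                     recording_id=0,
                     name='recording0',
                     sample_rate=20000.,
                     data=data,
                     filter_name='butterworth')
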
def set_output_store(self, h5out, nmax, sp):
    # RD group
    RD = h5out.create_group(h5out.root, "RD")
    # MC group
    MC = h5out.root.MC
    # create a table to store Energy plane FEE
    self.fee_table = h5out.create_table(MC, "FEE", FEE,
                                        "EP-FEE parameters",
                                        tbl.filters("NOCOMPR"))

    # create vectors
    self.pmtrwf = h5out.create_earray(RD, "pmtrwf",
                                      atom=tb.Int16Atom(),
                                      shape=(0, sp.NPMT, sp.PMTWL),
                                      expectedrows=nmax,
                                      filters=tbl.filters(self.compression))
    self.pmtblr = h5out.create_earray(RD, "pmtblr",
                                      atom=tb.Int16Atom(),
                                      shape=(0, sp.NPMT, sp.PMTWL),
                                      expectedrows=nmax,
                                      filters=tbl.filters(self.compression))
    self.sipmrwf = h5out.create_earray(RD, "sipmrwf",
                                       atom=tb.Int16Atom(),
                                       shape=(0, sp.NSIPM, sp.SIPMWL),
                                       expectedrows=nmax,
                                       filters=tbl.filters(self.compression))

    # run group
    RUN = h5out.create_group(h5out.root, "Run")
    self.runInfot = h5out.create_table(RUN, "RunInfo", RunInfo,
                                       "Run info", tbl.filters("NOCOMPR"))
    self.evtsInfot = h5out.create_table(RUN, "events", EventInfo,
                                        "Events info", tbl.filters("NOCOMPR"))

def create_training_feature_array(self, image_filenames, seg_filenames,
                                  array_name, indices_list):
    nb_features_per_subject = 1000000
    nb_subjects = len(image_filenames)
    nb_src_modalities = len(image_filenames[0])
    print(image_filenames[0])
    tmpimg = nib.load(image_filenames[0])
    tmpseg = nib.load(seg_filenames[0])
    image_dtype = tmpimg.get_data_dtype()
    seg_dtype = tmpseg.get_data_dtype()

    feature_array = self.data_storage.create_earray(
        self.data_storage.root, array_name,
        tables.Atom.from_dtype(image_dtype),
        shape=(0,) + self.feature_shape + (1,),
        expectedrows=np.prod(nb_features_per_subject) * nb_subjects)
    seg_array = self.data_storage.create_earray(
        self.data_storage.root, array_name + '_seg',
        tables.Atom.from_dtype(seg_dtype),
        shape=(0,) + self.feature_shape + (1,),
        expectedrows=np.prod(nb_features_per_subject) * nb_subjects)
    index_array = self.data_storage.create_earray(
        self.data_storage.root, array_name + '_index',
        tables.Int16Atom(),
        shape=(0, 3),
        expectedrows=np.prod(nb_features_per_subject) * nb_subjects)

    if indices_list == None:
        print("No indices_list found")
        indices_list = list()
        for input_file, seg_file in zip(image_filenames, seg_filenames):
            (features, indices) = self.extract_training_patches(input_file, seg_file,
                                                                intensity_threshold=0,
                                                                step_size=[1, 1, 1],
                                                                indices=None)
            feature_array.append(features)
            index_array.append(indices)
            indices_list.append(indices)
            print(input_file + " features extract size ")
            print(features.shape)
    else:
        print("YES indices_list found")
        for input_file, seg_file, curr_indices in zip(image_filenames, seg_filenames, indices_list):
            print("curr indices shape is ")
            print(curr_indices.shape)
            (features, indices) = self.extract_training_patches(input_file, seg_file,
                                                                intensity_threshold=0,
                                                                step_size=[1, 1, 1],
                                                                indices=curr_indices)
            print("indices shape is ")
            print(indices.shape)
            feature_array.append(features)
            index_array.append(curr_indices)
            print(input_file + " features extract size ")
            print(features.shape)

    return feature_array, index_array, indices_list

def _add_datasets(self, group, j, track_times):
    # Create a table
    table = self.h5file.create_table(group, f'table{j}', Record,
                                     title=self.title,
                                     filters=None,
                                     track_times=track_times)
    # Get the record object associated with the new table
    d = table.row
    # Fill the table
    for i in range(self.nrows):
        d['var1'] = '%04d' % (self.nrows - i)
        d['var2'] = i
        d['var3'] = i * 2
        d.append()  # This injects the Record values
    # Flush the buffer for this table
    table.flush()

    # Create a couple of arrays in each group
    var1List = [x['var1'] for x in table.iterrows()]
    var3List = [x['var3'] for x in table.iterrows()]

    self.h5file.create_array(group, f'array{j}', var1List, f"col {j}",
                             track_times=track_times)

    # Create CArrays as well
    self.h5file.create_carray(group, name=f'carray{j}', obj=var3List,
                              title="col {}".format(j + 2),
                              track_times=track_times)

    # Create EArrays as well
    ea = self.h5file.create_earray(group, f'earray{j}',
                                   tb.StringAtom(itemsize=4), (0, ),
                                   "col {}".format(j + 4),
                                   track_times=track_times)
    # And fill them with some values
    ea.append(var1List)

    # Finally VLArrays too
    vla = self.h5file.create_vlarray(group, f'vlarray{j}',
                                     tb.Int16Atom(),
                                     "col {}".format(j + 6),
                                     track_times=track_times)
    # And fill them with some values
    vla.append(var3List)

def fetch_data():
    data_path = "/data/lisatmp3/Lessac_Blizzard2013_segmented/backup"
    partial_path = os.path.join("/Tmp/", os.getenv("USER"))
    hdf5_path = os.path.join(partial_path, "full_blizzard.h5")
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dirnames, filenames in os.walk(data_path):
            for filename in fnmatch.filter(filenames, 'data_*.npy'):
                data_matches.append(os.path.join(root, filename))
        # sort in proper order
        data_matches = sorted(data_matches,
                              key=lambda x: int(x.split("/")[-1].split("_")[-1][0]))

        # setup tables
        sz = 32000
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, sz),
                                      filters=compression_filter)

        for na, f in enumerate(data_matches):
            print("Reading file %s" % (f))
            with open(f) as fp:
                # Array of arrays, ragged
                d = np.load(fp)
                for n, di in enumerate(d):
                    print("Processing line %i of %i" % (n, len(d)))
                    # Some of these are stereo??? wtf
                    if len(di.shape) < 2:
                        e = [r for r in range(0, len(di), sz)]
                        e.append(None)
                        starts = e[:-1]
                        stops = e[1:]
                        endpoints = zip(starts, stops)
                        for i, j in endpoints:
                            di_new = di[i:j]
                            # zero pad
                            if len(di_new) < sz:
                                di_large = np.zeros((sz, ), dtype='int16')
                                di_large[:len(di_new)] = di_new
                                di_new = di_large
                            data.append(di_new[None])
        hdf5_file.close()

    hdf5_file = tables.openFile(hdf5_path, mode='r')
    data = hdf5_file.root.data
    X = data
    return X

def _create_table_list(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.
    Modified version for creating a table used with appendList.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == list and type(example[0]) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" % example.dtype)

    if type(example) == np.ndarray:
        h5dim = (0, ) + example.shape[1:]
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_earray(h5.root, name, h5type, h5dim,
                                             filters=filters)
    elif type(example) == list and type(example[0]) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_vlarray(h5.root, name, h5type,
                                              filters=filters)

    self.types[name] = type(example)

def __init__(self, filename, **kwargs):
    MasterBlock.__init__(self)
    self.filename = filename
    for arg, default in [("node", "table"),
                         ("expected_rows", 10**8),
                         ("atom", tables.Int16Atom()),
                         ("label", "stream"),
                         ("metadata", {}),
                         ]:
        setattr(self, arg, kwargs.pop(arg, default))
    assert not kwargs, "Invalid kwarg(s) in Hdf_saver: " + str(kwargs)
    if not isinstance(self.atom, tables.Atom):
        self.atom = tables.Atom.from_dtype(np.dtype(self.atom))

def rwf_writer(h5out: tb.file.File, *,
               group_name: str,
               table_name: str,
               compression: str = 'ZLIB4',
               n_sensors: int,
               waveform_length: int) -> Callable:
    """
    Defines the group and table where raw waveforms will be written.

    h5out           : pytables file
                      Output file where waveforms are to be saved.
    group_name      : str
                      Name of the group in h5out.root.
                      Known options: RD, BLR.
                      Setting to None will save directly in root.
    table_name      : str
                      Name of the table.
                      Known options: pmtrwf, pmtcwf, sipmrwf.
    compression     : str
                      File compression.
    n_sensors       : int
                      Number of sensors in the table (shape[0]).
    waveform_length : int
                      Number of samples per sensor.
    """
    if group_name is None:
        rwf_group = h5out.root
    elif group_name in h5out.root:
        rwf_group = getattr(h5out.root, group_name)
    else:
        rwf_group = h5out.create_group(h5out.root, group_name)

    rwf_table = h5out.create_earray(rwf_group, table_name,
                                    atom=tb.Int16Atom(),
                                    shape=(0, n_sensors, waveform_length),
                                    filters=tbl.filters(compression))

    def write_rwf(waveform: np.ndarray) -> None:
        """
        Writes raw waveform arrays to file.

        waveform : np.ndarray
                   shape = (n_sensors, waveform_length) array of sensor charge.
        """
        rwf_table.append(waveform.reshape(1, n_sensors, waveform_length))

    return write_rwf

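# Minimal usage sketch for rwf_writer above (not from the original source).
# n_pmt and pmt_wl are placeholder values; real ones come from the detector
# configuration.
import numpy as np
import tables as tb

n_pmt, pmt_wl = 12, 32000

with tb.open_file("pmt_waveforms.h5", "w") as h5out:
    write_pmt = rwf_writer(h5out,
                           group_name="RD",
                           table_name="pmtrwf",
                           n_sensors=n_pmt,
                           waveform_length=pmt_wl)
    # One event's worth of raw waveforms, shape (n_sensors, waveform_length).
    event_wf = np.zeros((n_pmt, pmt_wl), dtype=np.int16)
    write_pmt(event_wf)
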
def _create_table(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError(
            "Could not create table %s because of unknown dtype '%s'"
            % (name, example.dtype))  #+ ", of name: " % example.shape)

    if type(example) == np.ndarray:
        h5dim = (0, ) + example.shape
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_earray(h5.root, name, h5type, h5dim,
                                             filters=filters)
    elif type(example) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_vlarray(h5.root, name, h5type,
                                              filters=filters)

    self.types[name] = type(example)

def main(options):
    gdb = genome.db.GenomeDB(assembly=options.assembly)
    chrom_dict = gdb.get_chromosome_dict()
    track = gdb.create_track(options.track_name[0])

    if options.dtype == "float32":
        atom = tables.Float32Atom()
    elif options.dtype == "int8":
        atom = tables.Int8Atom()
    elif options.dtype == "uint8":
        atom = tables.UInt8Atom()
    elif options.dtype == "int16":
        atom = tables.Int16Atom()
    else:
        raise NotImplementedError("datatype %s not implemented" % options.dtype)

    for path in options.filename:
        filename = path.split("/")[-1]

        if options.format in ("xb", "xbf"):
            # all of the chromosomes are in a single file...
            chrom_names = [chrom.name for chrom in gdb.get_chromosomes()]
        else:
            chrom_names = [extract_chrom_name(filename)]

        for chrom_name in chrom_names:
            if chrom_name not in chrom_dict:
                raise ValueError("unknown chromosome '%s'" % chrom_name)
            chrom = chrom_dict[chrom_name]
            sys.stderr.write(chrom_name + "\n")

            # create a chunked array with one dimension the length
            # of the chromosome
            shape = [chrom.length]
            carray = track.h5f.createCArray(track.h5f.root, chrom_name,
                                            atom, shape, filters=ZLIB_FILTER)

            # populate the array with data read from a file
            carray[:] = trackreader.read_file(path, chrom,
                                              dtype=options.dtype,
                                              format=options.format,
                                              pos_idx=options.pos_idx,
                                              val_idx=options.val_idx,
                                              strand=options.strand)

    track.close()

def _make_lfp(raw_files_prefix: str, channels, lfp_filename, acquistion_frequency,
              create_file=False, target_freqency=1000, dtype=np.int16, expectedrows=0):
    """
    Creates a decimated copy of the acquired (or processed) binary file.

    Only saves the specific channels indicated by the user. The target frequency
    is 1 kHz by default, but this can be adjusted as required. Output is an HDF5
    file with one EArray per channel.

    :param raw_files_prefix: Path to the binary files (separated by channels).
    :param channels: list of channels to save LFP copies of.
    :param lfp_filename: filename of the LFP file to save.
    :param acquistion_frequency: Sampling frequency of the original binary file.
    :param create_file: create lfp file?
    :param target_freqency: Desired sampling frequency of the LFP copy (default is 1 kHz).
    :return:
    """
    logging.info('Making LFP for {}. Loading data...'.format(raw_files_prefix))
    downsample_factor = acquistion_frequency // target_freqency
    lfp_freq = acquistion_frequency / downsample_factor

    if os.path.exists(lfp_filename) and create_file:
        raise ValueError('LFP file already exists.')
    elif create_file:
        with tb.open_file(lfp_filename, 'w') as f:
            n = f.create_group('/', 'lfp')
            n._f_setattr('Frequency_hz', lfp_freq)
            for ch in channels:
                f.create_earray('/lfp/', 'ch_{0:04n}'.format(ch), tb.Int16Atom(),
                                shape=(0, ),
                                expectedrows=expectedrows // downsample_factor,
                                filters=LFP_FILTER)

    logging.info("writing LFP to {}".format(lfp_filename))
    with tb.open_file(lfp_filename, 'r+') as f:
        for ch in tqdm(channels, unit='chan', desc='LFP save'):
            fn = _gen_channel_fn(raw_files_prefix, ch)
            a = np.fromfile(fn, dtype=dtype)
            b = decimate(a, downsample_factor, zero_phase=True)
            ch_array = f.get_node('/lfp/ch_{0:04n}'.format(ch))
            ch_array.append(b)
    logging.info('Complete.')

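# Hypothetical call sketch for _make_lfp above (not from the original source):
# a 30 kHz recording split into per-channel binary files, decimated to ~1 kHz
# LFP for four channels. Paths and values are placeholders.
_make_lfp(raw_files_prefix='/data/session01/raw_ch',
          channels=[0, 1, 2, 3],
          lfp_filename='/data/session01/lfp.h5',
          acquistion_frequency=30000,
          create_file=True,
          expectedrows=30000 * 3600)   # roughly one hour of acquisition
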
def buildTrainingSet(DF, segImages):
    sparseImages = SparseImageSource('/data/datasets/lung/resampled_order1/segmentedNonzero.h5')
    outArray = '/ssd/camsB.h5'
    outTsv = outArray.replace('.h5', '.tsv')

    camImageDF = pandas.DataFrame()

    DBo = tables.open_file(outArray, mode='w')
    filters = tables.Filters(complevel=6, complib='blosc:snappy')
    # 7.7 sec / 1.2 GB (14 sec / 1015 MB if precision is reduced); 140 s / 3.7 GB
    #filters = None
    cams = DBo.create_earray(DBo.root, 'cams',
                             atom=tables.Int16Atom(shape=CAM_SHAPE),
                             shape=(0,), expectedrows=len(DF), filters=filters)

    for index, row in tqdm(DF.iterrows(), total=len(DF)):
        print row
        cancer = row['cancer']

        # slow
        #image, imgNum = getImage(segImages, row)
        #camImage = makeCamImgFromImage(image, cubeSize)

        # faster
        #image = sparseImages.getImageFromSparse(row)
        #camImage = makeCamImgFromImage(image, cubeSize)

        # should be fastest
        cubes, positions = sparseImages.getCubesAndPositions(row, posType='pos')
        camImage = makeCamImageFromCubes(cubes, positions)

        print 'CAM IMAGE SHAPE %s mean %s max %s ==========', camImage.shape, camImage.mean(), camImage.max()
        if camImage.mean() == 0:
            print 'THIS IMAGE IS BAD ========================'

        cam = forceImageIntoShape(camImage, CAM_SHAPE)
        cams.append([cam])
        camImageDF = camImageDF.append(row)

    camImageDF.to_csv(outTsv, sep='\t')

def _make_tables(hdf5_file, n_sensors, compression="ZLIB4"):
    compr = tbl_filters(compression)
    trigger_group = hdf5_file.create_group(hdf5_file.root, 'Trigger')
    make_table = partial(hdf5_file.create_table, trigger_group, filters=compr)

    trg_type = make_table('trigger', table_formats.TriggerType, "Trigger Type")

    array_name = "events"
    trg_channels = hdf5_file.create_earray(trigger_group, array_name,
                                           atom=tb.Int16Atom(),
                                           shape=(0, n_sensors),
                                           filters=compr)

    trg_tables = trg_type, trg_channels
    return trg_tables

def _create_table(self, name, example, parent=None):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.
    """
    h5 = self.h5
    filters = tables.Filters(complevel=self.compression_level,
                             complib='zlib', shuffle=True)
    if parent is None:
        parent = h5.root

    if type(example) == str:
        h5type = tables.VLStringAtom()
        h5.createVLArray(parent, name, h5type, filters=filters)
        return
    if type(example) == dict:
        self.h5.createGroup(parent, name)
        return

    # If we get here then we're dealing with numpy arrays
    example = np.asarray(example)

    # MODIFICATION: appended name everywhere and introduced string
    type_map = {
        np.dtype(np.float64).name: tables.Float64Atom(),
        np.dtype(np.float32).name: tables.Float32Atom(),
        np.dtype(np.int).name: tables.Int64Atom(),
        np.dtype(np.int8).name: tables.Int8Atom(),
        np.dtype(np.uint8).name: tables.UInt8Atom(),
        np.dtype(np.int16).name: tables.Int16Atom(),
        np.dtype(np.uint16).name: tables.UInt16Atom(),
        np.dtype(np.int32).name: tables.Int32Atom(),
        np.dtype(np.uint32).name: tables.UInt32Atom(),
        np.dtype(np.bool).name: tables.BoolAtom(),
        # Maximal string length of 128 per string - change if needed
        'string32': tables.StringAtom(128),
    }

    try:
        h5type = type_map[example.dtype.name]
        h5dim = (0, ) + example.shape
        h5.createEArray(parent, name, h5type, h5dim, filters=filters)
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" % example.dtype)

def rwf_writer(file, *,
               group_name      : 'options: RD, BLR',
               table_name      : 'options: pmtrwf, pmtcwf, sipmrwf',
               compression     = 'ZLIB4',
               n_sensors       : 'number of pmts or sipms',
               waveform_length : 'length of pmt or sipm waveform_length'):
    try:
        rwf_group = getattr(file.root, group_name)
    except tb.NoSuchNodeError:
        rwf_group = file.create_group(file.root, group_name)

    rwf_table = file.create_earray(rwf_group, table_name,
                                   atom    = tb.Int16Atom(),
                                   shape   = (0, n_sensors, waveform_length),
                                   filters = tbl.filters(compression))

    def write_rwf(waveform : 'np.array: RWF, CWF, SiPM'):
        rwf_table.append(waveform.reshape(1, n_sensors, waveform_length))

    return write_rwf

def edf2hdf5(fn):
    ef = _edflib.Edfreader(fn)
    # nf = tables.createFile(fn+'.h5')
    nsigs = ef.signals_in_file
    print("nsigs:", nsigs)
    nsamples = [ef.samples_in_file(ii) for ii in range(nsigs)]
    nsample0 = nsamples[0]
    nsamples = np.array(nsamples)
    if any(nsamples != nsample0):
        raise Exception(
            "Assumption error: all signals should have the same number of samples")
    print("nsample0", nsample0)

    bigarr = np.empty(nsample0, dtype='int32')
    big16arr = np.empty(nsample0, dtype='int16')

    ii = 5
    _edflib.read_int_samples(ef.handle, ii, nsample0, bigarr)
    big16arr[:] = bigarr

    compfilter = tables.Filters(complevel=6, complib='zlib')
    h5 = tables.openFile('tstint16.h5', mode="w", title="test int16 file",
                         filters=compfilter)
    atom16 = tables.Int16Atom()
    shape = big16arr.shape
    dataset = h5.createCArray(h5.root, 'int16 array', atom16, shape,
                              filters=compfilter)
    dataset[:] = big16arr
    h5.flush()
    h5.close()

def _setup_output(self):
    outputfile = self.LPU_id + '_out'
    if self.record_neuron:
        self.outputfile_I = tables.openFile(outputfile + 'I.h5', 'w')
        self.outputfile_I.createEArray(
            "/", "array",
            tables.Float64Atom() if self.dtype == np.double else tables.Float32Atom(),
            (0, self.num_neurons))

        self.outputfile_V = tables.openFile(outputfile + 'V.h5', 'w')
        self.outputfile_V.createEArray(
            "/", "array",
            tables.Float64Atom() if self.dtype == np.double else tables.Float32Atom(),
            (0, self.num_neurons))

    if self.record_microvilli:
        self.outputfile_X0 = tables.openFile(outputfile + 'X0.h5', 'w')
        self.outputfile_X0.createEArray("/", "array", tables.Int16Atom(),
                                        (0, self.num_neurons))

        self.outputfile_X1 = tables.openFile(outputfile + 'X1.h5', 'w')
        self.outputfile_X1.createEArray("/", "array", tables.Int16Atom(),
                                        (0, self.num_neurons))

        self.outputfile_X2 = tables.openFile(outputfile + 'X2.h5', 'w')
        self.outputfile_X2.createEArray("/", "array", tables.Int16Atom(),
                                        (0, self.num_neurons))

        self.outputfile_X3 = tables.openFile(outputfile + 'X3.h5', 'w')
        self.outputfile_X3.createEArray("/", "array", tables.Int16Atom(),
                                        (0, self.num_neurons))

        self.outputfile_X4 = tables.openFile(outputfile + 'X4.h5', 'w')
        self.outputfile_X4.createEArray("/", "array", tables.Int16Atom(),
                                        (0, self.num_neurons))

        self.outputfile_X5 = tables.openFile(outputfile + 'X5.h5', 'w')
        self.outputfile_X5.createEArray("/", "array", tables.Int16Atom(),
                                        (0, self.num_neurons))

        self.outputfile_X6 = tables.openFile(outputfile + 'X6.h5', 'w')
        self.outputfile_X6.createEArray("/", "array", tables.Int16Atom(),
                                        (0, self.num_neurons))

def initfile(h5name, ncsf, q_down, include_times=True):
    """
    Initializes a h5 file to store converted data.
    """
    adbitvolts = ncsf.header['ADBitVolts']
    timestep = ncsf.timestep
    chname = ncsf.header['AcqEntName']

    h5f = tables.open_file(h5name, 'w')
    h5f.create_group('/', 'data')
    h5f.create_earray('/data', 'rawdata', tables.Int16Atom(), [0])
    h5f.root.data.rawdata.set_attr('ADBitVolts', adbitvolts)
    h5f.root.data.rawdata.set_attr('timestep', timestep)
    h5f.root.data.rawdata.set_attr('Q', q_down)
    h5f.root.data.rawdata.set_attr('AcqEntName', chname)

    if include_times:
        h5f.create_earray('/', 'time', tables.UInt64Atom(), [0])

    return h5f

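# Minimal usage sketch for initfile above (not from the original source),
# assuming `ncsf` is an already-opened Neuralynx .ncs reader exposing the
# .header and .timestep attributes that initfile reads.
import numpy as np

h5f = initfile('converted_ch1.h5', ncsf, q_down=16)
# Append a hypothetical block of downsampled int16 samples and their timestamps.
h5f.root.data.rawdata.append(np.zeros(512, dtype=np.int16))
h5f.root.time.append(np.arange(512, dtype=np.uint64))
h5f.close()
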
def SaveToHDF(self, Filename, loc="/"):
    """A generic function for saving ForceFields / Topologies / Conformations
    to H5 files.  Certain types of data cannot be stored as simple arrays, so
    these are the exceptions (if statements) in this function."""
    # check h5 file doesn't already exist
    CheckIfFileExists(Filename)
    F = tables.File(Filename, 'a')
    for key, data in self.iteritems():
        #print(key, data)
        try:
            # This checks if the list is homogeneous and can be stored in an
            # array data type (i.e. a square tensor).  If not, we need VLArray.
            TEMP = np.array(data)
            if TEMP.dtype == np.dtype("object"):
                # This check is necessary for Numpy 1.6.0 and greater, which
                # allow inhomogeneous lists to be converted to dtype arrays.
                raise ValueError
        except ValueError:
            F.createVLArray(loc, key, tables.Int16Atom())
            for x in data:
                F.getNode(loc, key).append(x)
            continue
        SaveEntryAsCArray(np.array(data), key, F0=F, loc=loc)
    F.flush()
    F.close()

        if (listSubStr[curIndex] in listSubStr[indexArr]) or (listSubStr[indexArr] in listSubStr[curIndex]):
            listRet.append(curIndex)
    # If it contains only one index (itself), then no other array matches it
    if (len(listRet) == 1):
        return None
    else:
        return listRet


#-------------------------BUILD MATRIX AND SAVE TO HDF5 FILES--------------------------
sizeOfSeqList = len(seqList)
atom = tables.Int16Atom()

# Use ``a`` as the object type for the enlargeable array.
featureRowMatrix = fileh.create_earray(root, 'featureRowMatrix', atom,
                                       (0, sizeOfSeqList), "featureRowMatrix")
seqRowMatrix = fileh.create_earray(root, 'seqRowMatrix', atom,
                                   (sizeOfSeqList, 0), "seqRowMatrix")

count = 0
for curFeatureIndex in range(len(listSubStr)):
    count += 1
    print(count)
    curFeature = listSubStr[curFeatureIndex]
    arr = np.zeros((sizeOfSeqList, ), np.uint16)
    # Count how many times the feature occurs in each sequence
    for indexOfSeq in xrange(sizeOfSeqList):

def _create_table_list(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.
    Modified version for creating a table used with appendList.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == list and type(example[0]) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" % example.dtype)

    if type(example) == np.ndarray:
        h5dim = (0, ) + example.shape[1:]
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)

        nodes = h5.list_nodes(h5.root)
        nmpt = name.replace('.', '/\n')
        nmpt = nmpt.split('\n')
        path = '/'
        for kay in range(len(nmpt) - 1):
            #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path, nmpt[kay][:-1])
            try:
                h5.is_visible_node(path + nmpt[kay][:-1])
            except:
                h5.create_group(path, nmpt[kay][:-1])
            path += nmpt[kay]
        self.tables[name] = h5.create_earray(path, nmpt[-1], h5type, h5dim,
                                             filters=filters)
    elif type(example) == list and type(example[0]) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)

        nodes = h5.list_nodes(h5.root)
        nmpt = name.replace('.', '/\n')
        nmpt = nmpt.split('\n')
        path = '/'
        for kay in range(len(nmpt) - 1):
            #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path, nmpt[kay][:-1])
            try:
                h5.is_visible_node(path + nmpt[kay][:-1])
            except:
                h5.create_group(path, nmpt[kay][:-1])
            path += nmpt[kay]
        self.tables[name] = h5.create_vlarray(path, nmpt[-1], h5type,
                                              filters=filters)

    self.types[name] = type(example)

data = model.get_data(dataset)
dataptr = data.root.test_img if test_set else data.root.train_img
batch_size = 64

# test model image by image
batch_round = 0
nseq = network_params['sequence_length']
nsoundstream = network_params['audio_gen']['nsoundstream']
n_v1_write = model.n_v1_write
v1_gaussian = network_params['v1_gaussian']
section_len = int(network_params['audio_gen']['section_len_msec'] / 1000. * network_params['fs'])
nmodulation = network_params['audio_gen']['nmodulation']
soundstream_len = int(nmodulation * section_len)
soundscape_len = int(soundstream_len * network_params['audio_gen']['soundscape_len_by_stream_len']) * nseq

float_dtype = tables.Float32Atom()
ss_dtype = tables.Int16Atom()
img_dtype = tables.UInt8Atom()

set_text = '_test' if test_set else '_train'
hdf5_file = tables.open_file('data/gendata_' + config_id + set_text + '.hdf5', mode='w')
cs_storage = hdf5_file.create_earray(hdf5_file.root, 'cs', img_dtype,
                                     shape=[0, nseq, model.img_h, model.img_w])
if test_set:
    ss_storage = hdf5_file.create_earray(hdf5_file.root, 'soundscapes', ss_dtype,
                                         shape=[0, soundscape_len, 2])
img_storage = hdf5_file.create_earray(hdf5_file.root, 'gen_img', img_dtype,
                                      shape=[0, model.img_h, model.img_w])
inp_img_storage = hdf5_file.create_earray(hdf5_file.root, 'inp_img', img_dtype,
                                          shape=[0, model.img_h, model.img_w])
df_storage = hdf5_file.create_earray(hdf5_file.root, 'df', float_dtype,
                                     shape=[0, nseq, nsoundstream, soundstream_len])
da_storage = hdf5_file.create_earray(hdf5_file.root, 'da', float_dtype,
                                     shape=[0, nseq, nsoundstream, soundstream_len])
dazim_storage = hdf5_file.create_earray(hdf5_file.root, 'dazim', float_dtype,
                                        shape=[0, nseq, nsoundstream, soundstream_len])
gx_storage = hdf5_file.create_earray(hdf5_file.root, 'gx', float_dtype,
                                     shape=[0, nseq, n_v1_write])
gy_storage = hdf5_file.create_earray(hdf5_file.root, 'gy', float_dtype,
                                     shape=[0, nseq, n_v1_write])
delta_storage = hdf5_file.create_earray(hdf5_file.root, 'delta', float_dtype,
                                        shape=[0, nseq, n_v1_write])

def ANASTASIA(argv=sys.argv):
    """
    ANASTASIA driver
    """
    CFP = configure(argv)

    if CFP["INFO"]:
        print(__doc__)

    # Increase thresholds by 1% for safety
    PMT_NOISE_CUT_RAW = CFP["PMT_NOISE_CUT_RAW"] * 1.01
    PMT_NOISE_CUT_BLR = CFP["PMT_NOISE_CUT_BLR"] * 1.01
    SIPM_ZS_METHOD = CFP["SIPM_ZS_METHOD"]
    SIPM_NOISE_CUT = CFP["SIPM_NOISE_CUT"]
    COMPRESSION = CFP["COMPRESSION"]

    with tb.open_file(CFP["FILE_IN"], "r+",
                      filters=tbl.filters(CFP["COMPRESSION"])) as h5in:
        pmtblr = h5in.root.RD.pmtblr
        pmtcwf = h5in.root.RD.pmtcwf
        sipmrwf = h5in.root.RD.sipmrwf
        pmtdf = DB.DataPMT()
        sipmdf = DB.DataSiPM()
        NEVT, NPMT, PMTWL = pmtcwf.shape
        NEVT, NSIPM, SIPMWL = sipmrwf.shape

        print_configuration({"# PMT": NPMT, "PMT WL": PMTWL,
                             "# SiPM": NSIPM, "SIPM WL": SIPMWL,
                             "# events in DST": NEVT})

        # Create instance of the noise sampler and compute noise thresholds
        sipms_noise_sampler_ = SiPMsNoiseSampler(SIPMWL)

        if SIPM_ZS_METHOD == "FRACTION":
            sipms_thresholds_ = sipms_noise_sampler_.ComputeThresholds(
                SIPM_NOISE_CUT, sipmdf['adc_to_pes'])
        else:
            sipms_thresholds_ = np.ones(NSIPM) * SIPM_NOISE_CUT

        if "/ZS" not in h5in:
            h5in.create_group(h5in.root, "ZS")
        if "/ZS/PMT" in h5in:
            h5in.remove_node("/ZS", "PMT")
        if "/ZS/BLR" in h5in:
            h5in.remove_node("/ZS", "BLR")
        if "/ZS/SiPM" in h5in:
            h5in.remove_node("/ZS", "SiPM")

        # Notice the Int16, not Float32! bad for compression
        pmt_zs_ = h5in.create_earray(h5in.root.ZS, "PMT",
                                     atom=tb.Int16Atom(),
                                     shape=(0, NPMT, PMTWL),
                                     expectedrows=NEVT,
                                     filters=tbl.filters(COMPRESSION))
        blr_zs_ = h5in.create_earray(h5in.root.ZS, "BLR",
                                     atom=tb.Int16Atom(),
                                     shape=(0, NPMT, PMTWL),
                                     expectedrows=NEVT,
                                     filters=tbl.filters(COMPRESSION))
        sipm_zs_ = h5in.create_earray(h5in.root.ZS, "SiPM",
                                      atom=tb.Int16Atom(),
                                      shape=(0, NSIPM, SIPMWL),
                                      expectedrows=NEVT,
                                      filters=tbl.filters(COMPRESSION))

        adc_to_pes = abs(1.0 / pmtdf["adc_to_pes"].reshape(NPMT, 1))

        t0 = time()
        for i in define_event_loop(CFP, NEVT):
            sumpmt = np.sum(pmtcwf[i] * adc_to_pes, axis=0)
            selection = np.tile(sumpmt > PMT_NOISE_CUT_RAW, (NPMT, 1))
            pmtzs = np.where(selection, pmtcwf[i], 0)

            blr = wfm.subtract_baseline(FE.CEILING - pmtblr[i])
            sumpmt = np.sum(blr * adc_to_pes, axis=0)
            selection = np.tile(sumpmt > PMT_NOISE_CUT_BLR, (NPMT, 1))
            blrzs = np.where(selection, blr, 0)

            pmt_zs_.append(pmtzs[np.newaxis])
            blr_zs_.append(blrzs[np.newaxis])

            sipmzs = sipmrwf[i]
            if "/MC" not in h5in:
                sipmzs = wfm.subtract_baseline(sipmzs, 200)
            sipmzs = wfm.noise_suppression(sipmzs, sipms_thresholds_)
            sipm_zs_.append(sipmzs[np.newaxis])

        t1 = time()
        dt = t1 - t0
        print("ANASTASIA has run over {} events in {} seconds".format(i + 1, dt))
        print("Leaving ANASTASIA. Safe travels!")