def make_shd(path_to_train, path_to_test, path_to_hdf5, digits, window_length):
    data = tables.open_file(path_to_train, 'r')
    n_neurons = 700
    T_max = max([max(data.root.spikes.times[i]) for i in range(len(data.root.labels))])
    S_prime = math.ceil(T_max / window_length)
    pattern = [1, 0, 0, 0, 0]  # the pattern used as output for the considered digit

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    train_data = hdf5_file.create_array(where=hdf5_file.root.train, name='data',
                                        atom=tables.BoolAtom(),
                                        obj=load_shd(data, S_prime, digits, window_length))
    train_labels = hdf5_file.create_earray(where=hdf5_file.root.train, name='label',
                                           atom=tables.BoolAtom(),
                                           shape=(0, len(digits), S_prime))
def __init__(self, database: AbstractDB):
    """
    Initialize the atoms for meta-data (types, valid tag, and splits)

    Args:
        database (AbstractDB): Associated Database object
    """
    super().__init__(database)
    self.filename_atom = tables.StringAtom(itemsize=255)
    self.types_atom = tables.StringAtom(itemsize=255)
    # whether the patch is valid.
    self.valid_atom = tables.BoolAtom(shape=(), dflt=False)
    # save the meta info: split
    # noinspection PyArgumentList
    self.file_list_atom = tables.StringAtom(itemsize=get_path_limit())
    # noinspection PyArgumentList
    self.split_atom = tables.IntAtom(shape=(), dflt=False)
    self.hdf5_organizer = H5Organizer(self.database, self.database.group_level)
    self.data_extractor = DataExtractor(self.database)
    self.weight_writer = WeightCollector(
        self.database, self.data_extractor,
        weight_counter=self.database.weight_counter_callable)
    self.data_size = {}
def subset_and_writeout(hf_in, fname, thin, maskval, binfn=lambda x: x):
    print 'Subsetting for %s' % fname
    res = 5
    hf_out = tb.openFile(os.path.join('5k-covariates',
                                      fname.replace('-', '_').replace('.', '_') + '.hdf5'), 'w')
    hf_out.createArray('/', 'lon', lon[lon_min_i:lon_max_i:res])
    hf_out.createArray('/', 'lat', lat[lat_min_i:lat_max_i:res])

    d = hf_in.root.data[(hf_in.root.data.shape[0] - lat_max_i * thin):
                        (hf_in.root.data.shape[0] - lat_min_i * thin):
                        thin,
                        lon_min_i * thin:
                        lon_max_i * thin:
                        thin]
    d = map_utils.grid_convert(map_utils.grid_convert(d, 'y-x+', 'x+y+')[::res, ::res],
                               'x+y+', 'y-x+')

    hf_out.createCArray('/', 'data', atom=tb.FloatAtom(), shape=d.shape,
                        filters=tb.Filters(complevel=1, complib='zlib'))
    hf_out.createCArray('/', 'mask', atom=tb.BoolAtom(), shape=d.shape,
                        filters=tb.Filters(complevel=1, complib='zlib'))
    hf_out.root.data.attrs.view = 'y-x+'
    hf_out.root.data[:] = binfn(d)
    hf_out.root.mask[:] = (d == maskval) + clipped_pete_mask
    hf_out.close()
def make_h5_col_file(dat, content, colname):
    """Make a new h5 table to hold column from ``dat``."""
    filename = os.path.join('data', content, 'msid', colname + '.h5')
    if os.path.exists(filename):
        os.unlink(filename)
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)

    col = dat[colname]
    h5shape = (0,) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.createEArray(h5.root, 'data', h5type, h5shape, title=colname,
                    expectedrows=86400 * 365 * 10)
    h5.createEArray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                    expectedrows=86400 * 365 * 10)
    print 'Made', colname
    h5.close()
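# A minimal follow-on sketch (not from the original source) showing how rows
# might later be appended to the 'data' and 'quality' EArrays created above.
# The file path and the sample values are hypothetical; appended values must
# match the dtype the 'data' array was created with.
import numpy as np
import tables

with tables.open_file('data/acis/msid/TEPHIN.h5', mode='a') as h5:
    values = np.array([291.3, 291.4, 291.2])    # hypothetical telemetry samples
    quality = np.array([False, False, True])    # True marks a bad sample
    h5.root.data.append(values)
    h5.root.quality.append(quality)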
def write_logControl(self, topic_group, data):
    fields = ['taskName', 'actionType', 'skillName', 'topics']
    self.pytable_writer_helper(topic_group, fields, tables.StringAtom(itemsize=20), data)
    self.pytable_writer_helper(topic_group, ['playback'], tables.BoolAtom(), data)
def make_h5_col_file(dats, colname):
    """Make a new h5 table to hold column from ``dats``."""
    filename = msid_files['msid'].abs
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    # Estimate the number of rows for 20 years based on available data
    times = np.hstack([x['TIME'] for x in dats])
    dt = np.median(times[1:] - times[:-1])
    n_rows = int(86400 * 365 * 20 / dt)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)

    col = dats[-1][colname]
    h5shape = (0,) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.createEArray(h5.root, 'data', h5type, h5shape, title=colname, expectedrows=n_rows)
    h5.createEArray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                    expectedrows=n_rows)
    logger.verbose('WARNING: made new file {} for column {!r} shape={} with n_rows(1e6)={}'
                   .format(filename, colname, h5shape, n_rows / 1.0e6))
    h5.close()
def components(self, components=None):
    """
    Retrieve or store each individual submatrix composing the aggregate matrix.

    :param components: List of (masked) numpy arrays
    :return: List of (masked) numpy arrays
    """
    if components is not None:
        try:
            self.file.remove_node(self._group, 'components', recursive=True)
        except tables.NoSuchNodeError:
            pass

        component_group = self.file.create_group(self._group, 'components')
        for i, m in enumerate(components):
            if m is None:
                m = np.array([np.nan])
            cm = self.file.create_carray(component_group, 'component_{}'.format(i),
                                         tables.Float32Atom(), m.shape)
            cm[:] = m
            if hasattr(m, 'mask'):
                mm = self.file.create_carray(component_group, 'mask_{}'.format(i),
                                             tables.BoolAtom(), m.shape)
                mm[:] = m.mask
        self.file.flush()

    max_ix = -1
    masks = dict()
    components = dict()
    component_group = self.file.get_node(self._group, 'components')
    for node in self.file.iter_nodes(component_group):
        if node.name.startswith('mask_'):
            ix = int(node.name[5:])
            masks[ix] = node[:]
            max_ix = max(ix, max_ix)
        elif node.name.startswith('component_'):
            ix = int(node.name[10:])
            m = node[:]
            if m.shape == (1,) and np.isnan(m[0]):
                components[ix] = None
            else:
                components[ix] = m
            max_ix = max(ix, max_ix)

    sorted_components = []
    for ix in range(max_ix + 1):
        component = components[ix]
        if component is not None:
            if ix in masks:
                mask = masks[ix]
            else:
                mask = None
            sorted_components.append(np.ma.masked_array(component, mask=mask))
        else:
            sorted_components.append(None)
    return sorted_components
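# A minimal usage sketch (assumed, not from the original source) for the
# combined getter/setter above. `agg` is a placeholder for whatever object
# exposes the `components` method; the arrays are illustrative only.
import numpy as np

parts = [
    np.ma.masked_array(np.arange(4.0).reshape(2, 2),
                       mask=[[False, True], [False, False]]),
    None,  # missing submatrix; stored on disk as a single-NaN placeholder
]
agg.components(parts)        # write the list into the HDF5 'components' group
restored = agg.components()  # read it back; restored[1] comes back as None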
def create_input_group(h5f, title="input data at transmitter", rolloff_dflt=np.nan,
                       attrs={}, arrays=["symbols", "bits"], **kwargs):
    """
    Create the table for saving the input symbols and bits

    Parameters
    ----------
    h5f : string or h5filehandle
        The file to use, if a string create or open new file
    title: string, optional
        The title description of the group
    attrs: dict, optional
        attributes on the table
    arrays: list, optional
        name of arrays referenced in the table
    **kwargs
        keyword arguments passed to create_table/array, it is highly
        recommended to set expectedrows

    Returns
    -------
    h5f : h5filehandle
        Pytables handle to the hdf file
    """
    try:
        gr = h5f.create_group("/", "input", title=title)
    except AttributeError:
        h5f = tb.open_file(h5f, "a")
        gr = h5f.create_group("/", "input", title=title)
    # if no shape for input syms or bits is given use scalar
    t_in = h5f.create_table(gr, "signal", {
        "id": tb.Int64Col(),
        "idx_symbols": tb.Int64Col(dflt=0),
        "idx_bits": tb.Int64Col(dflt=0),
        "rolloff": tb.Float64Col(dflt=rolloff_dflt)
    }, title="parameters of input signal", **kwargs)
    setattr(t_in.attrs, "arrays", arrays)
    arr_syms = h5f.create_mdvlarray(gr, "symbols", tb.ComplexAtom(itemsize=16, dflt=np.nan),
                                    title="sent symbols", **kwargs)
    arr_bits = h5f.create_mdvlarray(gr, "bits", tb.BoolAtom(), title="sent bits", **kwargs)
    for k, v in attrs.items():
        setattr(t_in.attrs, k, v)
    return h5f
def create_recvd_data_group(h5f, title="data analysis and qampy results", description=None,
                            oversampling_dflt=2, attrs=DSP_UNITS,
                            arrays=["data", "symbols", "taps", "bits"], nmodes=2, **kwargs):
    """
    Create the table for saving recovered data and parameters after DSP

    Parameters
    ----------
    h5f : string or h5filehandle
        The file to use, if a string create or open new file
    title: string
        The title description of the group
    description: dict or tables.IsDescription (optional)
        If given use to create the table
    attrs: dict, optional
        attributes for the table
    arrays: list, optional
        name of arrays referenced in the table
    nmodes: int, optional
        number of modes/polarisations
    **kwargs
        keyword arguments passed to create_table/array, it is highly
        recommended to set expectedrows

    Returns
    -------
    h5f : h5filehandle
        Pytables handle to the hdf file
    """
    try:
        gr = h5f.create_group("/", "analysis", title=title)
    except AttributeError:
        h5f = tb.open_file(h5f, "a")
        gr = h5f.create_group("/", "analysis", title=title)
    gr_dsp = h5f.create_group(gr, "qampy", title="Signal from DSP")

    if description is None:
        dsp_params = {
            "freq_offset": tb.Float64Col(dflt=np.nan),
            "freq_offset_N": tb.Int64Col(dflt=0),
            "phase_est": tb.StringCol(itemsize=20),
            "N_angles": tb.Float64Col(dflt=np.nan),
            "ph_est_blocklength": tb.Int64Col(),
            "stepsize": tb.Float64Col(shape=2),
            "trsyms": tb.Float64Col(shape=2),
            "iterations": tb.Int64Col(shape=2),
            "ntaps": tb.Int64Col(),
            "method": tb.StringCol(itemsize=20)}
        description = {
            "id": tb.Int64Col(),
            "idx_data": tb.Int64Col(),
            "idx_symbols": tb.Int64Col(),
            "idx_bits": tb.Int64Col(),
            "idx_taps": tb.Int64Col(),
            "evm": tb.Float64Col(dflt=np.nan, shape=nmodes),
            "ber": tb.Float64Col(dflt=np.nan, shape=nmodes),
            "ser": tb.Float64Col(dflt=np.nan, shape=nmodes),
            "oversampling": tb.Int64Col(dflt=oversampling_dflt)}
        description.update(dsp_params)

    t_rec = h5f.create_table(gr_dsp, "signal", description, "signal after DSP", **kwargs)
    setattr(t_rec.attrs, "arrays", arrays)
    data_arr = h5f.create_mdvlarray(gr_dsp, "data", tb.ComplexAtom(itemsize=16),
                                    "signal after DSP", **kwargs)
    syms_arr = h5f.create_mdvlarray(gr_dsp, "symbols", tb.ComplexAtom(itemsize=16, dflt=np.nan),
                                    "recovered symbols", **kwargs)
    taps_arr = h5f.create_mdvlarray(gr_dsp, "taps", tb.ComplexAtom(itemsize=16, dflt=np.nan),
                                    "qampy taps", **kwargs)
    bits_arr = h5f.create_mdvlarray(gr_dsp, "bits", tb.BoolAtom(dflt=False),
                                    "recovered bits", **kwargs)
    for k, v in attrs.items():
        setattr(t_rec.attrs, k, v)
    return h5f
def repeat_expt(smplr, n_expts, n_labels, output_file=None):
    """
    Parameters
    ----------
    smplr : sub-class of PassiveSampler
        sampler must have a sample_distinct method, reset method and ...

    n_expts : int
        number of expts to run

    n_labels : int
        number of labels to query from the oracle in each expt
    """
    FILTERS = tables.Filters(complib='zlib', complevel=5)

    max_iter = smplr._max_iter
    n_class = smplr._n_class
    if max_iter < n_labels:
        raise ValueError("Cannot query {} labels. Sampler ".format(n_labels) +
                         "instance supports only {} iterations".format(max_iter))

    if output_file is None:
        # Use current date/time as filename
        output_file = 'expt_' + time.strftime("%d-%m-%Y_%H:%M:%S") + '.h5'
    logging.info("Writing output to {}".format(output_file))

    f = tables.open_file(output_file, mode='w', filters=FILTERS)
    float_atom = tables.Float64Atom()
    bool_atom = tables.BoolAtom()
    int_atom = tables.Int64Atom()

    array_F = f.create_carray(f.root, 'F_measure', float_atom, (n_expts, n_labels, n_class))
    array_s = f.create_carray(f.root, 'n_iterations', int_atom, (n_expts, 1))
    array_t = f.create_carray(f.root, 'CPU_time', float_atom, (n_expts, 1))

    logging.info("Starting {} experiments".format(n_expts))
    for i in range(n_expts):
        if i % np.ceil(n_expts / 10).astype(int) == 0:
            logging.info("Completed {} of {} experiments".format(i, n_expts))
        ti = time.process_time()
        smplr.reset()
        smplr.sample_distinct(n_labels)
        tf = time.process_time()
        if hasattr(smplr, 'queried_oracle_'):
            array_F[i, :, :] = smplr.estimate_[smplr.queried_oracle_]
        else:
            array_F[i, :, :] = smplr.estimate_
        array_s[i] = smplr.t_
        array_t[i] = tf - ti

    f.close()
    logging.info("Completed all experiments")
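# A minimal sketch (assumed, not from the original source) of reading back the
# arrays written by repeat_expt(); the file name is illustrative only.
import numpy as np
import tables

with tables.open_file('expt_01-01-2024_12:00:00.h5', mode='r') as f:
    F = f.root.F_measure[:]          # shape (n_expts, n_labels, n_class)
    n_iter = f.root.n_iterations[:]  # iterations completed per experiment
    cpu = f.root.CPU_time[:]         # wall-clock of sample_distinct per experiment
print(F.mean(axis=0).shape)          # mean F-measure trajectory, per class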
def make_shd(path_to_train, path_to_test, path_to_hdf5, digits, alphabet_size, pattern,
             window_length):
    train_data_file = tables.open_file(path_to_train, 'r')
    test_data_file = tables.open_file(path_to_test, 'r')

    T_max = 1. * 1e6
    S_prime = math.ceil(T_max / window_length)

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    # Make train group and arrays
    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    train_data, output_signal = load_shd(path_to_train, S_prime, digits, window_length,
                                         alphabet_size, pattern)
    train_data_array = hdf5_file.create_array(where=hdf5_file.root.train, name='data',
                                              atom=tables.BoolAtom(), obj=train_data)
    train_labels_array = hdf5_file.create_earray(where=hdf5_file.root.train, name='label',
                                                 atom=tables.BoolAtom(), obj=output_signal)

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data, output_signal = load_shd(path_to_test, S_prime, digits, window_length,
                                        alphabet_size, pattern)
    test_data_array = hdf5_file.create_array(where=hdf5_file.root.test, name='data',
                                             atom=tables.BoolAtom(), obj=test_data)
    test_labels_array = hdf5_file.create_earray(where=hdf5_file.root.test, name='label',
                                                atom=tables.BoolAtom(), obj=output_signal)

    make_stats_group(hdf5_file)

    train_data_file.close()
    test_data_file.close()
    hdf5_file.close()
def make_mnist_dvs(path_to_data, path_to_hdf5, digits, max_pxl_value, min_pxl_value,
                   T_max, window_length, scale):
    """Preprocess the .aedat files and save the dataset as an .hdf5 file."""
    dirs = [r'/' + dir_ for dir_ in os.listdir(path_to_data)]
    S_prime = math.ceil(T_max / window_length)
    pattern = [1, 0, 0, 0, 0]  # the pattern used as output for the considered digit

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    train_data = hdf5_file.create_earray(where=hdf5_file.root.train, name='data',
                                         atom=tables.BoolAtom(),
                                         shape=(0, (max_pxl_value - min_pxl_value + 1) ** 2, S_prime))
    train_labels = hdf5_file.create_earray(where=hdf5_file.root.train, name='label',
                                           atom=tables.BoolAtom(),
                                           shape=(0, len(digits), S_prime))

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data = hdf5_file.create_earray(where=hdf5_file.root.test, name='data',
                                        atom=tables.BoolAtom(),
                                        shape=(0, (max_pxl_value - min_pxl_value + 1) ** 2, S_prime))
    test_labels = hdf5_file.create_earray(where=hdf5_file.root.test, name='label',
                                          atom=tables.BoolAtom(),
                                          shape=(0, len(digits), S_prime))

    for i, digit in enumerate(digits):
        for dir_ in dirs:
            if dir_.find(str(digit)) != -1:
                for subdir, _, _ in os.walk(path_to_data + dir_):
                    if subdir.find(scale) != -1:
                        for j, file in enumerate(glob.glob(subdir + r'/*.aedat')):
                            if j < 0.9 * len(glob.glob(subdir + r'/*.aedat')):
                                print('train', file)
                                train_data.append(
                                    load_dvs(file, S_prime, min_pxl_value=min_pxl_value,
                                             max_pxl_value=max_pxl_value,
                                             window_length=window_length))
                                output_signal = np.array(
                                    [[[0] * S_prime] * i
                                     + [pattern * int(S_prime / len(pattern))
                                        + pattern[:(S_prime % len(pattern))]]
                                     + [[0] * S_prime] * (len(digits) - 1 - i)],
                                    dtype=bool)
                                train_labels.append(output_signal)
                            else:
                                print('test', file)
                                test_data.append(
                                    load_dvs(file, S_prime, min_pxl_value=min_pxl_value,
                                             max_pxl_value=max_pxl_value,
                                             window_length=window_length))
                                output_signal = np.array(
                                    [[[0] * S_prime] * i
                                     + [pattern * int(S_prime / len(pattern))
                                        + pattern[:(S_prime % len(pattern))]]
                                     + [[0] * S_prime] * (len(digits) - 1 - i)],
                                    dtype=bool)
                                test_labels.append(output_signal)

    hdf5_file.close()
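# A small illustrative sketch (not from the original source) of how the label
# pattern above is tiled to the length of the time axis S_prime; the values of
# pattern and S_prime here are examples only.
pattern = [1, 0, 0, 0, 0]
S_prime = 12
tiled = pattern * int(S_prime / len(pattern)) + pattern[:(S_prime % len(pattern))]
print(tiled)       # [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]
print(len(tiled))  # 12 -> always matches S_prime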
def write_bluetooth(self, topic_group, data):
    str_fields = ['mac_addr', 'dev_name']
    self.pytable_writer_helper(topic_group, str_fields, tables.StringAtom(itemsize=20), data)
    self.pytable_writer_helper(topic_group, ['is_present'], tables.BoolAtom(), data)
    self.pytable_writer_helper(topic_group, ['rssi'], tables.Int64Atom(), data)
    self.pytable_writer_helper(topic_group, ['time'], tables.Float64Atom(), data)
def _create_table_list(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and its
    datatype are determined by *example*.

    The modified version for creating table with appendList
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == list and type(example[0]) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" % example.dtype)

    if type(example) == np.ndarray:
        h5dim = (0,) + example.shape[1:]
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level, complib='zlib', shuffle=True)
        self.tables[name] = h5.create_earray(h5.root, name, h5type, h5dim, filters=filters)
    elif type(example) == list and type(example[0]) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level, complib='zlib', shuffle=True)
        self.tables[name] = h5.create_vlarray(h5.root, name, h5type, filters=filters)
    self.types[name] = type(example)
def _create_table(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and its
    datatype are determined by *example*.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Could not create table %s because of unknown dtype '%s'"
                        % (name, example.dtype))  # + ", of name: " % example.shape)

    if type(example) == np.ndarray:
        h5dim = (0,) + example.shape
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level, complib='zlib', shuffle=True)
        self.tables[name] = h5.create_earray(h5.root, name, h5type, h5dim, filters=filters)
    elif type(example) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level, complib='zlib', shuffle=True)
        self.tables[name] = h5.create_vlarray(h5.root, name, h5type, filters=filters)
    self.types[name] = type(example)
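# Side note (not from the original source): for ndarray examples, PyTables can
# derive most of the atoms in the manual type_map above directly from the numpy
# dtype, which is a common shortcut when only array inputs need to be handled.
import numpy as np
import tables

example = np.zeros((10, 3), dtype=np.bool_)
atom = tables.Atom.from_dtype(example.dtype)  # equivalent to BoolAtom() here
print(atom)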
def _triage_write(key, value, root, *write_params):
    import tables as tb
    create_group, create_table, create_c_array, filters = write_params
    if isinstance(value, dict):
        sub_root = create_group(root, key, 'dict')
        for key, sub_value in value.items():
            if not isinstance(key, string_types):
                raise TypeError('All dict keys must be strings')
            _triage_write('key_{0}'.format(key), sub_value, sub_root, *write_params)
    elif isinstance(value, (list, tuple)):
        title = 'list' if isinstance(value, list) else 'tuple'
        sub_root = create_group(root, key, title)
        for vi, sub_value in enumerate(value):
            _triage_write('idx_{0}'.format(vi), sub_value, sub_root, *write_params)
    elif isinstance(value, type(None)):
        atom = tb.BoolAtom()
        s = create_c_array(root, key, atom, (1,), title='None', filters=filters)
        s[:] = False
    elif isinstance(value, (int, float)):
        if isinstance(value, int):
            title = 'int'
        else:  # isinstance(value, float)
            title = 'float'
        value = np.atleast_1d(value)
        atom = tb.Atom.from_dtype(value.dtype)
        s = create_c_array(root, key, atom, (1,), title=title, filters=filters)
        s[:] = value
    elif isinstance(value, string_types):
        atom = tb.UInt8Atom()
        if isinstance(value, text_type):  # unicode
            value = np.fromstring(value.encode('utf-8'), np.uint8)
            title = 'unicode'
        else:
            value = np.fromstring(value.encode('ASCII'), np.uint8)
            title = 'ascii'
        s = create_c_array(root, key, atom, (len(value),), title=title, filters=filters)
        s[:] = value
    elif isinstance(value, np.ndarray):
        atom = tb.Atom.from_dtype(value.dtype)
        s = create_c_array(root, key, atom, value.shape, title='ndarray', filters=filters)
        s[:] = value
    else:
        raise TypeError('unsupported type %s' % type(value))
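# A minimal sketch (assumed, not part of the original module) of what the None
# branch above produces: a one-element boolean CArray whose title tags the
# stored value as None, so a reader can map it back. File and node names are
# hypothetical.
import tables as tb

with tb.open_file('triage_demo.h5', 'w') as f:
    s = f.create_carray(f.root, 'key_missing', tb.BoolAtom(), (1,), title='None')
    s[:] = False
    print(f.root.key_missing.title)  # 'None' -> a reader reconstructs the value None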
def create_DenseTrackSet(outTableFile, contigLengths, grp):
    print "creating Dense Track Set..."
    track = DenseTrackSet(
        contigLengths,
        outTableFile,
        overwrite=True,  # DANGERDANGERDANGER: clobbers any existing table file
        openMode='w',
        compression=True)
    # only one group, called `grp` (e.g. mask)
    track.addGroup(grp)
    track[grp].addArray(tables.BoolAtom(), [])
    print "done"
    return track
def _create_table(self, name, example, parent=None):
    """
    Create a new table within the HDF file, where the table's shape and its
    datatype are determined by *example*.
    """
    h5 = self.h5
    filters = tables.Filters(complevel=self.compression_level, complib='zlib', shuffle=True)
    if parent is None:
        parent = h5.root

    if type(example) == str:
        h5type = tables.VLStringAtom()
        h5.createVLArray(parent, name, h5type, filters=filters)
        return
    if type(example) == dict:
        self.h5.createGroup(parent, name)
        return

    # If we get here then we're dealing with numpy arrays
    example = np.asarray(example)

    # MODIFICATION: appended name everywhere and introduced string
    type_map = {
        np.dtype(np.float64).name: tables.Float64Atom(),
        np.dtype(np.float32).name: tables.Float32Atom(),
        np.dtype(np.int).name: tables.Int64Atom(),
        np.dtype(np.int8).name: tables.Int8Atom(),
        np.dtype(np.uint8).name: tables.UInt8Atom(),
        np.dtype(np.int16).name: tables.Int16Atom(),
        np.dtype(np.uint16).name: tables.UInt16Atom(),
        np.dtype(np.int32).name: tables.Int32Atom(),
        np.dtype(np.uint32).name: tables.UInt32Atom(),
        np.dtype(np.bool).name: tables.BoolAtom(),
        # Maximal string length of 128 per string - change if needed
        'string32': tables.StringAtom(128)
    }

    try:
        h5type = type_map[example.dtype.name]
        h5dim = (0,) + example.shape
        h5.createEArray(parent, name, h5type, h5dim, filters=filters)
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" % example.dtype)
def make_msid_file(colname, content, content_def):
    ft['content'] = content
    ft['msid'] = colname
    filename = msid_files['data'].abs
    if os.path.exists(filename):
        return

    logger.info('Making MSID data file %s', filename)

    if colname == 'TIME':
        dp_vals, indexes = derived.times_indexes(opt.start, opt.stop,
                                                 content_def['time_step'])
    else:
        dp = content_def['classes'][colname]()
        dataset = dp.fetch(opt.start, opt.stop)
        dp_vals = np.asarray(dp.calc(dataset), dtype=dp.dtype)

    # Finally make the actual MSID data file
    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.open_file(filename, mode='w', filters=filters)

    n_rows = int(20 * 3e7 / content_def['time_step'])
    h5shape = (0,)
    h5type = tables.Atom.from_dtype(dp_vals.dtype)
    h5.create_earray(h5.root, 'data', h5type, h5shape, title=colname, expectedrows=n_rows)
    h5.create_earray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                     expectedrows=n_rows)
    logger.info('Made {} shape={} with n_rows(1e6)={}'.format(colname, h5shape, n_rows / 1.0e6))
    h5.close()
def h5open(self):
    self.h5filename = os.path.join(
        self.conmatdir, "conmat_%s_%s.h5" % (self.projname, self.casename))
    self.h5f = h5f = td.openFile(self.h5filename, 'a')
    if not hasattr(h5f.root, 'conmat'):
        if not hasattr(self, 'reg'):
            self.regvec_from_discs()
        jdvec = int((self.jdmax - self.jdmin + 1) / self.djd) + 1
        shape = (jdvec, self.dtmax, self.nreg, self.nreg)
        iatom = td.UInt32Atom()
        fatom = td.FloatCol()
        batom = td.BoolAtom()
        filtr = td.Filters(complevel=5, complib='zlib')
        crc = h5f.createCArray
        cnmat = crc(h5f.root, 'conmat', iatom, shape, filters=filtr)
        jdvec = crc(h5f.root, 'jdvec', fatom, (shape[0],))
        exist = crc(h5f.root, 'exist', batom, (shape[0], shape[1]))
        jdvec[:] = np.arange(self.jdmin, self.jdmax + 1, self.djd)
        exist[:] = False
    else:
        cnmat = h5f.root.conmat
        jdvec = h5f.root.jdvec
        exist = h5f.root.exist
    return cnmat, jdvec, exist
def make_stub_h5_col(msid, row0, row1, basedir_ref, basedir_stub):
    fetch.ft['msid'] = msid

    with set_fetch_basedir(basedir_ref):
        file_ref = fetch.msid_files['data'].abs

    if not Path(file_ref).exists():
        return

    with tables.open_file(file_ref, 'r') as h5:
        data_stub = h5.root.data[row0:row1]
        qual_stub = h5.root.quality[row0:row1]
        n_rows = len(h5.root.data)

    data_fill = np.zeros(row0, dtype=data_stub.dtype)
    qual_fill = np.ones(row0, dtype=qual_stub.dtype)  # True => bad

    with set_fetch_basedir(basedir_stub):
        file_stub = fetch.msid_files['data'].abs

    if os.path.exists(file_stub):
        os.unlink(file_stub)

    filters = tables.Filters(complevel=5, complib='zlib')
    with tables.open_file(file_stub, mode='w', filters=filters) as h5:
        h5shape = (0,) + data_stub.shape[1:]
        h5type = tables.Atom.from_dtype(data_stub.dtype)
        h5.create_earray(h5.root, 'data', h5type, h5shape, title=msid, expectedrows=n_rows)
        h5.create_earray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                         expectedrows=n_rows)

    with tables.open_file(file_stub, mode='a') as h5:
        h5.root.data.append(data_fill)
        h5.root.data.append(data_stub)
        h5.root.quality.append(qual_fill)
        h5.root.quality.append(qual_stub)
print "# outlier voxels:", len(np.where(outlier_mask)[0]) print "# data voxels:", len(np.where(data_mask)[0]) print ########################################################################### # Build data array # # Ignore all high-scoring labels and labels with outlier voxels # # Read all data files, but only voxels in data mask. flat_data_mask = data_mask.reshape(-1) if not os.path.exists(datafile): print "Loading data into {}".format(datafile) fileh = tb.open_file(datafile, mode='w', title="data", filters=FILTERS) dataarray = fileh.createEArray(fileh.root,'data',tb.BoolAtom(), shape=(0,np.sum(flat_data_mask))) images_used = [] t0 = time() for i, labelfile in enumerate(labelfiles): if i>0 and i % printfreq == 0: progress(i,len(labelfiles),time()-t0,printfreq) t0 = time() if not passing_idx[i]: continue data = rawdata[i,:] #outlier_sum = np.sum(data[outlier_mask]) #if outlier_sum > 0: continue
def _create_table_list(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and its
    datatype are determined by *example*.

    The modified version for creating table with appendList
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == list and type(example[0]) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" % example.dtype)

    if type(example) == np.ndarray:
        h5dim = (0,) + example.shape[1:]
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level, complib='zlib', shuffle=True)

        nodes = h5.list_nodes(h5.root)
        nmpt = name.replace('.', '/\n')
        nmpt = nmpt.split('\n')
        path = '/'
        for kay in range(len(nmpt) - 1):
            # if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path, nmpt[kay][:-1])
            try:
                h5.is_visible_node(path + nmpt[kay][:-1])
            except:
                h5.create_group(path, nmpt[kay][:-1])
            path += nmpt[kay]
        self.tables[name] = h5.create_earray(path, nmpt[-1], h5type, h5dim, filters=filters)

    elif type(example) == list and type(example[0]) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level, complib='zlib', shuffle=True)

        nodes = h5.list_nodes(h5.root)
        nmpt = name.replace('.', '/\n')
        nmpt = nmpt.split('\n')
        path = '/'
        for kay in range(len(nmpt) - 1):
            # if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path, nmpt[kay][:-1])
            try:
                h5.is_visible_node(path + nmpt[kay][:-1])
            except:
                h5.create_group(path, nmpt[kay][:-1])
            path += nmpt[kay]
        self.tables[name] = h5.create_vlarray(path, nmpt[-1], h5type, filters=filters)

    self.types[name] = type(example)
subfiles = glob.glob(imagedir + '/*.h5')
filelist = []
for subfile in subfiles:
    subfileh = tb.open_file(subfile, mode='r', title="data", filters=FILTERS)
    print "Loading data from {}".format(subfile)

    if not 'mask' in fileh.root:
        mask = subfileh.root.mask[:]
        _ = fileh.create_array(fileh.root, 'mask', mask)
        dataarray = fileh.createEArray(fileh.root, 'data', tb.BoolAtom(),
                                       shape=[0] + list(mask.shape))
    if not 'cropbbox_min' in fileh.root:
        _ = fileh.create_array(fileh.root, 'cropbbox_min', subfileh.root.cropbbox_min[:])
    if not 'cropbbox_max' in fileh.root:
        _ = fileh.create_array(fileh.root, 'cropbbox_max', subfileh.root.cropbbox_max[:])

    dataarray.append(subfileh.root.data[:])
    filelist.extend(subfileh.root.files)
    subfileh.close()

_ = fileh.create_array(fileh.root, 'files', filelist)
def create_compressible_array(self, nodename, shape, precision, group=None):
    pass


if is_tables:
    precision_to_atom = {
        'float32': tables.Float32Atom(),
        'complex64': tables.ComplexAtom(8),
        'float64': tables.Float64Atom(),
        'complex128': tables.ComplexAtom(16),
        'bool': tables.BoolAtom(),
        'int32': tables.Int32Atom(),
        'int16': tables.Int16Atom(),
        'int8': tables.Int8Atom(),
    }

    class H5FileTables(H5FileBase, tables.File):
        def create_extendable_array(self, nodename, shape, precision, group=None):
            if not group:
                group = self.root
            atom = precision_to_atom[precision]
            self.create_earray(group, nodename, atom, shape)
def make_mnist_dvs(path_to_data, path_to_hdf5, digits, max_pxl_value, min_pxl_value,
                   T_max, window_length, scale, polarity, pattern, alphabet_size):
    """Preprocess the .aedat files and save the dataset as an .hdf5 file."""
    dirs = [r'/' + dir_ for dir_ in os.listdir(path_to_data)]
    S_prime = math.ceil(T_max / window_length)

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    if alphabet_size == 1:
        data_shape = (0, (1 + polarity) * (max_pxl_value - min_pxl_value + 1) ** 2, S_prime)
        label_shape = (0, len(digits), S_prime)
    else:
        data_shape = (0, (max_pxl_value - min_pxl_value + 1) ** 2, alphabet_size, S_prime)
        label_shape = (0, len(digits), alphabet_size, S_prime)

    train_data = hdf5_file.create_earray(where=hdf5_file.root.train, name='data',
                                         atom=tables.BoolAtom(), shape=data_shape)
    train_labels = hdf5_file.create_earray(where=hdf5_file.root.train, name='label',
                                           atom=tables.BoolAtom(), shape=label_shape)

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data = hdf5_file.create_earray(where=hdf5_file.root.test, name='data',
                                        atom=tables.BoolAtom(), shape=data_shape)
    test_labels = hdf5_file.create_earray(where=hdf5_file.root.test, name='label',
                                          atom=tables.BoolAtom(), shape=label_shape)

    for i, digit in enumerate(digits):
        output_signal = make_output(i, pattern, len(digits), alphabet_size, S_prime)

        for dir_ in dirs:
            if dir_.find(str(digit)) != -1:
                for subdir, _, _ in os.walk(path_to_data + dir_):
                    if subdir.find(scale) != -1:
                        for j, file in enumerate(glob.glob(subdir + r'/*.aedat')):
                            if j < 0.9 * len(glob.glob(subdir + r'/*.aedat')):
                                print('train', file)
                                tmp = load_dvs(file, S_prime,
                                               min_pxl_value=min_pxl_value,
                                               max_pxl_value=max_pxl_value,
                                               window_length=window_length,
                                               polarity=polarity)
                                print(tmp.shape, data_shape)
                                train_data.append(tmp)
                                train_labels.append(output_signal)
                            else:
                                print('test', file)
                                test_data.append(load_dvs(file, S_prime,
                                                          min_pxl_value=min_pxl_value,
                                                          max_pxl_value=max_pxl_value,
                                                          window_length=window_length,
                                                          polarity=polarity))
                                test_labels.append(output_signal)

    make_stats_group(hdf5_file)
    hdf5_file.close()
def write_bool(self, topic_group, data):
    self.pytable_writer_helper(topic_group, ['data'], tables.BoolAtom(), data)
    self.pytable_writer_helper(topic_group, ['time'], tables.Float64Atom(), data)
for cmph in cmph_covariates:
    print 'Subsetting for %s' % cmph
    lon_, lat_, data = map_utils.CRU_extract('.', '%s' % cmph, zip=False)
    lon_.sort()
    lat_.sort()

    # data = map_utils.interp_geodata(lon_, lat_, data, lon[lon_min_i:lon_max_i], lat[lon_min_i:lon_max_i])
    data = map_utils.grid_convert(
        basemap.interp(map_utils.grid_convert(data, 'y-x+', 'y+x+'), lon_, lat_,
                       *np.meshgrid(lon[lon_min_i:lon_max_i], lat[lat_min_i:lat_max_i])),
        'y+x+', 'x+y+')

    for res in [5]:
        hf_out = tb.openFile(os.path.join('%ik-covariates' % res, cmph.lower() + '.hdf5'), 'w')
        hf_out.createArray('/', 'lon', lon[lon_min_i:lon_max_i][::res])
        hf_out.createArray('/', 'lat', lat[lat_min_i:lat_max_i][::res])
        d = map_utils.grid_convert(data[::res, ::res], 'x+y+', 'y-x+')
        hf_out.createCArray('/', 'data', atom=tb.FloatAtom(), shape=d.shape,
                            filters=tb.Filters(complevel=1, complib='zlib'))
        hf_out.createCArray('/', 'mask', atom=tb.BoolAtom(), shape=d.shape,
                            filters=tb.Filters(complevel=1, complib='zlib'))
        hf_out.root.data.attrs.view = 'y-x+'
        hf_out.root.data[:] = d
        hf_out.root.mask[:] = clipped_pete_mask
        hf_out.close()

glob = tb.openFile('Globcover.hdf5')
for c in glob_channels:
    subset_and_writeout(glob, 'globcover-channel-%i' % c, 3, glob_missing, lambda x: x == c)
glob.close()

# Reconcile the masks
print 'Finding the conservative mask'
# for res in [1,2,5]:
vlarray.append(["123", "456", "3"]) vlarray.append(["456", "3"]) # Binary strings vlarray = fileh.create_vlarray(root, 'vlarray4', tables.UInt8Atom(), "pickled bytes") data = pickle.dumps((["123", "456"], "3")) vlarray.append(np.ndarray(buffer=data, dtype=np.uint8, shape=len(data))) # The next is a way of doing the same than before vlarray = fileh.create_vlarray(root, 'vlarray5', tables.ObjectAtom(), "pickled object") vlarray.append([["123", "456"], "3"]) # Boolean arrays are supported as well vlarray = fileh.create_vlarray(root, 'vlarray6', tables.BoolAtom(), "Boolean atoms") # The next lines are equivalent... vlarray.append([1, 0]) vlarray.append([1, 0, 3, 0]) # This will be converted to a boolean # This gives a TypeError # vlarray.append([1,0,1]) # Variable length strings vlarray = fileh.create_vlarray(root, 'vlarray7', tables.VLStringAtom(), "Variable Length String") vlarray.append("asd") vlarray.append("aaana") # Unicode variable length strings vlarray = fileh.create_vlarray(root, 'vlarray8', tables.VLUnicodeAtom(),
def create_events_hdf5(directory, path_to_hdf5, classes, alphabet_size, pattern,
                       grid_size=128, reduction_factor=4, sample_length_train=500000,
                       sample_length_test=1800000, window_length=5000):
    fns_train = gather_aedat(directory, 1, 24)
    fns_test = gather_aedat(directory, 24, 30)
    print(len(fns_train), len(fns_test))

    assert len(fns_train) == 98

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    n_neurons = int(grid_size / reduction_factor) ** 2
    S_prime_train = int(np.ceil(sample_length_train / window_length))
    S_prime_test = int(np.ceil(sample_length_test / window_length))

    if alphabet_size == 1:
        data_shape_train = (0, n_neurons, S_prime_train)
        label_shape_train = (0, len(classes), S_prime_train)
        data_shape_test = (0, n_neurons, S_prime_test)
        label_shape_test = (0, len(classes), S_prime_test)
    else:
        data_shape_train = (0, n_neurons, alphabet_size, S_prime_train)
        label_shape_train = (0, len(classes), alphabet_size, S_prime_train)
        data_shape_test = (0, n_neurons, alphabet_size, S_prime_test)
        label_shape_test = (0, len(classes), alphabet_size, S_prime_test)

    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    train_data_array = hdf5_file.create_earray(where=hdf5_file.root.train, name='data',
                                               atom=tables.BoolAtom(), shape=data_shape_train)
    train_labels_array = hdf5_file.create_earray(where=hdf5_file.root.train, name='label',
                                                 atom=tables.BoolAtom(), shape=label_shape_train)

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data_array = hdf5_file.create_earray(where=hdf5_file.root.test, name='data',
                                              atom=tables.BoolAtom(), shape=data_shape_test)
    test_labels_array = hdf5_file.create_earray(where=hdf5_file.root.test, name='label',
                                                atom=tables.BoolAtom(), shape=label_shape_test)

    for file_d in tqdm(fns_train + fns_test):
        istrain = file_d in fns_train
        if istrain:
            input, output = load_dvs(file_d, S_prime_train, classes, alphabet_size, pattern,
                                     window_length, sample_length_train, grid_size,
                                     reduction_factor)
            train_data_array.append(input)
            train_labels_array.append(output)
        else:
            input, output = load_dvs(file_d, S_prime_test, classes, alphabet_size, pattern,
                                     window_length, sample_length_test, grid_size,
                                     reduction_factor)
            test_data_array.append(input)
            test_labels_array.append(output)

    make_stats_group(hdf5_file)
    hdf5_file.close()