def create_hdf_csr(self, file_name):
    # create HDF5 file via PyTables
    h5File_path = os.path.join(self.prefix, file_name)
    h5File = tb.open_file(h5File_path, "w")
    filters = tb.Filters(complevel=5, complib='blosc')
    h5File.create_earray(h5File.root, 'data', tb.Float32Atom(),
                         shape=(0,), filters=filters)
    h5File.create_earray(h5File.root, 'indices', tb.UInt32Atom(),
                         shape=(0,), filters=filters)
    h5File.create_earray(h5File.root, 'indptr', tb.UInt32Atom(),
                         shape=(0,), filters=filters)
    h5File.root.indptr.append(np.array([0], dtype=np.uint32))
    return h5File, h5File_path
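# Usage sketch for create_hdf_csr above -- hypothetical, assuming `tb` is
# PyTables, `np` is numpy, and `store` is an instance exposing the method:
import scipy.sparse as sp

csr = sp.random(100, 50, density=0.1, format='csr', dtype=np.float32)
h5File, path = store.create_hdf_csr("matrix.h5")
h5File.root.data.append(csr.data)
h5File.root.indices.append(csr.indices.astype(np.uint32))
# 'indptr' already holds the leading 0, so append only the remainder.
h5File.root.indptr.append(csr.indptr[1:].astype(np.uint32))
h5File.close()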
def groups_to_matrix(m_file, c_file):
    filters = tables.Filters(complevel=1, complib='blosc', fletcher32=True)
    h5fh = tables.open_file(m_file, mode='a', filters=filters)
    if 'counts' not in h5fh.root:
        atom = tables.UInt32Atom()
        shape = (2**32 - 1, 0)
        h5fh.create_earray(h5fh.root, 'counts', atom, shape,
                           "counts matrix", expectedrows=2**32 - 1)
    counts = h5fh.root.counts
    grouph5fh = tables.open_file(c_file, mode='r')
    for group_num in list(grouph5fh.root._v_groups):
        path = "/%s" % group_num
        print "Processing counts for %s" % path
        counts.append(
            np.resize(grouph5fh.get_node(path, 'count').read(),
                      (2**32 - 1, 1)))
        col_num = counts.shape[1] - 1
        new_group = h5fh.create_group(h5fh.root,
                                      "%s|%s" % (group_num, col_num))
        print "Adding taxonomy data for %s" % path
        h5fh.copy_node(grouph5fh.get_node(path, 'taxonomy'), new_group,
                       'taxonomy', recursive=True)
        print "Finished processing %s" % path
    grouph5fh.close()
    h5fh.close()
def test_append_to_matrix_counts(self):
    shape = (5, 0)
    atom = tables.UInt32Atom()
    filters = tables.Filters(complevel=9, complib='zlib')
    h5fh = tables.open_file("earray_append.h5", mode='a', filters=filters)
    ea = h5fh.create_earray(h5fh.root, 'counts', atom, shape,
                            "counts matrix", filters, 2**32 - 1)
    self.assertEqual(ea.shape[1], 0)
    col1 = np.array([1, 6, 11, 16, 21], dtype=np.uint32, ndmin=2).transpose()
    ea.append(col1)
    h5fh.close()

    # Re-open the file and append to the existing EArray.
    h5fh = tables.open_file("earray_append.h5", mode='a', filters=filters)
    self.assertIn('counts', h5fh.root)
    counts = h5fh.root.counts
    self.assertEqual(counts.shape[1], 1)
    col2 = np.array([2, 7, 12, 17, 22], dtype=np.uint32, ndmin=2).transpose()
    counts.append(col2)
    self.assertEqual(counts.shape[1], 2)
    self.assertTrue(np.array_equal(h5fh.root.counts[:, 0], col1[:, 0]))
    self.assertTrue(np.array_equal(h5fh.root.counts[:, 1], col2[:, 0]))
    h5fh.close()
    os.remove("earray_append.h5")
def add_clustering(fd, channel_group_id=None, name=None,
                   spike_clusters=None, overwrite=False):
    """fd is returned by `open_files`: it is a dict {type: tb_file_handle}."""
    if channel_group_id is None:
        channel_group_id = '0'
    kwik = fd.get('kwik', None)
    # The KWIK file needs to be there.
    assert kwik is not None
    # The channel group id containing the new cluster group must be specified.
    assert channel_group_id is not None
    assert name is not None
    assert spike_clusters is not None
    spikes = kwik.root.channel_groups.__getattr__(
        channel_group_id).spikes.recording
    spikes_path = '/channel_groups/{0:s}/spikes/clusters'.format(
        channel_group_id)
    clusters_path = '/channel_groups/{0:s}/clusters'.format(channel_group_id)

    # Check that the clustering has the right number of spikes.
    if not spike_clusters.shape[0] == spikes.shape[0]:
        print "\nERROR: Could not add clustering in group \"{0:s}\": wrong number of spikes".format(name)
        print "Expected {0:d}, got {1:d}".format(spikes.shape[0],
                                                 spike_clusters.shape[0])
        return False

    # Create the HDF5 group in /.../clusters.
    try:
        clu_group = kwik.createGroup(clusters_path, name)
    except tb.NodeError:
        assert overwrite, "The clustering already exists, use overwrite=True"
        kwik.removeNode(clusters_path, name, recursive=True)
        clu_group = kwik.createGroup(clusters_path, name)

    # Create the HDF5 dataset with the spike clusters.
    try:
        kwik.createEArray(spikes_path, name, tb.UInt32Atom(),
                          expectedrows=1000000,
                          obj=spike_clusters.astype(np.uint32))
    except tb.NodeError:
        assert overwrite, "The clustering already exists, use overwrite=True"
        kwik.removeNode(spikes_path, name)
        kwik.createEArray(spikes_path, name, tb.UInt32Atom(),
                          expectedrows=1000000,
                          obj=spike_clusters.astype(np.uint32))

    # Create the cluster HDF5 groups under the new clustering group.
    clusters_unique = np.unique(spike_clusters)
    for cluster in clusters_unique:
        add_cluster(fd, channel_group_id=channel_group_id, id=str(cluster),
                    clustering=name, cluster_group=3)  # default cluster group = unsorted
    return True
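# Hypothetical call of add_clustering, assuming `open_files` (referenced in
# the docstring) returns {'kwik': <tables file handle>, ...}; its exact
# signature is an assumption:
fd = open_files('myexperiment')
spike_clusters = np.load('spike_clusters.npy')  # one cluster id per spike
add_clustering(fd, channel_group_id='0', name='manual',
               spike_clusters=spike_clusters, overwrite=True)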
def listener(q, output_path):
    """Consume tiles from the queue and append them to an HDF5 file."""
    counter = 0
    pid = os.getpid()
    logging.info("Listener running on {}".format(pid))
    hdf5_file = tb.open_file(output_path, mode='w')
    try:
        pred_storage = hdf5_file.create_earray(
            hdf5_file.root, "pred_img", tb.UInt8Atom(),
            shape=(0, 299, 299, 3)
        )
        xlabel_storage = hdf5_file.create_earray(
            hdf5_file.root, "pos_xlabel", tb.UInt32Atom(), shape=(0, 1)
        )
        ylabel_storage = hdf5_file.create_earray(
            hdf5_file.root, "pos_ylabel", tb.UInt32Atom(), shape=(0, 1)
        )
        while 1:
            counter += 1
            if counter % 100 == 0:
                logging.info("{} tiles saved in hdf5.".format(counter))
            data = q.get()
            if data == 'kill':
                logging.info("Listener closed.")
                return None
            pred = data['pred']
            xlabel = data['xlabel']
            ylabel = data['ylabel']
            pred_storage.append(pred[None])
            xlabel_storage.append(xlabel[None])
            ylabel_storage.append(ylabel[None])
    finally:
        hdf5_file.close()
def add_unique_tiles_table(out_file, feature_table, group):
    unique_tiles = np.unique(feature_table.cols.pair_id[:])
    atom = tb.UInt32Atom()
    filters = tb.Filters(complevel=5, complib='blosc')
    ca = out_file.create_carray(group, 'unique_tiles', atom,
                                unique_tiles.shape, filters=filters)
    ca[:] = unique_tiles  # populate the carray before flushing
    ca.flush()
def create_database():
    """Specifies the input data only."""
    db = tables.openFile(DATAPATH + 'processed/db.h5', 'w')

    # /
    input_data = db.createGroup('/', 'input_data', 'The Input Data group')

    # /input_data
    dictionary = db.createGroup(input_data, 'dictionary',
                                'The Dictionary group')
    dictionary._v_attrs.num_docs = 0    # Number of documents processed
    dictionary._v_attrs.num_tokens = 0  # Number of token->id mappings in the dictionary
    record = db.createGroup(input_data, 'record',
                            'The Congressional Record group')
    record._v_attrs.num_speakers = 0    # Number of unique speakers (by icpsrID)
    supplementary = db.createGroup(input_data, 'supplementary',
                                   'The Supplementary Data group')

    # /input_data/dictionary
    token2id = db.createTable(dictionary, 'token2id', Dictionary.token2id,
                              'The token2id table', expectedrows=int(1e5))
    # Add the index only once the token2id table is completed:
    # indexrows = token2id.cols.token_id.createCSIndex()

    # /input_data/record
    speaker = db.createTable(record, 'speaker', Speaker.cols,
                             'The Speaker table', expectedrows=2500)
    a = tables.UInt32Atom()
    document = db.createTable(record, 'document', Document.cols,
                              'The Document table', expectedrows=int(5e4))
    doc2bow = db.createTable(record, 'doc2bow', Doc2BoW.cols,
                             'The Doc2BoW table', expectedrows=int(5e4))

    # /input_data/supplementary
    covariates = db.createTable(supplementary, 'covariates', Covariates.cols,
                                'The Covariates table', expectedrows=int(5e4))
    db.close()
def write_hdf(jd, dt):
    filename = os.path.join(self.conmatdir, "conmat_%s_%s_%06i.h5" %
                            (self.projname, self.casename, jd))
    shape = (self.dtmax, self.nreg, self.nreg)
    atom = td.UInt32Atom()
    #filters = td.Filters(complevel=5, complib='zlib')
    with td.openFile(filename, 'a') as h5f:
        if hasattr(h5f.root, 'conmat'):
            ca = h5f.root.conmat
        else:
            ca = h5f.createCArray(h5f.root, 'conmat', atom, shape)
        ca[dt, :, :] = self.conmat.astype(np.uint32)
def compute_ratings_matrix(ratings_matrix_file):
    """Compute the ratings matrix.

    Input:
        ratings_matrix_file: filename of the output ratings matrix
    """
    mongo = Mongo('Acme-Supermarket')
    mongo.connect()

    matrix_file = ratings_matrix_file
    hdf5_matrix = tables.openFile(matrix_file, mode='w')
    filters = tables.Filters(complevel=5, complib='blosc')

    products = mongo.database.products.find({}, {'_id': 1})
    products = [p['_id'] for p in products]
    products = numpy.concatenate((numpy.array([-1]), products))
    products_count = mongo.database.products.count()

    customers = mongo.database.actors.find({'_type': 'Customer'}, {'_id': 1})
    customers = [c['_id'] for c in customers]
    customers_count = mongo.database.actors.count({'_type': 'Customer'})

    data_storage = hdf5_matrix.createEArray(hdf5_matrix.root, 'data',
                                            tables.UInt32Atom(),
                                            shape=(0, products_count + 1),
                                            filters=filters,
                                            expectedrows=customers_count)
    data_storage.append(products[:][None])

    for customer_id in customers:
        # Column 0 holds the customer id; product ratings go in columns 1+.
        row = numpy.zeros((products_count + 1, ))
        row[0] = customer_id
        ratings = mongo.database.rates.find({'customer_id': customer_id},
                                            {'product_id': 1, 'value': 1})
        for rating in ratings:
            row[numpy.where(
                products == rating['product_id'])[0][0]] = rating['value']
        data_storage.append(row[:][None])

    hdf5_matrix.close()
    mongo.disconnect()
    return matrix_file
def search_coincidences(self):
    if '/c_index' not in self.data and '/timestamps' not in self.data:
        c_index, timestamps = [], []
        for id, station in enumerate(self.station_groups):
            station = self.data.getNode(station)
            for event_id, event in enumerate(station.events):
                timestamps.append((event['ext_timestamp'], id, event_id))
                c_index.append([len(timestamps) - 1])
        timestamps = np.array(timestamps, dtype=np.uint64)
        self.data.createArray('/', 'timestamps', timestamps)
        self.data.createVLArray('/', 'c_index', tables.UInt32Atom())
        for coincidence in c_index:
            self.data.root.c_index.append(coincidence)
def create_mapping(self, title, entries, overwrite=False):
    """
    Create an equivalency index, which maps a raw data dimension to
    another integer value. Once created, mappings can be referenced by
    offset or by key.

    Parameters
    ----------
    title : string
        Name of this mapping.
    entries : list
        List of n equivalencies for the mapping. n must match one data
        dimension of the matrix.
    overwrite : boolean
        True to allow overwriting an existing mapping; False will raise
        a LookupError if the mapping already exists. Default is False.

    Returns
    -------
    mapping : tables.array
        Returns the created mapping.

    Raises
    ------
    LookupError
        If the mapping exists and overwrite=False.
    """
    # Enforce shape-checking.
    if self.shape():
        if len(entries) not in self._shape:
            raise ShapeError('Mapping must match one data dimension')

    # Handle the case where the mapping already exists.
    if title in self.list_mappings():
        if overwrite:
            self.delete_mapping(title)
        else:
            raise LookupError(title + ' mapping already exists.')

    # Create the lookup group under root if it doesn't already exist.
    if 'lookup' not in self.root:
        self.create_group(self.root, 'lookup')

    # Write the mapping!
    mymap = self.create_array(self.root.lookup, title,
                              atom=tables.UInt32Atom(),
                              shape=(len(entries), ))
    mymap[:] = entries
    return mymap
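# Hypothetical usage of create_mapping, assuming `f` is an open instance of
# the matrix class defining it:
zone_ids = [100, 101, 102]
mapping = f.create_mapping('zones', zone_ids, overwrite=True)
print(mapping[:])   # -> [100 101 102], stored as /lookup/zones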
def _create_table_list(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.

    This is the modified version for creating a table with appendList.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == list and type(example[0]) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" %
                        example.dtype)

    if type(example) == np.ndarray:
        h5dim = (0, ) + example.shape[1:]
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_earray(h5.root, name, h5type, h5dim,
                                             filters=filters)
    elif type(example) == list and type(example[0]) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_vlarray(h5.root, name, h5type,
                                              filters=filters)
    self.types[name] = type(example)
def _create_table(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError(
            "Could not create table %s because of unknown dtype '%s'" %
            (name, example.dtype))

    if type(example) == np.ndarray:
        h5dim = (0, ) + example.shape
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_earray(h5.root, name, h5type, h5dim,
                                             filters=filters)
    elif type(example) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_vlarray(h5.root, name, h5type,
                                              filters=filters)
    self.types[name] = type(example)
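# Hypothetical usage of _create_table: the *example* array fixes the atom
# and the per-row shape, and appends must add a leading axis. Assumes
# `store` is an instance with .h5, .tables, and .compression_level set.
example = np.zeros((64, 2), dtype=np.uint32)   # -> UInt32Atom, dim (0, 64, 2)
store._create_table('timestamps', example)
store.tables['timestamps'].append(example[np.newaxis, ...])   # one row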
def createHDF5File(self):
    out_file_path = os.path.join(self._output_path, self._output_file_name)
    try:
        hdf5_file = tables.open_file(out_file_path, mode='w')
        filters = tables.Filters(complevel=5, complib='blosc')
        data_shape = tuple([0, self.num_modalities] +
                           list(self._image_shape))
        data_storage = hdf5_file.create_earray(
            hdf5_file.root, 'data', tables.Float32Atom(), shape=data_shape,
            filters=filters, expectedrows=self.num_modalities)
        if self.label_format == "nii":
            truth_shape = tuple([0, 1] + list(self._image_shape))
            truth_storage = hdf5_file.create_earray(
                hdf5_file.root, 'truth', tables.UInt8Atom(),
                shape=truth_shape, filters=filters,
                expectedrows=self.num_modalities)
        elif self.label_format == 'csv':
            truth_shape = tuple([0, self._image_shape[-1]])
            truth_storage = hdf5_file.create_earray(
                hdf5_file.root, 'truth', tables.UInt32Atom(),
                shape=truth_shape, filters=filters,
                expectedrows=self.num_modalities)
        else:
            raise ValueError("Failed to recognize label format: %s" %
                             self.label_format)
        affine_storage = hdf5_file.create_earray(
            hdf5_file.root, 'affine', tables.Float32Atom(), shape=(0, 4, 4),
            filters=filters, expectedrows=self.num_modalities)
        return hdf5_file, data_storage, truth_storage, affine_storage
    except Exception as e:
        # If something goes wrong, delete the incomplete data file.
        os.remove(out_file_path)
        raise e
def create_sorted_db(old_table, old_array, sorted_hdf):
    """
    For a set of features in the flat_hdf, write them to a table in HDF5.
    """
    out_file = tb.open_file(sorted_hdf, mode='w', title='Landmark Database')
    group = out_file.create_group('/', 'db', 'Landmark Database')
    old_table.copy(newparent=group, newname='landmarks', sortby='pair_id',
                   checkCSI=True)

    filters = tb.Filters(complib='blosc', complevel=5)
    new_array = out_file.create_carray(group, name='descriptors',
                                       atom=tb.UInt8Atom(),
                                       shape=old_array.shape,
                                       filters=filters)
    idx = old_table.cols.pair_id.index
    new_array[:, :] = old_array[idx[:], :]

    new_table = out_file.root.db.landmarks
    new_table.cols.pair_id.create_csindex()
    new_table.cols.x.create_csindex()
    new_table.cols.y.create_csindex()
    new_table.cols.octave.create_csindex()

    unique_tiles, uidcount = np.unique(old_table.cols.pair_id[:],
                                       return_counts=True)
    atom = tb.UInt32Atom()
    filters = tb.Filters(complevel=5, complib='blosc')
    uidcount_a = out_file.create_carray(group, 'unique_tile_count', atom,
                                        unique_tiles.shape, filters=filters)
    uid_a = out_file.create_carray(group, 'unique_tiles', atom,
                                   unique_tiles.shape, filters=filters)
    uidcount_a[:] = uidcount
    uid_a[:] = unique_tiles
    out_file.close()
def __init__(self, cluster, data, output, R, N, use_poisson=None,
             gauss=None, trig_threshold=1., force=False):
    """Simulation initialization

    :param cluster: BaseCluster (or derived) instance
    :param data: the HDF5 file
    :param output: name of the destination group to store results
    :param R: maximum distance of shower to center of cluster
    :param N: number of simulations to perform
    :param force: if True, ignore pre-existing simulations; they will be
        overwritten!
    """
    self.cluster = cluster
    self.data = data
    self.R = R
    self.N = N
    self.use_poisson = use_poisson
    self.gauss = gauss
    self.trig_threshold = trig_threshold

    if output in data and not force:
        raise RuntimeError("Cancelling simulation; %s already exists" %
                           output)
    elif output in data:
        data.removeNode(output, recursive=True)

    head, tail = os.path.split(output)
    self.output = data.createGroup(head, tail, createparents=True)
    self.observables = self.data.createTable(
        self.output, 'observables', storage.SimulationEventObservables)
    self.coincidences = self.data.createTable(self.output, 'coincidences',
                                              storage.Coincidence)
    self.c_index = self.data.createVLArray(self.output, 'c_index',
                                           tables.UInt32Atom())
    self.output._v_attrs.cluster = cluster
def _create_table(self, name, example, parent=None):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.
    """
    h5 = self.h5
    filters = tables.Filters(complevel=self.compression_level,
                             complib='zlib', shuffle=True)
    if parent is None:
        parent = h5.root

    if type(example) == str:
        h5type = tables.VLStringAtom()
        h5.createVLArray(parent, name, h5type, filters=filters)
        return
    if type(example) == dict:
        self.h5.createGroup(parent, name)
        return

    # If we get here then we're dealing with numpy arrays.
    example = np.asarray(example)

    # MODIFICATION: keyed the map on dtype names and added a string type.
    type_map = {
        np.dtype(np.float64).name: tables.Float64Atom(),
        np.dtype(np.float32).name: tables.Float32Atom(),
        np.dtype(np.int).name: tables.Int64Atom(),
        np.dtype(np.int8).name: tables.Int8Atom(),
        np.dtype(np.uint8).name: tables.UInt8Atom(),
        np.dtype(np.int16).name: tables.Int16Atom(),
        np.dtype(np.uint16).name: tables.UInt16Atom(),
        np.dtype(np.int32).name: tables.Int32Atom(),
        np.dtype(np.uint32).name: tables.UInt32Atom(),
        np.dtype(np.bool).name: tables.BoolAtom(),
        # Maximal string length of 128 per string - change if needed.
        'string32': tables.StringAtom(128)
    }

    try:
        h5type = type_map[example.dtype.name]
        h5dim = (0, ) + example.shape
        h5.createEArray(parent, name, h5type, h5dim, filters=filters)
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" %
                        example.dtype)
def search_coincidences(self, window=10000, shifts=None, limit=None):
    """Search for coincidences.

    Search all data in the station_groups for coincidences, and store
    rudimentary coincidence data in the coincidences group. This data
    might be useful, but is very basic. You can call the
    :meth:`store_coincidences` method to store the coincidences in an
    easier format in the coincidences group.

    If you want to process the preliminary results: they are stored in
    _src_c_index and _src_timestamps. The former is a list of
    coincidences, each consisting of a list with indexes into the
    timestamps array as a pointer to the events making up the
    coincidence. The latter is a list of tuples. Each tuple consists of
    a timestamp followed by an index into the stations list which
    designates the detector station which measured the event, and
    finally an index into that station's event table.

    :param window: the coincidence time window in nanoseconds. All
        events with delta t's smaller than this window will be
        considered a coincidence.
    :param shifts: optionally shift a station's data in time. This can
        be useful if a station has a misconfigured GPS clock. Expects a
        list of shifts, one for each station, in seconds. Use 'None'
        for no shift.
    :param limit: optionally limit the search to this number of events.

    """
    c_index, timestamps = \
        self._search_coincidences(window, shifts, limit)
    timestamps = np.array(timestamps, dtype=np.uint64)
    self.data.create_array(self.coincidence_group, '_src_timestamps',
                           timestamps)
    src_c_index = self.data.create_vlarray(self.coincidence_group,
                                           '_src_c_index',
                                           tables.UInt32Atom())
    for coincidence in c_index:
        src_c_index.append(coincidence)
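# Hypothetical call, following the docstring above: station 2's GPS clock
# runs 0.5 s behind, so shift its events; keep the default 10 us window.
coincidences.search_coincidences(window=10000,
                                 shifts=[None, 0.5, None],
                                 limit=None)
coincidences.store_coincidences()   # the follow-up step named above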
def small_copy():
    filters = tables.Filters(complevel=6, complib='zlib')
    atom = tables.UInt32Atom()
    start = time.time()

    # File 1
    f1 = tables.open_file('tempAPD1.hdf', 'r')
    t1 = f1.root.timestamps
    f1_copy = tables.open_file('tempAPD1_copy.hdf', 'w')
    t1_copy = f1_copy.create_carray(f1_copy.root, name='timestamps',
                                    atom=atom, shape=(100, 2),
                                    filters=filters)
    t1_copy[0:100, :] = t1[0:100, :]
    f1.close()
    f1_copy.close()
    print("File 1 took %f seconds." % (time.time() - start))

    # File 2
    start = time.time()
    f2 = tables.open_file('tempAPD2.hdf', 'r')
    t2 = f2.root.timestamps
    f2_copy = tables.open_file('tempAPD2_copy.hdf', 'w')
    t2_copy = f2_copy.create_carray(f2_copy.root, name='timestamps',
                                    atom=atom, shape=(100, 2),
                                    filters=filters)
    t2_copy[0:100, :] = t2[0:100, :]
    f2.close()
    f2_copy.close()
    print("File 2 took %f seconds." % (time.time() - start))
def h5open(self):
    self.h5filename = os.path.join(
        self.conmatdir, "conmat_%s_%s.h5" % (self.projname, self.casename))
    self.h5f = h5f = td.openFile(self.h5filename, 'a')
    if not hasattr(h5f.root, 'conmat'):
        if not hasattr(self, 'reg'):
            self.regvec_from_discs()
        jdvec = int((self.jdmax - self.jdmin + 1) / self.djd) + 1
        shape = (jdvec, self.dtmax, self.nreg, self.nreg)
        iatom = td.UInt32Atom()
        fatom = td.FloatCol()
        batom = td.BoolAtom()
        filtr = td.Filters(complevel=5, complib='zlib')
        crc = h5f.createCArray
        cnmat = crc(h5f.root, 'conmat', iatom, shape, filters=filtr)
        jdvec = crc(h5f.root, 'jdvec', fatom, (shape[0], ))
        exist = crc(h5f.root, 'exist', batom, (shape[0], shape[1]))
        jdvec[:] = np.arange(self.jdmin, self.jdmax + 1, self.djd)
        exist[:] = False
    else:
        cnmat = h5f.root.conmat
        jdvec = h5f.root.jdvec
        exist = h5f.root.exist
    return cnmat, jdvec, exist
def dataProcessing_finite(self):
    """
    DataQ sends a string sentinel; the first and last array entries are
    corrected by the rollover count. The count rate entry/dt is sent via
    animDataQ and lcdQ. Each array gets appended to the HDF file array.
    """
    filename = str(self._folder / "smALEX_APD{}.hdf".format(self._N))
    f = tables.open_file(filename, mode='w')
    atom = tables.UInt32Atom()
    filters = tables.Filters(complevel=6, complib='zlib')
    timestamps = f.create_earray(f.root, 'timestamps', atom=atom,
                                 shape=(0, 2), filters=filters)
    for array in iter(self._dataQ.get, 'STOP'):
        timestamps.append(array)
        n1 = array[0, 0] + (self._int_max * array[0, 1])
        n2 = array[-1, 0] + (self._int_max * array[-1, 1])
        self._animDataQ.put(self._readArraySize / (n2 - n1))
    f.flush()
    f.close()
    print("DataProcesser %i sent all data and exits" % self._N)
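# A minimal sketch of the rollover arithmetic used above, assuming column 0
# holds a 32-bit counter value, column 1 its rollover count, and
# self._int_max == 2**32 (widened to uint64 so the sum does not wrap):
int_max = 2**32
raw = np.array([[4294967200, 0],    # just before the counter wraps
                [100, 1]],          # just after: one rollover recorded
               dtype=np.uint64)
full = raw[:, 0] + int_max * raw[:, 1]
print(full)   # [4294967200 4294967396] -- monotonic again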
def test_convert_column_counts_to_matrix_counts(self):
    shape = (5, 0)
    atom = tables.UInt32Atom()
    filters = tables.Filters(complevel=9, complib='zlib')
    h5fh = tables.open_file("earray1.h5", mode='a', filters=filters)
    ea = h5fh.create_earray(h5fh.root, 'counts', atom, shape,
                            "counts matrix", filters, 2**32 - 1)
    self.assertEqual(ea.shape[1], 0)
    col1 = np.array([1, 6, 11, 16, 21], dtype=np.uint32, ndmin=2).transpose()
    ea.append(col1)
    self.assertEqual(ea.shape[1], 1)
    col2 = np.array([2, 7, 12, 17, 22], dtype=np.uint32, ndmin=2).transpose()
    ea.append(col2)
    self.assertEqual(ea.shape[1], 2)
    col3 = np.array([3, 8, 13, 18, 23], dtype=np.uint32, ndmin=2).transpose()
    ea.append(col3)
    self.assertEqual(ea.shape[1], 3)
    col4 = np.array([4, 9, 14, 19, 24], dtype=np.uint32, ndmin=2).transpose()
    ea.append(col4)
    self.assertEqual(ea.shape[1], 4)
    col5 = np.array([5, 10, 15, 20, 25], dtype=np.uint32, ndmin=2).transpose()
    ea.append(col5)
    self.assertEqual(ea.shape[1], 5)
    print h5fh.root.counts[:]
    self.assertTrue(np.array_equal(h5fh.root.counts[:, 0], col1[:, 0]))
    self.assertTrue(np.array_equal(h5fh.root.counts[:, 1], col2[:, 0]))
    self.assertTrue(np.array_equal(h5fh.root.counts[:, 2], col3[:, 0]))
    self.assertTrue(np.array_equal(h5fh.root.counts[:, 3], col4[:, 0]))
    self.assertTrue(np.array_equal(h5fh.root.counts[:, 4], col5[:, 0]))
    h5fh.close()
    os.remove("earray1.h5")
def create_sorted_db(old_table, sorted_hdf):
    """
    For a set of features in the flat_hdf, write them to a table in HDF5.
    """
    out_file = tb.open_file(sorted_hdf, mode='w', title='Feature Database')
    group = out_file.create_group('/', 'sift_db', 'Sift Feature Database')
    old_table.copy(newparent=group, newname='sift_features_sorted',
                   sortby='pair_id', checkCSI=True)
    new_table = out_file.root.sift_db.sift_features_sorted
    new_table.cols.pair_id.create_csindex()
    new_table.cols.x.create_csindex()
    new_table.cols.y.create_csindex()
    new_table.cols.octave.create_csindex()

    unique_tiles, uidcount = np.unique(old_table.cols.pair_id[:],
                                       return_counts=True)
    atom = tb.UInt32Atom()
    filters = tb.Filters(complevel=5, complib='blosc')
    uidcount_a = out_file.create_carray(group, 'unique_tile_count', atom,
                                        unique_tiles.shape, filters=filters)
    uid_a = out_file.create_carray(group, 'unique_tiles', atom,
                                   unique_tiles.shape, filters=filters)
    uidcount_a[:] = uidcount
    uid_a[:] = unique_tiles
    out_file.close()
def save_tinfo_core(dat, outfn, n_img=None, n_maxtrial=None,
                    save_spktch=False, n_elec=None, exclude_img=None,
                    n_bins=None, t_min=None, t_max=None, verbose=1,
                    n_slack=N_SLACK, t_adjust=None):
    iid2idx = {}   # image id to index (1-th axis) table
    idx2iid = []   # vice versa
    ch2idx = {}
    idx2ch = []

    # prepare tmp file
    fd, tmpf = tempfile.mkstemp()
    os.close(fd)   # the hdf5 module will handle the file; close it now.
    save_tinfo_core.tmpf = tmpf

    frame_onset = None
    foffset_chidx = []
    foffset_imgidx = []
    foffset_tridx = []
    foffset_binidx = []
    foffset_pos = []

    # -- initialization
    fn_nominal = dat.get('filename', '__none__')
    fns_nominal = [fn_nominal]   # backward compatibility

    if n_img is None:
        # If `n_img` is not specified, determine the number of images
        # from the first psf.pk file (with additional n_slack).
        el0 = dat['all_spike'].keys()[0]
        n_img = len(dat['all_spike'][el0]) + n_slack
    if n_elec is None:
        # If `n_elec` is not specified, determine the number of
        # electrodes from the first psf.pk file.
        # No additional n_slack here!
        n_elec = len(dat['actvelecs'])
    if n_maxtrial is None:
        el0 = dat['all_spike'].keys()[0]
        iis0 = dat['all_spike'][el0].keys()
        n_maxtrial = max([len(dat['all_spike'][el0][ii0])
                          for ii0 in iis0]) + n_slack
    if t_min is None:
        t_min = dat['t_start']
    if t_max is None:
        t_max = dat['t_stop']
    if t_adjust is None:
        t_adjust = dat['t_adjust']
    if n_bins is None:
        n_bins = int(np.ceil((t_max - t_min) / 1000.) + 1)

    # number of bytes required for 1 trial
    n_bytes = int(np.ceil(n_bins / 8.))
    shape = (n_elec, n_img, n_maxtrial, n_bytes)
    shape_org = (n_img, n_elec, n_maxtrial)
    atom = tables.UInt8Atom()
    atom16 = tables.Int16Atom()
    atomu16 = tables.UInt16Atom()
    atomu32 = tables.UInt32Atom()
    atom64 = tables.Int64Atom()
    filters = tables.Filters(complevel=4, complib='blosc')
    save_tinfo_core.h5t = h5t = tables.openFile(tmpf, 'w')
    # spiking information
    db = h5t.createCArray(h5t.root, 'db', atom, shape, filters=filters)
    # origin info
    org = h5t.createCArray(h5t.root, 'org', atom16, shape_org,
                           filters=filters)
    org[...] = -1
    # num of trials per each ch & image
    tr = np.zeros((n_elec, n_img), dtype=np.uint16)

    if verbose > 0:
        print '* Allocated: (n_elec,'\
            ' n_img, n_maxtrial, n_bytes) = (%d, %d, %d, %d)' % shape
        print '* Temp hdf5:', tmpf

    # ------------------------------------------------------------------
    # -- read thru the dats, store into the tmp hdf5 file (= tmpf)
    # -- the actual conversion for this file happens here
    for ch in sorted(dat['all_spike']):
        makeavail(ch, ch2idx, idx2ch)
        ie = ch2idx[ch]   # index to the electrode, 0-based
        if verbose > 0:
            print '* At: Ch/site/unit %d \r' % ie,
            sys.stdout.flush()

        for iid in sorted(dat['all_spike'][ch]):
            # -- main computation
            if is_excluded(iid, exclude_img):
                continue

            # do the conversion
            makeavail(iid, iid2idx, idx2iid)
            ii = iid2idx[iid]   # index to the image, 0-based
            trials = dat['all_spike'][ch][iid]   # get the chunk
            foffsets = None
            if 'all_foffset' in dat:
                foffsets = dat['all_foffset'][ch][iid]
                if len(trials) != len(foffsets):
                    foffsets = None

            ntr0 = len(trials)   # number of trials in the chunk
            itb = tr[ie, ii]     # index to the beginning trial#, 0-based
            ite = itb + ntr0     # index to the end
            n_excess = 0         # number of excess trials in this chunk
            if ite > n_maxtrial:
                n_excess = ite - n_maxtrial
                ite = n_maxtrial
                if verbose > 0:
                    print '** Reached n_maxtrial(=%d): ch=%s, iid=%s' % \
                        (n_maxtrial, str(ch), str(iid))
            # number of actual trials to read in the chunk
            ntr = ntr0 - n_excess

            # book-keeping
            org[ii, ie, itb:ite] = 0   # mainly for backward compatibility
            tr[ie, ii] += ntr

            # bit-like spike timing info
            tr_bits = np.zeros((ntr, n_bytes * 8), dtype=np.uint8)

            # sweep the chunk, and bit-pack the data
            trials = trials[:ntr]
            trials_enum = np.concatenate(
                [[i] * len(e) for i, e in enumerate(trials)]).astype('int')
            trials = np.concatenate(trials)
            # selected bins
            sb = np.round((trials - t_min) / 1000.).astype('int')
            si = np.nonzero((sb >= 0) & (sb < n_bins))[0]
            if len(si) == 0:
                # no spikes at all
                db[ie, ii, itb:ite, :] = 0   # this must match.. (1)
                continue
            sb = sb[si]
            st = trials_enum[si]
            tr_bits[st, sb] = 1   # there was a spike
            spk = np.packbits(tr_bits, axis=1)

            # finished this image in this electrode; store the data
            db[ie, ii, itb:ite, :] = spk   # this must match.. (1)

            # keeping foffsets for .nev/.plx files
            if foffsets is not None:
                foffsets = np.concatenate(foffsets)
                if len(foffsets) != len(trials):
                    # shouldn't happen
                    print '** Length of foffsets and trials is different'
                    foffsets = [-1] * len(trials)
                foffsets = np.array(foffsets)
                nevs = len(sb)
                foffset_chidx.extend([ie] * nevs)
                foffset_imgidx.extend([ii] * nevs)
                foffset_tridx.extend(st)
                foffset_binidx.extend(sb)
                foffset_pos.extend(foffsets[si])

    # -- additional movie data conversion
    # XXX: this assumes `multi=False`
    if 'frame_onset' in dat and len(dat['frame_onset']) > 0:
        print '* Collecting frame onset info'
        if frame_onset is None:
            frame_onset = dat['frame_onset']
        else:
            frame_onset0 = dat['frame_onset']
            for iid in frame_onset0:
                frame_onset[iid].extend(frame_onset0[iid])

    # ------------------------------------------------------------------
    # -- finished main conversion; now save into a new optimized hdf5 file
    n_img_ac = len(iid2idx)   # actual number of images
    n_tr_ac = np.max(tr)      # actual maximum number of trials
    shape_img = (n_img_ac, n_elec, n_tr_ac, n_bytes)   # img-major form
    shape_ch = (n_elec, n_img_ac, n_tr_ac, n_bytes)    # ch-major form
    shape_org = (n_img_ac, n_elec, n_tr_ac)

    if verbose > 0:
        print 'Optimizing...'
        print '* Actual #images:', n_img_ac
        print '* Actual #trials:', n_tr_ac
        print '* New allocated: (n_elec, n_img, n_maxtrial, n_bytes)' \
            ' = (%d, %d, %d, %d)' % shape_ch

    # -- layout of the output hdf5 file
    save_tinfo_core.h5o = h5o = tables.openFile(outfn, 'w')
    # /spkt_img: bit-packed spike-time info matrix, image-id-major
    spktimg = h5o.createCArray(h5o.root, 'spkt_img', atom, shape_img,
                               filters=filters)
    # /meta: metadata group
    meta = h5o.createGroup("/", 'meta', 'Metadata')
    # /meta/iididx: iid to matrix-index info
    t_iididx = h5o.createTable(meta, 'iididx', IidIdx,
                               'Image ID and its index')
    # /meta/orgfile_img: file origin info, image-id-major
    orgfile = h5o.createCArray(meta, 'orgfile_img', atom16, shape_org,
                               filters=filters)

    # -- fill in metadata
    h5o.createArray(meta, 'srcfiles', fns_nominal)
    h5o.createArray(meta, 'nbins', n_bins)
    h5o.createArray(meta, 't_start0', t_min)
    h5o.createArray(meta, 'tmin', t_min)    # backward compatibility
    h5o.createArray(meta, 't_stop0', t_max)
    h5o.createArray(meta, 'tmax', t_max)    # backward compatibility
    h5o.createArray(meta, 't_adjust', t_adjust)
    h5o.createArray(meta, 'iid2idx_pk', pk.dumps(iid2idx))
    h5o.createArray(meta, 'idx2iid_pk', pk.dumps(idx2iid))
    h5o.createArray(meta, 'idx2iid', idx2iid)
    h5o.createArray(meta, 'ch2idx_pk', pk.dumps(ch2idx))
    h5o.createArray(meta, 'idx2ch', idx2ch)
    # save in img-major order (tr is in channel-major)
    h5o.createArray(meta, 'ntrials_img', tr[:, :n_img_ac].T)
    h5o.createArray(meta, 'frame_onset_pk', pk.dumps(frame_onset))

    # cluster related stuff
    for clu_k in ['idx2gcid', 'cid_sel', 'gcid2idx']:
        if clu_k not in dat:
            continue
        h5o.createArray(meta, clu_k + '_pk', pk.dumps(dat[clu_k]))

    # this is deprecated; mainly for backward compatibility
    orgfile[...] = org[:n_img_ac, :, :n_tr_ac]

    # populate /meta/iididx
    r = t_iididx.row
    for iid in iid2idx:
        r['iid'] = str(iid)
        r['iid_pk'] = pk.dumps(iid)
        r['idx'] = iid2idx[iid]
        r.append()
    t_iididx.flush()

    # -- store spiking time data
    for i in xrange(n_img_ac):
        if verbose > 0:
            print '* At: Image %d \r' % i,
            sys.stdout.flush()
        spktimg[i, :, :, :] = db[:, i, :n_tr_ac, :]

    if save_spktch:
        # /spkt_ch: bit-packed spike-time info matrix, channel-major
        spktch = h5o.createCArray(h5o.root, 'spkt_ch', atom, shape_ch,
                                  filters=filters)
        for i in xrange(n_elec):
            if verbose > 0:
                print '* At: Ch/site/unit %d \r' % i,
                sys.stdout.flush()
            spktch[i, :, :, :] = db[i, :n_img_ac, :n_tr_ac, :]

    # foffset stuff
    foffset_chidx = np.array(foffset_chidx, dtype='uint16')
    foffset_imgidx = np.array(foffset_imgidx, dtype='uint32')
    foffset_tridx = np.array(foffset_tridx, dtype='uint16')
    foffset_binidx = np.array(foffset_binidx, dtype='uint16')
    foffset_pos = np.array(foffset_pos, dtype='int64')
    for src, name, atom0 in zip(
            [foffset_chidx, foffset_imgidx, foffset_tridx,
             foffset_binidx, foffset_pos],
            ['foffset_chidx', 'foffset_imgidx', 'foffset_tridx',
             'foffset_binidx', 'foffset_pos'],
            [atomu16, atomu32, atomu16, atomu16, atom64]):
        if len(src) == 0:
            continue
        dst = h5o.createCArray(meta, name, atom0, src.shape,
                               filters=filters)
        dst[:] = src[:]

    if verbose > 0:
        print
    h5o.close()
    h5t.close()
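# A minimal read-back sketch for the bit-packed layout written above
# (names as in the code; the /1000 binning suggests 1-ms bins; the output
# filename is hypothetical):
h5 = tables.openFile('out.h5')             # file written by save_tinfo_core
packed = h5.root.spkt_img[0, 0, 0, :]      # image 0, electrode 0, trial 0
bins = np.unpackbits(packed)               # one 0/1 flag per time bin
spike_bins = np.nonzero(bins)[0]           # bin indices relative to t_min
h5.close()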
def _create_table_list(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and
    datatype are determined by *example*.

    This is the modified version for creating a table with appendList;
    dots in *name* are interpreted as a group hierarchy.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == list and type(example[0]) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" %
                        example.dtype)

    if type(example) == np.ndarray:
        h5dim = (0, ) + example.shape[1:]
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        nmpt = name.replace('.', '/\n')
        nmpt = nmpt.split('\n')
        path = '/'
        for kay in range(len(nmpt) - 1):
            # Create the intermediate group if it is not visible yet.
            try:
                h5.is_visible_node(path + nmpt[kay][:-1])
            except:
                h5.create_group(path, nmpt[kay][:-1])
            path += nmpt[kay]
        self.tables[name] = h5.create_earray(path, nmpt[-1], h5type, h5dim,
                                             filters=filters)
    elif type(example) == list and type(example[0]) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        nmpt = name.replace('.', '/\n')
        nmpt = nmpt.split('\n')
        path = '/'
        for kay in range(len(nmpt) - 1):
            # Create the intermediate group if it is not visible yet.
            try:
                h5.is_visible_node(path + nmpt[kay][:-1])
            except:
                h5.create_group(path, nmpt[kay][:-1])
            path += nmpt[kay]
        self.tables[name] = h5.create_vlarray(path, nmpt[-1], h5type,
                                              filters=filters)
    self.types[name] = type(example)
def create_kwik(path, experiment_name=None, prm=None, prb=None,
                overwrite=True):
    """Create a KWIK file.

    Arguments:
      * path: path to the .kwik file.
      * experiment_name
      * prm: a dictionary representing the contents of the PRM file
        (used for SpikeDetekt)
      * prb: a dictionary with the contents of the PRB file

    """
    if experiment_name is None:
        experiment_name = ''
    if prm is None:
        prm = {}
    if prb is None:
        prb = {}

    if not overwrite and os.path.exists(path):
        return

    file = tb.openFile(path, mode='w')
    file.root._f_setAttr('kwik_version', 2)
    file.root._f_setAttr('name', experiment_name)

    file.createGroup('/', 'application_data')

    # Set the SpikeDetekt parameters.
    file.createGroup('/application_data', 'spikedetekt')
    for prm_name, prm_value in iteritems(prm):
        file.root.application_data.spikedetekt._f_setAttr(prm_name,
                                                          prm_value)

    file.createGroup('/', 'user_data')

    # Create channel groups.
    file.createGroup('/', 'channel_groups')
    for igroup, group_info in prb.iteritems():
        igroup = int(igroup)
        group = file.createGroup('/channel_groups', str(igroup))
        # group_info: channel, graph, geometry
        group._f_setAttr('name', 'channel_group_{0:d}'.format(igroup))
        group._f_setAttr('adjacency_graph',
                         np.array(group_info.get('graph', np.zeros((0, 2))),
                                  dtype=np.int32))
        file.createGroup(group, 'application_data')
        file.createGroup(group, 'user_data')

        # Create channels.
        file.createGroup(group, 'channels')
        channels = group_info.get('channels', [])

        # Add the channel order.
        group._f_setAttr('channel_order', np.array(channels,
                                                   dtype=np.int32))

        for channel_idx in channels:
            # channel_idx is the absolute channel index.
            channel = file.createGroup(group.channels, str(channel_idx))
            channel._f_setAttr('name',
                               'channel_{0:d}'.format(channel_idx))
            # "channels" only contains not-ignored channels here.
            channel._f_setAttr('ignored', False)
            pos = group_info.get('geometry', {}).get(channel_idx, None)
            if pos is not None:
                pos = np.array(pos, dtype=np.float32)
            channel._f_setAttr('position', pos)
            channel._f_setAttr('voltage_gain',
                               prm.get('voltage_gain', 0.))
            channel._f_setAttr('display_threshold', 0.)
            file.createGroup(channel, 'application_data')
            file.createGroup(channel.application_data, 'spikedetekt')
            file.createGroup(channel.application_data, 'klustaviewa')
            file.createGroup(channel, 'user_data')

        # Create spikes.
        spikes = file.createGroup(group, 'spikes')
        file.createEArray(spikes, 'time_samples', tb.UInt64Atom(), (0,),
                          expectedrows=1000000)
        file.createEArray(spikes, 'time_fractional', tb.UInt8Atom(), (0,),
                          expectedrows=1000000)
        file.createEArray(spikes, 'recording', tb.UInt16Atom(), (0,),
                          expectedrows=1000000)
        clusters = file.createGroup(spikes, 'clusters')
        file.createEArray(clusters, 'main', tb.UInt32Atom(), (0,),
                          expectedrows=1000000)
        file.createEArray(clusters, 'original', tb.UInt32Atom(), (0,),
                          expectedrows=1000000)

        fm = file.createGroup(spikes, 'features_masks')
        fm._f_setAttr('hdf5_path',
                      '{{kwx}}/channel_groups/{0:d}/features_masks'.format(
                          igroup))
        wr = file.createGroup(spikes, 'waveforms_raw')
        wr._f_setAttr('hdf5_path',
                      '{{kwx}}/channel_groups/{0:d}/waveforms_raw'.format(
                          igroup))
        wf = file.createGroup(spikes, 'waveforms_filtered')
        wf._f_setAttr('hdf5_path',
                      '{{kwx}}/channel_groups/{0:d}/waveforms_filtered'.
                      format(igroup))

        # Create clusters.
        clusters = file.createGroup(group, 'clusters')
        file.createGroup(clusters, 'main')
        file.createGroup(clusters, 'original')

        # Create cluster groups.
        cluster_groups = file.createGroup(group, 'cluster_groups')
        file.createGroup(cluster_groups, 'main')
        file.createGroup(cluster_groups, 'original')

    # Create recordings.
    file.createGroup('/', 'recordings')

    # Create event types.
    file.createGroup('/', 'event_types')

    file.close()
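# Hypothetical call of create_kwik, mirroring the PRB layout the loop above
# expects (string group ids; 'channels', 'graph', 'geometry' keys):
prb = {'0': {'channels': [0, 1, 2, 3],
             'graph': [[0, 1], [1, 2], [2, 3]],
             'geometry': {0: (0., 0.), 1: (0., 10.),
                          2: (0., 20.), 3: (0., 30.)}}}
create_kwik('experiment.kwik', experiment_name='experiment',
            prm={'voltage_gain': 10.}, prb=prb)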
def inference(cfg, is_testing=False):
    """
    Inference for either PPN or (xor) the base network (e.g. UResNet).
    """
    if not os.path.isdir(cfg.DISPLAY_DIR):
        os.makedirs(cfg.DISPLAY_DIR)

    if is_testing:
        _, data = get_data(cfg)
    else:
        data, _ = get_data(cfg)

    net = basenets[cfg.BASE_NET](cfg=cfg)
    if cfg.WEIGHTS_FILE_PPN is None and cfg.WEIGHTS_FILE_BASE is None:
        raise Exception("Need a checkpoint file")
    net.init_placeholders()
    net.create_architecture(is_training=False)
    duration = 0
    metrics = UResNetMetrics(cfg)

    FILTERS = tables.Filters(complevel=5, complib='zlib', shuffle=True,
                             bitshuffle=False, fletcher32=False,
                             least_significant_digit=None)
    f_submission = tables.open_file('/data/codalab/submission_5-6.hdf5',
                                    'w', filters=FILTERS)
    preds_array = f_submission.create_earray('/', 'pred',
                                             tables.UInt32Atom(),
                                             (0, 192, 192, 192),
                                             expectedrows=data.n)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        load_weights(cfg, sess)
        for i in range(min(data.n, cfg.MAX_STEPS)):
            print("%d/%d" % (i, data.n))
            blob = data.forward()
            if is_testing:
                blob['labels'] = blob['data'][..., 0]
            start = time.time()
            summary, results = net.test_image(sess, blob)
            end = time.time()
            duration += end - start
            # Drawing time
            # display_uresnet(blob, cfg, index=i, **results)
            if not is_testing:
                metrics.add(blob, results)
            mask = np.where(blob['data'][..., 0] > 0)
            preds = np.reshape(results['predictions'], (1, 192, 192, 192))
            print(np.count_nonzero(preds[mask] > 0))
            preds[mask] = 0
            preds_array.append(preds)
            print(preds.shape)

    preds_array.close()
    f_submission.close()
    duration /= cfg.MAX_STEPS
    print("Average duration of inference = %f s" % duration)
    if not is_testing:
        metrics.plot()
fare = Attribute('fare_amount', 2.5, 300.0)
surcharge = Attribute('surcharge', 0.0, 3.0)
tip = Attribute('tip_amount', 0.0, 165.0)
toll = Attribute('tolls_amount', 0.0, 20.0)
total = Attribute('total_amount', 2.5, 370.5)

attributes = [
    ratecode, passenger, triptime, distance, pickuplong, pickuplat,
    dropofflong, dropofflat, fare, surcharge, tip, toll, total
]

rownr = 2000
matrixrownr = rownr * pow(math.log(rownr), 2)

fileName = 'smallconcept.h5'
shape = (int(matrixrownr), rownr + 1)
atom = tables.UInt32Atom()
filters = tables.Filters(complevel=5, complib='zlib')
h5f = tables.open_file(fileName, 'w')
ca = h5f.create_carray(h5f.root, 'carray', atom, shape, filters=filters)
#matrix = numpy.zeros(shape=(int(matrixrownr), rownr))
#vector = []

vfileName = 'smallconceptresult.h5'
vshape = (int(matrixrownr), 2)
vh5f = tables.open_file(vfileName, 'w')
vca = vh5f.create_carray(vh5f.root, 'carray', atom, vshape, filters=filters)

efileName = 'smallconceptexpected.h5'
eshape = (rownr, 2)
eh5f = tables.open_file(efileName, 'w')
eca = eh5f.create_carray(eh5f.root, 'carray', atom, eshape, filters=filters)
rng = make_np_rng(default_seed=123522)

if __name__ == "__main__":
    base_dir = serial.preprocess(join('${PYLEARN2_DATA_PATH}',
                                      'dogs_vs_cats'))
    files = [f for f in listdir(join(base_dir, 'train'))
             if isfile(join(base_dir, 'train', f))]

    filters = tables.Filters(complib='blosc', complevel=5)
    h5file = tables.open_file(join(base_dir, 'train.h5'), mode='w',
                              title='Dogs vs. Cats - Training set',
                              filters=filters)
    group = h5file.create_group(h5file.root, 'Data', 'Data')
    atom_8 = tables.UInt8Atom()
    atom_32 = tables.UInt32Atom()
    X = h5file.create_vlarray(group, 'X', atom=atom_8, title='Data values',
                              expectedrows=25000, filters=filters)
    y = h5file.create_carray(group, 'y', atom=atom_8, title='Data targets',
                             shape=(25000, 1), filters=filters)
    s = h5file.create_carray(group, 's', atom=atom_32, title='Data shapes',
                             shape=(25000, 3), filters=filters)

    # Shuffle examples around.
    rng.shuffle(files)

    for i, f in enumerate(files):
        image = misc.imread(join(base_dir, 'train', f))
        X.append(image.flatten())
        target = 0 if 'cat' in f else 1
        y[i] = target
        s[i] = image.shape
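# Read-back sketch for the arrays built above: X holds flattened uint8
# pixels and s the original shapes, so an image is recovered by reshaping
# one VLArray row:
i = 0
image = X[i].reshape(s[i])   # (rows, cols, channels)
label = int(y[i])            # 0 = cat, 1 = dog (per the naming rule above)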
ts_1 = f1.root.timestamps

# file 2, apd2
f2 = tables.open_file('tempAPD2_copy.hdf', 'r')
ts_2 = f2.root.timestamps

# lengths
f1_num = f1.root.timestamps.nrows
f2_num = f2.root.timestamps.nrows
row_num = (f1_num + f2_num)

# file 3, outfile
f3 = tables.open_file('sortedFile.hdf', mode='w')
f3.create_group(f3.root, name='photon_data')
filters = tables.Filters(complevel=6, complib='zlib')
atom1 = tables.UInt32Atom()
atom2 = tables.Int8Atom()
ts = f3.create_carray('/photon_data', name='timestamps', atom=atom1,
                      shape=(row_num, 1), filters=filters)
det = f3.create_carray('/photon_data', name='detectors', atom=atom2,
                       shape=(row_num, 1), filters=filters)

# Calculations
start = time.time()
merge_files(ts_1, ts_2, ts, det, f1_num, f2_num)