Example #1
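Creates a PyTables HDF5 file with empty, Blosc-compressed `data`, `indices`, and `indptr` EArrays for storing a CSR sparse matrix incrementally.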
	def create_hdf_csr(self, file_name):
		######## create hdf file from pytables ######
		h5File_path = os.path.join(self.prefix, file_name)
		h5File = tb.open_file(h5File_path, "w")
		filters = tb.Filters(complevel=5, complib='blosc')
		h5File.create_earray(h5File.root, 'data', tb.Float32Atom(), shape=(0,), filters=filters)
		h5File.create_earray(h5File.root, 'indices', tb.UInt32Atom(),shape=(0,), filters=filters)
		h5File.create_earray(h5File.root, 'indptr', tb.UInt32Atom(), shape=(0,), filters=filters)
		h5File.root.indptr.append(np.array([0], dtype = np.uint32))
		return h5File, h5File_path
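A minimal companion sketch (not part of the original example) of how the handle returned by create_hdf_csr might be filled; it assumes a scipy.sparse CSR matrix, and the append_csr helper is hypothetical:

import numpy as np
import scipy.sparse as sp

def append_csr(h5File, csr):
    # Shift this matrix's indptr by the number of values already stored,
    # so the concatenated indptr stays monotonic across appended matrices.
    csr = sp.csr_matrix(csr)
    offset = h5File.root.data.nrows
    h5File.root.data.append(csr.data.astype(np.float32))
    h5File.root.indices.append(csr.indices.astype(np.uint32))
    h5File.root.indptr.append((csr.indptr[1:] + offset).astype(np.uint32))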
Example #2
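Appends per-group count columns from a source HDF5 file into a `counts` EArray and copies each group's taxonomy node into the output file.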
def groups_to_matrix(m_file, c_file):
    filters = tables.Filters(complevel=1, complib='blosc', fletcher32=True)
    h5fh = tables.open_file(m_file, mode='a', filters=filters)
    if not 'counts' in h5fh.root:
        atom = tables.UInt32Atom()
        shape = (2**32 - 1, 0)
        h5fh.create_earray(h5fh.root,
                           'counts',
                           atom,
                           shape,
                           "counts matrix",
                           expectedrows=2**32 - 1)
    counts = h5fh.root.counts

    grouph5fh = tables.open_file(c_file, mode='r')
    for group_num in list(grouph5fh.root._v_groups):
        path = "/%s" % group_num
        print "Processing counts for %s" % path
        counts.append(
            np.resize(grouph5fh.getNode(path, 'count').read(), (2**32 - 1, 1)))
        col_num = counts.shape[1] - 1
        new_group = h5fh.create_group(h5fh.root,
                                      "%s|%s" % (group_num, col_num))
        print "Adding taxonomy data for %s" % path
        h5fh.copy_node(grouph5fh.getNode(path, 'taxonomy'),
                       new_group,
                       'taxonomy',
                       recursive=True)
        print "Finished processing %s" % path
    grouph5fh.close()
    h5fh.close()
Example #3
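Unit test that appends a uint32 column to a `counts` EArray, reopens the file, appends a second column, and verifies both stored columns.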
    def test_append_to_matrix_counts(self):
        shape = (5, 0)
        atom = tables.UInt32Atom()
        filters = tables.Filters(complevel=9, complib='zlib')
        h5fh = tables.open_file("earray_append.h5", mode='a', filters=filters)
        ea = h5fh.create_earray(h5fh.root, 'counts', atom, shape,
                                "counts matrix", filters, 2**32 - 1)
        self.assertEqual(ea.shape[1], 0)
        col1 = np.array([1, 6, 11, 16, 21], dtype=np.uint32,
                        ndmin=2).transpose()
        ea.append(col1)
        h5fh.close()

        # append to file
        h5fh = tables.open_file("earray_append.h5", mode='a', filters=filters)
        self.assertIn('counts', h5fh.root)
        counts = h5fh.root.counts
        self.assertEqual(counts.shape[1], 1)
        col2 = np.array([2, 7, 12, 17, 22], dtype=np.uint32,
                        ndmin=2).transpose()
        counts.append(col2)
        self.assertEqual(counts.shape[1], 2)
        self.assertTrue(np.array_equal(h5fh.root.counts[:, 0], col1[:, 0]))
        self.assertTrue(np.array_equal(h5fh.root.counts[:, 1], col2[:, 0]))
        h5fh.close()
        os.remove("earray_append.h5")
Example #4
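Adds a clustering to a KWIK file: checks the spike count, creates the clustering group and spike-cluster EArray, and registers each unique cluster.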
def add_clustering(fd, channel_group_id=None, name=None,
                   spike_clusters=None, overwrite=False):
    """fd is returned by `open_files`: it is a dict {type: tb_file_handle}."""
    if channel_group_id is None:
        channel_group_id = '0'
    kwik = fd.get('kwik', None)
    # The KWIK needs to be there.
    assert kwik is not None
    # The channel group id containing the new cluster group must be specified.
    assert channel_group_id is not None
    assert name is not None
    assert spike_clusters is not None

    spikes = kwik.root.channel_groups.__getattr__(channel_group_id).spikes.recording
    spikes_path = '/channel_groups/{0:s}/spikes/clusters'.format(channel_group_id)
    clusters_path = '/channel_groups/{0:s}/clusters'.format(channel_group_id)

    # Check if clustering has the right number of spikes
    if not spike_clusters.shape[0] == spikes.shape[0]:
        print "\nERROR: Could not add clustering in group \"{0:s}\": wrong number of spikes".format(name)
        print ("Expected {0:d}, got {1:d}".format(spike_clusters.shape[0], spikes.shape[0]))
        return False
    
    # Create the HDF5 groups in /.../clusters.
    try:
        clu_group = kwik.createGroup(clusters_path, name)
    except tb.NodeError:
        assert overwrite, "The clustering already exists, use overwrite=True"
        kwik.removeNode(clusters_path, name, recursive=True)
        clu_group = kwik.createGroup(clusters_path, name)
    
    # Create the HDF5 dataset with the spike clusters.
    try:
        kwik.createEArray(spikes_path, name, tb.UInt32Atom(), 
                          expectedrows=1000000, obj=spike_clusters.astype(np.uint32))
    except tb.NodeError:
        assert overwrite, "The clustering already exists, use overwrite=True"
        kwik.removeNode(spikes_path, name)
        kwik.createEArray(spikes_path, name, tb.UInt32Atom(), 
                          expectedrows=1000000, obj=spike_clusters.astype(np.uint32))
    
    # Create the cluster HDF5 groups under the new clustering group.
    clusters_unique = np.unique(spike_clusters)
    for cluster in clusters_unique:
        add_cluster(fd, channel_group_id=channel_group_id, id=str(cluster), 
                    clustering=name, 
                    cluster_group=3)  # default cluster group = unsorted
Example #5
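Listener process that drains a queue and appends predicted image tiles and their x/y position labels to EArrays until a 'kill' sentinel arrives.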
def listener(q, output_path):
    """
    """
    try:
        counter = 0
        pid = os.getpid()
        logging.info("Listener running on {}".format(pid))
        hdf5_file = tb.open_file(output_path, mode='w')
        pred_storage = hdf5_file.create_earray(
            hdf5_file.root,
            "pred_img",
            tb.UInt8Atom(),
            shape=(0, 299, 299, 3)
        )
        xlabel_storage = hdf5_file.create_earray(
            hdf5_file.root,
            "pos_xlabel",
            tb.UInt32Atom(),
            shape=(0, 1)
        )
        ylabel_storage = hdf5_file.create_earray(
            hdf5_file.root,
            "pos_ylabel",
            tb.UInt32Atom(),
            shape=(0, 1)
        )

        while 1:
            counter += 1
            if counter % 100 == 0:
                logging.info("{} tiles saved in hdf5.".format(counter))
            data = q.get()
            if data == 'kill':
                logging.info("Listner closed.")
                hdf5_file.close()
                return None
            pred = data['pred']
            xlabel = data['xlabel']
            ylabel = data['ylabel']

            pred_storage.append(pred[None])
            xlabel_storage.append(xlabel[None])
            ylabel_storage.append(ylabel[None])

    finally:
        hdf5_file.close()
Example #6
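Creates a `unique_tiles` CArray sized from the unique `pair_id` values of a feature table.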
def add_unique_tiles_table(out_file, feature_table, group):
    unique_tiles = np.unique(feature_table.cols.pair_id[:])
    atom = tb.UInt32Atom()
    filters = tb.Filters(complevel=5, complib='blosc')
    ca = out_file.create_carray(group,
                                'unique_tiles',
                                atom,
                                unique_tiles.shape,
                                filters=filters)
    ca[:] = unique_tiles
    ca.flush()
Example #7
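Lays out an HDF5 database of groups and tables for dictionary, Congressional Record, and supplementary input data.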
def create_database():
    """Specifies the input data only."""
    db = tables.openFile(DATAPATH + 'processed/db.h5', 'w')

    # /
    input_data = db.createGroup('/', 'input_data', 'The Input Data group')

    # /input_data
    dictionary = db.createGroup(input_data, 'dictionary',
                                'The Dictionary group')
    dictionary._v_attrs.num_docs = 0  # Number of documents processed
    dictionary._v_attrs.num_tokens = 0  # Number of token->id mappings in the dictionary
    record = db.createGroup(input_data, 'record',
                            'The Congressional Record group')
    record._v_attrs.num_speakers = 0  # Number of unique speakers (by icpsrID)
    supplementary = db.createGroup(input_data, 'supplementary',
                                   'The Supplementary Data group')

    # /input_data/dictionary
    token2id = db.createTable(dictionary,
                              'token2id',
                              Dictionary.token2id,
                              'The token2id table',
                              expectedrows=1e5)
    # Add index only once the token2id table is completed
    #indexrows = token2id.cols.token_id.createCSIndex()

    # /input_data/record
    speaker = db.createTable(record,
                             'speaker',
                             Speaker.cols,
                             'The Speaker table',
                             expectedrows=2500)
    a = tables.UInt32Atom()
    document = db.createTable(record,
                              'document',
                              Document.cols,
                              'The Document table',
                              expectedrows=5e4)
    doc2bow = db.createTable(record,
                             'doc2bow',
                             Doc2BoW.cols,
                             'The Doc2BoW table',
                             expectedrows=5e4)

    # /input_data/supplementary
    covariates = db.createTable(supplementary,
                                'covariates',
                                Covariates.cols,
                                'The Covariates table',
                                expectedrows=5e4)

    db.close()
Example #8
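Writes one time step of a connectivity matrix into a `conmat` CArray, creating the array on first use.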
 def write_hdf(jd, dt):
     filename = os.path.join(self.conmatdir,"conmat_%s_%s_%06i.h5" %
                             (self.projname, self.casename, jd))
     shape = (self.dtmax, self.nreg, self.nreg)
     atom = td.UInt32Atom()
     #filters = td.Filters(complevel=5, complib='zlib')
     with td.openFile(filename, 'a') as h5f:
         if hasattr(h5f.root, 'conmat'):
             ca = h5f.root.conmat
         else:
             ca = h5f.createCArray(h5f.root, 'conmat', atom, shape)
         ca[dt,:,:] = self.conmat.astype(np.uint32)
Example #9
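Builds a customer-by-product ratings matrix from MongoDB collections, appending one row per customer to an EArray.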
def compute_ratings_matrix(ratings_matrix_file):
    """
    Computes the rating matrix
        Input:
            ratings_matrix_file: Filename output rating matrix
    """

    mongo = Mongo('Acme-Supermarket')
    mongo.connect()

    matrix_file = ratings_matrix_file
    hdf5_matrix = tables.openFile(matrix_file, mode='w')

    filters = tables.Filters(complevel=5, complib='blosc')

    products = mongo.database.products.find({}, {'_id': 1})
    products = [p['_id'] for p in products]
    products = numpy.concatenate((numpy.array([-1]), products))
    products_count = mongo.database.products.count()

    customers = mongo.database.actors.find({'_type': 'Customer'}, {'_id': 1})
    customers = [c['_id'] for c in customers]
    customers_count = mongo.database.actors.count({'_type': 'Customer'})

    data_storage = hdf5_matrix.createEArray(hdf5_matrix.root,
                                            'data',
                                            tables.UInt32Atom(),
                                            shape=(0, products_count + 1),
                                            filters=filters,
                                            expectedrows=customers_count)

    data_storage.append(products[:][None])
    for customer_id in customers:
        # Each column 0: Customer IDs
        # Product ratings in columns 1+
        row = numpy.zeros((products_count + 1, ))

        row[0] = customer_id
        ratings = mongo.database.rates.find({'customer_id': customer_id}, {
            'product_id': 1,
            'value': 1
        })

        for rating in ratings:
            row[numpy.where(
                products == rating['product_id'])[0][0]] = rating['value']

        data_storage.append(row[:][None])

    hdf5_matrix.close()
    mongo.disconnect()

    return matrix_file
Example #10
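Collects event timestamps from all station groups into an Array and stores a coincidence index in a UInt32 VLArray.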
 def search_coincidences(self):
     if '/c_index' not in self.data and '/timestamps' not in self.data:
         c_index, timestamps = [], []
         for id, station in enumerate(self.station_groups):
             station = self.data.getNode(station)
             for event_id, event in enumerate(station.events):
                 timestamps.append((event['ext_timestamp'], id, event_id))
                 c_index.append([len(timestamps) - 1])
         timestamps = np.array(timestamps, dtype=np.uint64)
         self.data.createArray('/', 'timestamps', timestamps)
         self.data.createVLArray('/', 'c_index', tables.UInt32Atom())
         for coincidence in c_index:
             self.data.root.c_index.append(coincidence)
Example #11
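Creates a named mapping array under `/lookup` that maps one data dimension to integer values, optionally overwriting an existing mapping.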
    def create_mapping(self, title, entries, overwrite=False):
        """
        Create an equivalency index, which maps a raw data dimension to
        another integer value. Once created, mappings can be referenced by
        offset or by key.
        
        Parameters:
        -----------
        title : string
            Name of this mapping
        entries : list
            List of n equivalencies for the mapping. n must match one data
            dimension of the matrix.
        overwrite : boolean
            True to allow overwriting an existing mapping, False will raise
            a LookupError if the mapping already exists. Default is False.

        Returns:
        --------
        mapping : tables.array
            Returns the created mapping.

        Raises:
            LookupError : if the mapping exists and overwrite=False
        """

        # Enforce shape-checking
        if self.shape():
            if not len(entries) in self._shape:
                raise ShapeError('Mapping must match one data dimension')

        # Handle case where mapping already exists:
        if title in self.list_mappings():
            if overwrite:
                self.delete_mapping(title)
            else:
                raise LookupError(title + ' mapping already exists.')

        # Create lookup group under root if it doesn't already exist.
        if 'lookup' not in self.root:
            self.create_group(self.root, 'lookup')

        # Write the mapping!
        mymap = self.create_array(self.root.lookup,
                                  title,
                                  atom=tables.UInt32Atom(),
                                  shape=(len(entries), ))
        mymap[:] = entries

        return mymap
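A brief usage sketch for the mapping API documented above; the `matrix` object and the entry values are hypothetical:

# Assume `matrix` is an instance of the class defining create_mapping,
# wrapping data that has one dimension of length 3.
zone_map = matrix.create_mapping('zones', [101, 102, 103], overwrite=True)
print(zone_map[:])  # -> array([101, 102, 103], dtype=uint32)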
Example #12
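Maps a NumPy dtype to a PyTables atom and creates an EArray (or a VLArray for lists of strings) suited to appending lists of rows.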
    def _create_table_list(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        The modified version for creating table with appendList
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(np.int): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(np.bool): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == list and type(example[0]) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape[1:]

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_earray(h5.root,
                                                 name,
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)
        elif type(example) == list and type(example[0]) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_vlarray(h5.root,
                                                  name,
                                                  h5type,
                                                  filters=filters)
        self.types[name] = type(example)
Example #13
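Same dtype-to-atom dispatch as the previous example, but the EArray's row shape is the full shape of the example array.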
    def _create_table(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(np.int): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(np.bool): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError(
                "Could not create table %s because of unknown dtype '%s'" %
                (name, example.dtype))  #+ ", of name: " % example.shape)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_earray(h5.root,
                                                 name,
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)
        elif type(example) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_vlarray(h5.root,
                                                  name,
                                                  h5type,
                                                  filters=filters)
        self.types[name] = type(example)
Example #14
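Creates `data`, `truth`, and `affine` EArrays for an image dataset, choosing the truth atom and shape from the label format ('nii' or 'csv').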
    def createHDF5File(self):
        out_file_path = os.path.join(self._output_path, self._output_file_name)
        try:
            hdf5_file = tables.open_file(out_file_path, mode='w')
            filters = tables.Filters(complevel=5, complib='blosc')
            data_shape = tuple([0, self.num_modalities] +
                               list(self._image_shape))
            data_storage = hdf5_file.create_earray(
                hdf5_file.root,
                'data',
                tables.Float32Atom(),
                shape=data_shape,
                filters=filters,
                expectedrows=self.num_modalities)
            if self.label_format == "nii":
                truth_shape = tuple([0, 1] + list(self._image_shape))
                truth_storage = hdf5_file.create_earray(
                    hdf5_file.root,
                    'truth',
                    tables.UInt8Atom(),
                    shape=truth_shape,
                    filters=filters,
                    expectedrows=self.num_modalities)
            elif self.label_format == 'csv':
                truth_shape = tuple([0, self._image_shape[-1]])
                truth_storage = hdf5_file.create_earray(
                    hdf5_file.root,
                    'truth',
                    tables.UInt32Atom(),
                    shape=truth_shape,
                    filters=filters,
                    expectedrows=self.num_modalities)
            else:
                raise ValueError("Fail to recognize label format: %s" %
                                 self.label_format)

            affine_storage = hdf5_file.create_earray(
                hdf5_file.root,
                'affine',
                tables.Float32Atom(),
                shape=(0, 4, 4),
                filters=filters,
                expectedrows=self.num_modalities)
            return hdf5_file, data_storage, truth_storage, affine_storage
        except Exception as e:
            # If something goes wrong, delete the incomplete data file
            os.remove(out_file_path)
            raise e
Example #15
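Writes a sorted landmark database: copies the table sorted by `pair_id`, reorders the descriptor array to match, and stores unique tile ids and counts.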
def create_sorted_db(old_table, old_array, sorted_hdf):
    """
    For a set of features in the flat_hdf, write them to a table in HDF5
    """
    out_file = tb.open_file(sorted_hdf,
                            mode='w',
                            title='Landmark Database')
    group = out_file.create_group('/', 'db', 'Landmark Database')
    old_table.copy(newparent=group,
                   newname='landmarks',
                   sortby='pair_id',
                   checkCSI=True)

    filters = tb.Filters(complib='blosc', complevel=5)
    new_array = out_file.create_carray(group,
                                       name='descriptors',
                                       atom=tb.UInt8Atom(),
                                       shape=old_array.shape,
                                       filters=filters)
    idx = old_table.cols.pair_id.index
    new_array[:, :] = old_array[idx[:], :]

    new_table = out_file.root.db.landmarks
    new_table.cols.pair_id.create_csindex()
    new_table.cols.x.create_csindex()
    new_table.cols.y.create_csindex()
    new_table.cols.octave.create_csindex()

    unique_tiles, uidcount = np.unique(old_table.cols.pair_id[:],
                                       return_counts=True)
    atom = tb.UInt32Atom()
    filters = tb.Filters(complevel=5, complib='blosc')
    uidcount_a = out_file.create_carray(group,
                                        'unique_tile_count',
                                        atom,
                                        unique_tiles.shape,
                                        filters=filters)

    uid_a = out_file.create_carray(group,
                                   'unique_tiles',
                                   atom,
                                   unique_tiles.shape,
                                   filters=filters)

    uidcount_a[:] = uidcount
    uid_a[:] = unique_tiles

    out_file.close()
Example #16
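Simulation constructor that creates the output group with `observables` and `coincidences` tables and a `c_index` UInt32 VLArray.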
    def __init__(self,
                 cluster,
                 data,
                 output,
                 R,
                 N,
                 use_poisson=None,
                 gauss=None,
                 trig_threshold=1.,
                 force=False):
        """Simulation initialization

        :param cluster: BaseCluster (or derived) instance
        :param data: the HDF5 file
        :param output: name of the destination group to store results
        :param R: maximum distance of shower to center of cluster
        :param N: number of simulations to perform
        :param force: if True, ignore pre-existing simulations; they will be
            overwritten!

        """
        self.cluster = cluster
        self.data = data
        self.R = R
        self.N = N

        self.use_poisson = use_poisson
        self.gauss = gauss
        self.trig_threshold = trig_threshold

        if output in data and not force:
            raise RuntimeError("Cancelling simulation; %s already exists?" %
                               output)
        elif output in data:
            data.removeNode(output, recursive=True)

        head, tail = os.path.split(output)
        self.output = data.createGroup(head, tail, createparents=True)
        self.observables = self.data.createTable(
            self.output, 'observables', storage.SimulationEventObservables)
        self.coincidences = self.data.createTable(self.output, 'coincidences',
                                                  storage.Coincidence)
        self.c_index = self.data.createVLArray(self.output, 'c_index',
                                               tables.UInt32Atom())

        self.output._v_attrs.cluster = cluster
Example #17
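Creates a VLArray, group, or EArray depending on whether the example value is a string, a dict, or an array, using a dtype-name-to-atom map.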
    def _create_table(self, name, example, parent=None):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        """
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib',
                                 shuffle=True)
        if parent is None:
            parent = h5.root

        if type(example) == str:
            h5type = tables.VLStringAtom()
            h5.createVLArray(parent, name, h5type, filters=filters)
            return
        if type(example) == dict:
            self.h5.createGroup(parent, name)
            return
        #If we get here then we're dealing with numpy arrays
        example = np.asarray(example)

        #MODIFICATION: appended name everywhere and introduced string
        type_map = {
            np.dtype(np.float64).name: tables.Float64Atom(),
            np.dtype(np.float32).name: tables.Float32Atom(),
            np.dtype(np.int).name: tables.Int64Atom(),
            np.dtype(np.int8).name: tables.Int8Atom(),
            np.dtype(np.uint8).name: tables.UInt8Atom(),
            np.dtype(np.int16).name: tables.Int16Atom(),
            np.dtype(np.uint16).name: tables.UInt16Atom(),
            np.dtype(np.int32).name: tables.Int32Atom(),
            np.dtype(np.uint32).name: tables.UInt32Atom(),
            np.dtype(np.bool).name: tables.BoolAtom(),
            # Maximal string length of 128 per string - change if needed
            'string32': tables.StringAtom(128)
        }

        try:
            h5type = type_map[example.dtype.name]
            h5dim = (0, ) + example.shape
            h5.createEArray(parent, name, h5type, h5dim, filters=filters)
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)
Example #18
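Searches station data for time coincidences and stores the raw timestamps plus a `_src_c_index` UInt32 VLArray of preliminary coincidences.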
    def search_coincidences(self, window=10000, shifts=None, limit=None):
        """Search for coincidences.

        Search all data in the station_groups for coincidences, and store
        rudimentary coincidence data in the coincidences group.  This data
        might be useful, but is very basic.  You can call the
        :meth:`store_coincidences` method to store the coincidences in an
        easier format in the coincidences group.

        If you want to process the preliminary results: they are stored in
        _src_c_index and _src_timestamps.  The former is a list of
        coincidences, which each consist of a list with indexes into the
        timestamps array as a pointer to the events making up the
        coincidence. The latter is a list of tuples.  Each tuple consists
        of a timestamp followed by an index into the stations list which
        designates the detector station which measured the event, and
        finally an index into that station's event table.

        :param window: the coincidence time window in nanoseconds. All events
            with delta t's smaller than this window will be considered a
            coincidence.
        :param shifts: optionally shift a station's data in time.  This
            can be useful if a station has a misconfigured GPS clock.
            Expects a list of shifts, one for each station, in seconds.
            Use 'None' for no shift.
        :param limit: optionally limit the search for this number of
            events.

        """
        c_index, timestamps = \
            self._search_coincidences(window, shifts, limit)
        timestamps = np.array(timestamps, dtype=np.uint64)
        self.data.create_array(self.coincidence_group, '_src_timestamps',
                               timestamps)
        src_c_index = self.data.create_vlarray(self.coincidence_group,
                                               '_src_c_index',
                                               tables.UInt32Atom())
        for coincidence in c_index:
            src_c_index.append(coincidence)
Example #19
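Copies the first 100 rows of the `timestamps` array from two files into freshly created compressed CArray copies, timing each copy.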
def small_copy():
    filters = tables.Filters(complevel=6, complib='zlib')
    atom = tables.UInt32Atom()
    start = time.time()
    # File 1
    f1 = tables.open_file('tempAPD1.hdf', 'r')
    t1 = f1.root.timestamps

    f1_copy = tables.open_file('tempAPD1_copy.hdf', 'w')
    t1_copy = f1_copy.create_carray(f1_copy.root,
                                    name='timestamps',
                                    atom=atom,
                                    shape=(100, 2),
                                    filters=filters)

    t1_copy[0:100, :] = t1[0:100, :]

    f1.close()
    f1_copy.close()
    print("File 1 took %f seconds." % (time.time() - start))

    # file 2
    start = time.time()
    f2 = tables.open_file('tempAPD2.hdf', 'r')
    t2 = f2.root.timestamps

    f2_copy = tables.open_file('tempAPD2_copy.hdf', 'w')
    t2_copy = f2_copy.create_carray(f2_copy.root,
                                    name='timestamps',
                                    atom=atom,
                                    shape=(100, 2),
                                    filters=filters)

    t2_copy[0:100, :] = t2[0:100, :]

    f2.close()
    f2_copy.close()
    print("File 1 took %f seconds." % (time.time() - start))
Example #20
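Opens (or initializes) a connectivity-matrix file with `conmat`, `jdvec`, and `exist` CArrays and returns the three nodes.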
 def h5open(self):
     self.h5filename = os.path.join(
         self.conmatdir, "conmat_%s_%s.h5" % (self.projname, self.casename))
     self.h5f = h5f = td.openFile(self.h5filename, 'a')
     if not hasattr(h5f.root, 'conmat'):
         if not hasattr(self, 'reg'): self.regvec_from_discs()
         jdvec = int((self.jdmax - self.jdmin + 1) / self.djd) + 1
         shape = (jdvec, self.dtmax, self.nreg, self.nreg)
         iatom = td.UInt32Atom()
         fatom = td.FloatCol()
         batom = td.BoolAtom()
         filtr = td.Filters(complevel=5, complib='zlib')
         crc = h5f.createCArray
         cnmat = crc(h5f.root, 'conmat', iatom, shape, filters=filtr)
         jdvec = crc(h5f.root, 'jdvec', fatom, (shape[0], ))
         exist = crc(h5f.root, 'exist', batom, (shape[0], shape[1]))
         jdvec[:] = np.arange(self.jdmin, self.jdmax + 1, self.djd)
         exist[:] = False
     else:
         cnmat = h5f.root.conmat
         jdvec = h5f.root.jdvec
         exist = h5f.root.exist
     return cnmat, jdvec, exist
Example #21
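Worker that appends timestamp arrays from a queue to an EArray, computing a count rate from rollover-corrected first and last entries, until a 'STOP' sentinel.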
 def dataProcessing_finite(self):
     """
     DataQ sends a string sentinel, first and last array entry get corrected
     by rollover count. Count rate entry/dt is sent via animDataQ and lcdQ.
     Array gets appended to hdf file array.
     """
     filename = str(self._folder / "smALEX_APD{}.hdf".format(self._N))
     f = tables.open_file(filename, mode='w')
     atom = tables.UInt32Atom()
     filters = tables.Filters(complevel=6, complib='zlib')
     timestamps = f.create_earray(f.root,
                                  'timestamps',
                                  atom=atom,
                                  shape=(0, 2),
                                  filters=filters)
     for array in iter(self._dataQ.get, 'STOP'):
         timestamps.append(array)
         n1 = array[0, 0] + (self._int_max * array[0, 1])
         n2 = array[-1, 0] + (self._int_max * array[-1, 1])
         self._animDataQ.put(self._readArraySize / (n2 - n1))
     f.flush()
     f.close()
     print("DataProcesser %i sent all data and exits" % self._N)
Example #22
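Unit test that appends five uint32 columns to a `counts` EArray and verifies every stored column.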
 def test_convert_column_counts_to_matrix_counts(self):
     shape = (5, 0)
     atom = tables.UInt32Atom()
     filters = tables.Filters(complevel=9, complib='zlib')
     h5fh = tables.open_file("earray1.h5", mode='a', filters=filters)
     ea = h5fh.create_earray(h5fh.root, 'counts', atom, shape,
                             "counts matrix", filters, 2**32 - 1)
     self.assertEqual(ea.shape[1], 0)
     col1 = np.array([1, 6, 11, 16, 21], dtype=np.uint32,
                     ndmin=2).transpose()
     ea.append(col1)
     self.assertEqual(ea.shape[1], 1)
     col2 = np.array([2, 7, 12, 17, 22], dtype=np.uint32,
                     ndmin=2).transpose()
     ea.append(col2)
     self.assertEqual(ea.shape[1], 2)
     col3 = np.array([3, 8, 13, 18, 23], dtype=np.uint32,
                     ndmin=2).transpose()
     ea.append(col3)
     self.assertEqual(ea.shape[1], 3)
     col4 = np.array([4, 9, 14, 19, 24], dtype=np.uint32,
                     ndmin=2).transpose()
     ea.append(col4)
     self.assertEqual(ea.shape[1], 4)
     col5 = np.array([5, 10, 15, 20, 25], dtype=np.uint32,
                     ndmin=2).transpose()
     ea.append(col5)
     self.assertEqual(ea.shape[1], 5)
     print h5fh.root.counts[:]
     self.assertTrue(np.array_equal(h5fh.root.counts[:, 0], col1[:, 0]))
     self.assertTrue(np.array_equal(h5fh.root.counts[:, 1], col2[:, 0]))
     self.assertTrue(np.array_equal(h5fh.root.counts[:, 2], col3[:, 0]))
     self.assertTrue(np.array_equal(h5fh.root.counts[:, 3], col4[:, 0]))
     self.assertTrue(np.array_equal(h5fh.root.counts[:, 4], col5[:, 0]))
     h5fh.close()
     os.remove("earray1.h5")
Example #23
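Writes a sorted SIFT feature database: copies the table sorted by `pair_id`, indexes key columns, and stores unique tile ids and counts.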
def create_sorted_db(old_table, sorted_hdf):
    """
    For a set of features in the flat_hdf, write them to a table in HDF5
    """
    out_file = tb.open_file(sorted_hdf, mode='w', title='Feature Database')
    group = out_file.create_group('/', 'sift_db', 'Sift Feature Database')
    old_table.copy(newparent=group,
                   newname='sift_features_sorted',
                   sortby='pair_id',
                   checkCSI=True)

    new_table = out_file.root.sift_db.sift_features_sorted
    new_table.cols.pair_id.create_csindex()
    new_table.cols.x.create_csindex()
    new_table.cols.y.create_csindex()
    new_table.cols.octave.create_csindex()
    unique_tiles, uidcount = np.unique(old_table.cols.pair_id[:],
                                       return_counts=True)
    atom = tb.UInt32Atom()
    filters = tb.Filters(complevel=5, complib='blosc')
    uidcount_a = out_file.create_carray(group,
                                        'unique_tile_count',
                                        atom,
                                        unique_tiles.shape,
                                        filters=filters)

    uid_a = out_file.create_carray(group,
                                   'unique_tiles',
                                   atom,
                                   unique_tiles.shape,
                                   filters=filters)

    uidcount_a[:] = uidcount
    uid_a[:] = unique_tiles

    out_file.close()
Example #24
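Converts spike-time data into bit-packed CArrays in a temporary HDF5 file, then writes an optimized output file with extensive metadata.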
def save_tinfo_core(dat,
                    outfn,
                    n_img=None,
                    n_maxtrial=None,
                    save_spktch=False,
                    n_elec=None,
                    exclude_img=None,
                    n_bins=None,
                    t_min=None,
                    t_max=None,
                    verbose=1,
                    n_slack=N_SLACK,
                    t_adjust=None):
    iid2idx = {}  # image id to index (1-th axis) table
    idx2iid = []  # vice versa
    ch2idx = {}
    idx2ch = []

    # prepare tmp file
    fd, tmpf = tempfile.mkstemp()
    os.close(fd)  # hdf5 module will handle the file. close it now.
    save_tinfo_core.tmpf = tmpf
    frame_onset = None

    foffset_chidx = []
    foffset_imgidx = []
    foffset_tridx = []
    foffset_binidx = []
    foffset_pos = []

    # -- initialization
    fn_nominal = dat.get('filename', '__none__')
    fns_nominal = [fn_nominal]  # backward compatibility

    if n_img is None:
        # if `n_img` is not specified, determine the number of images
        # from the first psf.pk file. (with additional n_slack)
        el0 = dat['all_spike'].keys()[0]
        n_img = len(dat['all_spike'][el0]) + n_slack
    if n_elec is None:
        # if `n_elec` is not specified, determine the number of
        # electrodes from the first psf.pk file.
        # No additional n_slack here!
        n_elec = len(dat['actvelecs'])
    if n_maxtrial is None:
        el0 = dat['all_spike'].keys()[0]
        iis0 = dat['all_spike'][el0].keys()
        n_maxtrial = max([len(dat['all_spike'][el0][ii0])
                          for ii0 in iis0]) + n_slack
    if t_min is None:
        t_min = dat['t_start']
    if t_max is None:
        t_max = dat['t_stop']
    if t_adjust is None:
        t_adjust = dat['t_adjust']
    if n_bins is None:
        n_bins = int(np.ceil((t_max - t_min) / 1000.) + 1)

    # number of bytes required for 1 trial
    n_bytes = int(np.ceil(n_bins / 8.))
    shape = (n_elec, n_img, n_maxtrial, n_bytes)
    shape_org = (n_img, n_elec, n_maxtrial)
    atom = tables.UInt8Atom()
    atom16 = tables.Int16Atom()
    atomu16 = tables.UInt16Atom()
    atomu32 = tables.UInt32Atom()
    atom64 = tables.Int64Atom()
    filters = tables.Filters(complevel=4, complib='blosc')

    save_tinfo_core.h5t = h5t = tables.openFile(tmpf, 'w')
    db = h5t.createCArray(h5t.root, 'db', atom, shape,
                          filters=filters)  # spiking information
    org = h5t.createCArray(h5t.root, 'org', atom16, shape_org,
                           filters=filters)  # origin info
    org[...] = -1
    tr = np.zeros((n_elec, n_img),
                  dtype=np.uint16)  # num of trials per each ch & image

    if verbose > 0:
        print '* Allocated: (n_elec,'\
            ' n_img, n_maxtrial, n_bytes) = (%d, %d, %d, %d)' % shape
        print '* Temp hdf5:', tmpf

    # ----------------------------------------------------------------------
    # -- read thru the dats, store into the tmp.hdf5 file (= tmpf)
    # -- actual conversion for this file happens here
    for ch in sorted(dat['all_spike']):
        makeavail(ch, ch2idx, idx2ch)
        ie = ch2idx[ch]  # index to the electrode, 0-based

        if verbose > 0:
            print '* At: Ch/site/unit %d                          \r' % ie,
            sys.stdout.flush()

        for iid in sorted(dat['all_spike'][ch]):
            # -- main computation
            if is_excluded(iid, exclude_img):
                continue

            # do the conversion
            makeavail(iid, iid2idx, idx2iid)
            ii = iid2idx[iid]  # index to the image, 0-based
            trials = dat['all_spike'][ch][iid]  # get the chunk
            foffsets = None
            if 'all_foffset' in dat:
                foffsets = dat['all_foffset'][ch][iid]
                if len(trials) != len(foffsets):
                    foffsets = None

            ntr0 = len(trials)  # number of trials in the chunk
            itb = tr[ie, ii]  # index to the beginning trial#, 0-based
            ite = itb + ntr0  # index to the end
            n_excess = 0  # number of excess trials in this chunk
            if ite > n_maxtrial:
                n_excess = ite - n_maxtrial
                ite = n_maxtrial
                if verbose > 0:
                    print '** Reached n_maxtrial(=%d): ch=%s, iid=%s' % \
                            (n_maxtrial, str(ch), str(iid))
            # number of actual trials to read in the chunk
            ntr = ntr0 - n_excess
            # book-keeping stuffs...
            org[ii, ie, itb:ite] = 0  # mainly for backward compatibility
            tr[ie, ii] += ntr

            # bit-like spike timing info
            tr_bits = np.zeros((ntr, n_bytes * 8), dtype=np.uint8)

            # sweep the chunk, and bit-pack the data
            trials = trials[:ntr]
            trials_enum = np.concatenate([[i] * len(e)
                                          for i, e in enumerate(trials)
                                          ]).astype('int')
            trials = np.concatenate(trials)

            # selected bins
            sb = np.round((trials - t_min) / 1000.).astype('int')
            si = np.nonzero((sb >= 0) & (sb < n_bins))[0]
            if len(si) == 0:
                # no spikes at all
                db[ie, ii, itb:ite, :] = 0  # this must match.. (1)
                continue

            sb = sb[si]
            st = trials_enum[si]
            tr_bits[st, sb] = 1  # there was a spike
            spk = np.packbits(tr_bits, axis=1)
            # finished this image in this electrode; store the data
            db[ie, ii, itb:ite, :] = spk  # this must match.. (1)

            # keeping foffsets for .nev/.plx files
            if foffsets is not None:
                foffsets = np.concatenate(foffsets)
                if len(foffsets) != len(trials):
                    # shouldn't happen
                    print '** Length of foffsets and trials is different'
                    foffsets = [-1] * len(trials)
                    foffsets = np.array(foffsets)

                nevs = len(sb)
                foffset_chidx.extend([ie] * nevs)
                foffset_imgidx.extend([ii] * nevs)
                foffset_tridx.extend(st)
                foffset_binidx.extend(sb)
                foffset_pos.extend(foffsets[si])

    # -- additional movie data conversion
    # XXX: this assumes `multi=False`
    if 'frame_onset' in dat and len(dat['frame_onset']) > 0:
        print '* Collecting frame onset info'
        if frame_onset is None:
            frame_onset = dat['frame_onset']
        else:
            frame_onset0 = dat['frame_onset']
            for iid in frame_onset0:
                frame_onset[iid].extend(frame_onset0[iid])

    # ----------------------------------------------------------------------
    # -- finished main conversion; now save into a new optimized hdf5 file
    n_img_ac = len(iid2idx)  # actual number of images
    n_tr_ac = np.max(tr)  # actual maximum number of trials

    shape_img = (n_img_ac, n_elec, n_tr_ac, n_bytes)  # img-major form
    shape_ch = (n_elec, n_img_ac, n_tr_ac, n_bytes)  # ch-major form
    shape_org = (n_img_ac, n_elec, n_tr_ac)

    if verbose > 0:
        print 'Optimizing...                                 '
        print '* Actual #images:', n_img_ac
        print '* Actual #trials:', n_tr_ac
        print '* New allocated: (n_elec, n_img, n_maxtrial, n_bytes)' \
                ' = (%d, %d, %d, %d)' % shape_ch

    # -- layout output hdf5 file
    save_tinfo_core.h5o = h5o = tables.openFile(outfn, 'w')
    # /spktimg: bit-packed spike-time info matrix, image-id-major
    spktimg = h5o.createCArray(h5o.root,
                               'spkt_img',
                               atom,
                               shape_img,
                               filters=filters)
    # /meta: metadata group
    meta = h5o.createGroup("/", 'meta', 'Metadata')
    # /meta/iid2idx: iid to matrix-index info
    t_iididx = h5o.createTable(meta, 'iididx', IidIdx,
                               'Image ID and its index')
    # /meta/orgfile_img: file origin info, image-id-major
    orgfile = h5o.createCArray(meta,
                               'orgfile_img',
                               atom16,
                               shape_org,
                               filters=filters)  # origin info

    # -- fill metadata
    # some metadata records
    h5o.createArray(meta, 'srcfiles', fns_nominal)
    h5o.createArray(meta, 'nbins', n_bins)
    h5o.createArray(meta, 't_start0', t_min)
    h5o.createArray(meta, 'tmin', t_min)  # backward compatibility
    h5o.createArray(meta, 't_stop0', t_max)
    h5o.createArray(meta, 'tmax', t_max)  # backward compatibility
    h5o.createArray(meta, 't_adjust', t_adjust)
    h5o.createArray(meta, 'iid2idx_pk', pk.dumps(iid2idx))
    h5o.createArray(meta, 'idx2iid_pk', pk.dumps(idx2iid))
    h5o.createArray(meta, 'idx2iid', idx2iid)
    h5o.createArray(meta, 'ch2idx_pk', pk.dumps(ch2idx))
    h5o.createArray(meta, 'idx2ch', idx2ch)
    # save as img-major order (tr is in channel-major)
    h5o.createArray(meta, 'ntrials_img', tr[:, :n_img_ac].T)
    h5o.createArray(meta, 'frame_onset_pk', pk.dumps(frame_onset))
    # cluster related stuffs
    for clu_k in ['idx2gcid', 'cid_sel', 'gcid2idx']:
        if clu_k not in dat:
            continue
        h5o.createArray(meta, clu_k + '_pk', pk.dumps(dat[clu_k]))
    # this is deprecated.  mainly for backward compatibility
    orgfile[...] = org[:n_img_ac, :, :n_tr_ac]

    # populate /meta/iididx
    r = t_iididx.row
    for iid in iid2idx:
        r['iid'] = str(iid)
        r['iid_pk'] = pk.dumps(iid)
        r['idx'] = iid2idx[iid]
        r.append()
    t_iididx.flush()

    # -- store spiking time data
    for i in xrange(n_img_ac):
        if verbose > 0:
            print '* At: Image %d                          \r' % i,
            sys.stdout.flush()
        spktimg[i, :, :, :] = db[:, i, :n_tr_ac, :]

    if save_spktch:
        # /spktch: bit-packed spike-time info matrix, channel-major
        spktch = h5o.createCArray(h5o.root,
                                  'spkt_ch',
                                  atom,
                                  shape_ch,
                                  filters=filters)
        for i in xrange(n_elec):
            if verbose > 0:
                print '* At: Ch/site/unit %d                   \r' % i,
                sys.stdout.flush()
            spktch[i, :, :, :] = db[i, :n_img_ac, :n_tr_ac, :]

    # foffset stuffs
    foffset_chidx = np.array(foffset_chidx, dtype='uint16')
    foffset_imgidx = np.array(foffset_imgidx, dtype='uint32')
    foffset_tridx = np.array(foffset_tridx, dtype='uint16')
    foffset_binidx = np.array(foffset_binidx, dtype='uint16')
    foffset_pos = np.array(foffset_pos, dtype='int64')

    for src, name, atom0 in zip([
            foffset_chidx, foffset_imgidx, foffset_tridx, foffset_binidx,
            foffset_pos
    ], [
            'foffset_chidx', 'foffset_imgidx', 'foffset_tridx',
            'foffset_binidx', 'foffset_pos'
    ], [atomu16, atomu32, atomu16, atomu16, atom64]):
        if len(src) == 0:
            continue
        dst = h5o.createCArray(meta, name, atom0, src.shape, filters=filters)
        dst[:] = src[:]

    if verbose > 0:
        print

    h5o.close()
    h5t.close()
Example #25
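Variant of the dtype-to-atom table creator that also builds intermediate groups from a dotted name before creating the EArray or VLArray.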
    def _create_table_list(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        The modified version for creating table with appendList
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(np.int): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(np.bool): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == list and type(example[0]) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape[1:]

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)

            nodes = h5.list_nodes(h5.root)

            nmpt = name.replace('.', '/\n')
            nmpt = nmpt.split('\n')

            path = '/'
            for kay in range(len(nmpt) - 1):
                #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path,nmpt[kay][:-1])
                try:
                    h5.is_visible_node(path + nmpt[kay][:-1])
                except:
                    h5.create_group(path, nmpt[kay][:-1])
                path += nmpt[kay]

            self.tables[name] = h5.create_earray(path,
                                                 nmpt[-1],
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)

        elif type(example) == list and type(example[0]) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)

            nodes = h5.list_nodes(h5.root)

            nmpt = name.replace('.', '/\n')
            nmpt = nmpt.split('\n')

            path = '/'
            for kay in range(len(nmpt) - 1):
                #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path,nmpt[kay][:-1])
                try:
                    h5.is_visible_node(path + nmpt[kay][:-1])
                except:
                    h5.create_group(path, nmpt[kay][:-1])
                path += nmpt[kay]

            self.tables[name] = h5.create_vlarray(path,
                                                  nmpt[-1],
                                                  h5type,
                                                  filters=filters)

        self.types[name] = type(example)
Example #26
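Creates a KWIK file skeleton: channel groups, channels, spike EArrays (including UInt32 cluster arrays), cluster groups, recordings, and event types.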
def create_kwik(path, experiment_name=None, prm=None, prb=None, overwrite=True):
    """Create a KWIK file.
    
    Arguments:
      * path: path to the .kwik file.
      * experiment_name
      * prm: a dictionary representing the contents of the PRM file (used for
        SpikeDetekt)
      * prb: a dictionary with the contents of the PRB file
    
    """
    if experiment_name is None:
        experiment_name = ''
    if prm is None:
        prm = {}
    if prb is None:
        prb = {}
    
    if not overwrite and os.path.exists(path):
        return
    
    file = tb.openFile(path, mode='w')
    
    file.root._f_setAttr('kwik_version', 2)
    file.root._f_setAttr('name', experiment_name)

    file.createGroup('/', 'application_data')
    
    # Set the SpikeDetekt parameters
    file.createGroup('/application_data', 'spikedetekt')
    for prm_name, prm_value in iteritems(prm):
        file.root.application_data.spikedetekt._f_setAttr(prm_name, prm_value)
    
    file.createGroup('/', 'user_data')
    
    # Create channel groups.
    file.createGroup('/', 'channel_groups')
    
    for igroup, group_info in prb.iteritems():
        igroup = int(igroup)
        group = file.createGroup('/channel_groups', str(igroup))
        # group_info: channel, graph, geometry
        group._f_setAttr('name', 'channel_group_{0:d}'.format(igroup))
        group._f_setAttr('adjacency_graph', 
            np.array(group_info.get('graph', np.zeros((0, 2))), dtype=np.int32))
        file.createGroup(group, 'application_data')
        file.createGroup(group, 'user_data')
        
        # Create channels.
        file.createGroup(group, 'channels')
        channels = group_info.get('channels', [])
        
        # Add the channel order.
        group._f_setAttr('channel_order', np.array(channels, dtype=np.int32))
        
        for channel_idx in channels:
            # channel is the absolute channel index.
            channel = file.createGroup(group.channels, str(channel_idx))
            channel._f_setAttr('name', 'channel_{0:d}'.format(channel_idx))
            
            channel._f_setAttr('ignored', False)  # "channels" only contains 
                                                  # not-ignored channels here
            pos = group_info.get('geometry', {}). \
                get(channel_idx, None)
            if pos is not None:
                pos = np.array(pos, dtype=np.float32)
            channel._f_setAttr('position', pos)
            channel._f_setAttr('voltage_gain', prm.get('voltage_gain', 0.))
            channel._f_setAttr('display_threshold', 0.)
            file.createGroup(channel, 'application_data')
            file.createGroup(channel.application_data, 'spikedetekt')
            file.createGroup(channel.application_data, 'klustaviewa')
            file.createGroup(channel, 'user_data')
            
        # Create spikes.
        spikes = file.createGroup(group, 'spikes')
        file.createEArray(spikes, 'time_samples', tb.UInt64Atom(), (0,),
                          expectedrows=1000000)
        file.createEArray(spikes, 'time_fractional', tb.UInt8Atom(), (0,),
                          expectedrows=1000000)
        file.createEArray(spikes, 'recording', tb.UInt16Atom(), (0,),
                          expectedrows=1000000)
        clusters = file.createGroup(spikes, 'clusters')
        file.createEArray(clusters, 'main', tb.UInt32Atom(), (0,),
                          expectedrows=1000000)
        file.createEArray(clusters, 'original', tb.UInt32Atom(), (0,),
                          expectedrows=1000000)
        
        fm = file.createGroup(spikes, 'features_masks')
        fm._f_setAttr('hdf5_path', '{{kwx}}/channel_groups/{0:d}/features_masks'. \
            format(igroup))
        wr = file.createGroup(spikes, 'waveforms_raw')
        wr._f_setAttr('hdf5_path', '{{kwx}}/channel_groups/{0:d}/waveforms_raw'. \
            format(igroup))
        wf = file.createGroup(spikes, 'waveforms_filtered')
        wf._f_setAttr('hdf5_path', '{{kwx}}/channel_groups/{0:d}/waveforms_filtered'. \
            format(igroup))
        
        # Create clusters.
        clusters = file.createGroup(group, 'clusters')
        file.createGroup(clusters, 'main')
        file.createGroup(clusters, 'original')
        
        # Create cluster groups.
        cluster_groups = file.createGroup(group, 'cluster_groups')
        file.createGroup(cluster_groups, 'main')
        file.createGroup(cluster_groups, 'original')
        
    # Create recordings.
    file.createGroup('/', 'recordings')
    
    # Create event types.
    file.createGroup('/', 'event_types')
            
    file.close()
Example #27
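Runs network inference and appends each predicted 192x192x192 volume to a UInt32 `pred` EArray in a submission HDF5 file.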
def inference(cfg, is_testing=False):
    """
    Inference for either PPN or (xor) base network (e.g. UResNet)
    """
    if not os.path.isdir(cfg.DISPLAY_DIR):
        os.makedirs(cfg.DISPLAY_DIR)

    if is_testing:
        _, data = get_data(cfg)
    else:
        data, _ = get_data(cfg)

    net = basenets[cfg.BASE_NET](cfg=cfg)
    if cfg.WEIGHTS_FILE_PPN is None and cfg.WEIGHTS_FILE_BASE is None:
        raise Exception("Need a checkpoint file")

    net.init_placeholders()
    net.create_architecture(is_training=False)
    duration = 0

    metrics = UResNetMetrics(cfg)
    FILTERS = tables.Filters(complevel=5,
                             complib='zlib',
                             shuffle=True,
                             bitshuffle=False,
                             fletcher32=False,
                             least_significant_digit=None)
    f_submission = tables.open_file('/data/codalab/submission_5-6.hdf5',
                                    'w',
                                    filters=FILTERS)
    preds_array = f_submission.create_earray('/',
                                             'pred',
                                             tables.UInt32Atom(),
                                             (0, 192, 192, 192),
                                             expectedrows=data.n)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        load_weights(cfg, sess)
        for i in range(min(data.n, cfg.MAX_STEPS)):
            print("%d/%d" % (i, data.n))
            blob = data.forward()
            if is_testing:
                blob['labels'] = blob['data'][..., 0]
            start = time.time()
            summary, results = net.test_image(sess, blob)
            end = time.time()
            duration += end - start
            # Drawing time
            # display_uresnet(blob, cfg, index=i, **results)
            if not is_testing:
                metrics.add(blob, results)
            mask = np.where(blob['data'][..., 0] > 0)
            preds = np.reshape(results['predictions'], (1, 192, 192, 192))
            print(np.count_nonzero(preds[mask] > 0))
            preds[mask] = 0
            preds_array.append(preds)
            print(preds.shape)

    preds_array.close()
    f_submission.close()

    duration /= cfg.MAX_STEPS
    print("Average duration of inference = %f ms" % duration)
    if not is_testing:
        metrics.plot()
Example #28
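Allocates uint32 CArrays for a concept matrix, a result matrix, and an expected-values matrix, sized from the configured row count.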
fare = Attribute('fare_amount', 2.5, 300.0)
surcharge = Attribute('surcharge', 0.0, 3.0)
tip = Attribute('tip_amount', 0.0, 165.0)
toll = Attribute('tolls_amount', 0.0, 20.0)
total = Attribute('total_amount', 2.5, 370.5)
attributes = [
    ratecode, passenger, triptime, distance, pickuplong, pickuplat,
    dropofflong, dropofflat, fare, surcharge, tip, toll, total
]

rownr = 2000
matrixrownr = rownr * pow(math.log(rownr), 2)

fileName = 'smallconcept.h5'
shape = (int(matrixrownr), rownr + 1)
atom = tables.UInt32Atom()
filters = tables.Filters(complevel=5, complib='zlib')
h5f = tables.open_file(fileName, 'w')
ca = h5f.create_carray(h5f.root, 'carray', atom, shape, filters=filters)

#matrix = numpy.zeros(shape=(int(matrixrownr),rownr))
#vector = []
vfileName = 'smallconceptresult.h5'
vshape = (int(matrixrownr), 2)
vh5f = tables.open_file(vfileName, 'w')
vca = vh5f.create_carray(vh5f.root, 'carray', atom, vshape, filters=filters)

efileName = 'smallconceptexpected.h5'
eshape = (rownr, 2)
eh5f = tables.open_file(efileName, 'w')
eca = eh5f.create_carray(eh5f.root, 'carray', atom, eshape, filters=filters)
Example #29
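Packs the Dogs vs. Cats training images into a VLArray of pixel values plus CArrays for the targets and image shapes.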
rng = make_np_rng(default_seed=123522)


if __name__ == "__main__":
    base_dir = serial.preprocess(join('${PYLEARN2_DATA_PATH}', 'dogs_vs_cats'))
    files = [f for f in listdir(join(base_dir, 'train'))
             if isfile(join(base_dir, 'train', f))]

    filters = tables.Filters(complib='blosc', complevel=5)
    h5file = tables.open_file(join(base_dir, 'train.h5'), mode='w',
                              title='Dogs vs. Cats - Training set',
                              filters=filters)
    group = h5file.create_group(h5file.root, 'Data', 'Data')
    atom_8 = tables.UInt8Atom()
    atom_32 = tables.UInt32Atom()
    X = h5file.create_vlarray(group, 'X', atom=atom_8, title='Data values',
                              expectedrows=25000, filters=filters)
    y = h5file.create_carray(group, 'y', atom=atom_8, title='Data targets',
                             shape=(25000, 1), filters=filters)
    s = h5file.create_carray(group, 's', atom=atom_32, title='Data shapes',
                             shape=(25000, 3), filters=filters)

    # Shuffle examples around
    rng.shuffle(files)
    for i, f in enumerate(files):
        image = misc.imread(join(base_dir, 'train', f))
        X.append(image.flatten())
        target = 0 if 'cat' in f else 1
        y[i] = target
        s[i] = image.shape
Example #30
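Merges the timestamp arrays of two files into new `timestamps` and `detectors` CArrays in a combined output file.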
    ts_1 = f1.root.timestamps

    # file 2, apd2
    f2 = tables.open_file('tempAPD2_copy.hdf', 'r')
    ts_2 = f2.root.timestamps

    # lengths
    f1_num = f1.root.timestamps.nrows
    f2_num = f2.root.timestamps.nrows
    row_num = (f1_num + f2_num)

    # file 3, outfile
    f3 = tables.open_file('sortedFile.hdf', mode='w')
    f3.create_group(f3.root, name='photon_data')
    filters = tables.Filters(complevel=6, complib='zlib')
    atom1 = tables.UInt32Atom()
    atom2 = tables.Int8Atom()
    ts = f3.create_carray('/photon_data',
                          name='timestamps',
                          atom=atom1,
                          shape=(row_num, 1),
                          filters=filters)
    det = f3.create_carray('/photon_data',
                           name='detectors',
                           atom=atom2,
                           shape=(row_num, 1),
                           filters=filters)

    # Calculations
    start = time.time()
    merge_files(ts_1, ts_2, ts, det, f1_num, f2_num)