Example #1
def make_shd(path_to_train, path_to_test, path_to_hdf5, digits, window_length):
    data = tables.open_file(path_to_train, 'r')

    n_neurons = 700
    T_max = max([max(data.root.spikes.times[i]) for i in range(len(data.root.labels))])
    S_prime = math.ceil(T_max / window_length)

    pattern = [1, 0, 0, 0, 0]  # the pattern used as output for the considered digit

    hdf5_file = tables.open_file(path_to_hdf5, 'w')
    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    train_data = hdf5_file.create_array(where=hdf5_file.root.train, name='data', atom=tables.BoolAtom(),
                                        obj=load_shd(data, S_prime, digits, window_length))
    labels = hdf5_file.create_earray(where=hdf5_file.root.train, name='label', atom=tables.BoolAtom(),
                                     shape=(0, len(digits), S_prime))
Example #2
    def __init__(self, database: AbstractDB):
        """
        Initialize the atoms for meta-data (types, valid tag, and splits)
        Args:
            database (AbstractDB): Associated Database object
        """
        super().__init__(database)
        self.filename_atom = tables.StringAtom(itemsize=255)
        self.types_atom = tables.StringAtom(itemsize=255)
        # whether the patch is valid.
        self.valid_atom = tables.BoolAtom(shape=(), dflt=False)
        # save the meta info: split
        # noinspection PyArgumentList
        self.file_list_atom = tables.StringAtom(itemsize=get_path_limit())
        # noinspection PyArgumentList
        self.split_atom = tables.IntAtom(shape=(), dflt=0)

        self.hdf5_organizer = H5Organizer(self.database,
                                          self.database.group_level)
        self.data_extractor = DataExtractor(self.database)
        self.weight_writer = WeightCollector(
            self.database,
            self.data_extractor,
            weight_counter=self.database.weight_counter_callable)
        self.data_size = {}
Example #3
def subset_and_writeout(hf_in, fname, thin, maskval, binfn=lambda x:x):
    print('Subsetting for %s' % fname)
    res=5
    hf_out = tb.openFile(os.path.join('5k-covariates',fname.replace('-','_').replace('.','_')+'.hdf5'),'w')
    hf_out.createArray('/','lon',lon[lon_min_i:lon_max_i:res])
    hf_out.createArray('/','lat',lat[lat_min_i:lat_max_i:res])
    
    d = hf_in.root.data[(hf_in.root.data.shape[0]-lat_max_i*thin):\
                        (hf_in.root.data.shape[0]-lat_min_i*thin):\
                        thin, 
                        lon_min_i*thin:\
                        lon_max_i*thin:\
                        thin]
    
    d = map_utils.grid_convert(map_utils.grid_convert(d,'y-x+','x+y+')[::res,::res], 'x+y+','y-x+')
    
    hf_out.createCArray('/','data',atom=tb.FloatAtom(),shape=d.shape,filters=tb.Filters(complevel=1,complib='zlib'))
    hf_out.createCArray('/','mask',atom=tb.BoolAtom(),shape=d.shape,filters=tb.Filters(complevel=1,complib='zlib'))
    hf_out.root.data.attrs.view = 'y-x+'
    
    
    hf_out.root.data[:]=binfn(d)
    hf_out.root.mask[:] = (d==maskval)+clipped_pete_mask
    
    hf_out.close()
Example #4
def make_h5_col_file(dat, content, colname):
    """Make a new h5 table to hold column from ``dat``."""
    filename = os.path.join('data', content, 'msid', colname + '.h5')
    if os.path.exists(filename):
        os.unlink(filename)
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)

    col = dat[colname]
    h5shape = (0, ) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.createEArray(h5.root,
                    'data',
                    h5type,
                    h5shape,
                    title=colname,
                    expectedrows=86400 * 365 * 10)
    h5.createEArray(h5.root,
                    'quality',
                    tables.BoolAtom(), (0, ),
                    title='Quality',
                    expectedrows=86400 * 365 * 10)
    print('Made', colname)
    h5.close()
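
The paired 'data'/'quality' EArrays above are a recurring pattern in this collection. Below is a minimal, self-contained sketch of the same idea using the modern snake_case PyTables API; the file name and sizes are placeholders, not taken from the project above.

import numpy as np
import tables

filters = tables.Filters(complevel=5, complib='zlib')
with tables.open_file('example_col.h5', mode='w', filters=filters) as h5:
    # Extendable value column plus a parallel boolean quality mask.
    h5.create_earray(h5.root, 'data', tables.Float64Atom(), (0,),
                     title='values', expectedrows=100000)
    h5.create_earray(h5.root, 'quality', tables.BoolAtom(), (0,),
                     title='Quality', expectedrows=100000)
    h5.root.data.append(np.random.rand(100))
    h5.root.quality.append(np.zeros(100, dtype=bool))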
Example #5
    def write_logControl(self, topic_group, data):

        fields = ['taskName', 'actionType', 'skillName', 'topics']
        self.pytable_writer_helper(topic_group, fields,
                                   tables.StringAtom(itemsize=20), data)
        self.pytable_writer_helper(topic_group, ['playback'],
                                   tables.BoolAtom(), data)
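
pytable_writer_helper is project-internal and its body does not appear in this collection; a plausible sketch of what such a helper might do (the h5f attribute and the create-then-append behaviour are assumptions, not the project's actual code):

def pytable_writer_helper(self, topic_group, fields, atom, data):
    # Hypothetical reconstruction: one extendable 1-D array per field,
    # filled from the matching key of the `data` dict.
    for field in fields:
        arr = self.h5f.create_earray(topic_group, field, atom, (0,))
        arr.append(data[field])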
Example #6
def make_h5_col_file(dats, colname):
    """Make a new h5 table to hold column from ``dat``."""
    filename = msid_files['msid'].abs
    filedir = os.path.dirname(filename)
    if not os.path.exists(filedir):
        os.makedirs(filedir)

    # Estimate the number of rows for 20 years based on available data
    times = np.hstack([x['TIME'] for x in dats])
    dt = np.median(times[1:] - times[:-1])
    n_rows = int(86400 * 365 * 20 / dt)

    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)

    col = dats[-1][colname]
    h5shape = (0, ) + col.shape[1:]
    h5type = tables.Atom.from_dtype(col.dtype)
    h5.createEArray(h5.root,
                    'data',
                    h5type,
                    h5shape,
                    title=colname,
                    expectedrows=n_rows)
    h5.createEArray(h5.root,
                    'quality',
                    tables.BoolAtom(), (0, ),
                    title='Quality',
                    expectedrows=n_rows)
    logger.verbose(
        'WARNING: made new file {} for column {!r} shape={} with n_rows(1e6)={}'
        .format(filename, colname, h5shape, n_rows / 1.0e6))
    h5.close()
Example #7
    def components(self, components=None):
        """
        Retrieve or store each individual submatrix composing the aggregate matrix.

        :param components: List of (masked) numpy arrays
        :return: List of (masked) numpy arrays
        """
        if components is not None:
            try:
                self.file.remove_node(self._group, 'components', recursive=True)
            except tables.NoSuchNodeError:
                pass

            component_group = self.file.create_group(self._group, 'components')

            for i, m in enumerate(components):
                if m is None:
                    m = np.array([np.nan])
                cm = self.file.create_carray(component_group, 'component_{}'.format(i),
                                             tables.Float32Atom(), m.shape)
                cm[:] = m

                if hasattr(m, 'mask'):
                    mm = self.file.create_carray(component_group, 'mask_{}'.format(i),
                                                 tables.BoolAtom(), m.shape)
                    mm[:] = m.mask
            self.file.flush()

        max_ix = -1
        masks = dict()
        components = dict()
        component_group = self.file.get_node(self._group, 'components')
        for node in self.file.iter_nodes(component_group):
            if node.name.startswith('mask_'):
                ix = int(node.name[5:])
                masks[ix] = node[:]
                max_ix = max(ix, max_ix)
            elif node.name.startswith('component_'):
                ix = int(node.name[10:])
                m = node[:]
                if m.shape == (1,) and np.isnan(m[0]):
                    components[ix] = None
                else:
                    components[ix] = m
                max_ix = max(ix, max_ix)

        sorted_components = []
        for ix in range(max_ix + 1):
            component = components[ix]
            if component is not None:
                if ix in masks:
                    mask = masks[ix]
                else:
                    mask = None

                sorted_components.append(np.ma.masked_array(component, mask=mask))
            else:
                sorted_components.append(None)
        return sorted_components
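
The mask_{i} arrays above are the usual way to persist numpy masked arrays in PyTables, which has no native masked type: the raw values and the boolean mask are stored as two parallel CArrays and reassembled with np.ma.masked_array on read. A stripped-down sketch of that round trip, with illustrative file and node names:

import numpy as np
import tables

m = np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False])

with tables.open_file('components.h5', 'w') as f:
    values = f.create_carray(f.root, 'component_0', tables.Float32Atom(), m.shape)
    values[:] = m.filled(np.nan)   # raw values, masked entries as NaN
    mask = f.create_carray(f.root, 'mask_0', tables.BoolAtom(), m.shape)
    mask[:] = m.mask               # parallel boolean mask

with tables.open_file('components.h5', 'r') as f:
    restored = np.ma.masked_array(f.root.component_0[:], mask=f.root.mask_0[:])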
Example #8
File: io.py  Project: nivir/QAMpy
def create_input_group(h5f,
                       title="input data at transmitter",
                       rolloff_dflt=np.nan,
                       attrs={},
                       arrays=["symbols", "bits"],
                       **kwargs):
    """
    Create the table for saving the input symbols and bits

    Parameters
    ----------

    h5f : string or h5filehandle
        The file to use, if a string create or open new file
    title: string, optional
        The title description of the group
    rolloff_dflt: float, optional
        default value for the rolloff column of the signal table
    attrs: dict, optional
        attributes on the table
    arrays: list, optional
        name of arrays referenced in the table
    **kwargs:
        keyword arguments passed to create_table/array, it is highly recommended to set expectedrows

    Returns
    -------
    h5f : h5filehandle
        Pytables handle to the hdf file
    """
    try:
        gr = h5f.create_group("/", "input", title=title)
    except AttributeError:
        h5f = tb.open_file(h5f, "a")
        gr = h5f.create_group("/", "input", title=title)
    # if no shape for input syms or bits is given use scalar
    t_in = h5f.create_table(gr,
                            "signal", {
                                "id": tb.Int64Col(),
                                "idx_symbols": tb.Int64Col(dflt=0),
                                "idx_bits": tb.Int64Col(dflt=0),
                                "rolloff": tb.Float64Col(dflt=rolloff_dflt)
                            },
                            title="parameters of input signal",
                            **kwargs)
    setattr(t_in.attrs, "arrays", arrays)
    arr_syms = h5f.create_mdvlarray(gr,
                                    "symbols",
                                    tb.ComplexAtom(itemsize=16, dflt=np.nan),
                                    title="sent symbols",
                                    **kwargs)
    arr_bits = h5f.create_mdvlarray(gr,
                                    "bits",
                                    tb.BoolAtom(),
                                    title="sent bits",
                                    **kwargs)
    for k, v in attrs.items():
        setattr(t_in.attrs, k, v)
    return h5f
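
Note that create_mdvlarray is a QAMpy extension (a multi-dimensional VLArray helper attached to its file handles), not part of stock PyTables. With plain PyTables, a one-dimensional variable-length boolean array gives a similar effect for the bits; a sketch with a hypothetical file name:

import numpy as np
import tables as tb

with tb.open_file('input_bits.h5', 'w') as h5f:
    bits = h5f.create_vlarray(h5f.root, 'bits', tb.BoolAtom(), title='sent bits')
    # One row per signal; rows may have different lengths.
    bits.append(np.random.randint(0, 2, 1000).astype(bool))
    bits.append(np.random.randint(0, 2, 500).astype(bool))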
Example #9
File: io.py  Project: leiYan1225/QAMpy
def create_recvd_data_group(h5f, title="data analysis and qampy results", description=None, oversampling_dflt=2,
                            attrs=DSP_UNITS, arrays=["data", "symbols", "taps", "bits"], nmodes=2, **kwargs):
    """
    Create the table for saving recovered data and parameters after DSP

    Parameters
    ----------

    h5f : string or h5filehandle
        The file to use, if a string create or open new file
    title: string
        The title description of the group
    description: dict or tables.IsDescription (optional)
        If given use to create the table
    attrs: dict, optional
        attributes for the table
    arrays: list, optional
        name of arrays referenced in the table
    nmodes: int, optional
        number of modes/polarisations
    **kwargs:
        keyword arguments passed to create_table/array, it is highly recommended to set expectedrows

    Returns
    -------
    h5f : h5filehandle
        Pytables handle to the hdf file
    """
    try:
        gr = h5f.create_group("/", "analysis", title=title)
    except AttributeError:
        h5f = tb.open_file(h5f, "a")
        gr = h5f.create_group("/", "analysis", title=title)
    gr_dsp = h5f.create_group(gr, "qampy", title="Signal from DSP")
    if description is None:
        dsp_params = { "freq_offset": tb.Float64Col(dflt=np.nan),
                       "freq_offset_N": tb.Int64Col(dflt=0), "phase_est": tb.StringCol(itemsize=20),
                       "N_angles": tb.Float64Col(dflt=np.nan), "ph_est_blocklength": tb.Int64Col(),
                       "stepsize": tb.Float64Col(shape=2), "trsyms": tb.Float64Col(shape=2),
                       "iterations": tb.Int64Col(shape=2),
                       "ntaps": tb.Int64Col(),
                       "method": tb.StringCol(itemsize=20)}
        description = {"id":tb.Int64Col(), "idx_data": tb.Int64Col(), "idx_symbols": tb.Int64Col(),
                       "idx_bits": tb.Int64Col(), "idx_taps": tb.Int64Col(),
                       "evm": tb.Float64Col(dflt=np.nan, shape=nmodes), "ber":tb.Float64Col(dflt=np.nan, shape=nmodes),
                       "ser":tb.Float64Col(dflt=np.nan, shape=nmodes), "oversampling":tb.Int64Col(dflt=oversampling_dflt)}
        description.update(dsp_params)
    t_rec = h5f.create_table(gr_dsp, "signal", description, "signal after DSP", **kwargs)
    setattr(t_rec.attrs, "arrays", arrays)
    data_arr = h5f.create_mdvlarray(gr_dsp, "data", tb.ComplexAtom(itemsize=16), "signal after DSP", **kwargs)
    syms_arr = h5f.create_mdvlarray(gr_dsp, "symbols", tb.ComplexAtom(itemsize=16, dflt=np.nan), "recovered symbols", **kwargs)
    taps_arr = h5f.create_mdvlarray(gr_dsp, "taps", tb.ComplexAtom(itemsize=16, dflt=np.nan), "qampy taps", **kwargs)
    bits_arr = h5f.create_mdvlarray(gr_dsp, "bits", tb.BoolAtom(dflt=False), "recovered bits", **kwargs)
    for k, v in attrs.items():
        setattr(t_rec.attrs, k, v)
    return h5f
Example #10
def repeat_expt(smplr, n_expts, n_labels, output_file=None):
    """
    Parameters
    ----------
    smplr : sub-class of PassiveSampler
        sampler must have a sample_distinct method, reset method and ...

    n_expts : int
        number of expts to run

    n_labels : int
        number of labels to query from the oracle in each expt
    """

    FILTERS = tables.Filters(complib='zlib', complevel=5)

    max_iter = smplr._max_iter
    n_class = smplr._n_class
    if max_iter < n_labels:
        raise ValueError(
            "Cannot query {} labels. Sampler ".format(n_labels) +
            "instance supports only {} iterations".format(max_iter))

    if output_file is None:
        # Use current date/time as filename
        output_file = 'expt_' + time.strftime("%d-%m-%Y_%H:%M:%S") + '.h5'
    logging.info("Writing output to {}".format(output_file))

    f = tables.open_file(output_file, mode='w', filters=FILTERS)
    float_atom = tables.Float64Atom()
    bool_atom = tables.BoolAtom()
    int_atom = tables.Int64Atom()

    array_F = f.create_carray(f.root, 'F_measure', float_atom,
                              (n_expts, n_labels, n_class))
    array_s = f.create_carray(f.root, 'n_iterations', int_atom, (n_expts, 1))
    array_t = f.create_carray(f.root, 'CPU_time', float_atom, (n_expts, 1))

    logging.info("Starting {} experiments".format(n_expts))
    for i in range(n_expts):
        if i % np.ceil(n_expts / 10).astype(int) == 0:
            logging.info("Completed {} of {} experiments".format(i, n_expts))
        ti = time.process_time()
        smplr.reset()
        smplr.sample_distinct(n_labels)
        tf = time.process_time()
        if hasattr(smplr, 'queried_oracle_'):
            array_F[i, :, :] = smplr.estimate_[smplr.queried_oracle_]
        else:
            array_F[i, :, :] = smplr.estimate_
        array_s[i] = smplr.t_
        array_t[i] = tf - ti
    f.close()

    logging.info("Completed all experiments")
Example #11
def make_shd(path_to_train, path_to_test, path_to_hdf5, digits, alphabet_size,
             pattern, window_length):
    train_data_file = tables.open_file(path_to_train, 'r')
    test_data_file = tables.open_file(path_to_test, 'r')

    T_max = 1. * 1e6
    S_prime = math.ceil(T_max / window_length)

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    # Make train group and arrays
    train = hdf5_file.create_group(where=hdf5_file.root, name='train')

    train_data, output_signal = load_shd(path_to_train, S_prime, digits,
                                         window_length, alphabet_size, pattern)
    train_data_array = hdf5_file.create_array(where=hdf5_file.root.train,
                                              name='data',
                                              atom=tables.BoolAtom(),
                                              obj=train_data)
    train_labels_array = hdf5_file.create_earray(where=hdf5_file.root.train,
                                                 name='label',
                                                 atom=tables.BoolAtom(),
                                                 obj=output_signal)

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data, output_signal = load_shd(path_to_test, S_prime, digits,
                                        window_length, alphabet_size, pattern)
    test_data_array = hdf5_file.create_array(where=hdf5_file.root.test,
                                             name='data',
                                             atom=tables.BoolAtom(),
                                             obj=test_data)

    test_labels_array = hdf5_file.create_earray(where=hdf5_file.root.test,
                                                name='label',
                                                atom=tables.BoolAtom(),
                                                obj=output_signal)

    make_stats_group(hdf5_file)

    train_data_file.close()
    test_data_file.close()
    hdf5_file.close()
Example #12
def make_mnist_dvs(path_to_data, path_to_hdf5, digits, max_pxl_value, min_pxl_value, T_max, window_length, scale):
    """"
    Preprocess the .aedat file and save the dataset as an .hdf5 file
    """
    dirs = [r'/' + dir_ for dir_ in os.listdir(path_to_data)]

    S_prime = math.ceil(T_max / window_length)

    pattern = [1, 0, 0, 0, 0]  # the pattern used as output for the considered digit

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    train_data = hdf5_file.create_earray(where=hdf5_file.root.train, name='data', atom=tables.BoolAtom(),
                                         shape=(0, (max_pxl_value - min_pxl_value + 1) ** 2, S_prime))
    train_labels = hdf5_file.create_earray(where=hdf5_file.root.train, name='label', atom=tables.BoolAtom(),
                                           shape=(0, len(digits), S_prime))

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data = hdf5_file.create_earray(where=hdf5_file.root.test, name='data', atom=tables.BoolAtom(),
                                        shape=(0, (max_pxl_value - min_pxl_value + 1) ** 2, S_prime))
    test_labels = hdf5_file.create_earray(where=hdf5_file.root.test, name='label', atom=tables.BoolAtom(),
                                          shape=(0, len(digits), S_prime))

    for i, digit in enumerate(digits):
        for dir_ in dirs:
            if dir_.find(str(digit)) != -1:
                for subdir, _, _ in os.walk(path_to_data + dir_):
                    if subdir.find(scale) != -1:
                        for j, file in enumerate(glob.glob(subdir + r'/*.aedat')):
                            if j < 0.9 * len(glob.glob(subdir + r'/*.aedat')):
                                print('train', file)
                                train_data.append(
                                    load_dvs(file, S_prime, min_pxl_value=min_pxl_value, max_pxl_value=max_pxl_value,
                                             window_length=window_length))

                                output_signal = np.array(
                                    [[[0] * S_prime] * i
                                     + [pattern * int(S_prime / len(pattern)) + pattern[:(S_prime % len(pattern))]]
                                     + [[0] * S_prime] * (len(digits) - 1 - i)],
                                    dtype=bool)
                                train_labels.append(output_signal)
                            else:
                                print('test', file)
                                test_data.append(
                                    load_dvs(file, S_prime, min_pxl_value=min_pxl_value, max_pxl_value=max_pxl_value,
                                             window_length=window_length))

                                output_signal = np.array(
                                    [[[0] * S_prime] * i
                                     + [pattern * int(S_prime / len(pattern)) + pattern[:(S_prime % len(pattern))]]
                                     + [[0] * S_prime] * (len(digits) - 1 - i)],
                                    dtype=bool)
                                test_labels.append(output_signal)

    hdf5_file.close()
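
The label construction above tiles `pattern` to exactly S_prime time steps and places it at the row of the current digit. The tiling step in isolation, with small numbers for clarity:

pattern = [1, 0, 0, 0, 0]
S_prime = 12
tiled = pattern * int(S_prime / len(pattern)) + pattern[:(S_prime % len(pattern))]
assert tiled == [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]
assert len(tiled) == S_prime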
Example #13
    def write_bluetooth(self, topic_group, data):

        str_fields = ['mac_addr', 'dev_name']
        self.pytable_writer_helper(topic_group, str_fields,
                                   tables.StringAtom(itemsize=20), data)
        self.pytable_writer_helper(topic_group, ['is_present'],
                                   tables.BoolAtom(), data)
        self.pytable_writer_helper(topic_group, ['rssi'], tables.Int64Atom(),
                                   data)
        self.pytable_writer_helper(topic_group, ['time'], tables.Float64Atom(),
                                   data)
Example #14
    def _create_table_list(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        The modified version for creating table with appendList
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(int): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(bool): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == list and type(example[0]) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape[1:]

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_earray(h5.root,
                                                 name,
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)
        elif type(example) == list and type(example[0]) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_vlarray(h5.root,
                                                  name,
                                                  h5type,
                                                  filters=filters)
        self.types[name] = type(example)
Example #15
    def _create_table(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(int): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(bool): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError(
                "Could not create table %s because of unknown dtype '%s'" %
                (name, example.dtype))  #+ ", of name: " % example.shape)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_earray(h5.root,
                                                 name,
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)
        elif type(example) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_vlarray(h5.root,
                                                  name,
                                                  h5type,
                                                  filters=filters)
        self.types[name] = type(example)
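
The hand-written type_map in these two methods (and their variants below) can usually be replaced by tables.Atom.from_dtype, which later examples in this collection use directly; a sketch:

import numpy as np
import tables

for dt in (np.float64, np.float32, np.int64, np.uint16, bool):
    atom = tables.Atom.from_dtype(np.dtype(dt))
    print(np.dtype(dt).name, '->', atom)  # e.g. bool -> BoolAtom(shape=(), dflt=False)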
Example #16
def _triage_write(key, value, root, *write_params):
    import tables as tb
    create_group, create_table, create_c_array, filters = write_params
    if isinstance(value, dict):
        sub_root = create_group(root, key, 'dict')
        for key, sub_value in value.items():
            if not isinstance(key, string_types):
                raise TypeError('All dict keys must be strings')
            _triage_write('key_{0}'.format(key), sub_value, sub_root,
                          *write_params)
    elif isinstance(value, (list, tuple)):
        title = 'list' if isinstance(value, list) else 'tuple'
        sub_root = create_group(root, key, title)
        for vi, sub_value in enumerate(value):
            _triage_write('idx_{0}'.format(vi), sub_value, sub_root,
                          *write_params)
    elif isinstance(value, type(None)):
        atom = tb.BoolAtom()
        s = create_c_array(root, key, atom, (1,), title='None',
                           filters=filters)
        s[:] = False
    elif isinstance(value, (int, float)):
        if isinstance(value, int):
            title = 'int'
        else:  # isinstance(value, float):
            title = 'float'
        value = np.atleast_1d(value)
        atom = tb.Atom.from_dtype(value.dtype)
        s = create_c_array(root, key, atom, (1,),
                           title=title, filters=filters)
        s[:] = value
    elif isinstance(value, string_types):
        atom = tb.UInt8Atom()
        if isinstance(value, text_type):  # unicode
            value = np.frombuffer(value.encode('utf-8'), np.uint8)
            title = 'unicode'
        else:
            value = np.frombuffer(value.encode('ASCII'), np.uint8)
            title = 'ascii'
        s = create_c_array(root, key, atom, (len(value),), title=title,
                           filters=filters)
        s[:] = value
    elif isinstance(value, np.ndarray):
        atom = tb.Atom.from_dtype(value.dtype)
        s = create_c_array(root, key, atom, value.shape,
                           title='ndarray', filters=filters)
        s[:] = value
    else:
        raise TypeError('unsupported type %s' % type(value))
Example #17
def create_DenseTrackSet(outTableFile, contigLengths, grp):

    print "creating  Dense Track Set..."
    track = DenseTrackSet(
        contigLengths,
        outTableFile,
        overwrite=True,  ###############################DANGERDANGERDANGER
        openMode='w',
        compression=True)

    ###only one group, called, mask
    track.addGroup(grp)
    track[grp].addArray(tables.BoolAtom(), [])

    print "done"
    return track
Example #18
    def _create_table(self, name, example, parent=None):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        """
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib',
                                 shuffle=True)
        if parent is None:
            parent = h5.root

        if type(example) == str:
            h5type = tables.VLStringAtom()
            h5.createVLArray(parent, name, h5type, filters=filters)
            return
        if type(example) == dict:
            self.h5.createGroup(parent, name)
            return
        #If we get here then we're dealing with numpy arrays
        example = np.asarray(example)

        #MODIFICATION: appended name everywhere and introduced string
        type_map = {
            np.dtype(np.float64).name: tables.Float64Atom(),
            np.dtype(np.float32).name: tables.Float32Atom(),
            np.dtype(int).name: tables.Int64Atom(),
            np.dtype(np.int8).name: tables.Int8Atom(),
            np.dtype(np.uint8).name: tables.UInt8Atom(),
            np.dtype(np.int16).name: tables.Int16Atom(),
            np.dtype(np.uint16).name: tables.UInt16Atom(),
            np.dtype(np.int32).name: tables.Int32Atom(),
            np.dtype(np.uint32).name: tables.UInt32Atom(),
            np.dtype(bool).name: tables.BoolAtom(),
            # Maximal string length of 128 per string - change if needed
            'string32': tables.StringAtom(128)
        }

        try:
            h5type = type_map[example.dtype.name]
            h5dim = (0, ) + example.shape
            h5.createEArray(parent, name, h5type, h5dim, filters=filters)
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)
Example #19
def make_msid_file(colname, content, content_def):
    ft['content'] = content
    ft['msid'] = colname
    filename = msid_files['data'].abs
    if os.path.exists(filename):
        return

    logger.info('Making MSID data file %s', filename)

    if colname == 'TIME':
        dp_vals, indexes = derived.times_indexes(opt.start, opt.stop,
                                                 content_def['time_step'])
    else:
        dp = content_def['classes'][colname]()
        dataset = dp.fetch(opt.start, opt.stop)
        dp_vals = np.asarray(dp.calc(dataset), dtype=dp.dtype)

    # Finally make the actual MSID data file
    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.open_file(filename, mode='w', filters=filters)

    n_rows = int(20 * 3e7 / content_def['time_step'])
    h5shape = (0, )
    h5type = tables.Atom.from_dtype(dp_vals.dtype)
    h5.create_earray(h5.root,
                     'data',
                     h5type,
                     h5shape,
                     title=colname,
                     expectedrows=n_rows)
    h5.create_earray(h5.root,
                     'quality',
                     tables.BoolAtom(), (0, ),
                     title='Quality',
                     expectedrows=n_rows)

    logger.info('Made {} shape={} with n_rows(1e6)={}'.format(
        colname, h5shape, n_rows / 1.0e6))
    h5.close()
Example #20
    def h5open(self):
        self.h5filename = os.path.join(
            self.conmatdir, "conmat_%s_%s.h5" % (self.projname, self.casename))
        self.h5f = h5f = td.openFile(self.h5filename, 'a')
        if not hasattr(h5f.root, 'conmat'):
            if not hasattr(self, 'reg'):
                self.regvec_from_discs()
            jdvec = int((self.jdmax - self.jdmin + 1) / self.djd) + 1
            shape = (jdvec, self.dtmax, self.nreg, self.nreg)
            iatom = td.UInt32Atom()
            fatom = td.FloatCol()
            batom = td.BoolAtom()
            filtr = td.Filters(complevel=5, complib='zlib')
            crc = h5f.createCArray
            cnmat = crc(h5f.root, 'conmat', iatom, shape, filters=filtr)
            jdvec = crc(h5f.root, 'jdvec', fatom, (shape[0], ))
            exist = crc(h5f.root, 'exist', batom, (shape[0], shape[1]))
            jdvec[:] = np.arange(self.jdmin, self.jdmax + 1, self.djd)
            exist[:] = False
        else:
            cnmat = h5f.root.conmat
            jdvec = h5f.root.jdvec
            exist = h5f.root.exist
        return cnmat, jdvec, exist
Example #21
def make_stub_h5_col(msid, row0, row1, basedir_ref, basedir_stub):
    fetch.ft['msid'] = msid

    with set_fetch_basedir(basedir_ref):
        file_ref = fetch.msid_files['data'].abs

    if not Path(file_ref).exists():
        return

    with tables.open_file(file_ref, 'r') as h5:
        data_stub = h5.root.data[row0:row1]
        qual_stub = h5.root.quality[row0:row1]
        n_rows = len(h5.root.data)

    data_fill = np.zeros(row0, dtype=data_stub.dtype)
    qual_fill = np.ones(row0, dtype=qual_stub.dtype)  # True => bad

    with set_fetch_basedir(basedir_stub):
        file_stub = fetch.msid_files['data'].abs

    if os.path.exists(file_stub):
        os.unlink(file_stub)

    filters = tables.Filters(complevel=5, complib='zlib')
    with tables.open_file(file_stub, mode='w', filters=filters) as h5:
        h5shape = (0,) + data_stub.shape[1:]
        h5type = tables.Atom.from_dtype(data_stub.dtype)
        h5.create_earray(h5.root, 'data', h5type, h5shape, title=msid,
                         expectedrows=n_rows)
        h5.create_earray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                         expectedrows=n_rows)

    with tables.open_file(file_stub, mode='a') as h5:
        h5.root.data.append(data_fill)
        h5.root.data.append(data_stub)
        h5.root.quality.append(qual_fill)
        h5.root.quality.append(qual_stub)
Example #22
    print "# outlier voxels:", len(np.where(outlier_mask)[0])
    print "# data voxels:", len(np.where(data_mask)[0])
    print 

    ###########################################################################
    # Build data array
    # 
    # Ignore all high-scoring labels and labels with outlier voxels
    #
    # Read all data files, but only voxels in data mask. 
    flat_data_mask = data_mask.reshape(-1)

    if not os.path.exists(datafile):
        print "Loading data into {}".format(datafile)
        fileh = tb.open_file(datafile, mode='w', title="data", filters=FILTERS)
        dataarray = fileh.createEArray(fileh.root,'data',tb.BoolAtom(), 
                        shape=(0,np.sum(flat_data_mask)))

        images_used = []
        t0 = time()
        for i, labelfile in enumerate(labelfiles):
            if i>0 and i % printfreq == 0: 
                progress(i,len(labelfiles),time()-t0,printfreq)
                t0 = time()

            if not passing_idx[i]: continue

            data = rawdata[i,:]

            #outlier_sum = np.sum(data[outlier_mask])
            #if outlier_sum > 0: continue
Example #23
    def _create_table_list(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        The modified version for creating table with appendList
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(int): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(bool): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == list and type(example[0]) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape[1:]

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)

            nodes = h5.list_nodes(h5.root)

            nmpt = name.replace('.', '/\n')
            nmpt = nmpt.split('\n')

            path = '/'
            for kay in range(len(nmpt) - 1):
                #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path,nmpt[kay][:-1])
                try:
                    h5.is_visible_node(path + nmpt[kay][:-1])
                except:
                    h5.create_group(path, nmpt[kay][:-1])
                path += nmpt[kay]

            self.tables[name] = h5.create_earray(path,
                                                 nmpt[-1],
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)

        elif type(example) == list and type(example[0]) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)

            nodes = h5.list_nodes(h5.root)

            nmpt = name.replace('.', '/\n')
            nmpt = nmpt.split('\n')

            path = '/'
            for kay in range(len(nmpt) - 1):
                #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path,nmpt[kay][:-1])
                try:
                    h5.is_visible_node(path + nmpt[kay][:-1])
                except:
                    h5.create_group(path, nmpt[kay][:-1])
                path += nmpt[kay]

            self.tables[name] = h5.create_vlarray(path,
                                                  nmpt[-1],
                                                  h5type,
                                                  filters=filters)

        self.types[name] = type(example)
Example #24
    subfiles = glob.glob(imagedir + '/*.h5')

    filelist = []
    for subfile in subfiles:
        subfileh = tb.open_file(subfile,
                                mode='r',
                                title="data",
                                filters=FILTERS)

        print "Loading data from {}".format(subfile)
        if not 'mask' in fileh.root:
            mask = subfileh.root.mask[:]
            _ = fileh.create_array(fileh.root, 'mask', mask)
            dataarray = fileh.createEArray(fileh.root,
                                           'data',
                                           tb.BoolAtom(),
                                           shape=[0] + list(mask.shape))

        if not 'cropbbox_min' in fileh.root:
            _ = fileh.create_array(fileh.root, 'cropbbox_min',
                                   subfileh.root.cropbbox_min[:])
        if not 'cropbbox_max' in fileh.root:
            _ = fileh.create_array(fileh.root, 'cropbbox_max',
                                   subfileh.root.cropbbox_max[:])

        dataarray.append(subfileh.root.data[:])
        filelist.extend(subfileh.root.files)
        subfileh.close()

    _ = fileh.create_array(fileh.root, 'files', filelist)
Example #25
    def create_compressible_array(self,
                                  nodename,
                                  shape,
                                  precision,
                                  group=None):
        pass


if is_tables:

    precision_to_atom = {
        'float32': tables.Float32Atom(),
        'complex64': tables.ComplexAtom(8),
        'float64': tables.Float64Atom(),
        'complex128': tables.ComplexAtom(16),
        'bool': tables.BoolAtom(),
        'int32': tables.Int32Atom(),
        'int16': tables.Int16Atom(),
        'int8': tables.Int8Atom(),
    }

    class H5FileTables(H5FileBase, tables.File):
        def create_extendable_array(self,
                                    nodename,
                                    shape,
                                    precision,
                                    group=None):
            if not group: group = self.root
            atom = precision_to_atom[precision]
            self.create_earray(group, nodename, atom, shape)
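
A sketch of how the precision_to_atom lookup drives array creation, using a plain PyTables handle rather than the H5FileTables subclass (file and node names are illustrative):

import tables

precision_to_atom = {
    'float32': tables.Float32Atom(),
    'bool': tables.BoolAtom(),
}

with tables.open_file('arrays.h5', 'w') as h5:
    atom = precision_to_atom['bool']
    h5.create_earray(h5.root, 'flags', atom, (0, 16))  # extendable along axis 0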
Example #26
def make_mnist_dvs(path_to_data, path_to_hdf5, digits, max_pxl_value,
                   min_pxl_value, T_max, window_length, scale, polarity,
                   pattern, alphabet_size):
    """"
    Preprocess the .aedat file and save the dataset as an .hdf5 file
    """

    dirs = [r'/' + dir_ for dir_ in os.listdir(path_to_data)]

    S_prime = math.ceil(T_max / window_length)

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    train = hdf5_file.create_group(where=hdf5_file.root, name='train')

    if alphabet_size == 1:
        data_shape = (0,
                      (1 + polarity) * (max_pxl_value - min_pxl_value + 1)**2,
                      S_prime)
        label_shape = (0, len(digits), S_prime)
    else:
        data_shape = (0, (max_pxl_value - min_pxl_value + 1)**2, alphabet_size,
                      S_prime)
        label_shape = (0, len(digits), alphabet_size, S_prime)

    train_data = hdf5_file.create_earray(where=hdf5_file.root.train,
                                         name='data',
                                         atom=tables.BoolAtom(),
                                         shape=data_shape)
    train_labels = hdf5_file.create_earray(where=hdf5_file.root.train,
                                           name='label',
                                           atom=tables.BoolAtom(),
                                           shape=label_shape)

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data = hdf5_file.create_earray(where=hdf5_file.root.test,
                                        name='data',
                                        atom=tables.BoolAtom(),
                                        shape=data_shape)
    test_labels = hdf5_file.create_earray(where=hdf5_file.root.test,
                                          name='label',
                                          atom=tables.BoolAtom(),
                                          shape=label_shape)

    for i, digit in enumerate(digits):
        output_signal = make_output(i, pattern, len(digits), alphabet_size,
                                    S_prime)

        for dir_ in dirs:
            if dir_.find(str(digit)) != -1:
                for subdir, _, _ in os.walk(path_to_data + dir_):
                    if subdir.find(scale) != -1:
                        for j, file in enumerate(
                                glob.glob(subdir + r'/*.aedat')):
                            if j < 0.9 * len(glob.glob(subdir + r'/*.aedat')):
                                print('train', file)
                                tmp = load_dvs(file,
                                               S_prime,
                                               min_pxl_value=min_pxl_value,
                                               max_pxl_value=max_pxl_value,
                                               window_length=window_length,
                                               polarity=polarity)
                                print(tmp.shape, data_shape)

                                train_data.append(tmp)
                                train_labels.append(output_signal)
                            else:
                                print('test', file)
                                test_data.append(
                                    load_dvs(file,
                                             S_prime,
                                             min_pxl_value=min_pxl_value,
                                             max_pxl_value=max_pxl_value,
                                             window_length=window_length,
                                             polarity=polarity))

                                test_labels.append(output_signal)

    make_stats_group(hdf5_file)

    hdf5_file.close()
Example #27
    def write_bool(self, topic_group, data):

        self.pytable_writer_helper(topic_group, ['data'], tables.BoolAtom(),
                                   data)
        self.pytable_writer_helper(topic_group, ['time'], tables.Float64Atom(),
                                   data)
Example #28
for cmph in cmph_covariates:
    print('Subsetting for %s' % cmph)
    lon_,lat_,data = map_utils.CRU_extract('.','%s'%cmph, zip=False)
    lon_.sort()
    lat_.sort()
    # data = map_utils.interp_geodata(lon_, lat_, data, lon[lon_min_i:lon_max_i], lat[lon_min_i:lon_max_i])
    data = map_utils.grid_convert(basemap.interp(map_utils.grid_convert(data,'y-x+','y+x+'), lon_, lat_, *np.meshgrid(lon[lon_min_i:lon_max_i],lat[lat_min_i:lat_max_i])),'y+x+','x+y+')
    for res in [5]:
        hf_out = tb.openFile(os.path.join('%ik-covariates'%res,cmph.lower()+'.hdf5'),'w')
        hf_out.createArray('/','lon',lon[lon_min_i:lon_max_i][::res])
        hf_out.createArray('/','lat',lat[lat_min_i:lat_max_i][::res])
        
        d = map_utils.grid_convert(data[::res,::res], 'x+y+','y-x+')
        
        hf_out.createCArray('/','data',atom=tb.FloatAtom(),shape=d.shape,filters=tb.Filters(complevel=1,complib='zlib'))
        hf_out.createCArray('/','mask',atom=tb.BoolAtom(),shape=d.shape,filters=tb.Filters(complevel=1,complib='zlib'))
        hf_out.root.data.attrs.view = 'y-x+'
        
        hf_out.root.data[:]=d
        hf_out.root.mask[:] = clipped_pete_mask
        
        hf_out.close()

glob = tb.openFile('Globcover.hdf5')
for c in glob_channels:
    subset_and_writeout(glob, 'globcover-channel-%i'%c, 3, glob_missing, lambda x:x==c)
glob.close()

# Reconcile the masks
print('Finding the conservative mask')
# for res in [1,2,5]:
Example #29
vlarray.append(["123", "456", "3"])
vlarray.append(["456", "3"])

# Binary strings
vlarray = fileh.create_vlarray(root, 'vlarray4', tables.UInt8Atom(),
                               "pickled bytes")
data = pickle.dumps((["123", "456"], "3"))
vlarray.append(np.ndarray(buffer=data, dtype=np.uint8, shape=len(data)))

# The next is a way of doing the same as before
vlarray = fileh.create_vlarray(root, 'vlarray5', tables.ObjectAtom(),
                               "pickled object")
vlarray.append([["123", "456"], "3"])

# Boolean arrays are supported as well
vlarray = fileh.create_vlarray(root, 'vlarray6', tables.BoolAtom(),
                               "Boolean atoms")
# The next lines are equivalent...
vlarray.append([1, 0])
vlarray.append([1, 0, 3, 0])  # This will be converted to a boolean
# This gives a TypeError
# vlarray.append([1,0,1])

# Variable length strings
vlarray = fileh.create_vlarray(root, 'vlarray7', tables.VLStringAtom(),
                               "Variable Length String")
vlarray.append("asd")
vlarray.append("aaana")

# Unicode variable length strings
vlarray = fileh.create_vlarray(root, 'vlarray8', tables.VLUnicodeAtom(),
                               "Unicode variable length strings")
Example #30
def create_events_hdf5(directory,
                       path_to_hdf5,
                       classes,
                       alphabet_size,
                       pattern,
                       grid_size=128,
                       reduction_factor=4,
                       sample_length_train=500000,
                       sample_length_test=1800000,
                       window_length=5000):
    fns_train = gather_aedat(directory, 1, 24)
    fns_test = gather_aedat(directory, 24, 30)

    print(len(fns_train), len(fns_test))
    assert len(fns_train) == 98

    hdf5_file = tables.open_file(path_to_hdf5, 'w')

    n_neurons = int(grid_size / reduction_factor)**2

    S_prime_train = int(np.ceil(sample_length_train / window_length))
    S_prime_test = int(np.ceil(sample_length_test / window_length))

    if alphabet_size == 1:
        data_shape_train = (0, n_neurons, S_prime_train)
        label_shape_train = (0, len(classes), S_prime_train)

        data_shape_test = (0, n_neurons, S_prime_test)
        label_shape_test = (0, len(classes), S_prime_test)

    else:
        data_shape_train = (0, n_neurons, alphabet_size, S_prime_train)
        label_shape_train = (0, len(classes), alphabet_size, S_prime_train)

        data_shape_test = (0, n_neurons, alphabet_size, S_prime_test)
        label_shape_test = (0, len(classes), alphabet_size, S_prime_test)

    train = hdf5_file.create_group(where=hdf5_file.root, name='train')
    train_data_array = hdf5_file.create_earray(where=hdf5_file.root.train,
                                               name='data',
                                               atom=tables.BoolAtom(),
                                               shape=data_shape_train)
    train_labels_array = hdf5_file.create_earray(where=hdf5_file.root.train,
                                                 name='label',
                                                 atom=tables.BoolAtom(),
                                                 shape=label_shape_train)

    test = hdf5_file.create_group(where=hdf5_file.root, name='test')
    test_data_array = hdf5_file.create_earray(where=hdf5_file.root.test,
                                              name='data',
                                              atom=tables.BoolAtom(),
                                              shape=data_shape_test)
    test_labels_array = hdf5_file.create_earray(where=hdf5_file.root.test,
                                                name='label',
                                                atom=tables.BoolAtom(),
                                                shape=label_shape_test)

    for file_d in tqdm(fns_train + fns_test):
        istrain = file_d in fns_train
        if istrain:
            input, output = load_dvs(file_d, S_prime_train, classes,
                                     alphabet_size, pattern, window_length,
                                     sample_length_train, grid_size,
                                     reduction_factor)
            train_data_array.append(input)
            train_labels_array.append(output)
        else:
            input, output = load_dvs(file_d, S_prime_test, classes,
                                     alphabet_size, pattern, window_length,
                                     sample_length_test, grid_size,
                                     reduction_factor)
            test_data_array.append(input)
            test_labels_array.append(output)

    make_stats_group(hdf5_file)

    hdf5_file.close()
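
All of the dataset builders in this collection write the same train/test layout of boolean 'data' and 'label' arrays; reading one back follows directly (the path is illustrative):

import tables

with tables.open_file('dataset.h5', 'r') as h5:
    x_train = h5.root.train.data[:]    # bool, (N, n_neurons[, alphabet_size], S_prime)
    y_train = h5.root.train.label[:]   # bool, (N, n_classes[, alphabet_size], S_prime)
    print(x_train.shape, y_train.shape)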