Code example #1
def test_filters():
    # assumed imports for this snippet: tempfile, plus Container, Field
    # (ctapipe.core) and HDF5TableWriter (ctapipe.io)
    from tables import Filters, open_file

    class TestContainer(Container):
        value = Field(-1, 'test')

    no_comp = Filters(complevel=0)
    zstd = Filters(complevel=5, complib='blosc:zstd')

    with tempfile.NamedTemporaryFile(suffix='.hdf5') as f:
        with HDF5TableWriter(f.name, group_name='data', mode='w', filters=no_comp) as writer:
            assert writer._h5file.filters.complevel == 0

            c = TestContainer(value=5)
            writer.write('default', c)

            writer.filters = zstd
            writer.write('zstd', c)

            writer.filters = no_comp
            writer.write('nocomp', c)

        with open_file(f.name) as h5file:
            assert h5file.root.data.default.filters.complevel == 0
            assert h5file.root.data.zstd.filters.complevel == 5
            assert h5file.root.data.zstd.filters.complib == 'blosc:zstd'
            assert h5file.root.data.nocomp.filters.complevel == 0
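
This test and the variant below exercise the same PyTables behaviour: a Filters instance attached to the file acts as a default, and a Filters instance attached to a node overrides it. A minimal standalone sketch (the file name is arbitrary, and blosc:zstd assumes your PyTables build ships the Blosc zstd codec):

import numpy as np
from tables import open_file, Filters

data = np.arange(10)
with open_file("filters_demo.h5", "w", filters=Filters(complevel=0)) as f:
    # inherits the file-level default (no compression)
    f.create_carray("/", "plain", obj=data)
    # per-node filters override the file-level default
    f.create_carray("/", "packed", obj=data,
                    filters=Filters(complevel=5, complib="blosc:zstd"))
    print(f.root.plain.filters.complevel)   # 0
    print(f.root.packed.filters.complib)    # blosc:zstd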
Code example #2
def test_filters(tmp_path):
    # assumed imports, as above: Container, Field and HDF5TableWriter (ctapipe)
    from tables import Filters, open_file

    path = tmp_path / "test_time.hdf5"

    class TestContainer(Container):
        value = Field(-1, "test")

    no_comp = Filters(complevel=0)
    zstd = Filters(complevel=5, complib="blosc:zstd")

    with HDF5TableWriter(path, group_name="data", mode="w",
                         filters=no_comp) as writer:
        assert writer.h5file.filters.complevel == 0

        c = TestContainer(value=5)
        writer.write("default", c)

        writer.filters = zstd
        writer.write("zstd", c)

        writer.filters = no_comp
        writer.write("nocomp", c)

    with open_file(path) as h5file:
        assert h5file.root.data.default.filters.complevel == 0
        assert h5file.root.data.zstd.filters.complevel == 5
        assert h5file.root.data.zstd.filters.complib == "blosc:zstd"
        assert h5file.root.data.nocomp.filters.complevel == 0
Code example #3
File: logger.py Project: supergravity/PYQUM
    def savanalysis(self, adataname, adatarray):
        '''
        prerequisite: accesstructure, mkanalysis
        '''
        # module-level imports assumed: from tables import (open_file,
        # Filters, Float64Atom, IsDescription, StringCol)
        m, n = adatarray.shape[0], adatarray.shape[1]
        with open_file(self.analysispath / (self.analysisfolder + ".h5"),
                       'w') as f:
            filters = Filters(complevel=5, complib='blosc')
            acontainer = f.create_carray(f.root,
                                         adataname,
                                         Float64Atom(),
                                         shape=(m, n),
                                         filters=filters)
            acontainer[:, :] = adatarray

            # Create a table in the root directory and append data...
            class About(IsDescription):
                task = StringCol(len(self.task), pos=1)  # N-character String
                comment = StringCol(len(self.comment),
                                    pos=2)  # N-character String

            tableroot = f.create_table(f.root, 'info', About,
                                       "A table at root", Filters(1))
            tableroot.append([(self.task, self.comment)])

        return
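
A hedged read-back sketch for the file written above; the file and node names below are hypothetical stand-ins for self.analysispath / (self.analysisfolder + ".h5") and adataname:

from tables import open_file

with open_file("analysis.h5") as f:
    adata = f.root.mydata[:, :]   # the compressed CArray written above
    row = f.root.info[0]          # the single row appended to the About table
    print(row['task'], row['comment'])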
Code example #4
File: mean_table.py Project: Zelenyy/phd-code
def init_table(self):
    filters = Filters(complevel=3, fletcher32=True)
    table = self.file.create_table(self.file.root,
                                   "deposit",
                                   description=self.dtype,
                                   filters=filters)
    return table
Code example #5
    def save_pytables(self, filename, title='SVD results', filters=None, **kw):
        from csc.divisi.pyt_utils import get_pyt_handle
        from tables import ObjectAtom, Filters, Atom
        fileh = get_pyt_handle(filename, title)
        if filters is None and kw:
            filters = Filters(**kw)
        try:
            root = fileh.root

            def store_tensor(name, tensor):
                data = tensor._data
                arr = fileh.create_carray(root, name, Atom.from_dtype(data.dtype), tensor.shape, filters=filters)
                arr[:] = data
            # Labeled stuff
            for name in ('u', 'v', 'weighted_u', 'weighted_v'):
                store_tensor(name, getattr(self, name).tensor)
            # Unlabeled stuff
            for name in ('svals', 'core'):
                store_tensor(name, getattr(self, name))

            # Ordered sets
            def write_labels(name, view):
                arr = fileh.create_vlarray(root, name, ObjectAtom(), filters=filters)
                for label in view.label_lists():
                    arr.append(label)
            write_labels('u_labels', self.u)
            write_labels('v_labels', self.v)
        finally:
            fileh.close()
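
The ObjectAtom VLArray above pickles one Python object per row, which is why whole label lists can be appended directly. A minimal sketch with the modern snake_case API (file and node names are made up):

from tables import open_file, ObjectAtom, Filters

with open_file("labels_demo.h5", "w") as fileh:
    arr = fileh.create_vlarray(fileh.root, "u_labels", ObjectAtom(),
                               filters=Filters(complevel=5, complib="zlib"))
    arr.append(["row-a", "row-b"])   # each append stores one pickled object
    arr.append(["row-c"])
    print(arr[0])                    # ['row-a', 'row-b']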
Code example #6
File: poc.py Project: peterpan83/BIN_GOCI_L2
    def __ssc(self, l2f):
        data = gdal.Open(l2f)
        ds = data.GetSubDatasets()
        ssc_sert = gdal.Open(ds[self.dic['ssc']][0]).ReadAsArray() * 1000
        sscfname = '%s_ssc_sert.l2' % (l2f.split('.')[0])
        if os.path.exists(sscfname):
            return
        h5file_l2 = tables.open_file(sscfname, 'w')
        shape = (self.lines, self.pixels)
        atom = Float32Atom()
        filters = Filters(complevel=5, complib='zlib')

        h5file_l2.root._v_attrs.title = 'GOCI SSC product produced by SKLEC,Yanqun Pan'

        grpChla = h5file_l2.create_group(h5file_l2.root, 'SSC', 'SSC')
        grpChla._v_attrs.Scans = self.lines
        grpChla._v_attrs.Pixels = self.pixels
        grpChla._v_attrs.AlgorithmName = 'ATC_MPL'

        ca = h5file_l2.create_carray(grpChla,
                                     'SSC_SERT',
                                     atom,
                                     shape,
                                     filters=filters)
        ca._v_attrs._FillValue = self._fillvalue
        ca[:] = ssc_sert
        h5file_l2.close()
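
The per-node metadata writes above go through the _v_attrs accessor, which works the same way on groups and on leaf arrays. A compact sketch (all names are made up):

from tables import open_file, Filters, Float32Atom

with open_file("attrs_demo.h5", "w") as h5:
    grp = h5.create_group(h5.root, "SSC", "SSC")
    grp._v_attrs.AlgorithmName = "demo"
    ca = h5.create_carray(grp, "band", Float32Atom(), (4, 4),
                          filters=Filters(complevel=5, complib="zlib"))
    ca._v_attrs._FillValue = -999.0
    print(h5.root.SSC._v_attrs.AlgorithmName)   # demo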
Code example #7
    def start(self, filename=None):
        """
        Starts recording incoming events to a file.
        If no filename is given, a new timestamped file is created
        in the directory that was specified to the constructor.
        """
        if filename is None:
            datestring = time.strftime("%Y-%m-%dT%H:%M:%S")
            filename = os.path.abspath(
                os.path.join(self.base_dir, "zeodata_%s.h5" % datestring))

        filters = Filters(complevel=self.compression_level,
                          fletcher32=self.checksum)
        h5file = tables.open_file(filename, mode="w", filters=filters)
        group = h5file.create_group("/", "zeolinkdata",
                                    "Zeo Raw Data Link Recording")
        self.replay_data = h5file.create_vlarray(
            group,
            'data',
            VLStringAtom(),
            "Link Replay Data",
            # modern PyTables dropped the expectedsizeinMB hint this snippet
            # originally passed; one appended row per second is an assumption
            expectedrows=self.expected_hours * 3600)
        self.replay_metadata = h5file.create_table(
            group,
            'metadata',
            TimestampedZeoDesc,
            "Link Replay Metadata",
            expectedrows=self.expected_hours * 3600 * 5)
        self.h5file = h5file
        print("Recording to %s started." % filename)
Code example #8
def save_hdf(data, fn, complevel=9, key='data'):
    # assumed imports: from tables import open_file, Filters, Atom
    filters = Filters(complevel=complevel, complib='blosc')
    with open_file(fn, mode="w") as f:
        _ = f.create_carray('/',
                            key,
                            Atom.from_dtype(data.dtype),
                            filters=filters,
                            obj=data)
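
A quick round-trip check for save_hdf above (the file name is arbitrary; blosc ships with standard PyTables builds):

import numpy as np
from tables import open_file

data = np.arange(12, dtype=np.float64).reshape(3, 4)
save_hdf(data, "demo.h5")
with open_file("demo.h5") as f:
    assert (f.root.data[:] == data).all()
    print(f.root.data.filters)   # complevel=9, complib='blosc'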
Code example #9
def __init__(self, meta):
    self.reader = ProtoSetReader("stacking_simple.bin", CylinderProtoSet)
    filters = Filters(complevel=3, fletcher32=True)
    self.reader.set_filters(filters)
    self.path_hdf5 = "result.hdf5"
    self.counter = 0
    self.mess_templte = Template(MESSEGE)
    self.meta = meta
    self.step = 0.001
Code example #10
File: write_cmap.py Project: Yongcheng123/ChimeraX
def make_arrays(h5file, g, size, atom, settings):

    chunk_elements = settings['chunk_size'] // atom.itemsize
    chunk_shapes = settings['chunk_shapes']
    subsamples = settings['subsamples']
    min_subsample_elements = settings['min_subsample_elements']

    if 'compress' in settings and settings['compress']:
        method = settings.get('compress_method', 'zlib')
        level = settings.get('compress_level', 5)
        shuffle = settings.get('compress_shuffle', True)
        from tables import Filters
        filters = Filters(complevel = level, complib = method, shuffle = shuffle)
    else:
        filters = None
    
    arrays = []
    isize, jsize, ksize = size
    shape = (ksize,jsize,isize)
    cshapes = {}    # Avoid duplicating chunk shapes
    for csname in chunk_shapes:
        cshape = chunk_shape(shape, csname, chunk_elements)
        if not cshape in cshapes:
            a = h5file.create_carray(g, 'data_' + csname, atom, shape,
                                     chunkshape = cshape, filters = filters)
            arrays.append(((1,1,1),a))
            cshapes[cshape] = True

    # Compute step sizes to use.
    steps = list(subsamples)
    istep,jstep,kstep = tuple(2*s for s in subsamples[-1]) if subsamples else (2,2,2)
    from numpy import array, int32
    while (isize >= istep and jsize >= jstep and ksize >= kstep and
           (isize//istep)*(jsize//jstep)*(ksize//kstep) >= min_subsample_elements):
        steps.append((istep,jstep,kstep))
        istep *= 2
        jstep *= 2
        kstep *= 2

    # Make subsample arrays.
    for step in steps:
        istep,jstep,kstep = step
        shape = (1+(ksize-1)//kstep, 1+(jsize-1)//jstep, 1+(isize-1)//istep)
        cshapes = {}    # Avoid duplicating chunk shapes
        for csname in chunk_shapes:
            cshape = chunk_shape(shape, csname, chunk_elements)
            if not cshape in cshapes:
                sstep = '%d_%d_%d' % tuple(step)
                a = h5file.create_carray(g, 'data_%s_%s' % (csname,sstep), atom,
                                         shape, chunkshape = cshape,
                                         filters = filters)
                a._v_attrs.subsample_spacing = array(step, int32)
                arrays.append((step, a))
                cshapes[cshape] = True

    return arrays
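
chunk_shape here is project code, but the underlying idea, picking a chunkshape whose element count fits a byte budget and passing it alongside filters, works with create_carray directly. A sketch under that assumption (names and sizes are illustrative):

from tables import open_file, Filters, Float32Atom

with open_file("chunks_demo.h5", "w") as h5file:
    filters = Filters(complevel=5, complib="zlib", shuffle=True)
    # one z-plane per chunk: 256*256 float32 values, i.e. 256 KiB per chunk
    h5file.create_carray("/", "data_zplane", Float32Atom(), (64, 256, 256),
                         chunkshape=(1, 256, 256), filters=filters)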
Code example #11
def create_correlation_matrix(infiles, roi, out_type, package):
    import os
    import numpy as np
    import scipy.io as sio
    import nibabel as nb
    from nipype.utils.filemanip import split_filename, filename_to_list
    for idx, fname in enumerate(filename_to_list(infiles)):
        # get_fdata() replaces nibabel's removed get_data()
        data = np.squeeze(nb.load(fname).get_fdata())
        if idx == 0:
            timeseries = data
        else:
            timeseries = np.vstack((timeseries, data))
    roi_data = np.genfromtxt(roi)
    if not len(roi_data.shape) == 2:
        roi_data = roi_data[:, None]
    corrmat = np.zeros((roi_data.shape[1], timeseries.shape[0]))
    print(timeseries.shape)
    for i in range(roi_data.shape[1]):
        for j in xrange(timeseries.shape[0]):
            r = np.corrcoef(timeseries[j, :], roi_data[:, i])[0][1]
            corrmat[i, j] = np.sqrt(timeseries.shape[1] - 3) * 0.5 * np.log(
                (1 + r) / (1 - r))

    #corrmat = np.corrcoef(timeseries,roi_data.T)
    print(corrmat.shape)

    _, name, _ = split_filename(filename_to_list(infiles)[0])
    if len(filename_to_list(infiles)) > 1:
        name = 'combined_' + name
    if 'mat' in out_type:
        matfile = os.path.abspath(name + '.mat')
        sio.savemat(matfile, {'corrmat': corrmat})
        output = matfile
    elif 'hdf5' in out_type:
        hdf5file = os.path.abspath(name + '.hf5')
        if package == 'h5py':
            import h5py
            f = h5py.File(hdf5file, 'w')
            f.create_dataset('corrmat', data=corrmat, compression=5)
            f.close()
        else:
            from tables import open_file, Float64Atom, Filters
            h5file = open_file(hdf5file, 'w')
            arr = h5file.create_carray(h5file.root,
                                       'corrmat',
                                       Float64Atom(),
                                       corrmat.shape,
                                       filters=Filters(complevel=5))
            arr[:] = corrmat
            h5file.close()
        output = hdf5file
    else:
        raise Exception('Unknown output type')
    return output
Code example #12
File: test_convertors.py Project: Zelenyy/phd-code
def test_proto_set_convertor(self):
    readers = [
        ProtoSetReader("gammaSeed.bin", CylinderProtoSet),
        ProtoSetReader("positronSeed.bin", CylinderProtoSet),
        ProtoSetReader("histogram.bin", HistogramProtoSet)
    ]
    path = "/home/zelenyy/npm/phd/phd-code/cxx/thunderstorm/run"
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        reader.set_filters(filters)
    convertor.convert(path, "./test.hdf5")
Code example #13
File: l2bin.py Project: peterpan83/BIN_GOCI_L2
    def getAve(self, year, filter, productName):
        l2files = glob.glob(os.path.join(self.l2dir, str(year), filter))
        # files_stat.append([year,mon,len(l2files)])
        if len(l2files) == 0:
            print('no data in %s!' % (year))
            return
        print(len(l2files))
        values = self.process(l2files, productName, [0, 4000])
        values = cv2.blur(values, (5, 5))
        if year == "*":
            l2binfile = os.path.join(
                self.l2dir,
                'COMS%s%s_bin.l2' % ("2012-2016", productName.split('/')[-1]))
        else:
            l2binfile = os.path.join(
                self.l2dir, '%s/COMS%s%s_bin.l2' %
                (year, year, productName.split('/')[-1]))

        h5file_l2 = tables.open_file(l2binfile, 'w')
        atom = Float32Atom()
        filters = Filters(complevel=5, complib='zlib')
        h5file_l2.root._v_attrs.title = 'L2 bin product(%s) produced by SKLEC' % (
            productName.split('/')[-1])
        h5file_l2.root._v_attrs.Scans = self.LINES
        h5file_l2.root._v_attrs.Pixels = self.PIXELS
        h5file_l2.root._v_attrs.AlgorithmName = 'ATC_MPL'
        h5file_l2.root._v_attrs.AlgorithmAuthor = 'Yanqun Pan, State Key Laboratory of Estuarine and Coastal Research'
        ca = h5file_l2.create_carray(h5file_l2.root,
                                     productName.split('/')[-1],
                                     atom, (self.LINES, self.PIXELS),
                                     filters=filters)
        ca._v_attrs._FillValue = self._fillvalue
        ca[:, :] = values

        ca = h5file_l2.create_carray(h5file_l2.root,
                                     'longitude',
                                     atom, (self.LINES, self.PIXELS),
                                     filters=filters)
        ca._v_attrs._FillValue = self._fillvalue
        ca[:, :] = self.longitude

        ca = h5file_l2.create_carray(h5file_l2.root,
                                     'latitude',
                                     atom, (self.LINES, self.PIXELS),
                                     filters=filters)
        ca._v_attrs._FillValue = self._fillvalue
        ca[:, :] = self.latitude

        h5file_l2.close()

        print(values.shape)
Code example #14
File: test_convertors.py Project: Zelenyy/phd-code
def test_cumulator2D(self):
    if os.path.exists("test_cumulator2d.hdf5"):
        os.remove("test_cumulator2d.hdf5")
    path = "/home/zelenyy/npm/phd/phd-code/cxx/thunderstorm/run"
    readers = [
        ProtoSetReader("electron_deposit_cumulator2d.bin",
                       Cumulator2DProtoSet),
        ProtoSetReader("electron_number_cumulator2d.bin",
                       Cumulator2DProtoSet)
    ]
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        reader.set_filters(filters)
    convertor.convert(path, "./test_cumulator2d.hdf5")
Code example #15
File: hdf5_tools.py Project: Zelenyy/phd-code
def get_convertor(readers: list, path_h5file, clear=False):
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        logging.root.debug("Reader: {} {}".format(type(reader),
                                                  reader.filename))
        reader.set_filters(filters)

    def post_run_processor(input_data: InputData):
        path = input_data.path
        convertor.convert(path, path_h5file, meta=input_data.to_meta())
        if clear:
            shutil.rmtree(path)

    return post_run_processor
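
fletcher32=True, used throughout this project's snippets, adds a per-chunk checksum so corrupted chunks are detected when read back. A minimal sketch independent of the project classes above:

import numpy as np
from tables import open_file, Filters

checked = Filters(complevel=3, fletcher32=True)
with open_file("checked_demo.h5", "w") as f:
    f.create_carray("/", "deposit", obj=np.zeros((16, 16)), filters=checked)
    print(f.root.deposit.filters.fletcher32)   # True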
Code example #16
def make_arrays(h5file, g, size, atom, settings):

    chunk_elements = settings['chunk_size'] // atom.itemsize
    chunk_shapes = settings['chunk_shapes']
    min_subsample_elements = settings['min_subsample_elements']

    if 'compress' in settings and settings['compress']:
        from tables import Filters
        filters = Filters(complevel = 9)
    else:
        filters = None
    
    arrays = []
    isize, jsize, ksize = size
    shape = (ksize,jsize,isize)
    cshapes = {}    # Avoid duplicating chunk shapes
    for csname in chunk_shapes:
        cshape = chunk_shape(shape, csname, chunk_elements)
        if cshape not in cshapes:
            a = h5file.create_carray(g, 'data_' + csname, atom, shape,
                                     chunkshape = cshape, filters = filters)
            arrays.append((1,a))
            cshapes[cshape] = True

    # Make subsample arrays.
    step = 2
    from numpy import array, int32
    while (isize >= step and jsize >= step and ksize >= step and
           (isize//step)*(jsize//step)*(ksize//step) >= min_subsample_elements):
        shape = (1+(ksize-1)//step, 1+(jsize-1)//step, 1+(isize-1)//step)
        cshapes = {}    # Avoid duplicating chunk shapes
        for csname in chunk_shapes:
            cshape = chunk_shape(shape, csname, chunk_elements)
            if cshape not in cshapes:
                a = h5file.create_carray(g, 'data_%s_%d' % (csname,step), atom,
                                         shape, chunkshape = cshape,
                                         filters = filters)
                a._v_attrs.subsample_spacing = array((step,step,step), int32)
                arrays.append((step, a))
                cshapes[cshape] = True
        step *= 2

    return arrays
Code example #17
    def save(self, db):
        """Save the input data to disk.

        Notes
        -----
        Saves predictions, measurements, observables, and prior_pops to the
        HDF5 PyMC database.
        """

        if db != "hdf5":
            return

        from tables import Float64Atom, Filters
        compression = Filters(complevel=9, complib='blosc', shuffle=True)
        F = self.mcmc.db._h5file

        F.create_carray("/",
                        "predictions",
                        Float64Atom(),
                        self.predictions.shape,
                        filters=compression)
        F.root.predictions[:] = self.predictions

        F.create_carray("/",
                        "measurements",
                        Float64Atom(),
                        self.measurements.shape,
                        filters=compression)
        F.root.measurements[:] = self.measurements

        F.create_carray("/",
                        "uncertainties",
                        Float64Atom(),
                        self.uncertainties.shape,
                        filters=compression)
        F.root.uncertainties[:] = self.uncertainties

        F.create_carray("/",
                        "prior_pops",
                        Float64Atom(),
                        self.prior_pops.shape,
                        filters=compression)
        F.root.prior_pops[:] = self.prior_pops
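
shuffle=True byte-transposes each chunk before blosc compresses it, which usually improves ratios on numeric data like these float64 arrays. The same pattern with the modern create_carray API (names and shapes are illustrative):

import numpy as np
from tables import open_file, Filters, Float64Atom

compression = Filters(complevel=9, complib="blosc", shuffle=True)
with open_file("mcmc_demo.h5", "w") as F:
    ca = F.create_carray("/", "predictions", Float64Atom(), (100, 10),
                         filters=compression)
    ca[:] = np.random.rand(100, 10)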
Code example #18
def create_correlation_matrix(infiles, out_type, package):
    import os
    import numpy as np
    import scipy.io as sio
    import nibabel as nb
    from nipype.utils.filemanip import split_filename, filename_to_list
    for idx, fname in enumerate(filename_to_list(infiles)):
        # get_fdata() replaces nibabel's removed get_data()
        data = np.squeeze(nb.load(fname).get_fdata())
        if idx == 0:
            timeseries = data
        else:
            timeseries = np.vstack((timeseries, data))

    corrmat = np.corrcoef(timeseries)
    _, name, _ = split_filename(filename_to_list(infiles)[0])
    if len(filename_to_list(infiles)) > 1:
        name = 'combined_' + name
    if 'mat' in out_type:
        matfile = os.path.abspath(name + '.mat')
        sio.savemat(matfile, {'corrmat': corrmat})
        output = matfile
    elif 'hdf5' in out_type:
        hdf5file = os.path.abspath(name + '.hf5')
        if package == 'h5py':
            import h5py
            f = h5py.File(hdf5file, 'w')
            f.create_dataset('corrmat', data=corrmat, compression=5)
            f.close()
        else:
            from tables import open_file, Float64Atom, Filters
            h5file = open_file(hdf5file, 'w')
            arr = h5file.create_carray(h5file.root,
                                       'corrmat',
                                       Float64Atom(),
                                       corrmat.shape,
                                       filters=Filters(complevel=5))
            arr[:] = corrmat
            h5file.close()
        output = hdf5file
    else:
        raise Exception('Unknown output type')
    return output
Code example #19
    def open_data(self, path, mode='r'):
        if self.repository:
            out = os.path.join(self.workspace_root, path)
            path = os.path.join(self.repository.root, path)
            if not os.path.isfile(out):
                self.info('copying {} to repository {}'.format(
                    path, os.path.dirname(out)))
                if not self.repository.retrieveFile(path, out):
                    return False
            path = out

        try:
            self._frame = open_file(
                path, mode, filters=Filters(complevel=self.compression_level))
            return True
        except Exception:
            self._frame = None
            import traceback
            traceback.print_exc()
            return False
Code example #20
File: test_convertors.py Project: Zelenyy/phd-code
    def test_cumulator(self):
        if os.path.exists("test_cumulator.hdf5"):
            os.remove("test_cumulator.hdf5")
        path = "/home/zelenyy/data/thunderstorm/test"
        readers = [
            ProtoSetReader("electron_z_cumulator.bin", Cumulator1DProtoSet),
            ProtoSetReader("electron_time_cumulator.bin", Cumulator1DProtoSet)
        ]
        filters = Filters(complevel=3, fletcher32=True)
        convertor = ConverterFromBinToHDF5(readers)
        for reader in readers:
            reader.set_filters(filters)
        convertor.convert(path, "./test_cumulator.hdf5")

        with tables.open_file("test_cumulator.hdf5") as h5file:
            for i in range(10):
                name = "event" + str(i).rjust(5, "0")
                # data = h5file.get_node("/test/electron_z_cumulator", name)
                data = h5file.get_node("/test/electron_time_cumulator", name)
                plt.plot(data)
                plt.show()
Code example #21
def main(argv):
    # assumed imports: gc, plus from tables import open_file, Atom, Filters
    # (SKIN_SUBFOLDERS and unionJackPrep come from the surrounding project)
    # Manually change the list we iterate through to select between the data and masks. (Doing both kills the node.)
    for file, saveTarget in SKIN_SUBFOLDERS:
        h5file = saveTarget
        h5 = open_file(h5file, "w")
        X = unionJackPrep(file)
        atom = Atom.from_dtype(X.dtype)
        flt = Filters(complevel=0)
        h5data = h5.create_carray(h5.root, "data", atom, X.shape, filters=flt)
        h5data[:] = X
        h5data.attrs.mean = None
        h5data.attrs.std = None
        h5.flush()
        h5.close()
        del h5
        del X
        del atom
        del flt
        del h5data
        gc.collect()
        print("No mean or std to compute.")
Code example #22
def __init__(self, parent, p, m, complevel):
    self._file = open_file(p, m, filters=Filters(complevel=complevel))
    self._parent = parent
    self._parent._frame = self._file
Code example #23
def __init__(self, p, t, g, complevel, mode):
    self._file = open_file(p, mode, filters=Filters(complevel=complevel))
    self._t = t
    self._g = g
Code example #24
File: _util.py Project: hoffmangroup/genomedata
#!/usr/bin/env python

from __future__ import absolute_import, division, print_function

# Copyright 2008-2014 Michael M. Hoffman <*****@*****.**>

from argparse import ArgumentParser, FileType
from contextlib import closing
from gzip import open as _gzip_open
from os import extsep
import sys

from numpy import append, array, empty
from tables import Filters

FILTERS_GZIP = Filters(complevel=1)

EXT_GZ = "gz"
SUFFIX_GZ = extsep + EXT_GZ

GENOMEDATA_ENCODING = "ascii"

DEFAULT_CHROMOSOME_NAME_STYLE = "UCSC-style-name"

chromosome_name_map_parser = ArgumentParser(add_help=False)
chromsome_names = chromosome_name_map_parser.add_argument_group(
    "Chromosome naming")
chromsome_names.add_argument("-r",
                             "--assembly-report",
                             dest="assembly_report",
                             type=FileType('r'),
Code example #25
File: h5writer.py Project: csachs/mycelyso
    def _inner_hdf5_output(meta, result):

        # noinspection PyProtectedMember
        meta_str = '_'.join(
            k + '_' + ('%09d' % v if type(v) == int else v.__name__)
            for k, v in sorted(meta._asdict().items(), key=lambda x: x[0])
        )

        prefix = '/results/'

        if immediate_prefix != '':
            prefix += immediate_prefix

        prefix += '/' + meta_str + '/'

        success = False

        local_timeout = timeout

        base_filename = _filename

        lock_file = None

        while not success:

            filename, lock_file = wait_for_lock_and_prepare_filename(base_filename, local_timeout)

            compression_type = 'zlib'
            compression_level = 6

            compression_filter = Filters(complib=compression_type, complevel=compression_level)

            try:
                # race conditions
                # open(lock_file, 'w+')
                # noinspection PyUnusedLocal
                with acquire_lock(lock_file) as lock:
                    store = HDFStore(filename, complevel=compression_level, complib=compression_type)

                    # noinspection PyProtectedMember
                    h5 = store._handle

                    # cache for palettes
                    # currently unused
                    # palette_written = {}

                    def store_image(h5path, name, data, upsample_binary=True):
                        h5path = h5path.replace('//', '/')
                        # hdf5 stores bitfields as well, but default 0,1 will be invisible on a fixed 0-255 palette ...
                        if data.dtype == bool and upsample_binary:
                            data = (data * 255).astype(np.uint8)
                        arr = h5.create_carray(h5path, name, obj=data, createparents=True, filters=compression_filter)
                        arr.attrs.CLASS = 'IMAGE'
                        arr.attrs.IMAGE_SUBCLASS = 'IMAGE_GRAYSCALE'
                        arr.attrs.IMAGE_VERSION = '1.2'

                    def store_data(h5path, name, data):
                        h5path = h5path.replace('//', '/')

                        h5path_splits = [x for x in h5path.split('/') if x != '']

                        for i in range(len(h5path_splits)):
                            try:
                                h5.create_group('/' + '/'.join(h5path_splits[:i]), h5path_splits[i])
                            except NodeError:
                                pass

                        f = filenode.new_node(h5, where=h5path, name=name, filters=compression_filter)
                        if type(data) == str:
                            data = data.encode('utf-8')
                        f.write(data)
                        f.close()

                    def store_table(name, data):
                        _frame = DataFrame(data)
                        store[name] = _frame  # .append(name, _frame, data_columns=_frame.columns)

                    image_counter = {}
                    data_counter = {}
                    table_counter = {}

                    def process_row(result_table_rows, m, row):
                        cresults = []

                        # noinspection PyProtectedMember
                        tmp = {('meta_' + mk): (mv if type(mv) == int else -1) for mk, mv in m._asdict().items()}

                        if type(result_table_rows) == list:
                            result_table_rows = {key: True for key in result_table_rows}

                        if '_plain' in result_table_rows:
                            for v in result_table_rows['_plain']:
                                result_table_rows[v] = True
                            del result_table_rows['_plain']

                        def is_wildcarded(s):
                            return '*' in s

                        for k, v in list(result_table_rows.items()):
                            if is_wildcarded(k):
                                del result_table_rows[k]

                                for row_key in row.keys():
                                    if fnmatch(row_key, k):
                                        result_table_rows[row_key] = v

                        for k, v in result_table_rows.items():
                            if v == 'table':
                                if k not in table_counter:
                                    table_counter[k] = 0

                                if k in row and len(row[k]) > 0:
                                    if type(row[k][0]) == list:
                                        # it's a list of lists
                                        # create a mapping table
                                        # point to the mapping table

                                        the_counter = table_counter[k]

                                        new_path = '/tables/_mapping_%s' % (k,)
                                        new_name = '%s_%09d' % (k, the_counter)

                                        tmp[k] = -1
                                        tmp['_mapping_%s' % k] = the_counter

                                        table_counter[k] += 1

                                        i_mapping = []

                                        for n, i_table in enumerate(row[k]):
                                            i_new_path = '/tables/_individual_%s' % (k,)
                                            i_new_name = '%s_%09d' % (k, table_counter[k])
                                            store_table(prefix + i_new_path + '/' + i_new_name, i_table)

                                            i_mapping.append({
                                                '_index': n,
                                                'individual_table': table_counter[k]
                                            })

                                            table_counter[k] += 1

                                        store_table(prefix + new_path + '/' + new_name, i_mapping)

                                        tmp[k] = table_counter[k]
                                        table_counter[k] += 1
                                    else:
                                        new_path = '/tables/%s' % (k,)
                                        new_name = '%s_%09d' % (k, table_counter[k])
                                        store_table(prefix + new_path + '/' + new_name, row[k])
                                        tmp[k] = table_counter[k]
                                        table_counter[k] += 1
                                else:
                                    tmp[k] = table_counter[k]
                                    table_counter[k] += 1
                            elif v == 'image':
                                if k not in image_counter:
                                    image_counter[k] = 0

                                if k in row:
                                    new_path = '/images/%s' % (k,)
                                    new_name = '%s_%09d' % (k, image_counter[k])
                                    store_image(prefix + new_path, new_name, return_or_uncompress(row[k]))
                                tmp[k] = image_counter[k]

                                image_counter[k] += 1
                            elif v == 'data':
                                if k not in data_counter:
                                    data_counter[k] = 0

                                if k in row:
                                    new_path = '/data/%s' % (k,)
                                    new_name = '%s_%09d' % (k, data_counter[k])
                                    store_data(prefix + new_path, new_name, return_or_uncompress(row[k]))
                                tmp[k] = data_counter[k]

                                data_counter[k] += 1
                            else:
                                if k in row:
                                    tmp[k] = row[k]
                                else:
                                    tmp[k] = float('nan')

                        cresults.append(tmp)
                        return cresults

                    if 'collected' in result:
                        collected = []
                        for m, row in result['collected'].items():
                            if tabular_name in row:
                                result_table_rows = row[tabular_name]
                                collected += process_row(result_table_rows, m, row)
                        store_table(prefix + 'result_table_collected', collected)

                    if tabular_name in result:
                        store_table(prefix + 'result_table', process_row(result[tabular_name], meta, result))

                    store.close()

                success = True
            except NodeError:
                print("NodeError Exception occurred while writing, " +
                      "apparently the file has already been used to store similar results.")
                # print("Leaving it LOCKED (remove manually!) and trying to write to another file!")
                local_timeout = 0
                release_lock(lock_file)

            except Exception as e:
                print("Exception occurred while writing results: " + repr(e))
                release_lock(lock_file)
                return
        release_lock(lock_file)

        return result
Code example #26
File: logging.py Project: wwxFromTju/mackrl
    def log(self, key, item, T_env):

        import uuid
        T_env_str = "{}_{}".format(T_env, uuid.uuid4().hex[:6])
        try:
            from tables import open_file, Filters
            file_T_id = T_env // self.T_per_file
            file_path = os.path.join(self.folder_name, "T_env_{}:{}.h5".format(file_T_id*self.T_per_file, (file_T_id + 1)*self.T_per_file))
            self.h5file = open_file(file_path,
                                    mode="a",
                                    title="Experiment results: {}".format(self.name))

            if isinstance(item, BatchEpisodeBuffer):

                    group = "learner_samples"+key
                    if not hasattr(self.h5file.root, group):
                        self.h5file.create_group("/", group, 'Learner samples')

                    if not hasattr(getattr(self.h5file.root, group), "T{}".format(T_env_str)):
                        self.h5file.create_group("/{}/".format(group), "T{}".format(T_env_str), 'Learner samples T_env:{}'.format(T_env))

                    if not hasattr(getattr(getattr(self.h5file.root, group), "T{}".format(T_env_str)), "_transition"):
                        self.h5file.create_group("/{}/T{}".format(group, T_env_str), "_transition", 'Transition-wide data')

                    if not hasattr(getattr(getattr(self.h5file.root, group), "T{}".format(T_env_str)), "_episode"):
                        self.h5file.create_group("/{}/T{}".format(group, T_env_str), "_episode", 'Episode-wide data')

                    filters = Filters(complevel=5, complib='blosc')

                    # if table layout has not been created yet, do it now:
                    for _c, _pos in item.columns._transition.items():
                        it = item.get_col(_c)[0].cpu().numpy()
                        if not hasattr(getattr(self.h5file.root, group), _c):
                            self.h5file.create_carray(getattr(getattr(self.h5file.root, group), "T{}".format(T_env_str))._transition,
                                                                    _c, obj=it, filters=filters)
                        else:
                            getattr(getattr(self.h5file.root, group)._transition, _c).append(it)
                            getattr(getattr(self.h5file.root, group)._transition, _c).flush()

                    # if table layout has not been created yet, do it now:
                    for _c, _pos in item.columns._episode.items():
                        it = item.get_col(_c, scope="episode")[0].cpu().numpy()
                        if not hasattr(getattr(self.h5file.root, group), _c):
                            self.h5file.create_carray(getattr(getattr(self.h5file.root, group), "T{}".format(T_env_str))._episode,
                                                                           _c, obj=it, filters=filters)
                        else:
                            getattr(getattr(self.h5file.root, group)._episode, _c).append(it)
                            getattr(getattr(self.h5file.root, group)._episode, _c).flush()

            else:

                key = "__".join(key.split(" "))
                # item needs to be scalar!#
                import torch as th
                import numpy as np
                if isinstance(item, th.Tensor):
                    item = np.array([item.cpu().clone().item()])
                elif not isinstance(item, np.ndarray):
                    item = np.array([item])

                if not hasattr(self.h5file.root, "log_values"):
                    self.h5file.create_group("/", "log_values", 'Log Values')

                if not hasattr(self.h5file.root.log_values, key):
                    from tables import Float32Atom, IntAtom
                    self.h5file.create_earray(self.h5file.root.log_values,
                                                                   key, atom=Float32Atom(), shape=[0])
                    self.h5file.create_earray(self.h5file.root.log_values,
                                                                   "{}_T_env".format(key), atom=IntAtom(), shape=[0])
                else:
                    getattr(self.h5file.root.log_values, key).append(item)
                    getattr(self.h5file.root.log_values, key).flush()

                    getattr(self.h5file.root.log_values, "{}_T_env".format(key)).append(np.array([T_env]))
                    getattr(self.h5file.root.log_values, "{}_T_env".format(key)).flush()


            self.h5file.close()

        except Exception as e:
            self.logging_struct.py_logger.warning("Could not execute HDF logger save - no disk space, or no permissions? Error message: {}, T_env: {}, key: {}, item: {}".format(e, T_env, key, str(item)))

        return
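
The scalar-logging branch above relies on EArrays: arrays created with one zero-length dimension that grow along it on each append. A stripped-down sketch of that pattern:

import numpy as np
from tables import open_file, Float32Atom

with open_file("log_demo.h5", "a") as h5file:
    if not hasattr(h5file.root, "loss"):
        h5file.create_earray(h5file.root, "loss", atom=Float32Atom(),
                             shape=(0,))
    h5file.root.loss.append(np.array([0.25], dtype=np.float32))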
Code example #27
File: poc.py Project: peterpan83/BIN_GOCI_L2
    def __poc(self, l2f):
        data = gdal.Open(l2f)
        ds = data.GetSubDatasets()
        Rrs412 = gdal.Open(ds[self.dic['Rrs_412']]
                           [0]).ReadAsArray() * self.slope + self.intercept
        Rrs443 = gdal.Open(ds[self.dic['Rrs_443']]
                           [0]).ReadAsArray() * self.slope + self.intercept
        Rrs490 = gdal.Open(ds[self.dic['Rrs_490']]
                           [0]).ReadAsArray() * self.slope + self.intercept
        Rrs555 = gdal.Open(ds[self.dic['Rrs_555']]
                           [0]).ReadAsArray() * self.slope + self.intercept
        Rrs660 = gdal.Open(ds[self.dic['Rrs_660']]
                           [0]).ReadAsArray() * self.slope + self.intercept
        Rrs680 = gdal.Open(ds[self.dic['Rrs_680']]
                           [0]).ReadAsArray() * self.slope + self.intercept

        ssc_sert = gdal.Open(ds[self.dic['ssc']][0]).ReadAsArray()
        chla_oc3 = gdal.Open(ds[self.dic['chl_oc3']][0]).ReadAsArray()

        m412, m443, m490, m555, m660, m680 = Rrs412 < 0, Rrs443 < 0, Rrs490 < 0, Rrs555 < 0, Rrs660 < 0, Rrs680 < 0

        mask = m412 | m443 | m490 | m555

        Rrs412_m = ma.array(Rrs412, mask=mask)
        Rrs443_m = ma.array(Rrs443, mask=mask)
        Rrs490_m = ma.array(Rrs490, mask=mask)
        Rrs555_m = ma.array(Rrs555, mask=mask)
        Rrs660_m = ma.array(Rrs660, mask=mask)
        Rrs680_m = ma.array(Rrs680, mask=mask)

        tempR = (Rrs443_m / Rrs555_m) * np.power(Rrs412_m / Rrs490_m, -1.012)
        temp = 0.342 - 2.511 * np.log10(tempR) - 0.277 * np.power(
            np.log10(tempR), 2)
        chla = np.power(10, temp)
        print(chla.data)
        chla_data = chla.data
        chla_data[mask] = self._fillvalue
        # plt.imshow(chla_data)
        # plt.show()

        pocfname = '%s_POC.l2' % (l2f.split('.')[0])

        if os.path.exists(pocfname):
            return
        h5file_l2 = tables.open_file(pocfname, 'w')
        shape = (self.lines, self.pixels)
        atom = Float32Atom()
        filters = Filters(complevel=5, complib='zlib')

        h5file_l2.root._v_attrs.title = 'GOCI POC product produced by SKLEC,Yanqun Pan'

        grpChla = h5file_l2.create_group(h5file_l2.root, 'Chla',
                                         'remote sensing reflectance')
        grpChla._v_attrs.Scans = self.lines
        grpChla._v_attrs.Pixels = self.pixels
        grpChla._v_attrs.AlgorithmName = 'ATC_MPL'

        ca = h5file_l2.create_carray(grpChla,
                                     'Chla-OC3',
                                     atom,
                                     shape,
                                     filters=filters)
        ca._v_attrs._FillValue = self._fillvalue
        ca[:] = chla_oc3

        ca = h5file_l2.create_carray(grpChla,
                                     'Chla-YOC',
                                     atom,
                                     shape,
                                     filters=filters)
        ca._v_attrs._FillValue = self._fillvalue
        ca[:] = chla_data

        mtemp1 = Rrs660 > Rrs490
        mtemp2 = Rrs660 > Rrs680

        ratio = Rrs490_m / Rrs555_m
        POC = np.zeros((self.lines, self.pixels), dtype=np.float64)
        POC[mtemp1] = ssc_sert[mtemp1] * 1000 * 5.06 + 37.33
        POC[(~mtemp1) & (mtemp2)] = 87.3 * np.power(
            ratio.data[(~mtemp1) & (mtemp2)], -2.04)
        POC[(~mtemp1) & (~mtemp2)] = 69.9 * np.power(
            chla_data[(~mtemp1) & (~mtemp2)], 0.63)

        POC[mask] = self._fillvalue

        h5file_l2.root._v_attrs.title = 'GOCI POC product produced by SKLEC,Yanqun Pan'

        grpPOC = h5file_l2.create_group(h5file_l2.root, 'POC',
                                        'remote sensing reflectance')
        grpPOC._v_attrs.Scans = self.lines
        grpPOC._v_attrs.Pixels = self.pixels
        grpPOC._v_attrs.AlgorithmName = 'ATC_MPL'

        ca = h5file_l2.create_carray(grpPOC,
                                     'POC',
                                     atom,
                                     shape,
                                     filters=filters)
        ca._v_attrs._FillValue = self._fillvalue
        ca[:] = POC

        h5file_l2.close()
Code example #28
    def _make_feature_hdf5(self):

        with gzip_open(self._gff3_gz_file_path) as gff3_gz_file:

            print("Getting data-start position ...")

            data_start_position = None

            line = gff3_gz_file.readline().decode()

            while line.startswith("#"):

                data_start_position = gff3_gz_file.tell()

                line = gff3_gz_file.readline().decode()

            print("Counting features per seqid ...")

            seqid_n_row = defaultdict(lambda: 0)

            n = 0

            seqid = None

            while line:

                n += 1

                if not line.startswith("#"):

                    seqid_ = line.split(sep="\t")[0]

                    if seqid_ != seqid:

                        print("\t{} ...".format(seqid_))

                        seqid = seqid_

                    seqid_n_row[seqid_] += 1

                line = gff3_gz_file.readline().decode()

            print("Making {} ...".format(self._feature_hdf5_file_path))

            with open_file(
                self._feature_hdf5_file_path,
                mode="w",
                filters=Filters(complevel=1, complib="blosc"),
            ) as feature_hdf5:

                seqid_table_row = {}

                n_per_print = max(1, n // 10)

                gff3_gz_file.seek(data_start_position)

                for i, line in enumerate(gff3_gz_file):

                    if i % n_per_print == 0:

                        print("\t{:,}/{:,} ...".format(i, n))

                    line = line.decode(errors="replace")

                    if line.startswith("#"):

                        continue

                    seqid, source, type_, start, end, score, strand, phase, attributes = line.split(
                        "\t"
                    )

                    if type_ not in self._types:

                        continue

                    if seqid not in seqid_table_row:

                        print("\t\tMaking {} table ...".format(seqid))

                        seqid_table = feature_hdf5.create_table(
                            "/",
                            "seqid_{}_features".format(seqid),
                            description=self._FeatureDescription,
                            expectedrows=seqid_n_row[seqid],
                        )

                        seqid_table_row[seqid] = seqid_table.row

                    cursor = seqid_table_row[seqid]

                    cursor["seqid"] = seqid

                    cursor["start"] = start

                    cursor["end"] = end

                    name = None

                    biotype = None

                    for attribute in attributes.split(sep=";"):

                        field, value = attribute.split(sep="=")

                        if field == "Name":

                            name = value

                        elif field == "biotype":

                            biotype = value

                    cursor["Name"] = name

                    cursor["biotype"] = biotype

                    cursor.append()

                    self._name_seqid[name] = seqid

                print("\tFlushing tables and making column indices ...")

                for seqid in seqid_table_row:

                    print("\t\t{} table ...".format(seqid))

                    seqid_table = feature_hdf5.get_node(
                        "/", "seqid_{}_features".format(seqid)
                    )

                    seqid_table.flush()

                    for column in ("seqid", "start", "end", "Name", "biotype"):

                        seqid_table.cols._f_col(column).create_csindex()

                self._feature_hdf5 = feature_hdf5

                print(self._feature_hdf5)

                print("Writing {} ...".format(self._name_seqid_pickle_gz_file_path))

                with gzip_open(
                    self._name_seqid_pickle_gz_file_path, mode="wb"
                ) as name_seqid_pickle_gz_file:

                    dump(self._name_seqid, name_seqid_pickle_gz_file)
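
create_csindex() builds a completely sorted index on a column so later queries against the table are fast. A minimal sketch of indexed tables (the column layout is illustrative, not the class used above):

from tables import open_file, IsDescription, StringCol, Int64Col

class Feature(IsDescription):
    seqid = StringCol(32)
    start = Int64Col()
    end = Int64Col()

with open_file("features_demo.h5", "w") as h5:
    table = h5.create_table("/", "features", description=Feature)
    table.append([("chr1", 100, 200), ("chr1", 400, 900)])
    table.flush()
    table.cols.start.create_csindex()
    print(table.read_where("(start > 50) & (end < 300)"))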
Code example #29
File: convert.py Project: Zelenyy/phd-code
def get_convertor(readers: list):
    filters = Filters(complevel=3, fletcher32=True)
    convertor = ConverterFromBinToHDF5(readers)
    for reader in readers:
        reader.set_filters(filters)
    return convertor
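
Hypothetical usage of get_convertor above, assuming ProtoSetReader and CylinderProtoSet from the test snippets earlier are importable from the same project:

readers = [ProtoSetReader("gammaSeed.bin", CylinderProtoSet)]
convertor = get_convertor(readers)
convertor.convert("/path/to/run", "./result.hdf5")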