Example 1
def createImgGroup(fid,
                   name,
                   tot_frames,
                   im_height,
                   im_width,
                   is_expandable=True):
    parentnode, _, name = name.rpartition('/')
    parentnode += '/'

    if is_expandable:
        img_dataset = fid.create_earray(parentnode,
                                        name,
                                        atom=tables.UInt8Atom(),
                                        shape=(0, im_height, im_width),
                                        chunkshape=(1, im_height, im_width),
                                        expectedrows=tot_frames,
                                        filters=TABLE_FILTERS)
    else:
        img_dataset = fid.create_carray(parentnode,
                                        name,
                                        atom=tables.UInt8Atom(),
                                        shape=(tot_frames, im_height,
                                               im_width),
                                        filters=TABLE_FILTERS)

    img_dataset._v_attrs["CLASS"] = np.string_("IMAGE")
    img_dataset._v_attrs["IMAGE_SUBCLASS"] = np.string_("IMAGE_GRAYSCALE")
    img_dataset._v_attrs["IMAGE_WHITE_IS_ZERO"] = np.array(0, dtype="uint8")
    img_dataset._v_attrs["DISPLAY_ORIGIN"] = np.string_("UL")  # not rotated
    img_dataset._v_attrs["IMAGE_VERSION"] = np.string_("1.2")

    return img_dataset
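
A minimal usage sketch for the function above. TABLE_FILTERS is not defined in the snippet, so a plausible module-level definition is assumed here:

import numpy as np
import tables

TABLE_FILTERS = tables.Filters(complevel=5, complib='zlib')  # assumed definition

with tables.open_file('video.h5', mode='w') as fid:
    # '/frames' is split into parent '/' and node name 'frames'
    ds = createImgGroup(fid, '/frames', tot_frames=100,
                        im_height=480, im_width=640)
    frame = np.zeros((480, 640), dtype=np.uint8)
    ds.append(frame[None])  # the EArray grows along its first (frame) axis
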
Example 2
def create_dataset_pytables(symbols,
                            fonts,
                            sizes,
                            fname,
                            width=64,
                            height=64,
                            compression=None):
    import tables

    mw, mh = max_width_height(fonts, symbols, max(sizes))
    xx = range(0, width - mw, 1)
    yy = range(0, height - mh, 1)
    combinations = list(itertools.product(symbols, fonts, sizes, xx, yy))
    n_combinations = len(combinations)
    if compression:
        filters = tables.Filters(complevel=1, complib='zlib')
    else:
        filters = None

    print("Generating {num} images".format(num=n_combinations))

    font_id = dict(zip(fonts, range(len(fonts))))

    table_handle = tables.open_file(fname, mode='w')

    images = table_handle.create_earray(table_handle.root,
                                        'images',
                                        tables.Float32Atom(),
                                        shape=(0, width, height, 3),
                                        filters=filters,
                                        expectedrows=n_combinations)
    labels = table_handle.create_earray(table_handle.root,
                                        'labels',
                                        tables.UInt8Atom(),
                                        shape=(0, 1),
                                        expectedrows=n_combinations)
    font_label = table_handle.create_earray(table_handle.root,
                                            'fonts',
                                            tables.UInt8Atom(),
                                            shape=(0, 1),
                                            expectedrows=n_combinations)
    with click.progressbar(combinations,
                           label="Generating {num} images".format(
                               num=len(combinations))) as w_combinations:
        for symbol, font, size, x, y in w_combinations:
            im = draw_symbol(symbol,
                             font,
                             x,
                             y,
                             size,
                             im_width=width,
                             im_height=height)
            a_img = np.array(im.getdata()).reshape((1, width, height, 3))
            images.append(a_img / 255.0)
            labels.append(np.uint8(symbols.find(symbol)).reshape(1, 1))
            font_label.append(np.uint8(font_id[font]).reshape(1, 1))
            table_handle.flush()
    print("Done")
    table_handle.close()
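
Reading the arrays back is a matter of slicing the nodes by name; a short sketch, assuming the function above was called with fname='symbols.h5':

import tables

with tables.open_file('symbols.h5', mode='r') as f:
    batch = f.root.images[:32]   # (32, width, height, 3) float32 values in [0, 1]
    labels = f.root.labels[:32]  # (32, 1) uint8 symbol indices
    fonts = f.root.fonts[:32]    # (32, 1) uint8 font ids
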
Example 3
    def __init__(self, h5_name):
        self.hdf5_name = '%s.h5' % h5_name

        try:
            os.makedirs(cache_dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        self.hdf5_path = os.path.join(cache_dir, self.hdf5_name)
        self.hdf5_write_mode = 'a'  # default to append mode, can be changed to 'w' mode using overwrite

        self.train_unalt_set = TrainingSet(manip=False)
        self.train_manip_set = TrainingSet(manip=True)
        self.test_unalt_set = TestSet(manip=False)
        self.test_manip_set = TestSet(manip=True)

        self.data_src = {
            'train_unalt': self.train_unalt_set,
            'train_manip': self.train_manip_set,
            'test_unalt': self.test_unalt_set,
            'test_manip': self.test_manip_set
        }

        print('# train images: %d\n'
              '# train_manip images: %d\n'
              '# test images: %d\n'
              '# test_manip images: %d\n' % (self.train_unalt_set.n_files,
                                             self.train_manip_set.n_files,
                                             self.test_unalt_set.n_files,
                                             self.test_manip_set.n_files))

        self.feature_dtype = tables.Float32Atom()

        self.data_type = {
            'x': tables.UInt8Atom(),
            'y': tables.UInt8Atom(),
            'image_index': tables.UInt16Atom(),
            'patch_coord': tables.UInt16Atom(),
            'manip': tables.UInt8Atom(),
        }

        self.data_shape = {
            'y': (0,),
            'patch_coord': (0, 2),
            'image_index': (0, ),
            'manip': (0, )
        }

        self.filters = tables.Filters(complevel=5, complib='blosc')
        self.label_shape = (0,)
        self.coord_shape = (0, 2)
        self.index_shape = (0, )
        self.manip_shape = (0, )
Example 4
def createHDF5(splitspathname, splitsdict, patchSize):
    """
    splitspathname : dictionary containing filename vs their phases (train, test, val )
    splitsdict : splits dictionary. key : filename/case, value : phase (train,test)
    patchSize : x,y dimension of the image 
    """

    outputfolder = fr"outputs\hdf5\{splitspathname}"
    Path(outputfolder).mkdir(parents=True, exist_ok=True)

    img_dtype = tables.Float32Atom()
    ls_dtype = tables.UInt8Atom()
    pm_dtype = tables.UInt8Atom()
    data_shape = (0, patchSize, patchSize)
    mask_shape = (0, patchSize, patchSize)
    orgmask_shape = (0, patchSize, patchSize)

    filters = tables.Filters(complevel=5)

    phases = np.unique(list(splitsdict.values()))

    for phase in phases:
        hdf5_path = fr'{outputfolder}\{phase}.h5'

        if os.path.exists(hdf5_path):
            Path(hdf5_path).unlink()

        hdf5_file = tables.open_file(hdf5_path, mode='w')

        data = hdf5_file.create_earray(hdf5_file.root,
                                       "data",
                                       img_dtype,
                                       shape=data_shape,
                                       chunkshape=(1, patchSize, patchSize),
                                       filters=filters)

        mask = hdf5_file.create_earray(hdf5_file.root,
                                       "mask",
                                       ls_dtype,
                                       shape=mask_shape,
                                       chunkshape=(1, patchSize, patchSize),
                                       filters=filters)

        orgmask = hdf5_file.create_earray(hdf5_file.root,
                                          "orgmask",
                                          pm_dtype,
                                          shape=orgmask_shape,
                                          chunkshape=(1, patchSize, patchSize),
                                          filters=filters)

        hdf5_file.close()
Example 5
    def __init__(self, node, h5file, **kwargs):
        super(RAFileNode, self).__init__()

        if node is not None:
            # Open an existing node and get its version.
            self._checkAttributes(node)
            self._version = node.attrs.NODE_TYPE_VERSION
        elif h5file is not None:
            # Check for allowed keyword arguments,
            # to avoid unwanted arguments falling through to array constructor.
            for kwarg in kwargs:
                if kwarg not in self.__allowedInitKwArgs:
                    raise TypeError("%s keyword argument is not allowed" %
                                    repr(kwarg))

            # Turn 'expectedsize' into 'expectedrows'.
            if 'expectedsize' in kwargs:
                # These match since one byte is stored per row.
                expectedrows = kwargs['expectedsize']
                kwargs = kwargs.copy()
                del kwargs['expectedsize']
                kwargs['expectedrows'] = expectedrows

            # Create a new array in the specified PyTables file.
            self._version = NodeTypeVersions[-1]
            shape = self._byteShape[self._version]
            node = h5file.create_earray(atom=tables.UInt8Atom(),
                                        shape=shape,
                                        **kwargs)

            # Set the node attributes, else remove the array itself.
            try:
                self._setAttributes(node)
            except RuntimeError:
                h5file.remove_node(kwargs['where'], kwargs['name'])
                raise

        # Set required attributes (besides '_version').
        self.node = node
        self.mode = 'a+'
        self.offset = 0

        # Cache some dictionary lookups regarding file version.
        # self._version is a NumPy scalar, which cannot be used as an
        # index under old Python versions, so force a conversion to int.
        version = int(self._version)
        self._vType = tables.UInt8Atom().dtype.base.type
        self._vShape = self._sizeToShape[version]
Example 6
def listener(q, output_path, tile_size):
    """
    """
    try:
        logger = logging.getLogger(__name__)
        filename = os.path.basename(output_path)
        counter = 0
        pid = os.getpid()
        logger.debug(f'Listener running on {pid}...')
        hdf5_file = tb.open_file(output_path, mode='w')
        img_storage = hdf5_file.create_earray(hdf5_file.root,
                                              'training',
                                              tb.UInt8Atom(),
                                              shape=(0, tile_size, tile_size,
                                                     3))
        while 1:
            counter += 1
            if counter % 100 == 0:
                logger.info(f'{counter} tiles saved in {filename}...')
            try:
                img = q.get()
            except EOFError:
                continue
            if str(img) == 'kill':
                logger.info('Listener closed.')
                hdf5_file.close()
                return None

            img_storage.append(img[None])

    finally:
        hdf5_file.close()
Example 7
def save_hdf(table, filename, format="data"):
    """
    Save a semantic vector space into an HDF5 file.  If the optional argument
    "format" has value "data", the space will be stored under a group named
    "data", in two arrays named "index" and "vectors".  This format allows
    reading a subset of the rows of the space if desired.  Otherwise the space
    will be stored (using to_hdf) under a group named "mat".  This format does
    not allow partial reads (more precisely, load_hdf can be called on such
    files to read a subset of the rows, but the entire file will be read into
    memory before the subset is returned) but permits column labels to be
    stored.
    """
    if format == "data":
        with contextlib.closing(tables.open_file(filename, mode="w")) as file:
            file.create_group("/", "data", "Index and vector data.")
            file.create_array(file.root.data,
                              name="vectors",
                              byteorder="little",
                              obj=table.values)
            index = file.create_vlarray(
                file.root.data,
                name="index",
                byteorder="little",
                atom=tables.UInt8Atom(shape=()),
            )
            for term in table.index:
                index.append(np.frombuffer(term.encode("utf-8"), np.uint8))
    else:
        table.to_hdf(filename, "mat", mode="w", encoding="utf-8")
Example 8
def concatenate_data_files(out_filename, input_filenames):
    """
    Concatenate the given input files into one big HDF5 file.
    :param out_filename: name of output file
    :param input_filenames: list of names of input files
    :return:
    """
    # getting all necessary information from input files
    input_file = tables.open_file(input_filenames[0], "r")
    n_channels = input_file.root.data.shape[
        1]  # number of channels in the data
    patch_shape = input_file.root.data.shape[
        -3:]  # shape of every patch in the data
    input_file.close()

    # total number of entries in the files
    n_entries = 0
    for filename in input_filenames:
        with tables.open_file(filename, "r") as input_file:
            n_entries += input_file.root.data.shape[0]
    print('n_entries is:', n_entries)

    # creating hd5 file for the patches
    try:
        hdf5_file, data_storage, truth_storage, index_storage, normalization_storage = \
            create_data_file(out_filename,
                             n_channels=n_channels,
                             n_samples=n_entries,
                             image_shape=patch_shape,
                             storage_names=('data', 'truth', 'index', 'normalization'),
                             affine_shape=(0, 4),
                             affine_dtype=tables.UInt8Atom(),
                             normalize=False)
    except Exception as e:
        # If something goes wrong, delete the incomplete data file
        os.remove(out_filename)
        raise e
    print('successfully created file', out_filename)

    # writing data to file
    t = time.time()
    for filename in input_filenames:
        print('appending data from file', filename)
        with tables.open_file(filename, "r") as input_file:
            data = input_file.root.data[:]
            data_storage.append(np.asarray(
                data, dtype=np.uint8))  # TODO: maybe with np.newaxis?
            print('appended data')
            truth = input_file.root.truth[:]
            truth_storage.append(truth)
            print('appended truth')
            index = input_file.root.index[:]
            index_storage.append(index)
            print('appended index')
            norm = input_file.root.normalization[:]
            normalization_storage.append(norm)
            print('appended normalization\nDone file')
            print('took:', time.time() - t)

    hdf5_file.close()
Example 9
    def write_audio16(self, topic_group, data):

        # Fix nan possibilities with the first value that is good
        # Currently not supported....
        '''
        if np.any(np.isnan(data['data'])):
            replace_idx = np.where(np.all(np.isnan(data['data']), axis=1))[0]
            good_idx = np.where(np.all(np.logical_not(np.isnan(data['data'])), axis=1))[0][0]
            data['data'][replace_idx] = data['data'][good_idx]
            data['time'][replace_idx] = data['time'][good_idx]
        '''
        converted_arr = []
        for seg in data['data']:
            if isinstance(seg, int):
                converted_arr.append(np.array([seg]))
            else:
                converted_arr.append(np.frombuffer(seg, dtype=np.uint8))

        data['raw_audio'] = converted_arr
        #data['raw_audio'] = np.fromstring(''.join(data['data']), dtype=np.uint8)
        # Pull out left and right audio
        # Warning: this might be flipped...(right/left)
        # NOTE: Don't need to do this currently for mono channel (Kinect and Mic). Later make a flag
        #data['right_audio'], data['left_audio'] = raw_audio[0::2],raw_audio[1::2]
        #self.pytable_writer_helper(topic_group, ['left_audio', 'right_audio'], tables.Int64Atom(), data)

        self.pytable_writer_helper(topic_group, ['time'], tables.Int64Atom(),
                                   data)
        self.pytable_extend_writer_helper(topic_group, ['raw_audio'],
                                          tables.UInt8Atom(), data)
Example 10
def create_data_file(out_file, n_samples, image_shape, modality_names):
    #     pdb.set_trace()
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='blosc')
    modality_shape = tuple([0, 1] + list(image_shape))
    brain_width_shape = (0, 2, 3)

    modality_storage_list = [
        hdf5_file.create_earray(hdf5_file.root,
                                modality_name,
                                tables.Float32Atom(),
                                shape=modality_shape,
                                filters=filters,
                                expectedrows=n_samples)
        for modality_name in modality_names
    ]

    brain_width_storage = hdf5_file.create_earray(hdf5_file.root,
                                                  'brain_width',
                                                  tables.UInt8Atom(),
                                                  shape=brain_width_shape,
                                                  filters=filters,
                                                  expectedrows=n_samples)

    return hdf5_file, modality_storage_list, brain_width_storage
Example 11
def create_data_file(out_file, n_channels, n_samples, image_shape):
    hdf5_file = tables.open_file(out_file, mode='w')
    print("DEBUG: Opening HDF5 file")
    filters = tables.Filters(complevel=5, complib='blosc')
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))
    print("DEBUG: Writing data_storage to HDF5 file")
    data_storage = hdf5_file.create_earray(hdf5_file.root,
                                           'data',
                                           tables.Float32Atom(),
                                           shape=data_shape,
                                           filters=filters,
                                           expectedrows=n_samples)

    print("DEBUG: Writing truth_storage to HDF5 file")
    truth_storage = hdf5_file.create_earray(hdf5_file.root,
                                            'truth',
                                            tables.UInt8Atom(),
                                            shape=truth_shape,
                                            filters=filters,
                                            expectedrows=n_samples)

    print("DEBUG: Writing affine_storage to HDF5 file")
    affine_storage = hdf5_file.create_earray(hdf5_file.root,
                                             'affine',
                                             tables.Float32Atom(),
                                             shape=(0, 4, 4),
                                             filters=filters,
                                             expectedrows=n_samples)
    return hdf5_file, data_storage, truth_storage, affine_storage
Example 12
    def init_h5file(self):

        file, curr_dir = self.get_new_file_name()

        self.settings.child('acquisition', 'temp_file').setValue(file+'.h5')
        self.h5file = tables.open_file(os.path.join(curr_dir, file+'.h5'), mode='w')
        h5group = self.h5file.root
        h5group._v_attrs['settings'] = customparameter.parameter_to_xml_string(self.settings)
        h5group._v_attrs.type = 'detector'
        h5group._v_attrs['format_name'] = 'timestamps'

        channels_index = [self.channels_enabled[k]['index'] for k in self.channels_enabled.keys() if
                          self.channels_enabled[k]['enabled']]
        self.marker_array = self.h5file.create_earray(self.h5file.root, 'markers', tables.UInt8Atom(), (0,),
                                                      title='markers')
        self.marker_array._v_attrs['data_type'] = '1D'
        self.marker_array._v_attrs['type'] = 'tttr_data'

        self.nanotimes_array = self.h5file.create_earray(self.h5file.root, 'nanotimes', tables.UInt16Atom(), (0,),
                                                         title='nanotimes')
        self.nanotimes_array._v_attrs['data_type'] = '1D'
        self.nanotimes_array._v_attrs['type'] = 'tttr_data'

        self.timestamp_array = self.h5file.create_earray(self.h5file.root, 'timestamps', tables.UInt64Atom(), (0,),
                                                   title='timestamps')
        self.timestamp_array._v_attrs['data_type'] = '1D'
        self.timestamp_array._v_attrs['type'] = 'tttr_data'
Example 13
def test_hdf5_dataset():
    num_rows = 500
    filters = tables.Filters(complib='blosc', complevel=5)

    h5file = tables.open_file("tmp.h5",
                              mode="w",
                              title="Test file",
                              filters=filters)
    group = h5file.create_group("/", 'Data')
    atom = tables.UInt8Atom()
    y = h5file.create_carray(group,
                             'y',
                             atom=atom,
                             title='Data targets',
                             shape=(num_rows, 1),
                             filters=filters)
    for i in range(num_rows):
        y[i] = i
    h5file.flush()
    h5file.close()

    dataset = Hdf5Dataset(['y'], 0, 500, 'tmp.h5')
    assert_equal(
        dataset.get_data(request=slice(0, 10))[0],
        numpy.arange(10).reshape(10, 1))
    # Test if pickles
    dump = pickle.dumps(dataset)
    pickle.loads(dump)

    os.remove('tmp.h5')
Example 14
    def create_data_file(out_file, n_samples, image_shape, channels=4):
        hdf5_file = tables.open_file(out_file, mode='w')

        # complevel - compression level
        # complib - the library for compression
        filters = tables.Filters(complevel=5, complib='blosc')

        data_shape = tuple([0, channels] + list(image_shape))

        truth_shape = tuple([0, 1] + list(image_shape))

        data_storage = hdf5_file.create_earray(hdf5_file.root,
                                               'data',
                                               tables.Float32Atom(),
                                               shape=data_shape,
                                               filters=filters,
                                               expectedrows=n_samples)

        truth_storage = hdf5_file.create_earray(hdf5_file.root,
                                                'true',
                                                tables.UInt8Atom(),
                                                shape=truth_shape,
                                                filters=filters,
                                                expectedrows=n_samples)

        affine_storage = hdf5_file.create_earray(hdf5_file.root,
                                                 'affine',
                                                 tables.Float32Atom(),
                                                 shape=(0, 4, 4),
                                                 filters=filters,
                                                 expectedrows=n_samples)
        return hdf5_file, data_storage, truth_storage, affine_storage
Example 15
def main(args):

    total_start = timeit.default_timer()
    print('Starting Preibisch fusion', args.substack_id)

    ss = SubStack(args.first_view_dir, args.substack_id)
    minz = int(
        ss.info['Files'][0].split("/")[-1].split('_')[-1].split('.tif')[0])
    prefix = '_'.join(
        ss.info['Files'][0].split("/")[-1].split('_')[0:-1]) + '_'
    np_tensor_3d_first_view, _ = imtensor.load_nearby(
        args.tensorimage_first_view, ss, args.size_patch)
    sc_in = np_tensor_3d_first_view.shape

    if args.transformation_file is not None:
        R, t = parse_transformation_file(args.transformation_file)
        np_tensor_3d_second_view = transform_substack(
            args.second_view_dir,
            args.tensorimage_second_view,
            args.substack_id,
            R,
            t,
            args.size_patch,
            invert=True)
    else:
        np_tensor_3d_second_view, _ = imtensor.load_nearby(
            args.tensorimage_second_view, ss, args.size_patch)

    fused_image, entropy_mask__view, entropy_mask_second_view = do_content_based_fusion(
        np_tensor_3d_first_view,
        np_tensor_3d_second_view,
        args.size_patch,
        args.size_patch,
        speedup=1,
        fast_computation=True)

    if args.extramargin > args.size_patch:
        args.extramargin = args.size_patch

    offset_margin = args.size_patch - args.extramargin
    fused_image_output = fused_image[offset_margin:sc_in[0] - offset_margin,
                                     offset_margin:sc_in[1] - offset_margin,
                                     offset_margin:sc_in[2] - offset_margin]
    atom = tables.UInt8Atom()
    mkdir_p(args.outdir)
    h5f = tables.open_file(args.outdir + '/' + args.substack_id + '.h5', 'w')
    sc_out = fused_image_output.shape
    ca = h5f.create_carray(h5f.root, 'full_image', atom, sc_out)
    for z in range(0, sc_out[0], 1):
        ca[z, :, :] = fused_image_output[z, :, :]
    h5f.close()

    imtensor.save_tensor_as_tif(fused_image_output,
                                args.outdir + '/' + args.substack_id,
                                minz,
                                prefix=prefix)
    print("total time Preibisch fusion: %s" %
          (str(timeit.default_timer() - total_start)))
Example 16
def main():
    # global vars
    global gross_tiles, net_tiles, net_tumor, val_storage, val_labels
    global train_labels, train_storage, val_coords, train_coords
    train_labels, val_labels, train_coords, val_coords = [], [], [], []

    # get all of our arguments, put in defaults as needed
    args = getArgs()
    demag, output = args.demagnify, args.output
    blank_frac, rgb_cutoff, val_frac = args.blank_frac, args.rgb_cutoff, args.val_frac
    args.folder = args.folder.rstrip("/")

    output = "big_new.hdf5"
    # print out our params
    print("\nPARAMETERS: ")
    print("  demagnify: "+str(demag))
    print("  hdf5: " + str(output))
    print("  blank_frac: "+str(blank_frac))
    print("  rgb_cutoff: "+str(rgb_cutoff))
    print("  val_frac: "+str(val_frac))
    print("\nSLIDES:")

    img_dtype = tables.UInt8Atom()  # dtype in which the images will be saved
    hdf5_file = tables.open_file(output, mode='w')
    # make image arrays
    # NOTE: 0 is the extensible axis, 3 is channels and comes last for TensorFlow
    val_storage = hdf5_file.create_earray(
        hdf5_file.root, 'val_img', img_dtype, shape=(0, 256, 256, 3))
    train_storage = hdf5_file.create_earray(
        hdf5_file.root, 'train_img', img_dtype, shape=(0, 256, 256, 3))

    # iterate over TIFs in folder
    for tif in os.listdir(os.getcwd()):
        if tif.lower().endswith('.tif'):
            print("  "+str(tif[:-4]))
            png = tif[:-4]+'.png'
            xml = tif[:-4]+'.xml'
            osr = openslide.OpenSlide(tif)
            print("    tifToPng: " + str(tifToPng(osr, demag, png)))
            print("    drawTumor: "+str(drawTumor(xml, png, demag)))
            print("    makeTiles: "+str(makeTiles(256, demag, png, blank_frac,
                                                  rgb_cutoff, val_frac, osr, args.folder, hdf5_file)))
            print("    non-blank tiles so far: "+str(net_tiles))

    # add in label arrays
    hdf5_file.create_array(hdf5_file.root, 'val_labels', val_labels)
    hdf5_file.create_array(hdf5_file.root, 'train_labels', train_labels)
    #hdf5_file.create_array(hdf5_file.root, 'val_coords', val_coords)
    #hdf5_file.create_array(hdf5_file.root, 'train_coords', train_coords)

    hdf5_file.close()

    print("\nTILE RESULTS:")
    print("  total tiles: "+str(gross_tiles))
    print("  blank tiles: "+str(gross_tiles-net_tiles))
    print("  not blank tiles: "+str(net_tiles)+" = "+str((100*net_tiles)/gross_tiles)+"%")
    print("  tumor tiles: "+str(net_tumor)+" = "+str((100*net_tumor)/net_tiles)+"% of non-blanks")
    return True
Example 17
def inizialize_dataset():
    h5 = tables.open_file(dbOut_path, mode='w')
    data_shape = (0, sizedb[0], sizedb[1], sizedb[2])
    img_dtype = tables.UInt8Atom()
    label_dtype = tables.UInt64Atom()
    X_storage = h5.create_earray(h5.root, 'X', img_dtype, shape=data_shape)
    Y_storageID = h5.create_earray(h5.root, 'Y_ID', label_dtype, shape=(0,))
    Y_desc = h5.create_earray(h5.root, 'desc', label_dtype, shape=(0,6))
    return X_storage, Y_storageID, Y_desc
Example 18
def save_in_hdf5_file(hdf5_path, data, data_shape):
    import tables
    img_dtype = tables.UInt8Atom()      # dtype in which the images will be saved
    data_shape = (0, data_shape[0], data_shape[1], 64)
    hdf5_file = tables.open_file(hdf5_path+"inference_output.hdf5", mode='w')       # open a hdf5 file and create earrays
    # create_earray requires a node name; 'data' here is illustrative
    image_storage = hdf5_file.create_earray(hdf5_file.root, 'data', img_dtype,
                                            shape=data_shape)

    for datum in data:
        image_storage.append(datum[None])   # grow along the extensible axis
    hdf5_file.close()
Example 19
def create_carray(track, chrom):
    atom = tables.UInt8Atom(dflt=0)
    zlib_filter = tables.Filters(complevel=1, complib="zlib")
    
    # create CArray for this chromosome
    shape = [chrom.length]
    carray = track.h5f.create_carray(track.h5f.root, chrom.name,
                                     atom, shape, filters=zlib_filter)

    return carray
Example 20
def inizialize_dataset():
    global X_storage, Y_storageID, desc_storage
    h5 = tables.open_file(db_path, mode='w')
    data_shape = (0, sizedb[0], sizedb[1], sizedb[2])
    img_dtype = tables.UInt8Atom()
    label_dtype = tables.UInt64Atom()
    X_storage = h5.create_earray(h5.root, 'X', img_dtype, shape=data_shape)
    Y_storageID = h5.create_earray(h5.root, 'Y_ID', label_dtype, shape=(0, ))
    desc_storage = h5.create_earray(h5.root, 'desc', label_dtype,
                                    shape=(0, 6))  #video,frame,boundingbox
Example 21
def recordStringInHDF5(hf5, group, nodename, s):
    '''Creates an Array object in an HDF5 file that represents a unicode string.'''
    buf = np.frombuffer(s.encode('utf-8'), np.uint8)  # avoid shadowing the bytes builtin
    atom = tables.UInt8Atom()
    array = hf5.create_array(group,
                             nodename,
                             atom=atom,
                             obj=buf,
                             shape=(len(buf), ))
    return array
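
Recovering the string is the mirror operation; a small sketch with illustrative file and node names:

import tables

with tables.open_file('strings.h5', mode='w') as hf5:
    arr = recordStringInHDF5(hf5, hf5.root, 'greeting', u'héllo')
    restored = arr.read().tobytes().decode('utf-8')  # back to the original string
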
Example 22
def prepare():
    import os
    import sys
    import numpy as np
    import tables
    import tqdm
    import domain_datasets
    import cv2

    synsigns_path = domain_datasets.get_data_dir('syn_signs')
    data_path = os.path.join(synsigns_path, 'synthetic_data')

    labels_path = os.path.join(data_path, 'train_labelling.txt')

    if not os.path.exists(labels_path):
        print('Labels path {} does not exist'.format(labels_path))
        sys.exit(0)

    # Open the file that lists the image files along with their ground truth class
    lines = [line.strip() for line in open(labels_path, 'r').readlines()]
    lines = [line for line in lines if line != '']

    output_path = os.path.join(synsigns_path, 'syn_signs.h5')
    print('Creating {}...'.format(output_path))
    f_out = tables.open_file(output_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'syn_signs', 'Syn-Signs data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(g_out,
                                   'X_u8',
                                   tables.UInt8Atom(), (0, 3, 40, 40),
                                   expectedrows=len(lines),
                                   filters=filters)

    y = []
    for line in tqdm.tqdm(lines):
        image_filename, gt, _ = line.split()
        image_path = os.path.join(data_path, image_filename)

        if not os.path.exists(image_path):
            print(
                'Could not find image file {} mentioned in annotations'.format(
                    image_path))
            return
        image_data = cv2.imread(image_path)[:, :, ::-1]

        X_u8_arr.append(image_data.transpose(2, 0, 1)[None, ...])
        y.append(int(gt))

    y = np.array(y, dtype=np.int32)
    f_out.create_array(g_out, 'y', y)

    print('X.shape={}'.format(X_u8_arr.shape))
    print('y.shape={}'.format(y.shape))

    f_out.close()
Example 23
def create_data_file(out_file, n_samples, image_shape, imtype):
    n_channels=1
    if os.path.isfile(out_file):
        os.remove(out_file)
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='zlib')
    print('Compression details for ' + imtype + ' images:', filters)
    data_shape = tuple([0] + list(image_shape) + [n_channels])
    storage = hdf5_file.create_earray(hdf5_file.root, imtype, tables.UInt8Atom(),
                                      shape=data_shape, filters=filters,
                                      expectedrows=n_samples)
    return hdf5_file, storage, filters
Example 24
def init_dataset(fid, tot_frames, im_height, im_width, is_expandable=True):

    img_dataset = fid.create_earray('/',
                                    'full_data',
                                    atom=tables.UInt8Atom(),
                                    shape=(0, im_height, im_width),
                                    chunkshape=(1, im_height, im_width),
                                    expectedrows=tot_frames,
                                    filters=TABLE_FILTERS)

    return img_dataset
Example 25
    def write_image(self, topic_group, data):
        # Note: you need to load and reshape (data.reshape(480,640,3))
        self.pytable_writer_helper(topic_group, ['data'], tables.UInt8Atom(),
                                   data)
        self.pytable_writer_helper(topic_group,
                                   ['width', 'height', 'step', 'is_bigendian'],
                                   tables.Int64Atom(), data)
        self.pytable_writer_helper(topic_group, ['encoding'],
                                   tables.StringAtom(itemsize=15), data)
        self.pytable_writer_helper(topic_group, ['time'], tables.Float64Atom(),
                                   data)
Example 26
def accumulate_in_hdf5(assembled, h5file_path, img_dim, lab_dim, augment,
                       augment_chances, classes_to_augment, valid_ratio):
    # create hdf5 file of preprocessed images for fast loading and training
    compression = tables.Filters(complevel=5, complib='bzip2')
    h5file = tables.open_file(h5file_path, 'w', filters=compression)
    storage_train_x = h5file.create_earray(h5file.root,
                                           'train_x',
                                           tables.UInt8Atom(),
                                           shape=(0, img_dim[0], img_dim[1]))
    storage_test_x = h5file.create_earray(h5file.root,
                                          'test_x',
                                          tables.UInt8Atom(),
                                          shape=(0, img_dim[0], img_dim[1]))
    storage_train_y = h5file.create_earray(h5file.root,
                                           'train_y',
                                           tables.UInt8Atom(),
                                           shape=(0, lab_dim))
    storage_test_y = h5file.create_earray(h5file.root,
                                          'test_y',
                                          tables.UInt8Atom(),
                                          shape=(0, lab_dim))

    images = DataGenFile(assembled, augment_chances, to_uint8, lab_dim,
                         img_dim, augment, classes_to_augment)
    for i, (img, lab, is_augm) in enumerate(images()):
        lab = lab.astype(np.uint8)
        if np.random.random() < valid_ratio and not is_augm:
            storage_test_x.append(img[None])
            storage_test_y.append(lab[None])
        else:
            storage_train_x.append(img[None])
            storage_train_y.append(lab[None])

        if i % 1000 == 0:
            print('{}/lot is done, where lot > {}'.format(i, len(assembled)))

    h5file.close()
    print('HDF5 FILE SAVED')

    print('IMAGES SKIPPED:', file=sys.stderr)
    print(images.images_skipped, file=sys.stderr)
Example 27
    def create(self, key, n=5):
        if key not in self._groups:
            self._groups[key] = self._fileh.create_group(self._fileh.root, key, title=key)
        if "current" not in self._groups[key]:
            # NOTE: Creating as a UInt8Atom because of issues with implicit byte conversion using
            # VLStringAtom (that I don't understand).  Effect should be the same
            self._fileh.create_vlarray(self._groups[key], "current", atom=tables.UInt8Atom(shape=()),
                                       filters=tables.Filters(complevel=0))
        self._segment_limits[key] = n
        self._groups[key]._v_attrs['_twola_n_segments'] = n

        self._fileh.flush()
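
Appending to and reading from that uint8 VLArray would then look like the following; these helper methods are hypothetical, assuming one UTF-8 encoded string per row:

import numpy as np

def append_string(self, key, s):
    # each row of 'current' holds one string as a variable-length uint8 vector
    self._groups[key].current.append(np.frombuffer(s.encode('utf-8'), np.uint8))

def read_string(self, key, row):
    # invert the byte conversion done on append
    return bytes(self._groups[key].current[row]).decode('utf-8')
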
Example 28
def _save_ndarray(handler, group, name, x, filters=None):
    if np.issubdtype(x.dtype, np.unicode_):
        # Convert unicode strings to pure byte arrays
        strtype = b'unicode'
        itemsize = x.itemsize // 4
        atom = tables.UInt8Atom()
        x = x.view(dtype=np.uint8)
    elif np.issubdtype(x.dtype, np.string_):
        strtype = b'ascii'
        itemsize = x.itemsize
        atom = tables.StringAtom(itemsize)
    elif x.dtype == object:
        # Not supported by HDF5, force pickling
        _save_pickled(handler, group, x, name=name)
        return
    else:
        atom = tables.Atom.from_dtype(x.dtype)
        strtype = None
        itemsize = None

    if x.ndim > 0 and np.min(x.shape) == 0:
        sh = np.array(x.shape)
        atom0 = tables.Atom.from_dtype(np.dtype(np.int64))
        node = handler.create_array(group, name, atom=atom0, shape=(sh.size, ))
        node._v_attrs.zeroarray_dtype = np.dtype(x.dtype).str.encode('ascii')
        node[:] = sh
        return

    if x.ndim == 0 and len(x.shape) == 0:
        # This is a numpy array scalar. We will store it as a regular scalar
        # instead, which means it will be unpacked as a numpy scalar (not numpy
        # array scalar)
        setattr(group._v_attrs, name, x[()])
        return

    # For small arrays, compression actually leads to larger files, so we set
    # a threshold here. The threshold has been chosen through experimentation.
    if filters is not None and x.size > 300:
        node = handler.create_carray(group,
                                     name,
                                     atom=atom,
                                     shape=x.shape,
                                     chunkshape=None,
                                     filters=filters)
    else:
        node = handler.create_array(group, name, atom=atom, shape=x.shape)
    if strtype is not None:
        node._v_attrs.strtype = strtype
        node._v_attrs.itemsize = itemsize
    node[:] = x
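
The unicode branch works because a numpy UCS-4 string array viewed as uint8 can be viewed back losslessly; a round-trip sketch of that trick in isolation, assuming a little-endian platform:

import numpy as np

x = np.array(['ab', 'cd'])            # dtype '<U2', itemsize 8
raw = x.view(np.uint8)                # the byte form that gets stored
restored = raw.view(dtype='<U2')      # itemsize // 4 == 2 recovers the dtype
assert (restored == x).all()
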
Example 29
def create_data_file(out_file,
                     n_channels,
                     n_samples,
                     image_shape,
                     storage_names=('data', 'truth', 'affine'),
                     affine_shape=(0, 4, 4),
                     normalize=True,
                     affine_dtype=tables.Float32Atom()):
    hdf5_file = tables.open_file(out_file, mode='w')
    # complib='blosc' deliberately omitted, as suggested in
    # https://github.com/ellisdg/3DUnetCNN/issues/58
    filters = tables.Filters(complevel=5)
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))

    if not normalize:
        data_storage = hdf5_file.create_earray(hdf5_file.root,
                                               storage_names[0],
                                               tables.Int8Atom(),
                                               shape=data_shape,
                                               filters=filters,
                                               expectedrows=n_samples)
    else:
        data_storage = hdf5_file.create_earray(hdf5_file.root,
                                               storage_names[0],
                                               tables.Float32Atom(),
                                               shape=data_shape,
                                               filters=filters,
                                               expectedrows=n_samples)
    truth_storage = hdf5_file.create_earray(hdf5_file.root,
                                            storage_names[1],
                                            tables.UInt8Atom(),
                                            shape=truth_shape,
                                            filters=filters,
                                            expectedrows=n_samples)
    affine_storage = hdf5_file.create_earray(hdf5_file.root,
                                             storage_names[2],
                                             affine_dtype,
                                             shape=affine_shape,
                                             filters=filters,
                                             expectedrows=n_samples)
    if len(storage_names) == 4:
        normalization_storage = hdf5_file.create_earray(hdf5_file.root,
                                                        storage_names[3],
                                                        tables.Float32Atom(),
                                                        shape=(0, 2),
                                                        filters=filters,
                                                        expectedrows=n_samples)
        # will hold mean and std of this case for later normalization
        return hdf5_file, data_storage, truth_storage, affine_storage, normalization_storage
    return hdf5_file, data_storage, truth_storage, affine_storage
Example 30
    def _create_table_list(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        The modified version for creating table with appendList
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(int): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(bool): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == list and type(example[0]) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape[1:]

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_earray(h5.root,
                                                 name,
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)
        elif type(example) == list and type(example[0]) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_vlarray(h5.root,
                                                  name,
                                                  h5type,
                                                  filters=filters)
        self.types[name] = type(example)
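
For the plain numeric dtypes in the map above, PyTables can derive the atom directly, which avoids growing the table as new dtypes appear; a one-line sketch of the equivalent lookup:

import numpy as np
import tables

h5type = tables.Atom.from_dtype(np.dtype(np.uint8))  # -> UInt8Atom()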