def createImgGroup(fid, name, tot_frames, im_height, im_width, is_expandable=True):
    parentnode, _, name = name.rpartition('/')
    parentnode += '/'

    if is_expandable:
        img_dataset = fid.create_earray(parentnode,
                                        name,
                                        atom=tables.UInt8Atom(),
                                        shape=(0, im_height, im_width),
                                        chunkshape=(1, im_height, im_width),
                                        expectedrows=tot_frames,
                                        filters=TABLE_FILTERS)
    else:
        img_dataset = fid.create_carray(parentnode,
                                        name,
                                        atom=tables.UInt8Atom(),
                                        shape=(tot_frames, im_height, im_width),
                                        filters=TABLE_FILTERS)

    img_dataset._v_attrs["CLASS"] = np.string_("IMAGE")
    img_dataset._v_attrs["IMAGE_SUBCLASS"] = np.string_("IMAGE_GRAYSCALE")
    img_dataset._v_attrs["IMAGE_WHITE_IS_ZERO"] = np.array(0, dtype="uint8")
    img_dataset._v_attrs["DISPLAY_ORIGIN"] = np.string_("UL")  # not rotated
    img_dataset._v_attrs["IMAGE_VERSION"] = np.string_("1.2")

    return img_dataset
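# Hedged usage sketch (not from the original source): createImgGroup reads a
# module-level TABLE_FILTERS global, so one is defined here purely for
# illustration; the file name and frame dimensions are placeholders.
TABLE_FILTERS = tables.Filters(complevel=5, complib='zlib', shuffle=True)

with tables.open_file('video.hdf5', mode='w') as fid:
    masks = createImgGroup(fid, '/full_data', tot_frames=100,
                           im_height=480, im_width=640)
    masks.append(np.zeros((1, 480, 640), dtype=np.uint8))  # append one blank frame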
def create_dataset_pytables(symbols, fonts, sizes, fname, width=64, height=64, compression=None):
    import tables
    mw, mh = max_width_height(fonts, symbols, max(sizes))
    xx = range(0, width - mw, 1)
    yy = range(0, height - mh, 1)
    combinations = list(itertools.product(symbols, fonts, sizes, xx, yy))
    n_combinations = len(combinations)

    if compression:
        filters = tables.Filters(complevel=1, complib='zlib')
    else:
        filters = None

    print("Generating {num} images".format(num=n_combinations))
    font_id = dict(zip(fonts, range(len(fonts))))

    table_handle = tables.open_file(fname, mode='w')
    images = table_handle.create_earray(table_handle.root, 'images',
                                        tables.Float32Atom(),
                                        shape=(0, width, height, 3),
                                        filters=filters,
                                        expectedrows=n_combinations)
    labels = table_handle.create_earray(table_handle.root, 'labels',
                                        tables.UInt8Atom(), shape=(0, 1),
                                        expectedrows=n_combinations)
    font_label = table_handle.create_earray(table_handle.root, 'fonts',
                                            tables.UInt8Atom(), shape=(0, 1),
                                            expectedrows=n_combinations)

    with click.progressbar(combinations,
                           label="Generating {num} images".format(num=len(combinations))) as w_combinations:
        for symbol, font, size, x, y in w_combinations:
            im = draw_symbol(symbol, font, x, y, size, im_width=width, im_height=height)
            a_img = np.array(im.getdata()).reshape((1, width, height, 3))
            images.append(a_img / 255.0)
            labels.append(np.uint8(symbols.find(symbol)).reshape(1, 1))
            font_label.append(np.uint8(font_id[font]).reshape(1, 1))

    table_handle.flush()
    print("Done")
    table_handle.close()
def __init__(self, h5_name):
    self.hdf5_name = '%s.h5' % h5_name
    try:
        os.makedirs(cache_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    self.hdf5_path = os.path.join(cache_dir, self.hdf5_name)
    self.hdf5_write_mode = 'a'  # default to append mode, can be changed to 'w' mode using overwrite

    self.train_unalt_set = TrainingSet(manip=False)
    self.train_manip_set = TrainingSet(manip=True)
    self.test_unalt_set = TestSet(manip=False)
    self.test_manip_set = TestSet(manip=True)
    self.data_src = {
        'train_unalt': self.train_unalt_set,
        'train_manip': self.train_manip_set,
        'test_unalt': self.test_unalt_set,
        'test_manip': self.test_manip_set
    }

    print('# train images: %d\n'
          '# train_manip images: %d\n'
          '# test images: %d\n'
          '# test_manip images: %d\n'
          % (self.train_unalt_set.n_files,
             self.train_manip_set.n_files,
             self.test_unalt_set.n_files,
             self.test_manip_set.n_files))

    self.feature_dtype = tables.Float32Atom()
    self.data_type = {
        'x': tables.UInt8Atom(),
        'y': tables.UInt8Atom(),
        'image_index': tables.UInt16Atom(),
        'patch_coord': tables.UInt16Atom(),
        'manip': tables.UInt8Atom(),
    }
    self.data_shape = {
        'y': (0,),
        'patch_coord': (0, 2),
        'image_index': (0,),
        'manip': (0,)
    }
    self.filters = tables.Filters(complevel=5, complib='blosc')
    self.label_shape = (0,)
    self.coord_shape = (0, 2)
    self.index_shape = (0,)
    self.manip_shape = (0,)
def createHDF5(splitspathname, splitsdict, patchSize):
    """
    splitspathname : name of the split set, used to build the output folder path
    splitsdict : splits dictionary. key : filename/case, value : phase (train, test, val)
    patchSize : x, y dimension of the image patches
    """
    outputfolder = fr"outputs\hdf5\{splitspathname}"
    Path(outputfolder).mkdir(parents=True, exist_ok=True)

    img_dtype = tables.Float32Atom()
    ls_dtype = tables.UInt8Atom()
    pm_dtype = tables.UInt8Atom()

    data_shape = (0, patchSize, patchSize)
    mask_shape = (0, patchSize, patchSize)
    orgmask_shape = (0, patchSize, patchSize)

    filters = tables.Filters(complevel=5)

    phases = np.unique(list(splitsdict.values()))
    for phase in phases:
        hdf5_path = fr'{outputfolder}\{phase}.h5'
        if os.path.exists(hdf5_path):
            Path(hdf5_path).unlink()

        hdf5_file = tables.open_file(hdf5_path, mode='w')
        data = hdf5_file.create_earray(hdf5_file.root, "data", img_dtype,
                                       shape=data_shape,
                                       chunkshape=(1, patchSize, patchSize),
                                       filters=filters)
        mask = hdf5_file.create_earray(hdf5_file.root, "mask", ls_dtype,
                                       shape=mask_shape,
                                       chunkshape=(1, patchSize, patchSize),
                                       filters=filters)
        orgmask = hdf5_file.create_earray(hdf5_file.root, "orgmask", pm_dtype,
                                          shape=orgmask_shape,
                                          chunkshape=(1, patchSize, patchSize),
                                          filters=filters)
        hdf5_file.close()
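# Hedged usage sketch (assumed, not from the original module): splitsdict maps a
# case name to its phase, so this call would create outputs\hdf5\split1\train.h5
# and outputs\hdf5\split1\val.h5, each with empty "data", "mask" and "orgmask" earrays.
splits = {'case_001': 'train', 'case_002': 'train', 'case_003': 'val'}
createHDF5('split1', splits, patchSize=128)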
def __init__(self, node, h5file, **kwargs):
    super(RAFileNode, self).__init__()
    if node is not None:
        # Open an existing node and get its version.
        self._checkAttributes(node)
        self._version = node.attrs.NODE_TYPE_VERSION
    elif h5file is not None:
        # Check for allowed keyword arguments,
        # to avoid unwanted arguments falling through to the array constructor.
        for kwarg in kwargs:
            if kwarg not in self.__allowedInitKwArgs:
                raise TypeError("%s keyword argument is not allowed" % repr(kwarg))

        # Turn 'expectedsize' into 'expectedrows'.
        if 'expectedsize' in kwargs:
            # These match since one byte is stored per row.
            expectedrows = kwargs['expectedsize']
            kwargs = kwargs.copy()
            del kwargs['expectedsize']
            kwargs['expectedrows'] = expectedrows

        # Create a new array in the specified PyTables file.
        self._version = NodeTypeVersions[-1]
        shape = self._byteShape[self._version]
        node = h5file.createEArray(atom=tables.UInt8Atom(), shape=shape, **kwargs)

        # Set the node attributes, else remove the array itself.
        try:
            self._setAttributes(node)
        except RuntimeError:
            h5file.removeNode(kwargs['where'], kwargs['name'])
            raise

    # Set required attributes (besides '_version').
    self.node = node
    self.mode = 'a+'
    self.offset = 0L

    # Cache some dictionary lookups regarding the file version.
    # self._version is a NumPy scalar and when Python < 2.5
    # this cannot be used as an index.
    # Force a conversion to an integer.
    version = int(self._version)
    self._vType = tables.UInt8Atom().dtype.base.type
    self._vShape = self._sizeToShape[version]
def listener(q, output_path, tile_size):
    """Consume tiles from the queue and append them to the HDF5 file."""
    try:
        logger = logging.getLogger(__name__)
        filename = os.path.basename(output_path)
        counter = 0
        pid = os.getpid()
        logger.debug(f'Listener running on {pid}...')

        hdf5_file = tb.open_file(output_path, mode='w')
        img_storage = hdf5_file.create_earray(hdf5_file.root, 'training',
                                              tb.UInt8Atom(),
                                              shape=(0, tile_size, tile_size, 3))
        while 1:
            counter += 1
            if counter % 100 == 0:
                logger.info(f'{counter} tiles saved in {filename}...')
            try:
                img = q.get()
            except EOFError:
                continue
            if str(img) == 'kill':
                logger.info('Listener closed.')
                hdf5_file.close()
                return None
            img_storage.append(img[None])
    finally:
        hdf5_file.close()
def save_hdf(table, filename, format="data"):
    """
    Save a semantic vector space into an HDF5 file.

    If the optional argument "format" has value "data", the space will be
    stored under a group named "data", in two arrays named "index" and
    "vectors". This format allows reading a subset of the rows of the space
    if desired.

    Otherwise the space will be stored (using to_hdf) under a group named
    "mat". This format does not allow partial reads (more precisely, load_hdf
    can be called on such files to read a subset of the rows, but the entire
    file will be read into memory before the subset is returned), but it
    permits column labels to be stored.
    """
    if format == "data":
        with contextlib.closing(tables.open_file(filename, mode="w")) as file:
            file.create_group("/", "data", "Index and vector data.")
            file.create_array(file.root.data, name="vectors",
                              byteorder="little", obj=table.values)
            index = file.create_vlarray(
                file.root.data,
                name="index",
                byteorder="little",
                atom=tables.UInt8Atom(shape=()),
            )
            for term in table.index:
                index.append(np.frombuffer(term.encode("utf-8"), np.uint8))
    else:
        table.to_hdf(filename, "mat", mode="w", encoding="utf-8")
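# Hedged read-back sketch (assumed, not part of the original module): each row of
# the "index" VLArray is a UTF-8 byte sequence, so the terms are recovered by
# decoding; 'space.h5' stands in for whatever filename was passed to save_hdf.
with contextlib.closing(tables.open_file('space.h5', mode='r')) as f:
    vectors = f.root.data.vectors[:]
    terms = [row.tobytes().decode('utf-8') for row in f.root.data.index]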
def concatenate_data_files(out_filname, input_filenames):
    """
    Concatenate the given input filenames into one big HDF5 file.
    :param out_filname: name of output file
    :param input_filenames: list of names of input files
    :return:
    """
    # getting all necessary information from the input files
    input_file = tables.open_file(input_filenames[0], "r")
    n_channels = input_file.root.data.shape[1]      # number of channels in the data
    patch_shape = input_file.root.data.shape[-3:]   # shape of every patch in the data
    input_file.close()

    # total number of entries in the files
    n_entries = 0
    for filename in input_filenames:
        with tables.open_file(filename, "r") as input_file:
            n_entries += input_file.root.data.shape[0]
    print('n_entries is:', n_entries)

    # creating the hdf5 file for the patches
    try:
        hdf5_file, data_storage, truth_storage, index_storage, normalization_storage = \
            create_data_file(out_filname,
                             n_channels=n_channels,
                             n_samples=n_entries,
                             image_shape=patch_shape,
                             storage_names=('data', 'truth', 'index', 'normalization'),
                             affine_shape=(0, 4),
                             affine_dtype=tables.UInt8Atom(),
                             normalize=False)
    except Exception as e:
        # If something goes wrong, delete the incomplete data file
        os.remove(out_filname)
        raise e
    print('successfully created file', out_filname)

    # writing data to the file
    t = time.time()
    for filename in input_filenames:
        print('appending data from file', filename)
        with tables.open_file(filename, "r") as input_file:
            data = input_file.root.data[:]
            data_storage.append(np.asarray(data, dtype=np.uint8))  # TODO: maybe with np.newaxis?
            print('appended data')
            truth = input_file.root.truth[:]
            truth_storage.append(truth)
            print('appended truth')
            index = input_file.root.index[:]
            index_storage.append(index)
            print('appended index')
            norm = input_file.root.normalization[:]
            normalization_storage.append(norm)
            print('appended normalization\nDone file')
    print('took:', time.time() - t)
    hdf5_file.close()
def write_audio16(self, topic_group, data):
    # Fix NaN possibilities with the first value that is good.
    # Currently not supported....
    '''
    if np.any(np.isnan(data['data'])):
        replace_idx = np.where(np.all(np.isnan(data['data']), axis=1))[0]
        good_idx = np.where(np.all(np.logical_not(np.isnan(data['data'])), axis=1))[0][0]
        data['data'][replace_idx] = data['data'][good_idx]
        data['time'][replace_idx] = data['time'][good_idx]
    '''
    converted_arr = []
    for seg in data['data']:
        if isinstance(seg, int):
            converted_arr.append(np.array([seg]))
        else:
            converted_arr.append(np.fromstring(seg, dtype=np.uint8))
    data['raw_audio'] = converted_arr
    # data['raw_audio'] = np.fromstring(''.join(data['data']), dtype=np.uint8)

    # Pull out left and right audio
    # Warning: this might be flipped... (right/left)
    # NOTE: Don't need to do this currently for mono channel (Kinect and Mic). Later make a flag
    # data['right_audio'], data['left_audio'] = raw_audio[0::2], raw_audio[1::2]
    # self.pytable_writer_helper(topic_group, ['left_audio', 'right_audio'], tables.Int64Atom(), data)
    self.pytable_writer_helper(topic_group, ['time'], tables.Int64Atom(), data)
    self.pytable_extend_writer_helper(topic_group, ['raw_audio'], tables.UInt8Atom(), data)
def create_data_file(out_file, n_samples, image_shape, modality_names):
    # pdb.set_trace()
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='blosc')
    modality_shape = tuple([0, 1] + list(image_shape))
    brain_width_shape = (0, 2, 3)
    modality_storage_list = [
        hdf5_file.create_earray(hdf5_file.root, modality_name,
                                tables.Float32Atom(), shape=modality_shape,
                                filters=filters, expectedrows=n_samples)
        for modality_name in modality_names
    ]
    brain_width_storage = hdf5_file.create_earray(hdf5_file.root, 'brain_width',
                                                  tables.UInt8Atom(),
                                                  shape=brain_width_shape,
                                                  filters=filters,
                                                  expectedrows=n_samples)
    return hdf5_file, modality_storage_list, brain_width_storage
def create_data_file(out_file, n_channels, n_samples, image_shape):
    hdf5_file = tables.open_file(out_file, mode='w')
    print("DEBUG: Opening HDF5 file")
    filters = tables.Filters(complevel=5, complib='blosc')
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))

    print("DEBUG: Writing data_storage to HDF5 file")
    data_storage = hdf5_file.create_earray(hdf5_file.root, 'data',
                                           tables.Float32Atom(), shape=data_shape,
                                           filters=filters, expectedrows=n_samples)
    print("DEBUG: Writing truth_storage to HDF5 file")
    truth_storage = hdf5_file.create_earray(hdf5_file.root, 'truth',
                                            tables.UInt8Atom(), shape=truth_shape,
                                            filters=filters, expectedrows=n_samples)
    print("DEBUG: Writing affine_storage to HDF5 file")
    affine_storage = hdf5_file.create_earray(hdf5_file.root, 'affine',
                                             tables.Float32Atom(), shape=(0, 4, 4),
                                             filters=filters, expectedrows=n_samples)
    return hdf5_file, data_storage, truth_storage, affine_storage
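# Hedged usage sketch (assumed, not from the original module): each returned
# storage is an extendable EArray, so one subject is appended along axis 0; the
# file name, channel count and image shape below are placeholders.
hdf5_file, data_storage, truth_storage, affine_storage = create_data_file(
    'subjects.h5', n_channels=4, n_samples=10, image_shape=(144, 144, 144))
data_storage.append(np.zeros((4, 144, 144, 144), dtype=np.float32)[np.newaxis])
truth_storage.append(np.zeros((1, 144, 144, 144), dtype=np.uint8)[np.newaxis])
affine_storage.append(np.eye(4, dtype=np.float32)[np.newaxis])
hdf5_file.close()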
def init_h5file(self):
    file, curr_dir = self.get_new_file_name()
    self.settings.child('acquisition', 'temp_file').setValue(file + '.h5')
    self.h5file = tables.open_file(os.path.join(curr_dir, file + '.h5'), mode='w')
    h5group = self.h5file.root
    h5group._v_attrs['settings'] = customparameter.parameter_to_xml_string(self.settings)
    h5group._v_attrs.type = 'detector'
    h5group._v_attrs['format_name'] = 'timestamps'

    channels_index = [self.channels_enabled[k]['index']
                      for k in self.channels_enabled.keys()
                      if self.channels_enabled[k]['enabled']]

    self.marker_array = self.h5file.create_earray(self.h5file.root, 'markers',
                                                  tables.UInt8Atom(), (0,),
                                                  title='markers')
    self.marker_array._v_attrs['data_type'] = '1D'
    self.marker_array._v_attrs['type'] = 'tttr_data'

    self.nanotimes_array = self.h5file.create_earray(self.h5file.root, 'nanotimes',
                                                     tables.UInt16Atom(), (0,),
                                                     title='nanotimes')
    self.nanotimes_array._v_attrs['data_type'] = '1D'
    self.nanotimes_array._v_attrs['type'] = 'tttr_data'

    self.timestamp_array = self.h5file.create_earray(self.h5file.root, 'timestamps',
                                                     tables.UInt64Atom(), (0,),
                                                     title='timestamps')
    self.timestamp_array._v_attrs['data_type'] = '1D'
    self.timestamp_array._v_attrs['type'] = 'tttr_data'
def test_hdf5_dataset():
    num_rows = 500
    filters = tables.Filters(complib='blosc', complevel=5)
    h5file = tables.open_file("tmp.h5", mode="w", title="Test file", filters=filters)
    group = h5file.create_group("/", 'Data')
    atom = tables.UInt8Atom()
    y = h5file.create_carray(group, 'y', atom=atom, title='Data targets',
                             shape=(num_rows, 1), filters=filters)
    for i in range(num_rows):
        y[i] = i
    h5file.flush()
    h5file.close()

    dataset = Hdf5Dataset(['y'], 0, 500, 'tmp.h5')
    assert_equal(dataset.get_data(request=slice(0, 10))[0],
                 numpy.arange(10).reshape(10, 1))

    # Test that the dataset pickles
    dump = pickle.dumps(dataset)
    pickle.loads(dump)

    os.remove('tmp.h5')
def create_data_file(out_file, n_samples, image_shape, channels=4):
    hdf5_file = tables.open_file(out_file, mode='w')
    # complevel - compression level
    # complib - the library used for compression
    filters = tables.Filters(complevel=5, complib='blosc')
    data_shape = tuple([0, channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))
    data_storage = hdf5_file.create_earray(hdf5_file.root, 'data',
                                           tables.Float32Atom(), shape=data_shape,
                                           filters=filters, expectedrows=n_samples)
    truth_storage = hdf5_file.create_earray(hdf5_file.root, 'true',
                                            tables.UInt8Atom(), shape=truth_shape,
                                            filters=filters, expectedrows=n_samples)
    affine_storage = hdf5_file.create_earray(hdf5_file.root, 'affine',
                                             tables.Float32Atom(), shape=(0, 4, 4),
                                             filters=filters, expectedrows=n_samples)
    return hdf5_file, data_storage, truth_storage, affine_storage
def main(args):
    total_start = timeit.default_timer()
    print('Starting Preibisch fusion', args.substack_id)
    ss = SubStack(args.first_view_dir, args.substack_id)
    minz = int(ss.info['Files'][0].split("/")[-1].split('_')[-1].split('.tif')[0])
    prefix = '_'.join(ss.info['Files'][0].split("/")[-1].split('_')[0:-1]) + '_'
    np_tensor_3d_first_view, _ = imtensor.load_nearby(
        args.tensorimage_first_view, ss, args.size_patch)
    sc_in = np_tensor_3d_first_view.shape

    if args.transformation_file is not None:
        R, t = parse_transformation_file(args.transformation_file)
        np_tensor_3d_second_view = transform_substack(
            args.second_view_dir, args.tensorimage_second_view, args.substack_id,
            R, t, args.size_patch, invert=True)
    else:
        np_tensor_3d_second_view, _ = imtensor.load_nearby(
            args.tensorimage_second_view, ss, args.size_patch)

    fused_image, entropy_mask__view, entropy_mask_second_view = do_content_based_fusion(
        np_tensor_3d_first_view, np_tensor_3d_second_view,
        args.size_patch, args.size_patch, speedup=1, fast_computation=True)

    if args.extramargin > args.size_patch:
        args.extramargin = args.size_patch

    offset_margin = args.size_patch - args.extramargin
    fused_image_output = fused_image[offset_margin:sc_in[0] - offset_margin,
                                     offset_margin:sc_in[1] - offset_margin,
                                     offset_margin:sc_in[2] - offset_margin]
    atom = tables.UInt8Atom()
    mkdir_p(args.outdir)
    h5f = tables.openFile(args.outdir + '/' + args.substack_id + '.h5', 'w')
    sc_out = fused_image_output.shape
    ca = h5f.createCArray(h5f.root, 'full_image', atom, sc_out)
    for z in xrange(0, sc_out[0], 1):
        ca[z, :, :] = fused_image_output[z, :, :]
    h5f.close()

    imtensor.save_tensor_as_tif(fused_image_output,
                                args.outdir + '/' + args.substack_id, minz,
                                prefix=prefix)
    print("total time Preibisch fusion: %s" % (str(timeit.default_timer() - total_start)))
def main():
    # global vars
    global gross_tiles, net_tiles, net_tumor, val_storage, val_labels
    global train_labels, train_storage, val_coords, train_coords
    train_labels, val_labels, train_coords, val_coords = [], [], [], []

    # get all of our arguments, put in defaults as needed
    args = getArgs()
    demag, output = args.demagnify, args.output
    blank_frac, rgb_cutoff, val_frac = args.blank_frac, args.rgb_cutoff, args.val_frac
    args.folder = args.folder.rstrip("/")
    output = "big_new.hdf5"

    # print out our params
    print("\nPARAMETERS: ")
    print(" demagnify: " + str(demag))
    print(" hdf5: " + str(output))
    print(" blank_frac: " + str(blank_frac))
    print(" rgb_cutoff: " + str(rgb_cutoff))
    print(" val_frac: " + str(val_frac))
    print("\nSLIDES:")

    img_dtype = tables.UInt8Atom()  # dtype in which the images will be saved
    hdf5_file = tables.open_file(output, mode='w')

    # make image arrays
    # NOTE: 0 is the extensible axis, 3 is channels and comes last for TensorFlow
    val_storage = hdf5_file.create_earray(
        hdf5_file.root, 'val_img', img_dtype, shape=(0, 256, 256, 3))
    train_storage = hdf5_file.create_earray(
        hdf5_file.root, 'train_img', img_dtype, shape=(0, 256, 256, 3))

    # iterate over TIFs in folder
    for tif in os.listdir(os.getcwd()):
        if tif.lower().endswith('.tif'):
            print(" " + str(tif[:-4]))
            png = tif[:-4] + '.png'
            xml = tif[:-4] + '.xml'
            osr = openslide.OpenSlide(tif)
            print(" tifToPng: " + str(tifToPng(osr, demag, png)))
            print(" drawTumor: " + str(drawTumor(xml, png, demag)))
            print(" makeTiles: " + str(makeTiles(256, demag, png, blank_frac,
                                                 rgb_cutoff, val_frac, osr,
                                                 args.folder, hdf5_file)))
            print(" non-blank tiles so far: " + str(net_tiles))

    # add in label arrays
    hdf5_file.create_array(hdf5_file.root, 'val_labels', val_labels)
    hdf5_file.create_array(hdf5_file.root, 'train_labels', train_labels)
    # hdf5_file.create_array(hdf5_file.root, 'val_coords', val_coords)
    # hdf5_file.create_array(hdf5_file.root, 'train_coords', train_coords)
    hdf5_file.close()

    print("\nTILE RESULTS:")
    print(" total tiles: " + str(gross_tiles))
    print(" blank tiles: " + str(gross_tiles - net_tiles))
    print(" not blank tiles: " + str(net_tiles) + " = " + str((100 * net_tiles) / gross_tiles) + "%")
    print(" tumor tiles: " + str(net_tumor) + " = " + str((100 * net_tumor) / net_tiles) + "% of non-blanks")
    return True
def inizialize_dataset():
    h5 = tables.open_file(dbOut_path, mode='w')
    data_shape = (0, sizedb[0], sizedb[1], sizedb[2])
    img_dtype = tables.UInt8Atom()
    label_dtype = tables.UInt64Atom()
    X_storage = h5.create_earray(h5.root, 'X', img_dtype, shape=data_shape)
    Y_storageID = h5.create_earray(h5.root, 'Y_ID', label_dtype, shape=(0,))
    Y_desc = h5.create_earray(h5.root, 'desc', label_dtype, shape=(0, 6))
    return X_storage, Y_storageID, Y_desc
def save_in_hdf5_file(hdf5_path, data, data_shape):
    import tables
    img_dtype = tables.UInt8Atom()  # dtype in which the images will be saved
    data_shape = (0, data_shape[0], data_shape[1], 64)
    # open an hdf5 file and create an extendable earray
    # (the node name 'data' is assumed here; the original call omitted a name)
    hdf5_file = tables.open_file(hdf5_path + "inference_output.hdf5", mode='w')
    image_storage = hdf5_file.create_earray(hdf5_file.root, 'data', img_dtype,
                                            shape=data_shape)
    # append each datum along the extensible (first) axis
    for i, datum in enumerate(data):
        image_storage.append(datum[None])
    hdf5_file.close()
def create_carray(track, chrom):
    atom = tables.UInt8Atom(dflt=0)
    zlib_filter = tables.Filters(complevel=1, complib="zlib")
    # create CArray for this chromosome
    shape = [chrom.length]
    carray = track.h5f.createCArray(track.h5f.root, chrom.name,
                                    atom, shape, filters=zlib_filter)
    return carray
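# Hedged usage sketch (assumed, not from the original module): `track` wraps an
# open PyTables handle as track.h5f and `chrom` carries .name/.length, so simple
# namespaces stand in here; it also assumes the legacy camelCase createCArray API
# used above is available in the installed PyTables version.
from types import SimpleNamespace
track = SimpleNamespace(h5f=tables.open_file('track.h5', mode='w'))
chrom = SimpleNamespace(name='chr1', length=1_000_000)
carray = create_carray(track, chrom)
carray[1000:2000] = 255      # mark a region
region = carray[1500:1600]   # read back a slice as a numpy array
track.h5f.close()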
def inizialize_dataset():
    global X_storage, Y_storageID, desc_storage
    h5 = tables.open_file(db_path, mode='w')
    data_shape = (0, sizedb[0], sizedb[1], sizedb[2])
    img_dtype = tables.UInt8Atom()
    label_dtype = tables.UInt64Atom()
    X_storage = h5.create_earray(h5.root, 'X', img_dtype, shape=data_shape)
    Y_storageID = h5.create_earray(h5.root, 'Y_ID', label_dtype, shape=(0,))
    desc_storage = h5.create_earray(h5.root, 'desc', label_dtype, shape=(0, 6))  # video, frame, bounding box
def recordStringInHDF5(hf5, group, nodename, s):
    '''creates an Array object in an HDF5 file that represents a unicode string'''
    byte_arr = np.frombuffer(s.encode('utf-8'), np.uint8)
    atom = tables.UInt8Atom()
    array = hf5.create_array(group, nodename, atom=atom, obj=byte_arr,
                             shape=(len(byte_arr),))
    return array
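# Hedged round-trip sketch (assumed, not from the original module): the stored
# uint8 Array can be decoded back into a Python string; the file and node names
# are placeholders.
with tables.open_file('strings.h5', mode='w') as hf5:
    node = recordStringInHDF5(hf5, hf5.root, 'notes', u'voil\u00e0 \u2713')
    restored = node.read().tobytes().decode('utf-8')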
def prepare():
    import os
    import sys
    import numpy as np
    import tables
    import tqdm

    import domain_datasets
    import cv2

    synsigns_path = domain_datasets.get_data_dir('syn_signs')
    data_path = os.path.join(synsigns_path, 'synthetic_data')
    labels_path = os.path.join(data_path, 'train_labelling.txt')

    if not os.path.exists(labels_path):
        print('Labels path {} does not exist'.format(labels_path))
        sys.exit(0)

    # Open the file that lists the image files along with their ground truth class
    lines = [line.strip() for line in open(labels_path, 'r').readlines()]
    lines = [line for line in lines if line != '']

    output_path = os.path.join(synsigns_path, 'syn_signs.h5')
    print('Creating {}...'.format(output_path))
    f_out = tables.open_file(output_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'syn_signs', 'Syn-Signs data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(g_out, 'X_u8', tables.UInt8Atom(), (0, 3, 40, 40),
                                   expectedrows=len(lines), filters=filters)

    y = []
    for line in tqdm.tqdm(lines):
        image_filename, gt, _ = line.split()
        image_path = os.path.join(data_path, image_filename)
        if not os.path.exists(image_path):
            print('Could not find image file {} mentioned in annotations'.format(image_path))
            return
        image_data = cv2.imread(image_path)[:, :, ::-1]  # BGR -> RGB
        X_u8_arr.append(image_data.transpose(2, 0, 1)[None, ...])
        y.append(int(gt))

    y = np.array(y, dtype=np.int32)
    f_out.create_array(g_out, 'y', y)
    print('X.shape={}'.format(X_u8_arr.shape))
    print('y.shape={}'.format(y.shape))
    f_out.close()
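# Hedged read-back sketch (assumed, not part of the original script): X_u8 is
# stored channels-first as uint8 and is typically rescaled to float for training;
# the path mirrors the one built inside prepare() above.
import os
import numpy as np
import tables
import domain_datasets

h5_path = os.path.join(domain_datasets.get_data_dir('syn_signs'), 'syn_signs.h5')
with tables.open_file(h5_path, mode='r') as f:
    X = f.root.syn_signs.X_u8[:128].astype(np.float32) / 255.0
    y = f.root.syn_signs.y[:128]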
def create_data_file(out_file, n_samples, image_shape, imtype):
    n_channels = 1
    if os.path.isfile(out_file):
        os.remove(out_file)
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='zlib')
    print('Compression details for ' + imtype + ' images :', filters)
    data_shape = tuple([0] + list(image_shape) + [n_channels])
    storage = hdf5_file.create_earray(hdf5_file.root, imtype, tables.UInt8Atom(),
                                      shape=data_shape, filters=filters,
                                      expectedrows=n_samples)
    return hdf5_file, storage, filters
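# Hedged usage sketch (assumed, not from the original module): the node is named
# after imtype and stored channels-last, so one grayscale image of shape
# image_shape + (1,) is appended with a leading sample axis; names are placeholders.
hdf5_file, storage, filters = create_data_file('train_images.h5', n_samples=1000,
                                               image_shape=(256, 256), imtype='train')
storage.append(np.zeros((1, 256, 256, 1), dtype=np.uint8))
hdf5_file.close()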
def init_dataset(fid, tot_frames, im_height, im_width, is_expandable=True):
    img_dataset = fid.create_earray('/',
                                    'full_data',
                                    atom=tables.UInt8Atom(),
                                    shape=(0, im_height, im_width),
                                    chunkshape=(1, im_height, im_width),
                                    expectedrows=tot_frames,
                                    filters=TABLE_FILTERS)
    return img_dataset
def write_image(self, topic_group, data):
    # Note: you need to load and reshape afterwards (data.reshape(480, 640, 3))
    self.pytable_writer_helper(topic_group, ['data'], tables.UInt8Atom(), data)
    self.pytable_writer_helper(topic_group, ['width', 'height', 'step', 'is_bigendian'],
                               tables.Int64Atom(), data)
    self.pytable_writer_helper(topic_group, ['encoding'],
                               tables.StringAtom(itemsize=15), data)
    self.pytable_writer_helper(topic_group, ['time'], tables.Float64Atom(), data)
def accumulate_in_hdf5(assembled, h5file_path, img_dim, lab_dim, augment,
                       augment_chances, classes_to_augment, valid_ratio):
    # create hdf5 file of preprocessed images for fast loading and training
    compression = tables.Filters(complevel=5, complib='bzip2')
    h5file = tables.open_file(h5file_path, 'w', filters=compression)
    storage_train_x = h5file.create_earray(h5file.root, 'train_x', tables.UInt8Atom(),
                                           shape=(0, img_dim[0], img_dim[1]))
    storage_test_x = h5file.create_earray(h5file.root, 'test_x', tables.UInt8Atom(),
                                          shape=(0, img_dim[0], img_dim[1]))
    storage_train_y = h5file.create_earray(h5file.root, 'train_y', tables.UInt8Atom(),
                                           shape=(0, lab_dim))
    storage_test_y = h5file.create_earray(h5file.root, 'test_y', tables.UInt8Atom(),
                                          shape=(0, lab_dim))

    images = DataGenFile(assembled, augment_chances, to_uint8, lab_dim, img_dim,
                         augment, classes_to_augment)
    for i, (img, lab, is_augm) in enumerate(images()):
        lab = lab.astype(np.uint8)
        if np.random.random() < valid_ratio and not is_augm:
            storage_test_x.append(img[None])
            storage_test_y.append(lab[None])
        else:
            storage_train_x.append(img[None])
            storage_train_y.append(lab[None])
        if i % 1000 == 0:
            print('{}/lot is done, where lot > {}'.format(i, len(assembled)))

    h5file.close()
    print('HDF5 FILE SAVED')
    print('IMAGES SKIPPED:', file=sys.stderr)
    print(images.images_skipped, file=sys.stderr)
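# Hedged read-back sketch (assumed, not from the original module): the four
# earrays written by accumulate_in_hdf5 can be streamed back in fixed-size
# batches without loading the whole file; 'dataset.h5' is a placeholder path.
with tables.open_file('dataset.h5', mode='r') as h5:
    n_train = h5.root.train_x.shape[0]
    for start in range(0, n_train, 32):
        batch_x = h5.root.train_x[start:start + 32]
        batch_y = h5.root.train_y[start:start + 32]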
def create(self, key, n=5):
    if key not in self._groups:
        self._groups[key] = self._fileh.create_group(self._fileh.root, key, title=key)
    if "current" not in self._groups[key]:
        # NOTE: Creating as a UInt8Atom because of issues with implicit byte conversion
        # using VLStringAtom (that I don't understand). The effect should be the same.
        self._fileh.create_vlarray(self._groups[key], "current",
                                   atom=tables.UInt8Atom(shape=()),
                                   filters=tables.Filters(complevel=0))
    self._segment_limits[key] = n
    self._groups[key]._v_attrs['_twola_n_segments'] = n
    self._fileh.flush()
def _save_ndarray(handler, group, name, x, filters=None):
    if np.issubdtype(x.dtype, np.unicode_):
        # Convert unicode strings to pure byte arrays
        strtype = b'unicode'
        itemsize = x.itemsize // 4
        atom = tables.UInt8Atom()
        x = x.view(dtype=np.uint8)
    elif np.issubdtype(x.dtype, np.string_):
        strtype = b'ascii'
        itemsize = x.itemsize
        atom = tables.StringAtom(itemsize)
    elif x.dtype == np.object:
        # Not supported by HDF5, force pickling
        _save_pickled(handler, group, x, name=name)
        return
    else:
        atom = tables.Atom.from_dtype(x.dtype)
        strtype = None
        itemsize = None

    if x.ndim > 0 and np.min(x.shape) == 0:
        sh = np.array(x.shape)
        atom0 = tables.Atom.from_dtype(np.dtype(np.int64))
        node = handler.create_array(group, name, atom=atom0, shape=(sh.size,))
        node._v_attrs.zeroarray_dtype = np.dtype(x.dtype).str.encode('ascii')
        node[:] = sh
        return

    if x.ndim == 0 and len(x.shape) == 0:
        # This is a numpy array scalar. We will store it as a regular scalar
        # instead, which means it will be unpacked as a numpy scalar (not a
        # numpy array scalar).
        setattr(group._v_attrs, name, x[()])
        return

    # For small arrays, compression actually leads to larger files, so we are
    # setting a threshold here. The threshold has been set through
    # experimentation.
    if filters is not None and x.size > 300:
        node = handler.create_carray(group, name, atom=atom, shape=x.shape,
                                     chunkshape=None, filters=filters)
    else:
        node = handler.create_array(group, name, atom=atom, shape=x.shape)

    if strtype is not None:
        node._v_attrs.strtype = strtype
        node._v_attrs.itemsize = itemsize

    node[:] = x
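# Hedged usage sketch (assumed, not from the original module): _save_ndarray
# expects an open PyTables handle and a target group; arrays with more than 300
# elements go into a compressed CArray when filters are supplied, smaller ones
# into a plain Array. Names and sizes below are placeholders.
with tables.open_file('store.h5', mode='w') as handler:
    grp = handler.create_group('/', 'payload')
    filters = tables.Filters(complevel=5, complib='blosc')
    _save_ndarray(handler, grp, 'small', np.arange(10), filters=filters)       # plain Array
    _save_ndarray(handler, grp, 'big', np.random.rand(64, 64), filters=filters)  # compressed CArray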
def create_data_file(out_file, n_channels, n_samples, image_shape,
                     storage_names=('data', 'truth', 'affine'),
                     affine_shape=(0, 4, 4), normalize=True,
                     affine_dtype=tables.Float32Atom()):
    hdf5_file = tables.open_file(out_file, mode='w')
    # complib='blosc' removed as suggested in https://github.com/ellisdg/3DUnetCNN/issues/58
    filters = tables.Filters(complevel=5)
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))

    if not normalize:
        data_storage = hdf5_file.create_earray(hdf5_file.root, storage_names[0],
                                               tables.Int8Atom(), shape=data_shape,
                                               filters=filters, expectedrows=n_samples)
    else:
        data_storage = hdf5_file.create_earray(hdf5_file.root, storage_names[0],
                                               tables.Float32Atom(), shape=data_shape,
                                               filters=filters, expectedrows=n_samples)
    truth_storage = hdf5_file.create_earray(hdf5_file.root, storage_names[1],
                                            tables.UInt8Atom(), shape=truth_shape,
                                            filters=filters, expectedrows=n_samples)
    affine_storage = hdf5_file.create_earray(hdf5_file.root, storage_names[2],
                                             affine_dtype, shape=affine_shape,
                                             filters=filters, expectedrows=n_samples)

    if len(storage_names) == 4:
        # will hold the mean and std of this case for later normalization
        normalization_storage = hdf5_file.create_earray(hdf5_file.root, storage_names[3],
                                                        tables.Float32Atom(), shape=(0, 2),
                                                        filters=filters,
                                                        expectedrows=n_samples)
        return hdf5_file, data_storage, truth_storage, affine_storage, normalization_storage

    return hdf5_file, data_storage, truth_storage, affine_storage
def _create_table_list(self, name, example):
    """
    Create a new table within the HDF file, where the table's shape and its
    datatype are determined by *example*.

    This is the modified version for creating a table with appendList.
    """
    type_map = {
        np.dtype(np.float64): tables.Float64Atom(),
        np.dtype(np.float32): tables.Float32Atom(),
        np.dtype(np.int): tables.Int64Atom(),
        np.dtype(np.int8): tables.Int8Atom(),
        np.dtype(np.uint8): tables.UInt8Atom(),
        np.dtype(np.int16): tables.Int16Atom(),
        np.dtype(np.uint16): tables.UInt16Atom(),
        np.dtype(np.int32): tables.Int32Atom(),
        np.dtype(np.uint32): tables.UInt32Atom(),
        np.dtype(np.bool): tables.BoolAtom(),
    }

    try:
        if type(example) == np.ndarray:
            h5type = type_map[example.dtype]
        elif type(example) == list and type(example[0]) == str:
            h5type = tables.VLStringAtom()
    except KeyError:
        raise TypeError("Don't know how to handle dtype '%s'" % example.dtype)

    if type(example) == np.ndarray:
        h5dim = (0,) + example.shape[1:]
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_earray(h5.root, name, h5type, h5dim,
                                             filters=filters)
    elif type(example) == list and type(example[0]) == str:
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib', shuffle=True)
        self.tables[name] = h5.create_vlarray(h5.root, name, h5type,
                                              filters=filters)
    self.types[name] = type(example)