def prepareMusk2(path, name):
    print("Prepare Musk2:")
    fname = prepareLoading(name, path)

    # Create the MIL problem from the Musk2 data
    musk2_raw = np.array(pd.read_csv(fname, usecols=range(0, 169),
                                     skip_blank_lines=True, header=None))

    # Scale the features by z-score normalization
    features = musk2_raw[:, 2:-1].astype(np.float64)
    features = scale(features)
    keys = musk2_raw[:, 0]
    labels = musk2_raw[:, -1]

    musk2 = milData('musk2')
    for i, row in enumerate(features):
        key = keys[i]
        x = row
        z = labels[i]
        musk2.add_x(key, x, z, UPDATE=False)
    musk2.save(path)

    print("Bags: ", musk2.N_B, "(+: {p}, -: {n})".format(
        p=np.sum(musk2.z == 1), n=np.sum(musk2.z == 0)))
    print("Instances: ", musk2.N_X, "(+: {p}, -: {n})".format(
        p=np.sum(musk2.y == 1), n=np.sum(musk2.y == 0)))
    print("Features: ", musk2.N_D)
    print("\n")
    return musk2
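# A minimal usage sketch for prepareMusk2. The directory and file name are
# illustrative; the CSV must follow the 169-column layout assumed above
# (2 name columns, 166 features, 1 class label):
#
#   musk2 = prepareMusk2('data/', 'musk2.csv')
#   print(musk2.N_B, musk2.N_X, musk2.N_D)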
def prepareFox(path, name):
    print("Prepare Fox:")
    fname = prepareLoading(name, path)

    # Create the MIL problem from the Fox data
    mat = loadmat(fname)  # load the .mat file only once
    features = scale(mat["Data"])
    keys = mat['bags']
    labels = mat['labels']

    b = 0
    fox = milData('fox')
    for i, row in enumerate(features):
        x = row
        key = keys[i][0]
        z = labels[b][0]
        fox.add_x(key, x, z, UPDATE=False)
        if i < len(features) - 1 and key != keys[i + 1]:
            # will the next key belong to the next bag?
            b += 1
    fox.save(path)

    print("Bags: ", fox.N_B, "(+: {p}, -: {n})".format(
        p=np.sum(fox.z == 1), n=np.sum(fox.z == 0)))
    print("Instances: ", fox.N_X, "(+: {p}, -: {n})".format(
        p=np.sum(fox.y == 1), n=np.sum(fox.y == 0)))
    print("Features: ", fox.N_D)
    print("\n")
    return fox
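# A usage sketch for prepareFox, assuming the Fox benchmark is stored as a
# .mat file exposing the 'Data', 'bags' and 'labels' variables used above
# (the file name is illustrative):
#
#   fox = prepareFox('data/', 'fox.mat')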
def clif2lf(clif_file, lf_file, clif_group, lf_dataset="lightfield"):
    """
    Convert a standard .clif file to an .hdf5 lightfield file.

    Parameters
    ----------
    clif_file : string
        The .clif filename including the directory.
    lf_file : string
        The filename (including the directory) of the output .hdf5
        lightfield.
    clif_group : string
        The container name inside the .clif file.
    lf_dataset : string, optional
        The dataset name inside the .hdf5 file for the lightfield.
    """
    # Initialize the file paths
    fname_in = os.path.basename(clif_file)
    dir_in = os.path.dirname(clif_file)
    clif_file = prepareLoading(fname_in, path=dir_in)

    fname_out = os.path.basename(lf_file)
    dir_out = os.path.dirname(lf_file)
    lf_file = prepareSaving(fname_out, path=dir_out, extension=".hdf5")

    # Load the data from the .clif container and reorder the axes
    data = h5py.File(clif_file, 'r')[clif_group]
    data = np.swapaxes(data, 1, 3)
    data = np.swapaxes(data, 1, 2)

    f_out = h5py.File(lf_file, 'w')
    f_out.create_dataset(lf_dataset, data=data)
    f_out.close()
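# A usage sketch for clif2lf (all paths and the container name are
# illustrative):
#
#   clif2lf('data/scene.clif', 'data/scene_lf.hdf5',
#           clif_group='rgb', lf_dataset='lightfield')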
def load_dict(fname, path=None):
    """
    Load the MilDictionary from a .json file.

    Parameters
    ----------
    fname : string
        The name of the .json file to load.
    path : string, optional
        The path where the datafile is stored.

    Returns
    -------
    mydict : dict
        A MilDictionary has the bag names as keys and for each key an
        instance dataset as a 2D ndarray.
    """
    fname = prepareLoading(fname, path=path, extension=".json")

    mydict = pd.read_json(fname, typ='series').to_dict()
    for key, value in mydict.items():
        mydict[key] = np.array(value)
    return mydict
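# A usage sketch for load_dict, assuming a dictionary previously saved as
# 'bags.json' (both names are illustrative):
#
#   bags = load_dict('bags.json', path='data/')
#   for name, instances in bags.items():
#       print(name, instances.shape)  # each value is a 2D ndarray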
def __init__(self, lightfield, lf_dataset, output_dir,
             working_dir='work_tmp/', n_cpus=-1, r_start=None, s_hat=None,
             DEBUG=False):
    """
    Constructor to set up all files and parameters. Also performs some
    required pre-computations.

    Parameters
    ----------
    lightfield : string
        The filename of the lightfield including the directory.
    lf_dataset : string
        The group name inside the lightfield's hdf5 file.
    output_dir : string
        The directory to store the final results in.
    working_dir : string, optional
        A temporary directory to work in.
    n_cpus : int, optional
        The number of cpus to use. If -1, all available cpus will be used.
    r_start : tuple (int, int), optional
        The resolution (r_v, r_u) to start with (only honored when DEBUG
        is set).
    s_hat : int, optional
        If given, only this s-dimension will be calculated.
    DEBUG : bool, optional
        Enable DEBUG output.
    """
    lightfield = prepareLoading(lightfield)
    self.output_dir = prepareSaving(output_dir)
    self.working_dir = prepareSaving(working_dir)

    self.n_cpus = n_cpus
    if self.n_cpus == -1:
        self.n_cpus = cpu_count()
    self.DEBUG = DEBUG  # Plot intermediate results and other debugging output

    # Attributes of .hdf5 files to load or store the data
    self.lf_dataset = lf_dataset
    self.light_field = h5py.File(os.path.expanduser(lightfield), 'r')
    self.epi_field = h5py.File(
        os.path.join(self.working_dir, 'epis.hdf5'), 'a')
    self.disp_field = h5py.File(
        os.path.join(self.working_dir, 'disparities.hdf5'), 'a')
    if self.DEBUG:
        self.score_field = h5py.File(
            os.path.join(self.working_dir, 'scores.hdf5'), 'a')
        self.DB_field = h5py.File(
            os.path.join(self.working_dir, 'disparity_bounds.hdf5'), 'a')
        self.Ce_field = h5py.File(
            os.path.join(self.working_dir, 'edge_confidences.hdf5'), 'a')
        self.Cd_field = h5py.File(
            os.path.join(self.working_dir, 'disparity_confidences.hdf5'), 'a')

    # Runtime attributes
    # The resolution of the original lightfield (s, v, u)
    self.lf_res = self.light_field[self.lf_dataset].shape
    # A ndarray [r]. The different EPI resolutions (v, u)
    self.epi_res = None
    # By default start at the full resolution; in DEBUG mode a custom start
    # resolution may be given.
    self.r_start = self.lf_res[1:3]
    if self.DEBUG:
        self.r_start = self.lf_res[1:3] if r_start is None else r_start
    self.s_hat = s_hat  # If not None, the only scanline to process

    self.initialize()
    print("All data loaded!")
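# A usage sketch for the constructor above. The class name `Disparity` is an
# assumption for illustration only; substitute the class this method belongs
# to, and adjust the paths:
#
#   estimator = Disparity('data/lf.hdf5', lf_dataset='lightfield',
#                         output_dir='results/', n_cpus=4, DEBUG=False)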
def downsample_lightfield(lf_in, lf_out, hdf5_dataset, r_all):
    """
    Reduces the dimension of the input lightfield to the values given.
    Results are stored in a new hdf5 file.

    Parameters
    ----------
    lf_in : string
        The input hdf5 filename (including the directory) of the lightfield.
    lf_out : string
        The output hdf5 filename (including the directory) of the
        lightfield in all resolutions.
    hdf5_dataset : string
        The container name inside the hdf5 file for the lightfield. The
        same name will be used for the new file.
    r_all : array_like
        All resolutions to create. Each entry is a tuple (v, u) of
        resolutions.
    """
    # Initialize the hdf5 file objects
    lf_in = prepareLoading(lf_in)
    lf_out = prepareSaving(lf_out, extension=".hdf5")
    lf_in = h5py.File(lf_in, 'r')
    lf_out = h5py.File(lf_out, 'w')
    data_in = lf_in[hdf5_dataset]

    # Find out what data we have
    if len(data_in.shape) == 4 and data_in.shape[-1] == 3:
        RGB = True
    elif len(data_in.shape) == 3:
        RGB = False
    else:
        raise TypeError(
            'The given lightfield contains neither gray nor RGB images!')

    # Which dtype should be used?
    if data_in.dtype == np.float64:
        DTYPE = np.float64
    elif data_in.dtype == np.uint8:
        DTYPE = np.uint8
    elif data_in.dtype == np.uint16:
        DTYPE = np.uint16
    else:
        raise TypeError('The given data type is not supported!')

    # We need to store all resolutions
    grp_out = lf_out.create_group(hdf5_dataset)
    grp_out.attrs.create('resolutions', r_all)

    # Initialize a progress bar to follow the downsampling
    widgets = ['Downscale lightfield: ', Percentage(), ' ', Bar(), ' ',
               ETA(), ' ']
    progress = ProgressBar(widgets=widgets, maxval=r_all.shape[0]).start()

    for r, res in enumerate(r_all):
        if RGB:
            data_out = grp_out.create_dataset(
                str(res[0]) + 'x' + str(res[1]),
                shape=(data_in.shape[0], res[0], res[1], data_in.shape[3]),
                dtype=data_in.dtype)
        else:
            data_out = grp_out.create_dataset(
                str(res[0]) + 'x' + str(res[1]),
                shape=(data_in.shape[0], res[0], res[1]),
                dtype=data_in.dtype)

        if r == 0:  # at the first (full) resolution we keep the original image
            for s in range(data_in.shape[0]):
                data_out[s] = data_in[s]
        else:  # we smooth the input data before resizing
            data_prior = grp_out[str(r_all[r - 1][0]) + 'x' +
                                 str(r_all[r - 1][1])]
            for s in range(data_in.shape[0]):
                data_smoothed = img_as_float(
                    gaussian(data_prior[s], sigma=np.sqrt(0.5),
                             multichannel=RGB))
                if DTYPE is np.float64:
                    data_out[s] = img_as_float(
                        resize(data_smoothed, (res[0], res[1])))
                elif DTYPE is np.uint16:
                    data_out[s] = img_as_uint(
                        resize(data_smoothed, (res[0], res[1])))
                else:
                    data_out[s] = img_as_ubyte(
                        resize(data_smoothed, (res[0], res[1])))
        progress.update(r)
    progress.finish()

    # Cleanup
    lf_in.close()
    lf_out.close()
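# A usage sketch for downsample_lightfield. The resolutions are illustrative;
# r_all[0] must match the original (v, u) resolution, since the first entry
# is copied verbatim and each later one is smoothed and resized from the
# previous:
#
#   r_all = np.array([(512, 512), (256, 256), (128, 128)])
#   downsample_lightfield('lf.hdf5', 'lf_pyramid.hdf5', 'lightfield', r_all)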
def create_epis(lf_in, epi_out, hdf5_dataset_in="lightfield",
                hdf5_dataset_out="epis", dtype=np.float64, RGB=True):
    """
    Create epis for all resolutions given by the input lightfield.

    Parameters
    ----------
    lf_in : string
        The input hdf5 filename (including the directory) of the lightfield.
    epi_out : string
        The output hdf5 filename (including the directory) of the epis in
        all resolutions.
    hdf5_dataset_in : string
        The container name inside the hdf5 file for the lightfield.
    hdf5_dataset_out : string, optional
        The container name inside the hdf5 file for the epis.
    dtype : numpy.dtype, optional
        The new data type for the epis. Must be either np.float64, np.uint8
        or np.uint16.
    RGB : bool, optional
        If True, the output epis will be converted to RGB (default).
        Otherwise gray type images are stored.
    """
    # Initialize the file paths
    lf_in = prepareLoading(lf_in)
    epi_out = prepareSaving(epi_out, extension=".hdf5")

    # Initialize the hdf5 file objects
    lf_in = h5py.File(lf_in, 'r')
    epi_out = h5py.File(epi_out, 'w')

    # Read the resolution attribute and copy it over to the epi file
    r_all = lf_in[hdf5_dataset_in].attrs.get('resolutions')[...]
    epi_grp = epi_out.create_group(hdf5_dataset_out)
    epi_grp.attrs.create('resolutions', r_all)

    # Initialize a progress bar to follow the conversion
    widgets = ['Create EPIs: ', Percentage(), ' ', Bar(), ' ', ETA(), ' ']
    progress = ProgressBar(widgets=widgets, maxval=r_all.shape[0]).start()

    for r, res in enumerate(r_all):
        progress.update(r)
        set_name = str(res[0]) + 'x' + str(res[1])
        lf_data = lf_in[hdf5_dataset_in + '/' + set_name]

        # Find out what data we have
        if len(lf_data.shape) == 4 and lf_data.shape[-1] == 3:
            OLDRGB = True
        elif len(lf_data.shape) == 3:
            OLDRGB = False
        else:
            raise TypeError(
                'The given lightfield contains neither gray nor RGB images!')

        if RGB:
            epi_data = epi_grp.create_dataset(
                set_name, shape=(res[0], lf_data.shape[0], res[1], 3),
                dtype=dtype)
        else:
            epi_data = epi_grp.create_dataset(
                set_name, shape=(res[0], lf_data.shape[0], res[1]),
                dtype=dtype)

        # Pick the dtype converter once instead of branching per scanline
        if dtype == np.float64:
            convert = img_as_float
        elif dtype == np.uint16:
            convert = img_as_uint
        elif dtype == np.uint8:
            convert = img_as_ubyte
        else:
            raise TypeError('Given dtype not supported.')

        # Slice the lightfield into EPIs, converting the color mode if needed
        for v in range(res[0]):
            if RGB and not OLDRGB:
                epi = gray2rgb(lf_data[:, v, ...])
            elif not RGB and OLDRGB:
                epi = rgb2gray(lf_data[:, v, ...])
            else:
                epi = lf_data[:, v, ...]
            epi_data[v] = convert(epi).reshape(epi_data[v].shape)
    progress.finish()

    # Cleanup
    lf_in.close()
    epi_out.close()
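# A usage sketch for create_epis, consuming the multi-resolution file produced
# by downsample_lightfield above (file names are illustrative):
#
#   create_epis('lf_pyramid.hdf5', 'epis.hdf5',
#               hdf5_dataset_in='lightfield', hdf5_dataset_out='epis',
#               dtype=np.float64, RGB=True)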