def __getitem__(self, item):
    """Return datapoint `item` at the current model depth as a float32 torch tensor.

    Applies alpha fading while the progressive-growing blend is active
    (alpha < 1.0), then rescales values from `range_in` to `range_out`.
    """
    sample = self.data[self.model_depth + self.model_dataset_depth_offset][item]
    if self.alpha < 1.0:
        # blend between resolutions during progressive growing
        sample = self.alpha_fade(sample)
    sample = adjust_dynamic_range(sample, self.range_in, self.range_out)
    return torch.from_numpy(sample.astype('float32'))
def __getitem__(self, item):
    """Fetch datapoint `item`, resize to the model's depth, fade, rescale, and
    return it as a float32 torch tensor.

    When `preload` is set the parent class already holds the data in memory,
    so we delegate to it directly.
    """
    if self.preload:
        # we have access to the data attribute
        return super(FolderDataset, self).__getitem__(item)
    sample = self.load_file(item)
    sample = self.get_datapoint_version(
        sample,
        self.max_dataset_depth,
        self.model_depth + self.model_dataset_depth_offset,
    )
    sample = self.alpha_fade(sample)
    sample = adjust_dynamic_range(sample, self.range_in, self.range_out)
    return torch.from_numpy(sample.astype('float32'))
def image_to_sound(self, image):
    """Invert a spectrogram image back into a 1-D audio signal.

    The result is peak-normalized to max |amplitude| == 1.
    Raises if `self.mode` is not one of: reallog, abslog, raw.
    """
    if self.mode in ('reallog', 'abslog'):
        # real spectrograms have 2**i + 1 freq bins, so pad one extra row
        padded = np.zeros((image.shape[0] + 1, image.shape[1]))
        padded[:image.shape[0], :image.shape[1]] = image
        if self.mode == 'reallog':
            signed = adjust_dynamic_range(padded, self.drange, (-1, 1))
            # undo log(1 + |x|) while restoring the original sign
            real_pt_stft = (np.exp(np.abs(signed)) - 1) * np.sign(signed)
            signal = lbr.istft(real_pt_stft, self.hop_length)
        else:
            magnitude = adjust_dynamic_range(padded, self.drange, (0, 255))
            signal = self.reconstruct_from_magnitude(magnitude)
    elif self.mode == 'raw':
        signal = image.ravel()
    else:
        raise Exception(
            'image_to_sound: unrecognized mode: {}. Available modes are: reallog, abslog, raw.'.format(self.mode)
        )
    return signal / np.abs(signal).max()
def generate_samples(self, num_samples, current_resolution):
    """Sample `num_samples` images from the generator and save them as
    grayscale JPEGs named res_<resolution>_<i>.jpg in `generated_dir`."""
    for idx in range(num_samples):
        z = tf.random.normal((1, self.latent_size))
        fakes = self.train_generator([z, 1.0])
        # map generator output from [-1, 1] into displayable [0, 255]
        fakes = utils.adjust_dynamic_range(fakes, [-1.0, 1.0], [0.0, 255.0])
        fakes = tf.clip_by_value(fakes, 0.0, 255.0)
        pixels = np.squeeze(np.array(fakes[0])).astype(np.uint8)
        out_path = self.generated_dir.joinpath(
            'res_{}_{}.jpg'.format(current_resolution, idx))
        Image.fromarray(pixels, 'L').save(str(out_path))
def convert_to_pil_image(self, image):
    """Convert a CHW (or 2-D) array in `self.drange` to a uint8 PIL image.

    A single-channel 3-D input becomes grayscale ('L'); three channels are
    transposed to HWC for RGB. Note a 2-D input falls through with mode 'RGB'.
    """
    pil_mode = 'RGB'
    if image.ndim == 3:
        if image.shape[0] == 1:
            # drop the channel axis and emit grayscale
            image = image[0]
            pil_mode = 'L'
        else:
            # CHW -> HWC, the layout PIL expects
            image = image.transpose(1, 2, 0)
            pil_mode = 'RGB'
    scaled = utils.adjust_dynamic_range(image, self.drange, (0, 255))
    scaled = scaled.round().clip(0, 255).astype(np.uint8)
    return PIL.Image.fromarray(scaled, pil_mode)
def image_to_sound(self, image):
    """Reconstruct a time-domain signal from a spectrogram image.

    The returned signal is normalized to unit peak amplitude.
    Raises if `self.mode` is not one of: reallog, abslog, raw.
    """
    mode = self.mode
    if mode == 'raw':
        signal = image.ravel()
    elif mode == 'reallog' or mode == 'abslog':
        # pad one frequency row: real spectrograms have 2**i + 1 bins
        buf = np.zeros((image.shape[0] + 1, image.shape[1]))
        buf[:image.shape[0], :image.shape[1]] = image
        if mode == 'reallog':
            scaled = adjust_dynamic_range(buf, self.drange, (-1, 1))
            # invert the log1p magnitude encoding, keeping the sign
            stft_real = np.sign(scaled) * (np.exp(np.abs(scaled)) - 1)
            signal = lbr.istft(stft_real, self.hop_length)
        else:
            signal = self.reconstruct_from_magnitude(
                adjust_dynamic_range(buf, self.drange, (0, 255)))
    else:
        raise Exception(
            'image_to_sound: unrecognized mode: {}. Available modes are: reallog, abslog, raw.'
            .format(self.mode))
    return signal / np.abs(signal).max()
def generate_samples_3d(self, num_samples, current_resolution):
    """Generate `num_samples` 3-D volumes from the generator and save each as
    a NIfTI file named res_<resolution>_<i>.nii.gz in `generated_dir`."""
    for sample_idx in range(num_samples):
        # alternate between the two class labels across samples
        latents = self.sample_random_latents(batch_size=1, label=sample_idx % 2)
        volume = self.train_generator([latents, 1.0])
        # map generator output from [-1, 1] into [0, 255] for storage
        volume = utils.adjust_dynamic_range(volume, [-1.0, 1.0], [0.0, 255.0])
        volume = tf.clip_by_value(volume, 0.0, 255.0)
        voxels = np.squeeze(np.array(volume[0])).astype(np.uint8)
        filename = 'res_{}_{}.nii.gz'.format(current_resolution, sample_idx)
        nib.save(nib.Nifti1Image(voxels, np.eye(4)),
                 str(self.generated_dir.joinpath(filename)))
def load_file(self, item):
    """Load audio file `item` and return it as a uint8 image array with a
    leading channel axis (shape (1, H, W)).

    'raw' mode folds the waveform into a square; otherwise an STFT is taken
    and log-compressed according to `img_mode` ('abslog' or signed real-log).
    """
    audio, _ = sound_load_fun(self.files[item], self.frequency, dtype='float32')
    if audio.ndim == 2:
        # average the two stereo channels down to mono
        audio = audio.sum(axis=1) / 2
    if self.img_mode == 'raw':
        # crop to the largest power-of-4 sample count and fold into a square
        side = 2 ** int(np.log2(np.sqrt(audio.shape[0])))
        img = audio[:side * side].reshape((side, side))
    else:
        spec = lbr.stft(audio, self.n_fft, self.hop_length)
        spec = spec[:self.n_fft // 2, :self.n_fft // 2]
        if self.img_mode == 'abslog':
            img = np.log(1 + np.abs(spec))
        else:
            # NOTE(review): np.sign on the complex STFT returns s/|s|, not ±1 —
            # confirm this is the intended signed-log encoding
            img = np.log(1 + np.abs(spec.real)) * np.sign(spec)
    # normalize the image's own min/max into range_in, then quantize to uint8
    img = np.uint8(adjust_dynamic_range(img, (img.min(), img.max()), self.range_in))
    return img[np.newaxis]
def __init__(self, data_path, labels_fine_list=None, labels_coarse_list=None):
    """Load the dSprites dataset and configure (semi-)supervised factor labels.

    Args:
        data_path: root directory containing the 'dsprites' subfolder with
            the dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz archive.
        labels_fine_list: indices of fine factors used as labels; defaults to
            an empty list. Warned (not rejected) if not a subset of [0, 1].
        labels_coarse_list: indices of coarse factors used as labels; defaults
            to an empty list. Warned if not a subset of [2, 3, 4].
    """
    # Bug fix: the original used mutable default arguments ([]), which are
    # shared across calls; use a None sentinel and create fresh lists instead.
    self.labels_fine_list = [] if labels_fine_list is None else labels_fine_list
    self.labels_coarse_list = [] if labels_coarse_list is None else labels_coarse_list
    # By default, all factors (including shape) are considered ground truth factors.
    self.latent_factor_indices = self.labels_fine_list + self.labels_coarse_list
    self.fine_factor_indices = [0, 1]
    self.coarse_factor_indices = [2, 3, 4]
    if not set(self.labels_fine_list).issubset(set(self.fine_factor_indices)):
        print("[warning]: labels_fine_list is not a subset of fine ground-truth list")
    if not set(self.labels_coarse_list).issubset(set(self.coarse_factor_indices)):
        print("[warning]: labels_coarse_list is not a subset of coarse ground-truth list")
    self.data_shape = [64, 64, 1]
    # Load the data so that we can sample from it.
    dsprites_file = os.path.join(
        data_path, 'dsprites', 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
    with gfile.Open(dsprites_file, "rb") as data_file:
        # Data was saved originally using python2, so we need to set the encoding.
        data = np.load(data_file, encoding="latin1", allow_pickle=True)
        self.images = np.array(data["imgs"])
        self.images = adjust_dynamic_range(
            self.images, drange_in=[0, 1], drange_out=[0, 255])  # TODO
        self.factor_sizes = np.array(
            data["metadata"][()]["latents_sizes"], dtype=np.int64)
    self.full_factor_sizes = [3, 6, 40, 32, 32]
    # factor_bases[i] = product of factor sizes "below" factor i, used to
    # convert a factor tuple into a flat dataset index.
    self.factor_bases = np.prod(self.full_factor_sizes) / np.cumprod(
        self.full_factor_sizes)
    self.state_space = SplitDiscreteStateSpace(self.full_factor_sizes,
                                               self.latent_factor_indices)
    # Per-image uniform draws in [0, 1) used to mask labels for
    # semi-supervised learning.
    self.labels_mask = np.random.uniform(0., 1., size=len(self.images))
def get_image(gen, point, depth, alpha):
    """Run the generator on `point` and return the output as an HWC numpy array.

    The tensor is detached from the graph, range-adjusted, de-batched, and
    transposed from CHW to HWC.
    """
    # NOTE(review): adjust_dynamic_range is called here with default ranges,
    # unlike its explicit-range uses elsewhere — confirm the defaults match.
    output = gen(point, depth, alpha).detach()
    output = adjust_dynamic_range(output).squeeze(dim=0)
    return output.cpu().numpy().transpose(1, 2, 0)