Esempio n. 1
0
 def __getitem__(self, item):
     """Return entry ``item`` of the current depth level as a float32 tensor.

     The level is selected from ``self.data`` with
     ``self.model_depth + self.model_dataset_depth_offset``. While
     ``self.alpha`` is below 1.0 the sample is first passed through
     ``self.alpha_fade``; it is then rescaled from ``self.range_in`` to
     ``self.range_out``.
     """
     depth_index = self.model_depth + self.model_dataset_depth_offset
     sample = self.data[depth_index][item]
     if self.alpha < 1.0:
         sample = self.alpha_fade(sample)
     rescaled = adjust_dynamic_range(sample, self.range_in, self.range_out)
     as_float = rescaled.astype('float32')
     return torch.from_numpy(as_float)
Esempio n. 2
0
 def __getitem__(self, item):
     """Return entry ``item`` of the current depth level as a float32 tensor.

     The level is selected from ``self.data`` with
     ``self.model_depth + self.model_dataset_depth_offset``. While
     ``self.alpha`` is below 1.0 the sample is first passed through
     ``self.alpha_fade``; it is then rescaled from ``self.range_in`` to
     ``self.range_out``.
     """
     depth_index = self.model_depth + self.model_dataset_depth_offset
     sample = self.data[depth_index][item]
     if self.alpha < 1.0:
         sample = self.alpha_fade(sample)
     rescaled = adjust_dynamic_range(sample, self.range_in, self.range_out)
     as_float = rescaled.astype('float32')
     return torch.from_numpy(as_float)
Esempio n. 3
0
 def __getitem__(self, item):
     """Return sample ``item`` as a float32 tensor.

     When ``self.preload`` is set the parent class implementation is used
     (it has access to the ``data`` attribute). Otherwise the file is loaded
     on demand, converted with ``self.get_datapoint_version``, passed through
     ``self.alpha_fade`` and rescaled from ``self.range_in`` to
     ``self.range_out``.
     """
     if not self.preload:
         raw = self.load_file(item)
         target_depth = self.model_depth + self.model_dataset_depth_offset
         versioned = self.get_datapoint_version(
             raw, self.max_dataset_depth, target_depth)
         faded = self.alpha_fade(versioned)
         rescaled = adjust_dynamic_range(faded, self.range_in, self.range_out)
         return torch.from_numpy(rescaled.astype('float32'))
     # Preloaded data: delegate to the parent implementation.
     return super(FolderDataset, self).__getitem__(item)
Esempio n. 4
0
 def __getitem__(self, item):
     """Return sample ``item`` as a float32 tensor.

     When ``self.preload`` is set the parent class implementation is used
     (it has access to the ``data`` attribute). Otherwise the file is loaded
     on demand, converted with ``self.get_datapoint_version``, passed through
     ``self.alpha_fade`` and rescaled from ``self.range_in`` to
     ``self.range_out``.
     """
     if not self.preload:
         raw = self.load_file(item)
         target_depth = self.model_depth + self.model_dataset_depth_offset
         versioned = self.get_datapoint_version(
             raw, self.max_dataset_depth, target_depth)
         faded = self.alpha_fade(versioned)
         rescaled = adjust_dynamic_range(faded, self.range_in, self.range_out)
         return torch.from_numpy(rescaled.astype('float32'))
     # Preloaded data: delegate to the parent implementation.
     return super(FolderDataset, self).__getitem__(item)
 def image_to_sound(self, image):
     """Convert ``image`` back into a peak-normalised signal.

     The conversion depends on ``self.mode``:

     * ``'reallog'``: the image is padded with one zero row, rescaled from
       ``self.drange`` to ``(-1, 1)``, the signed ``log(1 + |x|)``
       compression is undone and the result is passed to ``lbr.istft``.
     * ``'abslog'``: the padded image is rescaled to ``(0, 255)`` and handed
       to ``self.reconstruct_from_magnitude``.
     * ``'raw'``: the image is simply flattened.

     Args:
         image: 2-D array (indexed as ``image.shape[0]``, ``image.shape[1]``).

     Returns:
         The signal divided by its absolute peak. An all-zero signal is
         returned as zeros instead of NaNs.

     Raises:
         ValueError: if ``self.mode`` is not one of the modes above.
     """
     if self.mode in ('reallog', 'abslog'):
         # real spectrograms have 2**i + 1 freq bins: add one zero row
         x = np.zeros((image.shape[0] + 1, image.shape[1]))
         x[:image.shape[0], :image.shape[1]] = image
         if self.mode == 'reallog':
             signed = adjust_dynamic_range(x, self.drange, (-1, 1))
             sgn = np.sign(signed)
             real_pt_stft = (np.exp(np.abs(signed)) - 1) * sgn
             # hop_length is keyword-only in recent librosa releases; the
             # keyword form works with old and new versions alike.
             signal = lbr.istft(real_pt_stft, hop_length=self.hop_length)
         else:
             x = adjust_dynamic_range(x, self.drange, (0, 255))
             signal = self.reconstruct_from_magnitude(x)
     elif self.mode == 'raw':
         signal = image.ravel()
     else:
         raise ValueError(
             'image_to_sound: unrecognized mode: {}. Available modes are: reallog, abslog, raw.'.format(self.mode)
         )
     peak = np.abs(signal).max()
     # A silent (all-zero) signal has no peak; dividing by it would yield NaNs.
     return signal / (peak if peak > 0 else 1.0)
Esempio n. 6
0
 def generate_samples(self, num_samples, current_resolution):
     """Generate ``num_samples`` images and save them as JPEG files.

     Each image comes from ``self.train_generator`` fed with a random normal
     latent of size ``self.latent_size`` and the constant ``1.0``. The output
     is rescaled from [-1, 1] to [0, 255], clipped, cast to uint8 and written
     to ``self.generated_dir`` as ``res_<current_resolution>_<index>.jpg``
     in PIL mode ``'L'``.
     """
     for sample_idx in range(num_samples):
         noise = tf.random.normal((1, self.latent_size))
         generated = self.train_generator([noise, 1.0])
         scaled = utils.adjust_dynamic_range(generated, [-1.0, 1.0],
                                             [0.0, 255.0])
         clipped = tf.clip_by_value(scaled, 0.0, 255.0)
         pixels = np.squeeze(np.array(clipped[0])).astype(np.uint8)
         file_name = 'res_{}_{}.jpg'.format(current_resolution, sample_idx)
         target = self.generated_dir.joinpath(file_name)
         picture = Image.fromarray(pixels, 'L')
         picture.save(str(target))
    def convert_to_pil_image(self, image):
        """Convert an array in ``self.drange`` to an 8-bit ``PIL.Image``.

        A 3-D array whose first axis has length 1 is reduced to 2-D; any
        other 3-D array has axis 0 moved to the end (``transpose(1, 2, 0)``).
        The values are rescaled from ``self.drange`` to ``(0, 255)``,
        rounded, clipped and cast to ``uint8``.

        Args:
            image: array exposing ``ndim``, ``shape`` and ``transpose``
                (presumably a NumPy array -- confirm against callers).

        Returns:
            A ``PIL.Image`` in mode ``'L'`` when the final array is 2-D,
            otherwise in mode ``'RGB'``.
        """
        if image.ndim == 3:
            if image.shape[0] == 1:
                image = image[0]  # drop the length-1 leading axis
            else:
                image = image.transpose(1, 2, 0)  # move axis 0 to the end

        # Pick the mode from the final layout: a 2-D array (including one
        # passed in as 2-D) cannot be interpreted as 'RGB'.
        mode = 'L' if image.ndim == 2 else 'RGB'

        image = utils.adjust_dynamic_range(image, self.drange, (0, 255))

        image = image.round().clip(0, 255).astype(np.uint8)
        return PIL.Image.fromarray(image, mode)
Esempio n. 8
0
    def convert_to_pil_image(self, image):
        """Convert an array in ``self.drange`` to an 8-bit ``PIL.Image``.

        A 3-D array whose first axis has length 1 is reduced to 2-D; any
        other 3-D array has axis 0 moved to the end (``transpose(1, 2, 0)``).
        The values are rescaled from ``self.drange`` to ``(0, 255)``,
        rounded, clipped and cast to ``uint8``.

        Args:
            image: array exposing ``ndim``, ``shape`` and ``transpose``
                (presumably a NumPy array -- confirm against callers).

        Returns:
            A ``PIL.Image`` in mode ``'L'`` when the final array is 2-D,
            otherwise in mode ``'RGB'``.
        """
        if image.ndim == 3:
            if image.shape[0] == 1:
                image = image[0]  # drop the length-1 leading axis
            else:
                image = image.transpose(1, 2, 0)  # move axis 0 to the end

        # Pick the mode from the final layout: a 2-D array (including one
        # passed in as 2-D) cannot be interpreted as 'RGB'.
        mode = 'L' if image.ndim == 2 else 'RGB'

        image = utils.adjust_dynamic_range(image, self.drange, (0, 255))

        image = image.round().clip(0, 255).astype(np.uint8)
        return PIL.Image.fromarray(image, mode)
Esempio n. 9
0
 def image_to_sound(self, image):
     """Convert ``image`` back into a peak-normalised signal.

     The conversion depends on ``self.mode``:

     * ``'reallog'``: the image is padded with one zero row, rescaled from
       ``self.drange`` to ``(-1, 1)``, the signed ``log(1 + |x|)``
       compression is undone and the result is passed to ``lbr.istft``.
     * ``'abslog'``: the padded image is rescaled to ``(0, 255)`` and handed
       to ``self.reconstruct_from_magnitude``.
     * ``'raw'``: the image is simply flattened.

     Args:
         image: 2-D array (indexed as ``image.shape[0]``, ``image.shape[1]``).

     Returns:
         The signal divided by its absolute peak. An all-zero signal is
         returned as zeros instead of NaNs.

     Raises:
         ValueError: if ``self.mode`` is not one of the modes above.
     """
     if self.mode in ('reallog', 'abslog'):
         # real spectrograms have 2**i + 1 freq bins: add one zero row
         x = np.zeros((image.shape[0] + 1, image.shape[1]))
         x[:image.shape[0], :image.shape[1]] = image
         if self.mode == 'reallog':
             signed = adjust_dynamic_range(x, self.drange, (-1, 1))
             sgn = np.sign(signed)
             real_pt_stft = (np.exp(np.abs(signed)) - 1) * sgn
             # hop_length is keyword-only in recent librosa releases; the
             # keyword form works with old and new versions alike.
             signal = lbr.istft(real_pt_stft, hop_length=self.hop_length)
         else:
             x = adjust_dynamic_range(x, self.drange, (0, 255))
             signal = self.reconstruct_from_magnitude(x)
     elif self.mode == 'raw':
         signal = image.ravel()
     else:
         raise ValueError(
             'image_to_sound: unrecognized mode: {}. Available modes are: reallog, abslog, raw.'
             .format(self.mode))
     peak = np.abs(signal).max()
     # A silent (all-zero) signal has no peak; dividing by it would yield NaNs.
     return signal / (peak if peak > 0 else 1.0)
Esempio n. 10
0
 def generate_samples_3d(self, num_samples, current_resolution):
     """Generate ``num_samples`` volumes and save them as NIfTI files.

     Latents come from ``self.sample_random_latents`` with ``batch_size=1``
     and a label alternating between 0 and 1 (``index % 2``). The generator
     output is rescaled from [-1, 1] to [0, 255], clipped, cast to uint8,
     wrapped in a ``nib.Nifti1Image`` with an identity affine and written to
     ``self.generated_dir`` as ``res_<current_resolution>_<index>.nii.gz``.
     """
     for sample_idx in range(num_samples):
         label = sample_idx % 2
         noise = self.sample_random_latents(batch_size=1, label=label)
         generated = self.train_generator([noise, 1.0])
         scaled = utils.adjust_dynamic_range(generated, [-1.0, 1.0],
                                             [0.0, 255.0])
         clipped = tf.clip_by_value(scaled, 0.0, 255.0)
         voxels = np.squeeze(np.array(clipped[0])).astype(np.uint8)
         volume = nib.Nifti1Image(voxels, np.eye(4))
         file_name = 'res_{}_{}.nii.gz'.format(current_resolution, sample_idx)
         target = self.generated_dir.joinpath(file_name)
         nib.save(volume, str(target))
Esempio n. 11
0
 def load_file(self, item):
     """Load sound file ``item`` and turn it into a uint8 image array.

     The file ``self.files[item]`` is read with ``sound_load_fun`` (its second
     return value is ignored). A 2-D result is averaged over axis 1 as
     stereo-to-mono. Depending on ``self.img_mode``:

     * ``'raw'``: the samples are truncated and reshaped into the largest
       square whose side is a power of two.
     * ``'abslog'``: ``log(1 + |STFT|)``, cropped to
       ``n_fft // 2`` x ``n_fft // 2``.
     * anything else: signed log of the real part of the cropped STFT,
       ``log(1 + |Re|) * sign(Re)``.

     Returns:
         The image rescaled from its own min/max to ``self.range_in``, cast
         to ``uint8``, with a new leading axis of length 1.
     """
     s, _ = sound_load_fun(self.files[item], self.frequency, dtype='float32')
     if s.ndim == 2:  # stereo to mono
         s = (s.sum(axis=1)) / 2
     if self.img_mode == 'raw':
         # Largest power-of-two side whose square fits in the sample count.
         size = int(np.log2(np.sqrt(s.shape[0])))
         side = 2 ** size
         s = s[:side ** 2].reshape((side, side))
     else:
         # n_fft / hop_length are keyword-only in recent librosa releases;
         # the keyword form works with old and new versions alike.
         s = lbr.stft(s, n_fft=self.n_fft, hop_length=self.hop_length)
         s = s[:self.n_fft // 2, :self.n_fft // 2]
         if self.img_mode == 'abslog':
             s = np.log(1 + np.abs(s))
         else:
             # Take the sign of the real part, not of the complex value:
             # np.sign on complex input returns a complex array (and its
             # definition changed in NumPy 2.0), which would leak into the
             # min/max rescaling and the uint8 cast below.
             real_part = s.real
             s = np.log(1 + np.abs(real_part)) * np.sign(real_part)
     s = np.uint8(adjust_dynamic_range(s, (s.min(), s.max()), self.range_in))
     return s[np.newaxis]
Esempio n. 12
0
 def load_file(self, item):
     """Load sound file ``item`` and turn it into a uint8 image array.

     The file ``self.files[item]`` is read with ``sound_load_fun`` (its second
     return value is ignored). A 2-D result is averaged over axis 1 as
     stereo-to-mono. Depending on ``self.img_mode``:

     * ``'raw'``: the samples are truncated and reshaped into the largest
       square whose side is a power of two.
     * ``'abslog'``: ``log(1 + |STFT|)``, cropped to
       ``n_fft // 2`` x ``n_fft // 2``.
     * anything else: signed log of the real part of the cropped STFT,
       ``log(1 + |Re|) * sign(Re)``.

     Returns:
         The image rescaled from its own min/max to ``self.range_in``, cast
         to ``uint8``, with a new leading axis of length 1.
     """
     s, _ = sound_load_fun(self.files[item], self.frequency, dtype='float32')
     if s.ndim == 2:  # stereo to mono
         s = (s.sum(axis=1)) / 2
     if self.img_mode == 'raw':
         # Largest power-of-two side whose square fits in the sample count.
         size = int(np.log2(np.sqrt(s.shape[0])))
         side = 2 ** size
         s = s[:side ** 2].reshape((side, side))
     else:
         # n_fft / hop_length are keyword-only in recent librosa releases;
         # the keyword form works with old and new versions alike.
         s = lbr.stft(s, n_fft=self.n_fft, hop_length=self.hop_length)
         s = s[:self.n_fft // 2, :self.n_fft // 2]
         if self.img_mode == 'abslog':
             s = np.log(1 + np.abs(s))
         else:
             # Take the sign of the real part, not of the complex value:
             # np.sign on complex input returns a complex array (and its
             # definition changed in NumPy 2.0), which would leak into the
             # min/max rescaling and the uint8 cast below.
             real_part = s.real
             s = np.log(1 + np.abs(real_part)) * np.sign(real_part)
     s = np.uint8(adjust_dynamic_range(s, (s.min(), s.max()), self.range_in))
     return s[np.newaxis]
Esempio n. 13
0
    def __init__(self, data_path, labels_fine_list=None, labels_coarse_list=None):
        """Load the dSprites arrays and set up the factor bookkeeping.

        Args:
            data_path: directory containing
                ``dsprites/dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz``.
            labels_fine_list: indices of the fine factors to use; expected to
                be a subset of ``[0, 1]``. ``None`` (default) means none.
            labels_coarse_list: indices of the coarse factors to use; expected
                to be a subset of ``[2, 3, 4]``. ``None`` (default) means none.

        A warning is printed (no exception is raised) when either list is not
        a subset of the corresponding ground-truth index list.
        """
        # Copy the arguments so instances never share (or alias) a list object;
        # the previous ``[]`` defaults were shared between all instances.
        self.labels_fine_list = (
            [] if labels_fine_list is None else list(labels_fine_list))
        self.labels_coarse_list = (
            [] if labels_coarse_list is None else list(labels_coarse_list))
        # The latent factors are the fine labels followed by the coarse ones.
        self.latent_factor_indices = self.labels_fine_list + self.labels_coarse_list

        self.fine_factor_indices = [0, 1]
        self.coarse_factor_indices = [2, 3, 4]
        if not set(self.labels_fine_list).issubset(
                set(self.fine_factor_indices)):
            print(
                "[warning]: labels_fine_list is not a subset of fine ground-truth list"
            )
        if not set(self.labels_coarse_list).issubset(
                set(self.coarse_factor_indices)):
            print(
                "[warning]: labels_coarse_list is not a subset of coarse ground-truth list"
            )

        self.data_shape = [64, 64, 1]
        # Load the data so that we can sample from it.
        dsprites_file = os.path.join(
            data_path, 'dsprites',
            'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        with gfile.Open(dsprites_file, "rb") as data_file:
            # Data was saved originally using python2, so we need to set the encoding.
            # NOTE(security): allow_pickle=True unpickles objects stored in the
            # archive; only load this file from a trusted source.
            data = np.load(data_file, encoding="latin1", allow_pickle=True)
            self.images = np.array(data["imgs"])
            self.images = adjust_dynamic_range(self.images,
                                               drange_in=[0, 1],
                                               drange_out=[0, 255])  # TODO
            self.factor_sizes = np.array(data["metadata"][()]["latents_sizes"],
                                         dtype=np.int64)
        self.full_factor_sizes = [3, 6, 40, 32, 32]
        # Number of consecutive dataset entries spanned by one step of each factor.
        self.factor_bases = np.prod(self.full_factor_sizes) / np.cumprod(
            self.full_factor_sizes)
        self.state_space = SplitDiscreteStateSpace(self.full_factor_sizes,
                                                   self.latent_factor_indices)

        self.labels_mask = np.random.uniform(0., 1., size=len(
            self.images))  # for semi-supervised learning
def get_image(gen, point, depth, alpha):
    """Run ``gen`` on ``point`` and return the result as a NumPy array.

    The generator output is detached, passed through ``adjust_dynamic_range``
    with its default ranges, squeezed along dim 0, moved to the CPU and
    returned with its axes reordered by ``transpose(1, 2, 0)``.
    """
    generated = gen(point, depth, alpha).detach()
    rescaled = adjust_dynamic_range(generated)
    single = rescaled.squeeze(dim=0)
    as_array = single.cpu().numpy()
    return as_array.transpose(1, 2, 0)