def __getitem__(self, index):
    """Return the quantized audio at *index*, prefixed with ``overlap_len``
    samples at the quantized zero level (used as generation context)."""
    waveform, _ = load(self.file_names[index], sr=None, mono=True)
    # Zero-level padding that precedes the real signal.
    prefix = torch.LongTensor(self.overlap_len).fill_(utils.q_zero(self.q_levels))
    quantized = utils.linear_quantize(torch.from_numpy(waveform), self.q_levels)
    return torch.cat([prefix, quantized])
 def __getitem__(self, index):
     """Return the class label followed by the quantized audio for the
     sample at *index*, resampled to ``self.sample_rate`` and padded/trimmed
     to ``self.audio_length`` samples."""
     path, label = self.file_names[index]
     audio, _ = load(path, sr=self.sample_rate, mono=True)
     # Normalize every clip to a fixed length, repeating edge values to pad.
     audio = fix_length(audio, size=self.audio_length, mode='edge')
     label_part = torch.LongTensor([label])
     audio_part = utils.linear_quantize(torch.from_numpy(audio), self.q_levels)
     return torch.cat((label_part, audio_part))
    def __getitem__(self, index):
        """Return the quantized audio at *index* with a zero-level prefix of
        ``overlap_len`` samples, and the class label appended as the final
        element of the tensor."""
        path = self.file_names[index]
        # The parent directory name encodes the integer class label.
        label = int(path.split("/")[-2])
        audio, _ = load(path, sr=None, mono=True)

        prefix = torch.LongTensor(self.overlap_len).fill_(utils.q_zero(self.q_levels))
        quantized = utils.linear_quantize(torch.from_numpy(audio), self.q_levels)
        return torch.cat([prefix, quantized, torch.LongTensor([label])])
# Beispiel #4 (German: "Example #4") — scrape-artifact separator; score: 0
 def __getitem__(self, index):
     """Return a 5-tuple for the sample at *index*: (quantized audio with a
     zero-level prefix of ``overlap_len`` samples, raw audio, HSL data,
     emotion, text)."""
     audio = self.audio_samples[index]
     hsl = self.hsl_data_samples[index]
     emotion = self.emotion_samples[index]
     text = self.text_samples[index]
     if isinstance(text, bytes):
         # Stored text may arrive as raw bytes; normalize to str.
         text = text.decode('utf-8')
     prefix = torch.LongTensor(self.overlap_len).fill_(utils.q_zero(self.q_levels))
     quantized = utils.linear_quantize(torch.from_numpy(audio), self.q_levels)
     return torch.cat([prefix, quantized]), audio, hsl, emotion, text
# Beispiel #5 (German: "Example #5") — scrape-artifact separator; score: 0
    def __getitem__(self, index):
        """Load the audio at *index* (librosa first, scipy as a fallback) and
        return it quantized, prefixed with ``overlap_len`` zero-level samples."""
        path = self.file_names[index]
        try:
            # Preferred path: librosa handles resampling-free mono loading.
            audio, _ = load(path, sr=None, mono=True)
        except audioread.NoBackendError:
            # No audioread backend available; use scipy's WAV reader instead.
            # NOTE(review): scipy returns integer PCM rather than librosa's
            # float range — confirm utils.linear_quantize handles both.
            _, audio = scipy.io.wavfile.read(path)

        prefix = torch.LongTensor(self.overlap_len).fill_(utils.q_zero(self.q_levels))
        quantized = utils.linear_quantize(torch.from_numpy(audio), self.q_levels)
        return torch.cat([prefix, quantized])
    # NOTE(review): fragment of a larger routine — `model`, `params`, `np`,
    # `utils`, `GENERATED_PATH` and `GeneratorPlugin` come from the enclosing
    # scope, which is not visible in this chunk.
    dataset_filenames = get_test_data(model, params)

    # Gets initial samples from 1 test sample and check if it re-generates it
    audio_filename = dataset_filenames[0]
    from librosa.core import load
    sr = params['sample_rate']
    seq, sr = load(audio_filename, sr=sr, mono=True)
    print("Sample rate: {}".format(sr))

    # Generate Plugin
    num_samples = 6  # params['n_samples']

    # Seed the generator with the first `initial_seq_size` quantized samples
    # of the test file, tiled so every generated sequence shares the seed.
    initial_seq_size = 64 * 100  # has to be multiple of rnn.n_frame_samples ???
    initial_seq = None
    if initial_seq_size > 1:
        init = utils.linear_quantize(torch.from_numpy(seq[0:initial_seq_size]), params['q_levels'])
        # init = seq[0:initial_seed_size]
        init = np.tile(init, (num_samples, 1))
        initial_seq = torch.LongTensor(init)
        # initial_seed = utils.linear_quantize(initial_seed, params['q_levels'])

    sample_length = params['sample_length']
    sample_rate = params['sample_rate']
    print("Number samples: {}, sample_length: {}, sample_rate: {}".format(num_samples, sample_length, sample_rate))
    generator = GeneratorPlugin(GENERATED_PATH, num_samples, sample_length, sample_rate)

    # Overloads register function to accept the trained model and the cuda setting
    generator.register_generate(model.cuda(), params['cuda'])

    # Generate new audio
    generator.epoch('Test19_{}'.format(initial_seq_size), initial_seed=initial_seq)
# Beispiel #7 (German: "Example #7") — scrape-artifact separator; score: 0
    def __call__(self, n_seqs, seq_len, class_label=0, data_seed=None):
        """Autoregressively generate ``n_seqs`` sequences of ``seq_len``
        quantized samples with the multi-tier SampleRNN-style model.

        Args:
            n_seqs: number of sequences generated in parallel.
            seq_len: number of new samples per sequence (lookback excluded).
            class_label: integer class for conditional generation.
            data_seed: optional object with a ``getClassSplit(class_num, seq_len)``
                method; when given, its output fills the lookback window with
                real class data instead of the quantized-zero default.

        Returns:
            LongTensor of shape (n_seqs, seq_len) holding sampled quantization
            indices, with the lookback context stripped.
        """
        # generation doesn't work with CUDNN for some reason

        torch.backends.cudnn.enabled = False
        label_tensor = torch.LongTensor([class_label])
        with torch.no_grad():
            self.reset_hidden_states()

            # bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
            # NOTE(review): hard-coded 16 here (and in the slice inside the tier
            # loop) — presumably matches frame_level_rnns[0].n_frame_samples;
            # confirm against the model definition.
            bottom_frame_size = 16
            # Work buffer: lookback context followed by the samples to generate,
            # initialized to the quantized zero level.
            sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
                             .fill_(utils.q_zero(self.model.q_levels))
            if data_seed is not None:
                # Seed every sequence's lookback window with quantized real data
                # drawn for the requested class.
                seeds = []
                for _ in range(n_seqs):
                    seeds.append(
                        utils.linear_quantize(
                            torch.from_numpy(
                                data_seed.getClassSplit(
                                    class_num=class_label,
                                    seq_len=self.model.lookback)),
                            self.model.q_levels))
                seed = torch.stack(seeds)
                sequences[:, :self.model.lookback] = seed
            frame_level_outputs = [None for _ in self.model.frame_level_rnns]

            # One new sample per iteration, conditioning each RNN tier on the
            # output of the tier above it (tiers processed top-down).
            for i in range(self.model.lookback, self.model.lookback + seq_len):
                for (tier_index, rnn) in \
                        reversed(list(enumerate(self.model.frame_level_rnns))):
                    # if i % rnn.n_frame_samples != 0:
                    #     continue

                    # Dequantize the last 16 samples to the model's input range.
                    prev_samples = sequences[:, i - 16:i]
                    prev_samples = torch.autograd.Variable(
                        2 * utils.linear_dequantize(
                            prev_samples, self.model.q_levels).unsqueeze(1))
                    if self.cuda:
                        prev_samples = prev_samples.cuda()
                        label_tensor = label_tensor.cuda()

                    if tier_index == len(self.model.frame_level_rnns) - 1:
                        # Topmost tier has nothing above it to condition on.
                        upper_tier_conditioning = None
                        # if self.model.num_classes > 1:
                        #     upper_tier_conditioning = self.model.class_embedding(label_tensor) + self.model.class_bias.cuda()
                        #     if self.cuda:
                        #         upper_tier_conditioning = upper_tier_conditioning.cuda()
                    else:
                        # Pick the conditioning frame of the tier above that
                        # corresponds to the current sample position.
                        frame_index = (i // rnn.n_frame_samples) % \
                            self.model.frame_level_rnns[tier_index + 1].frame_size
                        upper_tier_conditioning = \
                            frame_level_outputs[tier_index + 1][:, frame_index, :] \
                                               .unsqueeze(1)
                    if isinstance(class_label, int):
                        class_label = torch.Tensor([class_label])
                    frame_level_outputs[tier_index] = self.run_rnn(
                        rnn, prev_samples, upper_tier_conditioning,
                        class_label)
                # Sample-level MLP: condition on the raw previous samples and
                # the bottom tier's output frame for this position.
                prev_samples = torch.autograd.Variable(
                    sequences[:, i - bottom_frame_size:i])
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                frame_level_outputs[0] = frame_level_outputs[0].view(
                    n_seqs, bottom_frame_size, -1)
                upper_tier_conditioning = \
                    frame_level_outputs[0][:, i % bottom_frame_size, :].unsqueeze(1)
                # exp_ turns the MLP's log-probabilities into probabilities,
                # then multinomial draws the next quantization index.
                sample_dist = self.model.sample_level_mlp(
                    prev_samples,
                    upper_tier_conditioning).squeeze(1).exp_().data
                sequences[:, i] = sample_dist.multinomial(1).squeeze(1)

            torch.backends.cudnn.enabled = True

            # Drop the lookback context; return only the generated samples.
            return sequences[:, self.model.lookback:]