def __call__(self, n_seqs, seq_len):
        """Autoregressively generate `n_seqs` audio sequences of `seq_len`
        quantized samples each.

        Tiers run coarsest-first: a frame-level RNN fires only on sample
        indices divisible by its frame size and its output conditions the
        tier below; the sample-level MLP then draws one sample per step.

        Returns:
            LongTensor of shape (n_seqs, seq_len) with quantized sample
            indices; the `lookback` warm-up prefix is stripped.
        """
        self.reset_hidden_states()

        bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
        # Warm-up region is filled with the quantized-zero level.
        sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
                         .fill_(utils.q_zero(self.model.q_levels))
        frame_level_outputs = [None for _ in self.model.frame_level_rnns]

        print('Generating sample...')

        # torch.no_grad() replaces Variable(..., volatile=True), which was
        # deprecated in PyTorch 0.4 and no longer disables autograd.
        with torch.no_grad():
            for i in tqdm.tqdm(range(self.model.lookback,
                                     self.model.lookback + seq_len),
                               mininterval=1, ascii=True):
                for (tier_index, rnn) in \
                        reversed(list(enumerate(self.model.frame_level_rnns))):
                    # This tier only fires on its own frame boundaries.
                    if i % rnn.n_frame_samples != 0:
                        continue

                    # NOTE(review): the 2x factor presumably matches the
                    # model's training-time input scaling — confirm.
                    prev_samples = 2 * utils.linear_dequantize(
                        sequences[:, i - rnn.n_frame_samples:i],
                        self.model.q_levels
                    ).unsqueeze(1)
                    if self.cuda:
                        prev_samples = prev_samples.cuda()

                    if tier_index == len(self.model.frame_level_rnns) - 1:
                        # Top tier has nothing above it.
                        upper_tier_conditioning = None
                    else:
                        frame_index = (i // rnn.n_frame_samples) % \
                            self.model.frame_level_rnns[tier_index + 1].frame_size
                        upper_tier_conditioning = \
                            frame_level_outputs[tier_index + 1][:, frame_index, :] \
                                               .unsqueeze(1)

                    frame_level_outputs[tier_index] = self.run_rnn(
                        rnn, prev_samples, upper_tier_conditioning
                    )

                prev_samples = sequences[:, i - bottom_frame_size:i]
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                upper_tier_conditioning = \
                    frame_level_outputs[0][:, i % bottom_frame_size, :] \
                                          .unsqueeze(1)
                # MLP emits log-probabilities; exponentiate before sampling.
                sample_dist = self.model.sample_level_mlp(
                    prev_samples, upper_tier_conditioning
                ).squeeze(1).exp_().data
                sequences[:, i] = sample_dist.multinomial(1).squeeze(1)

        return sequences[:, self.model.lookback:]
 def __getitem__(self, index):
     """Load one audio file and return its quantized samples preceded by
     `overlap_len` zero-quantized padding samples."""
     seq, _ = load(self.file_names[index], sr=None, mono=True)
     padding = torch.LongTensor(self.overlap_len).fill_(
         utils.q_zero(self.q_levels))
     quantized = utils.linear_quantize(torch.from_numpy(seq), self.q_levels)
     return torch.cat([padding, quantized])
# Exemple #3 (0)
 def __getitem__(self, index):
     """Read one audio file with soundfile and return its quantized samples
     preceded by `overlap_len` zero-quantized padding samples."""
     seq, _ = sf.read(self.file_names[index], samplerate=None)
     padding = torch.LongTensor(self.overlap_len).fill_(
         utils.q_zero(self.q_levels))
     quantized = self._quantize(torch.from_numpy(seq), self.q_levels)
     return torch.cat([padding, quantized])
    def __getitem__(self, index):
        """Return quantized audio for one file with its class label appended.

        The label is parsed from the directory directly above the file in
        its path and stored as the final element of the returned tensor.
        """
        # Parent directory name encodes the class label.
        label = int(self.file_names[index].split("/")[-2])
        seq, _ = load(self.file_names[index], sr=None, mono=True)

        padding = torch.LongTensor(self.overlap_len).fill_(
            utils.q_zero(self.q_levels))
        quantized = utils.linear_quantize(torch.from_numpy(seq), self.q_levels)
        return torch.cat([padding, quantized, torch.LongTensor([label])])
# Exemple #5 (0)
 def __getitem__(self, index):
     """Return (padded quantized audio, raw audio, HSL data, emotion, text)
     for the sample at `index`; `text` is decoded to str if stored as bytes."""
     seq = self.audio_samples[index]
     hsl_data = self.hsl_data_samples[index]
     emotion = self.emotion_samples[index]
     text = self.text_samples[index]
     if isinstance(text, bytes):
         text = text.decode('utf-8')
     padding = torch.LongTensor(self.overlap_len).fill_(
         utils.q_zero(self.q_levels))
     quantized = utils.linear_quantize(torch.from_numpy(seq), self.q_levels)
     return torch.cat([padding, quantized]), seq, hsl_data, emotion, text
# Exemple #6 (0)
    def __getitem__(self, index):
        """Load one audio file — librosa first, scipy as fallback — and
        return its quantized samples prefixed with overlap padding."""
        try:
            seq, _ = load(self.file_names[index], sr=None, mono=True)
        except audioread.NoBackendError:
            # librosa had no usable decoding backend; use scipy's WAV reader.
            _, seq = scipy.io.wavfile.read(self.file_names[index])

        padding = torch.LongTensor(self.overlap_len).fill_(
            utils.q_zero(self.q_levels))
        quantized = utils.linear_quantize(torch.from_numpy(seq), self.q_levels)
        return torch.cat([padding, quantized])
# Exemple #7 (0)
    def __call__(self,
                 n_seqs,
                 seq_len,
                 sampling_temperature=0.9,
                 initial_seq=None):
        """Generate `n_seqs` sequences of `seq_len` samples, optionally
        continuing from `initial_seq` (conditional generation).

        Args:
            n_seqs: number of sequences generated in parallel.
            seq_len: number of samples per sequence.
            sampling_temperature: divisor applied to the logits before
                sampling; <1 sharpens, >1 flattens the distribution.
            initial_seq: optional LongTensor of quantized samples copied
                into the buffer; generation resumes after it.

        Returns:
            LongTensor (n_seqs, seq_len) of quantized sample indices.
        """
        # generation doesn't work with CUDNN for some reason
        torch.backends.cudnn.enabled = False

        self.reset_hidden_states()

        bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
        sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
            .fill_(utils.q_zero(self.model.q_levels))
        if initial_seq is None:
            initial_i = self.model.lookback
            final_i = initial_i + seq_len
        else:  # CONDITIONAL: seed the buffer and start generating after it.
            sequences[:, 0:np.shape(initial_seq)[1]] = initial_seq
            initial_i = np.shape(initial_seq)[1] - self.model.lookback
            final_i = self.model.lookback + seq_len
        frame_level_outputs = [None for _ in self.model.frame_level_rnns]

        # torch.no_grad() replaces Variable(..., volatile=True) (removed in
        # PyTorch 0.4); the leftover per-sample debug prints were dropped.
        with torch.no_grad():
            for i in range(initial_i, final_i):
                for (tier_index, rnn) in \
                        reversed(list(enumerate(self.model.frame_level_rnns))):
                    # Each tier fires only on its own frame boundaries.
                    if i % rnn.n_frame_samples != 0:
                        continue

                    prev_samples = 2 * utils.linear_dequantize(
                        sequences[:, i - rnn.n_frame_samples:i],
                        self.model.q_levels).unsqueeze(1)
                    if self.cuda:
                        prev_samples = prev_samples.cuda()

                    top_tier = len(self.model.frame_level_rnns) - 1
                    if tier_index == top_tier:
                        upper_tier_conditioning = None
                    else:
                        frame_index = (i // rnn.n_frame_samples) % \
                            self.model.frame_level_rnns[tier_index + 1].frame_size
                        upper_tier_conditioning = \
                            frame_level_outputs[tier_index + 1][:, frame_index, :] \
                                               .unsqueeze(1)

                    frame_level_outputs[tier_index] = self.run_rnn(
                        rnn, prev_samples, upper_tier_conditioning)

                prev_samples = sequences[:, i - bottom_frame_size:i]
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                upper_tier_conditioning = \
                    frame_level_outputs[0][:, i % bottom_frame_size, :] \
                                          .unsqueeze(1)
                # Temperature-scale the logits, then exponentiate to get
                # unnormalized probabilities for multinomial sampling.
                sample_dist = self.model.sample_level_mlp(
                    prev_samples, upper_tier_conditioning).div(
                        sampling_temperature).squeeze(1).exp_().data
                sequences[:, i] = sample_dist.multinomial(1).squeeze(1)

        torch.backends.cudnn.enabled = True

        return sequences[:, self.model.lookback:]
# Exemple #8 (0)
    def __call__(self, n_seqs, seq_len, sampling_temperature=0.9):
        """Generate `n_seqs` sequences of `seq_len` quantized samples.

        Progress/throughput is reported via `logging` once every `lookback`
        samples. `sampling_temperature` divides the logits before sampling.

        Returns:
            LongTensor (n_seqs, seq_len) of quantized sample indices.
        """
        self.reset_hidden_states()

        bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
        # Warm-up region is filled with the quantized-zero level.
        sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
                         .fill_(utils.q_zero(self.model.q_levels))
        frame_level_outputs = [None for _ in self.model.frame_level_rnns]

        logging.info('Generating sample, total: {} lookback: {}'.format(
            seq_len, self.model.lookback))

        start_time = time.time()
        for i in range(self.model.lookback, self.model.lookback + seq_len):
            # Periodic progress / samples-per-second logging.
            if i % self.model.lookback == 0:
                log_time = time.time()
                logging.info('{}% {}/{} {}it/s'.format(
                    round((i / seq_len) * 100, 2), i, seq_len,
                    round(self.model.lookback / (log_time - start_time), 2)))
                start_time = log_time

            with torch.no_grad():
                for (tier_index, rnn) in \
                        reversed(list(enumerate(self.model.frame_level_rnns))):
                    # Each tier fires only on its own frame boundaries.
                    if i % rnn.n_frame_samples != 0:
                        continue

                    # torch.autograd.Variable was a deprecated no-op wrapper
                    # inside no_grad; plain tensors are used instead.
                    prev_samples = 2 * self._dequantize(
                        sequences[:, i - rnn.n_frame_samples:i],
                        self.model.q_levels).unsqueeze(1)
                    if self.cuda:
                        prev_samples = prev_samples.cuda()

                    if tier_index == len(self.model.frame_level_rnns) - 1:
                        # Top tier has nothing above it.
                        upper_tier_conditioning = None
                    else:
                        frame_index = (i // rnn.n_frame_samples) % \
                            self.model.frame_level_rnns[tier_index + 1].frame_size
                        upper_tier_conditioning = \
                            frame_level_outputs[tier_index + 1][:, frame_index, :] \
                                               .unsqueeze(1)

                    frame_level_outputs[tier_index] = self.run_rnn(
                        rnn, prev_samples, upper_tier_conditioning)

                prev_samples = sequences[:, i - bottom_frame_size:i]
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                upper_tier_conditioning = \
                    frame_level_outputs[0][:, i % bottom_frame_size, :] \
                                          .unsqueeze(1)
                # Temperature-scale, exponentiate, then sample one level.
                sample_dist = self.model.sample_level_mlp(
                    prev_samples, upper_tier_conditioning).div(
                        sampling_temperature).squeeze(1).exp_().data
                sequences[:, i] = sample_dist.multinomial(1).squeeze(1)

        return sequences[:, self.model.lookback:]
# Exemple #9 (0)
    def __call__(self, n_seqs, seq_len):
        """Generate class-conditioned sequences.

        When `self.cond` is False, `n_seqs` sequences are generated for
        every class (n_seqs * nb_classes rows, grouped by class); otherwise
        `self.cond` is used as a fixed conditioning vector for all rows.

        Returns:
            LongTensor (n_seqs * nb_classes, seq_len) of quantized samples.
        """
        torch.backends.cudnn.enabled = True

        self.reset_hidden_states()

        bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples

        sequences = torch.LongTensor(n_seqs*self.nb_classes, self.model.lookback + seq_len) \
                         .fill_(utils.q_zero(self.model.q_levels))
        frame_level_outputs = [None for _ in self.model.frame_level_rnns]

        # The conditioning block is identical at every timestep and tier,
        # so build it once here instead of inside the inner loop.
        if self.cond == False:  # no conditioning: generate all classes
            rows = [
                utils.one_hot(torch.tensor([label]), self.nb_classes).float()
                for label in range(self.nb_classes)
                for _ in range(n_seqs)
            ]
        else:  # custom one-hot vector, repeated for every row
            one_hot_vec = torch.tensor([self.cond]).float()
            rows = [one_hot_vec] * (self.nb_classes * n_seqs)
        one_hot_tensor = torch.cat(rows)[:, None, :]

        # no_grad replaces the removed Variable(..., volatile=True) API.
        with torch.no_grad():
            for i in range(self.model.lookback, self.model.lookback + seq_len):
                for (tier_index, rnn) in \
                        reversed(list(enumerate(self.model.frame_level_rnns))):
                    # Each tier fires only on its own frame boundaries.
                    if i % rnn.n_frame_samples != 0:
                        continue

                    prev_samples = 2 * utils.linear_dequantize(
                        sequences[:, i - rnn.n_frame_samples:i],
                        self.model.q_levels).unsqueeze(1)
                    # Append the class conditioning along the feature axis.
                    prev_samples = torch.cat([prev_samples, one_hot_tensor], 2)
                    if self.cuda:
                        prev_samples = prev_samples.cuda()

                    if tier_index == len(self.model.frame_level_rnns) - 1:
                        upper_tier_conditioning = None
                    else:
                        frame_index = (i // rnn.n_frame_samples) % \
                            self.model.frame_level_rnns[tier_index + 1].frame_size
                        upper_tier_conditioning = \
                            frame_level_outputs[tier_index + 1][:, frame_index, :] \
                                               .unsqueeze(1)

                    frame_level_outputs[tier_index] = self.run_rnn(
                        rnn, prev_samples, upper_tier_conditioning)

                prev_samples = sequences[:, i - bottom_frame_size:i]
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                upper_tier_conditioning = \
                    frame_level_outputs[0][:, i % bottom_frame_size, :] \
                                          .unsqueeze(1)
                sample_dist = self.model.sample_level_mlp(
                    prev_samples, upper_tier_conditioning).squeeze(1).exp_().data
                sequences[:, i] = sample_dist.multinomial(1).squeeze(1)

        torch.backends.cudnn.enabled = True

        return sequences[:, self.model.lookback:]
# Exemple #10 (0)
    def __call__(self, n_seqs, seq_len, class_label=0, data_seed=None):
        """Generate `n_seqs` sequences conditioned on `class_label`.

        Args:
            n_seqs: number of sequences generated in parallel.
            seq_len: number of samples to generate per sequence.
            class_label: class index passed to each tier RNN.
            data_seed: optional data source; when given, its
                `getClassSplit` output seeds the lookback region instead
                of quantized silence.

        Returns:
            LongTensor (n_seqs, seq_len) of quantized sample indices.
        """
        # generation doesn't work with CUDNN for some reason
        torch.backends.cudnn.enabled = False
        label_tensor = torch.LongTensor([class_label])
        with torch.no_grad():
            self.reset_hidden_states()

            # NOTE(review): hard-coded frame size; the commented-out
            # original read frame_level_rnns[0].n_frame_samples — confirm
            # that 16 matches the trained model.
            bottom_frame_size = 16
            sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
                             .fill_(utils.q_zero(self.model.q_levels))
            if data_seed is not None:
                # Seed the warm-up region with real data from this class.
                seeds = [
                    utils.linear_quantize(
                        torch.from_numpy(
                            data_seed.getClassSplit(
                                class_num=class_label,
                                seq_len=self.model.lookback)),
                        self.model.q_levels)
                    for _ in range(n_seqs)
                ]
                sequences[:, :self.model.lookback] = torch.stack(seeds)
            frame_level_outputs = [None for _ in self.model.frame_level_rnns]

            # Loop-invariant work hoisted out of the generation loop.
            if self.cuda:
                label_tensor = label_tensor.cuda()
            if isinstance(class_label, int):
                class_label = torch.Tensor([class_label])

            for i in range(self.model.lookback, self.model.lookback + seq_len):
                for (tier_index, rnn) in \
                        reversed(list(enumerate(self.model.frame_level_rnns))):
                    # Magic number 16 replaced with bottom_frame_size
                    # (same value, single source of truth).
                    prev_samples = 2 * utils.linear_dequantize(
                        sequences[:, i - bottom_frame_size:i],
                        self.model.q_levels).unsqueeze(1)
                    if self.cuda:
                        prev_samples = prev_samples.cuda()

                    if tier_index == len(self.model.frame_level_rnns) - 1:
                        upper_tier_conditioning = None
                    else:
                        frame_index = (i // rnn.n_frame_samples) % \
                            self.model.frame_level_rnns[tier_index + 1].frame_size
                        upper_tier_conditioning = \
                            frame_level_outputs[tier_index + 1][:, frame_index, :] \
                                               .unsqueeze(1)
                    frame_level_outputs[tier_index] = self.run_rnn(
                        rnn, prev_samples, upper_tier_conditioning,
                        class_label)

                prev_samples = sequences[:, i - bottom_frame_size:i]
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                frame_level_outputs[0] = frame_level_outputs[0].view(
                    n_seqs, bottom_frame_size, -1)
                upper_tier_conditioning = \
                    frame_level_outputs[0][:, i % bottom_frame_size, :].unsqueeze(1)
                sample_dist = self.model.sample_level_mlp(
                    prev_samples,
                    upper_tier_conditioning).squeeze(1).exp_().data
                sequences[:, i] = sample_dist.multinomial(1).squeeze(1)

            torch.backends.cudnn.enabled = True

            return sequences[:, self.model.lookback:]
    def __call__(self, n_seqs, seq_len, cond, spk):
        """Generate audio conditioned on acoustic frames and a speaker id.

        Note: the `seq_len` argument is overwritten below as
        num_cond * lookback, so each conditioning frame drives one
        lookback-sized span of samples. Unlike the other generators, this
        one returns DEQUANTIZED samples rather than quantized indices.

        Args:
            n_seqs: number of sequences generated in parallel.
            seq_len: ignored; recomputed from cond's first dimension.
            cond: numpy array of shape (num_cond, n_dim) of conditioning
                frames.
            spk: speaker identifier, broadcast to every top-tier step.
        """
        # generation doesn't work with CUDNN for some reason

        # Remember the caller's cuDNN setting so it can be restored later.
        cuda_enabled = torch.backends.cudnn.enabled
        torch.backends.cudnn.enabled = False

        self.reset_hidden_states()
        (num_cond, n_dim) = cond.shape
        condtot = cond
        global_spk = spk
        # One conditioning frame per lookback window of generated samples.
        seq_len = num_cond * self.model.lookback
        print('seq len', seq_len)
        print('model look-back', self.model.lookback)
        bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
        sequences = torch.LongTensor(n_seqs,
                                     self.model.lookback + seq_len).fill_(
                                         utils.q_zero(self.model.q_levels))
        frame_level_outputs = [None for _ in self.model.frame_level_rnns]

        for i in range(self.model.lookback, self.model.lookback + seq_len):
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                # Each tier fires only on its own frame boundaries.
                if i % rnn.n_frame_samples != 0:
                    continue

                # 2 * utils.linear_dequantize(
                print('Predicting sample ', i)
                prev_samples = torch.autograd.Variable(
                    2 * self.model.dequantize(
                        sequences[:, i - rnn.n_frame_samples:i],
                        self.model.q_levels).unsqueeze(1),
                    volatile=True)
                # print('prev samples', prev_samples)
                if self.cuda:
                    prev_samples = prev_samples.cuda()

                if tier_index == len(self.model.frame_level_rnns) - 1:
                    # Top tier: no upper conditioning; select this window's
                    # conditioning frame and the (global) speaker instead.
                    upper_tier_conditioning = None
                    j = i // self.model.lookback - 1
                    cond = condtot[j, :]
                    cond = torch.from_numpy(cond.reshape(1, 1, n_dim))
                    spk = global_spk
                    spk = torch.from_numpy(np.array(spk).reshape(1, 1))
                else:
                    cond = None
                    spk = None
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)

                # NOTE(review): on non-top tiers cond/spk are None here, so
                # Variable(cond) would raise when self.cuda is set and the
                # model has more than one tier — confirm this is intended.
                if self.cuda:
                    cond = Variable(cond).cuda()
                    spk = Variable(spk).cuda()
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning, cond, spk, i)
            # print('frame out', frame_level_outputs)
            prev_samples = torch.autograd.Variable(
                sequences[:, i - bottom_frame_size:i], volatile=True)
            # print('prev samples', prev_samples)
            if self.cuda:
                prev_samples = prev_samples.cuda()
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :] \
                .unsqueeze(1)
            sample_dist = self.model.sample_level_mlp(
                prev_samples, upper_tier_conditioning).squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
        torch.backends.cudnn.enabled = cuda_enabled

        # Return real-valued audio, not quantization indices.
        return self.model.dequantize(sequences[:, self.model.lookback:],
                                     self.model.q_levels)