def __call__(self, n_seqs, seq_len):
    """Autoregressively generate `n_seqs` sequences of `seq_len` samples.

    Returns a LongTensor of shape (n_seqs, seq_len) holding quantized
    sample indices; the lookback priming region is stripped before return.
    """
    # generation doesn't work with CUDNN for some reason
    # torch.backends.cudnn.enabled = False
    self.reset_hidden_states()
    bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
    sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
        .fill_(utils.q_zero(self.model.q_levels))
    frame_level_outputs = [None for _ in self.model.frame_level_rnns]
    print('Generating sample...')
    # No gradients are needed during generation: torch.no_grad() replaces
    # the removed `volatile=True` flag of torch.autograd.Variable, which
    # has been a silent no-op since PyTorch 0.4.
    with torch.no_grad():
        for i in tqdm.tqdm(range(self.model.lookback,
                                 self.model.lookback + seq_len),
                           mininterval=1, ascii=True):
            # Run frame-level tiers top-down; a tier fires only when the
            # current index is aligned with its frame size.
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                if i % rnn.n_frame_samples != 0:
                    continue
                # Previous frame rescaled to roughly [-2, 2].
                prev_samples = 2 * utils.linear_dequantize(
                    sequences[:, i - rnn.n_frame_samples:i],
                    self.model.q_levels).unsqueeze(1)
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                if tier_index == len(self.model.frame_level_rnns) - 1:
                    # Top tier: no conditioning from above.
                    upper_tier_conditioning = None
                else:
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning)
            # Sample-level MLP emits log-probabilities; exp_ -> distribution.
            prev_samples = sequences[:, i - bottom_frame_size:i]
            if self.cuda:
                prev_samples = prev_samples.cuda()
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :] \
                .unsqueeze(1)
            sample_dist = self.model.sample_level_mlp(
                prev_samples, upper_tier_conditioning
            ).squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
    # torch.backends.cudnn.enabled = True
    return sequences[:, self.model.lookback:]
def __getitem__(self, index):
    """Load one audio file with librosa and return its quantized samples,
    preceded by a q_zero-filled pad of `overlap_len` entries."""
    file_name = self.file_names[index]
    seq, _ = load(file_name, sr=None, mono=True)
    pad = torch.LongTensor(self.overlap_len)
    pad.fill_(utils.q_zero(self.q_levels))
    quantized = utils.linear_quantize(torch.from_numpy(seq), self.q_levels)
    return torch.cat([pad, quantized])
def __getitem__(self, index):
    """Read one audio file with soundfile and return its quantized samples,
    preceded by a q_zero-filled pad of `overlap_len` entries."""
    file_name = self.file_names[index]
    seq, _ = sf.read(file_name, samplerate=None)
    pad = torch.LongTensor(self.overlap_len)
    pad.fill_(utils.q_zero(self.q_levels))
    quantized = self._quantize(torch.from_numpy(seq), self.q_levels)
    return torch.cat([pad, quantized])
def __getitem__(self, index):
    """Load one audio file, returning its quantized samples padded with a
    q_zero prefix of `overlap_len`, with the integer class label appended
    as the final element of the tensor.

    The label is the name of the file's parent directory.
    """
    import os.path
    file_name = self.file_names[index]
    # Was file_name.split("/")[-2]: that hard-codes the POSIX separator and
    # breaks on Windows-style paths. os.path gives the same parent-directory
    # name portably.
    label = int(os.path.basename(os.path.dirname(file_name)))
    (seq, _) = load(file_name, sr=None, mono=True)
    return torch.cat([
        torch.LongTensor(self.overlap_len)
             .fill_(utils.q_zero(self.q_levels)),
        utils.linear_quantize(
            torch.from_numpy(seq), self.q_levels
        ),
        torch.LongTensor([label])
    ])
def __getitem__(self, index):
    """Return (quantized_audio, raw_audio, hsl_data, emotion, text) for one
    dataset entry; `text` is decoded from bytes to str when necessary."""
    seq = self.audio_samples[index]
    hsl_data = self.hsl_data_samples[index]
    emotion = self.emotion_samples[index]
    raw_text = self.text_samples[index]
    if isinstance(raw_text, bytes):
        raw_text = raw_text.decode('utf-8')
    pad = torch.LongTensor(self.overlap_len)
    pad.fill_(utils.q_zero(self.q_levels))
    quantized = torch.cat([
        pad,
        utils.linear_quantize(torch.from_numpy(seq), self.q_levels),
    ])
    return quantized, seq, hsl_data, emotion, raw_text
def __getitem__(self, index):
    """Load one audio file (librosa first, scipy as a fallback) and return
    its quantized samples preceded by a q_zero pad of `overlap_len`."""
    path = self.file_names[index]
    try:
        # Preferred decoder.
        seq, _ = load(path, sr=None, mono=True)
    except audioread.NoBackendError:
        # librosa had no decoding backend; fall back to scipy.
        # NOTE(review): scipy.io.wavfile.read typically yields integer PCM
        # while librosa yields floats — confirm utils.linear_quantize copes
        # with both dtypes.
        _, seq = scipy.io.wavfile.read(path)
    pad = torch.LongTensor(self.overlap_len)
    pad.fill_(utils.q_zero(self.q_levels))
    return torch.cat([
        pad,
        utils.linear_quantize(torch.from_numpy(seq), self.q_levels),
    ])
def __call__(self, n_seqs, seq_len, sampling_temperature=0.9,
             initial_seq=None):
    """Autoregressively generate `n_seqs` sequences of `seq_len` samples.

    `sampling_temperature` divides the logits before exponentiation
    (values < 1 sharpen the distribution). When `initial_seq` is given,
    the sequence buffer is primed with it and generation resumes after it
    (conditional generation). Returns a LongTensor of quantized samples
    with the lookback priming region stripped.
    """
    # generation doesn't work with CUDNN for some reason
    torch.backends.cudnn.enabled = False
    self.reset_hidden_states()
    bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
    sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
        .fill_(utils.q_zero(self.model.q_levels))
    if initial_seq is None:
        initial_i = self.model.lookback
        final_i = initial_i + seq_len
    else:
        # CONDITIONAL: prime the buffer with the supplied sequence and
        # start generating right after it.
        sequences[:, 0:np.shape(initial_seq)[1]] = initial_seq
        initial_i = np.shape(initial_seq)[1] - self.model.lookback
        final_i = self.model.lookback + seq_len
    frame_level_outputs = [None for _ in self.model.frame_level_rnns]
    # torch.no_grad() replaces the removed `volatile=True` Variable flag
    # (a silent no-op since PyTorch 0.4). The leftover per-step debug
    # prints were also dropped.
    with torch.no_grad():
        for i in range(initial_i, final_i):
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                # A tier fires only when i is aligned with its frame size.
                if i % rnn.n_frame_samples != 0:
                    continue
                prev_samples = 2 * utils.linear_dequantize(
                    sequences[:, i - rnn.n_frame_samples:i],
                    self.model.q_levels).unsqueeze(1)
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                top_tier = len(self.model.frame_level_rnns) - 1  # was `l`
                if tier_index == top_tier:
                    # Top tier: no conditioning from above.
                    upper_tier_conditioning = None
                else:
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning)
            prev_samples = sequences[:, i - bottom_frame_size:i]
            if self.cuda:
                prev_samples = prev_samples.cuda()
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :] \
                .unsqueeze(1)
            sample_dist = self.model.sample_level_mlp(prev_samples,
                                                      upper_tier_conditioning)
            # Temperature-scale, then exponentiate into a distribution.
            sample_dist = sample_dist.div(sampling_temperature) \
                                     .squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
    torch.backends.cudnn.enabled = True
    return sequences[:, self.model.lookback:]
def __call__(self, n_seqs, seq_len, sampling_temperature=0.9):
    """Autoregressively generate `n_seqs` sequences of `seq_len` samples.

    `sampling_temperature` divides the logits before exponentiation
    (values < 1 sharpen the distribution). Progress is logged roughly once
    per lookback-sized chunk. Returns a LongTensor of quantized samples
    with the lookback priming region stripped.
    """
    # generation doesn't work with CUDNN for some reason
    # torch.backends.cudnn.enabled = False
    self.reset_hidden_states()
    bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
    sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
        .fill_(utils.q_zero(self.model.q_levels))
    frame_level_outputs = [None for _ in self.model.frame_level_rnns]
    logging.info('Generating sample, total: {} lookback: {}'.format(
        seq_len, self.model.lookback))
    start_time = time.time()
    # One no_grad context around the whole loop (it used to be re-entered
    # every iteration); the deprecated torch.autograd.Variable wrappers
    # were no-ops since PyTorch 0.4 and are removed.
    with torch.no_grad():
        for i in range(self.model.lookback, self.model.lookback + seq_len):
            # periodic progress logging
            if i % self.model.lookback == 0:
                log_time = time.time()
                # Progress counts generated samples only (was i / seq_len,
                # which includes the lookback offset and overshoots 100%).
                logging.info('{}% {}/{} {}it/s'.format(
                    round(((i - self.model.lookback) / seq_len) * 100, 2),
                    i, seq_len,
                    round(self.model.lookback / (log_time - start_time), 2)))
                start_time = log_time
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                # A tier fires only when i is aligned with its frame size.
                if i % rnn.n_frame_samples != 0:
                    continue
                prev_samples = 2 * self._dequantize(
                    sequences[:, i - rnn.n_frame_samples:i],
                    self.model.q_levels).unsqueeze(1)
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                if tier_index == len(self.model.frame_level_rnns) - 1:
                    # Top tier: no conditioning from above.
                    upper_tier_conditioning = None
                else:
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning)
            prev_samples = sequences[:, i - bottom_frame_size:i]
            if self.cuda:
                prev_samples = prev_samples.cuda()
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :] \
                .unsqueeze(1)
            sample_dist = self.model.sample_level_mlp(
                prev_samples, upper_tier_conditioning).div(
                sampling_temperature).squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
    # torch.backends.cudnn.enabled = True
    return sequences[:, self.model.lookback:]
def __call__(self, n_seqs, seq_len):
    """Generate `n_seqs` class-conditioned sequences per class.

    When `self.cond == False` every class's one-hot vector is used
    (n_seqs rows per class); otherwise `self.cond` itself is used as the
    conditioning vector for every row. Returns a LongTensor of shape
    (n_seqs * nb_classes, seq_len) of quantized samples.
    """
    torch.backends.cudnn.enabled = True
    self.reset_hidden_states()
    bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
    sequences = torch.LongTensor(n_seqs * self.nb_classes,
                                 self.model.lookback + seq_len) \
        .fill_(utils.q_zero(self.model.q_levels))
    frame_level_outputs = [None for _ in self.model.frame_level_rnns]
    # The conditioning tensor depends only on self.cond / nb_classes /
    # n_seqs, so build it ONCE; it used to be reconstructed inside the
    # innermost tier loop on every generated sample.
    one_hot_tensor = torch.tensor([]).float()
    # `== False` kept deliberately to preserve the original comparison
    # semantics (e.g. 0 == False is True, while `not 0` also covers other
    # falsy values).
    if self.cond == False:
        # No conditioning given: generate all classes.
        for label in range(self.nb_classes):
            one_hot_vec = utils.one_hot(
                torch.tensor([label]), self.nb_classes).float()
            for _ in range(n_seqs):
                one_hot_tensor = torch.cat([one_hot_tensor, one_hot_vec])
    else:
        # Use the caller-supplied one-hot vector for every row.
        one_hot_vec = torch.tensor([self.cond]).float()
        for _ in range(self.nb_classes * n_seqs):
            one_hot_tensor = torch.cat([one_hot_tensor, one_hot_vec])
    one_hot_tensor = one_hot_tensor[:, None, :]
    # torch.no_grad() replaces the removed `volatile=True` Variable flag.
    with torch.no_grad():
        for i in range(self.model.lookback, self.model.lookback + seq_len):
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                # A tier fires only when i is aligned with its frame size.
                if i % rnn.n_frame_samples != 0:
                    continue
                prev_samples = 2 * utils.linear_dequantize(
                    sequences[:, i - rnn.n_frame_samples:i],
                    self.model.q_levels).unsqueeze(1)
                # Append class conditioning along the feature dimension.
                prev_samples = torch.cat([prev_samples, one_hot_tensor], 2)
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                if tier_index == len(self.model.frame_level_rnns) - 1:
                    # Top tier: no conditioning from above.
                    upper_tier_conditioning = None
                else:
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning)
            prev_samples = sequences[:, i - bottom_frame_size:i]
            if self.cuda:
                prev_samples = prev_samples.cuda()
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :] \
                .unsqueeze(1)
            sample_dist = self.model.sample_level_mlp(
                prev_samples, upper_tier_conditioning).squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
    torch.backends.cudnn.enabled = True
    return sequences[:, self.model.lookback:]
def __call__(self, n_seqs, seq_len, class_label=0, data_seed=None):
    """Generate `n_seqs` sequences of `seq_len` samples for one class.

    When `data_seed` is given, the lookback region is primed with real
    class data from `data_seed.getClassSplit(...)` instead of q_zero.
    Returns a LongTensor of quantized samples (lookback stripped).
    """
    # generation doesn't work with CUDNN for some reason
    torch.backends.cudnn.enabled = False
    label_tensor = torch.LongTensor([class_label])
    with torch.no_grad():
        self.reset_hidden_states()
        # NOTE(review): bottom frame size is hard-coded to 16 here; the
        # commented-out line below suggests it should come from the model —
        # confirm the bottom tier really uses 16-sample frames.
        # bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
        bottom_frame_size = 16
        sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
            .fill_(utils.q_zero(self.model.q_levels))
        if data_seed is not None:
            # Prime the lookback region with quantized real data of the
            # requested class, one independent seed per sequence.
            seeds = []
            for _ in range(n_seqs):
                seeds.append(
                    utils.linear_quantize(
                        torch.from_numpy(
                            data_seed.getClassSplit(
                                class_num=class_label,
                                seq_len=self.model.lookback)),
                        self.model.q_levels))
            seed = torch.stack(seeds)
            sequences[:, :self.model.lookback] = seed
        frame_level_outputs = [None for _ in self.model.frame_level_rnns]
        for i in range(self.model.lookback, self.model.lookback + seq_len):
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                # NOTE(review): the usual frame-alignment guard is disabled,
                # so every tier runs at every sample index — confirm this is
                # intentional for this model variant.
                # if i % rnn.n_frame_samples != 0:
                #     continue
                # Always feeds the last 16 samples (matches the hard-coded
                # bottom_frame_size above).
                prev_samples = sequences[:, i - 16:i]
                prev_samples = torch.autograd.Variable(
                    2 * utils.linear_dequantize(
                        prev_samples,
                        self.model.q_levels).unsqueeze(1))
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                    label_tensor = label_tensor.cuda()
                if tier_index == len(self.model.frame_level_rnns) - 1:
                    # Top tier: no conditioning from above (class embedding
                    # conditioning below is disabled).
                    upper_tier_conditioning = None
                    # if self.model.num_classes > 1:
                    #     upper_tier_conditioning = self.model.class_embedding(label_tensor) + self.model.class_bias.cuda()
                    #     if self.cuda:
                    #         upper_tier_conditioning = upper_tier_conditioning.cuda()
                else:
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)
                # run_rnn expects the label as a Tensor; convert once.
                if isinstance(class_label, int):
                    class_label = torch.Tensor([class_label])
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning, class_label)
            prev_samples = torch.autograd.Variable(
                sequences[:, i - bottom_frame_size:i])
            if self.cuda:
                prev_samples = prev_samples.cuda()
            # Reshape the bottom-tier output so it can be indexed per
            # sample position within the frame.
            frame_level_outputs[0] = frame_level_outputs[0].view(
                n_seqs, bottom_frame_size, -1)
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :].unsqueeze(1)
            sample_dist = self.model.sample_level_mlp(
                prev_samples, upper_tier_conditioning).squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
    torch.backends.cudnn.enabled = True
    return sequences[:, self.model.lookback:]
def __call__(self, n_seqs, seq_len, cond, spk):
    """Generate speech conditioned on acoustic features and a speaker id.

    `cond` is a numpy array of shape (num_cond, n_dim); `spk` is a scalar
    speaker id. `seq_len` is overridden to num_cond * lookback so each
    conditioning row covers one lookback-sized chunk. Returns dequantized
    audio with the lookback priming region stripped.
    """
    # generation doesn't work with CUDNN for some reason
    cudnn_was_enabled = torch.backends.cudnn.enabled
    torch.backends.cudnn.enabled = False
    self.reset_hidden_states()
    (num_cond, n_dim) = cond.shape
    condtot = cond
    global_spk = spk
    seq_len = num_cond * self.model.lookback
    print('seq len', seq_len)
    print('model look-back', self.model.lookback)
    bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
    sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len).fill_(
        utils.q_zero(self.model.q_levels))
    frame_level_outputs = [None for _ in self.model.frame_level_rnns]
    # torch.no_grad() replaces the removed `volatile=True` Variable flag
    # (a silent no-op since PyTorch 0.4). The per-sample debug print was
    # dropped.
    with torch.no_grad():
        for i in range(self.model.lookback, self.model.lookback + seq_len):
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                # A tier fires only when i is aligned with its frame size.
                if i % rnn.n_frame_samples != 0:
                    continue
                prev_samples = 2 * self.model.dequantize(
                    sequences[:, i - rnn.n_frame_samples:i],
                    self.model.q_levels).unsqueeze(1)
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                if tier_index == len(self.model.frame_level_rnns) - 1:
                    # Top tier: feed the conditioning row for this chunk.
                    upper_tier_conditioning = None
                    j = i // self.model.lookback - 1
                    cond = condtot[j, :]
                    cond = torch.from_numpy(cond.reshape(1, 1, n_dim))
                    spk = global_spk
                    spk = torch.from_numpy(np.array(spk).reshape(1, 1))
                else:
                    # Lower tiers are conditioned by the tier above instead.
                    cond = None
                    spk = None
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)
                if self.cuda:
                    # BUGFIX: lower tiers set cond/spk to None, and the old
                    # unconditional Variable(cond).cuda() crashed on None.
                    if cond is not None:
                        cond = cond.cuda()
                    if spk is not None:
                        spk = spk.cuda()
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning, cond, spk, i)
            prev_samples = sequences[:, i - bottom_frame_size:i]
            if self.cuda:
                prev_samples = prev_samples.cuda()
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :] \
                .unsqueeze(1)
            sample_dist = self.model.sample_level_mlp(
                prev_samples, upper_tier_conditioning).squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
    torch.backends.cudnn.enabled = cudnn_was_enabled
    return self.model.dequantize(sequences[:, self.model.lookback:],
                                 self.model.q_levels)