def __getitem__(self, index):
    """Load the audio file at *index* and return it linearly quantized,
    prefixed with ``overlap_len`` samples of the quantized-zero level."""
    audio, _ = load(self.file_names[index], sr=None, mono=True)
    # Leading pad of quantized zeros so the model has warm-up context.
    zero_pad = torch.LongTensor(self.overlap_len)
    zero_pad.fill_(utils.q_zero(self.q_levels))
    quantized = utils.linear_quantize(torch.from_numpy(audio), self.q_levels)
    return torch.cat([zero_pad, quantized])
def __getitem__(self, index):
    """Return one item as a 1-D LongTensor: the class label in position 0
    followed by the linearly quantized audio samples.

    The file is loaded at ``self.sample_rate`` and padded/trimmed to exactly
    ``self.audio_length`` samples (edge mode) so items batch uniformly.
    """
    file_path, class_label = self.file_names[index]
    seq, _ = load(file_path, sr=self.sample_rate, mono=True)
    # Fixed length lets the default collate stack items without custom logic.
    seq = fix_length(seq, size=self.audio_length, mode='edge')
    # NOTE: unlike the other dataset variants, no overlap_len zero-pad is
    # prepended here (the previously commented-out padding was removed).
    return torch.cat((
        torch.LongTensor([class_label]),
        utils.linear_quantize(torch.from_numpy(seq), self.q_levels),
    ))
def __getitem__(self, index):
    """Load, quantize, and label the audio file at *index*.

    Returns a 1-D LongTensor: ``overlap_len`` quantized-zero samples,
    the quantized audio, then the class label as the final element.
    """
    import os.path  # local: keeps this edit self-contained

    file_path = self.file_names[index]
    # The class label is the name of the file's parent directory.
    # os.path is used instead of split("/") so this also works with
    # platform-native separators (e.g. backslashes on Windows).
    label = int(os.path.basename(os.path.dirname(file_path)))
    seq, _ = load(file_path, sr=None, mono=True)
    return torch.cat([
        torch.LongTensor(self.overlap_len)
            .fill_(utils.q_zero(self.q_levels)),
        utils.linear_quantize(torch.from_numpy(seq), self.q_levels),
        torch.LongTensor([label]),
    ])
def __getitem__(self, index):
    """Return the quantized audio (with a leading quantized-zero pad of
    ``overlap_len`` samples) plus the raw audio, HSL data, emotion label,
    and transcript text for the item at *index*."""
    audio = self.audio_samples[index]
    hsl = self.hsl_data_samples[index]
    emotion = self.emotion_samples[index]
    transcript = self.text_samples[index]
    # Transcripts may be stored as raw bytes; normalize to str.
    if isinstance(transcript, bytes):
        transcript = transcript.decode('utf-8')
    zero_pad = torch.LongTensor(self.overlap_len)
    zero_pad.fill_(utils.q_zero(self.q_levels))
    quantized = utils.linear_quantize(torch.from_numpy(audio), self.q_levels)
    return torch.cat([zero_pad, quantized]), audio, hsl, emotion, transcript
def __getitem__(self, index):
    """Load and quantize the audio file at *index*, prefixed with
    ``overlap_len`` quantized-zero samples.

    Tries librosa first; falls back to ``scipy.io.wavfile`` when no
    audioread backend is available.
    """
    try:
        seq, _ = load(self.file_names[index], sr=None, mono=True)
    except audioread.NoBackendError:
        _, seq = scipy.io.wavfile.read(self.file_names[index])
        # scipy returns raw integer PCM, whereas librosa returns float32
        # in [-1, 1]; normalize so linear_quantize sees the same range.
        if np.issubdtype(seq.dtype, np.integer):
            seq = seq.astype(np.float32) / np.iinfo(seq.dtype).max
        # NOTE(review): unlike the librosa path, this fallback does not
        # downmix stereo to mono — confirm all files are single-channel.
    return torch.cat([
        torch.LongTensor(self.overlap_len)
            .fill_(utils.q_zero(self.q_levels)),
        utils.linear_quantize(torch.from_numpy(seq), self.q_levels),
    ])
# Take the beginning of one test sample and check whether the model
# re-generates something like it.
dataset_filenames = get_test_data(model, params)
audio_filename = dataset_filenames[0]

from librosa.core import load
sr = params['sample_rate']
seq, sr = load(audio_filename, sr=sr, mono=True)
print("Sample rate: {}".format(sr))

# --- Build the generation seed -------------------------------------------
num_samples = 6  # params['n_samples']
initial_seq_size = 64 * 100  # has to be multiple of rnn.n_frame_samples ???
initial_seq = None
if initial_seq_size > 1:
    # Quantize the seed prefix and replicate it once per generated sample.
    init = utils.linear_quantize(
        torch.from_numpy(seq[:initial_seq_size]), params['q_levels'])
    init = np.tile(init, (num_samples, 1))
    initial_seq = torch.LongTensor(init)

sample_length = params['sample_length']
sample_rate = params['sample_rate']
print("Number samples: {}, sample_length: {}, sample_rate: {}".format(
    num_samples, sample_length, sample_rate))

generator = GeneratorPlugin(GENERATED_PATH, num_samples, sample_length, sample_rate)
# Hand the trained model and the CUDA flag to the generator plugin.
generator.register_generate(model.cuda(), params['cuda'])
# Generate new audio seeded with the quantized prefix.
generator.epoch('Test19_{}'.format(initial_seq_size), initial_seed=initial_seq)
def __call__(self, n_seqs, seq_len, class_label=0, data_seed=None):
    """Autoregressively generate ``n_seqs`` sequences of ``seq_len`` samples.

    Args:
        n_seqs: number of sequences to generate in parallel.
        seq_len: number of new samples to generate per sequence.
        class_label: class used for conditioning and (optionally) for
            selecting the data seed.
        data_seed: optional object providing ``getClassSplit(...)``; when
            given, its output primes the lookback window instead of zeros.

    Returns:
        LongTensor of shape (n_seqs, seq_len) of quantized sample indices
        (the lookback prefix is stripped before returning).
    """
    # generation doesn't work with CUDNN for some reason
    torch.backends.cudnn.enabled = False
    label_tensor = torch.LongTensor([class_label])
    with torch.no_grad():
        self.reset_hidden_states()
        # bottom_frame_size = self.model.frame_level_rnns[0].n_frame_samples
        # NOTE(review): hard-coded to 16 — must match the bottom tier's
        # n_frame_samples (and the `i - 16:i` slice below); confirm.
        bottom_frame_size = 16
        # Buffer holding lookback context followed by generated samples,
        # initialized to the quantized-zero level.
        sequences = torch.LongTensor(n_seqs, self.model.lookback + seq_len) \
            .fill_(utils.q_zero(self.model.q_levels))
        if data_seed is not None:
            # Prime the lookback window with quantized real data for the
            # requested class, one independent draw per sequence.
            seeds = []
            for _ in range(n_seqs):
                seeds.append(
                    utils.linear_quantize(
                        torch.from_numpy(
                            data_seed.getClassSplit(
                                class_num=class_label,
                                seq_len=self.model.lookback)),
                        self.model.q_levels))
            seed = torch.stack(seeds)
            sequences[:, :self.model.lookback] = seed
        # Cached conditioning output of each frame-level tier.
        frame_level_outputs = [None for _ in self.model.frame_level_rnns]
        for i in range(self.model.lookback, self.model.lookback + seq_len):
            # Run the frame-level tiers top-down (coarsest first) so each
            # tier can condition on the one above it.
            for (tier_index, rnn) in \
                    reversed(list(enumerate(self.model.frame_level_rnns))):
                # if i % rnn.n_frame_samples != 0:
                #     continue
                # NOTE(review): with the modulo skip commented out, every
                # tier runs at every step — confirm this is intended.
                prev_samples = sequences[:, i - 16:i]
                # `2 *` rescaling of the dequantized window — presumably to
                # match the training-time input range; verify against the
                # training pipeline.
                prev_samples = torch.autograd.Variable(
                    2 * utils.linear_dequantize(
                        prev_samples, self.model.q_levels).unsqueeze(1))
                if self.cuda:
                    prev_samples = prev_samples.cuda()
                    label_tensor = label_tensor.cuda()
                if tier_index == len(self.model.frame_level_rnns) - 1:
                    # Top tier has nothing above it to condition on.
                    upper_tier_conditioning = None
                    # if self.model.num_classes > 1:
                    #     upper_tier_conditioning = self.model.class_embedding(label_tensor) + self.model.class_bias.cuda()
                    #     if self.cuda:
                    #         upper_tier_conditioning = upper_tier_conditioning.cuda()
                else:
                    # Pick the slice of the upper tier's output that covers
                    # the current sample position.
                    frame_index = (i // rnn.n_frame_samples) % \
                        self.model.frame_level_rnns[tier_index + 1].frame_size
                    upper_tier_conditioning = \
                        frame_level_outputs[tier_index + 1][:, frame_index, :] \
                        .unsqueeze(1)
                # run_rnn expects a tensor label; convert once on first use.
                if isinstance(class_label, int):
                    class_label = torch.Tensor([class_label])
                frame_level_outputs[tier_index] = self.run_rnn(
                    rnn, prev_samples, upper_tier_conditioning, class_label)
            # Sample-level MLP: predict the next sample from the last
            # bottom_frame_size quantized samples plus tier-0 conditioning.
            prev_samples = torch.autograd.Variable(
                sequences[:, i - bottom_frame_size:i])
            if self.cuda:
                prev_samples = prev_samples.cuda()
            frame_level_outputs[0] = frame_level_outputs[0].view(
                n_seqs, bottom_frame_size, -1)
            upper_tier_conditioning = \
                frame_level_outputs[0][:, i % bottom_frame_size, :].unsqueeze(1)
            # exp_() turns log-probabilities into probabilities for sampling.
            sample_dist = self.model.sample_level_mlp(
                prev_samples, upper_tier_conditioning).squeeze(1).exp_().data
            sequences[:, i] = sample_dist.multinomial(1).squeeze(1)
    torch.backends.cudnn.enabled = True
    # Strip the lookback prefix; return only the generated samples.
    return sequences[:, self.model.lookback:]