def test_model(model, data_loader, args):
    """Evaluate *model* over *data_loader*, print metrics and append a
    summary of the run to ``data/result_{args.model}.txt``.

    Args:
        model: trained ``nn.Module``. Called as
            ``model(features, truth_data, args, loss_func=None)`` when
            ``args.lossinside`` is truthy, else ``model(features, args)``.
        data_loader: iterable yielding ``(features, truth_data)`` batches;
            ``features`` may carry an ``'inds'`` entry used for plotting.
        args: namespace providing ``device``, ``lossinside``, ``model``,
            ``epochs``, ``lr``, ``dataset``, ``identify``,
            ``model_config`` and ``data_config``.
    """
    model.eval()
    predictions, targets, inds = [], [], []
    # Pure inference: disable autograd so no graphs are built and memory
    # stays flat (the original ran eval with grad tracking enabled).
    with torch.no_grad():
        tqdm_loader = tqdm(enumerate(data_loader))
        for step, (features, truth_data) in tqdm_loader:
            # Collect raw indices before the batch is moved to the device.
            if 'inds' in features.keys():
                inds.append(features['inds'])
            features = to_var(features, args.device)
            truth_data = to_var(truth_data, args.device)
            if args.lossinside:
                _, outputs = model(features, truth_data, args, loss_func=None)
            else:
                outputs = model(features, args)
            targets.append(truth_data.cpu().numpy())
            predictions.append(outputs.cpu().detach().numpy())

    pre2 = np.concatenate(predictions).squeeze()
    tar2 = np.concatenate(targets)
    inds = np.concatenate(inds) if len(inds) > 0 else None

    metric = calculate_metrics(pre2, tar2, args, plot=True, inds=inds)
    print(metric)

    # Append a human-readable record of this evaluation run.
    with open(f'data/result_{args.model}.txt', 'a') as f:
        f.write(time.strftime("%m/%d %H:%M:%S", time.localtime(time.time())))
        f.write(f"epoch:{args.epochs} lr:{args.lr}\ndataset:{args.dataset} identify:{args.identify}\n")
        f.write(f"{args.model_config}\n")
        f.write(f"{args.data_config}\n")
        f.write(f"{metric}\n\n")
def parse_batch(self, batch):
    """Unpack a collated TTS batch into ``(model_inputs, targets)``.

    Returns:
        ``((text_padded, input_lengths, mel_padded, max_len,
        output_lengths), (mel_padded, gate_padded))`` with tensors moved
        through ``to_var`` and cast to their expected dtypes.
    """
    text_padded, input_lengths, mel_padded, gate_padded, output_lengths = batch

    text_padded = to_var(text_padded).long()
    input_lengths = to_var(input_lengths).long()
    # Longest text sequence in the batch, as a plain Python int.
    max_len = torch.max(input_lengths.data).item()
    mel_padded = to_var(mel_padded).float()
    gate_padded = to_var(gate_padded).float()
    output_lengths = to_var(output_lengths).long()

    inputs = (text_padded, input_lengths, mel_padded, max_len, output_lengths)
    outputs = (mel_padded, gate_padded)
    return inputs, outputs
def infer(wav_path, text, model):
    """Teacher-forced inference: synthesize a mel for *text* conditioned on
    the reference audio at *wav_path*.

    Returns:
        The reference mel with its leading frames replaced by the model's
        postnet output, or ``None`` when the trimmed mel is empty.
    """
    sequence = text_to_sequence(text, hps.text_cleaners)
    sequence = to_var(torch.IntTensor(sequence)[None, :]).long()
    mel = melspectrogram(load_wav(wav_path))
    # Trim the mel to a multiple of n_frames_per_step.
    # BUG FIX: when the length already is a multiple, r == 0 and the original
    # slices mel[:, :-0] / ret[:, :-0], i.e. EMPTY slices -- the whole mel was
    # discarded and the final assignment failed. Guard r == 0 explicitly.
    r = mel.shape[1] % hps.n_frames_per_step
    mel_trimmed = mel if r == 0 else mel[:, :-r]
    mel_in = to_var(torch.Tensor([mel_trimmed]))
    if mel_in.shape[2] < 1:
        return None

    # Duplicate along the batch dimension (model expects batch size >= 2 here
    # -- presumably for batch-norm/sorting; TODO confirm).
    sequence = torch.cat([sequence, sequence], 0)
    mel_in = torch.cat([mel_in, mel_in], 0)

    _, mel_outputs_postnet, _, _ = model.teacher_infer(sequence, mel_in)

    ret = mel
    if r == 0:
        ret[:, :] = to_arr(mel_outputs_postnet[0])
    else:
        ret[:, :-r] = to_arr(mel_outputs_postnet[0])
    return ret
def inference(self, inputs, mode='train'):
    """Run the video-to-mel model end to end and return parsed outputs.

    Args:
        inputs: in ``'train'`` mode, a 5-tuple ``(vid_inputs, vid_lengths,
            mels, max_len, output_lengths)``; otherwise the raw video array
            alone.  NOTE(review): only ``vid_inputs`` is actually used
            below -- the remaining tuple members are unpacked and discarded.
        mode: ``'train'`` selects tuple unpacking; any other value treats
            *inputs* as the video array directly.

    Returns:
        Whatever ``self.parse_output`` produces from
        ``[mel_outputs, mel_outputs_postnet, gate_outputs, alignments]``.
    """
    if mode == 'train':
        vid_inputs, vid_lengths, mels, max_len, output_lengths = inputs
    else:
        vid_inputs = inputs
    # assumes vid_inputs is a numpy array with channels in axis 3
    # (permuted to channel-first below) -- TODO confirm layout with callers;
    # in 'train' mode a tensor from parse_batch_vid would make from_numpy fail.
    vid_inputs = to_var(torch.from_numpy(vid_inputs)).float()
    vid_inputs = vid_inputs.permute(3, 0, 1, 2).unsqueeze(0).contiguous()
    # vid_lengths, output_lengths = vid_lengths.data, output_lengths.data
    # embedded_inputs = self.embedding(inputs).transpose(1, 2)
    # NOTE(review): .type(torch.FloatTensor) forces CPU float32, then the
    # encoder input is moved to GPU explicitly -- a redundant round trip?
    embedded_inputs = vid_inputs.type(torch.FloatTensor)
    encoder_outputs = self.encoder.inference(embedded_inputs.cuda())
    mel_outputs, gate_outputs, alignments = self.decoder.inference(
        encoder_outputs)
    mel_outputs_postnet = self.postnet(mel_outputs)
    mel_outputs_postnet = mel_outputs + mel_outputs_postnet
    outputs = self.parse_output(
        [mel_outputs, mel_outputs_postnet, gate_outputs, alignments])
    return outputs
def teacher_infer(self, inputs, mels):
    """Teacher-forced forward pass: encode *inputs* and decode conditioned
    on the ground-truth *mels*; return parsed model outputs."""
    # Sort sequence lengths descending (as packed-sequence encoders expect).
    raw_lengths = torch.LongTensor([len(seq) for seq in inputs])
    sorted_lengths, _ = torch.sort(raw_lengths, dim=0, descending=True)
    text_lengths = to_var(sorted_lengths)

    embedded = self.embedding(inputs).transpose(1, 2)
    encoded = self.encoder(embedded, text_lengths)

    mel_outputs, gate_outputs, alignments = self.decoder(
        encoded, mels, memory_lengths=text_lengths)

    # Postnet refines the coarse decoder output via a residual connection.
    residual = self.postnet(mel_outputs)
    mel_outputs_postnet = mel_outputs + residual

    return self.parse_output(
        [mel_outputs, mel_outputs_postnet, gate_outputs, alignments])
def parse_batch_vid(self, batch):
    """Unpack a collated video batch into ``(model_inputs, targets)``.

    Returns:
        ``((vid_padded, input_lengths, mel_padded, max_len_vid,
        target_lengths), (mel_padded, gate_padded))``.
        ``embed_targets`` is unpacked but not used here.
    """
    (vid_padded, input_lengths, mel_padded, gate_padded,
     target_lengths, split_infos, embed_targets) = batch

    vid_padded = to_var(vid_padded).float()
    # NOTE(review): lengths are cast to .float() here while parse_batch uses
    # .long() -- confirm downstream consumers really expect float lengths.
    input_lengths = to_var(input_lengths).float()
    # Original converted mel_padded twice; once is sufficient.
    mel_padded = to_var(mel_padded).float()
    gate_padded = to_var(gate_padded).float()
    target_lengths = to_var(target_lengths).float()

    max_len_vid = split_infos[0].data.item()
    max_len_target = split_infos[1].data.item()

    return ((vid_padded, input_lengths, mel_padded, max_len_vid, target_lengths),
            (mel_padded, gate_padded))
def infer(text, model):
    """Synthesize mel spectrograms for *text* with *model*.

    Returns:
        ``(mel_outputs, mel_outputs_postnet, alignments)`` from the
        model's free-running inference.
    """
    ids = text_to_sequence(text, hps.text_cleaners)
    batch = to_var(torch.IntTensor(ids)[None, :]).long()
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(batch)
    return mel_outputs, mel_outputs_postnet, alignments
def train_model(model: nn.Module, data_loaders: Dict[str, DataLoader],
                loss_func: callable, optimizer: optim, model_folder: str,
                tensorboard_folder: str, args, **kwargs):
    """Train *model* for ``args.epochs`` epochs over train/val/test phases.

    Tracks the best validation Pearson correlation (``scores['pearr']``),
    logs losses/metrics to TensorBoard and ``{model_folder}/output.txt``,
    and always saves best/final checkpoints (via ``try/finally``), even if
    training is interrupted.

    Args:
        model: module called as ``model(features, truth_data, args,
            loss_func=...)`` when ``args.lossinside`` else
            ``model(features, args)``.
        data_loaders: dict with 'train', 'val' and 'test' loaders.
        loss_func: criterion called as ``loss_func(truth=..., predict=...)``.
        optimizer: torch optimizer instance.
        model_folder: directory for checkpoints and the text log.
        tensorboard_folder: directory for SummaryWriter events.
        args: namespace with ``epochs``, ``device``, ``lossinside``.
        **kwargs: forwarded to ``calculate_metrics``.
    """
    num_epochs = args.epochs
    phases = ['train', 'val', 'test']
    writer = SummaryWriter(tensorboard_folder)
    # time.clock() was removed in Python 3.8; perf_counter is the replacement.
    since = time.perf_counter()
    save_dict, best_pcc = {'model_state_dict': copy.deepcopy(model.state_dict()), 'epoch': 0}, 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=.2, patience=5, threshold=1e-3, min_lr=1e-6)
    try:
        for epoch in range(num_epochs):
            running_loss = {phase: 0.0 for phase in phases}
            for phase in phases:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                steps, predictions, targets = 0, [], []
                tqdm_loader = tqdm(enumerate(data_loaders[phase]))
                for step, (features, truth_data) in tqdm_loader:
                    features = to_var(features, args.device)
                    truth_data = to_var(truth_data, args.device)
                    # Gradients only during the training phase.
                    with torch.set_grad_enabled(phase == 'train'):
                        if args.lossinside:
                            loss, outputs = model(features, truth_data, args,
                                                  loss_func=loss_func)
                        else:
                            outputs = model(features, args)
                            loss = loss_func(truth=truth_data, predict=outputs)
                        if phase == 'train':
                            if torch.isnan(loss):
                                # Skip the update on a NaN loss and dump the
                                # offending batch for debugging.
                                print("=============LOSS NAN============")
                                print(features)
                                print(truth_data)
                                print(outputs)
                            else:
                                optimizer.zero_grad()
                                loss.backward()
                                optimizer.step()
                    targets.append(truth_data.cpu().numpy())
                    predictions.append(outputs.cpu().detach().numpy())
                    # BUG FIX: the original accumulated the raw loss tensor,
                    # which kept every step's autograd graph alive -- the
                    # memory growth the old comment complained about.
                    # .item() detaches to a plain float.
                    running_loss[phase] += loss.item() * truth_data.size(0)
                    steps += truth_data.size(0)
                    tqdm_loader.set_description(
                        f'{phase} epoch: {epoch}, {phase} loss: {running_loss[phase] / steps}')
                torch.cuda.empty_cache()

                predictions = np.concatenate(predictions)
                targets = np.concatenate(targets)
                scores = calculate_metrics(
                    predictions.reshape(predictions.shape[0], -1),
                    targets.reshape(targets.shape[0], -1),
                    args, plot=epoch % 5 == 0, **kwargs)
                writer.add_scalars(f'score/{phase}', scores, global_step=epoch)
                with open(model_folder + "/output.txt", "a") as f:
                    f.write(f'{phase} epoch: {epoch}, {phase} loss: {running_loss[phase] / steps}\n')
                    f.write(str(scores))
                    f.write('\n')
                    f.write(str(time.time()))
                    f.write("\n\n")
                print(scores)

                # Checkpoint on the best validation Pearson correlation.
                if phase == 'val' and scores['pearr'] > best_pcc:
                    best_pcc = scores['pearr']
                    save_dict.update(
                        model_state_dict=copy.deepcopy(model.state_dict()),
                        epoch=epoch,
                        optimizer_state_dict=copy.deepcopy(optimizer.state_dict()))

            scheduler.step(running_loss['train'])
            writer.add_scalars('Loss', {
                f'{phase} loss': running_loss[phase] / len(data_loaders[phase].dataset)
                for phase in phases}, global_step=epoch)
    finally:
        # Always persist checkpoints, even on interruption/exception.
        time_elapsed = time.perf_counter() - since
        print(f"cost {time_elapsed} seconds")
        save_model(f"{model_folder}/best_model.pkl", **save_dict)
        save_model(f"{model_folder}/final_model.pkl",
                   **{'model_state_dict': copy.deepcopy(model.state_dict()),
                      'epoch': num_epochs,
                      'optimizer_state_dict': copy.deepcopy(optimizer.state_dict())})