def pytorch_net_to_buffer(pytorch_net, input_dim, model_on_gpu, float_input=True): """Traces a pytorch net and outputs a python buffer object holding net.""" training = pytorch_net.training pytorch_net.train(False) for name, p in pytorch_net.named_parameters(): inf_count = torch.isinf(p).sum().item() nan_count = torch.isnan(p).sum().item() assert inf_count + nan_count == 0, "{} has {} inf and {} nan".format( name, inf_count, nan_count ) if float_input: dtype = torch.cuda.FloatTensor if model_on_gpu else torch.FloatTensor dummy_input = torch.randn(1, input_dim).type(dtype) else: dtype = torch.cuda.LongTensor if model_on_gpu else torch.LongTensor dummy_input = torch.randint(low=0, high=1, size=(1, input_dim)).type(dtype) write_buffer = BytesIO() try: torch.onnx.export(pytorch_net, dummy_input, write_buffer) finally: pytorch_net.train(training) return write_buffer
def forward(self, model_output, targets, loss_scalars):
    """Compute the combined flow/VAE training loss for a glow-based TTS model.

    Accumulates four weighted terms — MelGlow (decoder), CVarGlow, VarGlow and
    the global sylps / perceived-loudness NLL terms — and returns them in a
    dict alongside the total under key "loss".
    """
    # Per-call loss scalars: a None entry in loss_scalars falls back to the
    # default scalar stored on self.
    MelGlow_ls = loss_scalars['MelGlow_ls'] if loss_scalars['MelGlow_ls'] is not None else self.MelGlow_loss_scalar
    DurGlow_ls = loss_scalars['DurGlow_ls'] if loss_scalars['DurGlow_ls'] is not None else self.DurGlow_loss_scalar
    VarGlow_ls = loss_scalars['VarGlow_ls'] if loss_scalars['VarGlow_ls'] is not None else self.VarGlow_loss_scalar
    Sylps_ls = loss_scalars['Sylps_ls'] if loss_scalars['Sylps_ls'] is not None else self.Sylps_loss_scalar
    # loss_func — unpack targets; trailing *_ ignores any extra entries.
    mel_target, text_lengths, output_lengths, perc_loudness_target, f0_target, energy_target, sylps_target, voiced_mask, char_f0, char_voiced, char_energy, *_ = targets
    B, n_mel, dec_T = mel_target.shape
    enc_T = text_lengths.max()
    output_lengths_float = output_lengths.float()
    loss_dict = {}
    # Decoder / MelGlow Loss
    if True:
        mel_z, log_s_sum, logdet_w_sum = model_output['melglow']
        # remove paddings before loss calc
        mask = get_mask_from_lengths(output_lengths)[:, None, :]  # [B, 1, T] BoolTensor
        mask = mask.expand(mask.size(0), mel_target.size(1), mask.size(2))  # [B, n_mel, T] BoolTensor
        n_elems = (output_lengths_float.sum() * n_mel)
        mel_z = torch.masked_select(mel_z, mask)
        dec_loss_z = ((mel_z.pow(2).sum()) / self.sigma2_2) / n_elems  # mean z (over all elements)
        log_s_sum = log_s_sum.view(B, -1, dec_T)
        log_s_sum = torch.masked_select(log_s_sum, mask[:, :log_s_sum.shape[1], :])
        dec_loss_s = -log_s_sum.sum() / (n_elems)
        # NOTE(review): the W term normalizes by the full n_mel*dec_T rather
        # than the masked element count — confirm this asymmetry is intended.
        dec_loss_w = -logdet_w_sum.sum() / (n_mel * dec_T)
        dec_loss_d = dec_loss_z + dec_loss_w + dec_loss_s
        loss = dec_loss_d * MelGlow_ls
        # Free the large intermediates before the next block.
        del mel_z, log_s_sum, logdet_w_sum, mask, n_elems
        loss_dict["Decoder_Loss_Z"] = dec_loss_z
        loss_dict["Decoder_Loss_W"] = dec_loss_w
        loss_dict["Decoder_Loss_S"] = dec_loss_s
        loss_dict["Decoder_Loss_Total"] = dec_loss_d
        assert not (torch.isnan(loss) | torch.isinf(loss)).any(), 'Inf/NaN Loss at MelGlow Latents'
    # CVarGlow Loss
    if True:
        z, log_s_sum, logdet_w_sum = model_output['cvarglow']
        # glow_loss returns (total, z-term, w-term, s-term).
        _ = glow_loss(z, log_s_sum, logdet_w_sum, text_lengths, self.dg_sigma2_2)
        cvar_loss_d, cvar_loss_z, cvar_loss_w, cvar_loss_s = _
        # Only added to the total when the default scalar is truthy; the
        # individual terms are still logged below regardless.
        if self.DurGlow_loss_scalar:
            loss = loss + cvar_loss_d * DurGlow_ls
        del z, log_s_sum, logdet_w_sum
        loss_dict["CVar_Loss_Z"] = cvar_loss_z
        loss_dict["CVar_Loss_W"] = cvar_loss_w
        loss_dict["CVar_Loss_S"] = cvar_loss_s
        loss_dict["CVar_Loss_Total"] = cvar_loss_d
        assert not (torch.isnan(loss) | torch.isinf(loss)).any(), 'Inf/NaN Loss at CVarGlow Latents'
    # FramGlow Loss (frame-level variance glow; same masking scheme as MelGlow
    # but over 6 latent channels instead of n_mel).
    if True:
        z, log_s_sum, logdet_w_sum = model_output['varglow']
        z_channels = 6
        z = z.view(z.shape[0], z_channels, -1)
        # remove paddings before loss calc
        mask = get_mask_from_lengths(output_lengths)[:, None, :]  # [B, 1, T] BoolTensor
        mask = mask.expand(mask.size(0), z_channels, mask.size(2))  # [B, z_channels, T] BoolTensor
        n_elems = (output_lengths_float.sum() * z_channels)
        z = torch.masked_select(z, mask)
        var_loss_z = ((z.pow(2).sum()) / self.sigma2_2) / n_elems  # mean z (over all elements)
        log_s_sum = log_s_sum.view(B, -1, dec_T)
        log_s_sum = torch.masked_select(log_s_sum, mask[:, :log_s_sum.shape[1], :])
        var_loss_s = -log_s_sum.sum() / (n_elems)
        var_loss_w = -logdet_w_sum.sum() / (z_channels * dec_T)
        var_loss_d = var_loss_z + var_loss_w + var_loss_s
        loss = loss + var_loss_d * VarGlow_ls
        del z, log_s_sum, logdet_w_sum, mask, n_elems, z_channels
        loss_dict["Variance_Loss_Z"] = var_loss_z
        loss_dict["Variance_Loss_W"] = var_loss_w
        loss_dict["Variance_Loss_S"] = var_loss_s
        loss_dict["Variance_Loss_Total"] = var_loss_d
        assert not (torch.isnan(loss) | torch.isinf(loss)).any(), 'Inf/NaN Loss at VarGlow Latents'
    # Sylps Loss — NLL of the target syllables-per-second under the predicted
    # Normal(mu, logvar) taken from the first two global-encoder channels.
    if True:
        enc_global_outputs, sylps = model_output['sylps']  # [B, 2], [B]
        mu, logvar = enc_global_outputs.transpose(0, 1)[:2, :]  # [2, B]
        loss_dict["zSylps_Loss"] = NormalLLLoss(mu, logvar, sylps)  # [B], [B], [B] -> [B]
        loss = loss + loss_dict["zSylps_Loss"] * Sylps_ls
        del mu, logvar, enc_global_outputs, sylps
        assert not (torch.isnan(loss) | torch.isinf(loss)).any(), 'Inf/NaN Loss at Pred Sylps'
    # Perceived Loudness Loss — same scheme using channels 2:4.
    if True:
        enc_global_outputs, perc_loudness = model_output['perc_loud']  # [B, 2], [B]
        mu, logvar = enc_global_outputs.transpose(0, 1)[2:4, :]  # [2, B]
        loss_dict["zPL_Loss"] = NormalLLLoss(mu, logvar, perc_loudness)  # [B], [B], [B] -> [B]
        # NOTE(review): this term is weighted by Sylps_ls, not a dedicated
        # loudness scalar — looks like a copy-paste; confirm intended.
        loss = loss + loss_dict["zPL_Loss"] * Sylps_ls
        del mu, logvar, enc_global_outputs, perc_loudness
        assert not (torch.isnan(loss) | torch.isinf(loss)).any(), 'Inf/NaN Loss at Pred Perceived Loudness'
    loss_dict["loss"] = loss
    return loss_dict
def check_inf(tensor):
    """Return a 0-dim bool tensor: True iff *tensor* holds any +/-Inf entry."""
    detached = tensor.detach()
    return torch.isinf(detached).any()
def train(
    dataloader_trn,
    dataloader_val,
    intersection_model,
    device,
    optimizer,
    lr_scheduler,
    early_stopping,
    binary_crossentropy=nn.BCELoss(reduction="none"),  # NOTE(review): module instance as default arg is shared across calls — confirm intended
    epoch_max=15,
    clip_value=5,
):
    """Train the intersection (traffic-light) model.

    Per epoch: optimizes a masked BCE loss over per-light color classes plus a
    masked negative log-likelihood over time-to-event distributions, then runs
    a validation pass, steps the LR scheduler on the mean validation loss, and
    stops early via *early_stopping*.
    """
    # ==== TRAIN LOOP
    for epoch in range(epoch_max):
        progress_bar = tqdm((dataloader_trn), desc=f"Epoch {epoch}")
        losses_train = []
        losses_lob_prob_train = []
        losses_bce_train = []
        for batch in progress_bar:
            (
                tokens,
                token_type_ohe,
                token_timesteps,
                seq_len,
                all_true_classes,
                all_tte,
                classes_availabilities,
                tte_availabilities,
            ) = batch
            # moving to GPU if available
            tokens, token_type_ohe, token_timesteps, seq_len = (
                tokens.to(device),
                token_type_ohe.to(device),
                token_timesteps.to(device),
                seq_len.to(device),
            )
            # Per-traffic-light dicts: presumably keyed by light id — verify against dataloader.
            all_true_classes = {
                tl_i: vals.to(device) for tl_i, vals in all_true_classes.items()
            }
            all_tte = {tl_i: vals.to(device) for tl_i, vals in all_tte.items()}
            classes_availabilities = {
                tl_i: vals.to(device)
                for tl_i, vals in classes_availabilities.items()
            }
            tte_availabilities = {
                tl_i: vals.to(device) for tl_i, vals in tte_availabilities.items()
            }
            intersection_model.train()
            torch.set_grad_enabled(True)
            # Model returns per-light color-class predictions and TTE distributions.
            tl_2_color_class, tl_2_tte_distr = intersection_model(
                tokens, token_type_ohe, token_timesteps, seq_len)
            # --- masked BCE over color classes, averaged over available terms ---
            loss_bce = torch.tensor([0.0]).to(device)
            loss_bce_terms_count = torch.tensor([0.0]).to(device)
            for tl_id, pred_color_classes in tl_2_color_class.items():
                true_color_classes = all_true_classes[tl_id]
                # availability mask zeroes out lights without ground truth
                bce_loss_tl = (binary_crossentropy(
                    torch.squeeze(pred_color_classes),
                    true_color_classes) * classes_availabilities[tl_id])
                loss_bce += bce_loss_tl.sum()
                loss_bce_terms_count += classes_availabilities[tl_id].sum()
            if loss_bce_terms_count > 0:
                loss_bce /= loss_bce_terms_count
            # --- masked negative log-prob of true time-to-event values ---
            loss_tte_log_prob = torch.tensor([0.0]).to(device)
            loss_tte_log_prob_terms_count = torch.tensor([0.0]).to(device)
            for tl_id, tte_distr in tl_2_tte_distr.items():
                true_ttes = torch.unsqueeze(all_tte[tl_id], -1)
                log_prob_all = (torch.squeeze(tte_distr.log_prob(true_ttes)) *
                                tte_availabilities[tl_id])
                # Zero out NaN/Inf log-probs so a single degenerate
                # distribution cannot poison the batch loss.
                log_prob_all[torch.logical_or(
                    torch.isnan(log_prob_all),
                    torch.isinf(log_prob_all))] = torch.tensor(0.0).to(device)
                loss_tte_log_prob -= log_prob_all.sum()
                loss_tte_log_prob_terms_count += tte_availabilities[tl_id].sum()
            if loss_tte_log_prob_terms_count > 0:
                loss_tte_log_prob /= loss_tte_log_prob_terms_count
            loss = loss_bce + loss_tte_log_prob
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(intersection_model.parameters(), clip_value)
            optimizer.step()
            # Only record losses for batches that actually contributed terms.
            if loss_bce_terms_count > 0 or loss_tte_log_prob_terms_count > 0:
                losses_train.append(loss.item())
            if loss_tte_log_prob_terms_count > 0:
                losses_lob_prob_train.append(loss_tte_log_prob.item())
            if loss_bce_terms_count > 0:
                losses_bce_train.append(loss_bce.item())
            progress_bar.set_description(
                f"Ep. {epoch}, loss: {loss.item():.2f} (bce: {loss_bce.item():.2f}, log prob: {loss_tte_log_prob.item():.3f})"
            )
        print(
            f"Avg train loss: {np.mean(losses_train):.5f} (bce: {np.mean(losses_bce_train):.5f}, log prob: {np.mean(losses_lob_prob_train):.5f})"
        )
        # ==== VALIDATION LOOP (no backward pass)
        intersection_model.eval()
        losses_val_all = []
        # TODO: rename properly
        losses_val_lob_prob_train = []
        losses_val_bce_train = []
        for batch in tqdm(dataloader_val, desc="Validation.."):
            (
                tokens,
                token_type_ohe,
                token_timesteps,
                seq_len,
                all_true_classes,
                all_tte,
                classes_availabilities,
                tte_availabilities,
            ) = batch
            tokens, token_type_ohe, token_timesteps, seq_len = (
                tokens.to(device),
                token_type_ohe.to(device),
                token_timesteps.to(device),
                seq_len.to(device),
            )
            all_true_classes = {
                tl_i: vals.to(device) for tl_i, vals in all_true_classes.items()
            }
            all_tte = {tl_i: vals.to(device) for tl_i, vals in all_tte.items()}
            classes_availabilities = {
                tl_i: vals.to(device)
                for tl_i, vals in classes_availabilities.items()
            }
            tte_availabilities = {
                tl_i: vals.to(device) for tl_i, vals in tte_availabilities.items()
            }
            # TODO: comment out the next two lines, left for reproducibility
            # NOTE(review): validation deliberately runs in train mode with
            # grads enabled per the TODO above — confirm before changing.
            intersection_model.train()
            torch.set_grad_enabled(True)
            tl_2_color_class, tl_2_tte_distr = intersection_model(
                tokens, token_type_ohe, token_timesteps, seq_len)
            loss_bce = torch.tensor([0.0]).to(device)
            loss_bce_terms_count = torch.tensor([0.0]).to(device)
            for tl_id, pred_color_classes in tl_2_color_class.items():
                true_color_classes = all_true_classes[tl_id]
                bce_loss_tl = (binary_crossentropy(
                    torch.squeeze(pred_color_classes),
                    true_color_classes) * classes_availabilities[tl_id])
                loss_bce += bce_loss_tl.sum()
                loss_bce_terms_count += classes_availabilities[tl_id].sum()
            if loss_bce_terms_count:
                loss_bce /= loss_bce_terms_count
            loss_tte_log_prob = torch.tensor([0.0]).to(device)
            loss_tte_log_prob_terms_count = torch.tensor([0.0]).to(device)
            for tl_id, tte_distr in tl_2_tte_distr.items():
                true_ttes = torch.unsqueeze(all_tte[tl_id], -1)
                log_prob_all = (torch.squeeze(tte_distr.log_prob(true_ttes)) *
                                tte_availabilities[tl_id])
                log_prob_all[torch.logical_or(
                    torch.isnan(log_prob_all),
                    torch.isinf(log_prob_all))] = torch.tensor(0.0).to(device)
                loss_tte_log_prob -= log_prob_all.sum()
                loss_tte_log_prob_terms_count += tte_availabilities[tl_id].sum()
            if loss_tte_log_prob_terms_count:
                loss_tte_log_prob /= loss_tte_log_prob_terms_count
            # NOTE(review): validation halves the log-prob term but training
            # does not — confirm the asymmetry is intended.
            loss = (
                loss_bce + loss_tte_log_prob / 2
            )  # to have more close scales for values of bce vs. log_prob
            if loss_bce_terms_count > 0 or loss_tte_log_prob_terms_count > 0:
                losses_val_all.append(loss.item())
            if loss_tte_log_prob_terms_count > 0:
                losses_val_lob_prob_train.append(loss_tte_log_prob.item())
            if loss_bce_terms_count > 0:
                losses_val_bce_train.append(loss_bce.item())
        loss_val_mean = np.mean(losses_val_all)
        print(
            f"Val loss: {loss_val_mean: .5f} (bce: {np.mean(losses_val_bce_train):.5f}, log prob: {np.mean(losses_val_lob_prob_train): .5f})"
        )
        # Scheduler and early stopping are driven by mean validation loss.
        lr_scheduler.step(loss_val_mean)
        early_stopping(loss_val_mean, intersection_model)
        if early_stopping.early_stop or loss_val_mean == 0:
            break
predlabels = [] # Iterate over data. for batch_data in dataloaders[phase]: embedding_data = batch_data["embedding"] # print ('Embedding data: ', embedding_data.shape) # embedding_data = embedding_data.type(torch.DoubleTensor).to(device) embedding_data = embedding_data.to(device) if args.task == "movement": truelabels.extend(batch_data["movement_label"].numpy()) target = batch_data["movement_label"] target = target.type(torch.LongTensor).to(device) elif args.task == "volatility": target = batch_data["volatility"] target[torch.isnan(target)] = 0 target[torch.isinf(target)] = 0 target = target.type( torch.FloatTensor).to(device).unsqueeze(-1) length = batch_data["length_data"] time_feats = batch_data["time_feature"].to(device).squeeze(-1) # zero the parameter gradients optimizer.zero_grad() # forward with torch.set_grad_enabled(phase == "train"): outputs, margin_output = model(embedding_data, length, time_feats) # print ('Outputs: ', outputs.shape)
def test_loop(cfg, model, optimizer, criterion, test_loader, epoch):
    """Per-batch fit-and-evaluate loop for a KPN denoiser on burst images.

    For every test batch a FRESH model/optimizer are created (the passed-in
    *model*, *optimizer* and *criterion* are overwritten below — NOTE(review):
    likely deep-image-prior-style per-batch optimization; confirm intended)
    and optimized for up to ``iters`` steps; the best PSNR per batch is
    accumulated and the average returned.
    """
    model.train()
    model = model.to(cfg.device)
    total_psnr = 0
    total_loss = 0
    for batch_idx, (burst_imgs, res_imgs, raw_img) in enumerate(test_loader):
        # for batch_idx, (burst_imgs, raw_img) in enumerate(test_loader):
        BS = raw_img.shape[0]
        # NOTE(review): BS is immediately overwritten by the burst shape here.
        N, BS, C, H, W = burst_imgs.shape
        # -- selecting input frames --
        input_order = np.arange(cfg.N)
        # print("pre",input_order)
        # if cfg.blind or True:
        middle_img_idx = -1
        if not cfg.input_with_middle_frame:
            # Exclude the middle frame from the inputs; it becomes the target.
            middle = cfg.N // 2
            # print(middle)
            middle_img_idx = input_order[middle]
            input_order = np.r_[input_order[:middle], input_order[middle+1:]]
        else:
            # input_order = np.arange(cfg.N)
            middle = len(input_order) // 2
            middle_img_idx = input_order[middle]
            input_order = np.arange(cfg.N)
        # print("post",input_order,middle_img_idx,cfg.blind,cfg.N)
        # -- reshaping of data --
        raw_img = raw_img.cuda(non_blocking=True)
        burst_imgs = burst_imgs.cuda(non_blocking=True)
        if cfg.color_cat:
            stacked_burst = torch.cat([burst_imgs[input_order[x]] for x in range(cfg.input_N)], dim=1)
        else:
            stacked_burst = torch.stack([burst_imgs[input_order[x]] for x in range(cfg.input_N)], dim=1)
        # stacked_burst = torch.cat([burst_imgs[input_order[x]] for x in range(cfg.input_N)],dim=0)
        # stacked_burst = torch.cat([burst_imgs[input_order[x]] for x in range(cfg.input_N)],dim=0)
        # NOTE(review): this unconditionally overwrites the cfg.color_cat
        # branch above — confirm which behavior is wanted.
        stacked_burst = torch.stack([burst_imgs[input_order[x]] for x in range(cfg.input_N)], dim=1)
        cat_burst = torch.cat([burst_imgs[input_order[x]] for x in range(cfg.input_N)], dim=1)
        # -- dip denoising --
        # img = burst_imgs[middle_img_idx] + 0.5
        # Target is the (shifted) middle frame; +0.5 presumably undoes a
        # zero-centering in the dataloader — verify against the dataset.
        t_img = burst_imgs[middle_img_idx] + 0.5
        img = stacked_burst + 0.5
        # img = torch.normal(raw_img,25./255)
        # z = torch.normal(0,torch.ones_like(img[0].unsqueeze(0)))
        # print(z.shape)
        # z = z.requires_grad_(True)
        diff = 100
        idx = 0
        iters = 2400
        tol = 5e-9
        # params = [params.data.clone() for params in model.parameters()]
        # stacked_burst = torch.normal(0,torch.ones( ( BS, N, C, H, W) ))
        # stacked_burst = stacked_burst.cuda(non_blocking=True)
        # cat_burst = rearrange(stacked_burst,'bs n c h w -> bs (n c) h w')
        best_psnr = 0
        # Re-initialize model/optimizer from scratch for this batch.
        model, criterion = load_model_kpn(cfg)
        optimizer = load_optimizer(cfg, model)
        model = model.cuda()
        model.apply(weights_init)
        # print(f"global_step: {cfg.global_step}")
        cfg.global_step = 0
        while (idx < iters):
            idx += 1
            optimizer.zero_grad()
            model.zero_grad()
            # z_img = z + torch.normal(0,torch.ones_like(z)) * 1./20
            # stacked_burst_i = torch.normal(stacked_burst,1./20)
            # cat_burst_i = torch.normal(cat_burst,1./20)
            # print('m',torch.mean( (stacked_burst_i - stacked_burst)**2) )
            # z_img = z
            # rec_img = model(z_img)
            # -- create inputs for kpn --
            # stacked_burst = torch.stack([burst_imgs_noisy[input_order[x]] for x in range(cfg.input_N)],
            #                             dim=1)
            # cat_burst = torch.cat([burst_imgs_noisy[input_order[x]] for x in range(cfg.input_N)],dim=1)
            # -- forward kpn model --
            rec_img_i, rec_img = model(cat_burst, stacked_burst)
            lossE_ = criterion(rec_img_i, rec_img, t_img, cfg.global_step)
            # lossE_ = criterion(rec_img_i, rec_img, t_img, cfg.global_step)
            cfg.global_step += 30
            # NOTE(review): np.sum over the criterion output, then
            # lossE.backward() below — this only works if criterion returns
            # torch tensors that np.sum reduces to a tensor; confirm.
            lossE = np.sum(lossE_)
            # lossE = F.mse_loss(t_img,rec_img)
            # lossE = np.sum([F.mse_loss(t_img,rec_img_i[:,i]) for i in range(N)])
            # lossE = F.mse_loss(t_img,rec_img)
            # (idx % 1) == 0 is always true; kept as-is.
            if (idx % 1) == 0 or idx == 1:
                # print(rec_img.shape)
                # PSNR is evaluated on a 16x16 crop only.
                loss = F.mse_loss(raw_img[:, :, :16, :16], rec_img[:, :, :16, :16], reduction='none').reshape(BS, -1)
                loss = torch.mean(loss, 1).detach().cpu().numpy()
                psnr = np.mean(mse_to_psnr(loss))
                if (idx % 100) == 0 or idx == 1:
                    print("[%d/%d] lossE: [%.2e] psnr: [%.2f]" % (idx, iters, lossE, psnr))
                if psnr > best_psnr:
                    best_psnr = psnr
            # Abort this batch's optimization if the loss diverged.
            if torch.isinf(lossE):
                break
            # a = list(model.parameters())[0].clone()
            lossE.backward()
            optimizer.step()
            # b = list(model.parameters())[0].clone()
            # print("EQ?",torch.equal(a.data,b.data))
            # print(torch.mean(a.data - b.data)**2)
            # params_p = [params.data.clone() for params in model.parameters()]
            # diff = np.mean([float(torch.mean((p - p_p)**2).cpu().item()) for p,p_p in zip(params,params_p)])
            # print("diff: {:.2e}".format(diff))
            # params = params_p
        # rec_img = model(z)
        print(f"Best PSNR: {best_psnr}")
        # -- compare with stacked targets --
        # rec_img = rescale_noisy_image(rec_img)
        # loss = F.mse_loss(raw_img,rec_img,reduction='none').reshape(BS,-1)
        # loss = torch.mean(loss,1).detach().cpu().numpy()
        # psnr = mse_to_psnr(loss)
        # print(np.mean(psnr))
        total_psnr += np.mean(best_psnr)
        # total_loss += np.mean(loss)   # NOTE: total_loss is never updated, so ave_loss below is always 0
        if (batch_idx % cfg.test_log_interval) == 0:
            # Periodically dump a grid of reconstructed images to disk.
            root = Path(f"{settings.ROOT_PATH}/output/n2n/offset_out_noise/rec_imgs/N{cfg.N}/e{epoch}")
            if not root.exists():
                root.mkdir(parents=True)
            fn = root / Path(f"b{batch_idx}.png")
            nrow = int(np.sqrt(cfg.batch_size))
            rec_img = rec_img.detach().cpu()
            grid_imgs = vutils.make_grid(rec_img, padding=2, normalize=True, nrow=nrow)
            plt.imshow(grid_imgs.permute(1, 2, 0))
            plt.savefig(fn)
            plt.close('all')
    ave_psnr = total_psnr / len(test_loader)
    ave_loss = total_loss / len(test_loader)
    print("[Blind: %d | N: %d] Testing results: Ave psnr %2.3e Ave loss %2.3e" % (cfg.blind, cfg.N, ave_psnr, ave_loss))
    return ave_psnr
def torch_row_normalize(x):
    """Normalize each row of 2-D tensor *x* so it sums to 1.

    Rows with a zero sum would otherwise produce non-finite entries
    (x/0 -> Inf, 0/0 -> NaN); both are replaced with 0 so degenerate rows
    come back as all-zero rows.

    Bug fixed: the original only zeroed Inf, leaving NaN from all-zero rows
    (0/0) in the output.
    """
    row_sum = x.sum(1).reshape(-1, 1)
    ret = x / row_sum
    # ~isfinite covers both Inf (x/0) and NaN (0/0) in one mask.
    ret[~torch.isfinite(ret)] = 0
    return ret
def main(args):
    """Train and evaluate a GCN on a DGL citation-graph dataset.

    Loads the dataset, optionally moves everything to the GPU selected by
    ``args.gpu``, builds symmetric-normalization factors, trains for
    ``args.n_epochs`` with Adam, and prints the final test accuracy.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks used for tensor indexing below are
    # deprecated in modern torch in favor of BoolTensor — confirm torch version.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    # Negative gpu index means CPU-only.
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    if args.self_loop:
        g.add_edges(g.nodes(), g.nodes())
    # normalization: deg^{-1/2}; isolated nodes give Inf which is zeroed out.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # Skip the first 3 epochs when timing, to exclude warm-up cost.
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
    print()
    # Final held-out evaluation.
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def assert_tensor_is_good(self, tensor, shape=None):
    """Assert *tensor* is a torch.Tensor with no NaN/Inf, optionally of *shape*."""
    self.assertIsInstance(tensor, torch.Tensor)
    # Check NaN first, then Inf — same order as the individual assertions.
    for bad_mask in (torch.isnan(tensor), torch.isinf(tensor)):
        self.assertFalse(bad_mask.any())
    if shape is None:
        return
    self.assertEqual(tensor.shape, torch.Size(shape))
def is_legal(v):
    """Return True iff tensor *v* contains no NaN and no Inf values.

    Bug fixed: the original evaluated ``not torch.isinf(v)`` without ``.any()``,
    which raises "Boolean value of Tensor with more than one element is
    ambiguous" for every tensor with more than one element (and returned a
    tensor-truthiness result rather than a clean bool otherwise).
    """
    legal = not torch.isnan(v).any() and not torch.isinf(v).any()
    return legal
def train_tier(args: argparse.Namespace, hp: HParams, tier: int,
               extension_architecture: str, timestamp: str,
               tensorboardwriter: TensorboardWriter,
               logger: logging.Logger) -> None:
    """
    Trains one tier of MelNet.

    Args:
        args (argparse.Namespace): parameters to set up the training. At least, args must contain:
            args = {"path_config": ..., "tier": ..., "checkpoint_path": ...}
        hp (HParams): hyperparameters for the model and other parameters (training, dataset, ...)
        tier (int): number of the tier to train.
        extension_architecture (str): information about the network's architecture of this run
            (training) to identify the logs and weights of the model.
        timestamp (str): information that identifies completely this run (training).
        tensorboardwriter (TensorboardWriter): to log information about training to tensorboard.
        logger (logging.Logger): to log general information about the training of the model.
    """
    logger.info(f"Start training of tier {tier}/{hp.network.n_tiers}")
    # Setup the data ready to be consumed
    train_dataloader, test_dataloader, num_samples = get_dataloader(hp)
    # Setup tier
    # Calculate size of FREQ dimension for this tier
    tier_freq = tierutil.get_size_freqdim_of_tier(n_mels=hp.audio.mel_channels,
                                                  n_tiers=hp.network.n_tiers,
                                                  tier=tier)
    # Tier 1 is unconditional; higher tiers condition on previous tiers' output.
    if tier == 1:
        model = Tier1(tier=tier,
                      n_layers=hp.network.layers[tier - 1],
                      hidden_size=hp.network.hidden_size,
                      gmm_size=hp.network.gmm_size,
                      freq=tier_freq)
    else:
        model = Tier(tier=tier,
                     n_layers=hp.network.layers[tier - 1],
                     hidden_size=hp.network.hidden_size,
                     gmm_size=hp.network.gmm_size,
                     freq=tier_freq)
    model = model.to(hp.device)
    model.train()
    parameters = model.parameters()
    # Setup loss criterion and optimizer
    criterion = GMMLoss()
    optimizer = torch.optim.RMSprop(params=parameters,
                                    lr=hp.training.lr,
                                    momentum=hp.training.momentum)
    # Check if training has to be resumed from previous checkpoint
    if args.checkpoint_path is not None:
        model, optimizer = resume_training(args, hp, tier, model, optimizer, logger)
    else:
        logger.info(
            f"Starting new training on dataset {hp.data.dataset} with configuration file "
            f"name {hp.name}")
    # Train the tier
    total_iterations = 0
    loss_logging = 0  # accumulated loss between logging iterations
    loss_save = 0  # accumulated loss between saving iterations
    prev_loss_onesample = 1e8  # used to compare between saving iterations and decide whether or not
    # to save the model
    gradients = []
    for epoch in range(hp.training.epochs):
        logger.info(f"Epoch: {epoch}/{hp.training.epochs} - Starting")
        for i, (waveform, utterance) in enumerate(train_dataloader):
            # 1.1 Transform waveform input to melspectrogram and apply preprocessing to normalize
            waveform = waveform.to(device=hp.device, non_blocking=True)
            spectrogram = transforms.wave_to_melspectrogram(waveform, hp)
            spectrogram = audio_normalizing.preprocessing(spectrogram, hp)
            # 1.2 Get input and output from the original spectrogram for this tier
            input_spectrogram, output_spectrogram = tierutil.split(
                spectrogram=spectrogram, tier=tier, n_tiers=hp.network.n_tiers)
            length_spectrogram = input_spectrogram.size(2)
            # if item is too long, we jump to the next one
            if length_spectrogram > 1000:
                continue
            # 2. Compute the model output
            if tier == 1:
                # generation is unconditional so there is only one input
                mu_hat, std_hat, pi_hat = model(spectrogram=input_spectrogram)
            else:
                # generation is conditional on the spectrogram generated by previous tiers
                mu_hat, std_hat, pi_hat = model(
                    spectrogram=output_spectrogram,
                    spectrogram_prev_tier=input_spectrogram)
            # gpumemory.stat_cuda("Forward")
            # 3. Calculate the loss
            loss = criterion(mu=mu_hat, std=std_hat, pi=pi_hat,
                             target=output_spectrogram)
            # gpumemory.stat_cuda("Loss")
            # Free large intermediates eagerly to reduce peak GPU memory.
            del spectrogram
            del mu_hat, std_hat, pi_hat
            # 3.1 Check if loss has exploded
            if torch.isnan(loss) or torch.isinf(loss):
                error_msg = f"Loss exploded at Epoch: {epoch}/{hp.training.epochs} - " \
                            f"Iteration: {i * hp.training.batch_size}/{num_samples}"
                logger.error(error_msg)
                raise Exception(error_msg)
            # 4. Compute gradients
            loss_cpu = loss.item()
            # Scale the loss so accumulated gradients average over the steps.
            loss = loss / hp.training.accumulation_steps
            loss.backward()
            # 5. Perform backpropagation (using gradient accumulation so efective batch size is the
            # same as in the paper)
            if (total_iterations + 1) % (hp.training.accumulation_steps / hp.training.batch_size) == 0:
                gradients.append(gradient_norm(model))
                avg_gradient = sum(gradients) / len(gradients)
                logger.info(f"Gradient norm: {gradients[-1]} - "
                            f"Avg gradient: {avg_gradient}")
                torch.nn.utils.clip_grad_norm_(parameters, 2200)
                optimizer.step()
                model.zero_grad()
            # 6. Logging and saving model
            loss_oneframe = loss_cpu / (length_spectrogram * hp.training.batch_size)
            loss_logging += loss_oneframe  # accumulated loss between logging iterations
            loss_save += loss_oneframe  # accumulated loss between saving iterations
            # 6.1 Save model (if is better than previous tier)
            if (total_iterations + 1) % hp.training.save_iterations == 0:
                # Calculate average loss of one sample of a batch
                loss_onesample = loss_save / hp.training.save_iterations
                # if loss_onesample of these iterations is lower, the tier is better and we save it
                if loss_onesample <= prev_loss_onesample:
                    path = f"{hp.training.dir_chkpt}/tier{tier}_{timestamp}_loss{loss_onesample:.2f}.pt"
                    torch.save(obj={
                        'dataset': hp.data.dataset,
                        'tier_idx': tier,
                        'hp': hp,
                        'epoch': epoch,
                        'iterations': i,
                        'total_iterations': total_iterations,
                        'tier': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, f=path)
                    logger.info(f"Model saved to: {path}")
                    prev_loss_onesample = loss_onesample
                loss_save = 0
            # 6.2 Logging
            if (total_iterations + 1) % hp.logging.log_iterations == 0:
                # Calculate average loss of one sample of a batch
                loss_onesample = loss_logging / hp.logging.log_iterations
                tensorboardwriter.log_training(hp, loss_onesample, total_iterations)
                logger.info(f"Epoch: {epoch}/{hp.training.epochs} - "
                            f"Iteration: {i * hp.training.batch_size}/{num_samples} - "
                            f"Loss: {loss_onesample:.4f}")
                loss_logging = 0
            # 6.3 Evaluate
            if (total_iterations + 1) % hp.training.evaluation_iterations == 0:
                evaluation(hp, tier, test_dataloader, model, criterion, logger)
            total_iterations += 1
    # After finishing training: save model, hyperparameters and total loss
    path = f"{hp.training.dir_chkpt}/tier{tier}_{timestamp}_epoch{epoch}_final.pt"
    torch.save(obj={
        'dataset': hp.data.dataset,
        'tier_idx': tier,
        'hp': hp,
        'epoch': epoch,
        # NOTE(review): 'iterations' stores the return value of a full
        # evaluation run here (elsewhere it stores the batch index i) —
        # looks unintended; confirm.
        'iterations': evaluation(hp, tier, test_dataloader, model, criterion, logger),
        'total_iterations': total_iterations,
        'tier': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }, f=path)
    logger.info(f"Model saved to: {path}")
    tensorboardwriter.log_end_training(hp=hp, loss=-1)
    logger.info("Finished training")
def reparameterize(self, mu, logvar):
    """Sample from N(mu, sigma^2) via the reparameterization trick.

    sigma is recovered as exp(logvar / 2); the returned tensor is
    eps * sigma + mu with eps ~ N(0, I).
    """
    std = torch.exp(logvar * 0.5)
    eps = torch.randn_like(std)
    # Guard against numerically broken variances before sampling.
    assert not torch.isnan(std).any(), 'NAN-Values during reparameterization!'
    assert not torch.isinf(std).any(), 'Infinity-Values during reparameterization!'
    sample = eps.mul(std).add_(mu)
    return sample
def __getitem__(self, index):
    """Load one sample: image + cuboid keypoints, augment, and build
    belief/affinity map targets.

    Returns a dict with the normalized image tensor, clamped affinity and
    belief maps, the file name, and the un-normalized image tensor.
    """
    # load the data
    path_img, img_name, path_json = self.imgs[index]
    # load the image
    # img = cv2.imread(path_img,cv2.COLOR_BGR2RGB)
    img = np.array(Image.open(path_img).convert('RGB'))
    # load the json file
    all_projected_cuboid_keypoints = []
    with open(path_json) as f:
        data_json = json.load(f)
    # load the projected cuboid keypoints
    for obj in data_json['objects']:
        # Skip objects whose class is not in the configured set of interest.
        if not self.objects_interest is None and \
           not obj['class'] in self.objects_interest:
            continue
        # load the projected_cuboid_keypoints
        if obj['visibility'] == 1:
            projected_cuboid_keypoints = obj['projected_cuboid']
        else:
            # Invisible objects get 9 sentinel points at (-100, -100).
            projected_cuboid_keypoints = [[-100, -100], [-100, -100], [-100, -100],
                                          [-100, -100], [-100, -100], [-100, -100],
                                          [-100, -100], [-100, -100], [-100, -100]]
        all_projected_cuboid_keypoints.append(projected_cuboid_keypoints)
    # No usable object at all: one dummy all-sentinel object keeps shapes valid.
    if len(all_projected_cuboid_keypoints) == 0:
        all_projected_cuboid_keypoints = [[[-100, -100], [-100, -100], [-100, -100],
                                           [-100, -100], [-100, -100], [-100, -100],
                                           [-100, -100], [-100, -100], [-100, -100]]]
    # flatten the keypoints (albumentations expects a flat list of (x, y))
    flatten_projected_cuboid = []
    for obj in all_projected_cuboid_keypoints:
        for p in obj:
            flatten_projected_cuboid.append(p)
    #######
    if self.debug:
        # Dump the raw image with keypoints drawn, for visual inspection.
        img_to_save = Image.fromarray(img)
        draw = ImageDraw.Draw(img_to_save)
        for ip, p in enumerate(flatten_projected_cuboid):
            draw.ellipse((int(p[0]) - 2, int(p[1]) - 2,
                          int(p[0]) + 2, int(p[1]) + 2),
                         fill='green')
            # draw.text((p[0]*2+4, p[1]*2+4),str(ip),'green',font=font)
        img_to_save.save(
            f"debug/{img_name.replace('.png','_original.png')}")
    #######
    # data augmentation — keypoints are transformed alongside the image and
    # never dropped (remove_invisible=False) so indexing stays aligned.
    transform = A.Compose([
        A.RandomCrop(width=400, height=400),
        A.Rotate(limit=180),
        A.RandomBrightnessContrast(
            brightness_limit=0.2, contrast_limit=0.15, p=1),
        A.GaussNoise(p=1),
    ], keypoint_params=A.KeypointParams(
        format='xy', remove_invisible=False))
    transformed = transform(image=img, keypoints=flatten_projected_cuboid)
    img_transformed = transformed['image']
    flatten_projected_cuboid_transformed = transformed['keypoints']
    # img_transformed[:,:,3] = 255
    #######
    # transform to the final output
    if not self.output_size == 400:
        transform = A.Compose([
            A.Resize(width=self.output_size, height=self.output_size),
        ], keypoint_params=A.KeypointParams(
            format='xy', remove_invisible=False))
        transformed = transform(
            image=img_transformed,
            keypoints=flatten_projected_cuboid_transformed)
        img_transformed_output_size = transformed['image']
        flatten_projected_cuboid_transformed_output_size = transformed[
            'keypoints']
    else:
        # Already at the target size; no resize needed.
        img_transformed_output_size = img_transformed
        flatten_projected_cuboid_transformed_output_size = flatten_projected_cuboid_transformed
    #######
    if self.debug:
        # Dump the augmented image with transformed keypoints drawn.
        img_transformed_saving = Image.fromarray(img_transformed)
        draw = ImageDraw.Draw(img_transformed_saving)
        for ip, p in enumerate(flatten_projected_cuboid_transformed):
            draw.ellipse((int(p[0]) - 2, int(p[1]) - 2,
                          int(p[0]) + 2, int(p[1]) + 2),
                         fill='green')
            # draw.text((p[0]*2+4, p[1]*2+4),str(ip),'green',font=font)
        img_transformed_saving.save(
            f"debug/{img_name.replace('.png','_transformed.png')}")
    #######
    # update the keypoints list — write the transformed flat list back into
    # the nested structure, in the same traversal order used to flatten it.
    # obj x keypoint_id x (x,y)
    i_all = 0
    for i_obj, obj in enumerate(all_projected_cuboid_keypoints):
        for i_p, point in enumerate(obj):
            all_projected_cuboid_keypoints[i_obj][
                i_p] = flatten_projected_cuboid_transformed_output_size[
                    i_all]
            i_all += 1
    # generate the belief maps (one heatmap per keypoint)
    beliefs = CreateBeliefMap(
        size=int(self.output_size),
        pointsBelief=all_projected_cuboid_keypoints,
        sigma=self.sigma,
        nbpoints=9,
        save=False,
    )
    beliefs = torch.from_numpy(np.array(beliefs))
    # generate affinity fields with centroid.
    # def GenerateMapAffinity(img,nb_vertex,pointsInterest,objects_centroid,scale):
    affinities = GenerateMapAffinity(
        size=int(self.output_size),
        nb_vertex=8,
        pointsInterest=all_projected_cuboid_keypoints,
        # Last keypoint of each object is used as its centroid here —
        # presumably the cuboid center; verify against the data format.
        objects_centroid=np.array(all_projected_cuboid_keypoints)
        [:, -1].tolist(),
        scale=1,
        # save = True,
    )
    # prepare for the image tensors (ImageNet mean/std normalization)
    normalize_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    to_tensor = transforms.Compose([
        transforms.ToTensor(),
    ])
    img_tensor = normalize_tensor(Image.fromarray(img_transformed))
    img_original = to_tensor(img_transformed)
    ########
    if self.debug:
        imgs = VisualizeBeliefMap(beliefs)
        img, grid = save_image(
            imgs,
            f"debug/{img_name.replace('.png','_beliefs.png')}",
            mean=0,
            std=1,
            nrow=3,
            save=True)
        imgs = VisualizeAffinityMap(affinities)
        save_image(imgs,
                   f"debug/{img_name.replace('.png','_affinities.png')}",
                   mean=0,
                   std=1,
                   nrow=3,
                   save=True)
    ########
    # Scrub any NaN/Inf so the training loss never sees non-finite targets.
    img_tensor[torch.isnan(img_tensor)] = 0
    affinities[torch.isnan(affinities)] = 0
    beliefs[torch.isnan(beliefs)] = 0
    img_tensor[torch.isinf(img_tensor)] = 0
    affinities[torch.isinf(affinities)] = 0
    beliefs[torch.isinf(beliefs)] = 0
    return {
        'img': img_tensor,
        "affinities": torch.clamp(affinities, -1, 1),
        'beliefs': torch.clamp(beliefs, 0, 1),
        'file_name': img_name,
        'img_original': img_original,
    }
def forward(self, batch: Batch, interim: bool = False) -> torch.Tensor:
    """Score a batch of documents against all patterns.

    Runs the pattern automaton over every token position, accumulating
    end-state values into per-pattern scores under ``self.semiring``,
    then normalizes and binarizes the scores.

    Args:
        batch: project ``Batch`` of documents; must expose ``size()`` and
            ``doc_lens`` (per-document token counts) — assumed from usage,
            confirm against the Batch class.
        interim: if True, also return the raw (pre-normalization) scores.

    Returns:
        If ``interim``: tensor stacking (raw scores, binarized scores) on
        dim 1. Otherwise the output of the final linear layer over the
        binarized scores.
    """
    # start timer and get transition matrices
    transition_matrices = self.get_transition_matrices(batch)

    # assign batch_size
    batch_size = batch.size()

    # clone null scores tensor (expand shares storage, so clone before in-place updates)
    scores = self.scores.expand(  # type: ignore
        batch_size, -1).clone()

    # clone restart_padding tensor to add start state for each word
    restart_padding = self.restart_padding.expand(  # type: ignore
        batch_size, -1, -1).clone()

    # initialize hiddens tensor
    hiddens = self.hiddens.expand(  # type: ignore
        batch_size, -1, -1).clone()

    # enumerate all end pattern states
    end_states = self.end_states.expand(  # type: ignore
        batch_size, self.total_num_patterns, -1).clone()

    # get wildcard_matrix based on previous class settings
    wildcard_matrix = self.get_wildcard_matrix()

    # start loop over all transition matrices (one per token position)
    for token_index in range(transition_matrices.size(1)):
        # extract current transition matrix
        transition_matrix = transition_matrices[:, token_index, :, :]

        # retrieve all hiddens given current state embeddings
        hiddens = self.transition_once(hiddens, transition_matrix,
                                       wildcard_matrix, restart_padding)

        # look at the end state for each pattern, and "add" it into score
        end_state_values = torch.gather(hiddens, 2, end_states).view(
            batch_size, self.total_num_patterns)

        # only index active documents and not padding tokens:
        # documents shorter than token_index contribute nothing here
        active_doc_indices = torch.nonzero(
            torch.gt(batch.doc_lens, token_index),
            as_tuple=True)[0]  # yapf: disable

        # update scores with relevant tensor values
        scores[active_doc_indices] = self.semiring.addition(
            scores[active_doc_indices],
            end_state_values[active_doc_indices])

    # clone raw scores to keep track of it
    if interim:
        interim_scores = scores.clone()

    # extract scores from semiring to outer set
    scores = self.semiring.from_semiring_to_outer(scores)

    # extract all infinite indices; infs can arise from the semiring
    # conversion (e.g. log of zero-probability scores)
    isinf = torch.isinf(scores)
    if isinf.sum().item() > 0:
        # mask out inf entries so the normalizer only sees finite values
        scores_mask = ~isinf
    else:
        scores_mask = None  # type: ignore

    # execute normalization of scores
    scores = self.normalizer(scores, scores_mask)

    # binarize scores using STE (straight-through estimator)
    scores = self.binarizer(scores)

    # conditionally return different tensors depending on routine
    if interim:
        return torch.stack((interim_scores, scores), 1)
    else:
        return self.linear.forward(scores)
def backward(self, grad_output): ''' @staticmethod backward(self, grad_output): Compute SoftDTW gradient wrt x. See algorithm 2 in https://arxiv.org/abs/1703.01541 ''' # Get saved tensors x, y = self.saved_tensors # Determine size of alignment gradient matrix E_dims = (self.batch_dim, self.x_time_dim + 2, self.y_time_dim + 2, self.space_dim) \ if self.spatial_independent else (self.batch_dim, self.x_time_dim + 2, self.y_time_dim + 2) # Create alignment gradient matrix E = torch.zeros(E_dims).to(self.device) E[:, -1, -1] = 1 from math import inf self.R[torch.isinf(self.R)] = -inf self.R[:, -1, -1] = self.R[:, -2, -2] rev_idxs = reversed( list( MatrixDiagonalIndexIterator(self.x_time_dim, self.y_time_dim, bandwidth=self.bandwidth))) rev_idxsp1 = reversed( list( MatrixDiagonalIndexIterator(self.x_time_dim + 1, self.y_time_dim + 1, k_start=1, bandwidth=self.bandwidth))) rev_idxsp2 = reversed( list( MatrixDiagonalIndexIterator(self.x_time_dim + 2, self.y_time_dim + 2, k_start=2, bandwidth=self.bandwidth))) # Sweep diagonally through alignment gradient matrix for (i, j), (ip1, jp1), (ip2, jp2) in zip(rev_idxs, rev_idxsp1, rev_idxsp2): a = torch.exp( (self.R[:, ip2, jp1] - self.R[:, ip1, jp1] - self.D[:, ip1, j]) / self.gamma) b = torch.exp( (self.R[:, ip1, jp2] - self.R[:, ip1, jp1] - self.D[:, i, jp1]) / self.gamma) c = torch.exp((self.R[:, ip2, jp2] - self.R[:, ip1, jp1] - self.D[:, ip1, jp1]) / self.gamma) E[:, ip1, jp1] = E[:, ip2, jp1] * a + E[:, ip1, jp2] * b + E[:, ip2, jp2] * c # Compute Jacobean product to compute gradient wrt x if self.spatial_independent: G = jacobean_product_squared_euclidean( x.unsqueeze(2), y.unsqueeze(2), E[:, 1:-1, 1:-1].permute(0, 3, 2, 1)).squeeze(2) else: G = jacobean_product_squared_euclidean( x, y, E[:, 1:-1, 1:-1].permute(0, 2, 1)) # Must return as many outputs as inputs to forward function return G, None, None,
def test_elliprd(x, y, z, l): assert nice_and_close(elliprd(x, x, x), x**(-1.5)) assert nice_and_close(elliprd(l * x, l * y, l * z), elliprd(x, y, z) / l**1.5) assert nice_and_close(elliprd(0, y, y), 3 * pi / 4 / y**1.5) assert isinf(elliprd(0, 0, z))
def forward(self, z, cond, speaker_ids=None):  # optional cond input
    """
    z = z: batch x n_mel_channels x time
    cond = attention outputs: batch x time x enc_embed

    Normalizing-flow forward pass: applies conditioning layers to cond,
    then alternates invertible 1x1 convs and affine coupling layers,
    peeling off `n_early_size` channels every `n_early_every` steps
    ("early outputs", as in WaveGlow).

    Returns:
        (spect, log_s_sum, logdet_w_sum): the concatenated latent
        reshaped back to [B, n_mel_channels, -1], the summed coupling
        log-scales, and the summed conv log-determinants.
    """
    # Add speaker conditioning
    if self.speaker_embed_dim:
        speaker_embeddings = self.speaker_embed(speaker_ids)
        speaker_embeddings = speaker_embeddings.unsqueeze(-1).repeat(
            1, 1, cond.shape[2])  # shape like cond
        cond = torch.cat([cond, speaker_embeddings], dim=1)  # and concat them

    cond_res = cond
    for layer in self.cond_layers:
        cond_res = layer(cond_res)
        # optional activation between conditioning layers
        if hasattr(self, 'cond_act_func'):
            cond_res = self.cond_act_func(cond_res)

    if hasattr(self, 'alpha'):
        cond_res *= self.alpha  # reZero modifier

    if self.cond_residual:
        cond = cond + cond_res  # adjust the original input by a residual
    else:
        cond = cond_res  # completely reform the input into something else

    batch_dim, n_mel_channels, group_steps = z.shape
    # fold groups of mel channels into time (squeeze operation)
    z = z.view(batch_dim, self.n_group, -1)  # [B, n_mel, T] -> [B, n_mel/8, T*8]
    #cond = F.interpolate(cond, size=z.shape[-1]) # [B, enc_dim, T] -> [B, enc_dim/8, T*8]

    output_spect = []
    split_sections = [self.n_early_size, self.n_group]
    for k, (convinv, affine_coup) in enumerate(zip(self.convinv, self.WN)):
        if k % self.n_early_every == 0 and k > 0:
            # peel off an early-output slice of channels
            split_sections[1] -= self.n_early_size
            early_output, z = z.split(split_sections, 1)
            # these 2 lines actually copy tensors, may need optimization in the future
            output_spect.append(early_output)
            z = z.clone()

        # mix_first toggles whether the 1x1 conv runs before or after
        # the coupling layer; asserts guard against NaN/Inf blow-ups
        if self.mix_first:
            z, log_det_W = convinv(z)
            assert not torch.isnan(z).any()
            assert not torch.isinf(z).any()

        z, log_s = affine_coup(z, cond)
        assert not torch.isnan(z).any()
        assert not torch.isinf(z).any()

        if not self.mix_first:
            z, log_det_W = convinv(z)
            assert not torch.isnan(z).any()
            assert not torch.isinf(z).any()

        # accumulate log-dets across flow steps (first step initializes)
        if k:
            logdet_w_sum = logdet_w_sum + log_det_W
            log_s_sum = log_s_sum + log_s.float().sum((1, ))
        else:
            logdet_w_sum = log_det_W
            log_s_sum = log_s.float().sum((1, ))

    assert split_sections[1] == self.z_split_sizes[-1]
    output_spect.append(z)
    return torch.cat(output_spect,
                     1).contiguous().view(batch_dim, self.n_mel_channels,
                                          -1), log_s_sum, logdet_w_sum
def torch_col_normalize(x): col_sum = x.sum(0).reshape(1, -1) ret = x / col_sum ret[torch.isinf(ret)] = 0 return ret
def main(args):
    """Train and evaluate an SGC model on the reddit-self-loop dataset.

    Loads the DGL dataset named by ``args.dataset`` (forced to
    "reddit-self-loop"), builds symmetric-normalization factors,
    trains with LBFGS for ``args.n_epochs`` epochs, and prints test
    accuracy. Expects ``args.gpu`` (< 0 for CPU) and ``args.n_epochs``.
    """
    # load and preprocess dataset
    args.dataset = "reddit-self-loop"
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks are the legacy mask dtype;
    # BoolTensor is preferred in modern PyTorch — confirm DGL version.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.sum().item(),
           val_mask.sum().item(), test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    start = time.perf_counter()
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}; isolated nodes give deg 0 -> inf, zeroed out
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    preprocess_elapse = time.perf_counter() - start
    print("Preprocessing Time: {:.4f}".format(preprocess_elapse))

    # create SGC model (K=2 hops of propagation)
    model = SGCLayer(g, features, in_feats, n_classes, K=2)

    if cuda:
        model.cuda()
    # NOTE(review): loss_fcn is unused; the closure below calls
    # F.cross_entropy directly.
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer (LBFGS requires a closure re-evaluating the loss)
    optimizer = torch.optim.LBFGS(model.parameters())

    # define loss closure
    def closure():
        optimizer.zero_grad()
        output = model(train_mask)
        loss_train = F.cross_entropy(output, labels[train_mask])
        loss_train.backward()
        return loss_train

    # initialize graph
    dur = []
    start = time.perf_counter()
    for epoch in range(args.n_epochs):
        model.train()
        # NOTE(review): this extra forward pass is discarded — the
        # closure recomputes it; possibly redundant.
        logits = model(train_mask)  # only compute the train set
        loss = optimizer.step(closure)
    train_elapse = time.perf_counter() - start
    print("Train epoch {} | Train Time(s) {:.4f}".format(epoch, train_elapse))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def num_nan_inf(t): return torch.isnan(t).sum() + torch.isinf(t).sum()
def run_skill(self, skill):
    """Roll out the policy with a fixed skill and score it with the
    discriminator (DIAYN-style intrinsic reward).

    Runs ``self.num_runs`` episodes of at most
    ``self.hparams.max_path_length`` steps; at each step the environment
    reward is replaced by the (clamped) negative cross-entropy between
    the discriminator's skill prediction and the active skill.

    Returns:
        (ims, rewards, path_return, rewards2, path_return2) — rendered
        frames (if enabled), per-step discriminator rewards and their
        running return, plus the same pair for the distilled model when
        ``self.hparams.eval_distilled`` is set. Note the returns reflect
        only the LAST episode of the loop.
    """
    ims = []
    rewards = []
    rewards2 = []
    for j in range(self.num_runs):
        state = self.env.reset(state=None, skill=skill)
        # print(datetime.datetime.now(dateutil.tz.tzlocal()).strftime('%Y-%m-%d-%H-%M-%S-%f-%Z'))
        # print("Running Validation")
        path_return = 0
        path_return2 = 0
        path_length = 0
        with self.policy.deterministic(self.hparams.deterministic_eval):
            for k in range(self.hparams.max_path_length):
                action = self.policy.get_actions(state.reshape((1, -1)))
                next_ob, reward, done, info = self.env.step(action)
                # split observation into (env obs, one-hot skill vector)
                (obs, z) = self._split_obs(torch.FloatTensor(next_ob)[None, :])
                if self.on_gpu:
                    # Neeeded because inputs are not on GPU during sample collection
                    # in sanity check TODO: Sanity check is not the place for collecting samples.
                    obs = obs.cuda(self.hparams.device)
                    z = z.cuda(self.hparams.device)
                logits = self.discriminator(obs)  # N x num_skills
                skillz = torch.argmax(z, dim=-1)  # N
                # intrinsic reward: how well the discriminator recovers the skill
                reward = -1 * nn.CrossEntropyLoss(reduction='none')(
                    logits, skillz)  # N
                reward = torch.clamp(reward, min=-8)
                assert not torch.isnan(reward).any() and not torch.isinf(
                    reward).any()
                p_z = torch.sum(self._p_z * z, dim=-1)  # N
                log_p_z = torch.log(p_z + self.hparams.eps)
                if self.hparams.add_p_z:
                    # subtract log prior over skills (DIAYN objective)
                    reward -= log_p_z
                    assert not torch.isnan(
                        reward).any() and not torch.isinf(reward).any()
                if self.hparams.render_validation:
                    # self.env.render(mode="human")
                    ims.append(
                        cv2.resize(self.env.render(mode='rgb_array'),
                                   (500, 500)))
                    # print(self.ims[0].shape)#config={'height':500,'width':500,'xpos':0,'ypos':0,'title':'validation'}
                    # print(reward)
                state = next_ob
                path_return += reward
                path_length += 1
                rewards.append(reward)
                if (self.hparams.eval_distilled):
                    # repeat the scoring with the distilled discriminator
                    logits = self.distiller(obs)  # N x num_skills
                    reward = -1 * nn.CrossEntropyLoss(reduction='none')(
                        logits, skillz)  # N
                    reward = torch.clamp(reward, min=-8)
                    assert not torch.isnan(
                        reward).any() and not torch.isinf(reward).any()
                    p_z = torch.sum(self._p_z * z, dim=-1)  # N
                    log_p_z = torch.log(p_z + self.hparams.eps)
                    if self.hparams.add_p_z:
                        reward -= log_p_z
                        assert not torch.isnan(reward).any(
                        ) and not torch.isinf(reward).any()
                    path_return2 += reward
                    rewards2.append(reward)
                if (done):
                    break
    print(path_return)
    print(path_return2)
    return ims, rewards, path_return, rewards2, path_return2
def replace_inf_with_zero(x): return th.masked_fill(x, th.isinf(x), 0)
hyp, eos, include_relaxation=True) if old_samp == 1: h_t = h_t.repeat(1, num_samps, 1).contiguous() logits = logits_t.unsqueeze(0).expand( -1, -1, num_samps, -1) z = z_t.unsqueeze(0) else: logits = torch.cat([logits, logits_t.unsqueeze(0)], dim=0) z = torch.cat([z, z_t.unsqueeze(0)], dim=0) ref_rep = ref_rep.view(-1, batch_size * num_samps) hyp = hyp[1:].view(-1, batch_size * num_samps) # get rid of sos logits = logits.view(-1, batch_size * num_samps, num_classes) z = z.view(-1, batch_size * num_samps, num_classes) mask = torch.isinf(z).any(-1, keepdim=True) lens = mask.eq(0).long().sum(0).squeeze(-1) fb = f(hyp, ref_rep) if estimator.startswith('reinforce'): if '-' in estimator: fb = diff = fb - c(logits.detach(), lens) g = reinforce(fb, hyp, logits, 'cat') else: ( diff, dlog_pb, dc_z, dc_z_tilde, ) = relax(fb, hyp, logits,
def clip_and_replace_explosures(grad): grad[torch.logical_or( torch.isnan(grad), torch.isinf(grad))] = torch.tensor(0.0).to(device) grad = torch.clamp(grad, -0.25, 0.25) return grad
def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """
    Compute the generalized Dice loss between ``input`` and ``target``.

    Args:
        input: the shape should be BNH[WD].
        target: the shape should be BNH[WD].

    Raises:
        ValueError: When ``self.reduction`` is not one of ["mean", "sum", "none"].
    """
    # optional activations on the raw predictions
    if self.sigmoid:
        input = torch.sigmoid(input)
    n_pred_ch = input.shape[1]
    if self.softmax:
        if n_pred_ch == 1:
            warnings.warn("single channel prediction, `softmax=True` ignored.")
        else:
            input = torch.softmax(input, 1)

    if self.other_act is not None:
        input = self.other_act(input)

    if self.to_onehot_y:
        if n_pred_ch == 1:
            warnings.warn("single channel prediction, `to_onehot_y=True` ignored.")
        else:
            target = one_hot(target, num_classes=n_pred_ch)

    if not self.include_background:
        if n_pred_ch == 1:
            warnings.warn("single channel prediction, `include_background=False` ignored.")
        else:
            # if skipping background, removing first channel
            target = target[:, 1:]
            input = input[:, 1:]

    if target.shape != input.shape:
        raise AssertionError(f"ground truth has differing shape ({target.shape}) from input ({input.shape})")

    # reducing only spatial dimensions (not batch nor channels)
    reduce_axis = list(range(2, len(input.shape)))
    if self.batch:
        reduce_axis = [0] + reduce_axis
    intersection = torch.sum(target * input, reduce_axis)

    ground_o = torch.sum(target, reduce_axis)
    pred_o = torch.sum(input, reduce_axis)

    denominator = ground_o + pred_o

    # per-class weights from the ground-truth volume (e.g. 1/volume^2);
    # empty classes give inf weights
    w = self.w_func(ground_o.float())
    for b in w:
        infs = torch.isinf(b)
        # replace inf weights with the largest finite weight in the row:
        # first zero them so torch.max ignores them, then fill with max
        b[infs] = 0.0
        b[infs] = torch.max(b)

    f: torch.Tensor = 1.0 - (2.0 * (intersection * w).sum(0 if self.batch else 1) + self.smooth_nr) / (
        (denominator * w).sum(0 if self.batch else 1) + self.smooth_dr
    )

    if self.reduction == LossReduction.MEAN.value:
        f = torch.mean(f)  # the batch and channel average
    elif self.reduction == LossReduction.SUM.value:
        f = torch.sum(f)  # sum over the batch and channel dims
    elif self.reduction == LossReduction.NONE.value:
        pass  # returns [N, n_classes] losses
    else:
        raise ValueError(f'Unsupported reduction: {self.reduction}, available options are ["mean", "sum", "none"].')

    return f
def predict(
    dataloader,
    intersection_model,
    device,
    eval_performance=True,
    # NOTE(review): a module instance as a default argument is shared
    # across calls; harmless here since BCELoss is stateless, but worth
    # confirming.
    binary_crossentropy=nn.BCELoss(reduction="none"),
):
    """Run traffic-light predictions over ``dataloader`` and save them to HDF5.

    For every batch, queries ``intersection_model`` for per-traffic-light
    color probabilities and time-to-event (TTE) distributions, collects
    the TTE mode/25th/75th percentiles and green-probabilities, and —
    when ``eval_performance`` — accumulates a BCE + negative-log-prob
    loss for reporting.

    NOTE(review): ``prediction_id``, ``fold_i`` and ``intersection_idx``
    used in the output path are not defined here — presumably globals of
    the enclosing script; verify before reuse.
    """
    progress_bar = tqdm(dataloader, desc="Prediction..")
    intersection_model.eval()
    if eval_performance:
        losses = []
        losses_lob_prob = []
        losses_bce = []
    scene_indices_list = []
    frame_indices_list = []
    # traffic-light ids are parsed from the model's "fc_tte_k_<id>" heads
    traffic_light_indices = [
        int(name.replace("fc_tte_k_", ""))
        for name, _ in intersection_model.named_children()
        if "fc_tte_k_" in name
    ]
    tl_idx_2_mode_list = defaultdict(list)
    tl_idx_2_25perc_list = defaultdict(list)
    tl_idx_2_75perc_list = defaultdict(list)
    tl_idx_2_green_color_prob_list = defaultdict(list)
    for batch in progress_bar:
        (
            tokens,
            token_type_ohe,
            token_timesteps,
            seq_len,
            all_true_classes,
            all_tte,
            classes_availabilities,
            tte_availabilities,
            scene_indices,
            frame_indices,
        ) = batch
        # moving to GPU if available
        tokens, token_type_ohe, token_timesteps, seq_len = (
            tokens.to(device),
            token_type_ohe.to(device),
            token_timesteps.to(device),
            seq_len.to(device),
        )
        tl_2_color_class, tl_2_tte_distr, tl_2_tte_mode_quantiles = intersection_model(
            tokens,
            token_type_ohe,
            token_timesteps,
            seq_len,
            output_mode_with_percentiles=True,
        )
        scene_indices_list.extend(scene_indices.numpy() if device ==
                                  "cpu" else scene_indices.cpu().numpy())
        frame_indices_list.extend(frame_indices.numpy() if device ==
                                  "cpu" else frame_indices.cpu().numpy())
        tl_idx_2_tte_preds = [(
            key,
            val.detach().numpy() if device == "cpu" else
            val.detach().cpu().numpy(),
        ) for key, val in tl_2_tte_mode_quantiles.items()]
        # columns 0/1/2 hold mode / 25th / 75th percentile of the TTE
        for tl_id, tte_preds in tl_idx_2_tte_preds:
            tl_idx_2_mode_list[tl_id].extend(tte_preds[:, 0])
            tl_idx_2_25perc_list[tl_id].extend(tte_preds[:, 1])
            tl_idx_2_75perc_list[tl_id].extend(tte_preds[:, 2])
        for tl_id, green_color_prob_torch in tl_2_color_class.items():
            tl_idx_2_green_color_prob_list[tl_id].extend(
                green_color_prob_torch.detach().numpy(
                ).reshape(-1) if device == "cpu" else green_color_prob_torch.
                detach().cpu().numpy().reshape(-1))
        if eval_performance:
            all_true_classes = {
                tl_i: vals.to(device)
                for tl_i, vals in all_true_classes.items()
            }
            all_tte = {tl_i: vals.to(device) for tl_i, vals in all_tte.items()}
            classes_availabilities = {
                tl_i: vals.to(device)
                for tl_i, vals in classes_availabilities.items()
            }
            tte_availabilities = {
                tl_i: vals.to(device)
                for tl_i, vals in tte_availabilities.items()
            }
            # BCE over color classes, masked by per-sample availability
            loss_bce = torch.tensor([0.0]).to(device)
            loss_bce_terms_count = torch.tensor([0.0]).to(device)
            for tl_id, pred_color_classes in tl_2_color_class.items():
                true_color_classes = all_true_classes[tl_id]
                bce_loss_tl = (binary_crossentropy(
                    torch.squeeze(pred_color_classes), true_color_classes) *
                               classes_availabilities[tl_id])
                loss_bce += bce_loss_tl.sum()
                loss_bce_terms_count += classes_availabilities[tl_id].sum()
            if loss_bce_terms_count > 0:
                loss_bce /= loss_bce_terms_count
            # negative log-likelihood of the true TTEs under the
            # predicted distributions, with NaN/Inf terms zeroed out
            loss_tte_log_prob = torch.tensor([0.0]).to(device)
            loss_tte_log_prob_terms_count = torch.tensor([0.0]).to(device)
            for tl_id, tte_distr in tl_2_tte_distr.items():
                true_ttes = torch.unsqueeze(all_tte[tl_id], -1)
                log_prob_all = (torch.squeeze(tte_distr.log_prob(true_ttes)) *
                                tte_availabilities[tl_id])
                log_prob_all[torch.logical_or(
                    torch.isnan(log_prob_all),
                    torch.isinf(log_prob_all))] = torch.tensor(0.0).to(device)
                loss_tte_log_prob -= log_prob_all.sum()
                loss_tte_log_prob_terms_count += tte_availabilities[tl_id].sum(
                )
            if loss_tte_log_prob_terms_count > 0:
                loss_tte_log_prob /= loss_tte_log_prob_terms_count
            loss = loss_bce + loss_tte_log_prob
            if loss_bce_terms_count > 0 or loss_tte_log_prob_terms_count > 0:
                losses.append(loss.item())
            if loss_tte_log_prob_terms_count > 0:
                losses_lob_prob.append(loss_tte_log_prob.item())
            if loss_bce_terms_count > 0:
                losses_bce.append(loss_bce.item())
    if eval_performance:
        print(
            f"Avg eval loss: {np.mean(losses):.5f} (bce: {np.mean(losses_bce):.5f}, log prob: {np.mean(losses_lob_prob):.5f})"
        )
    values_dict = {
        "scene_idx": scene_indices_list,
        "scene_frame_idx": frame_indices_list,
    }
    for tl_id in traffic_light_indices:
        values_dict.update({
            f"{tl_id}_green_prob": tl_idx_2_green_color_prob_list[tl_id],
            f"{tl_id}_tte_mode": tl_idx_2_mode_list[tl_id],
            f"{tl_id}_tte_25th_perc": tl_idx_2_25perc_list[tl_id],
            f"{tl_id}_tte_75th_perc": tl_idx_2_75perc_list[tl_id],
        })
    pd.DataFrame(values_dict).to_hdf(
        f'outputs/tl_predictions/tl_pred{"_" + prediction_id if prediction_id != "" else ""}_{fold_i}_intersection_{intersection_idx}.hdf5',
        key="data",
    )
def check_values(tensor): """return true if tensor doesn't contain NaN or Inf""" return not (torch.any(torch.isnan(tensor)).item() or torch.any(torch.isinf(tensor)).item())
def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True):
    """Paste instance masks acoording to boxes.

    This implementation is modified from
    https://github.com/facebookresearch/detectron2/

    Args:
        masks (Tensor): N, 1, H, W
        boxes (Tensor): N, 4
        img_h (int): Height of the image to be pasted.
        img_w (int): Width of the image to be pasted.
        skip_empty (bool): Only paste masks within the region that
            tightly bound all boxes, and returns the results this region only.
            An important optimization for CPU.

    Returns:
        tuple: (Tensor, tuple). The first item is mask tensor, the second one
            is the slice object.
        If skip_empty == False, the whole image will be pasted. It will
            return a mask of shape (N, img_h, img_w) and an empty tuple.
        If skip_empty == True, only area around the mask will be pasted.
            A mask of shape (N, h', w') and its start and end coordinates
            in the original image will be returned.
    """
    # On GPU, paste all masks together (up to chunk size)
    # by using the entire image to sample the masks
    # Compared to pasting them one by one,
    # this has more operations but is faster on COCO-scale dataset.
    device = masks.device
    if skip_empty:
        # tight integer bounding region around all boxes, padded by 1px
        x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1,
                                     min=0).to(dtype=torch.int32)
        x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1,
                             max=img_w).to(dtype=torch.int32)
        y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1,
                             max=img_h).to(dtype=torch.int32)
    else:
        x0_int, y0_int = 0, 0
        x1_int, y1_int = img_w, img_h
    x0, y0, x1, y1 = torch.split(boxes, 1, dim=1)  # each is Nx1

    N = masks.shape[0]

    # pixel centers (+0.5), mapped into each box's [-1, 1] grid_sample space
    img_y = torch.arange(y0_int, y1_int, device=device,
                         dtype=torch.float32) + 0.5
    img_x = torch.arange(x0_int, x1_int, device=device,
                         dtype=torch.float32) + 0.5
    img_y = (img_y - y0) / (y1 - y0) * 2 - 1
    img_x = (img_x - x0) / (x1 - x0) * 2 - 1
    # img_x, img_y have shapes (N, w), (N, h)
    # degenerate boxes (zero width/height) divide by zero -> inf; zero out
    if torch.isinf(img_x).any():
        inds = torch.where(torch.isinf(img_x))
        img_x[inds] = 0
    if torch.isinf(img_y).any():
        inds = torch.where(torch.isinf(img_y))
        img_y[inds] = 0

    # build an (N, h, w, 2) sampling grid for grid_sample
    gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1))
    gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1))
    grid = torch.stack([gx, gy], dim=3)

    img_masks = F.grid_sample(masks.to(dtype=torch.float32),
                              grid,
                              align_corners=False)

    if skip_empty:
        return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int))
    else:
        return img_masks[:, 0], ()
def test_elliprf(x, y, z, l): assert nice_and_close(elliprf(x, x, x), x**(-0.5)) assert nice_and_close(elliprf(0, y, y), pi / 2 / y**0.5) assert nice_and_close(elliprf(l * x, l * y, l * z), elliprf(x, y, z) / l**0.5) assert isinf(elliprf(0, 0, z))
def forward(self, confidence, predicted_locations, labels, gt_locations, weighted_vector=None):
    """Compute classification loss and smooth l1 loss.

    Args:
        confidence (batch_size, num_priors, num_classes): class predictions.
        locations (batch_size, num_priors, 4): predicted locations.
        labels (batch_size, num_priors): real labels of all the priors.
        boxes (batch_size, num_priors, 4): real boxes corresponding all the priors.
        weighted_vector: optional per-class weights for cross entropy.

    Returns:
        (smooth_l1_loss, classification_loss), each averaged over the
        number of positive priors (zeros if there are none).
    """
    # drop priors whose ground-truth w/h encoding is inf (invalid boxes)
    query_table = (torch.isinf(gt_locations[:, :, 2]) +
                   torch.isinf(gt_locations[:, :, 3])) == 0
    gt_locations = gt_locations[query_table, :]
    predicted_locations = predicted_locations[query_table, :]
    confidence = confidence[query_table, :]
    labels = labels[query_table]
    num_classes = confidence.size(1)
    with torch.no_grad():
        # background confidence drives hard-negative mining
        loss = -1 * F.log_softmax(confidence, dim=1)[:, 0]
        loss = loss.unsqueeze(dim=0)
        labels = labels.unsqueeze(dim=0)
        mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)
        mask = mask.squeeze(dim=0)
        labels = labels.squeeze(dim=0)
    confidence = confidence[mask, :]
    if int(torch.sum(confidence).data.cpu()) == 0:
        print("Only have background sample")
        # keep the graph alive with zero-valued losses
        return 0 * torch.sum(confidence), 0 * torch.sum(confidence)
    else:
        final_label = labels[mask]
        # normalize label dtype/device to cuda LongTensor for cross_entropy
        if final_label.type() == "torch.FloatTensor":
            final_label = torch.LongTensor(final_label).cuda()
        elif final_label.type() == "torch.cuda.FloatTensor":
            final_label = final_label.data.cpu().numpy()
            final_label = torch.LongTensor(final_label).cuda()
        elif final_label.type() == "torch.cuda.LongTensor":
            pass
        else:
            print(final_label.type())
        # NOTE(review): `size_average=False` is deprecated in modern
        # PyTorch in favour of `reduction='sum'` — confirm torch version.
        if weighted_vector is not None:
            classification_loss = F.cross_entropy(confidence.reshape(
                -1, num_classes),
                                                  final_label,
                                                  weighted_vector,
                                                  size_average=False)
        else:
            classification_loss = F.cross_entropy(confidence.reshape(
                -1, num_classes),
                                                  final_label,
                                                  size_average=False)
    # localization loss only over positive (non-background) priors
    pos_mask = labels > 0
    predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
    gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
    smooth_l1_loss = F.smooth_l1_loss(predicted_locations,
                                      gt_locations,
                                      size_average=False)
    num_pos = gt_locations.size(0)
    if num_pos != 0:
        return smooth_l1_loss / num_pos, classification_loss / num_pos
    else:
        print("L1_loss{}, classification{}".format(smooth_l1_loss,
                                                   classification_loss))
        return smooth_l1_loss * 0, classification_loss * 0
def run_epoch(
    data_loader: DataLoader,
    model: nn.Module,
    optimiser: optim.Optimizer,  # type: ignore
    device: torch.device,
    logger: lavd.Logger,
    epoch: int,
    train: bool = True,
    amp_scaler: Optional[amp.GradScaler] = None,
    masked_lm: bool = True,
    name: str = "",
) -> Dict:
    """Run one training or evaluation epoch of a (masked) language model.

    Supports optional AMP mixed precision (when ``amp_scaler`` is given)
    and DistributedSampler-based multi-process runs (losses are gathered
    across replicas for the reported metrics).

    Returns:
        OrderedDict with mean ``loss`` and ``perplexity`` for the epoch.
    """
    # Disables autograd during validation mode
    torch.set_grad_enabled(train)
    if train:
        model.train()
    else:
        model.eval()

    sampler = (
        data_loader.sampler  # type: ignore
        if isinstance(data_loader.sampler, DistributedSampler)  # type: ignore
        else None)
    if sampler is not None:
        # reshuffle deterministically per epoch across replicas
        sampler.set_epoch(epoch)

    losses = []
    pbar = logger.progress_bar(name,
                               total=len(data_loader.dataset),
                               leave=False,
                               dynamic_ncols=True)
    tokeniser = data_loader.dataset.tokeniser  # type: ignore
    for d in data_loader:
        d = d.to(device)
        inputs, labels = mask_tokens(d, tokeniser) if masked_lm else (d, d)
        # The last batch may not be a full batch
        curr_batch_size = inputs.size(0)
        # Automatically run it in mixed precision (FP16) if a scaler is given
        with amp.autocast(enabled=amp_scaler is not None):
            output = (model(inputs, masked_lm_labels=labels)
                      if masked_lm else model(inputs, labels=labels))
            loss = output[0]
        losses.append(loss.item())
        # NOTE(review): leftover debugging hook — drops into pdb on a
        # NaN/Inf loss; should probably log/raise instead in production.
        if torch.isnan(loss) or torch.isinf(loss):
            breakpoint()
        if train:
            optimiser.zero_grad()
            if amp_scaler is None:
                loss.backward()
                # Clip gradients to avoid exploding gradients
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimiser.step()
            else:
                amp_scaler.scale(loss).backward()
                # unscale so clipping operates on true gradient magnitudes
                amp_scaler.unscale_(optimiser)
                # Clip gradients to avoid exploding gradients
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                amp_scaler.step(optimiser)
                amp_scaler.update()
        pbar.update(
            curr_batch_size if sampler is None else curr_batch_size *
            sampler.num_replicas  # type: ignore
        )
    pbar.close()

    loss = torch.mean(torch.tensor(losses, device=device))
    # Gather the loss onto the primary process to have accurate metrics.
    if sampler is not None:
        gathered_losses = [
            torch.zeros_like(loss)
            for _ in range(sampler.num_replicas)  # type: ignore
        ]
        dist.all_gather(gathered_losses, loss)
        loss = torch.mean(torch.tensor(gathered_losses))
    perplexity = torch.exp(loss)
    return OrderedDict(loss=loss.item(), perplexity=perplexity.item())