def test(test_data, model, epoch, device, logger):
    model.eval()
    ce_loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    entropy_meter = AverageMeter()
    n_entropy_meter = AverageMeter()
    with torch.no_grad():
        # fixed typo: the loop variable was "hypothese" but used below as "hypotheses"
        for labels, premises, p_mask, hypotheses, h_mask in test_data:
            if torch.cuda.is_available():
                labels = labels.to(device=device)
                premises = premises.to(device=device)
                p_mask = p_mask.to(device=device)
                hypotheses = hypotheses.to(device=device)
                h_mask = h_mask.to(device=device)
            pred_labels, ce_loss, rewards, actions, actions_log_prob, entropy, normalized_entropy = model(
                premises, p_mask, hypotheses, h_mask, labels)
            entropy = entropy.mean()
            normalized_entropy = normalized_entropy.mean()
            accuracy = (labels == pred_labels).to(dtype=torch.float32).mean()
            n = p_mask.shape[0]
            accuracy_meter.update(accuracy.item(), n)
            ce_loss_meter.update(ce_loss.item(), n)
            entropy_meter.update(entropy.item(), n)
            n_entropy_meter.update(normalized_entropy.item(), n)
    logger.info(
        f"Test: ce_loss: {ce_loss_meter.avg:.4f} accuracy: {accuracy_meter.avg:.4f} "
        f"entropy: {entropy_meter.avg:.4f} n_entropy: {n_entropy_meter.avg:.4f}")
    return accuracy_meter.avg
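# AverageMeter is used by every snippet in this section but defined in none of
# them. A minimal sketch consistent with how it is called here
# (update(val, n=1), plus .val, .avg, .sum, .count, and reset()) -- this
# mirrors the common PyTorch-examples helper and is an assumption, not the
# exact class these files import.
class AverageMeter:
    """Tracks the latest value and running statistics of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count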
def evaluate(eval_loader, model, writer, step, Save_model, epoch):
    model.eval()
    top_prec = AverageMeter()
    softmax = nn.Softmax(dim=1).cuda()  # specify dim explicitly; the implicit default is deprecated
    with torch.no_grad():
        for i, (images, labels, names) in enumerate(eval_loader):
            images = images.cuda()  # Variable() is a no-op since PyTorch 0.4
            labels = labels.cuda()
            gender_pred = model(images)
            gender_pred = softmax(gender_pred)
            prec = accuracy(gender_pred, labels, topk=(1,))
            top_prec.update(prec[0].item())
    print('evaluate * Prec@1 {top:.3f}'.format(top=top_prec.avg))
    writer.add_scalar('prec', top_prec.avg, step)
    Save_model.save(model, top_prec.avg, epoch)
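# The accuracy(output, target, topk=...) helper used above is assumed; a
# common implementation (in the style of the PyTorch ImageNet example) looks
# like this. Treat it as a sketch of the expected behavior, not the exact
# helper this project ships.
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)
    # indices of the top-k predictions per sample, shape (maxk, batch)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res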
def validate(valid_data, model, epoch, device, logger, summary_writer):
    model.eval()
    ce_loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    entropy_meter = AverageMeter()
    n_entropy_meter = AverageMeter()
    with torch.no_grad():
        for labels, premises, p_mask, hypotheses, h_mask in valid_data:
            if torch.cuda.is_available():
                labels = labels.to(device=device)
                premises = premises.to(device=device)
                p_mask = p_mask.to(device=device)
                hypotheses = hypotheses.to(device=device)
                h_mask = h_mask.to(device=device)
            pred_labels, ce_loss, rewards, actions, actions_log_prob, entropy, normalized_entropy = model(
                premises, p_mask, hypotheses, h_mask, labels)
            entropy = entropy.mean()
            normalized_entropy = normalized_entropy.mean()
            n = p_mask.shape[0]
            accuracy = (labels == pred_labels).to(dtype=torch.float32).mean()
            accuracy_meter.update(accuracy.item(), n)
            ce_loss_meter.update(ce_loss.item(), n)
            entropy_meter.update(entropy.item(), n)
            n_entropy_meter.update(normalized_entropy.item(), n)
    logger.info(
        f"Valid: epoch: {epoch} ce_loss: {ce_loss_meter.avg:.4f} accuracy: {accuracy_meter.avg:.4f} "
        f"entropy: {entropy_meter.avg:.4f} n_entropy: {n_entropy_meter.avg:.4f}")
    # global_step is the module-level counter advanced by the training loop
    summary_writer["valid"].add_scalar(tag="ce", scalar_value=ce_loss_meter.avg,
                                       global_step=global_step)
    summary_writer["valid"].add_scalar(tag="accuracy",
                                       scalar_value=accuracy_meter.avg,
                                       global_step=global_step)
    summary_writer["valid"].add_scalar(tag="n_entropy",
                                       scalar_value=n_entropy_meter.avg,
                                       global_step=global_step)
    model.train()
    return accuracy_meter.avg
def test_model(args, model, test_loader, logging):
    trlog = torch.load(osp.join(args.save_path1, 'trlog'))
    model.load_state_dict(
        torch.load(osp.join(args.save_path1, 'max_acc.pth'))['params'])
    # criterion was undefined here; use the same loss as train_model
    criterion = nn.MSELoss().cuda()
    t1 = AverageMeter()
    tmaes = AverageMeter()
    tmses = AverageMeter()
    model.eval()
    logging.info(
        'Best Epoch {}, best val mae={:.4f}, best val mse={:.4f}'.format(
            trlog['max_mae_epoch'], trlog['max_mae'], trlog['max_mse']))
    with torch.no_grad():
        for i, batch in enumerate(test_loader, 1):
            # (a dead six-way unpack of batch[0..5] was removed; only the
            # SACANet branch below ever used the data)
            if args.model_type == 'SACANet':
                data, gt_label = batch[0].cuda(), batch[1].cuda()
                pred_map = model(data)
                loss = criterion(pred_map, gt_label)
            else:
                raise ValueError('Unsupported model type: %s' % args.model_type)
            pred_map = pred_map[:, 1, :, :].data.cpu().numpy()
            gt_label = gt_label[:, 1, :, :].data.cpu().numpy()
            for i_img in range(pred_map.shape[0]):
                pred_cnt = np.sum(pred_map[i_img]) / args.LOG_PARA
                gt_count = np.sum(gt_label[i_img]) / args.LOG_PARA
                tmaes.update(abs(gt_count - pred_cnt))
                tmses.update((gt_count - pred_cnt) * (gt_count - pred_cnt))
            t1.update(loss.item(), data.size(0))
    tmae = tmaes.avg
    tmse = np.sqrt(tmses.avg)
    logging.info('Test mae={:.4f}, mse={:.4f}'.format(tmae, tmse))
def train_latent(self, imgs, classes, model_dir, tensorboard_dir, retrain=False):
    data = dict(
        img=torch.from_numpy(imgs).permute(0, 3, 1, 2),
        img_id=torch.from_numpy(np.arange(imgs.shape[0])),
        class_id=torch.from_numpy(classes.astype(np.int64))
    )

    dataset = NamedTensorDataset(data)
    data_loader = DataLoader(
        dataset, batch_size=self.config['train']['batch_size'],
        shuffle=True, sampler=None, batch_sampler=None,
        num_workers=1, pin_memory=True, drop_last=True
    )

    if not retrain:
        self.latent_model = LatentModel(self.config)
        self.latent_model.init()

    self.latent_model.to(self.device)

    criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(self.device)
    # content_criterion = nn.KLDivLoss()

    optimizer = Adam([
        {
            'params': itertools.chain(self.latent_model.modulation.parameters(),
                                      self.latent_model.generator.parameters()),
            'lr': self.config['train']['learning_rate']['generator']
        },
        {
            'params': itertools.chain(self.latent_model.content_embedding.parameters(),
                                      self.latent_model.class_embedding.parameters()),
            'lr': self.config['train']['learning_rate']['latent']
        }
    ], betas=(0.5, 0.999))

    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=self.config['train']['n_epochs'] * len(data_loader),
        eta_min=self.config['train']['learning_rate']['min']
    )

    summary = SummaryWriter(log_dir=tensorboard_dir)
    train_loss = AverageMeter()

    for epoch in range(self.config['train']['n_epochs']):
        self.latent_model.train()
        train_loss.reset()

        pbar = tqdm(iterable=data_loader)
        for batch in pbar:
            batch = {name: tensor.to(self.device) for name, tensor in batch.items()}

            optimizer.zero_grad()
            out = self.latent_model(batch['img_id'], batch['class_id'])

            content_penalty = torch.sum(out['content_code'] ** 2, dim=1).mean()
            # content_penalty = content_criterion(out['content_code'], torch.normal(0, self.config['content_std'], size=out['content_code'].shape).to(self.device))
            loss = criterion(out['img'], batch['img']) + self.config['content_decay'] * content_penalty

            loss.backward()
            optimizer.step()
            scheduler.step()

            train_loss.update(loss.item())
            pbar.set_description_str('epoch #{}'.format(epoch))
            pbar.set_postfix(loss=train_loss.avg)

        pbar.close()
        self.save(model_dir, latent=True, amortized=False)

        summary.add_scalar(tag='loss', scalar_value=train_loss.avg, global_step=epoch)

        fixed_sample_img = self.generate_samples(dataset, randomized=False)
        random_sample_img = self.generate_samples(dataset, randomized=True)

        summary.add_image(tag='sample-fixed', img_tensor=fixed_sample_img, global_step=epoch)
        summary.add_image(tag='sample-random', img_tensor=random_sample_img, global_step=epoch)

    summary.close()
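# VGGDistance is the perceptual loss shared by the LORD-style training loops
# in this section. A sketch of the usual construction (per-sample pixel-space
# L1 plus L1 over selected VGG16 feature maps); the layer-id convention and
# the weighting are assumptions, not the project's verbatim implementation.
import torch
import torch.nn as nn
from torchvision import models

class VGGDistance(nn.Module):
    def __init__(self, layer_ids):
        super().__init__()
        self.features = models.vgg16(pretrained=True).features.eval()
        for p in self.features.parameters():
            p.requires_grad = False
        self.layer_ids = set(layer_ids)

    def forward(self, x, y):
        # start from a per-sample pixel-space L1 term
        dist = torch.abs(x - y).view(x.size(0), -1).mean(dim=1)
        for i, layer in enumerate(self.features):
            x, y = layer(x), layer(y)
            if i in self.layer_ids:
                # add the feature-space L1 term at each selected layer
                dist = dist + torch.abs(x - y).view(x.size(0), -1).mean(dim=1)
        return dist.mean()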
def train_amortized(self, imgs, classes, model_dir, tensorboard_dir):
    self.amortized_model = AmortizedModel(self.config)
    self.amortized_model.modulation.load_state_dict(
        self.latent_model.modulation.state_dict())
    self.amortized_model.generator.load_state_dict(
        self.latent_model.generator.state_dict())

    data = dict(
        img=torch.from_numpy(imgs).permute(0, 3, 1, 2),
        img_id=torch.from_numpy(np.arange(imgs.shape[0])),
        class_id=torch.from_numpy(classes.astype(np.int64))
    )

    dataset = NamedTensorDataset(data)
    data_loader = DataLoader(
        dataset, batch_size=self.config['train']['batch_size'],
        shuffle=True, sampler=None, batch_sampler=None,
        num_workers=1, pin_memory=True, drop_last=True
    )

    self.latent_model.to(self.device)
    self.amortized_model.to(self.device)

    reconstruction_criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(self.device)
    embedding_criterion = nn.MSELoss()

    optimizer = Adam(
        params=self.amortized_model.parameters(),
        lr=self.config['train_encoders']['learning_rate']['max'],
        betas=(0.5, 0.999)
    )

    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=self.config['train_encoders']['n_epochs'] * len(data_loader),
        eta_min=self.config['train_encoders']['learning_rate']['min']
    )

    summary = SummaryWriter(log_dir=tensorboard_dir)
    train_loss = AverageMeter()

    for epoch in range(self.config['train_encoders']['n_epochs']):
        self.latent_model.eval()
        self.amortized_model.train()
        train_loss.reset()

        pbar = tqdm(iterable=data_loader)
        for batch in pbar:
            batch = {name: tensor.to(self.device) for name, tensor in batch.items()}

            optimizer.zero_grad()
            target_content_code = self.latent_model.content_embedding(batch['img_id'])
            target_class_code = self.latent_model.class_embedding(batch['class_id'])
            out = self.amortized_model(batch['img'])

            loss_reconstruction = reconstruction_criterion(out['img'], batch['img'])
            loss_content = embedding_criterion(out['content_code'], target_content_code)
            loss_class = embedding_criterion(out['class_code'], target_class_code)
            loss = loss_reconstruction + 10 * loss_content + 10 * loss_class

            loss.backward()
            optimizer.step()
            scheduler.step()

            train_loss.update(loss.item())
            pbar.set_description_str('epoch #{}'.format(epoch))
            pbar.set_postfix(loss=train_loss.avg)

        pbar.close()
        self.save(model_dir, latent=False, amortized=True)

        summary.add_scalar(tag='loss-amortized', scalar_value=loss.item(), global_step=epoch)
        summary.add_scalar(tag='rec-loss-amortized', scalar_value=loss_reconstruction.item(), global_step=epoch)
        summary.add_scalar(tag='content-loss-amortized', scalar_value=loss_content.item(), global_step=epoch)
        summary.add_scalar(tag='class-loss-amortized', scalar_value=loss_class.item(), global_step=epoch)

        fixed_sample_img = self.generate_samples_amortized(dataset, randomized=False)
        random_sample_img = self.generate_samples_amortized(dataset, randomized=True)

        summary.add_image(tag='sample-fixed-amortized', img_tensor=fixed_sample_img, global_step=epoch)
        summary.add_image(tag='sample-random-amortized', img_tensor=random_sample_img, global_step=epoch)

    summary.close()
def infer(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # prepare model
    if args.model_type == "GLU_Transformer":
        model = GLU_Transformer(phone_size=args.phone_size,
                                embed_size=args.embedding_size,
                                hidden_size=args.hidden_size,
                                glu_num_layers=args.glu_num_layers,
                                dropout=args.dropout,
                                output_dim=args.feat_dim,
                                dec_nhead=args.dec_nhead,
                                n_mels=args.n_mels,
                                local_gaussian=args.local_gaussian,
                                dec_num_block=args.dec_num_block)
    elif args.model_type == "PureTransformer":
        model = TransformerSVS(phone_size=args.phone_size,
                               embed_size=args.embedding_size,
                               hidden_size=args.hidden_size,
                               glu_num_layers=args.glu_num_layers,
                               dropout=args.dropout,
                               output_dim=args.feat_dim,
                               dec_nhead=args.dec_nhead,
                               dec_num_block=args.dec_num_block,
                               n_mels=args.n_mels,
                               local_gaussian=args.local_gaussian)
    elif args.model_type == "PureTransformer_norm":
        model = TransformerSVS_norm(stats_file=args.stats_file,
                                    stats_mel_file=args.stats_mel_file,
                                    phone_size=args.phone_size,
                                    embed_size=args.embedding_size,
                                    hidden_size=args.hidden_size,
                                    glu_num_layers=args.glu_num_layers,
                                    dropout=args.dropout,
                                    output_dim=args.feat_dim,
                                    dec_nhead=args.dec_nhead,
                                    dec_num_block=args.dec_num_block,
                                    n_mels=args.n_mels,
                                    local_gaussian=args.local_gaussian)
    elif args.model_type == "GLU_Transformer_norm":
        model = GLU_TransformerSVS_norm(stats_file=args.stats_file,
                                        stats_mel_file=args.stats_mel_file,
                                        phone_size=args.phone_size,
                                        embed_size=args.embedding_size,
                                        hidden_size=args.hidden_size,
                                        glu_num_layers=args.glu_num_layers,
                                        dropout=args.dropout,
                                        output_dim=args.feat_dim,
                                        dec_nhead=args.dec_nhead,
                                        dec_num_block=args.dec_num_block,
                                        n_mels=args.n_mels,
                                        local_gaussian=args.local_gaussian)
    else:
        raise ValueError('Not Support Model Type %s' % args.model_type)

    # Load model weights, skipping parameters whose shapes changed
    print("Loading pretrained weights from {}".format(args.model_file))
    checkpoint = torch.load(args.model_file, map_location=device)
    state_dict = checkpoint['state_dict']
    model_dict = model.state_dict()
    state_dict_new = {}
    para_list = []
    for k, v in state_dict.items():
        assert k in model_dict
        if model_dict[k].size() == state_dict[k].size():
            state_dict_new[k] = v
        else:
            para_list.append(k)
    print("Total {} parameters, loaded {} parameters".format(
        len(state_dict), len(state_dict_new)))
    if len(para_list) > 0:
        print("Not loading {} because of different sizes".format(
            ", ".join(para_list)))
    model_dict.update(state_dict_new)
    model.load_state_dict(model_dict)
    print("Loaded checkpoint {}".format(args.model_file))
    model = model.to(device)
    model.eval()

    # Decode
    test_set = SVSDataset(align_root_path=args.test_align,
                          pitch_beat_root_path=args.test_pitch,
                          wav_root_path=args.test_wav,
                          char_max_len=args.char_max_len,
                          max_len=args.num_frames,
                          sr=args.sampling_rate,
                          preemphasis=args.preemphasis,
                          nfft=args.nfft,
                          frame_shift=args.frame_shift,
                          frame_length=args.frame_length,
                          n_mels=args.n_mels,
                          power=args.power,
                          max_db=args.max_db,
                          ref_db=args.ref_db,
                          standard=args.standard,
                          sing_quality=args.sing_quality)
    collate_fn_svs = SVSCollator(args.num_frames, args.char_max_len,
                                 args.use_asr_post, args.phone_size)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.num_workers,
                                              collate_fn=collate_fn_svs,
                                              pin_memory=True)

    # was assigned to "loss" but used below as "criterion"
    if args.loss == "l1":
        criterion = MaskedLoss("l1")
    elif args.loss == "mse":
        criterion = MaskedLoss("mse")
    else:
        raise ValueError("Not Support Loss Type")

    losses = AverageMeter()
    spec_losses = AverageMeter()
    if args.perceptual_loss > 0:
        pe_losses = AverageMeter()
    if args.n_mels > 0:
        mel_losses = AverageMeter()
    if not os.path.exists(args.prediction_path):
        os.makedirs(args.prediction_path)

    # NOTE: "mel" was used in the loop body without ever being unpacked; its
    # position in the collate output below is an assumption
    for step, (phone, beat, pitch, spec, real, imag, length, chars,
               char_len_list, mel) in enumerate(test_loader, 1):
        if step >= args.decode_sample:
            break
        phone = phone.to(device)
        beat = beat.to(device)
        pitch = pitch.to(device).float()
        spec = spec.to(device).float()
        mel = mel.to(device).float()
        real = real.to(device).float()
        imag = imag.to(device).float()
        length_mask = length.unsqueeze(2)
        length_mel_mask = length_mask.repeat(1, 1, mel.shape[2]).float()
        length_mask = length_mask.repeat(1, 1, spec.shape[2]).float()
        length_mask = length_mask.to(device)
        length_mel_mask = length_mel_mask.to(device)
        length = length.to(device)
        char_len_list = char_len_list.to(device)
        if not args.use_asr_post:
            chars = chars.to(device)
        else:
            phone = phone.float()

        if args.model_type == "GLU_Transformer":
            output, att, output_mel = model(chars, phone, pitch, beat,
                                            pos_char=char_len_list,
                                            pos_spec=length)
        elif args.model_type == "LSTM":
            output, hidden, output_mel = model(phone, pitch, beat)
            att = None
        elif args.model_type == "PureTransformer":
            output, att, output_mel = model(chars, phone, pitch, beat,
                                            pos_char=char_len_list,
                                            pos_spec=length)
        elif args.model_type in ("PureTransformer_norm",
                                 "GLU_Transformer_norm"):
            output, att, output_mel, spec_norm, mel_norm = model(
                spec, mel, chars, phone, pitch, beat,
                pos_char=char_len_list, pos_spec=length)
            output, _ = model.normalizer.inverse(output)

        if args.normalize:
            global_normalizer = GlobalMVN(args.stats_file)
            output, _ = global_normalizer.inverse(output, length)
        spec_loss = criterion(output, spec, length_mask)
        if args.n_mels > 0:
            # FIXME: mel_loss is computed on the recovered (denormalized) mel
            mel_loss = criterion(output_mel, mel, length_mel_mask)
        else:
            mel_loss = 0
        final_loss = mel_loss + spec_loss

        # was "train_loss.item()", which is undefined in this function
        losses.update(final_loss.item(), phone.size(0))
        spec_losses.update(spec_loss.item(), phone.size(0))
        if args.n_mels > 0:
            mel_losses.update(mel_loss.item(), phone.size(0))
        if step % 1 == 0:
            log_figure(step, output, spec, att, length,
                       args.prediction_path, args)
    print("loss avg for test is {}".format(losses.avg))
def train(self, imgs, classes, model_dir, tensorboard_dir):
    imgs = torch.from_numpy(imgs).permute(0, 3, 1, 2)
    class_ids = torch.from_numpy(classes.astype(int))
    img_ids = torch.arange(imgs.shape[0])
    tensor_dataset = TensorDataset(imgs, img_ids, class_ids)
    data_loader = DataLoader(tensor_dataset,
                             batch_size=self.config['train']['batch_size'],
                             shuffle=True, sampler=None, batch_sampler=None,
                             num_workers=1, pin_memory=True, drop_last=True)

    self.model.init()
    self.model.to(self.device)

    criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(self.device)

    optimizer = Adam([
        {
            'params': self.model.generator.parameters(),
            'lr': self.config['train']['learning_rate']['generator']
        },
        {
            'params': self.model.modulation.parameters(),
            'lr': self.config['train']['learning_rate']['generator']
        },
        {
            'params': self.model.embeddings.parameters(),
            'lr': self.config['train']['learning_rate']['latent']
        }
    ], betas=(0.5, 0.999))

    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=self.config['train']['n_epochs'] * len(data_loader),
        eta_min=self.config['train']['learning_rate']['min'])

    with SummaryWriter(log_dir=os.path.join(tensorboard_dir, 'stage1')) as summary:
        train_loss = AverageMeter()
        for epoch in range(1, self.config['train']['n_epochs'] + 1):
            self.model.train()
            train_loss.reset()

            with tqdm(iterable=data_loader) as pbar:
                for batch in pbar:
                    batch_imgs, batch_img_ids, batch_class_ids = (
                        tensor.to(self.device) for tensor in batch)
                    generated_imgs, batch_content_codes, batch_class_codes = self.model(
                        batch_img_ids, batch_class_ids)

                    optimizer.zero_grad()
                    content_penalty = torch.sum(batch_content_codes ** 2, dim=1).mean()
                    loss = criterion(generated_imgs, batch_imgs) \
                        + self.config['content_decay'] * content_penalty
                    loss.backward()

                    optimizer.step()
                    scheduler.step()

                    train_loss.update(loss.item())
                    pbar.set_description_str('epoch #{}'.format(epoch))
                    pbar.set_postfix(loss=train_loss.avg)

            torch.save(self.model.generator.state_dict(),
                       os.path.join(model_dir, 'generator.pth'))
            torch.save(self.model.embeddings.state_dict(),
                       os.path.join(model_dir, 'embeddings.pth'))
            torch.save(self.model.modulation.state_dict(),
                       os.path.join(model_dir, 'class_modulation.pth'))

            self.model.eval()
            fixed_sample_img = self.evaluate(imgs, img_ids, class_ids,
                                             randomized=False)
            random_sample_img = self.evaluate(imgs, img_ids, class_ids,
                                              randomized=True)

            summary.add_scalar(tag='loss', scalar_value=train_loss.avg,
                               global_step=epoch)
            summary.add_image(tag='sample-fixed',
                              img_tensor=fixed_sample_img, global_step=epoch)
            summary.add_image(tag='sample-random',
                              img_tensor=random_sample_img, global_step=epoch)
def infer(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # prepare model
    if args.model_type == "GLU_Transformer":
        model = GLU_Transformer(phone_size=args.phone_size,
                                embed_size=args.embedding_size,
                                hidden_size=args.hidden_size,
                                glu_num_layers=args.glu_num_layers,
                                dropout=args.dropout,
                                output_dim=args.feat_dim,
                                dec_nhead=args.dec_nhead,
                                dec_num_block=args.dec_num_block)
    else:
        raise ValueError('Not Support Model Type %s' % args.model_type)

    # Load model weights, skipping parameters whose shapes changed
    print("Loading pretrained weights from {}".format(args.model_file))
    checkpoint = torch.load(args.model_file, map_location=device)
    state_dict = checkpoint['state_dict']
    model_dict = model.state_dict()
    state_dict_new = {}
    para_list = []
    for k, v in state_dict.items():
        assert k in model_dict
        if model_dict[k].size() == state_dict[k].size():
            state_dict_new[k] = v
        else:
            para_list.append(k)
    print("Total {} parameters, loaded {} parameters".format(
        len(state_dict), len(state_dict_new)))
    if len(para_list) > 0:
        print("Not loading {} because of different sizes".format(
            ", ".join(para_list)))
    model_dict.update(state_dict_new)
    model.load_state_dict(model_dict)
    print("Loaded checkpoint {}".format(args.model_file))
    model = model.to(device)
    model.eval()

    # Decode
    test_set = SVSDataset(align_root_path=args.test_align,
                          pitch_beat_root_path=args.test_pitch,
                          wav_root_path=args.test_wav,
                          char_max_len=args.char_max_len,
                          max_len=args.num_frames,
                          sr=args.sampling_rate,
                          preemphasis=args.preemphasis,
                          frame_shift=args.frame_shift,
                          frame_length=args.frame_length,
                          n_mels=args.n_mels,
                          power=args.power,
                          max_db=args.max_db,
                          ref_db=args.ref_db)
    collate_fn_svs = SVSCollator(args.num_frames, args.char_max_len)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.num_workers,
                                              collate_fn=collate_fn_svs,
                                              pin_memory=True)

    if args.loss == "l1":
        loss = MaskedLoss("l1")
    elif args.loss == "mse":
        loss = MaskedLoss("mse")
    else:
        raise ValueError("Not Support Loss Type")

    losses = AverageMeter()
    if not os.path.exists(args.prediction_path):
        os.makedirs(args.prediction_path)

    for step, (phone, beat, pitch, spec, length, chars,
               char_len_list) in enumerate(test_loader, 1):
        if step >= args.decode_sample:
            break
        phone = phone.to(device)
        beat = beat.to(device)
        pitch = pitch.to(device).float()
        spec = spec.to(device).float()
        chars = chars.to(device)
        length_mask = create_src_key_padding_mask(length, args.num_frames)
        length_mask = length_mask.unsqueeze(2)
        length_mask = length_mask.repeat(1, 1, spec.shape[2]).float()
        length_mask = length_mask.to(device)
        length = length.to(device)
        char_len_list = char_len_list.to(device)

        output, att = model(chars, phone, pitch, beat,
                            src_key_padding_mask=length,
                            char_key_padding_mask=char_len_list)
        test_loss = loss(output, spec, length_mask)

        if step % 1 == 0:
            # save wav and plot spectrogram
            output = output.cpu().detach().numpy()[0]
            out_spec = spec.cpu().detach().numpy()[0]
            length = length.cpu().detach().numpy()[0]
            att = att.cpu().detach().numpy()[0]
            # np.save("output.npy", output)
            # np.save("out_spec.npy", out_spec)
            # np.save("att.npy", att)
            output = output[:length]
            out_spec = out_spec[:length]
            att = att[:, :length, :length]
            wav = spectrogram2wav(output, args.max_db, args.ref_db,
                                  args.preemphasis, args.power,
                                  args.sampling_rate, args.frame_shift,
                                  args.frame_length)
            wav_true = spectrogram2wav(out_spec, args.max_db, args.ref_db,
                                       args.preemphasis, args.power,
                                       args.sampling_rate, args.frame_shift,
                                       args.frame_length)
            write_wav(os.path.join(args.prediction_path,
                                   '{}.wav'.format(step)),
                      wav, args.sampling_rate)
            write_wav(os.path.join(args.prediction_path,
                                   '{}_true.wav'.format(step)),
                      wav_true, args.sampling_rate)

            plt.subplot(1, 2, 1)
            specshow(output.T)
            plt.title("prediction")
            plt.subplot(1, 2, 2)
            specshow(out_spec.T)
            plt.title("ground_truth")
            plt.savefig(os.path.join(args.prediction_path,
                                     '{}.png'.format(step)))

            plt.subplot(1, 4, 1)
            specshow(att[0])
            plt.subplot(1, 4, 2)
            specshow(att[1])
            plt.subplot(1, 4, 3)
            specshow(att[2])
            plt.subplot(1, 4, 4)
            specshow(att[3])
            plt.savefig(os.path.join(args.prediction_path,
                                     '{}_att.png'.format(step)))

        losses.update(test_loss.item(), phone.size(0))
    print("loss avg for test is {}".format(losses.avg))
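# spectrogram2wav above inverts a normalized, dB-scaled magnitude spectrogram
# back to a waveform. A sketch of the common Tacotron-style recipe
# (denormalize, undo dB, Griffin-Lim, undo pre-emphasis) using librosa; the
# normalization constants mirror the max_db/ref_db arguments, but the exact
# project implementation may differ.
import numpy as np
import librosa
from scipy import signal

def spectrogram2wav(mag, max_db, ref_db, preemphasis, power, sr,
                    frame_shift, frame_length):
    mag = np.clip(mag, 0, 1) * max_db - max_db + ref_db  # denormalize to dB
    mag = np.power(10.0, mag * 0.05) ** power            # dB -> amplitude, sharpened
    hop_length = int(sr * frame_shift)
    win_length = int(sr * frame_length)
    n_fft = (mag.shape[1] - 1) * 2                       # infer n_fft from bin count
    wav = librosa.griffinlim(mag.T, hop_length=hop_length,
                             win_length=win_length, n_fft=n_fft)
    wav = signal.lfilter([1], [1, -preemphasis], wav)    # undo pre-emphasis
    return wav.astype(np.float32)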
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer,
                dataloader_train, cfg):
    model.train()
    num_classes = cfg['num_classes']
    train_loss = AverageMeter()
    train_acc = AverageMeter()
    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size
    dataiter = iter(dataloader_train)
    time_now = time.time()
    summary['epoch'] = epoch
    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target = prefetcher.next()
    for step in range(steps):
        data = img.to(device)
        target = target.to(device)
        output = model(data)
        output = F.relu(output)
        output = output.view(img.size(0), num_classes)
        target = target.view(img.size(0), num_classes)
        # weighted regression loss over the two output channels
        loss = loss_fn(output[:, 0], target[:, 0]) + \
            0.5 * loss_fn(output[:, 1], target[:, 1])

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        # scheduler.step()
        # lr = scheduler.get_last_lr()[0]

        target_class = ((target[:, 0]) * 3 >= 15)
        predicts_class = ((output[:, 0]) * 3 >= 15)
        acc = (predicts_class == target_class).type(
            torch.cuda.FloatTensor).sum() * 1.0 / img.size(0)
        r2 = r2_score(target.cpu().detach().numpy(),
                      output.cpu().detach().numpy())
        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)
        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()
            logging.info('Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                         'R2 : {:.3f}, Acc : {:.3f}, Run Time : {:.2f}'.format(
                             summary['epoch'], summary['step'],
                             train_loss.avg, r2, reduced_acc, time_spent))
            summary['step'] += 1
        img, target = prefetcher.next()

    if args.local_rank == 0:
        summary_writer.add_scalar('train/loss', train_loss.avg, epoch)
        summary_writer.add_scalar('train/R2', r2, epoch)
        summary_writer.add_scalar('train/Acc', train_acc.avg, epoch)
        # summary_writer.add_scalar(
        #     'learning_rate', lr, summary['step'] + steps*epoch)
        summary['epoch'] = epoch
        summary_writer.flush()
    return summary
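# reduce_tensor and to_python_float come from the distributed training
# utilities these scripts assume (NVIDIA apex-style helpers). A minimal
# sketch under that assumption -- torch.distributed must already be
# initialized for the all-reduce to work:
import torch.distributed as dist

def reduce_tensor(tensor, reduction=True):
    """All-reduce a tensor across workers; average it when reduction=True."""
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    if reduction:
        rt /= dist.get_world_size()
    return rt

def to_python_float(t):
    """Convert a 0-dim tensor (or plain number) to a Python float."""
    return t.item() if hasattr(t, 'item') else float(t)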
def train_model(args, model, train_loader, val_loader, logging):
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.max_epoch)
    criterion = nn.MSELoss().cuda()
    reg1 = nn.L1Loss().cuda()

    def save_model(name):
        torch.save(dict(params=model.state_dict()),
                   osp.join(args.save_path1, name + '.pth'))

    trlog = {}
    trlog['args'] = vars(args)
    trlog['train_loss'] = []
    trlog['val_loss'] = []
    trlog['train_mae'] = []
    trlog['train_mse'] = []
    trlog['val_mae'] = []
    trlog['val_mse'] = []
    trlog['max_mae'] = 1000000
    trlog['max_mse'] = 1000000
    trlog['max_mae_epoch'] = 0
    trlog['max_mae_last10'] = 1000000
    trlog['max_mse_last10'] = 1000000
    trlog['max_mae_last10_epoch'] = 0

    timer = Timer()
    global_count = 0
    writer = SummaryWriter(logdir=args.save_path1)
    epoch_time = AverageMeter()

    for epoch in range(1, args.max_epoch + 1):
        epoch_start = time.time()
        model.train()
        t1 = AverageMeter()
        maes = AverageMeter()
        mses = AverageMeter()
        batch_time = AverageMeter()
        for i, batch in enumerate(train_loader, 1):
            batch_start = time.time()
            global_count = global_count + 1
            if args.model_type == 'SACANet':
                data, gt_label = batch[0].cuda(), batch[1].cuda()
                pred_map = model(data)
                pred_map = torch.squeeze(pred_map)
                gt_label = torch.squeeze(gt_label)
                loss = criterion(pred_map, gt_label)
            else:
                raise ValueError('Unsupported model type: %s' % args.model_type)
            pred_map = pred_map.data.cpu().numpy()
            gt_label = gt_label.data.cpu().numpy()
            for i_img in range(pred_map.shape[0]):
                pred_cnt = np.sum(pred_map[i_img]) / args.LOG_PARA
                gt_count = np.sum(gt_label[i_img]) / args.LOG_PARA
                maes.update(abs(gt_count - pred_cnt))
                mses.update((gt_count - pred_cnt) * (gt_count - pred_cnt))
            writer.add_scalar('data/loss', float(loss), global_count)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            torch.cuda.synchronize()
            batch_time.update(time.time() - batch_start)
            t1.update(loss.item(), data.size(0))
        t1 = t1.avg
        mae = maes.avg
        mse = np.sqrt(mses.avg)
        lr_scheduler.step()
        epoch_duration = time.time() - epoch_start
        epoch_time.update(epoch_duration)
        logging.info(
            'epoch {}, loss={:4f}, train mae={:4f}, train mse={:4f}'.format(
                epoch, float(t1), float(mae), float(mse)))
        logging.info('Epoch time: {:3f}s'.format(epoch_duration))

        v1 = AverageMeter()
        vmaes = AverageMeter()
        vmses = AverageMeter()
        model.eval()
        with torch.no_grad():
            for i, batch in enumerate(val_loader, 1):
                # validate with the same model type as training
                # (this branch originally checked 'CSRNet')
                if args.model_type == 'SACANet':
                    data, gt_label = batch[0].cuda(), batch[1].cuda()
                    pred_map = model(data)
                    pred_map = torch.squeeze(pred_map)
                    gt_label = torch.squeeze(gt_label)
                    loss = criterion(pred_map, gt_label)
                else:
                    raise ValueError('Unsupported model type: %s' %
                                     args.model_type)
                v1.update(loss.item(), data.size(0))
                # the per-image count computation was missing here, leaving
                # pred_cnt/gt_count undefined; mirror the training loop
                pred_map = pred_map.data.cpu().numpy()
                gt_label = gt_label.data.cpu().numpy()
                for i_img in range(pred_map.shape[0]):
                    pred_cnt = np.sum(pred_map[i_img]) / args.LOG_PARA
                    gt_count = np.sum(gt_label[i_img]) / args.LOG_PARA
                    vmaes.update(abs(gt_count - pred_cnt))
                    vmses.update((gt_count - pred_cnt) * (gt_count - pred_cnt))
        v1 = v1.avg
        vmae = vmaes.avg
        vmse = np.sqrt(vmses.avg)
        writer.add_scalar('data/val_loss', float(v1), epoch)
        logging.info('epoch {}, val mae={:}, val mse={:}'.format(
            epoch, vmae, vmse))

        if epoch % 10 == 0 or epoch > (args.max_epoch - 30):
            if vmae < trlog['max_mae']:
                trlog['max_mae'] = vmae
                trlog['max_mse'] = vmse
                trlog['max_mae_epoch'] = epoch
                save_model('max_acc')
        if epoch >= (args.max_epoch - 10):
            if vmae <= trlog['max_mae_last10']:
                trlog['max_mae_last10'] = vmae
                trlog['max_mse_last10'] = vmse
                trlog['max_mae_last10_epoch'] = epoch

        trlog['train_loss'].append(t1)
        trlog['train_mae'].append(mae)
        trlog['train_mse'].append(mse)
        trlog['val_loss'].append(v1)
        trlog['val_mae'].append(vmae)
        trlog['val_mse'].append(vmse)
        torch.save(trlog, osp.join(args.save_path1, 'trlog'))

        logging.info(
            'best epoch {}, best val mae={:.4f}, best val mse={:.4f}'.format(
                trlog['max_mae_epoch'], trlog['max_mae'], trlog['max_mse']))
        logging.info(
            'best val mae last 10 epoch {}, val mae last10={}, val mse last10={:.4f}'
            .format(trlog['max_mae_last10_epoch'], trlog['max_mae_last10'],
                    trlog['max_mse_last10']))
        logging.info('ETA:{}/{}'.format(
            timer.measure(), timer.measure(epoch / args.max_epoch)))

    logging.info(
        'Total epoch training time: {:.3f}s, average: {:.3f}s'.format(
            epoch_time.sum, epoch_time.avg))
    writer.close()
    logging.info(args.save_path1)
    return model
def train(train_data, valid_data, model, optimizers, schedulers, epoch, args,
          logger, summary_writer):
    ce_loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    entropy_meter = AverageMeter()
    n_entropy_meter = AverageMeter()
    prob_ratio_meter = AverageMeter()
    device = args.gpu_id
    model.train()
    # declare all module-level globals once, up front (the original repeated
    # the declarations of best_val_accuracy/best_model_path mid-function)
    global global_step, best_model_path, best_val_accuracy
    with tqdm(total=len(train_data), desc=f"Train Epoch #{epoch+1}") as t:
        for batch_idx, (labels, premises, p_mask, hypotheses,
                        h_mask) in enumerate(train_data):
            if torch.cuda.is_available():
                labels = labels.to(device=device)
                premises = premises.to(device=device)
                p_mask = p_mask.to(device=device)
                hypotheses = hypotheses.to(device=device)
                h_mask = h_mask.to(device=device)
            pred_labels, ce_loss, rewards, actions, actions_log_prob, entropy, normalized_entropy = model(
                premises, p_mask, hypotheses, h_mask, labels)
            ce_loss.backward()
            optimizers["environment"].step()
            optimizers["environment"].zero_grad()

            for k in range(args.ppo_updates):
                if k == 0:
                    new_normalized_entropy, new_actions_log_prob = normalized_entropy, actions_log_prob
                else:
                    new_normalized_entropy, new_actions_log_prob = model.evaluate_actions(
                        premises, p_mask, actions["p_actions"],
                        hypotheses, h_mask, actions["h_actions"])
                prob_ratio = (new_actions_log_prob -
                              actions_log_prob.detach()).exp()
                clamped_prob_ratio = prob_ratio.clamp(1.0 - args.epsilon,
                                                      1.0 + args.epsilon)
                ppo_loss = torch.max(prob_ratio * rewards,
                                     clamped_prob_ratio * rewards).mean()
                loss = ppo_loss - args.entropy_weight * new_normalized_entropy.mean()
                loss.backward()
                optimizers["policy"].step()
                optimizers["policy"].zero_grad()

            entropy = entropy.mean()
            normalized_entropy = normalized_entropy.mean()
            n = p_mask.shape[0]
            accuracy = (labels == pred_labels).to(dtype=torch.float32).mean()
            accuracy_meter.update(accuracy.item(), n)
            ce_loss_meter.update(ce_loss.item(), n)
            entropy_meter.update(entropy.item(), n)
            n_entropy_meter.update(normalized_entropy.item(), n)
            prob_ratio_meter.update(
                (1.0 - prob_ratio.detach()).abs().mean().item(), n)

            summary_writer["train"].add_scalar(tag="ce",
                                               scalar_value=ce_loss.item(),
                                               global_step=global_step)
            summary_writer["train"].add_scalar(tag="accuracy",
                                               scalar_value=accuracy.item(),
                                               global_step=global_step)
            summary_writer["train"].add_scalar(
                tag="n_entropy",
                scalar_value=normalized_entropy.item(),
                global_step=global_step)
            summary_writer["train"].add_scalar(
                tag="prob_ratio",
                scalar_value=prob_ratio_meter.val,  # was ".value"; ".val" assumed per the meter's usual API
                global_step=global_step)
            global_step += 1

            if (batch_idx + 1) % (len(train_data) // 10) == 0:
                logger.info(
                    f"Train: epoch: {epoch} batch_idx: {batch_idx + 1} ce_loss: {ce_loss_meter.avg:.4f} "
                    f"accuracy: {accuracy_meter.avg:.4f} entropy: {entropy_meter.avg:.4f} "
                    f"n_entropy: {n_entropy_meter.avg:.4f}")
                new_val_accuracy = validate(valid_data, model, epoch, device,
                                            logger, summary_writer)
                # TODO(siyu) how scheduler works
                schedulers["environment"].step(new_val_accuracy)
                schedulers["policy"].step(new_val_accuracy)
                if new_val_accuracy > best_val_accuracy:
                    best_model_path = f"{args.model_dir}/{epoch}-{batch_idx}.mdl"
                    logger.info("saving model to " + best_model_path)
                    torch.save(
                        {
                            "epoch": epoch,
                            "batch_idx": batch_idx,
                            "state_dict": model.state_dict()
                        }, best_model_path)
                    best_val_accuracy = new_val_accuracy

            t.set_postfix({
                'loss': ce_loss_meter.avg,
                'accuracy': 100. * accuracy_meter.avg
            })
            # 'env_lr': schedulers["environment"].get_lr(),
            # 'policy_lr': schedulers["policy"].get_lr()})
            t.update(1)
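# Note on the PPO step above: the standard clipped surrogate objective is
#   L_CLIP(theta) = E_t[ min(r_t(theta) * A_t, clip(r_t(theta), 1-eps, 1+eps) * A_t) ]
# where r_t(theta) = pi_theta(a_t|s_t) / pi_theta_old(a_t|s_t) is exactly the
# exp(log-prob difference) computed as prob_ratio. Because this loop
# *minimizes* a loss, torch.max over prob_ratio * rewards matches that
# formulation if `rewards` plays the role of a negative advantage
# (min of r*A becomes max of r*(-A)); that reading of `rewards` is an
# inference from the code, not something stated in this section.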
def valid_epoch(summary, summary_writer, epoch, model, loss_fn,
                dataloader_valid, cfg):
    logger = log.logger()
    model.eval()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']
    eval_loss = AverageMeter()
    eval_acc = AverageMeter()
    eval_pred_posit = AverageMeter()
    eval_label_posit = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes + 1)
    dataloader = [dataloader_valid]
    name = cfg['labels']
    time_now = time.time()
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
        # use torch.no_grad() to build a context that does not track gradients
        with torch.no_grad():
            prefetcher = data_prefetcher(dataiter)
            img, target, label, label_degree = prefetcher.next()
            for step in range(steps):
                data = img.to(device)
                target = target.to(device)
                output = model(data)
                output = output.view(img.size(0), num_classes)
                target = target.view(img.size(0), num_classes)
                label = label.view(img.size(0))
                conf_preds = torch.sigmoid(output)
                loss = loss_fn(conf_preds, target)
                torch.cuda.synchronize()

                # threshold each sigmoid output, prepend a zero "background"
                # column, then take the arg-max class
                predicts = (conf_preds >= 0.5)
                d = torch.Tensor([0] * img.size(0)).reshape(-1, 1).to(device)
                predicts = torch.cat((d, predicts.float()), 1)
                logger.get_info(predicts)
                predicts = MaxIndex(predicts, batch_size)

                acc = (predicts == label).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / img.size(0)
                recall_pred = (predicts[label_degree >= 20] > 1).type(
                    torch.cuda.FloatTensor).sum() * 1.0
                recall_label = (label_degree >= 20).sum()

                for t in range(num_classes + 1):
                    for p in range(num_classes + 1):
                        count = (predicts[label == t] == p).type(
                            torch.cuda.FloatTensor).sum()
                        reduced_count = reduce_tensor(count.data,
                                                      reduction=False)
                        confusion_matrix.update(t, p,
                                                to_python_float(reduced_count))

                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)
                reduced_pred_20 = reduce_tensor(recall_pred.data)
                reduced_label_20 = reduce_tensor(recall_label)
                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))
                eval_pred_posit.update(to_python_float(reduced_pred_20))
                eval_label_posit.update(to_python_float(reduced_label_20))

                if args.local_rank == 0:
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'Testing Acc : {:.3f}, Run Time : {:.2f}'.format(
                            str(i), summary['step'] + 1, reduced_loss,
                            reduced_acc, time_spent))
                    summary['step'] += 1
                img, target, label, label_degree = prefetcher.next()

    if args.local_rank == 0:
        recall = eval_pred_posit.sum / float(eval_label_posit.sum)
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix, cfg['labels'],
            tensor_name='Confusion matrix')
        summary['loss'] = eval_loss.avg
        summary['recall'] = recall
        summary['acc'] = eval_acc.avg
        print("Recall >=20:", recall)
    return summary
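# ConfusionMatrix is referenced by several epochs in this section but defined
# in none of them. A minimal sketch consistent with its usage
# (update(true_class, pred_class, count) accumulating into a .matrix array);
# treat it as an assumption, not the exact class these scripts import.
import numpy as np

class ConfusionMatrix:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.matrix = np.zeros((num_classes, num_classes), dtype=np.float64)

    def update(self, true_class, pred_class, count=1.0):
        # rows index the true class, columns the predicted class
        self.matrix[true_class, pred_class] += count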
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer,
                dataloader_train, cfg):
    # logger = log.logger()
    model.train()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']
    train_loss = AverageMeter()
    train_acc = AverageMeter()
    train_pred_posit = AverageMeter()
    train_label_posit = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes + 1)
    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size
    dataiter = iter(dataloader_train)
    time_now = time.time()
    summary['epoch'] = epoch
    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target, label, label_degree = prefetcher.next()
    for step in range(steps):
        data = img.to(device)
        target = target.to(device)
        # # mixup: generate mixed inputs, two one-hot label vectors and a
        # # mixing coefficient
        # data, target_a, target_b, lam = mixup_data(
        #     data, target, args.alpha, use_cuda)
        output = model(data)
        output = output.view(int(batch_size), num_classes)
        target = target.view(int(batch_size), num_classes)
        label = label.view(int(batch_size))
        conf_preds = torch.sigmoid(output)
        loss = loss_fn(conf_preds, target)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        # scheduler.step()
        # lr = scheduler.get_last_lr()[0]

        # threshold each sigmoid output, prepend a zero "background" column,
        # then take the arg-max class
        predicts = (conf_preds >= 0.5)
        d = torch.Tensor([0] * int(batch_size)).reshape(-1, 1).to(device)
        predicts = torch.cat((d, predicts.float()), 1)
        predicts = MaxIndex(predicts, batch_size)

        acc = (predicts == label).type(
            torch.cuda.FloatTensor).sum() * 1.0 / label.size(0)
        recall_pred = (predicts[label_degree >= 20] > 1).type(
            torch.cuda.FloatTensor).sum() * 1.0
        recall_label = (label_degree >= 20).sum()

        for t in range(num_classes + 1):
            for p in range(num_classes + 1):
                count = (predicts[label == t] == p).type(
                    torch.cuda.FloatTensor).sum()
                reduced_count = reduce_tensor(count.data, reduction=False)
                confusion_matrix.update(t, p, to_python_float(reduced_count))

        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)
        reduced_pred_20 = reduce_tensor(recall_pred.data)
        reduced_label_20 = reduce_tensor(recall_label)
        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))
        train_pred_posit.update(to_python_float(reduced_pred_20))
        train_label_posit.update(to_python_float(reduced_label_20))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()
            logging.info(
                'Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                'Training Acc : {:.3f}, Run Time : {:.2f}'.format(
                    summary['epoch'] + 1, summary['step'] + 1,
                    train_loss.avg, train_acc.avg, time_spent))
            summary['step'] += 1
        img, target, label, label_degree = prefetcher.next()

    if args.local_rank == 0:
        recall = train_pred_posit.sum / float(train_label_posit.sum)
        summary_writer.add_scalar('train/loss', train_loss.val, epoch)
        summary_writer.add_scalar('train/acc', train_acc.val, epoch)
        summary_writer.add_scalar('train/recall', recall, epoch)
        # summary_writer.add_scalar(
        #     'learning_rate', lr, summary['step'] + steps*epoch)
        summary_writer.flush()
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix, cfg['labels'],
            tensor_name='train/Confusion matrix')
        summary['epoch'] = epoch
        print("Recall >=20:", recall)
    return summary
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer,
                dataloader_train, cfg):
    model.train()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']
    train_loss = AverageMeter()
    train_acc = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes)
    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size
    dataiter = iter(dataloader_train)
    time_now = time.time()
    summary['epoch'] = epoch
    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target, mask = prefetcher.next()
    for step in range(steps):
        data = img.to(device)
        target = target.to(device)
        # # mixup: generate mixed inputs, two one-hot label vectors and a
        # # mixing coefficient
        # data, target_a, target_b, lam = mixup_data(
        #     data, target, args.alpha, use_cuda)
        output = model(data)
        output = output.view(int(batch_size), num_classes)
        target = target.view(int(batch_size))
        mask = mask.view(int(batch_size))
        # only labeled (masked-in) samples contribute to the loss
        conf_targets = target[mask]
        conf_preds = output[mask]
        loss = loss_fn(conf_preds, conf_targets)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        # scheduler.step()
        # lr = scheduler.get_last_lr()[0]

        probs = F.softmax(output, dim=1)
        # torch.max(a, 1) returns, for each row, the largest element
        # (FloatTensor) and its column index (LongTensor)
        _, predicts = torch.max(probs, 1)
        acc = (predicts[mask] == conf_targets).type(
            torch.cuda.FloatTensor).sum() * 1.0 / conf_targets.size(0)

        for t in range(num_classes):
            for p in range(num_classes):
                count = (predicts[mask][conf_targets == t] == p).type(
                    torch.cuda.FloatTensor).sum()
                reduced_count = reduce_tensor(count.data, reduction=False)
                confusion_matrix.update(t, p, to_python_float(reduced_count))

        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)
        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()
            logging.info(
                'Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                'Training Acc : {:.3f}, Run Time : {:.2f}'.format(
                    summary['epoch'] + 1, summary['step'] + 1,
                    train_loss.avg, train_acc.avg, time_spent))
            summary['step'] += 1
        img, target, mask = prefetcher.next()

    if args.local_rank == 0:
        summary_writer.add_scalar('train/loss', train_loss.val, epoch)
        summary_writer.add_scalar('train/acc', train_acc.val, epoch)
        # summary_writer.add_scalar(
        #     'learning_rate', lr, summary['step'] + steps*epoch)
        summary_writer.flush()
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix, cfg['labels'],
            tensor_name='train/Confusion matrix')
        summary['epoch'] = epoch
    return summary
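# data_prefetcher follows the NVIDIA apex ImageNet-example pattern: it copies
# the next batch to the GPU on a side CUDA stream so host-to-device transfer
# overlaps with compute. A minimal two-tensor sketch under that assumption;
# the variants above that yield three or four tensors extend the same idea.
class data_prefetcher:
    def __init__(self, loader):
        self.loader = loader  # an iterator over (input, target) batches
        self.stream = torch.cuda.Stream()
        self.preload()

    def preload(self):
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            self.next_input = None
            self.next_target = None
            return
        with torch.cuda.stream(self.stream):
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)

    def next(self):
        # wait for the async copy of the current batch, then kick off the next
        torch.cuda.current_stream().wait_stream(self.stream)
        input, target = self.next_input, self.next_target
        self.preload()
        return input, target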
print("Rep {}".format(i)) exit(0) cur = 0 f.seek(0) new_file = hold_entry for row in summary: if cur == 0: cur += 1 continue if not args.stats: if args.hold_idx != -1: write_line(manifest_file, hold_file + "," + hold_entry + "\n") else: exit(1) (new_file, new_entry) = format_entry(row, root) make_folder(new_file) make_file(new_file, new_entry) else: seconds = sox.file_info.duration(row[0]) audio_dur.update(seconds) new_file = "{},{}".format(seconds, audio_dur.avg) write_line(manifest_file, row[0] + "," + new_file + "\n") sys.stdout.write("\r[{}/{}] {} ".format( cur, tot, new_file)) sys.stdout.flush() cur += 1 sys.stdout.write("\r[{}/{}] {} ".format(cur, tot, new_file)) sys.stdout.flush() print("\n")
def valid_epoch(summary, summary_writer, epoch, model, loss_fn,
                dataloader_valid, cfg):
    model.eval()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']
    eval_loss = AverageMeter()
    eval_acc = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes)
    dataloader = [dataloader_valid]
    name = cfg['labels']
    time_now = time.time()
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
        # use torch.no_grad() to build a context that does not track gradients
        with torch.no_grad():
            prefetcher = data_prefetcher(dataiter)
            img, target, mask = prefetcher.next()
            for step in range(steps):
                data = img.to(device)
                target = target.to(device)
                output = model(data)
                output = output.view(int(batch_size), num_classes)
                target = target.view(int(batch_size))
                mask = mask.view(int(batch_size))
                conf_targets = target[mask]
                conf_preds = output[mask]
                loss = loss_fn(conf_preds, conf_targets)
                torch.cuda.synchronize()

                probs = F.softmax(output, dim=1)
                _, predicts = torch.max(probs, 1)
                acc = (predicts[mask] == conf_targets).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / conf_targets.size(0)

                for t in range(num_classes):
                    for p in range(num_classes):
                        count = (predicts[mask][conf_targets == t] == p).type(
                            torch.cuda.FloatTensor).sum()
                        reduced_count = reduce_tensor(count.data,
                                                      reduction=False)
                        confusion_matrix.update(t, p,
                                                to_python_float(reduced_count))

                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)
                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))

                if args.local_rank == 0:
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'Testing Acc : {:.3f}, Run Time : {:.2f}'.format(
                            str(i), summary['step'] + 1, reduced_loss,
                            reduced_acc, time_spent))
                    summary['step'] += 1
                img, target, mask = prefetcher.next()

    if args.local_rank == 0:
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix, cfg['labels'],
            tensor_name='train/Confusion matrix')
        summary['loss'] = eval_loss.avg
        summary['acc'] = eval_acc.avg
    return summary
def train_encoders(self, imgs, classes, model_dir, tensorboard_dir):
    imgs = torch.from_numpy(imgs).permute(0, 3, 1, 2)
    class_ids = torch.from_numpy(classes.astype(int))
    img_ids = torch.arange(imgs.shape[0])
    tensor_dataset = TensorDataset(imgs, img_ids, class_ids)
    data_loader = DataLoader(
        tensor_dataset,
        batch_size=self.config['train_encoders']['batch_size'],
        shuffle=True, sampler=None, batch_sampler=None,
        num_workers=1, pin_memory=True, drop_last=True)

    self.embeddings = LordEmbeddings(self.config)
    self.modulation = LordModulation(self.config)
    self.encoders = LordEncoders(self.config)
    self.generator = LordGenerator(self.config)
    self.embeddings.load_state_dict(
        torch.load(os.path.join(model_dir, 'embeddings.pth')))
    self.modulation.load_state_dict(
        torch.load(os.path.join(model_dir, 'class_modulation.pth')))
    self.generator.load_state_dict(
        torch.load(os.path.join(model_dir, 'generator.pth')))
    self.encoders.init()
    self.model = LordStage2(self.encoders, self.modulation, self.generator)
    self.model.to(self.device)
    self.embeddings.to(self.device)

    criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(self.device)

    optimizer = Adam([
        {
            'params': self.model.encoders.parameters(),
            'lr': self.config['train_encoders']['learning_rate']
        },
        {
            'params': self.model.modulation.parameters(),
            'lr': self.config['train_encoders']['learning_rate']
        },
        {
            'params': self.model.generator.parameters(),
            'lr': self.config['train_encoders']['learning_rate']
        }
    ], betas=(0.5, 0.999))

    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
                                  patience=20, verbose=True)

    with SummaryWriter(log_dir=os.path.join(tensorboard_dir, 'stage2')) as summary:
        train_loss = AverageMeter()
        for epoch in range(1, self.config['train']['n_epochs'] + 1):
            self.model.train()
            train_loss.reset()

            with tqdm(iterable=data_loader) as pbar:
                for batch in pbar:
                    batch_imgs, batch_img_ids, batch_class_ids = (
                        tensor.to(self.device) for tensor in batch)
                    batch_content_codes, batch_class_codes = self.embeddings(
                        batch_img_ids, batch_class_ids)
                    generated_imgs, predicted_content_codes, predicted_class_codes = self.model(
                        batch_imgs)

                    optimizer.zero_grad()
                    perc_loss = criterion(generated_imgs, batch_imgs)
                    loss_content = F.mse_loss(batch_content_codes,
                                              predicted_content_codes)
                    loss_class = F.mse_loss(batch_class_codes,
                                            predicted_class_codes)
                    loss = perc_loss + 10 * loss_content + 10 * loss_class
                    loss.backward()
                    optimizer.step()

                    train_loss.update(loss.item())
                    pbar.set_description_str('epoch #{}'.format(epoch))
                    pbar.set_postfix(loss=train_loss.avg)

            torch.save(self.model.encoders.state_dict(),
                       os.path.join(model_dir, 'encoders.pth'))
            torch.save(self.model.generator.state_dict(),
                       os.path.join(model_dir, 'generator.pth'))
            torch.save(self.model.modulation.state_dict(),
                       os.path.join(model_dir, 'class_modulation.pth'))

            scheduler.step(train_loss.avg)
            self.model.eval()
            fixed_sample_img = self.encoder_evaluate(imgs, randomized=False)
            random_sample_img = self.encoder_evaluate(imgs, randomized=True)

            summary.add_scalar(tag='loss', scalar_value=train_loss.avg,
                               global_step=epoch)
            summary.add_image(tag='sample-fixed',
                              img_tensor=fixed_sample_img, global_step=epoch)
            summary.add_image(tag='sample-random',
                              img_tensor=random_sample_img, global_step=epoch)
def valid_epoch(summary, summary_writer, epoch, model, loss_fn,
                dataloader_valid, cfg):
    model.eval()
    eval_loss = AverageMeter()
    eval_acc = AverageMeter()
    num_classes = cfg['num_classes']
    dataloader = [dataloader_valid]
    time_now = time.time()
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
        # use torch.no_grad() to build a context that does not track gradients
        with torch.no_grad():
            prefetcher = data_prefetcher(dataiter)
            img, target = prefetcher.next()
            for step in range(steps):
                data = img.to(device)
                target = target.to(device)
                output = model(data)
                output = F.relu(output)
                output = output.view(img.size(0), num_classes)
                target = target.view(img.size(0), num_classes)
                loss = loss_fn(output[:, 0], target[:, 0]) + \
                    0.5 * loss_fn(output[:, 1], target[:, 1])
                torch.cuda.synchronize()

                target_class = ((target[:, 0]) * 3 >= 15)
                predicts_class = ((output[:, 0]) * 3 >= 15)
                acc = (predicts_class == target_class).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / img.size(0)
                r2 = r2_score(target.cpu().detach().numpy(),
                              output.cpu().detach().numpy())
                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)
                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))

                if args.local_rank == 0:
                    print('target', target[:, 0] * 3)
                    print('output', output[:, 0] * 3)
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'R2 : {:.3f}, Acc : {:.3f}, Run Time : {:.2f}'.format(
                            str(i), summary['step'] + 1, reduced_loss, r2,
                            reduced_acc, time_spent))
                    summary['step'] += 1
                img, target = prefetcher.next()

    if args.local_rank == 0:
        summary['loss'] = eval_loss.avg
        summary['acc'] = eval_acc.avg
        summary['r2'] = r2
    return summary
class CloudPublishNode:
    def __init__(self, seq, node_name, cloud_topic_name, tf_topic_name,
                 dataset, global_tf_name="map", child_tf_name="car"):
        rospy.init_node(node_name)
        self.cloud_pub = rospy.Publisher(cloud_topic_name, PointCloud2,
                                         queue_size=queue_size)
        self.transform_broadcaster = tf2_ros.TransformBroadcaster()
        # for visualization
        self.est_tf_pub = rospy.Publisher(tf_topic_name, TransformStamped,
                                          queue_size=queue_size)
        self.gt_tf_pub = rospy.Publisher("gt_pose", TransformStamped,
                                         queue_size=queue_size)
        self.cap_pub = rospy.Publisher("CAP", CloudAndPose,
                                       queue_size=queue_size)
        self.rate = rospy.Rate(sleep_rate)
        self.header = Header(frame_id=global_tf_name)
        self.child_tf_name = child_tf_name  # base name before appending prefix
        self.dataset = dataset
        self.seq = seq

        transform_dict = OrderedDict()
        transform_dict[GridSampling([args.grid_size] * 3)] = ["train", "test"]
        transform_dict[NormalizeScale()] = ["train", "test"]
        transform = ComposeAdapt(transform_dict)
        self.model = Net(graph_input=LOAD_GRAPH, act="LeakyReLU",
                         transform=transform, dof=7)
        if args.model_path is not None and osp.exists(args.model_path):
            self.model.load_state_dict(
                torch.load(args.model_path, map_location=torch.device("cpu")))
            print("loaded weights from", args.model_path)
        self.model.eval()

        self.absolute_gt_pose = np.eye(4)[:3, :]
        self.absolute_est_pose = np.eye(4)[:3, :]
        self.infer_time_meter = AverageMeter()
        self.tr_error_meter = AverageMeter()
        self.rot_error_meter = AverageMeter()
        self.fields = [
            PointField('x', 0, PointField.FLOAT32, 1),
            PointField('y', 4, PointField.FLOAT32, 1),
            PointField('z', 8, PointField.FLOAT32, 1),
            PointField('intensity', 12, PointField.FLOAT32, 1)
        ]
        self.pose_list = []

    def estimate_pose(self, target_cloud, source_cloud):
        source_cloud = torch.from_numpy(source_cloud)
        target_cloud = torch.from_numpy(target_cloud)
        begin = time.time()
        pose = self.model(
            (source_cloud.unsqueeze(0), target_cloud.unsqueeze(0),
             torch.tensor(len(source_cloud)).unsqueeze(0),
             torch.tensor(len(target_cloud)).unsqueeze(0)))
        self.infer_time_meter.update(time.time() - begin)
        pose = pose.detach().numpy()
        self.pose_list.append(pose)
        return pose[0, :3], pose[0, 3:]

    def tq2tf_msg(self, translation, quaternion, header, typ="gt"):
        assert typ in ["gt", "est"]
        t = TransformStamped()
        t.header = header
        t.child_frame_id = "{}_{}".format(typ, self.child_tf_name)
        t.transform.translation.x = translation[0]
        t.transform.translation.y = translation[1]
        t.transform.translation.z = translation[2]
        t.transform.rotation.x = quaternion[0]
        t.transform.rotation.y = quaternion[1]
        t.transform.rotation.z = quaternion[2]
        t.transform.rotation.w = quaternion[3]
        return t

    def mat2tf_msg(self, transform_mat, header, typ):
        translation = transform_mat[:3, -1]
        quat = Rotation.from_matrix(transform_mat[:3, :3]).as_quat()
        return self.tq2tf_msg(translation, quat, header, typ)

    def serve(self, idx):
        self.header.seq = idx
        self.header.stamp = rospy.Time.from_sec(
            self.dataset.timestamps[idx].total_seconds())
        current_cloud = self.dataset.get_velo(idx)
        if idx == 0:
            # guess zero pose at the first time frame
            tr, quat = np.zeros((3,)), np.array([0., 0., 0., 1.])
        else:
            # estimate coarse pose relative to the previous frame with the model
            prev_cloud = self.dataset.get_velo(idx - 1)
            tr, quat = self.estimate_pose(prev_cloud, current_cloud)
        gt_pose = self.dataset.poses[idx]
        est_mat = trq2mat(tr, quat)
        delta_gt_pose = delta_poses(gt_pose.copy(),
                                    self.absolute_gt_pose.copy())
        self.absolute_gt_pose = gt_pose
        trans_error, rot_error = pose_error(delta_gt_pose, est_mat.copy())
        self.tr_error_meter.update(trans_error)
        self.rot_error_meter.update(rot_error)

        # correct the axis system of the estimated pose
        c_est_mat = kitti2rvizaxis(est_mat.copy())
        c_tr, c_quat = mat2trq(c_est_mat)

        cap_msg = CloudAndPose()
        cap_msg.seq = idx
        cap_msg.point_cloud2 = point_cloud2.create_cloud(
            self.header, self.fields, [point for point in current_cloud])
        cap_msg.init_guess = self.tq2tf_msg(*mat2trq(delta_gt_pose),
                                            self.header, "est")

        self.absolute_est_pose = add_poses(self.absolute_est_pose, c_est_mat)
        est_tf = self.mat2tf_msg(self.absolute_est_pose.copy(),
                                 self.header, "est")
        gt_tf = self.mat2tf_msg(kitti2rvizaxis(gt_pose.copy()),
                                self.header, "gt")
        self.est_tf_pub.publish(est_tf)
        self.gt_tf_pub.publish(gt_tf)
        self.transform_broadcaster.sendTransform(gt_tf)
        self.transform_broadcaster.sendTransform(est_tf)
        self.cloud_pub.publish(
            point_cloud2.create_cloud(Header(frame_id="gt_car"), self.fields,
                                      [point for point in current_cloud]))
        self.cap_pub.publish(cap_msg)
        print(
            "[{}] inference spent: {:.2f} ms\t\t| Trans : {}\t\t| GT Trans: {}\t\t| Trans error: {:.4f}\t\t| "
            "Rot error: {:.4f}".format(
                idx, self.infer_time_meter.avg, list(c_tr),
                list(delta_gt_pose[:3, -1].reshape(3,)), trans_error,
                rot_error))
        self.rate.sleep()

    def __call__(self):
        for idx in range(args.start, len(self.dataset.poses)):
            if rospy.is_shutdown():
                break
            self.serve(idx)
        print("Avg Tr Error: {:.3e}\tAvg Rot Error: {:.3e}".format(
            self.tr_error_meter.avg, self.rot_error_meter.avg))
        save_pose_predictions(np.eye(4)[:3, :], self.pose_list,
                              f"{self.seq}.txt")
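# trq2mat and mat2trq are assumed helpers converting between a (translation,
# quaternion) pair and a homogeneous pose matrix, mirroring the
# Rotation-based logic in mat2tf_msg above. A sketch under that assumption:
import numpy as np
from scipy.spatial.transform import Rotation

def trq2mat(tr, quat):
    """(translation, xyzw quaternion) -> 4x4 pose matrix."""
    mat = np.eye(4)
    mat[:3, :3] = Rotation.from_quat(quat).as_matrix()
    mat[:3, -1] = tr
    return mat

def mat2trq(mat):
    """Pose matrix (3x4 or 4x4) -> (translation, xyzw quaternion)."""
    return mat[:3, -1], Rotation.from_matrix(mat[:3, :3]).as_quat()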