def validate(model, val_set, params):
    """Evaluate the model on the validation set."""
    val_data = tqdm(DataLoader(val_set,
                               batch_size=params.batch_size,
                               collate_fn=KeyphraseData.collate_fn),
                    total=(len(val_set) // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()

    with torch.no_grad():
        model.eval()
        for data, labels, mask in val_data:
            data = data.to(params.device)
            labels = labels.to(params.device)
            mask = mask.to(params.device)

            loss, logits = model(data, attention_mask=mask, labels=labels)
            predicted = logits.max(2)[1]

            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=mask.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL', loss='{:05.3f}'.format(loss_avg()))

    metrics.loss = loss_avg()
    return metrics
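# validate() above relies on a RunningAverage helper whose update() takes a
# scalar and whose __call__() returns the current mean. A minimal sketch of
# that assumed interface (not necessarily the original implementation):
class RunningAverage:
    """Tracks the mean of a stream of scalar values."""

    def __init__(self):
        self.total = 0.0
        self.count = 0

    def update(self, value):
        self.total += value
        self.count += 1

    def __call__(self):
        # guard against division by zero before the first update
        return self.total / max(self.count, 1)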
def train_epoch(self, epoch):
    """Train one epoch."""
    self.model.train()  # set model to training mode
    losses = Metrics()
    total_iter = len(self.train_data_loader.dataset) // self.train_data_loader.batch_size

    for idx, (x, y) in enumerate(self.train_data_loader):
        s = time.monotonic()
        x = x.to(self.device)
        y = y.to(self.device)

        y_pred = self.model(x)
        self.optimizer.zero_grad()
        loss = self.criterion(y_pred, y)
        loss.backward()
        self.optimizer.step()

        losses.update(loss.item(), x.size(0))
        self.writer.add_scalar('train/current_loss', losses.val, self.train_step)
        self.writer.add_scalar('train/avg_loss', losses.avg, self.train_step)
        self.train_step += 1
        e = time.monotonic()

        if idx % self.print_freq == 0:
            iter_time = e - s  # seconds for this iteration
            # remaining iterations times per-iteration time, in minutes
            eta = (total_iter - idx) * iter_time / 60.0
            print(f'Epoch {epoch} [{idx}/{total_iter}], '
                  f'loss={loss.item():.3f}, time={iter_time:.2f}, ETA={eta:.2f}')

    return losses.avg
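# train_epoch here (and validate_epoch/test further down) uses Metrics as an
# AverageMeter-style scalar accumulator with update(val, n), .val and .avg.
# A minimal sketch of that assumed interface:
class Metrics:
    """Weighted running average of a scalar, e.g. a per-batch loss."""

    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val        # last observed value
        self.sum += val * n   # weight by batch size n
        self.count += n
        self.avg = self.sum / self.count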
def test(model, dataloader, params):
    """Evaluate the model on the test set."""
    val_data = tqdm(dataloader.data_iterator(data_type='test',
                                             batch_size=params.batch_size),
                    total=(dataloader.size()[0] // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()

    model.eval()
    with torch.no_grad():
        for data, labels in val_data:
            data = torch.tensor(data, dtype=torch.long).to(params.device)
            labels = torch.tensor(labels, dtype=torch.long).to(params.device)
            batch_masks = data != 0  # mask out padding tokens

            loss, logits = model(data, attention_mask=batch_masks, labels=labels)
            predicted = logits.max(2)[1]

            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=batch_masks.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='TEST', loss='{:05.3f}'.format(loss_avg()))

    metrics.loss = loss_avg()
    return metrics
def run_experiment(self, load_controller, expert_demos):
    """Model predictive control.

    Arguments:
        load_controller (bool): If True, load mpc controller.
        expert_demos (bool): If True, initialize training set with extra
            expert demonstrations.
    """
    if load_controller:
        self.mpc = torch.load(os.path.join(self.savedir, 'mpc.pth'))
    else:
        # Initial random rollouts
        obs, acts, lengths, _, _ = self._sample_rollouts(self.init_steps, actor=self.mpc)

        if expert_demos:
            obs_expert, acts_expert = self._load_expert_demos()
            obs = obs + tuple(obs_expert)
            acts = acts + tuple(acts_expert)

        # Train initial model
        self.mpc.train_initial(obs, acts)

    # Training loop
    step = self.mpc.X.shape[0]
    while step < self.total_steps:
        # Sample rollouts
        start = time.time()
        print(f"Rolling out {self.train_freq} timesteps...")
        obs, acts, lengths, scores, rollouts_metrics = self._sample_rollouts(
            self.train_freq, actor=self.mpc)
        step += sum(lengths)
        print_rollout_stats(obs[0], acts[0], lengths[0], scores[0])

        act_metrics = Metrics()
        flat_rollouts_metrics = [item for sublist in rollouts_metrics for item in sublist]
        for x in flat_rollouts_metrics:
            act_metrics.store(x)
        for k, v in act_metrics.average().items():
            self.logger.log_scalar(k, v, step)

        self.logger.log_scalar("score/avg_length", np.mean(lengths), step)
        self.logger.log_scalar("score/avg_score", np.mean(scores), step)
        self.logger.log_scalar("time/rollout_time", (time.time() - start), step)

        # Train model
        train_metrics, weights = self.mpc.train_iteration(obs, acts)
        for k, v in train_metrics.items():
            self.logger.log_scalar(k, v, step)
        for k, v in weights.items():
            self.logger.log_histogram(k, v, step)

        # Save model
        torch.save(self.mpc, os.path.join(self.savedir, 'mpc.pth'))
def test(exp_name):
    print('loading data......')
    test_data = getattr(datasets, opt.dataset)(opt.root, opt.test_data_dir,
                                               mode='test', size=opt.testsize)
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False,
                                 num_workers=opt.num_workers)
    total_batch = len(test_data)  # batch_size is 1

    model, _, _ = generate_model(opt)
    model.eval()

    # metrics logger initialization
    metrics = Metrics(['recall', 'specificity', 'precision', 'F1', 'F2',
                       'ACC_overall', 'IoU_poly', 'IoU_bg', 'IoU_mean'])
    logger = get_logger('./results/' + exp_name + '.log')

    with torch.no_grad():
        for i, data in enumerate(test_dataloader):
            img, gt = data['image'], data['label']
            if opt.use_gpu:
                img = img.cuda()
                gt = gt.cuda()

            output = model(img)
            _recall, _specificity, _precision, _F1, _F2, \
                _ACC_overall, _IoU_poly, _IoU_bg, _IoU_mean = evaluate(output, gt)

            metrics.update(recall=_recall, specificity=_specificity,
                           precision=_precision, F1=_F1, F2=_F2,
                           ACC_overall=_ACC_overall, IoU_poly=_IoU_poly,
                           IoU_bg=_IoU_bg, IoU_mean=_IoU_mean)

    metrics_result = metrics.mean(total_batch)

    print("Test Result:")
    logger.info('recall: %.4f, specificity: %.4f, precision: %.4f, F1: %.4f, F2: %.4f, '
                'ACC_overall: %.4f, IoU_poly: %.4f, IoU_bg: %.4f, IoU_mean: %.4f'
                % (metrics_result['recall'], metrics_result['specificity'],
                   metrics_result['precision'], metrics_result['F1'],
                   metrics_result['F2'], metrics_result['ACC_overall'],
                   metrics_result['IoU_poly'], metrics_result['IoU_bg'],
                   metrics_result['IoU_mean']))
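# The segmentation test/valid functions above call an evaluate(output, gt)
# helper that returns nine scores. A hedged sketch of one plausible
# implementation from pixel-level confusion counts, assuming binary masks
# and a 0.5 sigmoid threshold (the project's actual version may differ):
import torch

def evaluate(output, gt, eps=1e-8):
    pred = (torch.sigmoid(output) > 0.5).float()
    gt = (gt > 0.5).float()
    tp = (pred * gt).sum()
    tn = ((1 - pred) * (1 - gt)).sum()
    fp = (pred * (1 - gt)).sum()
    fn = ((1 - pred) * gt).sum()

    recall = tp / (tp + fn + eps)
    specificity = tn / (tn + fp + eps)
    precision = tp / (tp + fp + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)
    f2 = 5 * precision * recall / (4 * precision + recall + eps)  # F-beta, beta=2
    acc_overall = (tp + tn) / (tp + tn + fp + fn)
    iou_poly = tp / (tp + fp + fn + eps)   # IoU of the foreground (polyp) class
    iou_bg = tn / (tn + fp + fn + eps)     # IoU of the background class
    iou_mean = (iou_poly + iou_bg) / 2
    return (recall, specificity, precision, f1, f2,
            acc_overall, iou_poly, iou_bg, iou_mean)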
def __init__(self, args):
    BaseModel.__init__(self, args)
    self.metrics = Metrics()    # evaluation metrics
    self.visual_images = []     # images to visualize
    self.visual_losses = []     # losses to visualize

    if self.args.mode == 'train':
        self.visual_images += ['train_confusion_matrix']
        self.visual_losses += ['train_loss', 'train_precision',
                               'train_recall', 'train_f1_score']
    if self.args.mode == 'valid':
        self.visual_images += ['valid_confusion_matrix']
        self.visual_losses += ['valid_loss', 'valid_precision',
                               'valid_recall', 'valid_f1_score']
    if self.args.mode == 'test':
        self.visual_images += ['test_confusion_matrix']
        self.visual_losses += ['test_loss', 'test_precision',
                               'test_recall', 'test_f1_score']
def validate(self, val_loader, models, criterions, last_best_epochs):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()  # note: fed with top-2 accuracy below (topk=(1, 2))
    metrics = Metrics()
    losses_per_class = LossPerClassMeter(len(val_loader.dataset.dataset.classes))

    models['backbone'].eval()
    models['module'].eval()

    end = time.time()
    with torch.no_grad():
        for i, (data_x, data_y) in enumerate(val_loader):
            data_y = data_y.cuda(non_blocking=True)
            data_x = data_x.cuda(non_blocking=True)

            output = models['backbone'](data_x)
            loss = criterions['backbone'](output, data_y)

            losses_per_class.update(loss.cpu().detach().numpy(), data_y.cpu().numpy())
            loss = torch.sum(loss) / loss.size(0)

            acc = accuracy(output.data, data_y, topk=(1, 2))
            losses.update(loss.data.item(), data_x.size(0))
            top1.update(acc[0].item(), data_x.size(0))
            top5.update(acc[1].item(), data_x.size(0))
            metrics.add_mini_batch(data_y, output)

            batch_time.update(time.time() - end)
            end = time.time()

            if i % self.args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Last best epoch {last_best_epoch}'
                      .format(i, len(val_loader), batch_time=batch_time, loss=losses,
                              top1=top1, last_best_epoch=last_best_epochs))

    report = metrics.get_report(target_names=val_loader.dataset.dataset.classes)
    print(' * Acc@1 {top1.avg:.3f}\t * Prec {0}\t * Recall {1} * Acc@5 {top5.avg:.3f}\t'
          .format(report['macro avg']['precision'], report['macro avg']['recall'],
                  top1=top1, top5=top5))

    return pd.DataFrame.from_dict({f'{k}-val-loss': losses_per_class.avg[i]
                                   for i, k in enumerate(val_loader.dataset.dataset.classes)},
                                  orient='index').T, \
        pd.DataFrame.from_dict(report)
def valid_loss_function(model, val_loader, epoch, save_freq):
    psnr = 0
    final_loss = 0
    g_loss = np.zeros((5000, 1))
    model.eval()

    for batch_idx, (inputs, targets) in enumerate(val_loader):
        in_img = inputs
        target = targets

        out_img = model(in_img)
        loss = reduce_mean(out_img['output1'], target)
        g_loss[batch_idx] = loss.data.cpu()
        final_loss = np.mean(g_loss[np.where(g_loss)])
        print("%d %d Loss=%.10f" % (epoch, batch_idx, final_loss))

        if epoch % save_freq == 0:
            if not os.path.isdir(result_dir_val + '%04d' % epoch):
                os.makedirs(result_dir_val + '%04d' % epoch)

            out_pil = transforms.ToPILImage()(out_img['output1'][0].cpu())
            target_img = transforms.ToPILImage()(target[0].cpu())
            out_pil.save(result_dir_val + '/%04d/' % epoch +
                         '%04dDBN-FlorinDS_MSE_FS_30_00_train_%d.jpg' % (epoch, batch_idx))
            target_img.save(result_dir_val + '/%04d/' % epoch +
                            '%04dDBN-FlorinDS_MSE_FS_30_00_target_%d.jpg' % (epoch, batch_idx))

            # PSNR is only computed on save epochs; otherwise 0 is returned
            psnr = metrics.PSNR(target_img, out_pil)

    return final_loss, psnr
def debug_test_set():
    clf = pickle_load(os.path.join('models6', 'strong_classifier_276.pkl'))
    trainer = Trainer(mp_pool=Pool(8))
    trainer.load_data('data')

    print("Strong classifier test metrics:")
    predictions = clf.classify_batch(trainer.test_ds.X_integral)
    print(Metrics(predictions, trainer.test_ds.y))
def val(args, model=None, current_epoch=0):
    top1 = AverageMeter()
    top5 = AverageMeter()
    top1.reset()
    top5.reset()

    if model is None:
        model = get_model(args)
    model.eval()

    _, val_loader = data_loader(args, test_path=True)
    save_atten = SAVE_ATTEN(save_dir=args.save_atten_dir)

    global_counter = 0
    prob = None
    gt = None
    for idx, dat in tqdm(enumerate(val_loader)):
        img_path, img, label_in = dat
        global_counter += 1

        if args.tencrop == 'True':
            bs, ncrops, c, h, w = img.size()
            img = img.view(-1, c, h, w)
            label_input = label_in.repeat(10, 1)
            label = label_input.view(-1)
        else:
            label = label_in

        img, label = img.cuda(), label.cuda()
        img_var, label_var = Variable(img), Variable(label)

        logits = model(img_var, label_var)
        logits0 = logits[0]
        logits0 = F.softmax(logits0, dim=1)
        if args.tencrop == 'True':
            logits0 = logits0.view(bs, ncrops, -1).mean(1)

        # Calculate classification results
        prec1_1, prec5_1 = Metrics.accuracy(logits0.cpu().data, label_in.long(), topk=(1, 5))
        top1.update(prec1_1[0], img.size()[0])
        top5.update(prec5_1[0], img.size()[0])

        # last feature maps (heatmap segmentation saving is disabled)
        np_last_featmaps = logits[-1].cpu().data.numpy()
        np_scores, pred_labels = torch.topk(logits0, k=args.num_classes, dim=1)
        pred_np_labels = pred_labels.cpu().data.numpy()
        save_atten.save_top_5_pred_labels(pred_np_labels[:, :5], img_path, global_counter)

    print('Top1:', top1.avg, 'Top5:', top5.avg)
def _evaluate(self, model_param, criterion):
    # Evaluate on CPU: with my resources the full training set does not
    # fit on the GPU.
    with torch.no_grad():  # operations inside don't track history
        self.model_eval.load_state_dict(state_dict=model_param)
        self.model_eval.eval()
        gc.collect()

        val_prob = self.model_eval(self.validation_set.x_data)
        val_pred = val_prob.argmax(1)
        val_loss = criterion(val_prob, self.validation_set.y_data)
        val_acc = (val_pred == self.validation_set.y_data.long()).float().mean()
        val_f1 = metrics.f1_score(self.validation_set.y_data.long().numpy(),
                                  val_pred.numpy(), average='macro')
        val_m = Metrics(self.validation_set.y_data, val_pred, self.labels)
        val_b = val_m.balanced_score()
        gc.collect()

        # Evaluating the training set uses too much CPU, so for now the
        # validation values are reused for the "train" slots.
        train_loss, train_acc, train_f1, train_b = val_loss, val_acc, val_f1, val_b

    return (train_loss.item(), train_acc, train_f1,
            val_loss.item(), val_acc, val_f1, train_b, val_b)
def validate_epoch(self):
    """Validate after training an epoch."""
    self.model.eval()  # set model to evaluate mode
    losses = Metrics()

    with torch.no_grad():
        for idx, (x, y) in enumerate(self.val_data_loader):
            x = x.to(self.device)
            y = y.to(self.device)

            y_pred = self.model(x)
            loss = self.criterion(y_pred, y)

            losses.update(loss.item(), x.size(0))
            self.writer.add_scalar('val/current_loss', losses.val, self.val_step)
            self.writer.add_scalar('val/avg_loss', losses.avg, self.val_step)
            self.val_step += 1

    return losses.avg
def train(models, optimizers, dataset, corpus, ckpts, params, args):
    epoch_num = params.epoch_num
    batch_epoch = params.batch_epoch
    autoencoder.noise_radius = params.noise_radius

    step = 0
    for e in range(epoch_num, params.max_epoch):
        for batch, (source, target) in islice(enumerate(dataset), batch_epoch, None):
            metrics = Metrics(epoch=e, max_epoch=params.max_epoch)

            for p in range(params.epoch_ae):
                ae_metrics = train_autoencoder(models, optimizers, source, target, params)
                metrics.accum(ae_metrics)
            metrics['ae_loss'] /= params.epoch_ae
            metrics['acc'] /= params.epoch_ae

            batch_epoch += 1
            # anneal noise every 5 batch_epoch for now
            if batch_epoch % 5 == 0:
                autoencoder.noise_radius = autoencoder.noise_radius * 0.995

            if batch_epoch % params.print_every == 0:
                ckpts.save()
                logging.info('--- Epoch {}/{} Batch {} ---'.format(
                    e + 1, metrics['max_epoch'], batch_epoch))
                logging.info('Loss {:.4f}'.format(float(metrics['ae_loss'])))

        params.batch_epoch = batch_epoch
        params.epoch_num = e
        params.noise_radius = autoencoder.noise_radius
        params.save(os.path.join(args.model_dir, 'params.json'))

        # Floydhub metrics
        print('{{"metric": "acc", "value": {}, "step": {}}}'.format(
            float(metrics['acc']), step))
        print('{{"metric": "ae_loss", "value": {}, "step": {}}}'.format(
            float(metrics['ae_loss']), step))
        step += 1

        tb_writer.add_scalar('train/acc', metrics['acc'], step)
        tb_writer.add_scalar('train/ae_loss', metrics['ae_loss'], step)
        batch_epoch = 0
def __init__(self, hparams, dataset: HeteroNetDataset, metrics=["precision"]):
    num_edge = len(dataset.edge_index_dict)
    num_layers = hparams.num_layers
    num_class = dataset.n_classes
    self.collate_fn = hparams.collate_fn
    self.multilabel = dataset.multilabel
    num_nodes = dataset.num_nodes_dict[dataset.head_node_type]

    if dataset.in_features:
        w_in = dataset.in_features
    else:
        w_in = hparams.embedding_dim
    w_out = hparams.embedding_dim

    super(HAN, self).__init__(num_edge=num_edge, w_in=w_in, w_out=w_out,
                              num_class=num_class, num_nodes=num_nodes,
                              num_layers=num_layers)

    if not hasattr(dataset, "x") and not hasattr(dataset, "x_dict"):
        if num_nodes > 10000:
            # keep very large embedding tables on the CPU
            self.embedding = {
                dataset.head_node_type: torch.nn.Embedding(
                    num_embeddings=num_nodes,
                    embedding_dim=hparams.embedding_dim).cpu()
            }
        else:
            self.embedding = torch.nn.Embedding(
                num_embeddings=num_nodes, embedding_dim=hparams.embedding_dim)

    self.dataset = dataset
    self.head_node_type = self.dataset.head_node_type
    hparams.n_params = self.get_n_params()

    self.train_metrics = Metrics(prefix="", loss_type=hparams.loss_type,
                                 n_classes=dataset.n_classes,
                                 multilabel=dataset.multilabel, metrics=metrics)
    self.valid_metrics = Metrics(prefix="val_", loss_type=hparams.loss_type,
                                 n_classes=dataset.n_classes,
                                 multilabel=dataset.multilabel, metrics=metrics)
    self.test_metrics = Metrics(prefix="test_", loss_type=hparams.loss_type,
                                n_classes=dataset.n_classes,
                                multilabel=dataset.multilabel, metrics=metrics)

    hparams.name = self.name()
    hparams.inductive = dataset.inductive
    self.hparams = hparams
def test(self):
    self.model.eval()
    losses = Metrics()

    with torch.no_grad():
        for idx, (x, y) in enumerate(self.test_data_loader):
            x = x.to(self.device)
            y = y.to(self.device)

            y_pred = self.model(x)
            loss = self.criterion(y_pred, y)
            losses.update(loss.item(), x.size(0))
            # (binary accuracy bookkeeping via get_mean_score is disabled)

    logger.info(f'test loss={losses.avg}')
    print(losses.avg)
    return losses.avg
def main():
    username = password = server = None
    parser = argparse.ArgumentParser(description='Show all boards in JIRA')

    cfg = None
    try:
        cf = ConfigFile('config.yaml')
        cfg = cf.config
        username = cfg['username']
        password = cfg['password']
        server = cfg['server']
    except FileNotFoundError:
        print("Config file does not exist, falling back to argument parsing")

    parser.add_argument('-u', help="Provide User Name")
    parser.add_argument('-p', help="Provide Password")
    parser.add_argument('-s', help="Provide Server URL")
    args = parser.parse_args()

    if cfg is None:
        username = args.u
        password = args.p
        server = args.s

    jc = JiraConn(username, password, server)
    m = Metrics(jc.jira)
    m.list_boards()
def valid(model, valid_dataloader, total_batch):
    model.eval()

    # metrics logger initialization
    metrics = Metrics(['recall', 'specificity', 'precision', 'F1', 'F2',
                       'ACC_overall', 'IoU_poly', 'IoU_bg', 'IoU_mean'])

    with torch.no_grad():
        bar = tqdm(enumerate(valid_dataloader), total=total_batch)
        for i, data in bar:
            img, gt = data['image'], data['label']
            if opt.use_gpu:
                img = img.cuda()
                gt = gt.cuda()

            output = model(img)
            _recall, _specificity, _precision, _F1, _F2, \
                _ACC_overall, _IoU_poly, _IoU_bg, _IoU_mean = evaluate(output, gt)

            metrics.update(recall=_recall, specificity=_specificity,
                           precision=_precision, F1=_F1, F2=_F2,
                           ACC_overall=_ACC_overall, IoU_poly=_IoU_poly,
                           IoU_bg=_IoU_bg, IoU_mean=_IoU_mean)

    metrics_result = metrics.mean(total_batch)
    model.train()  # restore training mode
    return metrics_result
def cal_mAP(logits0, label_var, prob, gt):
    assert logits0.size() == label_var.size()
    res = torch.sigmoid(logits0)
    res = res.cpu().data.numpy()
    gt_np = label_var.cpu().data.numpy()

    # accumulate predictions and ground truth across batches
    if prob is None:
        prob = res
        gt = gt_np
    else:
        prob = np.r_[prob, res]
        gt = np.r_[gt, gt_np]

    cls_mAP = Metrics.get_mAP(gt, prob)
    return cls_mAP, prob, gt
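# Metrics.get_mAP above is assumed to average per-class average precision over
# the accumulated multi-hot labels. A hedged sketch using scikit-learn
# (the project's own implementation may differ):
import numpy as np
from sklearn.metrics import average_precision_score

def get_mAP(gt, prob):
    """Mean of per-class AP, skipping classes with no positive samples."""
    aps = [average_precision_score(gt[:, c], prob[:, c])
           for c in range(gt.shape[1]) if gt[:, c].any()]
    return np.mean(aps)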
def test(model: nn.Module, device: torch.device, test_loader: DataLoader,
         criterion: nn.Module, text_transform: Callable, log_every=40):
    print('Evaluating...')
    model.eval()
    test_cer, test_wer, test_loss = [], [], []
    data_len = len(test_loader)

    with torch.no_grad():
        for i, _data in enumerate(test_loader):
            spectrograms, labels, input_lengths, label_lengths = _data
            spectrograms, labels = spectrograms.to(device), labels.to(device)

            output = model(spectrograms)        # (batch, time, n_class)
            output = F.log_softmax(output, dim=2)
            output = output.transpose(0, 1)     # (time, batch, n_class)

            loss = criterion(output, labels, input_lengths, label_lengths)
            test_loss.append(loss.item())

            decoded_preds, decoded_targets = greedy_decode(
                output.transpose(0, 1), labels, label_lengths, text_transform)
            test_cer.append(word_error_rate(decoded_targets, decoded_preds, use_cer=True))
            test_wer.append(word_error_rate(decoded_targets, decoded_preds))

            if i % log_every == 0:
                print(f'{i}/{data_len}')
                print(f'Test WER: {test_wer[-1]}; CER: {test_cer[-1]}')
                for p, t in zip(decoded_preds, decoded_targets):
                    print(f'Prediction: [{p}]\t Ground Truth: [{t}]')

    avg_cer = np.mean(test_cer)
    avg_wer = np.mean(test_wer)
    avg_loss = np.mean(test_loss)
    print(f'Test set: Average loss: {avg_loss}, Average CER: {avg_cer} Average WER: {avg_wer}')
    return Metrics(loss=avg_loss, cer=avg_cer, wer=avg_wer)
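# word_error_rate above is assumed to be edit distance normalized by reference
# length, over words (WER) or characters (use_cer=True). A minimal sketch of
# that assumed behavior using a one-row Levenshtein DP:
def word_error_rate(targets, preds, use_cer=False):
    def edit_distance(ref, hyp):
        d = list(range(len(hyp) + 1))  # DP row for the empty reference
        for i, r in enumerate(ref, 1):
            prev, d[0] = d[0], i
            for j, h in enumerate(hyp, 1):
                # deletion, insertion, substitution/match
                prev, d[j] = d[j], min(d[j] + 1, d[j - 1] + 1, prev + (r != h))
        return d[-1]

    errors, length = 0, 0
    for t, p in zip(targets, preds):
        ref = list(t) if use_cer else t.split()
        hyp = list(p) if use_cer else p.split()
        errors += edit_distance(ref, hyp)
        length += len(ref)
    return errors / max(length, 1)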
def train(model: nn.Module, device: torch.device, train_loader: DataLoader,
          criterion: nn.Module, optimizer: torch.optim.Optimizer, scheduler,
          epoch: int, iter_meter, tb_writer: SummaryWriter, log_every=20) -> Metrics:
    model.train()
    data_len = len(train_loader)
    epoch_loss = []
    print('Training')

    for batch_idx, _data in enumerate(train_loader):
        spectrograms, labels, input_lengths, label_lengths = _data
        spectrograms, labels = spectrograms.to(device), labels.to(device)

        optimizer.zero_grad()
        output = model(spectrograms)        # (batch, time, n_class)
        output = F.log_softmax(output, dim=2)
        output = output.transpose(0, 1)     # (time, batch, n_class)

        loss = criterion(output, labels, input_lengths, label_lengths)
        loss.backward()
        loss_scalar = loss.item()

        optimizer.step()
        if scheduler:
            scheduler.step()
        iter_meter.step()

        if batch_idx % log_every == 0 or batch_idx == data_len - 1:
            print(f'Train Epoch: {epoch} \t batch: {batch_idx}/{data_len}')
            print(f'Loss: {loss_scalar}')

        epoch_loss.append(loss_scalar)
        tb_writer.add_scalar('batch_loss', loss_scalar, iter_meter.get())

    return Metrics(loss=np.mean(epoch_loss))
def xval(data_path, adaptor, classifier, summ):
    input_ = Input(FLAGS.xval_batch_size, FLAGS.num_points)
    waves, labels = input_(data_path)

    # Compute predictions for the cross-validation batch.
    if FLAGS.adp:
        logits = adaptor(waves)
        logits = classifier(logits)
    else:
        logits = classifier(waves, expand_dims=True)
    logits = tf.argmax(logits, axis=-1)

    metrics = Metrics("accuracy")
    with tf.control_dependencies(
            [tf.assert_equal(tf.rank(labels), tf.rank(logits))]):
        score, xval_accu_op = metrics(labels, logits)

    assert summ, "invalid summary helper object"
    summ.register('xval', 'accuracy', score)
    xval_summ_op = summ('xval')

    return xval_accu_op, xval_summ_op
def __init__(self, hparams, dataset, metrics, *args):
    super().__init__(*args)

    self.train_metrics = Metrics(prefix="", loss_type=hparams.loss_type,
                                 n_classes=dataset.n_classes,
                                 multilabel=dataset.multilabel, metrics=metrics)
    self.valid_metrics = Metrics(prefix="val_", loss_type=hparams.loss_type,
                                 n_classes=dataset.n_classes,
                                 multilabel=dataset.multilabel, metrics=metrics)
    self.test_metrics = Metrics(prefix="test_", loss_type=hparams.loss_type,
                                n_classes=dataset.n_classes,
                                multilabel=dataset.multilabel, metrics=metrics)

    hparams.name = self.name()
    hparams.inductive = dataset.inductive
    self.hparams = hparams
def train_model(args, model, train, dev, teacher_model=None, save_path=None, maxsteps=None):
    if args.tensorboard and (not args.debug):
        from tensorboardX import SummaryWriter
        writer = SummaryWriter('./runs/{}'.format(args.prefix + args.hp_str))

    # optimizer
    if args.optimizer == 'Adam':
        opt = torch.optim.Adam([p for p in model.parameters() if p.requires_grad],
                               betas=(0.9, 0.98), eps=1e-9)
    else:
        raise NotImplementedError

    # if resuming training
    if (args.load_from is not None) and (args.resume):
        with torch.cuda.device(args.gpu):  # very important.
            offset, opt_states = torch.load(
                './models/' + args.load_from + '.pt.states',
                map_location=lambda storage, loc: storage.cuda())
            opt.load_state_dict(opt_states)
    else:
        offset = 0

    # metrics
    if save_path is None:
        save_path = args.model_name
    best = Best(max, 'corpus_bleu', 'corpus_gleu', 'gleu', 'loss', 'i',
                model=model, opt=opt, path=save_path, gpu=args.gpu)
    train_metrics = Metrics('train', 'loss', 'real', 'fake')
    dev_metrics = Metrics('dev', 'loss', 'gleu', 'real_loss', 'fake_loss',
                          'distance', 'alter_loss', 'distance2',
                          'fertility_loss', 'corpus_gleu')
    progressbar = tqdm(total=args.eval_every, desc='start training.')

    for iters, batch in enumerate(train):
        iters += offset

        if iters % args.save_every == 0:
            args.logger.info('save (back-up) checkpoints at iter={}'.format(iters))
            with torch.cuda.device(args.gpu):
                torch.save(best.model.state_dict(),
                           '{}_iter={}.pt'.format(args.model_name, iters))
                torch.save([iters, best.opt.state_dict()],
                           '{}_iter={}.pt.states'.format(args.model_name, iters))

        if iters % args.eval_every == 0:
            progressbar.close()
            dev_metrics.reset()

            if args.distillation:
                outputs_course = valid_model(args, model, dev, dev_metrics,
                                             distillation=True, teacher_model=None)
            outputs_data = valid_model(args, model, dev,
                                       None if args.distillation else dev_metrics,
                                       teacher_model=None, print_out=True)

            if args.tensorboard and (not args.debug):
                writer.add_scalar('dev/GLEU_sentence_', dev_metrics.gleu, iters)
                writer.add_scalar('dev/Loss', dev_metrics.loss, iters)
                writer.add_scalar('dev/GLEU_corpus_', outputs_data['corpus_gleu'], iters)
                writer.add_scalar('dev/BLEU_corpus_', outputs_data['corpus_bleu'], iters)
                if args.distillation:
                    writer.add_scalar('dev/GLEU_corpus_dis', outputs_course['corpus_gleu'], iters)
                    writer.add_scalar('dev/BLEU_corpus_dis', outputs_course['corpus_bleu'], iters)

            if not args.debug:
                best.accumulate(outputs_data['corpus_bleu'], outputs_data['corpus_gleu'],
                                dev_metrics.gleu, dev_metrics.loss, iters)
                args.logger.info(
                    'the best model is achieved at {}, average greedy GLEU={}, '
                    'corpus GLEU={}, corpus BLEU={}'.format(
                        best.i, best.gleu, best.corpus_gleu, best.corpus_bleu))
                args.logger.info('model:' + args.prefix + args.hp_str)

            # --- set up a new progress bar ---
            progressbar = tqdm(total=args.eval_every, desc='start training.')

        if maxsteps is None:
            maxsteps = args.maximum_steps
        if iters > maxsteps:
            args.logger.info('reach the maximum updating steps.')
            break

        # --- training --- #
        model.train()

        def get_learning_rate(i, lr0=0.1, disable=False):
            if not disable:
                return lr0 * 10 / math.sqrt(args.d_model) * min(
                    1 / math.sqrt(i), i / (args.warmup * math.sqrt(args.warmup)))
            return 0.00002

        opt.param_groups[0]['lr'] = get_learning_rate(iters + 1, disable=args.disable_lr_schedule)
        opt.zero_grad()

        # prepare the data
        inputs, input_masks, \
            targets, target_masks, \
            sources, source_masks, \
            encoding, batch_size = model.quick_prepare(batch, args.distillation)
        input_reorder, fertility_cost, decoder_inputs = None, None, inputs
        batch_fer = batch.fer_dec if args.distillation else batch.fer

        if type(model) is FastTransformer:
            inputs, input_reorder, input_masks, fertility_cost = model.prepare_initial(
                encoding, sources, source_masks, input_masks, batch_fer)

        # Maximum Likelihood Training
        if not args.finetuning:
            loss = model.cost(targets, target_masks,
                              out=model(encoding, source_masks, inputs, input_masks))
            if args.fertility:
                loss += fertility_cost
        else:
            # finetuning: student loss (MLE)
            if not args.fertility:
                decoding, out, probs = model(encoding, source_masks, inputs, input_masks,
                                             return_probs=True, decoding=True)
                loss_student = model.batched_cost(targets, target_masks, probs)
                decoder_masks = input_masks
            else:
                # Note that MLE and decoding give different translations,
                # so the same code has to run twice.
                # truth
                decoding, out, probs = model(encoding, source_masks, inputs, input_masks,
                                             decoding=True, return_probs=True)
                loss_student = model.cost(targets, target_masks, out=out)
                decoder_masks = input_masks

                # baseline
                decoder_inputs_b, _, decoder_masks_b, _, _ = model.prepare_initial(
                    encoding, sources, source_masks, input_masks, None, mode='mean')
                decoding_b, out_b, probs_b = model(
                    encoding, source_masks, decoder_inputs_b, decoder_masks_b,
                    decoding=True, return_probs=True)  # decode again

                # reinforce
                decoder_inputs_r, _, decoder_masks_r, _, _ = model.prepare_initial(
                    encoding, sources, source_masks, input_masks, None, mode='reinforce')
                decoding_r, out_r, probs_r = model(
                    encoding, source_masks, decoder_inputs_r, decoder_masks_r,
                    decoding=True, return_probs=True)  # decode again

            if args.fertility:
                loss_student += fertility_cost

            # teacher loss (RKL + REINFORCE)
            teacher_model.eval()
            if not args.fertility:
                inputs_student_index, _, targets_student_soft, _, _, _, encoding_teacher, _ = \
                    model.quick_prepare(batch, False, decoding, probs,
                                        decoder_masks, decoder_masks, source_masks)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks, return_probs=True)
                loss_teacher = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks, probs_teacher.detach())
                loss = (1 - args.beta1) * loss_teacher + args.beta1 * loss_student  # final results
            else:
                inputs_student_index, _, targets_student_soft, _, _, _, encoding_teacher, _ = \
                    model.quick_prepare(batch, False, decoding, probs,
                                        decoder_masks, decoder_masks, source_masks)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks, return_probs=True)
                loss_teacher = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks, probs_teacher.detach())

                inputs_student_index, _ = model.prepare_inputs(
                    batch, decoding_b, False, decoder_masks_b)
                targets_student_soft, _ = model.prepare_targets(
                    batch, probs_b, False, decoder_masks_b)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks_b, return_probs=True)
                _, loss_1 = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks_b, probs_teacher.detach(), True)

                inputs_student_index, _ = model.prepare_inputs(
                    batch, decoding_r, False, decoder_masks_r)
                targets_student_soft, _ = model.prepare_targets(
                    batch, probs_r, False, decoder_masks_r)
                out_teacher, probs_teacher = teacher_model(
                    encoding_teacher, source_masks,
                    inputs_student_index.detach(), decoder_masks_r, return_probs=True)
                _, loss_2 = teacher_model.batched_cost(
                    targets_student_soft, decoder_masks_r, probs_teacher.detach(), True)

                # REINFORCE: reward is the baseline loss minus the sampled loss
                rewards = -(loss_2 - loss_1).data
                rewards = rewards - rewards.mean()
                rewards = rewards.expand_as(source_masks)
                rewards = rewards * source_masks
                model.predictor.saved_fertilities.reinforce(
                    0.1 * rewards.contiguous().view(-1, 1))
                loss = (1 - args.beta1) * loss_teacher + args.beta1 * loss_student

        # accumulate the training metrics
        train_metrics.accumulate(batch_size, loss, print_iter=None)
        train_metrics.reset()

        # train the student
        if args.finetuning and args.fertility:
            torch.autograd.backward(
                (loss, model.predictor.saved_fertilities),
                (torch.ones(1).cuda(loss.get_device()), None))
        else:
            loss.backward()
        opt.step()

        info = 'training step={}, loss={:.3f}, lr={:.5f}'.format(
            iters, export(loss), opt.param_groups[0]['lr'])
        if args.finetuning:
            info += '| NA:{:.3f}, AR:{:.3f}'.format(export(loss_student), export(loss_teacher))
            if args.fertility:
                info += '| RL: {:.3f}'.format(export(rewards.mean()))
        if args.fertility:
            info += '| RE:{:.3f}'.format(export(fertility_cost))

        if args.tensorboard and (not args.debug):
            writer.add_scalar('train/Loss', export(loss), iters)

        progressbar.update(1)
        progressbar.set_description(info)
def train(train_data, val_data, user_list_train_filtered, user_list_val_filtered,
          user_beta_train, user_beta_val, k, dataset, eta=0.1, lamb=0.1,
          tolerance=1e-4, num_iter_val=5, num_total_iter_training=6,
          random_seed=786, kU=None, cv_flag=True, verbose=False):
    np.random.seed(random_seed)

    user_feat = val_data.drop(['user', 'label'], axis=1).values
    user_feat_train = train_data.drop(['user', 'label'], axis=1).values
    w = np.random.normal(0, 1, user_feat.shape[1])

    metrics = Metrics()
    metrics.eta_lr = eta
    metrics.lamb_reg = lamb
    print("running for eta", eta, "and lambda", lamb)

    for i in range(num_total_iter_training):
        grad, loss = subgradient(w, train_data, user_list_train_filtered,
                                 user_beta_train, k)
        grad += lamb * w
        w = w - (eta / np.sqrt(i + 1)) * grad

        metrics.w_list.append(w)
        metrics.loss_opt_list_train.append(loss)

        y_scores = user_feat_train.dot(w)
        data_true = deepcopy(train_data)
        data_true['scores'] = y_scores
        data_true = data_true.sort_values(by='scores', ascending=False)
        data_true = data_true.reset_index(drop=True)
        metrics.micro_auc_rel_k_list_train.append(
            compute_micro(data_true, user_list_train_filtered, user_beta_train, w, k))

        if verbose:
            print('Epoch', i + 1, 'completed out of', num_total_iter_training,
                  'for prec@k loss train:', metrics.loss_opt_list_train[-1])
            print('Epoch', i + 1, 'completed out of', num_total_iter_training,
                  'for prec@k grad train:', np.linalg.norm(grad))

        # evaluate combined weights on the validation set
        if cv_flag and i % num_iter_val == 0:
            y_scores = user_feat.dot(w)
            data_true = deepcopy(val_data)
            data_true['scores'] = y_scores
            data_true = data_true.sort_values(by='scores', ascending=False)
            data_true = data_true.reset_index(drop=True)
            metrics.micro_auc_rel_k_list_val.append(
                compute_micro(data_true, user_list_val_filtered, user_beta_val, w, k))
            if verbose:
                print("\n")
                print('Epoch', i + 1, 'completed out of', num_total_iter_training,
                      'for prec@k loss val:', metrics.micro_auc_rel_k_list_val[-1])
                print("\n")

    return metrics, None
model.compile(loss=WeightedBinaryCrossEntropy(POS_RATIO),
              optimizer='rmsprop', metrics=['binary_accuracy', f1])
logger.debug('Model summary: %s', model.summary())

# Set tensorboard callback
tb = TensorBoard(log_dir='./learn_embedding_logs', histogram_freq=1,
                 write_graph=True, write_images=False)

# Metrics is now defined in utils
metrics = Metrics(logger)

# Train model
# NOTE: the TensorBoard callback is disabled to reduce model run time from
# approx 3 hours to 17 minutes
model.fit(x_train, y_train,
          validation_data=(x_dev, y_dev),
          epochs=EPOCHS,
          batch_size=BATCH_SIZE,
          # callbacks=[tb]
          )
from utils import Metrics

run_id = 'seg_model_gpu{}_n{}_bs{}_lr{}'.format(gpu_id, epochs, batch_size, learning_rate)
print('\n\nTraining', run_id)
save_path = run_id + '.pkl'

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
metrics = Metrics(train_loader.dataset.num_classes, train_loader.dataset.class_names)


# Used to keep track of statistics
class AverageMeter(object):
    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
def train(train_data, val_data, user_list_train_filtered, user_list_val_filtered,
          user_beta_train, user_beta_val, k, eta=0.1, lamb=0.1, num_iter_val=5,
          num_total_iter_training=6, n_classifiers=5, random_seed=786, verbose=False):
    np.random.seed(random_seed)

    # carve a validation split out of the training users
    user_list_val_filtered = user_list_train_filtered[
        0:int(0.2 * len(user_list_train_filtered))]
    user_list_train_filtered = list(
        set(user_list_train_filtered) - set(user_list_val_filtered))
    val_data = train_data[train_data['user'].isin(user_list_val_filtered)]
    train_data = train_data[train_data['user'].isin(user_list_train_filtered)]

    metrics = Metrics()
    metrics.eta_lr = eta
    metrics.lamb_reg = lamb

    # train one logistic-regression-style classifier per fold
    classifier_list = []
    kf = KFold(n_splits=n_classifiers, shuffle=True)
    features = train_data.drop(['user', 'label'], axis=1)
    labels = train_data['label']
    for _, split_indices in kf.split(features):
        split_features = features.iloc[split_indices].values
        split_labels = labels.iloc[split_indices].values
        num_examples = split_features.shape[0]

        w = np.random.normal(0, 1, (split_features.shape[1],))
        w = w / np.linalg.norm(w)
        for num_iter in np.arange(num_total_iter_training):
            scores = sigmoid(np.dot(split_features, w))
            loss = -1 / num_examples * np.sum(
                split_labels * np.log(scores) +
                (1 - split_labels) * np.log(1 - scores))
            print("loss is ", loss)

            dLdwx = (scores - split_labels) * scores * (1 - scores)
            grad = 1 / num_examples * np.sum(dLdwx.reshape(-1, 1) * split_features)
            grad += lamb * w
            print("grad is ", np.linalg.norm(grad))
            print("\n")

            w = w - (eta / np.sqrt(num_iter + 1)) * grad
            accuracy = np.sum(split_labels * (scores > 0.5) +
                              (1 - split_labels) * (scores < 0.5))
            print('accuracy: {}'.format(accuracy / num_examples))
        classifier_list.append(w)

    print('eta is ', eta, 'and lambda is ', lamb)
    print('\n')

    # rank classifiers by validation metric, then merge them greedily
    classifiers_with_metrics = []
    for w in classifier_list:
        user_feat = val_data.drop(['user', 'label'], axis=1).values
        y_scores = user_feat.dot(w)
        data_true = deepcopy(val_data)
        data_true['scores'] = y_scores
        data_true = data_true.sort_values(by='scores', ascending=False)
        data_true = data_true.reset_index(drop=True)
        metric = compute_micro(data_true, user_list_val_filtered, user_beta_train, w, k)
        classifiers_with_metrics.append((metric, w))
    classifiers_with_metrics.sort(reverse=True, key=lambda x: x[0])

    combined_w = classifiers_with_metrics[0][1]
    for _, w in classifiers_with_metrics[1:]:
        combined_w = merge_micro(val_data, combined_w, w,
                                 user_list_val_filtered, user_beta_train, k)

    # create dummy metrics: the "best iter" logic only needs the weights
    # and one validation loss entry
    metrics = Metrics()
    metrics.w_list.append(combined_w)
    metrics.micro_auc_rel_k_list_val.append(0)
    metrics.micro_auc_rel_k_list_train.append(0)
    metrics.loss_opt_list_train.append(0)

    return metrics, None
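# sigmoid above is assumed to be the standard logistic function applied
# elementwise to a NumPy array; a numerically stable sketch that avoids
# overflow in exp for large negative inputs:
import numpy as np

def sigmoid(x):
    out = np.empty_like(x, dtype=float)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))     # safe: exponent is <= 0
    exp_x = np.exp(x[~pos])                      # safe: exponent is < 0
    out[~pos] = exp_x / (1.0 + exp_x)
    return out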
        progressbar.set_description(info)

    if use_prog_bar:
        progressbar.close()
    return model.save_fast_weights()


# training start..
best = Best(max, 'corpus_bleu', 'i', model=model, opt=meta_opt,
            path=args.model_name, gpu=args.gpu)
train_metrics = Metrics('train', 'loss', 'real', 'fake')
dev_metrics = Metrics('dev', 'loss', 'gleu', 'real_loss', 'fake_loss',
                      'distance', 'alter_loss', 'distance2',
                      'fertility_loss', 'corpus_gleu')

# overall progress bar
progressbar = tqdm(total=args.eval_every, desc='start training')

while True:
    # ----- saving the checkpoint ----- #
    if iters % args.save_every == 0:
        args.logger.info('save (back-up) checkpoints at iter={}'.format(iters))
        with torch.cuda.device(args.gpu):
            torch.save(best.model.state_dict(),
                       '{}_iter={}.pt'.format(args.model_name, iters))
def val(args, model=None, current_epoch=0):
    top1 = AverageMeter()
    top5 = AverageMeter()
    top1.reset()
    top5.reset()

    if model is None:
        model, _ = get_model(args)
    model.eval()
    train_loader, val_loader = data_loader(args, test_path=True)

    save_atten = SAVE_ATTEN(save_dir='../save_bins/')

    global_counter = 0
    prob = None
    gt = None
    for idx, dat in tqdm(enumerate(val_loader)):
        img_path, img, label_in = dat
        global_counter += 1

        if args.tencrop == 'True':
            bs, ncrops, c, h, w = img.size()
            img = img.view(-1, c, h, w)
            label_input = label_in.repeat(10, 1)
            label = label_input.view(-1)
        else:
            label = label_in

        img, label = img.cuda(), label.cuda()
        img_var, label_var = Variable(img), Variable(label)

        logits = model(img_var, label_var)
        logits0 = logits[0]
        logits0 = F.softmax(logits0, dim=1)
        if args.tencrop == 'True':
            logits0 = logits0.view(bs, ncrops, -1).mean(1)

        # Calculate classification results
        if args.onehot == 'True':
            val_mAP, prob, gt = cal_mAP(logits0, label_var, prob, gt)
        else:
            prec1_1, prec5_1 = Metrics.accuracy(logits0.cpu().data, label_in.long(), topk=(1, 5))
            top1.update(prec1_1[0], img.size()[0])
            top5.update(prec5_1[0], img.size()[0])

        # model.module.save_erased_img(img_path)
        last_featmaps = model.module.get_localization_maps()
        np_last_featmaps = last_featmaps.cpu().data.numpy()

        # Save sample images masked by heatmaps
        # (heatmap segmentation / top-5 prediction dumps are disabled)
        save_atten.get_masked_img(img_path, np_last_featmaps, label_in.numpy(),
                                  size=(0, 0), maps_in_dir=True, only_map=True)

    if args.onehot == 'True':
        print(val_mAP)
        print('AVG:', np.mean(val_mAP))
    else:
        print('Top1:', top1.avg, 'Top5:', top5.avg)
def test_function(model, test_loader, epoch, save_freq, result_dir_val,
                  experiment_name, plotter1, plotter2):
    psnrs = []
    ssims = []
    model.eval()

    for batch_idx, (inputs, targets, input_refs) in enumerate(test_loader):
        in_img = inputs
        target = targets
        input_ref = input_refs

        out_img = model(in_img)
        out_pil = transforms.ToPILImage()(out_img['output1'][0].cpu())
        target_pil = transforms.ToPILImage()(target[0].cpu())
        input_pil = transforms.ToPILImage()(input_ref[0].cpu())

        psnrs.append(metrics.PSNR(target_pil, out_pil))
        ssims.append(metrics.SSIM(target_pil, out_pil))

        # save a sample every 40 batches on save epochs
        if epoch % save_freq == 0:
            if not os.path.isdir(result_dir_val + '%04d' % epoch):
                os.makedirs(result_dir_val + '%04d' % epoch)
            if batch_idx % 40 == 0:
                input_pil.save(result_dir_val + '/%04d/' % epoch +
                               '%04dDBN_FD_20s_512_00_input_%d.jpg' % (epoch, batch_idx))
                out_pil.save(result_dir_val + '/%04d/' % epoch +
                             '%04dDBN_FD_20s_512_00_train_%d-PSNR-%f.jpg' % (
                                 epoch, batch_idx, psnrs[batch_idx]))
                target_pil.save(result_dir_val + '/%04d/' % epoch +
                                '%04dDBN_FD_20s_512_00_target_%d.jpg' % (epoch, batch_idx))

    average_psnr = sum(psnrs) / len(psnrs)
    average_ssim = sum(ssims) / len(ssims)
    print(epoch, average_psnr, average_ssim)
    writer.writerow([epoch, average_psnr, average_ssim])  # module-level csv writer
    plotter1.plot("average_PSNR", 'Val-PSNR', "Epoch", epoch, average_psnr)
    plotter2.plot("average_SSIM", 'Val-SSIM', "Epoch", epoch, average_ssim)