from collections import namedtuple

import torch
import torch.nn.functional as F
from torch import nn
from torchnet.meter import AverageValueMeter, ConfusionMeter

# Project-local dependencies (AnchorTargetLayer, Visualizer, cfg, losses)
# are assumed to be importable from the surrounding repo.


class Trainer(nn.Module):
    def __init__(self, head_detector):
        super(Trainer, self).__init__()
        self.head_detector = head_detector
        self.optimizer = self.head_detector.get_optimizer()
        self.anchor_target_layer = AnchorTargetLayer()
        self.loss_tuple = namedtuple('LossTuple',
                                     ['rpn_regr_loss',
                                      'rpn_cls_loss',
                                      'total_loss'])
        self.vis = Visualizer(env=cfg.VISDOM_ENV)
        self.rpn_cm = ConfusionMeter(2)  # confusion matrix with 2 classes
        self.meters = {k: AverageValueMeter()
                       for k in self.loss_tuple._fields}  # average losses

    def forward(self, x, gt_boxes, scale):
        batch = x.size()[0]
        assert batch == 1, 'Currently only batch size 1 is supported.'
        img_size = x.size()[2:]

        # Forward pass
        feature_map = self.head_detector.extractor(x)
        rpn_regr, rpn_cls, _, _, anchors = self.head_detector.rpn(
            feature_map, img_size, scale)

        # Remove the batch dimension
        gt_boxes, rpn_regr, rpn_cls = gt_boxes[0], rpn_regr[0], rpn_cls[0]

        # Generate GT regression targets and GT labels
        gt_regr, gt_cls = self.anchor_target_layer(
            gt_boxes.numpy(), anchors, img_size)
        gt_regr = torch.from_numpy(gt_regr).cuda().float()
        gt_cls = torch.from_numpy(gt_cls).cuda().long()

        # Compute the losses
        rpn_regr_loss = losses.rpn_regr_loss(rpn_regr, gt_regr, gt_cls)
        rpn_cls_loss = F.cross_entropy(rpn_cls, gt_cls, ignore_index=-1)
        total_loss = rpn_regr_loss + rpn_cls_loss
        loss_list = [rpn_regr_loss, rpn_cls_loss, total_loss]

        # Ignore samples with a label of -1
        valid_gt_cls = gt_cls[gt_cls > -1]
        valid_pred_cls = rpn_cls[gt_cls > -1]

        # Update the confusion matrix
        self.rpn_cm.add(valid_pred_cls.detach(), valid_gt_cls.detach())

        return self.loss_tuple(*loss_list)

    def train_step(self, x, boxes, scale):
        loss_tuple = self.forward(x, boxes, scale)
        self.optimizer.zero_grad()
        loss_tuple.total_loss.backward()
        self.optimizer.step()
        self.update_meters(loss_tuple)

    def update_meters(self, loss_tuple):
        loss_dict = {k: v.item() for k, v in loss_tuple._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_dict[key])

    def reset_meters(self):
        for meter in self.meters.values():
            meter.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        return {k: v.value()[0] for k, v in self.meters.items()}

    def save(self, path, save_optimizer=False):
        save_dict = dict()
        save_dict['model'] = self.head_detector.state_dict()
        save_dict['vis_info'] = self.vis.state_dict()
        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()
        torch.save(save_dict, path)
        self.vis.save([self.vis.env])

    def load(self, path, load_optimizer=True):
        state_dict = torch.load(path)
        self.head_detector.load_state_dict(state_dict['model'])
        if load_optimizer and 'optimizer' in state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])

    def scale_lr(self, decay=0.1):
        for param_group in self.optimizer.param_groups:
            param_group['lr'] *= decay
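# --- Usage sketch (illustrative, not from the original repo) ---
# A minimal epoch loop driving the Trainer above. `head_detector` and
# `dataloader` are hypothetical stand-ins for the repo's actual model and
# data pipeline; only Trainer's public methods are exercised.
def run_training(head_detector, dataloader, num_epochs=15, lr_decay_epoch=8):
    trainer = Trainer(head_detector).cuda()
    for epoch in range(num_epochs):
        trainer.reset_meters()  # fresh loss averages and confusion matrix
        for img, boxes, scale in dataloader:
            trainer.train_step(img.cuda().float(), boxes, scale)
        print('epoch {0}: {1}'.format(epoch, trainer.get_meter_data()))
        trainer.save('checkpoints/head_detector_{0}.pth'.format(epoch))
        if epoch == lr_decay_epoch:
            trainer.scale_lr(decay=0.1)  # one-time learning-rate step-down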
import os
import time

import torch as t
from torch import nn, optim
from torch.utils.data import DataLoader
from torchnet import meter

# Project-local dependencies (Visualizer, MNIST, network) are assumed to be
# importable from the surrounding repo.


def train(args, config):
    vis = Visualizer()

    # Both splits read the training files; the MNIST wrapper carves out a
    # validation subset via its `mode` argument.
    train_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config, mode='train')
    valid_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config, mode='valid')
    train_dataloader = DataLoader(train_set, config.batch_size,
                                  shuffle=True, num_workers=config.num_workers)
    valid_dataloader = DataLoader(valid_set, config.batch_size,
                                  shuffle=False, num_workers=config.num_workers)

    model = getattr(network, args.model)().eval()
    if args.load_model_path:
        model.load(args.load_model_path)
    if args.use_gpu:
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)

    train_loss_meter = meter.AverageValueMeter()
    valid_loss_meter = meter.AverageValueMeter()
    train_confusion_matrix = meter.ConfusionMeter(10)
    valid_confusion_matrix = meter.ConfusionMeter(10)

    best_valid_loss = 1e5
    best_epoch = 0
    dist_to_best = 0

    time_begin = time.time()  # time.clock() was removed in Python 3.8
    for epoch in range(config.epoch):
        # train
        model.train()
        train_loss_meter.reset()
        train_confusion_matrix.reset()
        for _iter, (train_data, train_target) in enumerate(train_dataloader):
            if args.use_gpu:
                train_data = train_data.cuda()
                train_target = train_target.cuda()
            optimizer.zero_grad()
            train_logits, train_output = model(train_data)
            train_loss = criterion(train_logits, train_target)
            train_loss.backward()
            optimizer.step()
            train_loss_meter.add(train_loss.item())
            train_confusion_matrix.add(train_logits.detach(),
                                       train_target.detach())
            if _iter % config.print_freq == 0:
                vis.plot('train_loss', train_loss_meter.value()[0])
        model.save(path=os.path.join(args.ckpts_dir,
                                     'model_{0}.pth'.format(epoch)))

        # valid
        model.eval()
        valid_loss_meter.reset()
        valid_confusion_matrix.reset()
        with t.no_grad():  # no gradients needed during validation
            for _iter, (valid_data, valid_target) in enumerate(valid_dataloader):
                if args.use_gpu:
                    valid_data = valid_data.cuda()
                    valid_target = valid_target.cuda()
                valid_logits, valid_output = model(valid_data)
                valid_loss = criterion(valid_logits, valid_target)
                valid_loss_meter.add(valid_loss.item())
                valid_confusion_matrix.add(valid_logits.detach().squeeze(),
                                           valid_target.type(t.LongTensor))
        valid_cm = valid_confusion_matrix.value()
        valid_accuracy = 100. * valid_cm.diagonal().sum() / valid_cm.sum()
        vis.plot('valid_accuracy', valid_accuracy)
        vis.log("epoch:{epoch}, train_loss:{train_loss}, train_cm:{train_cm}, "
                "valid_loss:{valid_loss}, valid_cm:{valid_cm}, "
                "valid_accuracy:{valid_accuracy}".format(
                    epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    train_cm=str(train_confusion_matrix.value()),
                    valid_loss=valid_loss_meter.value()[0],
                    valid_cm=str(valid_cm),
                    valid_accuracy=valid_accuracy))
        print("epoch:{epoch}, train_loss:{train_loss}, "
              "valid_loss:{valid_loss}, valid_accuracy:{valid_accuracy}".format(
                  epoch=epoch,
                  train_loss=train_loss_meter.value()[0],
                  valid_loss=valid_loss_meter.value()[0],
                  valid_accuracy=valid_accuracy))
        print("train_cm:\n{train_cm}\n\nvalid_cm:\n{valid_cm}".format(
            train_cm=str(train_confusion_matrix.value()),
            valid_cm=str(valid_cm)))

        # early stop: break after 4 epochs without improvement in valid loss
        if valid_loss_meter.value()[0] < best_valid_loss:
            best_epoch = epoch
            best_valid_loss = valid_loss_meter.value()[0]
            dist_to_best = 0
        else:
            dist_to_best += 1
        if dist_to_best >= 4:
            break

    model.save(path=os.path.join(args.ckpts_dir, 'model.pth'))
    vis.save()
    print("save model successfully")
    print("best epoch: ", best_epoch)
    print("best valid loss: ", best_valid_loss)
    time_end = time.time()
    print('time cost: %.2f' % (time_end - time_begin))
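# --- Invocation sketch (illustrative, not from the original script) ---
# How train() above might be launched from the command line. The Config
# fields mirror only the attributes the function actually reads; the data
# paths and defaults are hypothetical.
import argparse


class Config:
    train_data_path = './data/train-images-idx3-ubyte'
    train_label_path = './data/train-labels-idx1-ubyte'
    batch_size = 64
    num_workers = 4
    lr = 1e-3
    epoch = 20
    print_freq = 50


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='LeNet')
    parser.add_argument('--load_model_path', default=None)
    parser.add_argument('--use_gpu', action='store_true')
    parser.add_argument('--ckpts_dir', default='./checkpoints')
    train(parser.parse_args(), Config())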
import os
import time

from torch import nn, optim
from torch.utils.data import DataLoader
from torchnet import meter

# Project-local dependencies (Visualizer, configs, PoemDataset, network) are
# assumed to be importable from the surrounding repo.


def train(args):
    vis = Visualizer()
    config = getattr(configs, args.model + 'Config')()
    dataset = PoemDataset(data_path=config.data_path, config=config)
    dataloader = DataLoader(dataset, config.batch_size,
                            shuffle=True, num_workers=config.num_workers)
    config.vocab_size = dataset.vocab_size
    config.use_gpu = args.use_gpu

    model = getattr(network, args.model)(config).eval()
    if args.load_model_path:
        model.load(args.load_model_path, use_gpu=args.use_gpu)
    if args.use_gpu:
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    loss_meter = meter.AverageValueMeter()

    time_begin = time.time()  # time.clock() was removed in Python 3.8
    for epoch in range(config.epoch):
        # train
        model.train()
        loss_meter.reset()
        for _iter, data in enumerate(dataloader):
            # (batch, seq_len) -> (seq_len, batch)
            data = data.long().transpose(1, 0).contiguous()
            if args.use_gpu:
                data = data.cuda()
            optimizer.zero_grad()
            # Shift by one step so each token predicts its successor.
            # (The deprecated Variable wrapper is no longer needed; tensors
            # carry autograd state directly.)
            input, target = data[:-1, :], data[1:, :]
            output, _ = model(input)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            if _iter % config.print_freq == 0:
                vis.plot('train_loss', loss_meter.value()[0])
        model.save(path=os.path.join(args.ckpts_dir,
                                     'model_{0}.pth'.format(epoch)))
        vis.log("epoch:{epoch}, train_loss:{train_loss}".format(
            epoch=epoch, train_loss=loss_meter.value()[0]))
        print("epoch:{epoch}, train_loss:{train_loss}".format(
            epoch=epoch, train_loss=loss_meter.value()[0]))

    model.save(path=os.path.join(args.ckpts_dir, 'model.pth'))
    vis.save()
    print("save model successfully")
    time_end = time.time()
    print('time cost: %.2f' % (time_end - time_begin))
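# --- Shape sketch (illustrative) ---
# The input/target construction above is a one-step shift: position t of the
# input is trained to predict position t of the target, i.e. the next token.
# A tiny standalone example of the same slicing:
import torch

batch = torch.tensor([[1, 2, 3, 4],
                      [5, 6, 7, 8]])              # (batch, seq_len)
data = batch.long().transpose(1, 0).contiguous()  # (seq_len, batch)
input, target = data[:-1, :], data[1:, :]
# input[t, b] == batch[b][t] and target[t, b] == batch[b][t + 1],
# so the model learns to predict each token from its predecessor.
print(input.t())   # tensor([[1, 2, 3], [5, 6, 7]])
print(target.t())  # tensor([[2, 3, 4], [6, 7, 8]])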