コード例 #1
0
ファイル: trainer.py プロジェクト: liutianyuan/FCHD
class Trainer(nn.Module):
    """End-to-end training wrapper for an RPN-style head detector.

    Owns the detector, its optimizer, the anchor-target generator and the
    bookkeeping objects (visdom visualizer, confusion matrix, loss meters),
    so a single ``train_step`` call performs one full optimisation step.
    """

    def __init__(self, head_detector):
        super(Trainer, self).__init__()
        self.head_detector = head_detector
        # The optimizer is built by the detector itself, so optimisation
        # hyper-parameters live with the model rather than the trainer.
        self.optimizer = self.head_detector.get_optimizer()
        self.anchor_target_layer = AnchorTargetLayer()
        # Named container for the three losses returned by forward().
        self.loss_tuple = namedtuple('LossTuple',
                                     ['rpn_regr_loss',
                                      'rpn_cls_loss',
                                      'total_loss'])
        self.vis = Visualizer(env=cfg.VISDOM_ENV)
        self.rpn_cm = ConfusionMeter(2)  # confusion matrix with 2 classes
        # One running-average meter per loss field of the namedtuple.
        self.meters = {k: AverageValueMeter() for k in self.loss_tuple._fields}  # average loss

    def forward(self, x, gt_boxes, scale):
        """Run one forward pass and return the RPN losses as a LossTuple.

        Args:
            x: input image batch; only batch size 1 is supported.
            gt_boxes: ground-truth boxes for the single image (batch dim kept).
            scale: image rescale factor forwarded to the RPN.

        Returns:
            LossTuple(rpn_regr_loss, rpn_cls_loss, total_loss).
        """
        batch = x.size()[0]
        assert batch == 1, 'Currently only batch size 1 is supported.'
        img_size = x.size()[2:]

        # Forward pass
        feature_map = self.head_detector.extractor(x)
        rpn_regr, rpn_cls, _, _, anchors = self.head_detector.rpn(feature_map, img_size, scale)

        # Remove the batch dimension
        gt_boxes, rpn_regr, rpn_cls = gt_boxes[0], rpn_regr[0], rpn_cls[0]

        # Generates GT regression targets and GT labels.
        # NOTE(review): targets come back as numpy arrays and are moved
        # straight to the GPU — this assumes CUDA is available, and
        # gt_boxes must be a CPU tensor for .numpy() to succeed. Confirm.
        gt_regr, gt_cls = self.anchor_target_layer(gt_boxes.numpy(), anchors, img_size)
        gt_regr = torch.from_numpy(gt_regr).cuda().float()
        gt_cls = torch.from_numpy(gt_cls).cuda().long()

        # Computes loss; anchors labelled -1 are excluded from both terms
        # (the regression loss receives gt_cls, cross-entropy uses
        # ignore_index=-1).
        rpn_regr_loss = losses.rpn_regr_loss(rpn_regr, gt_regr, gt_cls)
        rpn_cls_loss = F.cross_entropy(rpn_cls, gt_cls, ignore_index=-1)
        total_loss = rpn_regr_loss + rpn_cls_loss
        loss_list = [rpn_regr_loss, rpn_cls_loss, total_loss]

        # Ignore samples with a label = -1
        valid_gt_cls = gt_cls[gt_cls > -1]
        valid_pred_cls = rpn_cls[gt_cls > -1]

        # Computes the confusion matrix (detached: metric only, no grads)
        self.rpn_cm.add(valid_pred_cls.detach(), valid_gt_cls.detach())

        return self.loss_tuple(*loss_list)

    def train_step(self, x, boxes, scale):
        """One optimisation step: forward, backward, update, log meters."""
        # NOTE(review): calls self.forward() directly, bypassing the
        # nn.Module.__call__ hook machinery — confirm this is intentional.
        loss_tuple = self.forward(x, boxes, scale)
        self.optimizer.zero_grad()
        loss_tuple.total_loss.backward()
        self.optimizer.step()
        self.update_meters(loss_tuple)

    def update_meters(self, loss_tuple):
        """Fold the scalar value of each loss into its running-average meter."""
        loss_dict = {k: v.item() for k, v in loss_tuple._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_dict[key])

    def reset_meters(self):
        """Reset all loss meters and the RPN confusion matrix."""
        for meter in self.meters.values():
            meter.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return {loss_name: running mean} for every meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}

    def save(self, path, save_optimizer=False):
        """Save model state (optionally optimizer state) and visdom info."""
        save_dict = dict()
        save_dict['model'] = self.head_detector.state_dict()
        save_dict['vis_info'] = self.vis.state_dict()
        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        torch.save(save_dict, path)
        self.vis.save([self.vis.env])

    def load(self, path, load_optimizer=True):
        """Load model weights; restore optimizer state when it was saved."""
        state_dict = torch.load(path)
        self.head_detector.load_state_dict(state_dict['model'])
        if load_optimizer and 'optimizer' in state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])

    def scale_lr(self, decay=0.1):
        """Multiply every param group's learning rate by ``decay``."""
        for param_group in self.optimizer.param_groups:
            param_group['lr'] *= decay
コード例 #2
0
def train(args, config):
    """Train an MNIST classifier with per-epoch validation and early stop.

    Args:
        args: CLI namespace (model, use_gpu, ckpts_dir, load_model_path, ...).
        config: configuration object (data paths, batch_size, lr, epoch,
            print_freq, num_workers, ...).
    """
    vis = Visualizer()

    train_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='train')
    # NOTE(review): the validation set reads the *training* data/label
    # paths — presumably the split happens inside MNIST via mode='valid';
    # confirm this is intended.
    valid_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='valid')

    train_dataloader = DataLoader(train_set,
                                  config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    valid_dataloader = DataLoader(valid_set,
                                  config.batch_size,
                                  shuffle=False,
                                  num_workers=config.num_workers)

    model = getattr(network, args.model)().eval()
    if args.load_model_path:
        model.load(args.load_model_path)
    if args.use_gpu:
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)

    train_loss_meter, valid_loss_meter = meter.AverageValueMeter(
    ), meter.AverageValueMeter()
    train_confusion_matrix, valid_confusion_matrix = meter.ConfusionMeter(
        10), meter.ConfusionMeter(10)

    best_valid_loss = 1e5
    best_epoch = 0
    dist_to_best = 0  # epochs since the last improvement (see early stop)

    # FIX: time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic replacement for measuring elapsed time.
    time_begin = time.perf_counter()

    for epoch in range(config.epoch):

        # ---- train ----
        model.train()
        train_loss_meter.reset()
        train_confusion_matrix.reset()

        for _iter, (train_data, train_target) in enumerate(train_dataloader):

            if args.use_gpu:
                train_data = train_data.cuda()
                train_target = train_target.cuda()

            optimizer.zero_grad()
            train_logits, train_output = model(train_data)
            train_loss = criterion(train_logits, train_target)
            train_loss.backward()
            optimizer.step()

            train_loss_meter.add(train_loss.item())
            train_confusion_matrix.add(train_logits.data, train_target.data)

            if _iter % config.print_freq == 0:
                vis.plot('train_loss', train_loss_meter.value()[0])
        # Per-epoch checkpoint.
        model.save(path=os.path.join(args.ckpts_dir, 'model_{0}.pth'.format(
            str(epoch))))

        # ---- valid ----
        model.eval()
        valid_loss_meter.reset()
        valid_confusion_matrix.reset()

        # Disable autograd during validation: no gradients are needed,
        # which saves memory and compute.
        with t.no_grad():
            for _iter, (valid_data, valid_target) in enumerate(valid_dataloader):

                if args.use_gpu:
                    valid_data = valid_data.cuda()
                    valid_target = valid_target.cuda()

                valid_logits, valid_output = model(valid_data)
                valid_loss = criterion(valid_logits, valid_target)

                valid_loss_meter.add(valid_loss.item())
                valid_confusion_matrix.add(valid_logits.detach().squeeze(),
                                           valid_target.type(t.LongTensor))

        valid_cm = valid_confusion_matrix.value()
        # Overall accuracy (%) = trace of the confusion matrix / total count.
        valid_accuracy = 100. * (valid_cm.diagonal().sum()) / (valid_cm.sum())

        vis.plot('valid_accuracy', valid_accuracy)

        vis.log(
            "epoch:{epoch}, train_loss:{train_loss}, train_cm:{train_cm}, valid_loss:{valid_loss}, valid_cm:{valid_cm}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    train_cm=str(train_confusion_matrix.value()),
                    valid_loss=valid_loss_meter.value()[0],
                    valid_cm=str(valid_cm),
                    valid_accuracy=valid_accuracy))
        print(
            "epoch:{epoch}, train_loss:{train_loss}, valid_loss:{valid_loss}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    valid_loss=valid_loss_meter.value()[0],
                    valid_accuracy=valid_accuracy))
        print("train_cm:\n{train_cm}\n\nvalid_cm:\n{valid_cm}".format(
            train_cm=str(train_confusion_matrix.value()),
            valid_cm=str(valid_cm),
        ))

        # Early stop: dist_to_best is reset to 0 and then immediately
        # incremented on an improving epoch, so it reads "1" right after an
        # improvement and training stops after 4 consecutive non-improving
        # epochs (counter reaches 5).
        if valid_loss_meter.value()[0] < best_valid_loss:
            best_epoch = epoch
            best_valid_loss = valid_loss_meter.value()[0]
            dist_to_best = 0

        dist_to_best += 1
        if dist_to_best > 4:
            break

    model.save(path=os.path.join(args.ckpts_dir, 'model.pth'))
    vis.save()
    print("save model successfully")
    print("best epoch: ", best_epoch)
    print("best valid loss: ", best_valid_loss)
    time_end = time.perf_counter()
    print('time cost: %.2f' % (time_end - time_begin))
コード例 #3
0
def train(args):
    """Train a poem language model with next-token prediction.

    Args:
        args: CLI namespace (model, use_gpu, ckpts_dir, load_model_path, ...).
            The model's Config class is looked up as ``<model>Config``.
    """
    vis = Visualizer()

    config = getattr(configs, args.model + 'Config')()
    dataset = PoemDataset(data_path=config.data_path, config=config)
    dataloader = DataLoader(dataset,
                            config.batch_size,
                            shuffle=True,
                            num_workers=config.num_workers)
    config.vocab_size = dataset.vocab_size
    config.use_gpu = args.use_gpu
    model = getattr(network, args.model)(config).eval()

    if args.load_model_path:
        model.load(args.load_model_path, use_gpu=args.use_gpu)
    if args.use_gpu:
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    loss_meter = meter.AverageValueMeter()

    # FIX: time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic replacement for measuring elapsed time.
    time_begin = time.perf_counter()

    for epoch in range(config.epoch):

        # train
        model.train()
        loss_meter.reset()

        for _iter, data in enumerate(dataloader):
            # (seq_len, batch) layout as expected by the recurrent model.
            data = data.long().transpose(1, 0).contiguous()

            if args.use_gpu:
                data = data.cuda()

            optimizer.zero_grad()
            # Next-token prediction: target is the input shifted by one step.
            # The deprecated Variable() wrapper is dropped — it has been a
            # no-op since torch 0.4, which this code already requires
            # (it calls .item()). Also avoids shadowing the input() builtin.
            inputs, targets = data[:-1, :], data[1:, :]
            output, _ = model(inputs)
            loss = criterion(output, targets.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            if _iter % config.print_freq == 0:
                vis.plot('train_loss', loss_meter.value()[0])

        # Per-epoch checkpoint.
        model.save(path=os.path.join(args.ckpts_dir, 'model_{0}.pth'.format(
            str(epoch))))

        vis.log("epoch:{epoch}, train_loss:{train_loss}".format(
            epoch=epoch,
            train_loss=loss_meter.value()[0],
        ))
        print("epoch:{epoch}, train_loss:{train_loss}".format(
            epoch=epoch,
            train_loss=loss_meter.value()[0],
        ))

    model.save(path=os.path.join(args.ckpts_dir, 'model.pth'))
    vis.save()
    print("save model successfully")
    time_end = time.perf_counter()
    print('time cost: %.2f' % (time_end - time_begin))