Example #1
0
    def evaluate(model, testloader):
        """Measure top-1 / top-5 accuracy of `model` over `testloader`.

        Puts the model in eval mode, accumulates batch accuracies with
        AverageMeter, and writes the averages to stdout.  Assumes CUDA is
        available (inputs and targets are moved with `.cuda()`).
        """
        import torch  # local import: needed for the no_grad() fix below

        model.eval()
        top1 = AverageMeter()
        top5 = AverageMeter()
        # Fix: evaluation previously ran with autograd enabled, building a
        # graph for every batch and wasting memory for no benefit.
        with torch.no_grad():
            for inputs, targets in testloader:
                targets = Variable(targets).cuda()
                inputs = Variable(inputs).cuda()
                # Model returns (logits, ..., ...); only logits are scored.
                logits, _, _ = model(inputs)
                prec1, prec5 = Helper.accuracy(logits, targets, topk=(1, 5))

                top1.update(prec1.item(), inputs.size(0))
                top5.update(prec5.item(), inputs.size(0))

        sys.stdout.write('Acc@1: %.3f Acc@5: %.3f' % (top1.avg, top5.avg))
Example #2
0
    def evaluate_fgan_loss(inet, fnet, dataloader, nactors):
        """Evaluate the fusion network's reconstruction classification accuracy.

        For each batch, `batch_process` produces the missing latent code and
        its fused reconstruction; the inverse net's classifier scores the
        reconstruction against the missing target.

        Returns:
            (top1.avg, top5.avg) averaged over all evaluated samples.
        """
        import torch  # local import: needed for the no_grad() fix below

        print('Evaluate Fusion Network')
        inet.eval()
        fnet.eval()
        top1 = AverageMeter()
        top5 = AverageMeter()
        # Fix: this evaluation loop previously tracked gradients for every
        # batch; no_grad() avoids the needless graph construction.
        with torch.no_grad():
            for inputs, targets in dataloader:
                z_missed, z_missed_g, target_missed, _ = batch_process(
                    inet, fnet, inputs, targets, nactors)
                out_missed_g = inet.module.classifier(z_missed_g)
                prec1, prec5 = Helper.accuracy(out_missed_g,
                                               target_missed,
                                               topk=(1, 5))
                top1.update(prec1.item(), target_missed.size(0))
                top5.update(prec5.item(), target_missed.size(0))
                # Free large intermediates eagerly between batches.
                del z_missed, z_missed_g, target_missed, _

        return top1.avg, top5.avg
Example #3
0
def train(train_dataloader, model, criterion, optimizer, epoch, device):
    """Training step.

    Runs one epoch for a six-head face-parsing model whose dataloader yields
    (image, y_face, y_mouth, y_eyebrow, y_eye, y_nose, y_jaw).  Each head is
    scored with `criterion` (cross-entropy style: logits vs. class indices);
    the optimisation objective is the sum of the six head losses.

    Returns:
        (total_loss.avg, total_acc.avg): average summed loss per sample and
        the average fraction of correct predictions over all six heads.
    """
    model.train()

    # One loss meter and one accuracy meter per head, in dataloader order:
    # face, mouth, eyebrow, eye, nose, jaw.  (The six-fold copy-pasted
    # meter/loss/accuracy code was collapsed into loops over the heads.)
    num_heads = 6
    head_losses = [AverageMeter() for _ in range(num_heads)]
    head_accs = [AverageMeter() for _ in range(num_heads)]
    total_loss = AverageMeter()
    total_acc = AverageMeter()

    data_time = AverageMeter()
    batch_time = AverageMeter()
    end = time.time()

    bar = Bar('Processing train', max=len(train_dataloader))

    for batch_idx, (x, *ys) in enumerate(train_dataloader):
        data_time.update(time.time() - end)

        x = x.to(device)
        # ys[0..5] are the face, mouth, eyebrow, eye, nose and jaw labels.
        ys = [y.to(device) for y in ys]
        outputs = model(x)
        sample_num = x.size(0)

        # Per-head losses; the backward pass uses their sum.
        losses = [criterion(out, y) for out, y in zip(outputs, ys)]
        total_cur_loss = sum(losses)

        for meter, cur_loss in zip(head_losses, losses):
            meter.update(cur_loss.item(), sample_num)
        total_loss.update(total_cur_loss.item(), sample_num)

        # Per-head correct-prediction counts (argmax over the class dim).
        corrects = [
            out.max(1)[1].eq(y).sum().item() for out, y in zip(outputs, ys)
        ]
        for meter, correct in zip(head_accs, corrects):
            meter.update(correct, sample_num)
        # Total accuracy counts every head's decision, hence sample_num * 6.
        total_acc.update(sum(corrects), sample_num * num_heads)

        optimizer.zero_grad()
        total_cur_loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '(Epoch:{epoch} - {batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.3f} | Acc: {acc:.3f}'.format(
            batch=batch_idx + 1,
            size=len(train_dataloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=total_loss.avg,
            acc=total_acc.avg,
            epoch=epoch)
        bar.next()
    bar.finish()

    return total_loss.avg, total_acc.avg
Example #4
0
def validate(validate_dataloader, model, criterion, epoch, device):
    """Validating step.

    Mirror of `train` without the optimiser: one pass over the validation
    loader under `torch.no_grad()`, tracking per-head and total loss and
    accuracy for the six face-parsing heads.

    Returns:
        (total_loss.avg, total_acc.avg): average summed loss per sample and
        the average fraction of correct predictions over all six heads.
    """
    model.eval()

    # One loss meter and one accuracy meter per head, in dataloader order:
    # face, mouth, eyebrow, eye, nose, jaw.  (The six-fold copy-pasted
    # meter/loss/accuracy code was collapsed into loops over the heads.)
    num_heads = 6
    head_losses = [AverageMeter() for _ in range(num_heads)]
    head_accs = [AverageMeter() for _ in range(num_heads)]
    total_loss = AverageMeter()
    total_acc = AverageMeter()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()

    bar = Bar('Processing validate', max=len(validate_dataloader))
    with torch.no_grad():
        for batch_idx, (x, *ys) in enumerate(validate_dataloader):
            data_time.update(time.time() - end)

            x = x.to(device)
            # PyTorch's cross-entropy loss takes class indices directly;
            # one-hot encoding is handled internally.
            ys = [y.to(device) for y in ys]
            outputs = model(x)
            sample_num = x.size(0)

            # Per-head losses; only monitored here, never backpropagated.
            losses = [criterion(out, y) for out, y in zip(outputs, ys)]
            total_cur_loss = sum(losses)

            for meter, cur_loss in zip(head_losses, losses):
                meter.update(cur_loss.item(), sample_num)
            total_loss.update(total_cur_loss.item(), sample_num)

            # Per-head correct-prediction counts (argmax over the class dim).
            corrects = [
                out.max(1)[1].eq(y).sum().item()
                for out, y in zip(outputs, ys)
            ]
            for meter, correct in zip(head_accs, corrects):
                meter.update(correct, sample_num)
            # Total accuracy counts every head's decision: sample_num * 6.
            total_acc.update(sum(corrects), sample_num * num_heads)

            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '(Epoch:{epoch} - {batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.3f} | Acc: {acc:.3f}'.format(
                batch=batch_idx + 1,
                size=len(validate_dataloader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=total_loss.avg,
                acc=total_acc.avg,
                epoch=epoch)
            bar.next()
        bar.finish()

    return total_loss.avg, total_acc.avg
    def train_eopch(self):
        """Run one training epoch over ``self.dataloaders['train']``.

        Tracks the epoch-average loss plus the MAE/MSE of the predicted
        crowd counts, logs a one-line summary, then writes a checkpoint
        (epoch, optimizer state, model state) for this epoch.

        NOTE(review): the method name keeps the historical 'eopch' typo so
        existing callers are unaffected.
        """
        started = time.time()
        loss_meter = AverageMeter()
        mae_meter = AverageMeter()
        mse_meter = AverageMeter()
        self.model.train()  # switch to training mode

        for inputs, points, targets, st_sizes in self.dataloaders['train']:
            inputs = inputs.to(self.device)
            st_sizes = st_sizes.to(self.device)
            # Ground-truth count per image = number of annotated points.
            gd_count = np.array([len(p) for p in points], dtype=np.float32)
            points = [p.to(self.device) for p in points]
            targets = [t.to(self.device) for t in targets]

            with torch.set_grad_enabled(True):
                outputs = self.model(inputs)
                prob_list = self.post_prob(points, st_sizes)
                loss = self.criterion(prob_list, targets, outputs)

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                N = inputs.size(0)
                # Predicted count per image = sum over its density map.
                pre_count = torch.sum(outputs.view(N, -1),
                                      dim=1).detach().cpu().numpy()
                res = pre_count - gd_count
                loss_meter.update(loss.item(), N)
                mse_meter.update(np.mean(res * res), N)
                mae_meter.update(np.mean(abs(res)), N)

        logging.info(
            'Epoch {} Train, Loss: {:.2f}, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec'
            .format(self.epoch, loss_meter.get_avg(),
                    np.sqrt(mse_meter.get_avg()), mae_meter.get_avg(),
                    time.time() - started))

        # Persist a per-epoch checkpoint and register it so the caller can
        # prune old ones.
        save_path = os.path.join(self.save_dir,
                                 '{}_ckpt.tar'.format(self.epoch))
        torch.save(
            {
                'epoch': self.epoch,
                'optimizer_state_dict': self.optimizer.state_dict(),
                'model_state_dict': self.model.state_dict()
            }, save_path)
        self.save_list.append(save_path)
Example #6
0
    def train_eopch(self, epoch=0):
        """Run one refiner-assisted training epoch over ``self.dataloaders['train']``.

        The refiner builds the target density map from point annotations;
        the loss combines ``self.crit`` with a cosine term, normalised by
        the configured batch size.  Logs a summary and checkpoints both the
        model and the refiner.

        Args:
            epoch: accepted for API compatibility; epoch bookkeeping
                actually uses ``self.epoch``.

        NOTE(review): the method name keeps the historical 'eopch' typo so
        existing callers are unaffected.
        """
        # Cleanup: the unused meters (fore/back/cls/fea) and the dead
        # ``s_loss`` local from the original implementation were removed.
        epoch_loss = AverageMeter()
        epoch_mae = AverageMeter()
        epoch_mse = AverageMeter()
        epoch_start = time.time()
        self.model.train()  # Set model to training mode
        self.refiner.train()  # Set refiner to training mode

        # Iterate over data.  st_sizes and targets are yielded by the loader
        # but unused by this trainer, so their dead GPU transfers were removed.
        for step, (inputs, points, targets, st_sizes) in enumerate(self.dataloaders['train']):
            inputs = inputs.to(self.device)
            # Ground-truth count per image = number of annotated points.
            gd_count = np.array([len(p) for p in points], dtype=np.float32)
            points = [p.to(self.device) for p in points]

            with torch.set_grad_enabled(True):
                outputs = self.model(inputs)

                # Refiner constructs the regression target from the points.
                gt = self.refiner(points, inputs, outputs.shape)

                loss = self.crit(gt, outputs)
                loss += 10*cos_loss(gt, outputs)
                loss /= self.args.batch_size

                # Both the main model and the refiner (DML) are optimised.
                self.optimizer.zero_grad()
                self.dml_optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                self.dml_optimizer.step()

                # Count error is monitored on the first image of the batch only.
                pre_count = outputs[0].sum().detach().cpu().numpy()
                res = (pre_count - gd_count[0])
                if step % 100 == 0:
                    print('Error: {}, Pred: {}, GT: {}, Loss: {}'.format(res, pre_count, gd_count[0], loss.item()))

                N = inputs.shape[0]
                epoch_loss.update(loss.item(), N)
                epoch_mse.update(np.mean(res * res), N)
                epoch_mae.update(np.mean(abs(res)), N)

        logging.info('Epoch {} Train, Loss: {:.2f}, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec'
                .format(self.epoch, epoch_loss.get_avg(), np.sqrt(epoch_mse.get_avg()), epoch_mae.get_avg(),
                    time.time()-epoch_start))
        model_state_dic = self.model.state_dict()
        save_path = os.path.join(self.save_dir, '{}_ckpt.tar'.format(self.epoch))
        torch.save({
            'epoch': self.epoch,
            'optimizer_state_dict': self.optimizer.state_dict(),
            'model_state_dict': model_state_dic,
            'refine_state_dict': self.refiner.state_dict(),
        }, save_path)
        self.save_list.append(save_path)  # control the number of saved models
Example #7
0
def main(cfg: DictConfig) -> None:
    """Hydra-driven train/validate/test entry point for a detection model.

    Builds the backend, optimizer, scheduler and (optional) UDA wrapper
    from the config, trains with validation every ``cfg.eval_at_n_epoch``
    epochs, tracks the best checkpoint by ``cfg.save_best_metric``, and
    finally evaluates on the test split if one is available.
    """
    torch.manual_seed(cfg.seed)

    is_multi_gpu = False

    if isinstance(cfg.gpu, ListConfig):
        is_multi_gpu = True
        log.info(f"Use GPUs {str(cfg.gpu).strip('[]')} for training")
    else:
        # Single-GPU case: restrict CUDA to the requested device id.
        os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.gpu)

    # NOTE(review): f'cuda' has no placeholder — a plain 'cuda' literal would do.
    device = torch.device(f'cuda' if cfg.gpu is not None else 'cpu')
    log.info(f"Use device {device} for training")

    # Backend (model) is resolved dynamically: backends.<name>.build(**params).
    backend = hydra.utils.get_method(
        f'backends.{cfg.model.backend.name}.build')(**cfg.model.backend.params)

    # Optimizer class is looked up by name; only trainable params are passed.
    optimizer = hydra.utils.get_class(f"torch.optim.{cfg.optimizer.name}")
    optimizer = optimizer(
        filter(lambda p: p.requires_grad, backend.parameters()),
        **cfg.optimizer.params)

    scheduler = None
    if cfg.optimizer.scheduler is not None:
        # Scheduler class resolved by name; the optimizer is injected into
        # its kwargs alongside the configured params.
        scheduler = hydra.utils.get_class(
            f"torch.optim.lr_scheduler.{cfg.optimizer.scheduler.name}")
        scheduler = scheduler(**{
            **{
                "optimizer": optimizer
            },
            **cfg.optimizer.scheduler.params
        })

    # UDA (unsupervised domain adaptation) wrapper: the first key under
    # cfg.model.uda selects the method; falls back to the no-op base model.
    if cfg.model.uda is not None:
        uda_method = list(cfg.model.uda.keys())[0]
        uda_params = cfg.model.uda[uda_method]
        uda_cls = hydra.utils.get_class(f"uda.{uda_method}")
        uda = uda_cls(**uda_params) if uda_params else uda_cls()
    else:
        uda = hydra.utils.get_class(f"uda.base.Model")()
    # Wire the collaborators into the UDA wrapper before init_done().
    uda.cfg = cfg
    uda.device = device
    uda.backend = backend
    uda.optimizer = optimizer
    uda.centernet_loss = hydra.utils.get_class(
        f"losses.{cfg.model.backend.loss.name}")(
            **cfg.model.backend.loss.params)

    uda.scheduler = scheduler

    train_loader, val_loader, test_loader = load_datasets(
        cfg,
        down_ratio=backend.down_ratio,
        rotated_boxes=backend.rotated_boxes)
    tensorboard_logger = TensorboardLogger(cfg, val_loader.dataset.classes)

    # One Evaluator per entry in cfg.evaluation; each gets the shared
    # score threshold plus its own config overrides.
    evaluators = []
    for e in cfg.evaluation:
        defaults = {
            'score_threshold': cfg.score_threshold,
            **cfg.evaluation[e]
        }
        e = hydra.utils.get_class(f"evaluation.{e}.Evaluator")(**defaults)
        e.classes = tensorboard_logger.classes
        e.num_workers = cfg.num_workers
        e.use_rotated_boxes = cfg.model.backend.params.rotated_boxes
        evaluators.append(e)

    uda.init_done()

    # Checkpoint restore: 'pretrained' loads weights only, 'resume' (which
    # wins if both are set) also restores training state.
    start_epoch = 1
    if cfg.pretrained is not None and cfg.resume is None:
        start_epoch = uda.load_model(cfg.pretrained)
    elif cfg.resume is not None:
        start_epoch = uda.load_model(cfg.resume, True)

    uda.to(device, is_multi_gpu)

    # stats maps "tag/metric" -> AverageMeter (or raw evaluator results).
    stats = {}
    best = float(
        "inf") if cfg.save_best_metric.mode == 'min' else -float("inf")

    if not cfg.test_only:
        for epoch in tqdm(range(start_epoch, cfg.epochs + 1),
                          initial=start_epoch,
                          position=0,
                          desc='Epoch'):
            uda.epoch_start()
            uda.set_phase(is_training=True)
            tag = 'training'
            for step, data in tqdm(enumerate(train_loader),
                                   total=len(train_loader),
                                   position=1,
                                   desc='Training Steps'):
                outputs = uda.step(data)

                # Fold the step's scalar stats into running meters.
                for k in outputs["stats"]:
                    log_key = f"{tag}/{k}"
                    m = stats.get(log_key, AverageMeter(name=k))
                    m.update(outputs["stats"][k].item(), data["input"].size(0))
                    stats[log_key] = m

            # Validation (and best-model tracking) only runs every
            # cfg.eval_at_n_epoch epochs.
            if epoch % cfg.eval_at_n_epoch != 0:
                continue

            tag = 'validation'
            uda.set_phase(is_training=False)
            with torch.no_grad():
                for step, data in tqdm(enumerate(val_loader),
                                       total=len(val_loader),
                                       position=1,
                                       desc='Validation Steps'):
                    outputs = uda.step(data, is_training=False)

                    for k in outputs["stats"]:
                        log_key = f"{tag}/{k}"
                        m = stats.get(log_key, AverageMeter(name=k))
                        m.update(outputs["stats"][k].item(),
                                 data["input"].size(0))
                        stats[log_key] = m

                    # Feed detections to every evaluator and to tensorboard.
                    detections = uda.get_detections(outputs, data)
                    detections["image_shape"] = data["input"].shape[1:]
                    for e in evaluators:
                        e.add_batch(**detections)

                    tensorboard_logger.log_detections(data,
                                                      detections,
                                                      epoch,
                                                      tag=tag)

            # Evaluator results are merged into stats as plain scalars.
            for e in evaluators:
                result = e.evaluate()
                stats = {**stats, **result}

            # Flush stats: meters are averaged and reset, raw values pass
            # through; everything is logged to tensorboard.
            scalars = {}
            for k, s in stats.items():
                if isinstance(s, AverageMeter):
                    scalars[k] = s.avg
                    s.reset()
                else:
                    scalars[k] = s

                tensorboard_logger.log_stat(k, scalars[k], epoch)

            uda.epoch_end()
            tensorboard_logger.reset()
            uda.save_model("model_last.pth", epoch, True)

            # NOTE(review): prefer `cfg.save_best_metric.name not in scalars`.
            if not cfg.save_best_metric.name in scalars:
                log.error(
                    f"Metric {cfg.save_best_metric.name} not valid, valid values are {' '.join(scalars.keys())}"
                )
                return

            current = scalars[cfg.save_best_metric.name]
            if (cfg.save_best_metric.mode == 'min' and best > current
                    or cfg.save_best_metric.mode == 'max' and best < current):
                uda.save_model("model_best.pth", epoch, True)
                best = current

                log.info(
                    f"Save best model with {cfg.save_best_metric.name} of {current:.4f}"
                )

    if test_loader is not None:
        # NOTE(review): when training ran, `epoch` carries over from the
        # loop above; if the epoch range was empty it would be undefined.
        if cfg.test_only:
            epoch = start_epoch
        tag = 'test'

        uda.set_phase(is_training=False)
        with torch.no_grad():
            for step, data in tqdm(enumerate(test_loader),
                                   total=len(test_loader),
                                   position=1,
                                   desc='Test Steps'):
                outputs = uda.step(data, is_training=False)

                for k in outputs["stats"]:
                    log_key = f"{tag}/{k}"
                    m = stats.get(log_key, AverageMeter(name=k))
                    m.update(outputs["stats"][k].item(), data["input"].size(0))
                    stats[log_key] = m

                detections = uda.get_detections(outputs, data)
                detections["image_shape"] = data["input"].shape[1:]
                for e in evaluators:
                    e.add_batch(**detections)

                tensorboard_logger.log_detections(data,
                                                  detections,
                                                  epoch,
                                                  tag=tag)

        for e in evaluators:
            result = e.evaluate()
            stats = {**stats, **result}

        # Same stats flush as in the validation phase.
        scalars = {}
        for k, s in stats.items():
            if isinstance(s, AverageMeter):
                scalars[k] = s.avg
                s.reset()
            else:
                scalars[k] = s

            tensorboard_logger.log_stat(k, scalars[k], epoch)

        tensorboard_logger.reset()
Example #8
0
def main(args):
    """Train and validate a monocular depth-estimation model.

    Builds the model/optimizer/scheduler, optionally restores a checkpoint
    or pretrained weights, then alternates training epochs (scale-invariant
    depth + gradient + SSIM losses) with validation (RMSE / relative
    absolute error), checkpointing after each epoch.
    """
    model = build_model(args.model_name, args)
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)
    # Halve the learning rate every 3 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.5)

    log.info('loading data...\n')
    train_loader, val_loader = data_loader(args)
    train_bts, val_bts = len(train_loader), len(val_loader)
    log.info('train batch number: {0}; validation batch number: {1}'.format(
        train_bts, val_bts))

    # Whether using checkpoint
    if args.resume is not None:
        if not os.path.exists(args.resume):
            raise RuntimeError("=> no checkpoint found")
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda()

        optimizer.load_state_dict(checkpoint['optimizer'])
        best_loss = checkpoint['best_loss']
        args.start_epoch = checkpoint['epoch'] + 1
        # NOTE(review): start_step is restored but never used; it is reset
        # to 0 at the end of every epoch below.
        start_step = checkpoint['step'] + 1
    else:
        best_loss = np.inf
        start_step = 0

    # whether using pretrained model
    if args.pretrained_net is not None and args.resume is None:
        # Only weights whose names match the current model are copied in.
        pretrained_w = torch.load(args.pretrained_net)
        model_dict = model.state_dict()
        pretrained_dict = {
            k: torch.from_numpy(v)
            for k, v in pretrained_w.items() if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

    # run on multiple GPUs by DataParallel
    model = nn.DataParallel(model, device_ids=[int(e) for e in args.gpu_ids
                                               ]) if args.use_cuda else model
    # NOTE(review): l1_criterion is defined but unused — the depth term
    # below uses depth_scale_invariant instead.
    l1_criterion = nn.L1Loss()
    mse_criterion = nn.MSELoss()

    # -------------------- training -------------------- #
    for epoch in range(args.start_epoch, args.epochs):
        e_time = time.time()
        log.info('training: epoch {}/{} \n'.format(epoch + 1, args.epochs))
        # NOTE(review): scheduler.step() is called at the start of the
        # epoch; PyTorch >= 1.1 expects it after optimizer.step() — confirm
        # the intended LR schedule.
        scheduler.step()

        model.train()
        for k, train_data in enumerate(tqdm(train_loader)):
            train_depth = train_data['depth']
            train_img = train_data['rgb']
            if args.use_cuda:
                train_img, train_depth = train_img.cuda(), train_depth.cuda()

            optimizer.zero_grad()

            # Model returns the prediction and a loss-weighting tensor
            # (loss_sigma), which the current loss formulation ignores.
            train_pred, loss_sigma = model(train_img)

            # remove depth out of max depth: zero out pixels outside
            # [min_depth, max_depth] in both prediction and ground truth.
            mask = train_depth.le(args.max_depth) & train_depth.ge(
                args.min_depth)
            mask = mask.type(torch.FloatTensor)
            if args.use_cuda:
                mask = mask.cuda()
            train_depth *= mask
            train_pred *= mask

            # rmse is monitored/logged only; the training objective is the
            # sum of scale-invariant depth, gradient and SSIM terms.
            rmse = torch.sqrt(mse_criterion(train_pred, train_depth))
            l_depth = depth_scale_invariant(train_pred, train_depth)
            l_edge = grad_loss(train_pred, train_depth)
            l_ssim = torch.sqrt(
                (1 - ssim(train_pred,
                          train_depth,
                          val_range=args.max_depth / args.min_depth)))

            train_loss = sum([l_depth, l_edge, l_ssim])
            train_loss.backward()
            optimizer.step()
            writer.add_scalar('training loss', train_loss,
                              epoch * train_bts + k)

            log.info(
                'train  combine loss and rmse of epoch/batch {}/{} are {} and {}'
                .format(epoch, k, train_loss, rmse))
            keeper.save_loss([
                epoch,
                train_loss.item(),
                rmse.item(),
                l_depth.item(),
                l_edge.item(),
                l_ssim.item()
            ], 'train_losses.csv')

            # Log the current LR every 500 batches.
            if k % 500 == 0:
                writer.add_scalar('learning rate',
                                  optimizer.param_groups[0]['lr'],
                                  epoch * train_bts + k)

        # evaluating test data
        val_rmse_avg = AverageMeter()
        val_relabs_avg = AverageMeter()
        model.eval()
        with torch.no_grad():
            for k, val_data in enumerate(tqdm(val_loader)):
                val_depth = val_data['depth']
                val_img = val_data['rgb']
                if args.use_cuda:
                    val_img, val_depth = val_img.cuda(), val_depth.cuda()

                val_pred, _ = model(val_img)

                # remove depth out of max depth (same masking as training).
                mask = val_depth.le(args.max_depth) & val_depth.ge(
                    args.min_depth)
                mask = mask.type(torch.FloatTensor)
                if args.use_cuda:
                    mask = mask.cuda()
                val_depth *= mask
                val_pred *= mask

                # Save a qualitative sample every 200 batches.
                if k % 200 == 1:
                    keeper.save_img(epoch, k,
                                    [val_img[0], val_depth[0], val_pred[0]])

                val_rmse = torch.sqrt(mse_criterion(val_pred, val_depth))
                # Avoid division by zero in the relative-error metric.
                val_depth[val_depth.eq(0)] = 1e-5
                val_relabs = torch.mean(
                    torch.abs(val_pred - val_depth) / val_depth)
                val_rmse_avg.update(val_rmse.item())
                val_relabs_avg.update(val_relabs.item())
                log.info('val rmse of epoch/batch {}/{} is {}'.format(
                    epoch, k, val_rmse))

                writer.add_scalar('validation RMSE', val_rmse,
                                  epoch * val_bts + k)
                writer.add_scalar('validation relative absolute error',
                                  val_relabs, epoch * val_bts + k)
            writer.add_scalar('Epoch validation RMSE', val_rmse_avg.avg, epoch)
            writer.add_scalar('Epoch validation absErrorRel',
                              val_relabs_avg.avg, epoch)

        log.info('Saving model ...')
        # is_best is evaluated against the pre-update best_loss; the best
        # value itself is refreshed just below.
        keeper.save_checkpoint(
            {
                'epoch': epoch,
                'step': 0,
                'state_dict': model.module.state_dict(),  # GPU (DataParallel)
                'optimizer': optimizer.state_dict(),
                'best_loss': best_loss,
            },
            is_best=val_rmse_avg.avg < best_loss)

        if val_rmse_avg.avg < best_loss:
            best_loss = val_rmse_avg.avg

        log.info('training time of epoch {}/{} is {} \n'.format(
            epoch + 1, args.epochs,
            time.time() - e_time))

        start_step = 0
Example #9
0
def main(args):
    """End-to-end training/validation driver for the depth-estimation model.

    Builds the model and Adam optimizer, optionally resumes from a
    checkpoint (``args.resume``) or initialises from Keras-converted
    weights (``args.pretrained_net``), then alternates training and
    validation epochs with a combined SSIM + L1 loss, checkpointing every
    epoch and keeping a separate copy of the best model by validation loss.

    Args:
        args: parsed command-line namespace; fields used here include
            model_name, lr, weight_decay, resume, pretrained_net, use_cuda,
            epochs, max_depth, min_depth and loss-dir-related settings.
    """
    log.info("Minion has spawn.")

    model = build_model(args.model_name)

    # Training parameters
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)
    l1_criterion = nn.L1Loss()

    log.info("loading data...")
    train_loader, val_loader = data_loader(args)

    # Resume from a checkpoint if requested.
    if args.resume is not None:
        if not os.path.exists(args.resume):
            raise RuntimeError("=> no checkpoint found")
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_loss = checkpoint['best_loss']
        args.start_epoch = checkpoint['epoch'] + 1
    else:
        best_loss = np.inf

    # Initialise from a Keras-pretrained network (only when not resuming);
    # keep only the weights whose names match this model's state dict.
    if args.pretrained_net is not None and args.resume is None:
        pretrained_w = keras2torch_weights(args.pretrained_net)
        model_dict = model.state_dict()
        pretrained_dict = {
            k: torch.from_numpy(v)
            for k, v in pretrained_w.items() if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

    # .cuda() moves parameters in place, so the optimizer created above
    # still references the same Parameter objects.
    model = model.cuda() if args.use_cuda else model

    # BUG FIX: honour the epoch recorded in a resumed checkpoint instead of
    # always restarting at 0, so epoch numbering, logs and checkpoint names
    # continue where the previous run left off.
    start_epoch = getattr(args, 'start_epoch', 0)

    # --------------- Start training ---------------
    for epoch in range(start_epoch, args.epochs):
        e_time = time.time()
        log.info('training: epoch {}/{} \n'.format(epoch + 1, args.epochs))

        model.train()
        train_losses = AverageMeter()

        for i, (train_image, train_depth) in enumerate(train_loader):
            if args.use_cuda:
                train_image = train_image.cuda()
                train_depth = train_depth.cuda()

            optimizer.zero_grad()

            # Normalize depth and add a channel dimension:
            # (B, H, W) -> (B, 1, H, W) to match the model output.
            depth_n = DepthNorm(train_depth, args.max_depth)
            depth_n = depth_n.unsqueeze(1)

            train_out = model(train_image)

            # Combined loss: SSIM term (weight 1.0) + L1 depth term (weight 0.1).
            train_loss_depth = l1_criterion(train_out, depth_n)
            train_loss_ssim = torch.clamp((1 - ssim(
                train_out, depth_n, val_range=args.max_depth / args.min_depth))
                                          * 0.5, 0, 1)

            train_loss = (1.0 * train_loss_ssim) + (0.1 * train_loss_depth)

            train_losses.update(train_loss.data.item(), train_image.size(0))
            train_loss.backward()
            optimizer.step()

        log.info('Train loss of epoch {} is {}'.format(epoch,
                                                       train_losses.avg))
        keeper.save_loss([epoch, train_losses.count, train_losses.avg],
                         'train_loss.csv')
        keeper.save_checkpoint({
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_loss': best_loss,
        })

        # --------------- validation ---------------
        log.info('validation: epoch {}/{} \n'.format(epoch + 1, args.epochs))
        model.eval()
        val_losses = AverageMeter()

        for i, (val_image, val_depth) in enumerate(val_loader):
            if args.use_cuda:
                val_image = val_image.cuda()
                val_depth = val_depth.cuda()

            with torch.no_grad():
                val_depth_n = DepthNorm(val_depth, args.max_depth)
                val_out = model(val_image)

            # NOTE(review): indexing [1] assumes every validation batch holds
            # at least two samples — confirm the validation batch size.
            if i % 100 == 1:
                keeper.save_img(val_image[1],
                                val_depth[1],
                                val_out[1],
                                img_name='val_{}_{}'.format(epoch, i))

            val_loss_depth = l1_criterion(val_out, val_depth_n)
            val_loss_ssim = torch.clamp(
                (1 - ssim(val_out,
                          val_depth_n,
                          val_range=args.max_depth / args.min_depth)) * 0.5, 0,
                1)

            val_loss = (1.0 * val_loss_ssim) + (0.1 * val_loss_depth)

            val_losses.update(val_loss.data.item(), val_image.size(0))

        log.info('Validation loss of epoch {} is {}'.format(
            epoch, val_losses.avg))
        keeper.save_loss([epoch, val_losses.count, val_losses.avg],
                         'validation_loss.csv')

        # Keep a separate copy of the best model seen so far.
        if val_losses.avg < best_loss:
            best_loss = val_losses.avg
            keeper.save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_loss': best_loss,
                }, 'best_model.pth')

        log.info('training time of epoch [%d/%d] is %d \n' %
                 (epoch + 1, args.epochs, time.time() - e_time))
Beispiel #10
0
    def train_eopch(self):
        """Run one training epoch of the Bayesian-loss regression trainer.

        Iterates the shuffled 'train' dataloader (batches of
        ``(inputs, points, targets, st_sizes)`` produced by the project's
        ``train_collate``), optimises the model with the Bayesian loss,
        accumulates loss / MAE / MSE of the predicted counts over the
        epoch, logs a summary and checkpoints the model to ``save_dir``.
        """
        print("regression Trainer ---> train_eopch")
        loss_meter = AverageMeter()
        mae_meter = AverageMeter()
        mse_meter = AverageMeter()
        tic = time.time()
        self.model.train()  # enable training behaviour (dropout, BN updates)

        for inputs, points, targets, st_sizes in self.dataloaders['train']:
            inputs = inputs.to(self.device)
            st_sizes = st_sizes.to(self.device)
            # Ground-truth head count per image = number of annotated points.
            gd_count = np.array([len(p) for p in points], dtype=np.float32)
            points = [p.to(self.device) for p in points]
            targets = [t.to(self.device) for t in targets]

            with torch.set_grad_enabled(True):
                # Forward pass: predicted density map (presumably downsampled
                # relative to the input — confirm against the model).
                outputs = self.model(inputs)

                # Posterior probabilities and Bayesian loss; post_prob and
                # criterion are configured during trainer setup
                # (Post_Prob / Bay_Loss).
                prob_list = self.post_prob(points, st_sizes)
                loss = self.criterion(prob_list, targets, outputs)

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                batch_size = inputs.size(0)
                # Predicted count per image = integral of its density map.
                pre_count = torch.sum(outputs.view(batch_size, -1),
                                      dim=1).detach().cpu().numpy()
                diff = pre_count - gd_count

                loss_meter.update(loss.item(), batch_size)
                mse_meter.update(np.mean(diff * diff), batch_size)
                mae_meter.update(np.mean(abs(diff)), batch_size)

        # Epoch summary: average loss, RMSE and MAE of the counts.
        logging.info(
            'Epoch {} Train, Loss: {:.2f}, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec'
            .format(self.epoch, loss_meter.get_avg(),
                    np.sqrt(mse_meter.get_avg()), mae_meter.get_avg(),
                    time.time() - tic))
        checkpoint_path = os.path.join(self.save_dir,
                                       '{}_ckpt.tar'.format(self.epoch))
        torch.save(
            {
                'epoch': self.epoch,
                'optimizer_state_dict': self.optimizer.state_dict(),
                'model_state_dict': self.model.state_dict()
            }, checkpoint_path)
        self.save_list.append(checkpoint_path)  # control the number of saved models
Beispiel #11
0
def train_wrapper(model):
    """Train *model* on the radar dataset with scheduled sampling.

    Sets up stdout + file logging, optionally loads a pretrained model,
    builds the train/valid dataloaders, then runs up to ``args.max_epoch``
    epochs with learning-rate decay, loss-curve plotting, periodic
    checkpointing and periodic evaluation. Relies on module-level ``args``,
    ``trainer``, ``preprocess``, ``logger`` and helper functions.
    """
    # logging
    log_format = '%(asctime)s %(message)s'  # standard format: timestamp + message
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.loss_dir, 'train.log'))  # also log to file
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    # record all the print content to txt file
    logger.make_print_to_file(args.loss_dir)

    if args.pretrained_model:
        model.load(args.pretrained_model)

    # load data
    train_inputs = radar_dataloader(
        args.train_data_paths,
        sample_shape=(args.total_length, 1, args.img_width,
                      args.img_width),  #(20 , 1, 140,140)
        input_len=args.input_length)
    test_inputs = radar_dataloader(args.valid_data_paths,
                                   sample_shape=(args.total_length, 1,
                                                 args.img_width,
                                                 args.img_width),
                                   input_len=args.input_length)
    train_loaders = torch.utils.data.DataLoader(train_inputs,
                                                batch_size=args.batch_size,
                                                shuffle=True,
                                                num_workers=0,
                                                pin_memory=True)
    test_loaders = torch.utils.data.DataLoader(test_inputs,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=0,
                                               pin_memory=True,
                                               drop_last=True)

    # schedule sampling: eta is the probability of feeding ground truth back in
    eta = args.sampling_start_value

    ### save traning loss and test loss
    train_loss = []
    test_loss = []
    test_ssim = []
    test_psnr = []
    test_fmae = []
    test_sharp = []
    test_iter = []

    llr = args.lr
    model.optimizer = adjust_learning_rate(model.optimizer, llr)  # set initial learning rate

    for epoch in tqdm(range(0, args.max_epoch + 1)):  #(0 , 61)
        losses = AverageMeter()

        if epoch % args.adjust_interval == 0 and epoch > 0:  # decay the learning rate every adjust_interval epochs
            llr = llr * args.adjust_rate  # learning rate *= 0.5
            model.optimizer = adjust_learning_rate(model.optimizer, llr)

        for ind, ims in enumerate(train_loaders):
            eta, real_input_flag = schedule_sampling(eta, epoch)
            # Rearrange frames into patches before training,
            ims = preprocess.reshape_patch(
                ims, args.patch_size)  #ims(4 , 20 , 140//4 , 140//4 , 4*4*1)
            tr_loss = trainer.train(model, ims, real_input_flag, args, epoch)
            train_loss.append(tr_loss)

            losses.update(tr_loss)

            if ind % args.display_interval == 0:
                logging.info('[{0}][{1}]\t'
                             'lr: {lr:.5f}\t'
                             'loss: {loss.val:.6f} ({loss.avg:.6f})'.format(
                                 epoch,
                                 ind,
                                 lr=model.optimizer.param_groups[-1]['lr'],
                                 loss=losses))

            torch.cuda.empty_cache()

        # plot figure to observe the losses
        x = range(len(train_loss))
        plt.figure(1)
        plt.title("this is losses of training")
        plt.plot(x, train_loss, label='loss')
        plt.legend()
        plt.savefig(args.loss_dir + '/train_loss.png')
        plt.close(1)
        # next

        if epoch % args.snapshot_interval == 0 and epoch > 0:  # save the model every snapshot_interval epochs
            model.save(epoch)

        if epoch % args.test_interval == 0 and epoch > 0:
            # Periodic evaluation on the validation set.
            with torch.no_grad():
                avg_mse, ssim, psnr, fmae, sharp = trainer.test(
                    model, test_loaders, args, epoch)
            test_iter.append(epoch)
            test_loss.append(avg_mse)
            test_ssim.append(ssim)
            test_psnr.append(psnr)
            test_fmae.append(fmae)
            test_sharp.append(sharp)

            # plot figure to observe the losses
            x = range(len(test_loss))
            plt.figure(1)
            plt.title("this is losses of validation")
            plt.plot(x, test_loss, label='loss')
            plt.legend()
            plt.savefig(args.loss_dir + '/valid_loss.png')
            plt.close(1)
            # next

        if epoch % args.loss_interval == 0 and epoch > 0:
            # Periodically dump all metric histories to a compressed .npz.
            fileName = "/loss epoch{}".format(
                epoch) + datetime.datetime.now().strftime('date:' + '%Y_%m_%d')
            np.savez_compressed(args.loss_dir + fileName,
                                train_loss=np.array(train_loss),
                                test_iter=np.array(test_iter),
                                test_loss=np.array(test_loss),
                                test_ssim=np.array(test_ssim),
                                test_psnr=np.array(test_psnr),
                                test_fmae=np.array(test_fmae),
                                test_sharp=np.array(test_sharp))

    # Final dump of all metric histories after the last epoch.
    fileName = "/loss all " + datetime.datetime.now().strftime('date:' +
                                                               '%Y_%m_%d')
    np.savez_compressed(args.loss_dir + fileName,
                        train_loss=np.array(train_loss),
                        test_iter=np.array(test_iter),
                        test_loss=np.array(test_loss),
                        test_ssim=np.array(test_ssim),
                        test_psnr=np.array(test_psnr),
                        test_fmae=np.array(test_fmae),
                        test_sharp=np.array(test_sharp))
Beispiel #12
0
    def train_epoch(self):
        """Train the model for one epoch over the 'train' dataloader.

        Optimises with the Bayesian loss, tracks per-epoch loss / MAE / MSE
        of the predicted counts, occasionally saves a side-by-side figure of
        the predicted density map and the de-normalised input image, then
        logs a summary and checkpoints the model to ``save_dir``.
        """
        loss_meter = AverageMeter()
        mae_meter = AverageMeter()
        mse_meter = AverageMeter()
        tic = time.time()
        self.model.train()  # enable training behaviour (dropout, BN updates)

        for inputs, points, targets, st_sizes in self.dataloaders['train']:
            inputs = inputs.to(self.device)
            st_sizes = st_sizes.to(self.device)
            # Ground-truth head count per image = number of annotated points.
            gd_count = np.array([len(p) for p in points], dtype=np.float32)
            points = [p.to(self.device) for p in points]
            targets = [t.to(self.device) for t in targets]

            with torch.set_grad_enabled(True):
                outputs = self.model(inputs)

                # On even epochs, randomly (~1/10 of batches) dump a figure:
                # predicted density map next to the de-normalised input
                # (ImageNet mean/std — presumably matching the dataloader's
                # normalisation; confirm against the transform pipeline).
                if self.epoch % 2 == 0 and random.randrange(10) == 0:
                    fig, (left, right) = plt.subplots(1, 2)
                    left.imshow(np.squeeze(outputs[0].cpu().detach().numpy()))
                    std = np.reshape([0.229, 0.224, 0.225], (3, 1, 1))
                    mean = np.reshape([0.485, 0.456, 0.406], (3, 1, 1))
                    denorm = inputs.cpu().numpy()[0][:3] * std + mean
                    right.imshow(np.moveaxis(denorm, 0, -1))
                    fig.savefig(
                        os.path.join(
                            self.save_dir, '%d-test%d.png' %
                            (self.epoch, random.randrange(1000000))))
                    plt.close(fig)

                # Bayesian loss from posterior probabilities; post_prob and
                # criterion are configured during trainer setup.
                prob_list = self.post_prob(points, st_sizes)
                loss = self.criterion(prob_list, targets, outputs)

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                batch_size = inputs.size(0)
                # Predicted count per image = integral of its density map.
                pre_count = torch.sum(outputs.view(batch_size, -1),
                                      dim=1).detach().cpu().numpy()
                diff = pre_count - gd_count
                loss_meter.update(loss.item(), batch_size)
                mse_meter.update(np.mean(diff * diff), batch_size)
                mae_meter.update(np.mean(abs(diff)), batch_size)

        # Epoch summary: average loss, RMSE and MAE of the counts.
        logging.info(
            'Epoch {} Train, Loss: {:.2f}, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec'
            .format(self.epoch, loss_meter.get_avg(),
                    np.sqrt(mse_meter.get_avg()), mae_meter.get_avg(),
                    time.time() - tic))
        checkpoint_path = os.path.join(self.save_dir,
                                       '{}_ckpt.tar'.format(self.epoch))
        torch.save(
            {
                'epoch': self.epoch,
                'optimizer_state_dict': self.optimizer.state_dict(),
                'model_state_dict': self.model.state_dict()
            }, checkpoint_path)
        self.save_list.append(checkpoint_path)  # control the number of saved models