Code Example #1
File: train_due.py  Project: lupalab/DUE
# Imports needed by this excerpt. The torch/gpytorch/ignite paths are standard;
# the project-local module paths are assumed from the DUE repository layout.
import json

import torch
from torch.utils.tensorboard import SummaryWriter

from gpytorch.likelihoods import SoftmaxLikelihood
from gpytorch.mlls import VariationalELBO

from ignite.contrib.handlers import ProgressBar
from ignite.engine import Engine, Events
from ignite.metrics import Accuracy, Average, Loss

from due.dkl import DKL_GP, GP, initial_values_for_GP  # assumed module path
from due.wide_resnet import WideResNet                 # assumed module path
from lib.datasets import get_dataset                   # assumed module path
from lib.evaluate_ood import get_ood_metrics           # assumed module path
from lib.utils import get_results_directory, set_seed  # assumed module path


def main(hparams):
    results_dir = get_results_directory(hparams.output_dir)
    writer = SummaryWriter(log_dir=str(results_dir))

    ds = get_dataset(hparams.dataset, root=hparams.data_root)
    input_size, num_classes, train_dataset, test_dataset = ds

    hparams.seed = set_seed(hparams.seed)

    if hparams.n_inducing_points is None:
        hparams.n_inducing_points = num_classes

    print(f"Training with {hparams}")
    hparams.save(results_dir / "hparams.json")

    if hparams.ard:
        # Hardcoded to WRN output size
        ard = 640
    else:
        ard = None

    feature_extractor = WideResNet(
        spectral_normalization=hparams.spectral_normalization,
        dropout_rate=hparams.dropout_rate,
        coeff=hparams.coeff,
        n_power_iterations=hparams.n_power_iterations,
        batchnorm_momentum=hparams.batchnorm_momentum,
    )

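    # Initialize inducing point locations and the kernel lengthscale from
    # feature-extractor outputs on the training set.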
    initial_inducing_points, initial_lengthscale = initial_values_for_GP(
        train_dataset, feature_extractor, hparams.n_inducing_points
    )

    gp = GP(
        num_outputs=num_classes,
        initial_lengthscale=initial_lengthscale,
        initial_inducing_points=initial_inducing_points,
        separate_inducing_points=hparams.separate_inducing_points,
        kernel=hparams.kernel,
        ard=ard,
        lengthscale_prior=hparams.lengthscale_prior,
    )

    model = DKL_GP(feature_extractor, gp)
    model = model.cuda()

    likelihood = SoftmaxLikelihood(num_classes=num_classes, mixing_weights=False)
    likelihood = likelihood.cuda()

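    # Variational ELBO objective; num_data scales the KL term to the full dataset size.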
    elbo_fn = VariationalELBO(likelihood, gp, num_data=len(train_dataset))

    parameters = [
        {"params": feature_extractor.parameters(), "lr": hparams.learning_rate},
        {"params": gp.parameters(), "lr": hparams.learning_rate},
        {"params": likelihood.parameters(), "lr": hparams.learning_rate},
    ]

    optimizer = torch.optim.SGD(
        parameters, momentum=0.9, weight_decay=hparams.weight_decay
    )

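    # Step-decay schedule: the learning rate is multiplied by 0.2 at each milestone epoch.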
    milestones = [60, 120, 160]

    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.2
    )

    def step(engine, batch):
        model.train()
        likelihood.train()

        optimizer.zero_grad()

        x, y = batch
        x, y = x.cuda(), y.cuda()

        y_pred = model(x)
        elbo = -elbo_fn(y_pred, y)  # negative ELBO, i.e. the scalar loss to minimize

        elbo.backward()
        optimizer.step()

        return elbo.item()

    def eval_step(engine, batch):
        model.eval()
        likelihood.eval()

        x, y = batch
        x, y = x.cuda(), y.cuda()

        with torch.no_grad():
            y_pred = model(x)

        return y_pred, y

    trainer = Engine(step)
    evaluator = Engine(eval_step)

    metric = Average()
    metric.attach(trainer, "elbo")

    def output_transform(output):
        y_pred, y = output

        # Sample softmax values independently for classification at test time
        y_pred = y_pred.to_data_independent_dist()

        # The mean here is over likelihood samples
        y_pred = likelihood(y_pred).probs.mean(0)

        return y_pred, y

    metric = Accuracy(output_transform=output_transform)
    metric.attach(evaluator, "accuracy")

    metric = Loss(lambda y_pred, y: -elbo_fn(y_pred, y))
    metric.attach(evaluator, "elbo")

    kwargs = {"num_workers": 4, "pin_memory": True}

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=hparams.batch_size,
        shuffle=True,
        drop_last=True,
        **kwargs,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=512, shuffle=False, **kwargs
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_results(trainer):
        metrics = trainer.state.metrics
        elbo = metrics["elbo"]

        print(f"Train - Epoch: {trainer.state.epoch} ELBO: {elbo:.2f} ")
        writer.add_scalar("Likelihood/train", elbo, trainer.state.epoch)

        if hparams.spectral_normalization:
            for name, layer in model.feature_extractor.named_modules():
                if isinstance(layer, torch.nn.Conv2d):
                    writer.add_scalar(
                        f"sigma/{name}", layer.weight_sigma, trainer.state.epoch
                    )

        if not hparams.ard:
            # With ARD there is one lengthscale per feature, too many to log to TensorBoard
            length_scales = model.gp.covar_module.base_kernel.lengthscale.squeeze()
            for i in range(length_scales.shape[0]):
                writer.add_scalar(
                    f"length_scale/{i}", length_scales[i], trainer.state.epoch
                )

        if trainer.state.epoch > 150 and trainer.state.epoch % 5 == 0:
            _, auroc, aupr = get_ood_metrics(
                hparams.dataset, "SVHN", model, likelihood, hparams.data_root
            )
            print(f"OoD Metrics - AUROC: {auroc}, AUPR: {aupr}")
            writer.add_scalar("OoD/auroc", auroc, trainer.state.epoch)
            writer.add_scalar("OoD/auprc", aupr, trainer.state.epoch)

        evaluator.run(test_loader)
        metrics = evaluator.state.metrics
        acc = metrics["accuracy"]
        elbo = metrics["elbo"]

        print(
            f"Test - Epoch: {trainer.state.epoch} "
            f"Acc: {acc:.4f} "
            f"ELBO: {elbo:.2f} "
        )

        writer.add_scalar("Likelihood/test", elbo, trainer.state.epoch)
        writer.add_scalar("Accuracy/test", acc, trainer.state.epoch)

        scheduler.step()

    pbar = ProgressBar(dynamic_ncols=True)
    pbar.attach(trainer)

    trainer.run(train_loader, max_epochs=200)

    # Done training - time to evaluate
    results = {}

    evaluator.run(train_loader)
    train_acc = evaluator.state.metrics["accuracy"]
    train_elbo = evaluator.state.metrics["elbo"]
    results["train_accuracy"] = train_acc
    results["train_elbo"] = train_elbo

    evaluator.run(test_loader)
    test_acc = evaluator.state.metrics["accuracy"]
    test_elbo = evaluator.state.metrics["elbo"]
    results["test_accuracy"] = test_acc
    results["test_elbo"] = test_elbo

    _, auroc, aupr = get_ood_metrics(
        hparams.dataset, "SVHN", model, likelihood, hparams.data_root
    )
    results["auroc_ood_svhn"] = auroc
    results["aupr_ood_svhn"] = aupr

    print(f"Test - Accuracy {results['test_accuracy']:.4f}")

    results_json = json.dumps(results, indent=4, sort_keys=True)
    (results_dir / "results.json").write_text(results_json)

    torch.save(model.state_dict(), results_dir / "model.pt")
    torch.save(likelihood.state_dict(), results_dir / "likelihood.pt")

    writer.close()
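
For reference, main expects an hparams object that supports attribute access, pretty-printing, and a save method. A minimal hypothetical driver, shown below; the Hyperparameters stand-in and all defaults are illustrative, not the project's own:

import argparse
import json
from pathlib import Path

class Hyperparameters(argparse.Namespace):
    # hypothetical stand-in for the project's hparams container
    def save(self, path):
        Path(path).write_text(json.dumps(vars(self), indent=4, default=str))

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="CIFAR10")
    parser.add_argument("--data_root", default="./data")
    parser.add_argument("--output_dir", default="./runs")
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--learning_rate", type=float, default=0.1)
    parser.add_argument("--weight_decay", type=float, default=5e-4)
    parser.add_argument("--dropout_rate", type=float, default=0.3)
    parser.add_argument("--coeff", type=float, default=3.0)
    parser.add_argument("--n_power_iterations", type=int, default=1)
    parser.add_argument("--batchnorm_momentum", type=float, default=0.01)
    parser.add_argument("--n_inducing_points", type=int, default=None)
    parser.add_argument("--kernel", default="RBF")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--spectral_normalization", action="store_true")
    parser.add_argument("--separate_inducing_points", action="store_true")
    parser.add_argument("--ard", action="store_true")
    parser.add_argument("--lengthscale_prior", action="store_true")
    main(parser.parse_args(namespace=Hyperparameters()))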
Code Example #2
                    help='search-value')
parser.add_argument('--tau_dm_step',
                    type=float,
                    default=1.0,
                    help='search-value')
parser.add_argument('--rho_begin',
                    type=float,
                    default=1.0,
                    help='search-value')
parser.add_argument('--rho_end', type=float, default=2.0, help='search-value')
parser.add_argument('--rho_step', type=float, default=1.0, help='search-value')

args = parser.parse_args()

cuda = torch.cuda.is_available()
set_seed(args.seed, cuda)

if cuda:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

inp_size = 40 * 40  # 40x40 frames flattened to 1600-dimensional vectors


def set_data():
    ## load dataset
    # [5,50,100,40,40]; [5,50,100,40,40]
    # train_data = np.load("../data/5class_50sam_40x40_kinetics_train.npy")
    # test_data = np.load("../data/5class_50sam_40x40_kinetics_val.npy")
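
The body of set_data is cut off above. A hypothetical completion, inferred only from the commented shapes ([5 classes, 50 samples, 100 frames, 40, 40]) and the inp_size and device set earlier; the label layout is an assumption:

import numpy as np
import torch

def set_data():
    # shapes per the comments above: [5, 50, 100, 40, 40]
    train_data = np.load("../data/5class_50sam_40x40_kinetics_train.npy")
    test_data = np.load("../data/5class_50sam_40x40_kinetics_val.npy")

    def to_tensors(arr):
        n_cls, n_sam, n_frames = arr.shape[:3]
        x = torch.from_numpy(arr).float().reshape(n_cls * n_sam, n_frames, inp_size)
        y = torch.arange(n_cls).repeat_interleave(n_sam)  # class id from the first axis (assumed)
        return x.to(device), y.to(device)

    return to_tensors(train_data), to_tensors(test_data)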
Code Example #3
def main(args):
    this_dir = osp.join(osp.dirname(__file__), '.')
    save_dir = osp.join(this_dir, 'checkpoints')
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    command = 'python ' + ' '.join(sys.argv)
    logger = utl.setup_logger(osp.join(this_dir, 'log.txt'), command=command)
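    # Restrict visible GPUs before any CUDA context is created.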
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))

    model = build_model(args)
    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.apply(utl.weights_init)
    if args.distributed:
        # note: nn.DataParallel is single-process multi-GPU, not DistributedDataParallel
        model = nn.DataParallel(model)
    model = model.to(device)

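    # Index 21 is excluded from the loss (presumably the dataset's ambiguous/background class).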
    criterion = utl.MultiCrossEntropyLoss(ignore_index=21).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    if osp.isfile(args.checkpoint):
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
        args.start_epoch += checkpoint['epoch']
    softmax = nn.Softmax(dim=1).to(device)

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
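        # One-time 10x learning-rate decay at epoch 21.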
        if epoch == 21:
            args.lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        data_loaders = {
            phase: utl.build_data_loader(args, phase)
            for phase in args.phases
        }

        enc_losses = {phase: 0.0 for phase in args.phases}
        enc_score_metrics = []
        enc_target_metrics = []
        enc_mAP = 0.0
        dec_losses = {phase: 0.0 for phase in args.phases}
        dec_score_metrics = []
        dec_target_metrics = []
        dec_mAP = 0.0

        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            elif not training and args.debug:
                model.train(False)
            else:
                continue

            with torch.set_grad_enabled(training):
                for batch_idx, (camera_inputs, motion_inputs, enc_target, dec_target) \
                        in enumerate(data_loaders[phase], start=1):
                    batch_size = camera_inputs.shape[0]
                    camera_inputs = camera_inputs.to(device)
                    motion_inputs = motion_inputs.to(device)
                    enc_target = enc_target.to(device).view(
                        -1, args.num_classes)
                    dec_target = dec_target.to(device).view(
                        -1, args.num_classes)

                    enc_score, dec_score = model(camera_inputs, motion_inputs)
                    enc_loss = criterion(enc_score, enc_target)
                    dec_loss = criterion(dec_score, dec_target)
                    enc_losses[phase] += enc_loss.item() * batch_size
                    dec_losses[phase] += dec_loss.item() * batch_size

                    if args.verbose:
                        print(
                            'Epoch: {:2} | iteration: {:3} | enc_loss: {:.5f} dec_loss: {:.5f}'
                            .format(epoch, batch_idx, enc_loss.item(),
                                    dec_loss.item()))

                    if training:
                        optimizer.zero_grad()
                        loss = enc_loss + dec_loss
                        loss.backward()
                        optimizer.step()
                    else:
                        # Prepare metrics for encoder
                        enc_score = softmax(enc_score).cpu().numpy()
                        enc_target = enc_target.cpu().numpy()
                        enc_score_metrics.extend(enc_score)
                        enc_target_metrics.extend(enc_target)
                        # Prepare metrics for decoder
                        dec_score = softmax(dec_score).cpu().numpy()
                        dec_target = dec_target.cpu().numpy()
                        dec_score_metrics.extend(dec_score)
                        dec_target_metrics.extend(dec_target)
        end = time.time()

        if args.debug:
            result_file = 'inputs-{}-epoch-{}.json'.format(args.inputs, epoch)
            # Compute result for encoder
            enc_mAP = utl.compute_result_multilabel(
                args.class_index,
                enc_score_metrics,
                enc_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=True,
            )
            # Compute result for decoder
            dec_mAP = utl.compute_result_multilabel(
                args.class_index,
                dec_score_metrics,
                dec_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=False,
            )

        # Output result
        logger.output(epoch,
                      enc_losses,
                      dec_losses,
                      len(data_loaders['train'].dataset),
                      len(data_loaders['test'].dataset),
                      enc_mAP,
                      dec_mAP,
                      end - start,
                      debug=args.debug)

        # Save model
        checkpoint_file = 'inputs-{}-epoch-{}.pth'.format(args.inputs, epoch)
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': (model.module.state_dict()
                                     if args.distributed else model.state_dict()),
                'optimizer_state_dict': optimizer.state_dict(),
            }, osp.join(save_dir, checkpoint_file))
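
The checkpoint above stores exactly the keys that the resume branch at the top of main reads back. A standalone reload would look like this (the file name is illustrative; load before wrapping in DataParallel, as main does):

ckpt = torch.load(osp.join(save_dir, 'inputs-camera-epoch-5.pth'),
                  map_location=torch.device('cpu'))
model.load_state_dict(ckpt['model_state_dict'])
optimizer.load_state_dict(ckpt['optimizer_state_dict'])
next_epoch = ckpt['epoch'] + 1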
Code Example #4
def main(args):
    this_dir = osp.join(osp.dirname(__file__), '.')

    ### make directory for each step size
    # '/dataset/volume1/users/yumin/result'
    #'/data/yumin/result'
    save_dir = osp.join(
        '/dataset/volume1/users/yumin/result',
        'delta_{}_checkpoints_method{}_noenc_smoothbeta0.5'.format(
            args.dataset, args.method))

    if not osp.isdir(save_dir):
        os.makedirs(save_dir)

    command = 'python ' + ' '.join(sys.argv)
    logger = utl.setup_logger(osp.join(this_dir, 'lstm_log.txt'),
                              command=command)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))

    model = build_model(args)
    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.apply(utl.weights_init)
    if args.distributed:
        # note: nn.DataParallel is single-process multi-GPU, not DistributedDataParallel
        model = nn.DataParallel(model)
    model = model.to(device)

    if args.dataset == 'THUMOS':
        criterion1 = utl.MultiCrossEntropyLoss_Delta(
            num_class=args.num_classes,
            dirichlet=args.dirichlet,
            ignore_index=21).to(device)
        # criterion2 = nn.MSELoss()
        # criterion2 = nn.L1Loss()
        criterion2 = nn.SmoothL1Loss()
        # criterion2 = nn.HuberLoss()

    elif args.dataset == "TVSeries":
        # Both criteria are used below regardless of dataset, so define both here.
        criterion1 = utl.MultiCrossEntropyLoss_Delta(
            num_class=args.num_classes, dirichlet=args.dirichlet).to(device)
        criterion2 = nn.SmoothL1Loss()

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    if osp.isfile(args.checkpoint):
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
        args.start_epoch += checkpoint['epoch']

    softmax = nn.Softmax(dim=1).to(device)  # defined but unused below; raw scores are collected

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        if epoch == 21:
            args.lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        data_loaders = {
            phase: utl.build_data_loader(args, phase)
            for phase in args.phases
        }

        enc_losses = {phase: 0.0 for phase in args.phases}
        enc_score_metrics = []
        enc_target_metrics = []
        delta_score_metrics = []
        delta_target_metrics = []
        enc_mAP = 0.0
        delta_mAP = 0.0

        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            elif not training and args.debug:
                model.train(False)
            else:
                continue

            with torch.set_grad_enabled(training):
                for batch_idx, (camera_inputs, motion_inputs, enc_target, smooth_target) \
                        in enumerate(data_loaders[phase], start=1):

                    batch_size = camera_inputs.shape[0]
                    camera_inputs = camera_inputs.to(device)
                    motion_inputs = motion_inputs.to(device)

                    extend_target = enc_target.to(device)
                    enc_target = enc_target.to(device).view(
                        -1, args.num_classes)
                    smooth_target = smooth_target.to(device)
                    oad_score, delta_score = model(camera_inputs,
                                                   motion_inputs)

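                    # Detach the OAD scores so the delta target does not backpropagate into the OAD head.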
                    oad_before = oad_score.clone().detach()
                    oad_before = oad_before[:, 1:, :]

                    ## build the delta target and compute the delta loss
                    new_target = smooth_target[:, 1:, :] - oad_before
                    # print('***** DELTA TARGET')
                    # print(new_target)
                    # print('***** DELTA SCORE')
                    # print(delta_score[:, 1:, :])

                    oad_loss = criterion1(oad_score, extend_target)
                    delta_loss = criterion2(delta_score[:, 1:, :],
                                            new_target)  # the first time step is ignored
                    # delta_loss = criterion2(delta_score[:, 1:, :], extend_target[:, 1:, :])   # without label smoothing

                    enc_losses[phase] += oad_loss.item() * batch_size

                    if args.verbose:
                        print(
                            'Epoch: {:2} | iteration: {:3} | enc_loss: {:.5f} | delta_loss: {:.5f}'
                            .format(epoch, batch_idx, oad_loss.item(),
                                    delta_loss.item() * 10))

                    if training:
                        optimizer.zero_grad()
                        loss = oad_loss + delta_loss * 10  # delta term up-weighted x10, matching the log line above
                        loss.backward()
                        optimizer.step()
                    else:
                        # Prepare metrics for encoder
                        enc_score = oad_score.cpu().numpy()  ## note: raw scores, no softmax applied here
                        enc_target = extend_target.cpu().numpy()
                        enc_score_metrics.extend(enc_score)
                        enc_target_metrics.extend(enc_target)
                        delta_score_c = delta_score[:, 1:, :].reshape(
                            -1, args.num_classes)
                        delta = delta_score_c.cpu().numpy()
                        new_target_c = new_target.reshape(-1, args.num_classes)
                        delta_target = new_target_c.cpu().numpy()
                        delta_score_metrics.extend(delta)
                        delta_target_metrics.extend(delta_target)

        end = time.time()

        if args.debug:
            if epoch % 1 == 0:  # always true as written; raise the modulus to evaluate less often
                result_file = osp.join(
                    this_dir,
                    'delta-inputs-{}-epoch-{}.json'.format(args.inputs, epoch))
                # Compute result for encoder
                enc_mAP = utl.compute_result_multilabel(
                    args.dataset,
                    args.class_index,
                    enc_score_metrics,
                    enc_target_metrics,
                    save_dir,
                    result_file,
                    ignore_class=[0, 21],
                    save=True,
                )

                delta_mAP = utl.compute_result_multilabel(
                    args.dataset,
                    args.class_index,
                    delta_score_metrics,
                    delta_target_metrics,
                    save_dir,
                    result_file,
                    ignore_class=[0, 21],
                    save=True,
                    smooth=True,
                )

        # Output result
        logger.delta_output(epoch,
                            enc_losses,
                            len(data_loaders['train'].dataset),
                            len(data_loaders['test'].dataset),
                            enc_mAP,
                            delta_mAP,
                            end - start,
                            debug=args.debug)

        # Save model
        checkpoint_file = 'delta-inputs-{}-epoch-{}.pth'.format(
            args.inputs, epoch)
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': (model.module.state_dict()
                                     if args.distributed else model.state_dict()),
                'optimizer_state_dict': optimizer.state_dict(),
            }, osp.join(save_dir, checkpoint_file))