Example #1
def main():
    global args
    checkpoint = None
    #is_eval = False
    is_eval = True  # I added this for testing, 2020/02/26
    if args.evaluate:
        args_new = args
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}' ... ".format(args.evaluate),
                  end='')
            checkpoint = torch.load(args.evaluate, map_location=device)
            args = checkpoint['args']
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            is_eval = True
            print("Completed.")
        else:
            print("No model found at '{}'".format(args.evaluate))
            return

    print("=> creating model and optimizer ... ", end='')
    model = DepthCompletionNet(args).to(device)
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("completed.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")

    model = torch.nn.DataParallel(model)

    # Data loading code
    print("=> creating data loaders ... ")

    val_dataset = KittiDepth('test_completion', args)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=2,
        pin_memory=True)  # set batch size to be 1 for validation
    print("\t==> val_loader size:{}".format(len(val_loader)))

    # create backups and results folder
    logger = helper.logger(args)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        print("=> starting model test ...")
        # guard: is_eval is forced True above, so checkpoint may still be None
        epoch = checkpoint['epoch'] if checkpoint is not None else 0
        result, is_best = iterate("test_completion", args, val_loader, model,
                                  None, logger, epoch)
        return
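Every variant of main() in these examples relies on the same checkpoint convention: a plain dict serialized with torch.save. A minimal, self-contained sketch of that round trip (the dict keys match the snippets; the tiny model and file name are illustrative stand-ins):

import torch

# stand-ins for the real model/optimizer used in the examples
model = torch.nn.Linear(4, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
device = torch.device('cpu')

# save: epoch, weights, best result, optimizer state and parsed args
# go into a single dict (cf. the helper.save_checkpoint calls below)
torch.save({
    'epoch': 0,
    'model': model.state_dict(),
    'best_result': None,
    'optimizer': optimizer.state_dict(),
    'args': None,
}, 'checkpoint-0.pth.tar')

# load: map_location lets a GPU-trained checkpoint load on any device
checkpoint = torch.load('checkpoint-0.pth.tar', map_location=device)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])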
Example #2
def main():
    global args
    if args.partial_train == 'yes':  # train on a part of the whole train set
        print(
            "Can't use partial train here. It is used only for test check. Exit..."
        )
        return

    if args.test != "yes":
        print(
            "This main should use only for testing, but test=yes wat not given. Exit..."
        )
        return

    print("Evaluating test set with main_test:")
    whole_ts = time.time()
    checkpoint = None
    is_eval = False
    if args.evaluate:  # test a finished model
        args_new = args  # not a copy: a second reference, kept before args is rebound below
        if os.path.isfile(args.evaluate):  # path is an existing regular file
            print("=> loading finished model from '{}' ... ".format(
                args.evaluate),
                  end='')  # "end=''" disables the newline
            checkpoint = torch.load(args.evaluate, map_location=device)
            args = checkpoint['args']
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            args.save_images = args_new.save_images
            args.result = args_new.result
            is_eval = True
            print("Completed.")
        else:
            print("No model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # resume from a checkpoint
        args_new = args
        if os.path.isfile(args.resume):
            print("=> loading checkpoint from '{}' ... ".format(args.resume),
                  end='')
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] + 1
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            print("Completed. Resuming from epoch {}.".format(
                checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer ... ", end='')
    model = DepthCompletionNet(args).to(device)
    model_named_params = [
        p for _, p in model.named_parameters(
        )  # named_parameters() yields (name, param) tuples; "_" discards the name
        if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("completed.")
    print('\n'.join(f'{k:<20}: {v}' for k, v in model.__dict__.items()))  # dump the model's attributes for inspection

    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")

    model = torch.nn.DataParallel(
        model
    )  # run the model in parallel: DataParallel splits the input batch automatically and dispatches the chunks to replicas of the model on several GPUs.
    # After each replica finishes, DataParallel gathers and merges the results before returning them

    # data loading code
    print("=> creating data loaders ... ")
    if not is_eval:  # we're not evaluating
        train_dataset = KittiDepth('train',
                                   args)  # get the paths for the files
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=None)  # load them
        print("\t==> train_loader size:{}".format(len(train_loader)))

    if args_new.test == "yes":  # will take the data from the "test" folders
        val_dataset = KittiDepth('test', args)
        is_test = 'yes'
    else:
        val_dataset = KittiDepth('val', args)
        is_test = 'no'
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=2,
        pin_memory=True)  # set batch size to be 1 for validation
    print("\t==> val_loader size:{}".format(len(val_loader)))

    # create backups and results folder
    logger = helper.logger(args, is_test)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")  # logger records sequential data to a log file

    # main code - run the NN
    if is_eval:
        print("=> starting model evaluation ...")
        result, is_best = iterate("val", args, val_loader, model, None, logger,
                                  checkpoint['epoch'])
        return

    print("=> starting model training ...")
    for epoch in range(args.start_epoch, args.epochs):
        print("=> start training epoch {}".format(epoch) +
              "/{}..".format(args.epochs))
        train_ts = time.time()
        iterate("train", args, train_loader, model, optimizer, logger,
                epoch)  # train for one epoch
        result, is_best = iterate("val", args, val_loader, model, None, logger,
                                  epoch)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer': optimizer.state_dict(),
            'args': args,
        }, is_best, epoch, logger.output_directory)
        print("finish training epoch {}, time elapsed {:.2f} hours, \n".format(
            epoch, (time.time() - train_ts) / 3600))
    last_checkpoint = os.path.join(
        logger.output_directory, 'checkpoint-' + str(epoch) + '.pth.tar'
    )  # delete the last epoch checkpoint; the best model is already saved, so it isn't needed
    os.remove(last_checkpoint)
    print("finished model training, time elapsed {0:.2f} hours, \n".format(
        (time.time() - whole_ts) / 3600))
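Note the ordering above: the checkpoint is loaded into the bare model before the torch.nn.DataParallel wrap, and at save time model.module.state_dict() reaches through the wrapper. That keeps the checkpoint keys free of the 'module.' prefix the wrapper adds. A small sketch of the difference (the Linear model is just a stand-in):

import torch

net = torch.nn.Linear(4, 1)
wrapped = torch.nn.DataParallel(net)

print(list(wrapped.state_dict())[0])         # 'module.weight' -- prefixed
print(list(wrapped.module.state_dict())[0])  # 'weight'        -- clean

# a checkpoint saved from wrapped.module loads into a plain, unwrapped model
torch.nn.Linear(4, 1).load_state_dict(wrapped.module.state_dict())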
Example #3
        model_is.sort()
        print(model_is)
        while model_is in used_model_is:  # re-draw until an unused index combination appears
            random.shuffle(possible_model_is)
            model_is = possible_model_is[0:M]
            model_is.sort()
            print(model_is)
        used_model_is.append(model_is)

        models = []
        for i in model_is:
            restore_from = "/root/evaluating_bdl/depthCompletion/trained_models/%s_%d/checkpoint_40000.pth" % (
                model_id, i)
            model = DepthCompletionNet().cuda()
            model = torch.nn.DataParallel(model)
            model.load_state_dict(torch.load(restore_from))
            model.eval()
            models.append(model)

        M_float = float(len(models))
        print(M_float)

        batch_losses = []
        batch_rmses = []
        sigma_alea_values = np.array([])
        sigma_epi_values = np.array([])
        sigma_pred_values = np.array([])
        squared_error_values = np.array([])
        for i_iter, batch in enumerate(eval_loader):
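Example #3 cuts off at the evaluation loop, but the accumulators above point at the usual ensemble decomposition: aleatoric variance is the mean of the members' predicted variances, epistemic variance is the variance of the members' predicted means, and the total predictive variance is their sum. A hedged sketch of that computation (the shapes and names are assumptions, not the repository's exact code):

import numpy as np

M, H, W = 4, 352, 1216
means = np.random.rand(M, H, W)      # per-member predicted depth mean
variances = np.random.rand(M, H, W)  # per-member predicted (aleatoric) variance

mu_pred = means.mean(axis=0)         # ensemble mean
sigma_alea = variances.mean(axis=0)  # aleatoric: mean of variances
sigma_epi = means.var(axis=0)        # epistemic: variance of means
sigma_pred = sigma_alea + sigma_epi  # total predictive variance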
Example #4
        models = []
        for i, current_NN in enumerate(
                NNs_weights
        ):  # load each pre-trained 'black-box' NN so it can be used for prediction
            checkpoint = None
            if os.path.isfile(current_NN):
                print("=> loading checkpoint '{}' ... ".format(current_NN),
                      end='')
                checkpoint = torch.load(current_NN, map_location=device)
                args = checkpoint['args']
                is_eval = True
                print("Completed.")
            else:
                assert False, ("No model found at '{}'".format(current_NN))

            model = DepthCompletionNet(args).to(device)
            model_named_params = [
                p for _, p in model.named_parameters() if p.requires_grad
            ]
            model.load_state_dict(checkpoint['model'])
            model = torch.nn.DataParallel(model)

            NN_arguments.append(args)
            models.append(model)

        # predict & create samples for the training sets #
        if len(existing_weights) == 0:  # no pre-existing weights: generate training samples, not only inference
            for set_num in range(1, M + 1):
                print("\nSTART PREDICTING train set num: {} for next phase\n".
                      format(set_num))
                pred_samp_train_set_time = time.time()
Example #5
def main():
    global args
    checkpoint = None
    is_eval = False
    if args.evaluate:
        args_new = args
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}' ... ".format(args.evaluate),
                  end='')
            checkpoint = torch.load(args.evaluate, map_location=device)
            args = checkpoint['args']
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            args.result = args_new.result
            is_eval = True
            print("Completed.")
        else:
            print("No model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # optionally resume from a checkpoint
        args_new = args
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}' ... ".format(args.resume),
                  end='')
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] + 1
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            args.result = args_new.result
            print("Completed. Resuming from epoch {}.".format(
                checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer ... ", end='')
    model = DepthCompletionNet(args).to(device)
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("completed.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")

    model = torch.nn.DataParallel(model)

    # Data loading code
    print("=> creating data loaders ... ")
    if not is_eval:
        train_dataset = KittiDepth('train', args)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=None)
        print("\t==> train_loader size:{}".format(len(train_loader)))
    val_dataset = KittiDepth('val', args)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=2,
        pin_memory=True)  # set batch size to be 1 for validation
    print("\t==> val_loader size:{}".format(len(val_loader)))

    # create backups and results folder
    logger = helper.logger(args)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        print("=> starting model evaluation ...")
        result, is_best = iterate("val", args, val_loader, model, None, logger,
                                  checkpoint['epoch'])
        return

    # main loop
    print("=> starting main loop ...")
    for epoch in range(args.start_epoch, args.epochs):
        print("=> starting training epoch {} ..".format(epoch))
        iterate("train", args, train_loader, model, optimizer, logger,
                epoch)  # train for one epoch
        result, is_best = iterate("val", args, val_loader, model, None, logger,
                                  epoch)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer': optimizer.state_dict(),
            'args': args,
        }, is_best, epoch, logger.output_directory)
Example #6
                                             crop_size=(352, 352))
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=4)

    val_dataset = DatasetKITTIVal(kitti_depth_path=kitti_depth_path)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=val_batch_size,
                                             shuffle=False,
                                             num_workers=1)

    criterion = MaskedL2Gauss().cuda()
    rmse_criterion = RMSE().cuda()

    model = DepthCompletionNet().cuda()
    model = torch.nn.DataParallel(model)
    model.train()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)
    optimizer.zero_grad()

    train_losses = []
    batch_train_losses = []
    val_losses = []
    train_rmses = []
    batch_train_rmses = []
    val_rmses = []
    for i_iter, batch in enumerate(train_loader):
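MaskedL2Gauss itself is not shown in these snippets. For heteroscedastic depth regression of this kind, the loss is typically a Gaussian negative log-likelihood averaged over pixels that have ground truth; a sketch under that assumption (not the repository's exact implementation):

import torch

def masked_gaussian_nll(mean, log_var, target):
    # KITTI ground-truth depth marks missing measurements with 0,
    # so only strictly positive pixels contribute to the loss
    valid = target > 0
    nll = 0.5 * (log_var + (mean - target) ** 2 / log_var.exp())
    return nll[valid].mean()

mean = torch.rand(2, 1, 352, 1216)
log_var = torch.zeros_like(mean)
target = torch.rand_like(mean)
print(masked_gaussian_nll(mean, log_var, target))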
Example #7
def main():
    global args
    checkpoint = None
    is_eval = False
    if args.evaluate:
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}'".format(args.evaluate))
            checkpoint = torch.load(args.evaluate)
            args = checkpoint['args']
            is_eval = True
            print("=> checkpoint loaded.")
        else:
            print("=> no model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # optionally resume from a checkpoint
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer...")
    model = DepthCompletionNet(args).cuda()
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("=> model and optimizer created.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")

    model = torch.nn.DataParallel(model)
    print("=> model transferred to multi-GPU.")

    # Data loading code
    print("=> creating data loaders ...")
    if not is_eval:
        train_dataset, train_loader = get_kitti_dataloader(
            mode='train',
            dataset_name=dataset_name,
            setname='train',
            args=args)
        # train_dataset = KittiDepth('train', args)
        # train_loader = torch.utils.data.DataLoader(
        #     train_dataset, batch_size=args.batch_size, shuffle=True,
        #     num_workers=args.workers, pin_memory=True, sampler=None)

    val_dataset, val_loader = get_kitti_dataloader(mode='eval',
                                                   dataset_name=dataset_name,
                                                   setname='test',
                                                   args=args)

    # change dataset here:
    # val_dataset = KittiDepth('val', args)
    # val_dataset = KittiDataset(base_dir="./data/kitti/", setname="selval")
    # val_dataset = vKittiDataset(base_dir="./data/vkitti/", setname="test")
    # val_dataset = OurDataset(base_dir="/home/bird/data2/dataset/our_lidar/20190315/f_c_1216_352", setname="f_c_1216_352")
    # val_dataset = OurDataset(base_dir="/home/bird/data2/dataset/our_lidar/20190318/f_c_1216_352", setname="f_c_1216_352_20190318")
    # val_dataset = NuScenesDataset(base_dir="/home/bird/data2/dataset/nuscenes/projected", setname="f_c_1216_352")
    # val_loader = torch.utils.data.DataLoader(val_dataset,
    #     batch_size=1, shuffle=False, num_workers=2, pin_memory=True)  # set batch size to be 1 for validation

    print("=> data loaders created.")

    # create backups and results folder
    logger = helper.logger(args)
    # if checkpoint is not None:
    #     logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        result, is_best = iterate("eval", args, val_loader, model, None,
                                  logger, checkpoint['epoch'], val_dataset)
        print(result)
        print(is_best)
        return

    # main loop
    for epoch in range(args.start_epoch, args.epochs):
        print("=> starting training epoch {} ..".format(epoch))
        iterate("train", args, train_loader, model, optimizer, logger, epoch,
                train_dataset)  # train for one epoch
        result, is_best = iterate("val", args, val_loader, model, None, logger,
                                  epoch,
                                  val_dataset)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer': optimizer.state_dict(),
            'args': args,
        }, is_best, epoch, logger.output_directory)
Example #8
    learning_rate = 1.0e-5

    snapshot_dir = snapshot_dir_base + "_%d/" % i
    if not os.path.exists(snapshot_dir):
        os.makedirs(snapshot_dir)

    train_dataset = DatasetVirtualKITTIAugmentation(virtualkitti_path=virtualkitti_path, max_iters=num_steps*batch_size, crop_size=(352, 352))
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    val_dataset = DatasetVirtualKITTIVal(virtualkitti_path=virtualkitti_path)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=val_batch_size, shuffle=False, num_workers=1)

    criterion = MaskedL2Gauss().cuda()
    rmse_criterion = RMSE().cuda()

    model = DepthCompletionNet().cuda()
    model = torch.nn.DataParallel(model)
    model.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    optimizer.zero_grad()

    train_losses = []
    batch_train_losses = []
    val_losses = []
    train_rmses = []
    batch_train_rmses = []
    val_rmses = []
    for i_iter, batch in enumerate(train_loader):
        imgs, sparses, targets, file_ids = batch
        imgs = Variable(imgs.cuda()) # (shape: (batch_size, h, w))
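torch.autograd.Variable in the line above is a legacy wrapper; since PyTorch 0.4 it is a no-op and tensors carry autograd state themselves, so the same transfer can be written directly (illustrative tensor in place of the real batch):

import torch

imgs = torch.rand(2, 3, 352, 1216)
imgs = imgs.cuda() if torch.cuda.is_available() else imgs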
Example #9
def main():
    global args
    checkpoint = None
    is_eval = False
    if args.evaluate:
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}'".format(args.evaluate))
            checkpoint = torch.load(args.evaluate)
            args = checkpoint['args']
            is_eval = True
            print("=> checkpoint loaded.")
        else:
            print("=> no model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # optionally resume from a checkpoint
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer...")
    model = DepthCompletionNet(args).cuda()
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("=> model and optimizer created.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")

    model = torch.nn.DataParallel(model)
    print("=> model transferred to multi-GPU.")

    # Data loading code
    print("=> creating data loaders ...")
    if not is_eval:
        train_dataset = KittiDepth('train', args)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=None)
    val_dataset = KittiDepth('val', args)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=2,
        pin_memory=False)  # set batch size to be 1 for validation
    print("=> data loaders created.")

    # create backups and results folder
    logger = helper.logger(args)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        result, result_intensity, is_best = iterate("val", args, val_loader,
                                                    model, None, logger,
                                                    checkpoint['epoch'])
        return

    # main loop

    for epoch in range(args.start_epoch, args.epochs):
        print("=> starting training epoch {} ..".format(epoch))
        iterate("train", args, train_loader, model, optimizer, logger,
                epoch)  # train for one epoch
        result, result_intensity, is_best = iterate(
            "val", args, val_loader, model, None, logger,
            epoch)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer': optimizer.state_dict(),
            'args': args,
        }, is_best, epoch, logger.output_directory)

        logger.writer.add_scalar('eval/rmse_depth', result.rmse, epoch)
        logger.writer.add_scalar('eval/rmse_intensity', result_intensity.rmse,
                                 epoch)
        logger.writer.add_scalar('eval/mae_depth', result.mae, epoch)
        logger.writer.add_scalar('eval/mae_intensity', result_intensity.mae,
                                 epoch)
        # logger.writer.add_scalar('eval/irmse_depth', result.irmse, epoch)
        # logger.writer.add_scalar('eval/irmse_intensity', result_intensity.irmse, epoch)
        logger.writer.add_scalar('eval/rmse_total',
                                 result.rmse + args.wi * result_intensity.rmse,
                                 epoch)
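Example #9 is the only variant that evaluates two targets at once: it logs depth and intensity errors separately plus a combined score weighted by args.wi. A minimal TensorBoard sketch of the same logging pattern (the writer path, weight, and dummy numbers are illustrative):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/demo')
wi = 0.5  # hypothetical stand-in for args.wi
for epoch, (rmse_d, rmse_i) in enumerate([(3.2, 0.9), (2.8, 0.8)]):
    writer.add_scalar('eval/rmse_depth', rmse_d, epoch)
    writer.add_scalar('eval/rmse_intensity', rmse_i, epoch)
    writer.add_scalar('eval/rmse_total', rmse_d + wi * rmse_i, epoch)
writer.close()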