Ejemplo n.º 1
0
def main():
    """Train (or evaluate) a scene-flow model configured by a YAML file.

    The first command-line argument is the path to a YAML config, parsed
    into the module-level ``args`` namespace.  Returns a one-line summary
    string: the evaluation result when ``args.evaluate`` is set, otherwise
    the best validation (or training) loss over all epochs.
    """
    # Ensure the numba JIT is on (this env var disables it when present).
    if 'NUMBA_DISABLE_JIT' in os.environ:
        del os.environ['NUMBA_DISABLE_JIT']

    # parse arguments
    global args
    args = cmd_args.parse_args_from_yaml(sys.argv[1])

    # -------------------- logging args --------------------
    # Refuse to silently reuse an existing checkpoint directory.
    if osp.exists(args.ckpt_dir):
        to_continue = query_yes_no('Attention!!!, ckpt_dir already exists!\
                                        Whether to continue?',
                                   default=None)
        if not to_continue:
            sys.exit(1)
    os.makedirs(args.ckpt_dir, mode=0o777, exist_ok=True)

    logger = Logger(osp.join(args.ckpt_dir, 'log'))
    logger.log('sys.argv:\n' + ' '.join(sys.argv))

    # Numba reads this env var at JIT time to size its thread pool.
    os.environ['NUMBA_NUM_THREADS'] = str(args.workers)
    logger.log('NUMBA NUM THREADS\t' + os.environ['NUMBA_NUM_THREADS'])

    # Dump every config value so the run is reproducible from the log alone.
    for arg in sorted(vars(args)):
        logger.log('{:20s} {}'.format(arg, getattr(args, arg)))
    logger.log('')

    # -------------------- dataset & loader --------------------
    if not args.evaluate:
        train_dataset = datasets.__dict__[args.dataset](
            train=True,
            transform=transforms.Augmentation(args.aug_together, args.aug_pc2,
                                              args.data_process,
                                              args.num_points,
                                              args.allow_less_points),
            gen_func=transforms.GenerateDataUnsymmetric(args),
            args=args)
        logger.log('train_dataset: ' + str(train_dataset))
        # Re-seed numpy per worker so augmentation randomness differs
        # across DataLoader worker processes.
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True,
            worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                    (2**32)))

    val_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.ProcessData(args.data_process, args.num_points,
                                         args.allow_less_points),
        gen_func=transforms.GenerateDataUnsymmetric(args),
        args=args)
    logger.log('val_dataset: ' + str(val_dataset))
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                (2**32)))

    # -------------------- create model --------------------
    logger.log("=>  creating model '{}'".format(args.arch))

    model = models.__dict__[args.arch](args)

    if not args.evaluate:
        # Fresh training run: apply the configured weight initialization.
        init_func = partial(init_weights_multi,
                            init_type=args.init,
                            gain=args.gain)
        model.apply(init_func)
    logger.log(model)

    model = torch.nn.DataParallel(model).cuda()
    criterion = EPE3DLoss().cuda()

    if args.evaluate:
        torch.backends.cudnn.enabled = False
    else:
        cudnn.benchmark = True
    # https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
    # But if your input sizes changes at each iteration,
    # then cudnn will benchmark every time a new size appears,
    # possibly leading to worse runtime performances.

    # -------------------- resume --------------------
    if args.resume:
        if osp.isfile(args.resume):
            logger.log("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            logger.log(
                "=> loaded checkpoint '{}' (start epoch {}, min loss {})".
                format(args.resume, checkpoint['epoch'],
                       checkpoint['min_loss']))
        else:
            logger.log("=> no checkpoint found at '{}'".format(args.resume))
            checkpoint = None
            # BUGFIX: without this, args.start_epoch is unset when the
            # resume path is invalid and the epoch loop below would raise.
            args.start_epoch = 0
    else:
        args.start_epoch = 0

    # -------------------- evaluation --------------------
    if args.evaluate:
        res_str = evaluate(val_loader, model, logger, args)
        logger.close()
        return res_str

    # -------------------- optimizer --------------------
    optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad,
                                               model.parameters()),
                                 lr=args.lr,
                                 weight_decay=0)
    if args.resume and (checkpoint is not None):
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Optionally discard the resumed LR and restart the schedule.
    if hasattr(args, 'reset_lr') and args.reset_lr:
        print('reset lr')
        reset_learning_rate(optimizer, args)

    # -------------------- main loop --------------------
    min_train_loss = None
    best_train_epoch = None
    best_val_epoch = None
    do_eval = True

    for epoch in range(args.start_epoch, args.epochs):
        old_lr = optimizer.param_groups[0]['lr']
        adjust_learning_rate(optimizer, epoch, args)

        lr = optimizer.param_groups[0]['lr']
        if old_lr != lr:
            print('Switch lr!')
        logger.log('lr: ' + str(optimizer.param_groups[0]['lr']))

        train_loss = train(train_loader, model, criterion, optimizer, epoch,
                           logger)
        gc.collect()

        # Track the best epoch by training loss (first epoch always "best").
        is_train_best = True if best_train_epoch is None else (
            train_loss < min_train_loss)
        if is_train_best:
            min_train_loss = train_loss
            best_train_epoch = epoch

        if do_eval:
            val_loss = validate(val_loader, model, criterion, logger)
            gc.collect()

            is_val_best = True if best_val_epoch is None else (
                val_loss < min_val_loss)
            if is_val_best:
                min_val_loss = val_loss
                best_val_epoch = epoch
                logger.log("New min val loss!")

        # Checkpoint every epoch; "best" is decided by val loss when
        # evaluating, otherwise by train loss.
        min_loss = min_val_loss if do_eval else min_train_loss
        is_best = is_val_best if do_eval else is_train_best
        save_checkpoint(
            {
                'epoch': epoch + 1,  # next start epoch
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'min_loss': min_loss,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            args.ckpt_dir)

    train_str = 'Best train loss: {:.5f} at epoch {:3d}'.format(
        min_train_loss, best_train_epoch)
    logger.log(train_str)

    if do_eval:
        val_str = 'Best val loss: {:.5f} at epoch {:3d}'.format(
            min_val_loss, best_val_epoch)
        logger.log(val_str)

    logger.close()
    result_str = val_str if do_eval else train_str
    return result_str
Ejemplo n.º 2
0
import cmd_args
import open3d as o3d
from utils_dataset import lines
from torch.utils.tensorboard import SummaryWriter
from loss_fn import iou_projected_to_2d


# Load the run configuration from a hard-coded YAML file.
# NOTE(review): absolute path is machine-specific — consider making it a CLI arg.
args = cmd_args.parse_args_from_yaml("/home/mayank/Mayank/TrackThisFlow/configs/test_ours_KITTI.yaml")

# Root of the KITTI training split (machine-specific absolute path).
basedir = "/home/mayank/Data/KITTI/training/"

# TensorBoard writer; logs to the default ./runs/<timestamp> directory.
writer = SummaryWriter()

# Build the validation dataset.
# NOTE(review): `dataset` and `transforms` are presumably imported elsewhere
# in this module — they are not among the imports visible above; verify.
val_dataset = dataset.track_and_flow_dataset(basedir,
    transform=transforms.ProcessData(args.data_process,
                                        args.num_points,
                                        args.allow_less_points),
    gen_func=transforms.GenerateDataUnsymmetric(args),
    args=args
)

print("Length of dataset:", len(val_dataset))

# One sample per batch; workers re-seed numpy so per-worker randomness differs.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) % (2 ** 32))
)
Ejemplo n.º 3
0
def main():
    """Train a PointConv scene-flow model on FlyingThings3D.

    Reads a YAML config from ``sys.argv[1]``, creates a timestamped
    experiment directory (checkpoints + logs + source snapshot), then runs
    the train/eval loop, saving a checkpoint whenever the eval EPE3D improves.
    """
    # Ensure the numba JIT is on (this env var disables it when present).
    if 'NUMBA_DISABLE_JIT' in os.environ:
        del os.environ['NUMBA_DISABLE_JIT']

    global args
    args = cmd_args.parse_args_from_yaml(sys.argv[1])

    # Single-GPU: use the configured device; multi-GPU: expose GPUs 0 and 1.
    os.environ[
        'CUDA_VISIBLE_DEVICES'] = args.gpu if args.multi_gpu is None else '0,1'
    '''CREATE DIR'''
    experiment_dir = Path('./experiment/')
    experiment_dir.mkdir(exist_ok=True)
    file_dir = Path(
        str(experiment_dir) + '/PointConv%sFlyingthings3d-' % args.model_name +
        str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')))
    file_dir.mkdir(exist_ok=True)
    checkpoints_dir = file_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = file_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)
    # Snapshot the sources used for this run so results are reproducible.
    os.system('cp %s %s' % ('models.py', log_dir))
    os.system('cp %s %s' % ('pointconv_util.py', log_dir))
    os.system('cp %s %s' % ('train.py', log_dir))
    os.system('cp %s %s' % ('config_train.yaml', log_dir))
    '''LOG'''
    logger = logging.getLogger(args.model_name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(
        str(log_dir) + '/train_%s_sceneflow.txt' % args.model_name)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info(
        '----------------------------------------TRAINING----------------------------------'
    )
    logger.info('PARAMETER ...')
    logger.info(args)

    # ANSI escape: render console tags in blue.
    blue = lambda x: '\033[94m' + x + '\033[0m'
    model = PointConvSceneFlow()

    train_dataset = datasets.__dict__[args.dataset](
        train=True,
        transform=transforms.Augmentation(args.aug_together, args.aug_pc2,
                                          args.data_process, args.num_points),
        num_points=args.num_points,
        data_root=args.data_root,
        full=args.full)
    logger.info('train_dataset: ' + str(train_dataset))
    # Re-seed numpy per DataLoader worker so augmentations differ per worker.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                (2**32)))

    val_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.ProcessData(args.data_process, args.num_points,
                                         args.allow_less_points),
        num_points=args.num_points,
        data_root=args.data_root)
    logger.info('val_dataset: ' + str(val_dataset))
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                (2**32)))
    '''GPU selection and multi-GPU'''
    if args.multi_gpu is not None:
        device_ids = [int(x) for x in args.multi_gpu.split(',')]
        torch.backends.cudnn.benchmark = True
        model.cuda(device_ids[0])
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        model.cuda()

    if args.pretrain is not None:
        model.load_state_dict(torch.load(args.pretrain))
        print('load model %s' % args.pretrain)
        logger.info('load model %s' % args.pretrain)
    else:
        print('Training from scratch')
        logger.info('Training from scratch')

    # Checkpoint filenames encode the epoch as a 3-digit field at a fixed
    # offset (see the torch.save format strings below); recover it to resume.
    pretrain = args.pretrain
    init_epoch = int(pretrain[-14:-11]) if args.pretrain is not None else 0

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.learning_rate,
                                    momentum=0.9)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=args.weight_decay)
    else:
        # BUGFIX: previously an unknown optimizer name left `optimizer`
        # unbound and raised NameError below; fail fast with a clear error.
        raise ValueError('Unknown optimizer: %s' % args.optimizer)

    # StepLR halves the LR every 80 epochs; last_epoch aligns the schedule
    # with a resumed run.
    optimizer.param_groups[0]['initial_lr'] = args.learning_rate
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=80,
                                                gamma=0.5,
                                                last_epoch=init_epoch - 1)
    LEARNING_RATE_CLIP = 1e-5

    history = defaultdict(list)
    best_epe = 1000.0
    for epoch in range(init_epoch, args.epochs):
        # Clamp the scheduled LR from below.
        lr = max(optimizer.param_groups[0]['lr'], LEARNING_RATE_CLIP)
        print('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        total_loss = 0
        total_seen = 0
        optimizer.zero_grad()
        for i, data in tqdm(enumerate(train_loader, 0),
                            total=len(train_loader),
                            smoothing=0.9):
            pos1, pos2, norm1, norm2, flow, _ = data
            # move to cuda
            pos1 = pos1.cuda()
            pos2 = pos2.cuda()
            norm1 = norm1.cuda()
            norm2 = norm2.cuda()
            flow = flow.cuda()

            model = model.train()
            pred_flows, fps_pc1_idxs, _, _, _ = model(pos1, pos2, norm1, norm2)

            loss = multiScaleLoss(pred_flows, flow, fps_pc1_idxs)

            history['loss'].append(loss.cpu().data.numpy())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.cpu().data * args.batch_size
            total_seen += args.batch_size

        scheduler.step()

        train_loss = total_loss / total_seen
        str_out = 'EPOCH %d %s mean loss: %f' % (epoch, blue('train'),
                                                 train_loss)
        print(str_out)
        logger.info(str_out)

        eval_epe3d, eval_loss = eval_sceneflow(model.eval(), val_loader)
        str_out = 'EPOCH %d %s mean epe3d: %f  mean eval loss: %f' % (
            epoch, blue('eval'), eval_epe3d, eval_loss)
        print(str_out)
        logger.info(str_out)

        # Save a checkpoint only when the eval EPE3D improves.
        if eval_epe3d < best_epe:
            best_epe = eval_epe3d
            if args.multi_gpu is not None:
                # Save the wrapped module's weights so the checkpoint loads
                # on a single GPU too.
                torch.save(
                    model.module.state_dict(), '%s/%s_%.3d_%.4f.pth' %
                    (checkpoints_dir, args.model_name, epoch, best_epe))
            else:
                torch.save(
                    model.state_dict(), '%s/%s_%.3d_%.4f.pth' %
                    (checkpoints_dir, args.model_name, epoch, best_epe))
            logger.info('Save model ...')
            print('Save model ...')
        print('Best epe loss is: %.5f' % (best_epe))
        logger.info('Best epe loss is: %.5f' % (best_epe))
Ejemplo n.º 4
0
def main():
    """Evaluate a pretrained PointConv scene-flow model.

    Reads a YAML config from ``sys.argv[1]``, loads the checkpoint named by
    ``args.ckpt_dir + args.pretrain``, runs the validation set once, and
    logs 3D (EPE3D / ACC3DS / ACC3DR / outliers) and 2D (EPE2D / ACC2D)
    metrics.
    """
    #import ipdb; ipdb.set_trace()
    # Ensure the numba JIT is on (this env var disables it when present).
    if 'NUMBA_DISABLE_JIT' in os.environ:
        del os.environ['NUMBA_DISABLE_JIT']

    global args
    args = cmd_args.parse_args_from_yaml(sys.argv[1])

    # Single-GPU: use the configured device; multi-GPU: expose GPUs 0-3.
    os.environ[
        'CUDA_VISIBLE_DEVICES'] = args.gpu if args.multi_gpu is None else '0,1,2,3'
    '''CREATE DIR'''
    experiment_dir = Path('./Evaluate_experiment/')
    experiment_dir.mkdir(exist_ok=True)
    file_dir = Path(
        str(experiment_dir) + '/%sFlyingthings3d-' % args.model_name +
        str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')))
    file_dir.mkdir(exist_ok=True)
    checkpoints_dir = file_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = file_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)
    # Snapshot the sources used for this run so results are reproducible.
    os.system('cp %s %s' % ('models.py', log_dir))
    os.system('cp %s %s' % ('pointconv_util.py', log_dir))
    os.system('cp %s %s' % ('evaluate.py', log_dir))
    os.system('cp %s %s' % ('config_evaluate.yaml', log_dir))
    '''LOG'''
    logger = logging.getLogger(args.model_name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # BUGFIX: the path previously lacked the '/' separator, so the log file
    # landed beside the logs dir as '<...>/logstrain_*.txt' instead of
    # inside it (the training script above uses the correct form).
    file_handler = logging.FileHandler(
        str(log_dir) + '/train_%s_sceneflow.txt' % args.model_name)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info(
        '----------------------------------------TRAINING----------------------------------'
    )
    logger.info('PARAMETER ...')
    logger.info(args)

    # ANSI escape: render console tags in blue.
    blue = lambda x: '\033[94m' + x + '\033[0m'
    model = PointConvSceneFlow()

    val_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.ProcessData(args.data_process, args.num_points,
                                         args.allow_less_points),
        num_points=args.num_points,
        data_root=args.data_root)
    logger.info('val_dataset: ' + str(val_dataset))
    # Re-seed numpy per DataLoader worker so per-worker randomness differs.
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) %
                                                (2**32)))

    #load pretrained model
    pretrain = args.ckpt_dir + args.pretrain
    model.load_state_dict(torch.load(pretrain))
    print('load model %s' % pretrain)
    logger.info('load model %s' % pretrain)

    model.cuda()

    # Running averages of the 3D evaluation metrics.
    epe3ds = AverageMeter()
    acc3d_stricts = AverageMeter()
    acc3d_relaxs = AverageMeter()
    outliers = AverageMeter()
    # 2D
    epe2ds = AverageMeter()
    acc2ds = AverageMeter()

    total_loss = 0
    total_seen = 0
    total_epe = 0
    for i, data in tqdm(enumerate(val_loader, 0),
                        total=len(val_loader),
                        smoothing=0.9):
        pos1, pos2, norm1, norm2, flow, path = data

        #move to cuda
        pos1 = pos1.cuda()
        pos2 = pos2.cuda()
        norm1 = norm1.cuda()
        norm2 = norm2.cuda()
        flow = flow.cuda()

        model = model.eval()
        with torch.no_grad():
            pred_flows, fps_pc1_idxs, _, _, _ = model(pos1, pos2, norm1, norm2)

            loss = multiScaleLoss(pred_flows, flow, fps_pc1_idxs)

            # Finest-scale prediction, permuted to (batch, points, 3) to
            # match the ground-truth flow layout.
            full_flow = pred_flows[0].permute(0, 2, 1)
            epe3d = torch.norm(full_flow - flow, dim=2).mean()

        total_loss += loss.cpu().data * args.batch_size
        total_epe += epe3d.cpu().data * args.batch_size
        total_seen += args.batch_size

        pc1_np = pos1.cpu().numpy()
        sf_np = flow.cpu().numpy()
        pred_sf = full_flow.cpu().numpy()

        EPE3D, acc3d_strict, acc3d_relax, outlier = evaluate_3d(pred_sf, sf_np)

        epe3ds.update(EPE3D)
        acc3d_stricts.update(acc3d_strict)
        acc3d_relaxs.update(acc3d_relax)
        outliers.update(outlier)

        # 2D evaluation metrics: project GT and predicted end points to image
        # space using the per-sample calibration identified by `path`.
        flow_pred, flow_gt = geometry.get_batch_2d_flow(
            pc1_np, pc1_np + sf_np, pc1_np + pred_sf, path)
        EPE2D, acc2d = evaluate_2d(flow_pred, flow_gt)

        epe2ds.update(EPE2D)
        acc2ds.update(acc2d)

    mean_loss = total_loss / total_seen
    mean_epe = total_epe / total_seen
    str_out = '%s mean loss: %f mean epe: %f' % (blue('Evaluate'), mean_loss,
                                                 mean_epe)
    print(str_out)
    logger.info(str_out)

    res_str = (' * EPE3D {epe3d_.avg:.4f}\t'
               'ACC3DS {acc3d_s.avg:.4f}\t'
               'ACC3DR {acc3d_r.avg:.4f}\t'
               'Outliers3D {outlier_.avg:.4f}\t'
               'EPE2D {epe2d_.avg:.4f}\t'
               'ACC2D {acc2d_.avg:.4f}'.format(epe3d_=epe3ds,
                                               acc3d_s=acc3d_stricts,
                                               acc3d_r=acc3d_relaxs,
                                               outlier_=outliers,
                                               epe2d_=epe2ds,
                                               acc2d_=acc2ds))

    print(res_str)
    logger.info(res_str)