Example #1
def main():
    args.dump_dir = ensure_path(
        osp.join('dumps', args.series_name, args.desc_name,
                 (args.training_target + ('-curriculum_' + args.curriculum) +
                  ('-qtrans_' + args.question_transform
                   if args.question_transform is not None else '') +
                  ('-' + args.expr if args.expr is not None else '') +
                  ('-lr_' + str(args.lr)))))

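    # In non-debug mode, set up the checkpoint/meta/log paths, redirect
    # logging to a file, dump the run's metainfo, and optionally create
    # the tensorboard directories.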
    if not args.debug:
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, 'meta'))
        args.meta_file = osp.join(args.meta_dir, args.run_name + '.json')
        args.log_file = osp.join(args.meta_dir, args.run_name + '.log')
        args.meter_file = osp.join(args.meta_dir,
                                   args.run_name + '.meter.json')

        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)

        logger.critical('Writing metainfo to file: "{}".'.format(
            args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(
                osp.join(args.dump_dir, 'tensorboard'))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root,
                                               args.run_name))

    initialize_dataset(args.dataset)
    build_dataset = get_dataset_builder(args.dataset)

    dataset = build_dataset(args, configs, args.data_image_root,
                            args.data_scenes_json, args.data_questions_json)

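    # data_trim and data_split are interpreted as fractions of the dataset
    # when <= 1, and as absolute example counts otherwise.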
    dataset_trim = (int(len(dataset) * args.data_trim)
                    if args.data_trim <= 1 else int(args.data_trim))
    if dataset_trim > 0:
        dataset = dataset.trim_length(dataset_trim)

    dataset_split = (int(len(dataset) * args.data_split)
                     if args.data_split <= 1 else int(args.data_split))
    train_dataset, validation_dataset = dataset.split_trainval(dataset_split)

    extra_dataset = None
    if args.extra_data_dir is not None:
        extra_dataset = build_dataset(args, configs,
                                      args.extra_data_image_root,
                                      args.extra_data_scenes_json,
                                      args.extra_data_questions_json)

    main_train(train_dataset, validation_dataset, extra_dataset)
Example #2
def main():
    args.dump_dir = ensure_path(osp.join(
        'dumps', args.series_name, args.desc_name))
    if args.normalized_boxes:
        args.dump_dir = args.dump_dir + '_norm_box'
    if args.even_smp_flag:
        # Both suffixes below are gated by the same even-sampling flag.
        args.dump_dir = args.dump_dir + '_even_smp' + str(args.frm_img_num)
        args.dump_dir = args.dump_dir + '_col_box_ftr'
    args.dump_dir += '_' + args.version + '_' + args.prefix

    if not args.debug:
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, 'meta'))
        args.meta_file = osp.join(args.meta_dir, args.run_name + '.json')
        args.log_file = osp.join(args.meta_dir, args.run_name + '.log')
        args.meter_file = osp.join(args.meta_dir, args.run_name + '.meter.json')

        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)

        logger.critical('Writing metainfo to file: "{}".'.format(args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(osp.join(args.dump_dir, 'tensorboard'))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root, args.run_name))

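    # Build the CLEVRER datasets; the test split (or the billiards dataset)
    # serves as the validation set when running in testing mode.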
    initialize_dataset(args.dataset, args.version)
    if args.testing_flag == 1 or args.dataset == 'billiards':
        validation_dataset = build_clevrer_dataset(args, 'test')
    else:
        validation_dataset = build_clevrer_dataset(args, 'validation')
    train_dataset = build_clevrer_dataset(args, 'train')

    extra_dataset = None
    main_train(train_dataset, validation_dataset, extra_dataset)
Example #3
                            metavar='N',
                            help='the interval (in epochs) between tests')
schedule_group.add_argument('--test-only',
                            action='store_true',
                            help='test-only mode')

logger = get_logger(__file__)

args = parser.parse_args()

args.use_gpu = args.use_gpu and torch.cuda.is_available()

if args.dump_dir is not None:
    io.mkdir(args.dump_dir)
    args.log_file = os.path.join(args.dump_dir, 'log.log')
    set_output_file(args.log_file)
else:
    args.checkpoints_dir = None
    args.summary_file = None

if args.seed is not None:
    import jacinle.random as random
    random.reset_global_seed(args.seed)

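# Derive boolean task-category flags from args.task.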
args.task_is_outdegree = args.task in ['outdegree']
args.task_is_connectivity = args.task in ['connectivity']
args.task_is_adjacent = args.task in ['adjacent', 'adjacent-mnist']
args.task_is_family_tree = args.task in [
    'has-father', 'has-sister', 'grandparents', 'uncle', 'maternal-great-uncle'
]
args.task_is_mnist_input = args.task in ['adjacent-mnist']
Example #4
def main():
    # directories
    if not args.debug:
        args.dump_dir = ensure_path(osp.join('dumps', args.series_name, args.desc_name, args.run_name))
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.vis_dir = ensure_path(osp.join(args.dump_dir, 'visualizations'))
        args.meta_file = osp.join(args.dump_dir, 'metainfo.json')
        args.log_file = osp.join(args.dump_dir, 'log.log')
        args.meter_file = osp.join(args.dump_dir, 'meter.json')

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir = ensure_path(osp.join(args.dump_dir, 'tensorboard'))
        else:
            args.tb_dir = None

    if not args.debug:
        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)

        logger.critical('Writing metainfo to file: "{}".'.format(args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

    if args.debug and args.use_tb:
        logger.warning('Disabling the tensorboard in the debug mode.')
        args.use_tb = False
    if args.evaluate and args.use_tb:
        logger.warning('Disabling the tensorboard in the evaluation mode.')
        args.use_tb = False

    # TODO(Jiayuan Mao @ 04/23): load the dataset.
    logger.critical('Loading the dataset.')
    train_dataset = None
    validation_dataset = None
    # configs.validate_dataset_compatibility(train_dataset)

    # TODO(Jiayuan Mao @ 04/23): build the model.
    logger.critical('Building the model.')
    model = desc.make_model(args)

    if args.use_gpu:
        model.cuda()
        # Use the customized data parallel if applicable.
        if args.gpu_parallel:
            from jactorch.parallel import JacDataParallel
            # Set user_scattered because we will add a multi GPU wrapper to the dataloader. See below.
            model = JacDataParallel(model, device_ids=args.gpus, user_scattered=True).cuda()
        # TODO(Jiayuan Mao @ 04/23): disable the cudnn benchmark.
        # Disable the cudnn benchmark.
        cudnn.benchmark = False

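    # Use the description's customized optimizer if it provides one;
    # otherwise fall back to AdamW over the trainable parameters.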
    if hasattr(desc, 'make_optimizer'):
        logger.critical('Building customized optimizer.')
        optimizer = desc.make_optimizer(model, args.lr)
    else:
        from jactorch.optim import AdamW
        # TODO(Jiayuan Mao @ 04/23): set the default optimizer.
        trainable_parameters = filter(lambda x: x.requires_grad, model.parameters())
        optimizer = AdamW(trainable_parameters, args.lr, weight_decay=configs.train.weight_decay)

    if args.acc_grad > 1:
        from jactorch.optim import AccumGrad
        optimizer = AccumGrad(optimizer, args.acc_grad)
        logger.warning('Use accumulated grad={:d}, effective iterations per epoch={:d}.'.format(args.acc_grad, int(args.iters_per_epoch / args.acc_grad)))

    trainer = TrainerEnv(model, optimizer)

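    # Either resume full training state from a checkpoint, or only load
    # pretrained weights.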
    if args.resume:
        extra = trainer.load_checkpoint(args.resume)
        if extra:
            args.start_epoch = extra['epoch']
            logger.critical('Resume from epoch {}.'.format(args.start_epoch))
    elif args.load:
        if trainer.load_weights(args.load):
            logger.critical('Loaded weights from pretrained model: "{}".'.format(args.load))

    if args.use_tb:
        from jactorch.train.tb import TBLogger, TBGroupMeters
        tb_logger = TBLogger(args.tb_dir)
        meters = TBGroupMeters(tb_logger)
        logger.critical('Writing tensorboard logs to: "{}".'.format(args.tb_dir))
    else:
        from jacinle.utils.meter import GroupMeters
        meters = GroupMeters()

    if not args.debug:
        logger.critical('Writing metainfo to file: "{}".'.format(args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))
        logger.critical('Writing meter logs to file: "{}".'.format(args.meter_file))

        logger.critical('Initializing MLDash.')
        mldash.init(
            desc_name=args.series_name + '/' + args.desc_name,
            expr_name=args.expr,
            run_name=args.run_name,
            args=args,
            highlight_args=parser,
            configs=configs,
        )
        mldash.update(metainfo_file=args.meta_file, log_file=args.log_file, meter_file=args.meter_file, tb_dir=args.tb_dir)

    if args.embed:
        from IPython import embed; embed()

    if hasattr(desc, 'customize_trainer'):
        desc.customize_trainer(trainer)

    # TODO(Jiayuan Mao @ 04/23): make the data loader.
    logger.critical('Building the data loader.')
    train_dataloader = train_dataset.make_dataloader(args.batch_size, shuffle=True, drop_last=True, nr_workers=args.data_workers)
    validation_dataloader = validation_dataset.make_dataloader(args.batch_size, shuffle=False, drop_last=False, nr_workers=args.data_workers)

    if args.use_gpu and args.gpu_parallel:
        from jactorch.data.dataloader import JacDataLoaderMultiGPUWrapper
        train_dataloader = JacDataLoaderMultiGPUWrapper(train_dataloader, args.gpus)
        validation_dataloader = JacDataLoaderMultiGPUWrapper(validation_dataloader, args.gpus)

    if args.evaluate:
        epoch = 0

        model.eval()
        validate_epoch(epoch, trainer, validation_dataloader, meters)

        if not args.debug:
            meters.dump(args.meter_file)

        logger.critical(meters.format_simple('Epoch = {}'.format(epoch), compressed=False))
        return

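    # Main training loop: train, periodically validate, dump meters, log
    # metrics to MLDash, and save checkpoints at the configured interval.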
    for epoch in range(args.start_epoch + 1, args.epochs + 1):
        meters.reset()

        model.train()
        train_epoch(epoch, trainer, train_dataloader, meters)

        if args.validation_interval > 0 and epoch % args.validation_interval == 0:
            model.eval()
            with torch.no_grad():
                validate_epoch(epoch, trainer, validation_dataloader, meters)

        if not args.debug:
            meters.dump(args.meter_file)

        # TODO(Jiayuan Mao @ 02/15): config the MLDash.
        if not args.debug:
            mldash.log_metric('epoch', epoch, desc=False, expr=False)
            for key, value in meters.items():
                if key.startswith('loss') or key.startswith('validation/loss'):
                    mldash.log_metric_min(key, value.avg)
            for key, value in meters.items():
                if key.startswith('acc') or key.startswith('validation/acc'):
                    mldash.log_metric_max(key, value.avg)

        logger.critical(meters.format_simple('Epoch = {}'.format(epoch), compressed=False))

        if not args.debug:
            if epoch % args.save_interval == 0:
                fname = osp.join(args.ckpt_dir, 'epoch_{}.pth'.format(epoch))
                trainer.save_checkpoint(fname, dict(epoch=epoch, meta_file=args.meta_file))
Example #5
def main():
    args.dump_dir = ensure_path(
        osp.join(
            "dumps",
            args.series_name,
            args.desc_name,
            (args.training_target + ("-curriculum_" + args.curriculum) +
             ("-qtrans_" + args.question_transform
              if args.question_transform is not None else "") +
             ("-" + args.expr if args.expr is not None else "")),
        ))

    if not args.debug:
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, "checkpoints"))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, "meta"))
        args.meta_file = osp.join(args.meta_dir, args.run_name + ".json")
        args.log_file = osp.join(args.meta_dir, args.run_name + ".log")
        args.meter_file = osp.join(args.meta_dir,
                                   args.run_name + ".meter.json")

        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)

        logger.critical('Writing metainfo to file: "{}".'.format(
            args.meta_file))
        with open(args.meta_file, "w") as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(
                osp.join(args.dump_dir, "tensorboard"))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root,
                                               args.run_name))

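    # Load optional per-split image-index lists; the filtering that would
    # consume them is commented out below.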
    if args.train_split is not None:
        with open(osp.join(args.data_dir, args.train_split)) as f:
            train_idxs = set(json.load(f))
    else:
        train_idxs = None
    if args.val_split is not None and args.val_data_dir is not None:
        with open(osp.join(args.val_data_dir, args.val_split)) as f:
            val_idxs = set(json.load(f))
    else:
        val_idxs = None
    if args.test_split is not None and args.test_data_dir is not None:
        with open(osp.join(args.test_data_dir, args.test_split)) as f:
            test_idxs = set(json.load(f))
    else:
        test_idxs = None

    initialize_dataset(args.dataset)
    build_dataset = get_dataset_builder(args.dataset)

    dataset = build_dataset(
        args,
        configs,
        args.data_image_root,
        args.data_depth_root,
        args.data_scenes_json,
        args.data_questions_json,
    )

    # dataset_trim = (
    #     int(len(dataset) * args.data_trim)
    #     if args.data_trim <= 1
    #     else int(args.data_trim)
    # )
    # if dataset_trim > 0:
    #     dataset = dataset.trim_length(dataset_trim)

    # # dataset_split = (
    # #     int(len(dataset) * args.data_split)
    # #     if args.data_split <= 1
    # #     else int(args.data_split)
    # # )
    # # train_dataset, validation_dataset = dataset.split_trainval(dataset_split)
    # if args.mv:
    #     ood_views = set(args.ood_views)
    #     id_views = set(range(args.num_views)) - ood_views
    train_dataset = dataset
    # if train_idxs:
    #     train_dataset = dataset.filter(
    #         lambda question: question["image_index"] in train_idxs,
    #         "filter_train_size_{}".format(len(train_idxs)),
    #     )
    val_dataset = None
    if args.val_data_dir is not None:
        val_dataset = build_dataset(
            args,
            configs,
            args.val_data_image_root,
            args.val_data_depth_root,
            args.val_data_scenes_json,
            args.val_data_questions_json,
        )
    #     if val_idxs:
    #         val_dataset = val_dataset.filter(
    #             lambda question: question["image_index"] in val_idxs,
    #             "filter_val_size_{}".format(len(val_idxs)),
    #         )
    test_dataset = None
    if args.test_data_dir is not None:
        test_dataset = build_dataset(
            args,
            configs,
            args.test_data_image_root,
            args.test_data_depth_root,
            args.test_data_scenes_json,
            args.test_data_questions_json,
        )
    #     if test_idxs:
    #         test_dataset = test_dataset.filter(
    #             lambda question: question["image_index"] in test_idxs,
    #             "filter_val_size_{}".format(len(test_idxs)),
    #         )
    #     test_dataset = {"test": test_dataset}
    # if args.mv:
    #     # train_dataset = train_dataset.filter(
    #     #     lambda question: question["view_id"] in id_views, "id_view"
    #     # )
    #     if val_dataset:
    #         val_dataset = val_dataset.filter(
    #             lambda question: question["view_id"] in id_views, "id_view"
    #         )
    #     if test_dataset:
    #         id_test = test_dataset["test"].filter(
    #             lambda question: question["view_id"] in id_views, "id_view"
    #         )
    #         ood_test = test_dataset["test"].filter(
    #             lambda question: question["view_id"] in ood_views, "ood_view"
    #         )
    #         test_dataset = {"id_test": id_test, "ood_test": ood_test}

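    # NOTE: the one-shot prototype and test sets are loaded from hard-coded
    # dataset paths.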
    prototype_dataset = create_prototype_dataset(
        "/projects/data/clevr_nscl/one_shot_protos")
    one_shot_root = "/projects/data/clevr_nscl/one_shot_test_only"
    one_shot_dataset = build_dataset(
        args,
        configs,
        one_shot_root + "/images",
        one_shot_root + "/depth",
        one_shot_root + "/CLEVR_scenes_annotated_aligned.json",
        one_shot_root + "/CLEVR_questions.json",
    )
    main_train(train_dataset, val_dataset, test_dataset, prototype_dataset,
               one_shot_dataset)
Example #6
def main():
    args.dump_dir = ensure_path(
        osp.join(
            "dumps",
            args.series_name,
            args.desc_name,
            (args.training_target + ("-curriculum_" + args.curriculum) +
             ("-qtrans_" + args.question_transform
              if args.question_transform is not None else "") +
             ("-" + args.expr if args.expr is not None else "")),
        ))

    if not args.debug:
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, "checkpoints"))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, "meta"))
        args.meta_file = osp.join(args.meta_dir, args.run_name + ".json")
        args.log_file = osp.join(args.meta_dir, args.run_name + ".log")
        args.meter_file = osp.join(args.meta_dir,
                                   args.run_name + ".meter.json")

        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)

        logger.critical('Writing metainfo to file: "{}".'.format(
            args.meta_file))
        with open(args.meta_file, "w") as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))

        # Initialize the tensorboard.
        if args.use_tb:
            args.tb_dir_root = ensure_path(
                osp.join(args.dump_dir, "tensorboard"))
            args.tb_dir = ensure_path(osp.join(args.tb_dir_root,
                                               args.run_name))

    initialize_dataset(args.dataset)
    build_dataset = get_dataset_builder(args.dataset)

    dataset = build_dataset(
        args,
        configs,
        args.data_image_root,
        args.data_scenes_json,
        args.data_questions_json,
    )

    dataset_trim = (int(len(dataset) * args.data_trim)
                    if args.data_trim <= 1 else int(args.data_trim))
    if dataset_trim > 0:
        dataset = dataset.trim_length(dataset_trim)

    dataset_split = (int(len(dataset) * args.data_split)
                     if args.data_split <= 1 else int(args.data_split))
    # from IPython import embed

    # embed()
    train_dataset, validation_dataset = dataset.split_trainval(dataset_split)

    extra_dataset = None
    if args.extra_data_dir is not None:
        extra_dataset = build_dataset(
            args,
            configs,
            args.extra_data_image_root,
            args.extra_data_scenes_json,
            args.extra_data_questions_json,
        )

    main_train(train_dataset, validation_dataset, extra_dataset)
Example #7
def main():
    # directories
    if not args.debug:
        args.dump_dir = ensure_path(
            osp.join('dumps', args.series_name, args.desc_name))
        args.ckpt_dir = ensure_path(osp.join(args.dump_dir, 'checkpoints'))
        args.meta_dir = ensure_path(osp.join(args.dump_dir, 'meta'))
        args.meta_file = osp.join(args.meta_dir, args.run_name + '.json')
        args.log_file = osp.join(args.meta_dir, args.run_name + '.log')
        args.meter_file = osp.join(args.meta_dir,
                                   args.run_name + '.meter.json')

    if not args.debug:
        logger.critical('Writing logs to file: "{}".'.format(args.log_file))
        set_output_file(args.log_file)

        logger.critical('Writing metainfo to file: "{}".'.format(
            args.meta_file))
        with open(args.meta_file, 'w') as f:
            f.write(dump_metainfo(args=args.__dict__, configs=configs))
    else:
        if args.use_tb:
            logger.warning('Disabling the tensorboard in the debug mode.')
            args.use_tb = False

    # TODO(Jiayuan Mao @ 04/23): load the dataset.
    logger.critical('Loading the dataset.')
    validation_dataset = None
    # configs.validate_dataset_compatibility(train_dataset)

    # TODO(Jiayuan Mao @ 04/23): build the model.
    logger.critical('Building the model.')
    model = desc.make_model(args)

    if args.use_gpu:
        model.cuda()
        # Use the customized data parallel if applicable.
        if args.gpu_parallel:
            from jactorch.parallel import JacDataParallel
            # from jactorch.parallel import UserScatteredJacDataParallel as JacDataParallel
            model = JacDataParallel(model, device_ids=args.gpus).cuda()
        # TODO(Jiayuan Mao @ 04/23): disable the cudnn benchmark.
        # Disable the cudnn benchmark.
        cudnn.benchmark = False

    if load_weights(model, args.load):
        logger.critical('Loaded weights from pretrained model: "{}".'.format(
            args.load))

    if args.use_tb:
        from jactorch.train.tb import TBLogger, TBGroupMeters
        tb_logger = TBLogger(args.tb_dir)
        meters = TBGroupMeters(tb_logger)
        logger.critical('Writing tensorboard logs to: "{}".'.format(
            args.tb_dir))
    else:
        from jacinle.utils.meter import GroupMeters
        meters = GroupMeters()

    if not args.debug:
        logger.critical('Writing meter logs to file: "{}".'.format(
            args.meter_file))

    if args.embed:
        from IPython import embed
        embed()

    # TODO(Jiayuan Mao @ 04/23): make the data loader.
    logger.critical('Building the data loader.')
    validation_dataloader = validation_dataset.make_dataloader(
        args.batch_size,
        shuffle=False,
        drop_last=False,
        nr_workers=args.data_workers)

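    # Evaluation only: run a single validation pass and dump the meters.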
    model.eval()
    validate_epoch(model, validation_dataloader, meters)

    if not args.debug:
        meters.dump(args.meter_file)

    logger.critical(meters.format_simple('Test', compressed=False))