コード例 #1
0
def main():
    """
    Main body of script.
    """
    args = parse_args()
    args.seed = init_rand(seed=args.seed)

    _, log_file_exist = initialize_logging(
        logging_dir_path=args.save_dir,
        logging_file_name=args.logging_file_name,
        script_args=args,
        log_packages=args.log_packages,
        log_pip_packages=args.log_pip_packages)

    use_cuda, batch_size = prepare_pt_context(num_gpus=args.num_gpus,
                                              batch_size=args.batch_size)

    net = prepare_model(model_name=args.model,
                        use_pretrained=args.use_pretrained,
                        pretrained_model_file_path=args.resume.strip(),
                        use_cuda=use_cuda)
    real_net = net.module if hasattr(net, "module") else net
    assert (hasattr(real_net, "num_classes"))
    num_classes = real_net.num_classes

    ds_metainfo = get_dataset_metainfo(dataset_name=args.dataset)
    ds_metainfo.update(args=args)

    train_data = get_train_data_source(ds_metainfo=ds_metainfo,
                                       batch_size=batch_size,
                                       num_workers=args.num_workers)
    val_data = get_val_data_source(ds_metainfo=ds_metainfo,
                                   batch_size=batch_size,
                                   num_workers=args.num_workers)

    optimizer, lr_scheduler, start_epoch = prepare_trainer(
        net=net,
        optimizer_name=args.optimizer_name,
        wd=args.wd,
        momentum=args.momentum,
        lr_mode=args.lr_mode,
        lr=args.lr,
        lr_decay_period=args.lr_decay_period,
        lr_decay_epoch=args.lr_decay_epoch,
        lr_decay=args.lr_decay,
        num_epochs=args.num_epochs,
        state_file_path=args.resume_state)

    if args.save_dir and args.save_interval:
        param_names = ds_metainfo.val_metric_capts + ds_metainfo.train_metric_capts + [
            "Train.Loss", "LR"
        ]
        lp_saver = TrainLogParamSaver(
            checkpoint_file_name_prefix="{}_{}".format(ds_metainfo.short_label,
                                                       args.model),
            last_checkpoint_file_name_suffix="last",
            best_checkpoint_file_name_suffix=None,
            last_checkpoint_dir_path=args.save_dir,
            best_checkpoint_dir_path=None,
            last_checkpoint_file_count=2,
            best_checkpoint_file_count=2,
            checkpoint_file_save_callback=save_params,
            checkpoint_file_exts=(".pth", ".states"),
            save_interval=args.save_interval,
            num_epochs=args.num_epochs,
            param_names=param_names,
            acc_ind=ds_metainfo.saver_acc_ind,
            # bigger=[True],
            # mask=None,
            score_log_file_path=os.path.join(args.save_dir, "score.log"),
            score_log_attempt_value=args.attempt,
            best_map_log_file_path=os.path.join(args.save_dir, "best_map.log"))
    else:
        lp_saver = None

    train_net(batch_size=batch_size,
              num_epochs=args.num_epochs,
              start_epoch1=args.start_epoch,
              train_data=train_data,
              val_data=val_data,
              net=net,
              optimizer=optimizer,
              lr_scheduler=lr_scheduler,
              lp_saver=lp_saver,
              log_interval=args.log_interval,
              num_classes=num_classes,
              val_metric=get_composite_metric(
                  ds_metainfo.val_metric_names,
                  ds_metainfo.val_metric_extra_kwargs),
              train_metric=get_composite_metric(
                  ds_metainfo.train_metric_names,
                  ds_metainfo.train_metric_extra_kwargs),
              use_cuda=use_cuda)
コード例 #2
0
def main():
    """
    Main body of script.
    """
    args = parse_args()
    args.seed = init_rand(seed=args.seed)

    # args.num_gpus = 1
    # args.num_epochs = 500
    # args.dataset = "CIFAR10"
    # args.model = "resnet20_cifar10"

    args.use_pretrained = False

    num_non_res = 3
    pretrained_model_file_path = '/exdrive/resnet20-cifar10/unfrozen-training/by-stack/non-res-stack-3-models/cifar10-non-res-stack-2-teacher.pth'

    _, log_file_exist = initialize_logging(
        logging_dir_path=args.save_dir,
        logging_file_name=args.logging_file_name,
        script_args=args,
        log_packages=args.log_packages,
        log_pip_packages=args.log_pip_packages)

    use_cuda, batch_size = prepare_pt_context(num_gpus=args.num_gpus,
                                              batch_size=args.batch_size)

    # Get model
    net = prepare_model(
        model_name=args.model,
        use_pretrained=args.use_pretrained,
        # pretrained_model_file_path=pretrained_model_file_path,
        pretrained_model_file_path=args.resume.strip(),
        use_cuda=use_cuda,
        remove_module=True
    )  # True if using our own trained model waits; o.w. False
    real_net = net.module if hasattr(net, "module") else net
    assert (hasattr(real_net, "num_classes"))
    num_classes = real_net.num_classes

    ds_metainfo = get_dataset_metainfo(dataset_name=args.dataset)
    ds_metainfo.update(args=args)

    train_data = get_train_data_source(ds_metainfo=ds_metainfo,
                                       batch_size=batch_size,
                                       num_workers=args.num_workers)
    val_data = get_val_data_source(ds_metainfo=ds_metainfo,
                                   batch_size=batch_size,
                                   num_workers=args.num_workers)

    optimizer, lr_scheduler, start_epoch = prepare_trainer(
        net=net,
        optimizer_name=args.optimizer_name,
        wd=args.wd,
        momentum=args.momentum,
        lr_mode=args.lr_mode,
        lr=args.lr,
        lr_decay_period=args.lr_decay_period,
        lr_decay_epoch=args.lr_decay_epoch,
        lr_decay=args.lr_decay,
        num_epochs=args.num_epochs,
        state_file_path=args.resume_state)

    if args.save_dir and args.save_interval:
        param_names = ds_metainfo.val_metric_capts + ds_metainfo.train_metric_capts + [
            "Train.Loss", "LR"
        ]
        lp_saver = TrainLogParamSaver(
            checkpoint_file_name_prefix="{}_{}".format(ds_metainfo.short_label,
                                                       args.model),
            last_checkpoint_file_name_suffix="last",
            best_checkpoint_file_name_suffix=None,
            last_checkpoint_dir_path=args.save_dir,
            best_checkpoint_dir_path=None,
            last_checkpoint_file_count=2,
            best_checkpoint_file_count=2,
            checkpoint_file_save_callback=save_params,
            checkpoint_file_exts=(".pth", ".states"),
            save_interval=args.save_interval,
            num_epochs=args.num_epochs,
            param_names=param_names,
            acc_ind=ds_metainfo.saver_acc_ind,
            # bigger=[True],
            # mask=None,
            score_log_file_path=os.path.join(args.save_dir, "score.log"),
            score_log_attempt_value=args.attempt,
            best_map_log_file_path=os.path.join(args.save_dir, "best_map.log"))
    else:
        lp_saver = None

    print(net)
    # summary(net, (3, 32, 32))

    if torch.cuda.is_available():
        print('cuda available - sending net to gpu')
        print('training using cuda = ', use_cuda)
        net.cuda()

    print('\n\nTraining nonresnet20 on cifar10 with num_non_res=', num_non_res)
    train_net(batch_size=batch_size,
              num_epochs=args.num_epochs,
              start_epoch1=args.start_epoch,
              train_data=train_data,
              val_data=val_data,
              net=net,
              optimizer=optimizer,
              lr_scheduler=lr_scheduler,
              lp_saver=lp_saver,
              log_interval=args.log_interval,
              num_classes=num_classes,
              val_metric=get_composite_metric(
                  ds_metainfo.val_metric_names,
                  ds_metainfo.val_metric_extra_kwargs),
              train_metric=get_composite_metric(
                  ds_metainfo.train_metric_names,
                  ds_metainfo.train_metric_extra_kwargs),
              use_cuda=use_cuda)