Example #1
def parse_option():
    parser = argparse.ArgumentParser('PSNet scene-segmentation evaluating')
    parser.add_argument('--cfg', type=str, required=True, help='config file')
    parser.add_argument('--load_path',
                        required=True,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint')
    parser.add_argument('--log_dir',
                        type=str,
                        default='log_eval',
                        help='log dir [default: log_eval]')
    parser.add_argument('--data_root',
                        type=str,
                        default='data',
                        help='root directory of dataset')
    parser.add_argument('--num_workers',
                        type=int,
                        default=4,
                        help='num of workers to use')
    parser.add_argument('--batch_size', type=int, help='batch_size')
    parser.add_argument('--num_points', type=int, help='num_points')
    parser.add_argument('--num_steps', type=int, help='num_steps')
    parser.add_argument("--local_rank",
                        type=int,
                        help='local rank for DistributedDataParallel')
    parser.add_argument("--rng_seed", type=int, default=0, help='manual seed')

    args, unparsed = parser.parse_known_args()

    update_config(args.cfg)

    config.data_root = args.data_root
    config.num_workers = args.num_workers
    config.load_path = args.load_path
    config.rng_seed = args.rng_seed

    config.local_rank = args.local_rank

    ddir_name = args.cfg.split('.')[-2].split('/')[-1]
    config.log_dir = os.path.join(args.log_dir, 'psnet',
                                  f'{ddir_name}_{int(time.time())}')

    if args.batch_size:
        config.batch_size = args.batch_size
    if args.num_points:
        config.num_points = args.num_points
    if args.num_steps:
        config.num_steps = args.num_steps

    print(args)
    print(config)

    torch.manual_seed(args.rng_seed)
    torch.cuda.manual_seed_all(args.rng_seed)
    random.seed(args.rng_seed)
    np.random.seed(args.rng_seed)

    return args, config
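Several of these examples derive a run name from the config path with args.cfg.split('.')[-2].split('/')[-1]. That works for paths like 'project/cfgs/s3dis/pointnet.yaml', but it returns the wrong piece when the filename contains extra dots or when Windows path separators are used. A small, hypothetical alternative (not part of the original code) using pathlib:

from pathlib import Path

def config_stem(cfg_path):
    # 'project/cfgs/s3dis/pointnet.yaml'    -> 'pointnet'
    # 'project/cfgs/s3dis/pointnet.v2.yaml' -> 'pointnet.v2'
    return Path(cfg_path).stem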
Example #2
def parse_option():
    parser = argparse.ArgumentParser("Training and evaluating PartNet")
    parser.add_argument('--cfg', help='yaml file', type=str)
    parser.add_argument('--gpus', type=int, default=0, nargs='+', help='gpus to use [default: 0]')
    parser.add_argument('--num_threads', type=int, default=4, help='num of threads to use')
    parser.add_argument('--batch_size', type=int, help='batch_size')
    parser.add_argument('--base_learning_rate', type=float, help='base learning rate for batch size 8')

    # IO
    parser.add_argument('--log_dir', default='log', help='log dir [default: log]')
    parser.add_argument('--load_path', help='path to a check point file for load')
    parser.add_argument('--print_freq', type=int, help='print frequency')
    parser.add_argument('--save_freq', type=int, help='save frequency')
    parser.add_argument('--val_freq', type=int, help='val frequency')

    # Misc
    parser.add_argument('--save_memory', action='store_true', help='use memory_saving_gradients')
    parser.add_argument("--rng-seed", type=int, default=0, help='manual seed')

    args, _ = parser.parse_known_args()

    # Update config
    update_config(args.cfg)

    ddir_name = args.cfg.split('.')[-2].split('/')[-1]
    config.log_dir = os.path.join(args.log_dir, 'partnet', f'{ddir_name}_{int(time.time())}')
    config.load_path = args.load_path
    config.gpus = args.gpus if isinstance(args.gpus, list) else [args.gpus]
    config.num_gpus = len(config.gpus)
    if args.num_threads:
        config.num_threads = args.num_threads
    else:
        cpu_count = psutil.cpu_count()
        gpu_count = str(subprocess.check_output(["nvidia-smi", "-L"])).count('UUID')
        config.num_threads = config.num_gpus * cpu_count // gpu_count
    if args.batch_size:
        config.batch_size = args.batch_size
    if args.base_learning_rate:
        config.base_learning_rate = args.base_learning_rate
    if args.print_freq:
        config.print_freq = args.print_freq
    if args.save_freq:
        config.save_freq = args.save_freq
    if args.val_freq:
        config.val_freq = args.val_freq

    # Set manual seed
    tf.set_random_seed(args.rng_seed)
    np.random.seed(args.rng_seed)

    # If args.save_memory is True, use gradient-checkpointing to save memory
    if args.save_memory:  # if save memory
        import utils.memory_saving_gradients
        tf.__dict__["gradients"] = utils.memory_saving_gradients.gradients_collection

    return args, config
Example #3
def main():
    update_config("configs/coco/resnet_v1_101_coco_trainval_fpn_dcn_end2end_ohem.yaml")
    log_init(filename=config.TRAIN.model_prefix + "train.log")
    msg = pprint.pformat(config)
    logging.info(msg)
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"

    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    ctx = ctx * config.network.IM_PER_GPU
    train_net(ctx, config.TRAIN.begin_epoch, config.TRAIN.lr, config.TRAIN.lr_step)
Example #4
    def update_config(self, config):
        r"""
        Update the object config with new inputs.

        Args:

            config (dict or BaseConfig) : fields of configuration to be updated

            Typically if config = {"learningRate": 0.1} only the learning rate
            will be changed.
        """
        update_config(self.config, config)
        self.updateSolversDevice()
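The docstring above describes a partial update: only the fields present in the incoming dict are changed, and everything else keeps its current value. A minimal sketch of such a merge over plain nested dicts (the project's BaseConfig objects may behave differently) could look like this:

def merge_config(base, updates):
    """Overwrite only the keys present in `updates`; recurse into nested dicts."""
    for key, value in updates.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            merge_config(base[key], value)
        else:
            base[key] = value
    return base

cfg = {"learningRate": 0.1, "batchSize": 8}
merge_config(cfg, {"learningRate": 0.01})
# cfg == {"learningRate": 0.01, "batchSize": 8} -- only the learning rate changed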
Example #5
def main():
    update_config(
        "configs/voc/resnet_v1_50_voc0712_rfcn_dcn_end2end_ohem_one_gpu.yaml")
    log_init(filename=config.TRAIN.model_prefix + "train.log")
    msg = pprint.pformat(config)
    logging.info(msg)
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"

    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    train_net(ctx, config.network.pretrained, config.network.pretrained_epoch,
              config.TRAIN.model_prefix, config.TRAIN.begin_epoch,
              config.TRAIN.end_epoch, config.TRAIN.lr, config.TRAIN.lr_step)
Example #6
def parse_option():
    parser = argparse.ArgumentParser("Evaluating S3DIS")
    parser.add_argument('--cfg', help='yaml file', type=str)
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='which gpu to use [default: 0]')
    parser.add_argument('--num_threads',
                        type=int,
                        default=4,
                        help='num of threads to use')
    parser.add_argument('--batch_size', type=int, help='batch_size')
    parser.add_argument('--base_learning_rate',
                        type=float,
                        help='base learning rate for batch size 8')

    # IO
    parser.add_argument('--log_dir',
                        default='log_eval',
                        help='log dir [default: log_eval]')
    parser.add_argument('--load_path',
                        help='path to a check point file for load')

    # Misc
    parser.add_argument("--rng-seed", type=int, default=0, help='manual seed')

    args, _ = parser.parse_known_args()

    # Update config
    update_config(args.cfg)

    ddir_name = args.cfg.split('.')[-2].split('/')[-1]
    config.log_dir = os.path.join(args.log_dir, 's3dis',
                                  f'{ddir_name}_{int(time.time())}')
    config.load_path = args.load_path

    if args.num_threads:
        config.num_threads = args.num_threads
    else:
        cpu_count = psutil.cpu_count()
        config.num_threads = cpu_count
    if args.batch_size:
        config.batch_size = args.batch_size

    # Set manual seed
    tf.set_random_seed(args.rng_seed)
    np.random.seed(args.rng_seed)

    return args, config
Example #7
def cli_main():
    parser = options.get_validation_parser()
    parser.add_argument(
        '--config',
        type=str,
        nargs='*',
        help=
        'paths to JSON files of experiment configurations, from high to low priority'
    )
    parser.add_argument(
        '--load-checkpoint',
        type=str,
        help='path to checkpoint to load (possibly composite) model from')

    pre_parsed_args = parser.parse_args()

    config_dict = {}
    for config_path in pre_parsed_args.config:
        config_dict = update_config(config_dict, compose_configs(config_path))

    parser_modifier = modify_factory(config_dict)

    args = options.parse_args_and_arch(parser, modify_parser=parser_modifier)

    update_namespace(args, config_dict)

    main(args)
Example #8
    #     cocoEval.accumulate()
    #     cocoEval.summarize()
    #     mAP_eachclasses[catid2catbane[catId]] = cocoEval.stats[1]
    print(u"Evaluate all classes.")
    cocoEval.params.catIds = catIds
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    mAP_eachclasses[u"mAP@IoU=0.5"] = cocoEval.stats[1]
    print("************summary***************")
    for k in mAP_eachclasses.keys():
        print(k, mAP_eachclasses[k])


if __name__ == '__main__':
    update_config(
        "configs/coco/resnet_v1_101_coco_trainval_fpn_dcn_end2end_ohem.yaml")
    backbone = SEResNext50_32x4d()
    net = PyramidRFCN(config, backbone)
    params_pretrained = mx.nd.load("output/fpn_coco-5-0.0.params")
    for k in list(params_pretrained.keys()):
        new_k = k.replace("arg:", "").replace("aux:", "")
        params_pretrained[new_k] = params_pretrained.pop(k)
    params = net.collect_params()
    for k in params.keys():
        if k in params_pretrained.keys():
            params[k]._load_init(params_pretrained[k], ctx=mx.cpu())
        else:
            print(k)

    results = {}
Example #9
def parse_args():
    parser = argparse.ArgumentParser(
        description='Train and val for occlusion edge/order detection')
    parser.add_argument('--config',
                        default='',
                        required=False,
                        type=str,
                        help='experiment configure file name')
    args, rest = parser.parse_known_args()
    update_config(args.config)  # update params with experiment config file

    parser.add_argument('--debug', action='store_true', help='debug mode')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument(
        '--new_val',
        action='store_true',
        help='new val with resumed model, re-calculate val performance')
    parser.add_argument('--out_dir',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='result output dir (default: output/date)')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='test with best model in validation')
    parser.add_argument('--frequent',
                        default=config.default.frequent,
                        type=int,
                        help='frequency of logging')
    parser.add_argument('--gpus',
                        help='specify the gpus to be used',
                        default='3',
                        required=False,
                        type=str)
    parser.add_argument('--cpu',
                        default=False,
                        required=False,
                        type=bool,
                        help='whether to use cpu mode')
    parser.add_argument('-j',
                        '--workers',
                        default=2,
                        type=int,
                        metavar='N',
                        help='number of data loading workers')
    parser.add_argument('--vis',
                        action='store_true',
                        help='turn on visualization')
    parser.add_argument(
        '--arch',
        '-a',
        metavar='ARCH',
        choices=model_names,
        help='model architecture, overwritten if pretrained is specified: ' +
        ' | '.join(model_names))
    args = parser.parse_args()

    return args
Example #10
    criterion = MaskedCrossEntropy()
    return criterion


if __name__ == "__main__":
    # obtain config
    import argparse
    from utils.config import config, update_config
    parser = argparse.ArgumentParser('S3DIS semantic segmentation training')
    parser.add_argument('--cfg',
                        type=str,
                        default='project/cfgs/s3dis/pointnet.yaml',
                        help='config file')
    args, unparsed = parser.parse_known_args()
    # update config dict with the yaml file
    update_config(args.cfg)
    print(config)

    # create a model
    model = PointNetSemSeg(config, config.input_features_dim)
    print(model)

    # define a loss
    from losses import MaskedCrossEntropy
    criterion = MaskedCrossEntropy()

    # create a random input and then predict
    batch_size = 2  # config.batch_size
    num_points = config.num_points
    input_features_dim = config.input_features_dim
    xyz = torch.rand(batch_size, num_points, 3)
Example #11
def cli_main():

    parser = options.get_training_parser()
    parser.add_argument(
        '--config',
        type=str,
        nargs='*',
        help=
        'paths to JSON files of experiment configurations, from high to low priority',
    )
    parser.add_argument('--exp-name',
                        type=str,
                        default='',
                        help='name of the experiment')
    parser.add_argument(
        '--debug',
        default=False,
        action='store_true',
        help='run training in the debugging mode',
    )
    parser.add_argument('--path-attributes',
                        type=str,
                        nargs='*',
                        default=['task', 'arch', 'lr'])
    parser.add_argument(
        '--filter_best_last_ckpts',
        type=str,
        default=False,
        help=
        'whether to filter out checkpoint_best and checkpoint_last from checkpoint list'
    )
    parser.add_argument('--log_valid_progress',
                        type=str,
                        default=False,
                        help='whether to log validation progress')
    pre_parsed_args, unknown = parser.parse_known_args()

    config_dict = {}
    for config_path in pre_parsed_args.config:
        config_dict = update_config(config_dict, compose_configs(config_path))

    parser_modifier = modify_factory(config_dict)

    args = options.parse_args_and_arch(parser, modify_parser=parser_modifier)

    update_namespace(args, config_dict)

    if args.distributed_init_method is None:
        distributed_utils.infer_init_method(args)

    if args.distributed_init_method is not None:
        # distributed training
        if torch.cuda.device_count() > 1 and not args.distributed_no_spawn:
            start_rank = args.distributed_rank
            args.distributed_rank = None  # assign automatically
            torch.multiprocessing.spawn(
                fn=distributed_main,
                args=(args, start_rank),
                nprocs=torch.cuda.device_count(),
            )
        else:
            distributed_main(args.device_id, args)
    elif args.distributed_world_size > 1:
        # fallback for single node with multiple GPUs
        assert args.distributed_world_size <= torch.cuda.device_count()
        port = random.randint(10000, 20000)
        args.distributed_init_method = 'tcp://localhost:{port}'.format(
            port=port)
        args.distributed_rank = None  # set based on device id
        if (args.update_freq is not None and max(args.update_freq) > 1
                and args.ddp_backend != 'no_c10d'):
            logger.info(
                'NOTE: you may get faster training with: --ddp-backend=no_c10d'
            )
        torch.multiprocessing.spawn(
            fn=distributed_main,
            args=(args, ),
            nprocs=args.distributed_world_size,
        )
    else:
        # single GPU training
        main(args)
Example #12
            fetch_list=[avg_cost, auc_var],
            fetch_info=['Epoch {} cost: '.format(epoch + 1), ' - auc: '],
            print_period=cfg.log_interval,
            debug=False)
        end_time = time.time()
        logger.info("epoch %d finished, use time = %ds \n" %
                    ((epoch + 1), end_time - start_time))

        if (epoch + 1) % cfg.save_interval == 0:
            model_path = os.path.join(str(cfg.save_path), model.name,
                                      model.name + "_epoch_" + str(epoch + 1))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            logger.info("saving model to %s \n" % (model_path))
            fluid.save(fluid.default_main_program(),
                       os.path.join(model_path, "checkpoint"))
    logger.info("Done.")


def main():
    train()


if __name__ == '__main__':
    option = BaseOptions()
    args = option.initialize()

    update_config(cfg, args)
    print_config(cfg)

    main()
Example #13
                    help='Logging with tensorboard',
                    action='store_true')
parser.add_argument('--debug',
                    default=False,
                    dest='debug',
                    help='Visualization debug',
                    action='store_true')
parser.add_argument('--map',
                    default=True,
                    dest='map',
                    help='Evaluate mAP per epoch',
                    action='store_true')

opt = parser.parse_args()
cfg_file_name = os.path.basename(opt.cfg)
cfg = update_config(opt.cfg)

cfg['FILE_NAME'] = cfg_file_name
cfg.TRAIN.DPG_STEP = [i - cfg.TRAIN.DPG_MILESTONE for i in cfg.TRAIN.DPG_STEP]
opt.world_size = cfg.TRAIN.WORLD_SIZE
opt.work_dir = './exp/{}-{}/'.format(opt.exp_id, cfg_file_name)
opt.gpus = [i for i in range(torch.cuda.device_count())]
opt.device = torch.device("cuda:" +
                          str(opt.gpus[0]) if opt.gpus[0] >= 0 else "cpu")

if not os.path.exists("./exp/{}-{}".format(opt.exp_id, cfg_file_name)):
    os.makedirs("./exp/{}-{}".format(opt.exp_id, cfg_file_name))

filehandler = logging.FileHandler('./exp/{}-{}/training.log'.format(
    opt.exp_id, cfg_file_name))
streamhandler = logging.StreamHandler()
Example #14
def train(
    query_dataloader,
    retrieval_dataloader,
    code_length,
    args,
    # args.device,
    # lr,
    # args.max_iter,
    # args.max_epoch,
    # args.num_samples,
    # args.batch_size,
    # args.root,
    # dataset,
    # args.gamma,
    # topk,
):
    """
    Training model.

    Args
        query_dataloader, retrieval_dataloader(torch.utils.data.dataloader.DataLoader): Data loader.
        code_length(int): Hashing code length.
        args.device(torch.args.device): GPU or CPU.
        lr(float): Learning rate.
        args.max_iter(int): Number of iterations.
        args.max_epoch(int): Number of epochs.
        num_train(int): Number of sampling training data points.
        args.batch_size(int): Batch size.
        args.root(str): Path of dataset.
        dataset(str): Dataset name.
        args.gamma(float): Hyper-parameters.
        topk(int): Topk k map.

    Returns
        mAP(float): Mean Average Precision.
    """
    # Initialization
    # model = alexnet.load_model(code_length).to(args.device)
    # model = resnet.resnet50(pretrained=args.pretrain, num_classes=code_length).to(args.device)

    update_config(config, args)
    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = hrnet.get_cls_net(config,
                              pretrained=args.pretrain,
                              num_classes=code_length).to(args.device)
    # print(model)
    if args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.wd)
    elif args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, args.lr_step)
    criterion = ADSH_Loss(code_length, args.gamma)

    num_retrieval = len(retrieval_dataloader.dataset)
    U = torch.zeros(args.num_samples, code_length).to(args.device)
    B = torch.randn(num_retrieval, code_length).to(args.device)
    retrieval_targets = retrieval_dataloader.dataset.get_onehot_targets().to(
        args.device)
    cnn_losses, hash_losses, quan_losses = (AverageMeter(), AverageMeter(),
                                            AverageMeter())
    start = time.time()
    best_mAP = 0
    for it in range(args.max_iter):
        iter_start = time.time()
        # Sample training data for cnn learning
        train_dataloader, sample_index = sample_dataloader(
            retrieval_dataloader, args.num_samples, args.batch_size, args.root,
            args.dataset)

        # Create Similarity matrix
        train_targets = train_dataloader.dataset.get_onehot_targets().to(
            args.device)
        S = (train_targets @ retrieval_targets.t() > 0).float()
        S = torch.where(S == 1, torch.full_like(S, 1), torch.full_like(S, -1))

        # Soft similarity matrix, benefit to converge
        r = S.sum() / (1 - S).sum()
        S = S * (1 + r) - r

        # Training CNN model
        for epoch in range(args.max_epoch):
            cnn_losses.reset()
            hash_losses.reset()
            quan_losses.reset()
            for batch, (data, targets, index) in enumerate(train_dataloader):
                data, targets, index = data.to(args.device), targets.to(
                    args.device), index.to(args.device)
                optimizer.zero_grad()

                F = model(data)
                U[index, :] = F.data
                cnn_loss, hash_loss, quan_loss = criterion(
                    F, B, S[index, :], sample_index[index])
                cnn_losses.update(cnn_loss.item())
                hash_losses.update(hash_loss.item())
                quan_losses.update(quan_loss.item())
                cnn_loss.backward()
                optimizer.step()
            logger.info(
                '[epoch:{}/{}][cnn_loss:{:.6f}][hash_loss:{:.6f}][quan_loss:{:.6f}]'
                .format(epoch + 1, args.max_epoch, cnn_losses.avg,
                        hash_losses.avg, quan_losses.avg))
        scheduler.step()
        # Update B
        expand_U = torch.zeros(B.shape).to(args.device)
        expand_U[sample_index, :] = U
        B = solve_dcc(B, U, expand_U, S, code_length, args.gamma)

        # Total loss
        iter_loss = calc_loss(U, B, S, code_length, sample_index, args.gamma)
        # logger.debug('[iter:{}/{}][loss:{:.2f}][iter_time:{:.2f}]'.format(it+1, args.max_iter, iter_loss, time.time()-iter_start))
        logger.info('[iter:{}/{}][loss:{:.6f}][iter_time:{:.2f}]'.format(
            it + 1, args.max_iter, iter_loss,
            time.time() - iter_start))

        # Evaluate
        if (it + 1) % 1 == 0:
            query_code = generate_code(model, query_dataloader, code_length,
                                       args.device)
            mAP = evaluate.mean_average_precision(
                query_code.to(args.device),
                B,
                query_dataloader.dataset.get_onehot_targets().to(args.device),
                retrieval_targets,
                args.device,
                args.topk,
            )
            if mAP > best_mAP:
                best_mAP = mAP
                # Save checkpoints
                ret_path = os.path.join('checkpoints', args.info,
                                        str(code_length))
                # ret_path = 'checkpoints/' + args.info
                if not os.path.exists(ret_path):
                    os.makedirs(ret_path)
                torch.save(query_code.cpu(),
                           os.path.join(ret_path, 'query_code.t'))
                torch.save(B.cpu(), os.path.join(ret_path, 'database_code.t'))
                torch.save(query_dataloader.dataset.get_onehot_targets().cpu(),
                           os.path.join(ret_path, 'query_targets.t'))
                torch.save(retrieval_targets.cpu(),
                           os.path.join(ret_path, 'database_targets.t'))
                torch.save(model.cpu(), os.path.join(ret_path, 'model.t'))
                model = model.to(args.device)
            logger.info(
                '[iter:{}/{}][code_length:{}][mAP:{:.5f}][best_mAP:{:.5f}]'.
                format(it + 1, args.max_iter, code_length, mAP, best_mAP))
    logger.info('[Training time:{:.2f}]'.format(time.time() - start))

    return best_mAP
Example #15
from copy import deepcopy
from numpy import mean
from utils.config import Config, update_config
from utils.gridsearch import GridSearch
from utils.utils import get_kernel, get_classifier, get_sets, save_submission, kfold, split_train_val
from kernels.default import SimpleMKL
from utils.pca import PCA

glob_cfg = Config('config')

# Start by updating set0, set1 and set2 with the global config.
# deepcopy is used because update_config has side effects on its arguments
# (see the short illustration after this example).
glob_cfg.set_(
    "set0",
    update_config(deepcopy(glob_cfg["global"].values_()),
                  glob_cfg.set0.values_()))
glob_cfg.set_(
    "set1",
    update_config(deepcopy(glob_cfg["global"].values_().copy()),
                  glob_cfg.set1.values_()))
glob_cfg.set_(
    "set2",
    update_config(deepcopy(glob_cfg["global"].values_().copy()),
                  glob_cfg.set2.values_()))

gridsearch = GridSearch(glob_cfg)

total_accuracy = []
all_predictions = []
all_ids = []
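The deepcopy calls above guard against the side effects mentioned in the comment: if the merge writes into its first argument, reusing glob_cfg["global"] directly for set0, set1 and set2 would let one set's overrides leak into the shared defaults. A short illustration with a hypothetical in-place merge (not the project's actual update_config):

from copy import deepcopy

def merge_inplace(base, override):
    base.update(override)  # mutates `base`
    return base

common = {"kernel": "spectrum", "C": 1.0}
merge_inplace(common, {"C": 10.0})
print(common["C"])  # 10.0 -- the shared defaults were modified

common = {"kernel": "spectrum", "C": 1.0}
merge_inplace(deepcopy(common), {"C": 0.1})
print(common["C"])  # 1.0 -- deepcopy keeps the defaults intact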
Example #16
File: eval.py Project: ascust/MADA
        predicts = [evaluator(data[0].as_in_context(config.ctx))]
        predicts = [pred.argmax(1).asnumpy().squeeze() for pred in predicts]
        targets = [target.as_in_context(mx.cpu()).asnumpy().squeeze() \
                    for target in dsts]
        metric.update(targets, predicts)
       
    pixAcc, mIoU, IoUs = metric.get()
    iou_str = ""
    for ind, cur_class in enumerate(testset.classes):
        iou_str += "%s: %.3f\t" % (cur_class, IoUs[ind])
    logging.info( 'pixAcc: %.4f, mIoU: %.4f\n%s' % (pixAcc, mIoU, iou_str))

if __name__ == "__main__":
    args = parse_args()
    print("Using config file %s" % args.cfg)
    config.update_config(args.cfg)
    config.config.resume = args.resume
    config.config.gpu = args.gpu
    config.config.ctx = args.ctx
    config.config.test_batch_size = 1

    tag = args.cfg.split("/")[-1]
    tag = tag.replace(".yaml", "")
    config.config.tag = tag

    logging.basicConfig(filename=config.config.tag+"_eval.log", level=logging.INFO)
    console = logging.StreamHandler()
    logging.getLogger().addHandler(console)

    logging.info("Parameters:")
    logging.info(config.config)
Example #17
if __name__ == '__main__':

    parser = options.get_training_parser()
    parser.add_argument(
        '--config',
        type=str,
        nargs='*',
        help='paths to JSON files of experiment configurations, from high to low priority',
    )
    parser.add_argument('--torch-file-system', action='store_true')
    pre_parsed_args, unknown = parser.parse_known_args()

    config_dict = {}
    for config_path in pre_parsed_args.config:
        config_dict = update_config(config_dict, compose_configs(config_path))

    parser_modifier = modify_factory(config_dict)

    args = options.parse_args_and_arch(parser, modify_parser=parser_modifier)

    update_namespace(args, config_dict)

    # set sharing strategy file system in case /dev/shm/ limits are small
    if args.torch_file_system:
        torch.multiprocessing.set_sharing_strategy('file_system')

    main(args)


Example #18
def parse_option():
    parser = argparse.ArgumentParser('PartNet part-segmentation training')
    parser.add_argument('--cfg', type=str, required=True, help='config file')
    parser.add_argument('--data_root',
                        type=str,
                        default='data',
                        help='root directory of dataset')
    parser.add_argument('--num_workers',
                        type=int,
                        default=4,
                        help='num of workers to use')
    parser.add_argument('--batch_size', type=int, help='batch_size')
    parser.add_argument('--base_learning_rate',
                        type=float,
                        help='base learning rate')
    parser.add_argument('--epochs', type=int, help='number of training epochs')
    parser.add_argument('--start_epoch', type=int, help='used for resume')

    # io
    parser.add_argument('--load_path',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--print_freq',
                        type=int,
                        default=10,
                        help='print frequency')
    parser.add_argument('--save_freq',
                        type=int,
                        default=10,
                        help='save frequency')
    parser.add_argument('--val_freq',
                        type=int,
                        default=10,
                        help='val frequency')
    parser.add_argument('--log_dir',
                        type=str,
                        default='log',
                        help='log dir [default: log]')

    # misc
    parser.add_argument("--local_rank",
                        type=int,
                        help='local rank for DistributedDataParallel')
    parser.add_argument("--rng_seed", type=int, default=0, help='manual seed')

    args, unparsed = parser.parse_known_args()

    update_config(args.cfg)

    config.data_root = args.data_root
    config.num_workers = args.num_workers
    config.load_path = args.load_path
    config.print_freq = args.print_freq
    config.save_freq = args.save_freq
    config.val_freq = args.val_freq
    config.rng_seed = args.rng_seed

    config.local_rank = args.local_rank

    ddir_name = args.cfg.split('.')[-2].split('/')[-1]
    config.log_dir = os.path.join(args.log_dir, 'partnet', ddir_name)

    if args.batch_size:
        config.batch_size = args.batch_size
    if args.base_learning_rate:
        config.base_learning_rate = args.base_learning_rate
    if args.epochs:
        config.epochs = args.epochs
    if args.start_epoch:
        config.start_epoch = args.start_epoch

    print(args)
    print(config)

    torch.manual_seed(args.rng_seed)
    torch.cuda.manual_seed_all(args.rng_seed)
    random.seed(args.rng_seed)
    np.random.seed(args.rng_seed)

    return args, config
Example #19
    def load_state_dict(self,
                        in_state,
                        loadG=True,
                        loadD=True,
                        loadConfig=True,
                        finetuning=False):
        r"""
        Load a model saved with the @method save() function

        Args:
            - in_state (dict): state dict containing the model
        """

        # Step one : load the configuration
        if loadConfig:
            update_config(self.config, in_state['config'])
            self.lossCriterion = getattr(
                base_loss_criterions, self.config.lossCriterion)(self.device)
            self.initializeClassificationCriterion()

        # Re-initialize G and D with the loaded configuration
        buildAvG = True

        if loadG:
            self.netG = self.getNetG()
            if finetuning:
                loadPartOfStateDict(self.netG, in_state['netG'],
                                    ["formatLayer"])
                self.getOriginalG().initFormatLayer(
                    self.config.latentVectorDim)
            else:
                # Replace me by a standard loadStatedict for open-sourcing TODO
                loadStateDictCompatible(self.netG, in_state['netG'])
                if 'avgG' in in_state:
                    print("Average network found !")
                    self.buildAvG()
                    # Replace me by a standard loadStatedict for open-sourcing
                    if isinstance(self.avgG, nn.DataParallel):

                        # loadStateDictCompatible(self.avgG.module, in_state['avgG'])

                        # HACK TO BE ABLE TO LOAD THE MODELS TRAINED SO FAR
                        loadStateDictCompatible(self.avgG.module,
                                                in_state['avgG'])
                    else:
                        loadStateDictCompatible(self.avgG, in_state['avgG'])
                    buildAvG = False

        if loadD:

            self.netD = self.getNetD()
            if finetuning:
                loadPartOfStateDict(self.netD, in_state['netD'],
                                    ["decisionLayer"])
                self.getOriginalD().initDecisionLayer(
                    self.lossCriterion.sizeDecisionLayer +
                    self.config.categoryVectorDim)
            else:
                # Replace me by a standard loadStatedict for open-sourcing TODO
                loadStateDictCompatible(self.netD, in_state['netD'])

        elif 'tmp' in in_state.keys():
            self.trainTmp = in_state['tmp']
        # Don't forget to reset the machinery !
        self.updateSolversDevice(buildAvG)
Example #20
def parse_config():
    """load configs including parameters from dataset, model, training, etc.
    The basic process is:
    - load default settings based on the config dict in the utils/config.py
    - update the config dict using yaml file specified by an argparse argument(--cfg argument)
    - update the config dict using argparse arguments

    Returns:
        tuple: (args, config) contains config settings where args is argparse.Namespace object while config is a dict
    """
    parser = argparse.ArgumentParser('S3DIS semantic segmentation training')
    parser.add_argument('--cfg',
                        type=str,
                        default='project/cfgs/s3dis/pointnet2_ssg.yaml',
                        help='config file')
    # parser.add_argument('--model_name', type=str, default='', help='model name, pointnet, pointnet2ssg, pointnet2msg')
    parser.add_argument('--data_root',
                        type=str,
                        default='data',
                        help='root directory of dataset')
    parser.add_argument('--num_workers',
                        type=int,
                        default=4,
                        help='num of workers to use')
    parser.add_argument('--batch_size', type=int, help='batch_size')
    parser.add_argument('--num_points', type=int, help='num_points')
    parser.add_argument('--num_steps', type=int, help='num_steps')
    parser.add_argument('--base_learning_rate',
                        type=float,
                        help='base learning rate')
    parser.add_argument('--weight_decay', type=float, help='weight_decay')
    parser.add_argument('--epochs', type=int, help='number of training epochs')
    parser.add_argument('--start_epoch', type=int, help='used for resume')

    # io
    parser.add_argument('--load_path',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--print_freq',
                        type=int,
                        default=10,
                        help='print frequency')
    parser.add_argument('--save_freq',
                        type=int,
                        default=10,
                        help='save frequency')
    parser.add_argument('--val_freq',
                        type=int,
                        default=10,
                        help='val frequency')
    parser.add_argument('--log_dir',
                        type=str,
                        default='log',
                        help='log dir [default: log]')

    # misc
    parser.add_argument("--local_rank",
                        type=int,
                        default=0,
                        help='local rank for DistributedDataParallel')
    parser.add_argument("--rng_seed", type=int, default=0, help='manual seed')

    args, unparsed = parser.parse_known_args()

    # update config dict with the yaml file
    update_config(args.cfg)

    # update config dict with args arguments
    config.data_root = args.data_root
    config.num_workers = args.num_workers
    config.load_path = args.load_path
    config.print_freq = args.print_freq
    config.save_freq = args.save_freq
    config.val_freq = args.val_freq
    config.rng_seed = args.rng_seed

    config.local_rank = args.local_rank

    # model name, e.g., pointnet; supported: pointnet, pointnet2{ssg,msg}
    model_name = args.cfg.split('.')[-2].split('/')[-1]
    config.model_name = model_name
    # e.g., '20210518221044' means 2021-05-18 22:10:44
    current_time = datetime.now().strftime('%Y%m%d%H%M%S')
    # log_dir = log/s3dis/<model_name>_<timestamp>
    config.log_dir = os.path.join(args.log_dir, 's3dis',
                                  f'{model_name}_{int(current_time)}')

    if args.batch_size:
        config.batch_size = args.batch_size
    if args.num_points:
        config.num_points = args.num_points
    if args.num_steps:
        config.num_steps = args.num_steps
    if args.base_learning_rate:
        config.base_learning_rate = args.base_learning_rate
    if args.weight_decay:
        config.weight_decay = args.weight_decay
    if args.epochs:
        config.epochs = args.epochs
    if args.start_epoch:
        config.start_epoch = args.start_epoch

    print(args)
    print(config)

    torch.manual_seed(args.rng_seed)
    torch.cuda.manual_seed_all(args.rng_seed)
    random.seed(args.rng_seed)
    np.random.seed(args.rng_seed)

    return args, config
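The docstring of parse_config above describes a three-layer precedence: built-in defaults, then the YAML file named by --cfg, then explicit command-line flags. A compact sketch of that layering with plain dicts and PyYAML (the project's config object and update_config differ in detail) might be:

import argparse
import yaml  # PyYAML

DEFAULTS = {"batch_size": 16, "num_points": 4096, "base_learning_rate": 0.01}

def load_layered_config(cfg_path, cli_overrides):
    config = dict(DEFAULTS)                     # 1. start from the defaults
    with open(cfg_path) as f:
        config.update(yaml.safe_load(f) or {})  # 2. yaml file overrides defaults
    # 3. only CLI flags that were actually given win over the yaml values
    config.update({k: v for k, v in cli_overrides.items() if v is not None})
    return config

parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, required=True)
parser.add_argument('--batch_size', type=int)  # stays None unless passed
args = parser.parse_args()
cfg = load_layered_config(args.cfg, {"batch_size": args.batch_size})

Skipping None values plays the same role as the `if args.batch_size:` checks in the examples above: flags that were not given never clobber the yaml or default values.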
Example #21
    GANTrainer = trainerModule(model_name=exp_name,
                               gpu=GPU_is_available(),
                               loader=loader,
                               loss_plot_i=args.loss_i,
                               eval_i=args.eval_i,
                               checkpoint_dir=checkpoint_dir,
                               save_iter=args.save_i,
                               n_samples=args.n_samples,
                               config=model_config,
                               vis_manager=vis_manager)

    # If a checkpoint is found, load it
    if not args.restart and checkpoint_state is not None:
        train_config, model_path, tmp_data_path = checkpoint_state
        # if args.retrain:
        #     train_config_file = read_json(train_config)
        #     for k, v in config['model_config'].items():
        #         train_config_file[k] = v
        #     train_config = os.path.join(checkpoint_dir, f'{exp_name}_train_config.json')
        #     save_json(train_config_file, train_config)

        GANTrainer.load_saved_training(model_path, train_config, tmp_data_path)
        if args.finetune:
            GANTrainer.model.update_config(model_config)
            update_config(GANTrainer.modelConfig, model_config)
        # ipdb.set_trace()
    # save config file
    save_json(config, os.path.join(checkpoint_dir, f'{exp_name}_config.json'))
    GANTrainer.train()
Example #22
def cli_main():

    parser = options.get_training_parser()
    parser.add_argument(
        '--config',
        type=str,
        nargs='*',
        help=
        'paths to JSON files of experiment configurations, from high to low priority',
    )
    parser.add_argument('--exp-name',
                        type=str,
                        default='',
                        help='name of the experiment')
    parser.add_argument(
        '--debug',
        default=False,
        action='store_true',
        help='run training in the debugging mode',
    )
    parser.add_argument('--path-attributes',
                        type=str,
                        nargs='*',
                        default=['task', 'arch', 'lr'])
    parser.add_argument('--torch-file-system', action='store_true')

    pre_parsed_args, unknown = parser.parse_known_args()

    config_dict = {}
    for config_path in pre_parsed_args.config:
        config_dict = update_config(config_dict, compose_configs(config_path))

    parser_modifier = modify_factory(config_dict)

    args = options.parse_args_and_arch(parser, modify_parser=parser_modifier)

    update_namespace(args, config_dict)

    # set sharing strategy file system in case /dev/shm/ limits are small
    if args.torch_file_system:
        torch.multiprocessing.set_sharing_strategy('file_system')

    training_name = get_training_name(args)
    base_save_dir = generate_save_dir(args, training_name, sys.argv[1:])
    setattr(args, 'training_name', training_name)
    setattr(args, 'save_dir', os.path.join(base_save_dir, 'checkpoints'))
    setattr(args, 'tensorboard_logdir',
            os.path.join(base_save_dir, 'tensorboard'))

    save_config(vars(args), base_save_dir)

    if args.distributed_init_method is None:
        distributed_utils.infer_init_method(args)

    if args.distributed_init_method is not None:
        # distributed training
        if torch.cuda.device_count() > 1 and not args.distributed_no_spawn:
            start_rank = args.distributed_rank
            args.distributed_rank = None  # assign automatically
            torch.multiprocessing.spawn(
                fn=distributed_main,
                args=(args, start_rank),
                nprocs=torch.cuda.device_count(),
            )
        else:
            distributed_main(args.device_id, args)
    elif args.distributed_world_size > 1:
        # fallback for single node with multiple GPUs
        assert args.distributed_world_size <= torch.cuda.device_count()
        port = random.randint(10000, 20000)
        args.distributed_init_method = 'tcp://localhost:{port}'.format(
            port=port)
        args.distributed_rank = None  # set based on device id
        if (args.update_freq is not None and max(args.update_freq) > 1
                and args.ddp_backend != 'no_c10d'):
            logger.info(
                'NOTE: you may get faster training with: --ddp-backend=no_c10d'
            )
        torch.multiprocessing.spawn(
            fn=distributed_main,
            args=(args, ),
            nprocs=args.distributed_world_size,
        )

    else:
        # single GPU training
        main(args)