Example #1
    def gather_options(self):
        # initialize parser with basic options
        if not self.initialized:
            parser = argparse.ArgumentParser(
                formatter_class=argparse.ArgumentDefaultsHelpFormatter)
            parser = self.initialize(parser)

        # get the basic options
        opt, unknown = parser.parse_known_args()

        # modify model-related parser options
        model_name = opt.model
        model_option_setter = models.get_option_setter(model_name)
        parser = model_option_setter(parser, self.isTrain)

        # modify dataset-related parser options
        dataset_mode = opt.dataset_mode
        dataset_option_setter = data.get_option_setter(dataset_mode)
        parser = dataset_option_setter(parser, self.isTrain)

        opt, unknown = parser.parse_known_args()

        # if there is opt_file, load it.
        # The previous default options will be overwritten
        if opt.load_from_opt_file:
            parser = self.update_options_from_file(parser, opt)

        update_parser_defaults_from_yaml(parser)

        opt = parser.parse_args()
        self.parser = parser
        return opt
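
Example #1 relies on argparse's two-stage parsing: parse_known_args() first reads only the options registered so far (returning the rest as "unknown" instead of erroring), so the chosen model and dataset can be used to register their own options before the final parse_args(). Below is a self-contained sketch of that pattern; the option names are hypothetical, not from the project.

# Self-contained sketch of the two-stage parsing pattern above; option names are hypothetical.
import argparse

def add_model_options(parser, model_name):
    # Register options that only exist for the chosen model.
    if model_name == 'pix2pix':
        parser.add_argument('--lambda_L1', type=float, default=100.0)
    return parser

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='pix2pix')
    # First pass: only already-registered options are read; unknown args are returned, not rejected.
    opt, unknown = parser.parse_known_args()
    parser = add_model_options(parser, opt.model)
    # Second pass: every option, including the model-specific ones, must now be recognized.
    opt = parser.parse_args()
    print(vars(opt))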
Example #2
File: train.py Project: PeterouZh/FQ-GAN
def main():
    # parse command line and run
    parser = utils.prepare_parser()

    update_parser_defaults_from_yaml(parser)

    config = vars(parser.parse_args())
    print(config)
    run(config)
Example #3
def main():
    # parse command line and run
    parser = utils.prepare_parser()

    update_parser_defaults_from_yaml(parser)

    args = parser.parse_args()
    args.base_root = os.path.join(args.tl_outdir, 'biggan')
    config = EasyDict(vars(args))
    config_str = get_dict_str(config)
    logger = logging.getLogger('tl')
    logger.info(config_str)
    run(config)
Example #4
def main():
    logger = logging.getLogger('tl')
    # parse command line
    parser = utils.prepare_parser()
    update_parser_defaults_from_yaml(parser)

    args = parser.parse_args()
    args.base_root = os.path.join(args.tl_outdir, 'biggan')
    opt = EasyDict(vars(args))

    logger.info(f"\nglobal_cfg: \n" + get_dict_str(global_cfg))
    global_cfg.dump_to_file_with_command(
        f"{opt.tl_outdir}/config_command.yaml", command=opt.tl_command)

    run(opt)
Example #5
def main():
  parser = build_parser()
  args, _ = parser.parse_known_args()
  is_main_process = args.local_rank == 0

  update_parser_defaults_from_yaml(parser, is_main_process=is_main_process)

  if is_main_process:
    modelarts_utils.setup_tl_outdir_obs(global_cfg)
    modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)
    modelarts_utils.prepare_dataset(global_cfg.get('modelarts_download', {}), global_cfg=global_cfg)

  args = parser.parse_args()

  setup_runtime(seed=args.seed)

  distributed = ddp_utils.is_distributed()
  if distributed:
      dist_utils.init_dist(args.launcher, backend='nccl')
      # important: use different random seed for different process
      torch.manual_seed(args.seed + dist.get_rank())

  # dataset
  dataset = torch_data_utils.ImageListDataset(meta_file=global_cfg.image_list_file, )
  if distributed:
    sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=False)
  else:
    sampler = None

  train_loader = data_utils.DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    sampler=sampler,
    num_workers=args.num_workers,
    pin_memory=False)

  # test
  data_iter = iter(train_loader)
  data = next(data_iter)

  if is_main_process:
    modelarts_utils.prepare_dataset(global_cfg.get('modelarts_upload', {}), global_cfg=global_cfg, download=False)
    modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)
  if distributed:
    dist.barrier()
  pass
Example #6
File: main_moco.py Project: PeterouZh/moco
def main():
    update_parser_defaults_from_yaml(parser)
    args = parser.parse_args()
    global_cfg.merge_from_dict(vars(args))
    modelarts_utils.setup_tl_outdir_obs(global_cfg)
    modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)
    modelarts_utils.prepare_dataset(global_cfg.get('modelarts_download', {}),
                                    global_cfg=global_cfg)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker,
                 nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
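
The mp.spawn launch above follows PyTorch's standard pattern: spawn starts nprocs processes and calls the worker as fn(i, *args), where i is the local process index. A minimal, self-contained sketch follows; the worker is a stand-in, not the project's main_worker.

# Minimal sketch of the torch.multiprocessing.spawn pattern; the worker is illustrative only.
import torch
import torch.multiprocessing as mp

def _worker(gpu, ngpus_per_node, args):
    # spawn passes the local process index as the first argument.
    print(f"worker {gpu}/{ngpus_per_node} started with args={args}")

if __name__ == '__main__':
    ngpus_per_node = max(torch.cuda.device_count(), 1)
    mp.spawn(_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, {'lr': 0.1}))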
Example #7
    ]
    for v in sys.argv[1:]:
        if '=' in v:
            name, value = v.split('=')
            if name == '--tl_opts':
                argv.append(name)
                argv.extend(value.split(' '))
            else:
                argv.extend([name, value])
        else:
            argv.append(v)
    sys.argv.clear()
    sys.argv.extend(argv)

    sys.argv[
        sys.argv.index('--tl_outdir') +
        1] = f"{sys.argv[sys.argv.index('--tl_outdir') + 1]}-{time_str}_{tmp_args.number:02d}"
    shutil.rmtree(sys.argv[sys.argv.index('--tl_outdir') + 1],
                  ignore_errors=True)

    print(f"sys.argv processed: ")
    pprint.pprint(sys.argv)
    parser = update_parser_defaults_from_yaml(parser=parser,
                                              use_cfg_as_args=True)
    logger = logging.getLogger('tl')

    args, _ = parser.parse_known_args()
    global_cfg.merge_from_dict(vars(args))
    print(get_dict_str(global_cfg))
    main()
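
The snippet above rewrites sys.argv so that '--name=value' tokens become separate '--name value' tokens and a space-separated '--tl_opts=...' value is expanded into individual tokens, before appending a timestamped suffix to the --tl_outdir value. A self-contained sketch of just the rewriting step follows; the demo values are illustrative.

# Standalone sketch of the argv rewriting above; demo values are illustrative.
def expand_argv(raw_argv):
    argv = [raw_argv[0]]
    for v in raw_argv[1:]:
        if '=' in v:
            name, value = v.split('=', 1)
            if name == '--tl_opts':
                # --tl_opts takes a space-separated list; expand it into separate tokens.
                argv.append(name)
                argv.extend(value.split(' '))
            else:
                argv.extend([name, value])
        else:
            argv.append(v)
    return argv

if __name__ == '__main__':
    demo = ['train.py', '--tl_opts=model.lr 0.1 model.beta 0.5', '--tl_outdir=results/run']
    print(expand_argv(demo))
    # -> ['train.py', '--tl_opts', 'model.lr', '0.1', 'model.beta', '0.5',
    #     '--tl_outdir', 'results/run']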
Example #8
def run(argv_str=None):

    parser = update_parser_defaults_from_yaml(parser=None,
                                              use_cfg_as_args=True)
    args = parser.parse_args()
    main(args)
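
Examples #2, #8, and #12 suggest the minimal entry point for update_parser_defaults_from_yaml; the sketch below combines them. It assumes template_lib.v2.config_cfgnode is importable and that the YAML config is supplied through the library's own tl_* command-line flags (e.g. --tl_opts, --tl_outdir seen above); it is illustrative, not code from any of the projects.

# Illustrative sketch only: minimal use of update_parser_defaults_from_yaml.
from template_lib.v2.config_cfgnode import update_parser_defaults_from_yaml, global_cfg

def main(args):
    print(vars(args))
    print(global_cfg)  # populated from the YAML config by update_parser_defaults_from_yaml

if __name__ == '__main__':
    # With parser=None and use_cfg_as_args=True the helper returns a parser whose
    # arguments appear to mirror the config entries (as in Example #8).
    parser = update_parser_defaults_from_yaml(parser=None, use_cfg_as_args=True)
    main(parser.parse_args())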
Example #9
File: main_moco.py Project: PeterouZh/moco
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.gpu == 0:
        update_parser_defaults_from_yaml(parser)
        global_cfg.merge_from_dict(vars(args))
        modelarts_utils.setup_tl_outdir_obs(global_cfg)
        modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)

    logger = logging.getLogger('tl')
    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder.MoCo(models.__dict__[args.arch], args.moco_dim,
                              args.moco_k, args.moco_m, args.moco_t, args.mlp)
    logger.info(model)

    modelarts_utils.modelarts_sync_results_dir(global_cfg,
                                               join=True,
                                               is_main_process=(args.gpu == 0))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    train_dataset = datasets.ImageFolder(
        traindir,
        moco.loader.TwoCropsTransform(transforms.Compose(augmentation)))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename=f'{args.tl_ckptdir}/checkpoint_{epoch:04d}.pth.tar')
            modelarts_utils.modelarts_sync_results_dir(
                global_cfg, join=False, is_main_process=(args.gpu == 0))
Example #10
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu
    update_parser_defaults_from_yaml(parser, is_main_process=(gpu == 0))
    logger = logging.getLogger('tl')
    if args.gpu == 0:
        modelarts_utils.setup_tl_outdir_obs(global_cfg)
        modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    logger.info("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch]()

    # freeze all layers but the last fc
    for name, param in model.named_parameters():
        if name not in ['fc.weight', 'fc.bias']:
            param.requires_grad = False
    # init the fc layer
    model.fc.weight.data.normal_(mean=0.0, std=0.01)
    model.fc.bias.data.zero_()
    logger.info(model)

    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            logger.info("=> loading checkpoint '{}'".format(args.pretrained))
            checkpoint = torch.load(args.pretrained, map_location="cpu")

            # rename moco pre-trained keys
            state_dict = checkpoint['state_dict']
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if k.startswith('module.encoder_q'
                                ) and not k.startswith('module.encoder_q.fc'):
                    # remove prefix
                    state_dict[k[len("module.encoder_q."):]] = state_dict[k]
                # delete renamed or unused k
                del state_dict[k]

            args.start_epoch = 0
            msg = model.load_state_dict(state_dict, strict=False)
            assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}

            logger.info("=> loaded pre-trained model '{}'".format(
                args.pretrained))
        else:
            print("=> no checkpoint found at '{}'".format(args.pretrained))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    assert len(parameters) == 2  # fc.weight, fc.bias
    optimizer = torch.optim.SGD(parameters,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    # val_loader = torch.utils.data.DataLoader(
    #     datasets.ImageFolder(valdir, transforms.Compose([
    #         transforms.Resize(256),
    #         transforms.CenterCrop(224),
    #         transforms.ToTensor(),
    #         normalize,
    #     ])),
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True)

    evaldir = os.path.join(args.data, 'val')
    eval_imagenet = EvalImageNet(valdir=evaldir, gpu_id=gpu)

    if args.evaluate:
        eval_imagenet.validate(model=model, epoch=0)
        return
        # print("=> loading checkpoint '{}'".format(args.pretrained))
        # checkpoint = torch.load(args.pretrained, map_location="cpu")
        #
        # # rename moco pre-trained keys
        # state_dict = checkpoint['state_dict']
        # for k in list(state_dict.keys()):
        #     # retain only encoder_q up to before the embedding layer
        #     if k.startswith('module.encoder_q'):
        #         # remove prefix
        #         state_dict['module.' + k[len("module.encoder_q."):]] = state_dict[k]
        #     # delete renamed or unused k
        #     del state_dict[k]
        #
        # msg = model.load_state_dict(state_dict, strict=False)
        # validate(val_loader, model, criterion, args)
        # return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        # acc1 = validate(val_loader, model, criterion, args)
        # summary_dict2txtfig({'top1': acc1.item()}, prefix='eval', step=epoch,
        #                     textlogger=global_textlogger, is_main_process=(args.gpu == 0))
        acc1 = eval_imagenet.validate(model=model, epoch=epoch)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                filename=f"{args.tl_ckptdir}/checkpoint.pth.tar")
            modelarts_utils.modelarts_sync_results_dir(
                global_cfg, join=False, is_main_process=(args.gpu == 0))
            if epoch == args.start_epoch:
                sanity_check(model.state_dict(), args.pretrained)
    modelarts_utils.modelarts_sync_results_dir(global_cfg,
                                               join=True,
                                               is_main_process=(args.gpu == 0))
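
The checkpoint-loading block in the example above keeps only the encoder_q weights (excluding its fc head) and strips the 'module.encoder_q.' prefix before calling load_state_dict. A standalone sketch of that renaming step follows; the checkpoint path is hypothetical.

# Standalone sketch of the MoCo key-renaming step above; the checkpoint path is hypothetical.
import torch

def strip_encoder_q_prefix(state_dict):
    # Keep only encoder_q weights (excluding its fc head) and drop the 'module.encoder_q.' prefix.
    out = {}
    for k, v in state_dict.items():
        if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
            out[k[len('module.encoder_q.'):]] = v
    return out

# Usage (hypothetical path):
# checkpoint = torch.load('checkpoints/moco_pretrain.pth.tar', map_location='cpu')
# msg = model.load_state_dict(strip_encoder_q_prefix(checkpoint['state_dict']), strict=False)
# assert set(msg.missing_keys) == {'fc.weight', 'fc.bias'}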
Example #11
def main():
    parser = ArgumentParser(add_help=False)
    parser.add_argument('-c',
                        '--config_path',
                        type=str,
                        default='./src/configs/CIFAR10/ContraGAN.json')
    parser.add_argument('--checkpoint_folder', type=str, default=None)
    parser.add_argument('-current',
                        '--load_current',
                        action='store_true',
                        help='whether you load the current or best checkpoint')
    parser.add_argument('--log_output_path', type=str, default=None)

    parser.add_argument('-DDP',
                        '--distributed_data_parallel',
                        action='store_true')
    parser.add_argument('-n', '--nodes', default=1, type=int, metavar='N')
    parser.add_argument('-nr',
                        '--nr',
                        default=0,
                        type=int,
                        help='ranking within the nodes')

    parser.add_argument('--seed',
                        type=int,
                        default=-1,
                        help='seed for generating random numbers')
    parser.add_argument('--num_workers', type=int, default=8, help='')
    parser.add_argument('-sync_bn',
                        '--synchronized_bn',
                        action='store_true',
                        help='whether turn on synchronized batchnorm')
    parser.add_argument('-mpc',
                        '--mixed_precision',
                        action='store_true',
                        help='whether turn on mixed precision training')
    parser.add_argument('-LARS',
                        '--LARS_optimizer',
                        action='store_true',
                        help='whether turn on LARS optimizer')
    parser.add_argument('-rm_API',
                        '--disable_debugging_API',
                        action='store_true',
                        help='whether disable pytorch autograd debugging mode')

    parser.add_argument('--reduce_train_dataset',
                        type=float,
                        default=1.0,
                        help='control the number of train dataset')
    parser.add_argument('--truncated_factor',
                        type=float,
                        default=-1.0,
                        help='factor for truncation trick')
    parser.add_argument('-stat_otf',
                        '--bn_stat_OnTheFly',
                        action='store_true',
                        help='when evaluating, use the statistics of a batch')
    parser.add_argument('-std_stat',
                        '--standing_statistics',
                        action='store_true')
    parser.add_argument('--standing_step',
                        type=int,
                        default=-1,
                        help='# of steps for accumulation batchnorm')
    parser.add_argument('--freeze_layers',
                        type=int,
                        default=-1,
                        help='# of layers for freezing discriminator')

    parser.add_argument('-l', '--load_all_data_in_memory', action='store_true')
    parser.add_argument('-t', '--train', action='store_true')
    parser.add_argument('-e', '--eval', action='store_true')
    parser.add_argument('-s', '--save_images', action='store_true')
    parser.add_argument('-iv',
                        '--image_visualization',
                        action='store_true',
                        help='select whether conduct image visualization')
    parser.add_argument(
        '-knn',
        '--k_nearest_neighbor',
        action='store_true',
        help='select whether conduct k-nearest neighbor analysis')
    parser.add_argument('-itp',
                        '--interpolation',
                        action='store_true',
                        help='whether conduct interpolation analysis')
    parser.add_argument('-fa',
                        '--frequency_analysis',
                        action='store_true',
                        help='whether conduct frequency analysis')
    parser.add_argument('-tsne',
                        '--tsne_analysis',
                        action='store_true',
                        help='whether conduct tsne analysis')
    parser.add_argument('--nrow',
                        type=int,
                        default=10,
                        help='number of rows to plot image canvas')
    parser.add_argument('--ncol',
                        type=int,
                        default=8,
                        help='number of cols to plot image canvas')

    parser.add_argument('--print_every',
                        type=int,
                        default=100,
                        help='control log interval')
    parser.add_argument('--save_every',
                        type=int,
                        default=2000,
                        help='control evaluation and save interval')
    parser.add_argument('--eval_type',
                        type=str,
                        default='test',
                        help='[train/valid/test]')

    from template_lib.v2.config_cfgnode import update_parser_defaults_from_yaml, global_cfg
    update_parser_defaults_from_yaml(parser=parser)
    args = parser.parse_args()

    if not args.train and \
            not args.eval and \
            not args.save_images and \
            not args.image_visualization and \
            not args.k_nearest_neighbor and \
            not args.interpolation and \
            not args.frequency_analysis and \
            not args.tsne_analysis:
        parser.print_help(sys.stderr)
        sys.exit(1)

    if args.config_path is not None:
        with open(args.config_path) as f:
            model_configs = json.load(f)
        train_configs = vars(args)
    else:
        raise NotImplementedError

    hdf5_path_train = make_hdf5(model_configs['data_processing'], train_configs, mode="train") \
        if train_configs['load_all_data_in_memory'] else None

    if train_configs['seed'] == -1:
        train_configs['seed'] = random.randint(1, 4096)
        cudnn.benchmark, cudnn.deterministic = True, False
    else:
        cudnn.benchmark, cudnn.deterministic = False, True

    fix_all_seed(train_configs['seed'])
    gpus_per_node, rank = torch.cuda.device_count(), torch.cuda.current_device()
    world_size = gpus_per_node * train_configs['nodes']
    if world_size == 1:
        warnings.warn(
            'You have chosen a specific GPU. This will completely disable data parallelism.'
        )

    run_name = make_run_name(
        RUN_NAME_FORMAT,
        framework=train_configs['config_path'].split('/')[-1][:-5],
        phase='train')
    if train_configs['disable_debugging_API']:
        torch.autograd.set_detect_anomaly(False)
    check_flags(train_configs, model_configs, world_size)

    if train_configs['distributed_data_parallel'] and world_size > 1:
        print("Train the models through DistributedDataParallel (DDP) mode.")
        mp.spawn(prepare_train_eval,
                 nprocs=gpus_per_node,
                 args=(gpus_per_node, world_size, run_name, train_configs,
                       model_configs, hdf5_path_train))
    else:
        prepare_train_eval(rank,
                           gpus_per_node,
                           world_size,
                           run_name,
                           train_configs,
                           model_configs,
                           hdf5_path_train=hdf5_path_train)
Example #12
  weights = np.mean(grads, axis=(1, 2))  # global-average-pool the gradients: one weight per channel

  for i, w in enumerate(weights):
    cam += w * feature_map[i, :, :]

  cam = np.maximum(cam, 0)
  cam = cv2.resize(cam, (32, 32))
  cam -= np.min(cam)
  cam /= np.max(cam)

  return cam


if __name__ == '__main__':
  from template_lib.v2.config_cfgnode import update_parser_defaults_from_yaml, global_cfg
  update_parser_defaults_from_yaml(parser=None)

  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  path_img = os.path.join(BASE_DIR, "cam_img", "test_img_8.png")
  path_net = os.path.join(BASE_DIR, "cam_img", "net_params_72p.pkl")
  output_dir = global_cfg.tl_outdir

  classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


  # read the image and load the network
  img = cv2.imread(path_img, 1)  # H*W*C
  img_input = img_preprocess(img)
  net = Net()

  ret = net.load_state_dict(torch.load(path_net))