Example #1
def main(env_name, num_episodes, render, VideoSave, gamma, lam, kl_targ, batch_size):
    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name, render)
    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime("%b-%d_%H-%M-%S")  # create unique directories
    logger = Logger(logname=env_name, now=now)
    #aigym_path = os.path.join('/tmp', env_name, now)
    #env = wrappers.Monitor(env, aigym_path, force=True) 
    scaler = Scaler(obs_dim, env_name)
    scaler.resume()
    val_func = NNValueFunction(obs_dim, env_name)
    policy = Policy(obs_dim, act_dim, kl_targ, env_name)
    episode = 0
    capture = False
    while episode < num_episodes:
        if VideoSave and not capture:
            env.ScreenCapture(5)
            capture = True
        trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size)
        episode += len(trajectories)
        

        if killer.kill_now:
            if input('Terminate training (y/[n])? ') == 'y':
                break
            killer.kill_now = False
    logger.close()
    policy.close_sess()
    val_func.close_sess()
Example #2
def main(env_name, num_episodes, render, gamma, lam, kl_targ, batch_size):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        render: render the environment during rollouts (passed to init_gym)
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimation
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
    """
    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name, render)
    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime("%b-%d_%H-%M-%S")  # create unique directories
    logger = Logger(logname=env_name, now=now)

    scaler = Scaler(obs_dim, env_name)
    val_func = NNValueFunction(obs_dim, env_name)
    policy = Policy(obs_dim, act_dim, kl_targ, env_name)
    # run a few episodes of untrained policy to initialize scaler:
    run_policy(env, policy, scaler, logger, episodes=5)
    episode = 0
    #capture = False
    while episode < num_episodes:
        trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size)
        episode += len(trajectories)
        """if episode > 600 and not capture:
               env.ScreenCapture(5)
               capture = True"""
        add_value(trajectories, val_func)  # add estimated values to episodes
        add_disc_sum_rew(trajectories, gamma)  # calculated discounted sum of Rs
        add_gae(trajectories, gamma, lam)  # calculate advantage
        # concatenate all episodes into single NumPy arrays
        observes, actions, advantages, disc_sum_rew = build_train_set(trajectories)
        # add various stats to training log:
        log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode)
        policy.update(observes, actions, advantages, logger)  # update policy
        val_func.fit(observes, disc_sum_rew, logger)  # update value function
        
        logger.write(display=True)  # write logger results to file and stdout
        scaler.save()
        if killer.kill_now:
            if input('Terminate training (y/[n])? ') == 'y':
                break
            killer.kill_now = False
    logger.close()
    policy.close_sess()
    val_func.close_sess()
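The helpers referenced in the comments above (add_disc_sum_rew, add_gae) are not part of this example. As a rough orientation, here is a minimal sketch of what such Generalized Advantage Estimation helpers typically look like, assuming each trajectory is a dict holding 'rewards' and 'values' NumPy arrays; the field names and details are assumptions, not the original implementation.

import numpy as np
import scipy.signal

def discount(x, gamma):
    # discounted cumulative sum: y[t] = x[t] + gamma * y[t + 1]
    return scipy.signal.lfilter([1.0], [1.0, -gamma], x[::-1])[::-1]

def add_disc_sum_rew(trajectories, gamma):
    # target for the value function: discounted sum of future rewards
    for traj in trajectories:
        traj['disc_sum_rew'] = discount(traj['rewards'], gamma)

def add_gae(trajectories, gamma, lam):
    # GAE: advantage[t] = sum_k (gamma * lam)**k * delta[t + k],
    # with delta[t] = r[t] + gamma * V(s[t + 1]) - V(s[t])
    for traj in trajectories:
        rewards, values = traj['rewards'], traj['values']
        deltas = rewards + gamma * np.append(values[1:], 0.0) - values
        traj['advantages'] = discount(deltas, gamma * lam)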
Example #3
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='../data',
                          train=True,
                          download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)

    testset = dataloader(root='../data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
        )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()

    if args.opt_method == "sgd":
        print("using SGD")
        optimizer = SGD(model.parameters(),
                        lr=args.lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        nesterov=False)
    elif args.opt_method == "adam":
        print("using Adam")
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    else:
        raise Exception("Optimizer not supported")

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(
            args.checkpoint,
            str(args.dataset) + '_' + str(args.arch) + '_mom_lr=' +
            str(args.lr) + '_m=' + str(args.momentum) + '_drop:' +
            str(args.drop) + '_seed=' + str(args.manualSeed) + '.txt'),
                        title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return
    flag3 = 0
    # Train and val
    for epoch in range(start_epoch, args.epochs):

        if args.opt_method == "sgd":
            adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)
        if flag3 == 0:
            logger.append([0, train_init_loss, test_init_loss, 0, 0])
            flag3 = 1
        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)

    logger.close()

    print('Best acc:')
    print(best_acc)
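adjust_learning_rate and the global state dict used above are defined outside this snippet. A hedged sketch of a common step-decay variant, assuming args.schedule (list of milestone epochs) and args.gamma (decay factor) exist; this is not necessarily the original helper.

def adjust_learning_rate(optimizer, epoch):
    # decay the learning rate by args.gamma at every milestone in args.schedule,
    # and keep the global `state` dict (used for logging) in sync
    global state
    if epoch in args.schedule:
        state['lr'] *= args.gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = state['lr']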
Example #4
def main():
    ''' set default hyperparams in default_hyperparams.py '''
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument('-d',
                        '--dataset',
                        choices=supported.datasets,
                        required=True)
    parser.add_argument('--algorithm',
                        required=True,
                        choices=supported.algorithms)
    parser.add_argument(
        '--root_dir',
        required=True,
        help=
        'The directory where [dataset]/data can be found (or should be downloaded to, if it does not exist).'
    )
    parser.add_argument('--analyze_sample', default=1)

    # Dataset
    parser.add_argument(
        '--split_scheme',
        help=
        'Identifies how the train/val/test split is constructed. Choices are dataset-specific.'
    )
    parser.add_argument('--dataset_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})
    parser.add_argument(
        '--download',
        default=False,
        type=parse_bool,
        const=True,
        nargs='?',
        help=
        'If true, tries to download the dataset if it does not exist in root_dir.'
    )
    parser.add_argument(
        '--frac',
        type=float,
        default=1.0,
        help=
        'Convenience parameter that scales all dataset splits down to the specified fraction, for development purposes.'
    )

    # Loaders
    parser.add_argument('--loader_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})
    parser.add_argument('--train_loader', choices=['standard', 'group'])
    parser.add_argument('--uniform_over_groups',
                        type=parse_bool,
                        const=True,
                        nargs='?')
    parser.add_argument('--distinct_groups',
                        type=parse_bool,
                        const=True,
                        nargs='?')
    parser.add_argument('--n_groups_per_batch', type=int)
    parser.add_argument('--batch_size', type=int)
    parser.add_argument('--eval_loader',
                        choices=['standard'],
                        default='standard')

    # Model
    parser.add_argument('--model', choices=supported.models)
    parser.add_argument(
        '--model_kwargs',
        nargs='*',
        action=ParseKwargs,
        default={},
        help=
        'keyword arguments for model initialization passed as key1=value1 key2=value2'
    )

    # Transforms
    parser.add_argument('--train_transform', choices=supported.transforms)
    parser.add_argument('--eval_transform', choices=supported.transforms)
    parser.add_argument(
        '--target_resolution',
        nargs='+',
        type=int,
        help=
        'Target resolution. For example, --target_resolution 224 224 for a standard ResNet.'
    )
    parser.add_argument('--resize_scale', type=float)
    parser.add_argument('--max_token_length', type=int)

    # Objective
    parser.add_argument('--loss_function', choices=supported.losses)

    # Algorithm
    parser.add_argument('--groupby_fields', nargs='+')
    parser.add_argument('--group_dro_step_size', type=float)
    parser.add_argument('--coral_penalty_weight', type=float)
    parser.add_argument('--irm_lambda', type=float)
    parser.add_argument('--irm_penalty_anneal_iters', type=int)
    parser.add_argument('--algo_log_metric')
    parser.add_argument('--hsic_beta', type=float)
    parser.add_argument('--grad_penalty_lamb', type=float)
    parser.add_argument(
        '--params_regex',
        type=str,
        help='Regular expression specifying which gradients to penalize.')
    parser.add_argument('--label_cond',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)
    parser.add_argument('--dann_lamb', type=float)
    parser.add_argument('--dann_dc_name', type=str)

    # Model selection
    parser.add_argument('--val_metric')
    parser.add_argument('--val_metric_decreasing',
                        type=parse_bool,
                        const=True,
                        nargs='?')

    # Optimization
    parser.add_argument('--n_epochs', type=int)
    parser.add_argument('--optimizer', choices=supported.optimizers)
    parser.add_argument('--lr', type=float)
    parser.add_argument('--weight_decay', type=float)
    parser.add_argument('--max_grad_norm', type=float)
    parser.add_argument('--optimizer_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})

    # Scheduler
    parser.add_argument('--scheduler', choices=supported.schedulers)
    parser.add_argument('--scheduler_kwargs',
                        nargs='*',
                        action=ParseKwargs,
                        default={})
    parser.add_argument('--scheduler_metric_split',
                        choices=['train', 'val'],
                        default='val')
    parser.add_argument('--scheduler_metric_name')

    # Evaluation
    parser.add_argument('--evaluate_all_splits',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=True)
    parser.add_argument('--eval_splits', nargs='+', default=[])
    parser.add_argument('--eval_only',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)
    parser.add_argument('--eval_epoch', default=None, type=int)
    parser.add_argument('--save_z',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)

    # Misc
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int)
    parser.add_argument('--save_best',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=True)
    parser.add_argument('--save_last',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=True)
    parser.add_argument('--no_group_logging',
                        type=parse_bool,
                        const=True,
                        nargs='?')
    parser.add_argument('--use_wandb',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)
    parser.add_argument('--progress_bar',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)
    parser.add_argument('--resume',
                        type=parse_bool,
                        const=True,
                        nargs='?',
                        default=False)

    config = parser.parse_args()
    config = populate_defaults(config)

    # set device
    config.device = torch.device("cuda:" + str(
        config.device)) if torch.cuda.is_available() else torch.device("cpu")

    ## Initialize logs
    if os.path.exists(config.log_dir) and config.resume:
        resume = True
        mode = 'a'
    elif os.path.exists(config.log_dir) and config.eval_only:
        resume = False
        mode = 'a'
    else:
        resume = False
        mode = 'w'

    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    logger = Logger(os.path.join(config.log_dir, 'log.txt'), mode)

    # Record config
    log_config(config, logger)

    # Set random seed
    set_seed(config.seed)

    # Data
    full_dataset = supported.datasets[config.dataset](
        root_dir=config.root_dir,
        download=config.download,
        split_scheme=config.split_scheme,
        **config.dataset_kwargs)

    # To implement data augmentation (i.e., have different transforms
    # at training time vs. test time), modify these two lines:
    train_transform = initialize_transform(
        transform_name=config.train_transform,
        config=config,
        dataset=full_dataset)
    eval_transform = initialize_transform(transform_name=config.eval_transform,
                                          config=config,
                                          dataset=full_dataset)

    train_grouper = CombinatorialGrouper(dataset=full_dataset,
                                         groupby_fields=config.groupby_fields)

    datasets = defaultdict(dict)
    for split in full_dataset.split_dict.keys():
        if split == 'train':
            transform = train_transform
            verbose = True
        elif split == 'val':
            transform = eval_transform
            verbose = True
        else:
            transform = eval_transform
            verbose = False
        # Get subset
        datasets[split]['dataset'] = full_dataset.get_subset(
            split, frac=config.frac, transform=transform)

        if split == 'train':
            datasets[split]['loader'] = get_train_loader(
                loader=config.train_loader,
                dataset=datasets[split]['dataset'],
                batch_size=config.batch_size,
                uniform_over_groups=config.uniform_over_groups,
                grouper=train_grouper,
                distinct_groups=config.distinct_groups,
                n_groups_per_batch=config.n_groups_per_batch,
                **config.loader_kwargs)
        else:
            datasets[split]['loader'] = get_eval_loader(
                loader=config.eval_loader,
                dataset=datasets[split]['dataset'],
                grouper=train_grouper,
                batch_size=config.batch_size,
                **config.loader_kwargs)

        # Set fields
        datasets[split]['split'] = split
        datasets[split]['name'] = full_dataset.split_names[split]
        datasets[split]['verbose'] = verbose

        # Loggers
        datasets[split]['eval_logger'] = BatchLogger(
            os.path.join(config.log_dir, f'{split}_eval.csv'),
            mode=mode,
            use_wandb=(config.use_wandb and verbose))
        datasets[split]['algo_logger'] = BatchLogger(
            os.path.join(config.log_dir, f'{split}_algo.csv'),
            mode=mode,
            use_wandb=(config.use_wandb and verbose))

        if config.use_wandb:
            initialize_wandb(config)

    # Logging dataset info
    if config.no_group_logging and full_dataset.is_classification and full_dataset.y_size == 1:
        log_grouper = CombinatorialGrouper(dataset=full_dataset,
                                           groupby_fields=['y'])
    elif config.no_group_logging:
        log_grouper = None
    else:
        log_grouper = train_grouper
    log_group_data(datasets, log_grouper, logger)

    ## Initialize algorithm
    algorithm = initialize_algorithm(config=config,
                                     datasets=datasets,
                                     train_grouper=train_grouper)

    if config.eval_epoch is None:
        eval_model_path = os.path.join(config.log_dir, 'best_model.pth')
    else:
        eval_model_path = os.path.join(config.log_dir,
                                       f'{config.eval_epoch}_model.pth')
    best_epoch, best_val_metric = load(algorithm, eval_model_path)
    if config.eval_epoch is None:
        epoch = best_epoch
    else:
        epoch = config.eval_epoch

    results, z_splits, y_splits, c_splits = evaluate(algorithm=algorithm,
                                                     datasets=datasets,
                                                     epoch=epoch,
                                                     general_logger=logger,
                                                     config=config)

    include_test = config.evaluate_all_splits or 'test' in config.eval_splits

    logistics = all_logistics(z_splits,
                              c_splits,
                              y_splits,
                              epoch=epoch,
                              sample=int(config.analyze_sample),
                              include_test=include_test)

    logistics['G0'] = results['id_val']['acc_avg']
    logistics['G1'] = logistics['val_on_val']
    logistics['G2'] = logistics['trainval_on_val']
    logistics['G3'] = results['val']['acc_avg']

    logistics['I0'] = logistics['c_train']
    logistics['I1'] = logistics['c_val']
    per_class = torch.tensor(list(logistics['c_perclass'].values()))
    logistics['I2'] = torch.mean(per_class).item()

    if include_test:
        logistics['G1_test'] = logistics['test_on_test']
        logistics['G2_test'] = logistics['traintest_on_test']
        logistics['G3_test'] = results['test']['acc_avg']

        logistics['I1_test'] = logistics['c_test']
        per_class = torch.tensor(list(logistics['c_perclass_test'].values()))
        logistics['I2_test'] = torch.mean(per_class).item()

    with (open(os.path.join(config.log_dir, f'tests_epoch_{epoch}.pkl'),
               "wb")) as f:
        pickle.dump(logistics, f)

    logger.close()
    for split in datasets:
        datasets[split]['eval_logger'].close()
        datasets[split]['algo_logger'].close()
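Several of the arguments above use action=ParseKwargs to accept free-form key=value pairs (e.g. --model_kwargs pretrained=True). Below is a minimal sketch of such an argparse action; the WILDS helper this appears to be based on additionally infers int/float/bool types, which is omitted here.

import argparse

class ParseKwargs(argparse.Action):
    # collects tokens of the form key1=value1 key2=value2 into a dict
    def __call__(self, parser, namespace, values, option_string=None):
        kwargs = {}
        for token in values:
            key, _, value = token.partition('=')
            kwargs[key] = value
        setattr(namespace, self.dest, kwargs)

With nargs='*' the parser hands the raw tokens to the action as a list, so --model_kwargs n_layers=4 pretrained=True would yield {'n_layers': '4', 'pretrained': 'True'} under this sketch.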
Example #5
def main():
    global best_acc
    global mean
    global std

    if not os.path.isdir(args.out):
        mkdir_p(args.out)
    # Data
    print(f'==> Preparing AFLW')
    transform_train = transforms.Compose([
        transforms.Resize((60,60)),
        ])
    transform_val = transforms.Compose([
        transforms.Resize((60,60)),
        ])
    train_labeled_set, train_unlabeled_set, stat_labeled_set, train_val_set, val_set, test_set, mean, std = dataset.get_alfw(args.data_root, args.n_labeled, transform_train, transform_val)

    num_samples = args.val_iteration * args.batch_size
    sampler_x = RandomSampler(train_labeled_set, replacement=True, num_samples=num_samples)
    batch_sampler_x = BatchSampler(sampler_x, args.batch_size, drop_last=True)
    labeled_trainloader = data.DataLoader(train_labeled_set, batch_sampler=batch_sampler_x, num_workers=16, pin_memory=True)
    sampler_u = RandomSampler(train_unlabeled_set, replacement=True, num_samples=num_samples)
    batch_sampler_u = BatchSampler(sampler_u, args.batch_size, drop_last=True)
    unlabeled_trainloader = data.DataLoader(train_unlabeled_set, batch_sampler=batch_sampler_u, num_workers=16, pin_memory=True)
    val_loader = data.DataLoader(val_set, batch_size=args.batch_size, shuffle=False, num_workers=16, pin_memory=True)
    test_loader = data.DataLoader(test_set, batch_size=args.batch_size, shuffle=False, num_workers=16, pin_memory=True)


    sampler_trainval = RandomSampler(train_labeled_set, replacement=True, num_samples=num_samples)
    batch_sampler_trainval = BatchSampler(sampler_trainval, args.batch_size, drop_last=True)
    train_val_loader = data.DataLoader(train_labeled_set, batch_sampler=batch_sampler_trainval, num_workers=16, pin_memory=True)
    args.val_iteration=len(unlabeled_trainloader)



    # Model
    print("==> creating TCDCN")

    def create_model(ema=False):
        model = models.TCDCNN()
        model = model.cuda()

        if ema:
            for param in model.parameters():
                param.detach_()

        return model

    model = create_model()
    ema_model = create_model(ema=True)
    target_model = create_model(ema=True)

    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    train_criterion = nn.MSELoss()
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)


    ema_optimizer= WeightEMA(model, ema_model, alpha=args.ema_decay)
    target_optimizer = UpdateEma(model, target_model, alpha=args.ema_decay)
    start_epoch = 0
    rampup_length = float(args.epochs) / float(args.batch_size)
    # Resume
    title = 'celeba-mtfl'
    if args.resume:
        # Load.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.out = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        ema_model.load_state_dict(checkpoint['ema_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.resume, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.out, 'log.txt'), title=title)
        logger.set_names(['Train Loss', 'Train Loss X', 'Train Loss U', 'Train ME', 'Train FR' , 'Valid Loss', 'Valid ME.', 'Valid FR', 'Test Loss', 'Test ME.', 'Test FR'])

    writer = SummaryWriter(args.out)
    step = 0
    test_accs = []
    # Train and val
    for epoch in range(start_epoch, args.epochs):
        if (epoch + 1) % 100 == 0 and (epoch + 1) <= args.epochs:
            optimizer.param_groups[0]['lr'] *= 0.1

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_loss_x, train_loss_u = train(labeled_trainloader, unlabeled_trainloader, model, target_model, optimizer, ema_optimizer, target_optimizer, train_criterion, epoch, use_cuda, rampup_length)
        _, train_me, train_fr = validate(labeled_trainloader, model, criterion, epoch, use_cuda, mode='Train Stats')
        val_loss, val_me, val_fr = validate(val_loader, ema_model, criterion, epoch, use_cuda, mode='Valid Stats')
        test_loss, test_me, test_fr = validate(test_loader, ema_model, criterion, epoch, use_cuda, mode='Test Stats ')

        step = args.batch_size * args.val_iteration * (epoch + 1)

        writer.add_scalar('losses/train_loss', train_loss, step)
        writer.add_scalar('losses/valid_loss', val_loss, step)
        writer.add_scalar('losses/test_loss', test_loss, step)

        writer.add_scalar('accuracy/train_me', train_me, step)
        writer.add_scalar('accuracy/val_me', val_me, step)
        writer.add_scalar('accuracy/test_me', test_me, step)

        writer.add_scalar('accuracy/train_fr', train_fr, step)
        writer.add_scalar('accuracy/val_fr', val_fr, step)
        writer.add_scalar('accuracy/test_fr', test_fr, step)
        

        # append logger file
        logger.append([train_loss, train_loss_x, train_loss_u, train_me, train_fr, val_loss, val_me, val_fr, test_loss, test_me, test_fr])

        # save model
        is_best = val_me < best_acc
        best_acc = min(val_me, best_acc)
        if is_best:
            best_test = test_me
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'ema_state_dict': ema_model.state_dict(),
                'me': val_me,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best)
        test_accs.append(test_me)
    logger.close()
    # writer.close()

    print('Best acc')
    print(best_test)

    print('Mean acc:')
    print(np.mean(test_accs[-20:]))
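WeightEMA and UpdateEma maintain ema_model / target_model as exponential moving averages of the student model (the mean-teacher pattern). A hedged sketch of what such a helper might look like, assuming a step() method is called after each optimizer update; the original may differ (some MixMatch-style implementations also fold a small weight decay into this step).

import torch

class WeightEMA(object):
    # keeps ema_model's parameters as an exponential moving average of model's:
    # ema_p <- alpha * ema_p + (1 - alpha) * p
    def __init__(self, model, ema_model, alpha=0.999):
        self.params = list(model.state_dict().values())
        self.ema_params = list(ema_model.state_dict().values())
        self.alpha = alpha

    def step(self):
        for param, ema_param in zip(self.params, self.ema_params):
            if ema_param.dtype == torch.float32:
                ema_param.mul_(self.alpha)
                ema_param.add_(param * (1 - self.alpha))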
Example #6
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.save_dir):
        mkdir_p(args.save_dir)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    
    model = models.__dict__[args.arch](dataset=args.dataset, depth=args.depth, reduction=args.reduction)

    print(model)    

    if args.cuda:
        model.cuda()

    print('    Total params: %.2f' % (sum(p.numel() for p in model.parameters())))

    with torch.cuda.device(0):
        net = model
        flops, params = get_model_complexity_info(net, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
        print('Flops:  ' + flops)
        print('Params: ' + params)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)


    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.save_dir = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        with torch.no_grad():
            test_loss, test_acc = test(testloader, model, criterion, start_epoch, args.cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.gamma, args.schedule)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, args.cuda)
        with torch.no_grad():
            test_loss, test_acc = test(testloader, model, criterion, epoch, args.cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.save_dir)

    logger.close()

    print('Best acc:')
    print(best_acc)
Example #7
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data',
                          train=True,
                          download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)

    testset = dataloader(root='./data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            block_name=args.block_name,
        )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

    print("Geometric LR: {}".format(args.geo_lr))

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    criterion = nn.CrossEntropyLoss()

    param_lr = GradientRatioScheduler.get_params_base_lr(model, args.lr)
    optimizer = optim.SGD(param_lr,
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = GradientRatioScheduler(optimizer)

    print_model(model)
    input("Cont?")

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Epoch', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.',
            'Time', 'Learning Rate'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(scheduler, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, max(scheduler.get_lr())))

        st = time.time()
        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      scheduler, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append([
            epoch, train_loss, test_loss, train_acc, test_acc,
            time.time() - st,
            scheduler.get_lr()
        ])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if args.save_checkpoint_model:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Example #8
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)


    class Jigsaw(object):
        # https://github.com/joe-siyuan-qiao/GUNN
        def __call__(self, tensor):
            tensor_1 = tensor.clone()
            tensor_2 = tensor.clone()
            return torch.cat([tensor_1, tensor_2], dim=0)
            # NOTE: the early return above leaves the jigsaw shuffle below unreachable
            h, w = tensor.size(1), tensor.size(2)
            d = random.randint(0, h - 1)
            if d > 0:
                tensor_u, tensor_d = tensor.narrow(1, 0, d), tensor.narrow(1, d, h - d)
                tensor_1.narrow(1, 0, h - d).copy_(tensor_d)
                tensor_1.narrow(1, h - d, d).copy_(tensor_u)
            d = random.randint(0, w - 1)
            if d > 0:
                tensor_l, tensor_r = tensor.narrow(2, 0, d), tensor.narrow(2, d, w - d)
                tensor_2.narrow(2, 0, w - d).copy_(tensor_r)
                tensor_2.narrow(2, w - d, d).copy_(tensor_l)
            return torch.cat([tensor_1, tensor_2], dim=0)

        def __repr__(self):
            return self.__class__.__name__ + '()'


    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomSizedCrop(args.image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
            Jigsaw(),
        ])),
        batch_size=args.train_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Scale(int(256.0 / 224.0 * args.image_size)),
            transforms.CenterCrop(args.image_size),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.test_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    baseWidth=args.base_width,
                    cardinality=args.cardinality,
                )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nr.CrossEntropyLoss()
    optimizer = nr.SGD(model, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay,
            sparsity=args.nr_sparsity_start, zero_prb=args.nr_zero_prb)

    # Resume
    title = 'ImageNet-' + args.arch
    is_rejuvenated = False
    util_params_old = 1.0
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
        is_rejuvenated = checkpoint['is_rejuvenated']
        util_params_old = checkpoint['util_params_old']
        args.epochs = checkpoint['epochs']
        args.schedule = checkpoint['schedule']
        print('==> Resumed model size: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))
        if args.nr_bn_target > 0:
            print('==> Restoring BN to {:.2f}'.format(args.nr_bn_target))
            model.module.bn_restore(args.nr_bn_target)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.', 'Valid 5 Acc.'])


    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    if args.nr_target >= 0.999:
        is_rejuvenated = True
    epoch = start_epoch
    while epoch < args.epochs:
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc, test_acc_5 = test(val_loader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc, test_acc_5])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
                'is_rejuvenated': is_rejuvenated,
                'util_params_old': util_params_old,
                'epochs': args.epochs,
                'schedule': args.schedule,
                'sparsity': optimizer.sparsity,
            }, is_best, checkpoint=args.checkpoint)

        epoch = epoch + 1
        if not is_rejuvenated:
            args.epochs = args.epochs + 1
            for schedule_idx in range(len(args.schedule)):
                args.schedule[schedule_idx] = args.schedule[schedule_idx] + 1
            live_params, all_params = optimizer.inspect(flops_flag=args.nr_flops_flag)
            util_params = float(live_params) / float(all_params)
            print(' | Utilization {} at {}: {} {}'.format(util_params, optimizer.sparsity, live_params, all_params))

            if util_params <= args.nr_target:
                shutil.copyfile(os.path.join(args.checkpoint, 'checkpoint.pth.tar'), os.path.join(args.checkpoint,
                    'NR_{}_before_{}.pth.tar'.format(args.arch, args.nr_target)))
                model = model.module
                model.rejuvenate(flops_flag=args.nr_flops_flag)
                model = torch.nn.DataParallel(model).cuda()
                optimizer = nr.SGD(model, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay,
                        sparsity=0, zero_prb=args.nr_zero_prb)
                is_rejuvenated = True

                save_checkpoint({
                        'epoch': epoch,
                        'state_dict': model.state_dict(),
                        'acc': test_acc,
                        'best_acc': best_acc,
                        'optimizer' : optimizer.state_dict(),
                        'is_rejuvenated': is_rejuvenated,
                        'util_params_old': util_params_old,
                        'epochs': args.epochs,
                        'schedule': args.schedule,
                        'sparsity': optimizer.sparsity,
                    }, False, checkpoint=args.checkpoint, filename='NR_{}_{}.pth.tar'.format(args.arch, args.nr_target))

                if args.nr_compress_only:
                    return

            if util_params_old - util_params < args.nr_increase_thres:
                optimizer.sparsity += args.nr_increase_step

            util_params_old = util_params


    logger.close()
    # logger.plot()
    # savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Example #9
def main():
    chkpoint_name = args.checkpoint[args.checkpoint.rfind('/') + 1:]
    # import pdb; pdb.set_trace()
    global writer
    writer = SummaryWriter(log_dir='/runs/' + chkpoint_name)
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data',
                          train=True,
                          download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)

    testset = dataloader(root='./data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # writer =

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
        )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

    # model = torch.nn.DataParallel(model).cuda()
    # model = model.cuda(torch.device('cuda:1'))
    model = model.cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    torch.save(model, 'tempolary.pth')
    new_model = torch.load('tempolary.pth')

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'cifar-10-' + args.arch

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])
    print(model)

    import pdb
    pdb.set_trace()
    look_up_table = get_look_up_table(model)

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))

        if DEBUG:
            # print(model)
            show_low_rank(model,
                          input_size=[32, 32],
                          criterion=ValueThreshold(t),
                          type=args.type)

        print(' Start decomposition:')

        # set different threshold for model compression and test accuracy
        all_channs_ = []
        thresholds = np.arange(
            0.1, 1.0, 0.01).tolist()  #[5e-2] if args.type != 'ND' else [0.85]
        sigma_criterion = ValueThreshold if args.type != 'ND' else EnergyThreshold
        T = np.array(thresholds)
        cr = np.zeros(T.shape)
        acc = np.zeros(T.shape)

        model_path = 'net.pth'
        torch.save(model, model_path)
        result = 'result.pth' if not args.retrain else 'result-retrain.pth'

        for i, t in enumerate(thresholds):
            test_model = torch.load(model_path)
            print('=====================================Threshold is', t)
            cr[i], channs_ = show_low_rank(test_model,
                                           look_up_table,
                                           input_size=[32, 32],
                                           criterion=sigma_criterion(t),
                                           type=args.type)
            all_channs_.append(channs_)
            test_model = f_decouple(test_model,
                                    look_up_table,
                                    criterion=sigma_criterion(t),
                                    train=False,
                                    stride_1_only=args.stride_1_only)
            #print(model)
            print(' Done! test decoupled model')
            test_loss, test_acc = test(testloader, test_model, criterion,
                                       start_epoch, use_cuda)
            print(' Test Loss :  %.8f, Test Acc:  %.2f' %
                  (test_loss, test_acc))
            acc[i] = test_acc

            if args.retrain:
                # retrain model
                finetune_epoch = 4
                acc[i] = model_retrain(finetune_epoch, test_model, trainloader, \
                     testloader, criterion, look_up_table, use_cuda)
        torch.save(test_model, 'model.pth.tar')
        torch.save(OrderedDict([('acc', acc), ('cr', cr)]), result)
        print('compression ratio:')
        print(cr)
        print('accuracy:')
        print(acc)
        print(all_channs_)

        return

    # Train and val

    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      look_up_table, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
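The ValueThreshold / EnergyThreshold criteria that Example #9 sweeps over are not shown. A hedged sketch of what an energy-based rank-selection criterion could look like, assuming it is called with a 1-D tensor of singular values sorted in descending order and returns the rank to keep; the interface is an assumption, not the original code.

import torch

class EnergyThreshold(object):
    # keep the smallest rank whose leading singular values retain at least
    # `threshold` of the total spectral energy (sum of squared singular values)
    def __init__(self, threshold):
        assert 0.0 < threshold <= 1.0
        self.threshold = threshold

    def __call__(self, sigmas):
        energy = sigmas ** 2
        ratio = torch.cumsum(energy, dim=0) / energy.sum()
        rank = int((ratio < self.threshold).sum().item()) + 1
        return min(rank, sigmas.numel())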
Example #10
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)

    args.distributed = True
    args.gpu = args.local_rank
    torch.cuda.set_device(args.gpu)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    args.world_size = torch.distributed.get_world_size()
    print('world_size = ', args.world_size)

    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif 'resnext' in args.arch:
        model = models.__dict__[args.arch](
                    baseWidth=args.base_width,
                    cardinality=args.cardinality,
                )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    flops, params = get_model_complexity_info(model, (224, 224), as_strings=False, print_per_layer_stat=False)
    print('Flops:  %.3fG' % (flops / 1e9))
    print('Params: %.2fM' % (params / 1e6))

    cudnn.benchmark = True
    # define loss function (criterion) and optimizer
    # criterion = nn.CrossEntropyLoss().cuda()
    criterion = SoftCrossEntropyLoss(label_smoothing=args.label_smoothing).cuda()
    model = model.cuda()

    args.lr = float(0.1 * float(args.train_batch*args.world_size)/256.)
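    # linear scaling rule applied above: the base LR of 0.1 is scaled by the global batch size / 256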
    state['lr'] = args.lr
    optimizer = set_optimizer(model)
    #optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=args.opt_level,
                                      keep_batchnorm_fp32=args.keep_batchnorm_fp32,
                                      loss_scale=args.loss_scale)

    #model = torch.nn.DataParallel(model).cuda()
    #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank)
    model = DDP(model, delay_allreduce=True)
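    # apex DDP with delay_allreduce=True defers the gradient all-reduce until the whole backward pass has finished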

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'valf')
    #normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    data_aug_scale = (0.08, 1.0) 
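    # (0.08, 1.0) is torchvision's default area range for RandomResizedCrop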

    train_dataset = datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomResizedCrop(224, scale=data_aug_scale),
            transforms.RandomHorizontalFlip(),
            # transforms.ToTensor(),
            # normalize,
        ]))
    val_dataset   = datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            # transforms.ToTensor(),
            # normalize,
        ]))

    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
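    # shuffle=False in the loaders below because the DistributedSampler already shuffles and shards per rank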

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.train_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.test_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True, sampler=val_sampler, collate_fn=fast_collate)
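    # ToTensor/Normalize are commented out in the transforms above: fast_collate yields uint8
    # batches, so conversion and normalization presumably happen later on the GPU
    # (e.g. in a data prefetcher), as in the apex ImageNet example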


    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..', args.resume)
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        #checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        # model may have more keys
        t = model.state_dict()
        c = checkpoint['state_dict']
        for k in t:
            if k not in c:
                print('not in loading dict! fill it', k, t[k])
                c[k] = t[k]
        model.load_state_dict(c)
        print('optimizer load old state')
        optimizer.load_state_dict(checkpoint['optimizer'])
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
            logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])


    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        train_sampler.set_epoch(epoch)

        adjust_learning_rate(optimizer, epoch)

        if args.local_rank == 0:
            print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch, use_cuda)

        # save model
        if args.local_rank == 0:
            # append logger file
            logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

            is_best = test_acc > best_acc
            best_acc = max(test_acc, best_acc)
            save_checkpoint({
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer' : optimizer.state_dict(),
                }, is_best, checkpoint=args.checkpoint)

    if args.local_rank == 0:
        logger.close()

    print('Best acc:')
    print(best_acc)
Example #11
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)


    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100


    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))


    if args.arch.endswith('resnet_mapping'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    MaplabelInit=init_label,
                    feature_num=num_classes,
                    Stable=args.stable,
                    InitFactor=args.Ifactor,
                    Dmode='Dx',
                    Afun='Identity',
                    Layeradjust='toLabel'
                )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

    #print(model)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))


    # Resume
    title = 'cifar-10-' + args.arch

    tfboard_dir = args.tfboard_dir if args.tfboard_dir else args.checkpoint

    criterion = nn.MSELoss()
    others = []
    maplabel = []
    for name, p in model.named_parameters():
        if 'maplabel' in name:
            print(p)
            maplabel += [p]
        else:
            others += [p]


    # pfront = parmlist.remove(model.module.maplabel)
    optimizer = optim.SGD([{'params': others},
                           {'params': maplabel, 'lr': 0},
                           ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
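    # the maplabel parameters sit in their own param group with lr=0, i.e. they stay frozen
    # while the rest of the network is trained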

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), tfboard_dir=tfboard_dir,title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), tfboard_dir=tfboard_dir,title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])


    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    b = args.b


    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        #index = m.sample()
        #b=blist[index]

        print('\nEpoch: [%d | %d] LR: %f dir: %s' % (epoch + 1, args.epochs, state['lr'],args.checkpoint))


        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda,b)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        info = {'train_acc': train_acc,
                'test_acc': test_acc}
        for tag, value in info.items():
            logger.scalar_summary(tag,value,epoch+1)

        if use_mapping:
            tag = '/maplabel'
            logger.histo_summary(tag, model.module.maplabel.data.cpu().numpy(), epoch + 1)
            if model.module.maplabel.grad is not None:
                tag = '/maplabel/grad'
                logger.histo_summary(tag, model.module.maplabel.grad.data.cpu().numpy(), epoch + 1)
        

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.jpg'))

    print('Best acc:')
    print(best_acc)
Example #12
def main():
    global best_acc
    # global record
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('===> Preparing dataset cifar10...')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    dataloader = datasets.CIFAR10
    num_classes = 10

    trainset = dataloader(root='./data',
                          train=True,
                          download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)

    testset = dataloader(root='./data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> Creating Model ResNet")
    model = resnet.ResNet()
    print("==> Create Successfully!")
    # print(model)

    if not args.fixbit:
        print("    Train both model and bitwise parameters...")
    else:
        print(
            "    Train model with trained fixed bit number and parameters...")

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'cifar-10-' + 'ResNet-with-BIB'
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        resnet.RecordActivation = False
        # FirstEpoch is False regardless of args.fixbit or the epoch index
        resnet.FirstEpoch = False
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

        if not args.fixbit:
            calculate_optimal_alpha.cal_quant()

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Example #13
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
                                               batch_size=args.train_batch,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.test_batch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            baseWidth=args.base_width,
            cardinality=args.cardinality,
        )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Example #14
def main():
    global best_prec1, args

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if args.static_loss_scale != 1.0:
        if not args.fp16:
            print("Warning:  if --fp16 is not used, static_loss_scale will be ignored.")

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    model = model.cuda()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        # shared param/delay all reduce turns off bucketing in DDP, for lower latency runs this can improve perf
        # for the older version of APEX please use shared_param, for newer one it is delay_allreduce
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale)

    # optionally resume from a checkpoint
    title = 'ImageNet-' + args.arch
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            if args.local_rank == 0:
                logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
            logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.', 'Valid Top5.'])

    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    if args.arch == "inception_v3":
        crop_size = 299
        val_size = 320 # I chose this value arbitrarily, we can adjust.
    else:
        crop_size = 224
        val_size = 256

    pipe = HybridTrainPipe(batch_size=args.batch_size, num_threads=args.workers, device_id=args.local_rank, data_dir=traindir, crop=crop_size, dali_cpu=args.dali_cpu)
    pipe.build()
    train_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    pipe = HybridValPipe(batch_size=args.batch_size, num_threads=8, device_id=args.local_rank, data_dir=valdir, crop=crop_size, size=val_size)
    pipe.build()
    val_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))
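    # each rank's DALI iterator covers 1/world_size of the "Reader" epoch, hence the size above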

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    total_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        adjust_learning_rate(optimizer, epoch, args)

        if args.local_rank == 0:
            print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        [train_loss, train_acc, avg_train_time] = train(train_loader, model, criterion, optimizer, epoch)
        total_time.update(avg_train_time)
        # evaluate on validation set
        [test_loss, prec1, prec5] = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            # append logger file
            logger.append([optimizer.param_groups[0]['lr'], train_loss, test_loss, train_acc, prec1, prec5])

            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint)
            if epoch == args.epochs - 1:
                print('##Top-1 {0}\n'
                      '##Top-5 {1}\n'
                      '##Perf  {2}'.format(prec1, prec5, args.total_batch_size / total_time.avg))

        # reset DALI iterators
        train_loader.reset()
        val_loader.reset()

    if args.local_rank == 0:
        logger.close()
Example #15
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, eval):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
        eval: if True, only evaluate a previously trained policy and exit
    """

    if eval:
        print("Evaluating: ")
        evaluate(env_name, num_episodes)
        exit()

    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name)
    now = datetime.utcnow().strftime(
        "%b-%d_%H:%M:%S")  # create unique directories
    logger = Logger(logname=env_name, now=now)
    aigym_path = os.path.join('/tmp', env_name, now)
    #env = wrappers.Monitor(env, aigym_path, force=True)
    scaler = Scaler(obs_dim)
    val_func = NNValueFunction(obs_dim)
    policy = Policy(obs_dim, act_dim, kl_targ)
    #policy.restore_weights() ## -------------
    #val_func.restore_weights() ## -------------
    # run a few episodes of untrained policy to initialize scaler:
    run_policy(env, policy, scaler, logger, episodes=5)
    episode = 0
    while episode < num_episodes:
        trajectories = run_policy(env,
                                  policy,
                                  scaler,
                                  logger,
                                  episodes=batch_size)
        episode += len(trajectories)
        add_value(trajectories, val_func)  # add estimated values to episodes
        add_disc_sum_rew(trajectories,
                         gamma)  # calculate discounted sum of Rs
        add_gae(trajectories, gamma, lam)  # calculate advantage
        # concatenate all episodes into single NumPy arrays
        observes, actions, advantages, disc_sum_rew = build_train_set(
            trajectories)
        # add various stats to training log:
        log_batch_stats(observes, actions, advantages, disc_sum_rew, logger,
                        episode)
        policy.update(observes, actions, advantages, logger)  # update policy
        val_func.fit(observes, disc_sum_rew, logger)  # update value function
        logger.write(display=True)  # write logger results to file and stdout

        if killer.kill_now:
            if input('Terminate training (y/[n])? ') == 'y':
                break
            killer.kill_now = False

    print("Scaler vars,means: ")
    print(scaler.vars, scaler.means)

    for i in range(3):
        run_episode(env, policy, scaler, animate=True)

    #policy.save_weights()
    #val_func.save_weights()

    #WARNING: scaler is disabled

    logger.close()
    policy.close_sess()
    val_func.close_sess()
class Dispatcher(object):
  ''' nested crawl dispatcher '''

  def __init__(self, queue, cache, st):
    self.queue = queue
    self.cache = cache
    self.keywords = st.args.keywords
    self.args = st.args
    self.conf = st.conf

    self.num_of_pages = 0
    self.bytes_of_pages = 0

    self.max_num_pages = st.args.num

    self.logger = Logger(self.conf['crawl']['crawl_log'])
    self.stats = CrawlStats(self.conf['crawl']['crawl_stats'])

    self.log_queue = Queue.Queue()
    self.end_page_log_item = Page('none', 0, 0)
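    # sentinel item: pushed onto log_queue when crawling finishes so the log writer thread knows to stop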

    # shutdown signal
    self.shutdown = False

  def bulk_url_enqueue(self, urls):
    ''' add a list of URLs into the crawl queue '''
    for url in urls:
      page = Page(url, depth=1, score=9)
      self.queue.en_queue(page)

  def store_page(self, page):
    ''' store crawled page into persistent store '''

    try:
      fileto = os.path.join(self.conf['crawl']['crawl_pages'], page.store)
      dirname = os.path.dirname(fileto)
      if not os.path.exists(dirname):
        os.makedirs(dirname)

      with open(fileto, 'wb') as fpw:
        fpw.write(page.content.encode('utf-8'))
    except Exception as e:
      print('Error writing back %s: %s' % (page.url, str(e)))

  def call_crawl_page(self, page):
    ''' thread function to be called '''
    GenericPageCrawler(page, self.queue, self.cache, self.log_queue, self.keywords, self.args.fake)

  def run_page_crawler(self):
    ''' listen to crawler priority queue and crawl pages '''

    worker = Worker(self.args.thread)
    while True:
      # get one item from the queue
      # initialize a generic crawler instance
      page = self.queue.de_queue()
      if page:
        self.stats.crawl_in_progress += 1
        page.time_dequeue = time.time()
        worker.add_task(self.call_crawl_page, page)

      if self.stats.crawl_in_progress == self.max_num_pages:
        break

    worker.join()
    self.shutdown = True
    self.log_queue.put(self.end_page_log_item)

  def run_log_writter(self):
    ''' listen to log FIFO queue and write back crawl logs '''

    page_end = self.end_page_log_item

    while True:
      # get one log item from the queue
      # 1. write back crawl page meta info including page link, size, depth, score, etc.
      # 2. write crawled page back to the persistent storage
      page = self.log_queue.get()
      if page and page != page_end:
        page_size = page.size
        self.stats.update_crawl_bytes(page_size)
        self.store_page(page)
        self.logger.log(page)

        if page.error == 1:
          self.stats.update_crawl_count(1, success=False)
        else:
          self.stats.update_crawl_count(1, success=True)

      if page == page_end:
        break

  def run_progress_reporter(self):
    ''' report crawling progress every N seconds
        N is based on configuration or
          argument -i (interval)

        reporting metrics include:
        - # of pages crawled.
        - successful rate.
        - # % finished.
        - # pages / sec
        - # bytes / sec
    '''

    # generate current progress report
    def progress_report_current():
      crawl_total = self.max_num_pages
      crawled_pages = self.stats.crawled_pages
      progress = '%d/%d pages crawled. [%d%%] finished' % \
                  (crawled_pages, crawl_total, (100*crawled_pages/crawl_total))
      return progress

    # generate current progress report END

    try:
      interval = self.conf['report']['interval']
    except Exception:
      interval = 3

    while True:
      print(time.ctime() + '\t' +  progress_report_current())
      time.sleep(interval)

      if self.shutdown:
        break

    # print finish statistics
    print(time.ctime() + '\t' +  progress_report_current())
    print('')
    self.stats.write_crawl_stats(sys.stdout)
    print('crawl finished.')

  def run(self):
    ''' run the dispatcher '''

    # crawl google web search engine
    gs = GoogleWebCrawler(self.keywords, self.args.fake)

    urls = gs.query()
    if not urls and gs.error > 0:
      print('Network Error. Please check network connection.')
      return

    if not urls:
      bs = BingWebCrawler(self.keywords, self.args.fake)
      urls = bs.query()

    if not urls:
      print('Seed crawl failed. Please check network connection or contact the author.')
      return

    self.bulk_url_enqueue(urls)

    # launch the crawler thread
    t_crawler = threading.Thread(target=self.run_page_crawler)
    t_crawler.daemon = True
    t_crawler.start()

    # launch the log writer thread
    t_logger = threading.Thread(target=self.run_log_writter)
    t_logger.daemon = True
    t_logger.start()

    # launch the progress reporter
    t_reporter = threading.Thread(target=self.run_progress_reporter)
    t_reporter.daemon = True
    t_reporter.start()

    # wait for the workers to finish
    t_crawler.join()
    t_logger.join()

    # finalize statistical metrics
    self.stats.finalize()

    # close reporter
    t_reporter.join()

    # close logger
    self.logger.close()
def main():
    # prepare dir
    if not os.path.exists('./snapshots'):
        os.mkdir('./snapshots')
    if not os.path.exists('./logdir'):
        os.mkdir('./logdir')

    global args, best_prec1
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](width_mult=args.width_mult)

    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    title = 'ImageNet-' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            args.checkpoint = os.path.dirname(args.resume)
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            title=title,
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    cudnn.benchmark = True

    # Data loading code
    if args.data_backend == 'pytorch':
        get_train_loader = get_pytorch_train_loader
        get_val_loader = get_pytorch_val_loader
    elif args.data_backend == 'dali-gpu':
        get_train_loader = get_dali_train_loader(dali_cpu=False)
        get_val_loader = get_dali_val_loader()
    elif args.data_backend == 'dali-cpu':
        get_train_loader = get_dali_train_loader(dali_cpu=True)
        get_val_loader = get_dali_val_loader()

    train_loader, train_loader_len = get_train_loader(
        args.data,
        args.batch_size,
        workers=args.workers,
        input_size=args.input_size)
    val_loader, val_loader_len = get_val_loader(args.data,
                                                args.batch_size,
                                                workers=args.workers,
                                                input_size=args.input_size)

    if args.evaluate:
        from collections import OrderedDict
        if os.path.isfile(args.weight):
            print("=> loading pretrained weight '{}'".format(args.weight))
            source_state = torch.load(args.weight)
            target_state = OrderedDict()
            for k, v in source_state.items():
                if k[:7] != 'module.':
                    k = 'module.' + k
                target_state[k] = v
            model.load_state_dict(target_state)
        else:
            print("=> no weight found at '{}'".format(args.weight))

        validate(val_loader, val_loader_len, model, criterion)
        return

    # visualization
    writer = SummaryWriter(os.path.join(args.checkpoint, 'logs'))

    for epoch in range(args.start_epoch, args.epochs):
        t1 = time.time()

        if args.distributed:
            train_sampler.set_epoch(epoch)
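            # NOTE: train_sampler is not defined in this snippet; for the 'pytorch' data backend
            # this would presumably be the loader's DistributedSampler (e.g. train_loader.sampler),
            # otherwise this branch raises a NameError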

        # train for one epoch
        train_loss, train_acc = train(train_loader, train_loader_len, model,
                                      criterion, optimizer, epoch)

        # evaluate on validation set
        val_loss, prec1 = validate(val_loader, val_loader_len, model,
                                   criterion)
        elapse = time.time() - t1
        h, m, s = eta_time(elapse, args.epochs - epoch - 1)

        lr = optimizer.param_groups[0]['lr']

        # append logger file
        logger.append([lr, train_loss, val_loss, train_acc, prec1])

        # tensorboardX
        writer.add_scalar('learning rate', lr, epoch + 1)
        writer.add_scalars('loss', {
            'train loss': train_loss,
            'validation loss': val_loss
        }, epoch + 1)
        writer.add_scalars('accuracy', {
            'train accuracy': train_acc,
            'validation accuracy': prec1
        }, epoch + 1)

        if prec1 > best_prec1:
            best_prec1 = max(prec1, best_prec1)
            torch.save(obj={
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
                       f=os.path.join(args.checkpoint, 'model.pth.tar'))
        print(
            '\n[Epoch: {:.0f} | {:.0f}], val: loss={:.6}, top1={:.6}, best=\033[31m{:.6}\033[0m, elapse={:.2f}min, eta={:.0f}h {:.0f}m {:.0f}s'
            .format(epoch + 1, args.epochs, val_loss, prec1, best_prec1,
                    elapse / 60, h, m, s))

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))
    writer.close()

    print('Best accuracy:', best_prec1)
Example #18
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    print(os.path.isdir(args.checkpoint))

    # Data
    transform = get_transforms(input_size=args.image_size,
                               test_size=args.image_size,
                               backbone=None)

    print('==> Preparing dataset %s' % args.trainroot)
    trainset = dataset.Dataset(root=args.trainroot,
                               transform=transform['val_train'])
    train_loader = data.DataLoader(trainset,
                                   batch_size=args.train_batch,
                                   shuffle=True,
                                   num_workers=args.workers,
                                   pin_memory=True)

    valset = dataset.TestDataset(root=args.valroot,
                                 transform=transform['val_test'])
    val_loader = data.DataLoader(valset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True)

    model = make_model(args)

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = get_optimizer(model, args)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.2,
                                                           patience=5,
                                                           verbose=False)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.module.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        train_loss, train_acc, train_5 = train(train_loader, model, criterion,
                                               optimizer, epoch, use_cuda)
        test_loss, test_acc, test_5 = test(val_loader, model, criterion, epoch,
                                           use_cuda)

        scheduler.step(test_loss)
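        # ReduceLROnPlateau (mode='min', patience=5) only lowers the LR after the validation
        # loss has stopped improving for `patience` epochs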

        # append logger file
        # log the LR actually in use (the scheduler updates the optimizer, not state['lr'])
        logger.append(
            [optimizer.param_groups[0]['lr'], train_loss, test_loss, train_acc, test_acc])
        print(
            'train_loss:%f, val_loss:%f, train_acc:%f, train_5:%f, val_acc:%f, val_5:%f'
            % (train_loss, test_loss, train_acc, train_5, test_acc, test_5))

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)

        if len(args.gpu_id) > 1:
            save_checkpoint(
                {
                    'fold': 0,
                    'epoch': epoch + 1,
                    'state_dict': model.module.state_dict(),
                    'train_acc': train_acc,
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                single=True,
                checkpoint=args.checkpoint)
        else:
            save_checkpoint(
                {
                    'fold': 0,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'train_acc': train_acc,
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                single=True,
                checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
def main():
    global best_acc, num_classes
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    transform_train = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(64, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    restrict = [int(part) for part in args.restrict.split(",")
                ] if args.restrict else None
    num_classes = len(restrict) if restrict else 1000
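    # --restrict takes a comma-separated list of class ids; when given, both splits are
    # filtered to those classes and the output layer is sized accordingly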

    trainset = train_imagenet64(args.dataset,
                                transform=transform_train,
                                filter=restrict)
    testset = test_imagenet64(args.dataset,
                              transform=transform_test,
                              filter=restrict)

    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](num_classes=num_classes)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Example #20
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root=args.datapath, train=True, download=False, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root=args.datapath, train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    cardinality=args.cardinality,
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)
    modeltype = type(model).__name__
    model,classifier = decomposeModel(model)
    classifier = classifier.cuda()

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    reg_net = None
    optimizer_reg = optim.SGD(classifier.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    if args.mtype == 'baseline':
        args.dcl_refsize = 0
        reg_net = regressor.Net(classifier, optimizer_reg, ref_size=args.dcl_refsize, backendtype=modeltype)
    elif args.mtype == 'dcl':
        reg_net = regressor.Net(classifier, optimizer_reg, ref_size=args.dcl_refsize, backendtype=modeltype, dcl_offset=args.dcl_offset, dcl_window=args.dcl_window)
    elif args.mtype == 'gem':
        reg_net = gem.Net(classifier, optimizer_reg, n_memories=args.gem_memsize, backendtype=modeltype)

    print(args)

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Err.', 'Valid Err.', 'Cos Bef.', 'Cos Aft.', 'Mag'])


    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate_two(optimizer, optimizer_reg, epoch)

        print('Epoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_top1_acc, train_top5_acc, cong_bf, cong_af, mag = train(trainloader, model, criterion, optimizer, epoch, use_cuda, reg_net=reg_net)
        test_loss, test_top1_acc, test_top5_acc = test(testloader, model, criterion, epoch, use_cuda, reg_net=reg_net)
        print('train_loss: {:.4f}, train_top1_err: {:.2f}, train_top5_err: {:.2f}, test_loss: {:.4f}, test_top1_err: {:.2f}, test_top5_err: {:.2f}, cong_bf: {:.4f}, cong_af: {:.4f}, mag: {:.4f}'.format(
            train_loss, 100-train_top1_acc, 100-train_top5_acc,
            test_loss, 100-test_top1_acc, 100-test_top5_acc,
            cong_bf, cong_af, mag))

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, 100-train_top1_acc, 100-test_top1_acc, cong_bf, cong_af, mag])

        # save model
        is_best = test_top1_acc > best_acc
        best_acc = max(test_top1_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'classifier_state_dict': reg_net.state_dict(),
                'acc': test_top1_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
                'cong_bf': cong_bf,
                'cong_af': cong_af,
                'mag': mag,
            }, is_best, checkpoint=args.checkpoint)

    logger.close()

    print('Best err:')
    print(100-best_acc)
def main():
    global best_acc

    if not os.path.isdir(args.out):
        mkdir_p(args.out)

    # Data
    print('==> Preparing cifar10')
    transform_train = transforms.Compose([
        dataset.RandomPadandCrop(32),
        dataset.RandomFlip(),
        dataset.ToTensor(),
    ])

    transform_val = transforms.Compose([
        dataset.ToTensor(),
    ])

    train_labeled_set, train_unlabeled_set, val_set, test_set = dataset.get_cifar10(
        './data',
        args.n_labeled,
        transform_train=transform_train,
        transform_val=transform_val)
    labeled_trainloader = data.DataLoader(train_labeled_set,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          num_workers=0,
                                          drop_last=True)
    unlabeled_trainloader = data.DataLoader(train_unlabeled_set,
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            num_workers=0,
                                            drop_last=True)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=0)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)

    # Model
    print("==> creating WRN-28-2")

    def create_model(ema=False):
        model = models.WideResNet(num_classes=10)
        model = model.to(dev)

        if ema:
            for param in model.parameters():
                param.detach_()

        return model

    model = create_model()
    ema_model = create_model(ema=True)

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    train_criterion = SemiLoss()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    ema_optimizer = WeightEMA(model, ema_model, alpha=args.ema_decay)
    start_epoch = 0

    # Resume
    title = 'noisy-cifar-10'
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.out = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        ema_model.load_state_dict(checkpoint['ema_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.out, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.out, 'log.txt'), title=title)
        logger.set_names([
            'Train Loss', 'Train Loss X', 'Train Loss U', 'Valid Loss',
            'Valid Acc.', 'Test Loss', 'Test Acc.'
        ])

    writer = SummaryWriter(args.out)
    step = 0
    test_accs = []
    # Train and val
    for epoch in range(start_epoch, args.epochs):

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_loss_x, train_loss_u = train(
            labeled_trainloader, unlabeled_trainloader, model, optimizer,
            ema_optimizer, train_criterion, epoch, use_cuda)
        _, train_acc = validate(labeled_trainloader,
                                ema_model,
                                criterion,
                                epoch,
                                use_cuda,
                                mode='Train Stats')
        val_loss, val_acc = validate(val_loader,
                                     ema_model,
                                     criterion,
                                     epoch,
                                     use_cuda,
                                     mode='Valid Stats')
        test_loss, test_acc = validate(test_loader,
                                       ema_model,
                                       criterion,
                                       epoch,
                                       use_cuda,
                                       mode='Test Stats ')

        step = args.train_iteration * (epoch + 1)

        writer.add_scalar('losses/train_loss', train_loss, step)
        writer.add_scalar('losses/valid_loss', val_loss, step)
        writer.add_scalar('losses/test_loss', test_loss, step)

        writer.add_scalar('accuracy/train_acc', train_acc, step)
        writer.add_scalar('accuracy/val_acc', val_acc, step)
        writer.add_scalar('accuracy/test_acc', test_acc, step)

        # append logger file
        logger.append([
            train_loss, train_loss_x, train_loss_u, val_loss, val_acc,
            test_loss, test_acc
        ])

        # save model
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'ema_state_dict': ema_model.state_dict(),
                'acc': val_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best)
        test_accs.append(test_acc)
    logger.close()
    writer.close()

    print('Best acc:')
    print(best_acc)

    print('Mean acc:')
    print(np.mean(test_accs[-20:]))
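
The example above maintains an EMA copy of the model through ema_optimizer.step(). A minimal sketch of such a WeightEMA updater, assuming it simply blends the raw weights into the EMA weights once per step (the original class may additionally apply weight decay to the raw model):

class WeightEMA(object):
    # Exponential moving average of model weights: ema = alpha * ema + (1 - alpha) * raw.
    def __init__(self, model, ema_model, alpha=0.999):
        self.alpha = alpha
        self.params = list(model.state_dict().values())
        self.ema_params = list(ema_model.state_dict().values())

    def step(self):
        one_minus_alpha = 1.0 - self.alpha
        for param, ema_param in zip(self.params, self.ema_params):
            if ema_param.is_floating_point():  # skip integer buffers such as num_batches_tracked
                ema_param.mul_(self.alpha)
                ema_param.add_(param * one_minus_alpha)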
Beispiel #22
0
def main():
    global BEST_ACC, LR_STATE
    start_epoch = cfg.CLS.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Create ckpt folder
    if not os.path.isdir(cfg.CLS.ckpt):
        mkdir_p(cfg.CLS.ckpt)
    if args.cfg_file is not None and not cfg.CLS.evaluate:
        shutil.copyfile(args.cfg_file, os.path.join(cfg.CLS.ckpt, args.cfg_file.split('/')[-1]))

    # Dataset and Loader
    normalize = transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)
    train_aug = [transforms.RandomResizedCrop(cfg.CLS.crop_size), transforms.RandomHorizontalFlip()]
    if len(cfg.CLS.rotation) > 0:
        train_aug.append(transforms.RandomRotation(cfg.CLS.rotation))
    if len(cfg.CLS.pixel_jitter) > 0:
        train_aug.append(RandomPixelJitter(cfg.CLS.pixel_jitter))
    if cfg.CLS.grayscale > 0:
        train_aug.append(transforms.RandomGrayscale(cfg.CLS.grayscale))
    train_aug.append(transforms.ToTensor())
    train_aug.append(normalize)

    traindir = os.path.join(cfg.CLS.data_root, cfg.CLS.train_folder)
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose(train_aug)),
        batch_size=cfg.CLS.train_batch, shuffle=True,
        num_workers=cfg.workers, pin_memory=True)

    if cfg.CLS.validate or cfg.CLS.evaluate:
        valdir = os.path.join(cfg.CLS.data_root, cfg.CLS.val_folder)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir, transforms.Compose([
                transforms.Resize(cfg.CLS.base_size),
                transforms.CenterCrop(cfg.CLS.crop_size),
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=cfg.CLS.test_batch, shuffle=False,
            num_workers=cfg.workers, pin_memory=True)
    if cfg.CLS.teacher_pretrained:
        if cfg.CLS.torchvision_pretrain:
            #teacher_model = torchvision.models.resnet50(pretrained=True)
            T_MODEL = eval(cfg.CLS.torchvision_pretrain)
            print('Teacher pretrained model init done!!!')
        else:
            print('Teacher pretrained model (soeaver weights) init done!')
            T_MODEL = models.__dict__[cfg.CLS.teacher_arch]()
            t_checkpoint = torch.load(cfg.CLS.teacher_weights)
            try:
                teacher_weight_dict = t_checkpoint['state_dict']
            except:
                teacher_weight_dict = t_checkpoint
            teacher_model_dict = T_MODEL.state_dict()
        if cfg.CLS.kd_loss:
            loss_kd = loss_fn_kd
    
    # Create model
    if cfg.CLS.teacher_pretrained: 
        model = models.__dict__[cfg.CLS.arch](teacher_model = T_MODEL)
    else:
        model = models.__dict__[cfg.CLS.arch]() 
    print(model)
    # Calculate FLOPs & Param
    #n_flops, n_convops, n_params = measure_model(model, cfg.CLS.crop_size, cfg.CLS.crop_size)
    #print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.
    #      format(n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
    #del model
    #if cfg.CLS.teacher_pretrained: 
    #    model = models.__dict__[cfg.CLS.arch](teacher_model = T_MODEL)
    #else:
    #    model = models.__dict__[cfg.CLS.arch]() 

    # Load pre-train model
    if cfg.CLS.pretrained:
        print("==> Using pre-trained model '{}'".format(cfg.CLS.pretrained))
        pretrained_dict = torch.load(cfg.CLS.pretrained)
        try:
            pretrained_dict = pretrained_dict['state_dict']
        except:
            pretrained_dict = pretrained_dict
        model_dict = model.state_dict()
        updated_dict, match_layers, mismatch_layers = weight_filler(pretrained_dict, model_dict)
        model_dict.update(updated_dict)
        model.load_state_dict(model_dict)
    else:
        print("==> Creating model '{}'".format(cfg.CLS.arch))

    if cfg.CLS.teacher_pretrained:
        print("==> Using pre-trained teacher model '{}'".format(cfg.CLS.teacher_pretrained))
        model_dict = model.state_dict()
        t_updated_dict, t_match_layers, t_mismatch_layers = weight_filler(teacher_weight_dict, model_dict)
        model_dict.update(t_updated_dict)
        model.load_state_dict(model_dict)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    if cfg.CLS.pretrained:
        def param_filter(param):
            return param[1]

        new_params = map(param_filter, filter(lambda p: p[0] in mismatch_layers, model.named_parameters()))
        base_params = map(param_filter, filter(lambda p: p[0] in match_layers, model.named_parameters()))
        model_params = [{'params': base_params}, {'params': new_params, 'lr': cfg.CLS.base_lr * 10}]
    elif cfg.CLS.teacher_pretrained:
        def param_filter(param):
            return param[1]
        t_new_params = map(param_filter, filter(lambda p: p[0] in t_mismatch_layers, model.named_parameters()))
        t_base_params = map(param_filter, filter(lambda p: p[0] in t_match_layers, model.named_parameters()))
        model_params = [{'params': t_base_params}, {'params': t_new_params}] 
    else:
        model_params = model.parameters()
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    optimizer = optim.SGD(model_params, lr=cfg.CLS.base_lr, momentum=cfg.CLS.momentum,
                          weight_decay=cfg.CLS.weight_decay)

    # Evaluate model
    if cfg.CLS.evaluate:
        print('\n==> Evaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion, start_epoch, USE_CUDA)
        print('==> Test Loss: {:.8f} | Test_top1: {:.4f}% | Test_top5: {:.4f}%'.format(test_loss, test_top1, test_top5))
        return

    # Resume training
    title = 'Pytorch-CLS-' + cfg.CLS.arch
    if cfg.CLS.resume:
        # Load checkpoint.
        print("==> Resuming from checkpoint '{}'".format(cfg.CLS.resume))
        assert os.path.isfile(cfg.CLS.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(cfg.CLS.resume)
        BEST_ACC = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

    # Train and val
    for epoch in range(start_epoch, cfg.CLS.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f}'.format(epoch + 1, cfg.CLS.epochs, LR_STATE))

        if cfg.CLS.kd_loss:
            print('kd_train init done!!')
            alpha = cfg.CLS.alpha
            temperature = cfg.CLS.temperature
            train_loss, train_acc = train_kd(train_loader, model, loss_kd, optimizer, epoch, alpha, temperature, USE_CUDA)
        else:
            train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, USE_CUDA)
        if cfg.CLS.validate:
            test_loss, test_top1, test_top5 = test(val_loader, model, criterion, epoch, USE_CUDA)
        else:
            test_loss, test_top1, test_top5 = 0.0, 0.0, 0.0

        # Append logger file
        logger.append([LR_STATE, train_loss, test_loss, train_acc, test_top1])
        # Save model
        save_checkpoint(model, optimizer, test_top1, epoch)
        # Draw curve
        try:
            draw_curve(cfg.CLS.arch, cfg.CLS.ckpt)
            print('==> Success saving log curve...')
        except:
            print('==> Saving log curve error...')

    logger.close()
    try:
        savefig(os.path.join(cfg.CLS.ckpt, 'log.eps'))
        shutil.copyfile(os.path.join(cfg.CLS.ckpt, 'log.txt'), os.path.join(cfg.CLS.ckpt, 'log{}.txt'.format(
            datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except:
        print('Copy log error.')
    print('==> Training Done!')
    print('==> Best acc: {:.4f}%'.format(BEST_ACC))
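
Beispiel #22 passes loss_fn_kd into train_kd but does not show it. A minimal sketch of the usual soft-target distillation loss, matching the alpha/temperature arguments used above (the exact weighting in the original repo is an assumption):

import torch.nn.functional as F

def loss_fn_kd(student_logits, labels, teacher_logits, alpha, temperature):
    # Soft-target term: KL divergence between the softened teacher and student distributions.
    T = temperature
    soft = F.kl_div(F.log_softmax(student_logits / T, dim=1),
                    F.softmax(teacher_logits / T, dim=1),
                    reduction='batchmean') * (alpha * T * T)
    # Hard-target term: standard cross-entropy against the ground-truth labels.
    hard = F.cross_entropy(student_logits, labels) * (1.0 - alpha)
    return soft + hard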
Beispiel #23
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100


    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    cardinality=args.cardinality,
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('resnet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    block_name=args.block_name,
                )
    elif args.arch.startswith('preresnet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    block_name=args.block_name,
                )
    else:
        print('Model is specified incorrectly - using the standard model')
        model = models.__dict__[args.arch](num_classes=num_classes)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))
    criterion = nn.CrossEntropyLoss()
    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'adamw':
        optimizer = AdamW(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay, warmup=args.warmup)
    elif args.optimizer.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'srsgd':
        iter_count = 1
        optimizer = SRSGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, iter_count=iter_count, restarting_iter=args.restart_schedule[0])
    
    schedule_index = 1
    # Resume
    title = '%s-' % args.dataset + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if args.optimizer.lower() == 'srsgd':
            iter_count = optimizer.param_groups[0]['iter_count']
        schedule_index = checkpoint['schedule_index']
        state['lr'] =  optimizer.param_groups[0]['lr']
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)  
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
    
    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    
    for epoch in range(start_epoch, args.epochs):
        if args.optimizer.lower() == 'srsgd':
            if epoch in args.schedule:
                optimizer = SRSGD(model.parameters(), lr=args.lr * (args.gamma**schedule_index), weight_decay=args.weight_decay, iter_count=iter_count, restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
            
        else:
            adjust_learning_rate(optimizer, epoch)

        logger.file.write('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))
        
        if args.optimizer.lower() == 'srsgd':
            train_loss, train_acc, iter_count = train(trainloader, model, criterion, optimizer, epoch, use_cuda, logger)
        else:
            train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda, logger)
            
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda, logger)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])
        
        writer.add_scalars('train_loss', {args.model_name: train_loss}, epoch)
        writer.add_scalars('test_loss', {args.model_name: test_loss}, epoch)
        writer.add_scalars('train_acc', {args.model_name: train_acc}, epoch)
        writer.add_scalars('test_acc', {args.model_name: test_acc}, epoch)

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'schedule_index': schedule_index,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, epoch, checkpoint=args.checkpoint)
    
    logger.file.write('Best acc:%f'%best_acc)
    
    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
    
    with open("./all_results.txt", "a") as f:
        fcntl.flock(f, fcntl.LOCK_EX)
        f.write("%s\n"%args.checkpoint)
        f.write("best_acc %f\n\n"%best_acc)
        fcntl.flock(f, fcntl.LOCK_UN)
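
Several of these scripts call an adjust_learning_rate(optimizer, epoch) helper that is not part of the listing. A minimal, self-contained sketch of the step-decay variant they appear to assume (the default milestones and gamma below are placeholders, not values from the repo):

def adjust_learning_rate(optimizer, epoch, schedule=(150, 225), gamma=0.1, state=None):
    # Step decay: shrink every parameter group's LR by `gamma` at each milestone epoch,
    # and mirror the new value into the shared `state` dict if one is passed in.
    if epoch in schedule:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= gamma
        if state is not None:
            state['lr'] = optimizer.param_groups[0]['lr']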
Beispiel #24
0
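    # Snippet: body of an evaluation routine. It assumes that model, classifier,
    # test_loader, args, logger, and AverageMeter-style meters (batch_time, losses,
    # acc) are defined in the enclosing scope; see the AverageMeter sketch at the
    # end of this example.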
    with torch.no_grad():
        end = time.time()
        for i, (data, target) in enumerate(test_loader):
            data = data.cuda()
            target = target.cuda()

            # compute output
            pred = classifier(model(data))
            loss = F.cross_entropy(pred, target, reduction='mean')
            
            # measure accuracy and record loss
            top1, = accuracy(pred, target, topk=(1,))
            losses.update(loss.item(), data.size(0))
            acc.update(top1.item(), data.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                logger.info('Test: [{0}/{1}] Time {btime.val:.3f} (avg={btime.avg:.3f}) '
                            'Test Loss {loss.val:.3f} (avg={loss.avg:.3f}) '
                            'Acc {acc.val:.3f} (avg={acc.avg:.3f})' \
                            .format(i, len(test_loader), btime=batch_time, loss=losses, acc=acc))

        logger.info(' * Accuracy {acc.avg:.5f}'.format(acc=acc))
    return acc.avg

# Train and evaluate the model
main()
writer.close()
logger.close()
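
The meters used in the snippet above (batch_time, losses, acc) follow the common AverageMeter pattern; a minimal sketch:

class AverageMeter(object):
    # Tracks the most recent value and the running average of a metric.
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count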
Beispiel #25
0
def main():
    best_acc = 0
    start_epoch = args.start_epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    trainloader = getdata(args, train=True)
    testloader = getdata(args, train=False)

    model = VGG(args.attention, args.nclass)

    if args.gpu:
        if torch.cuda.is_available():
            model = model.cuda()
            cudnn.benchmark = True
        else:
            print(
                'There is no cuda available on this machine use cpu instead.')
            args.gpu = False

    criterion = nn.CrossEntropyLoss()
    optimizer = ''
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    else:
        print(args.optimizer, 'is not correct')
        return

    title = 'cifar-10-' + args.attention

    if args.evaluate:
        print('\nEvaluation only')
        assert os.path.isfile(
            args.evaluate), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        test_loss, test_acc = test(model, testloader, criterion, args.gpu)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint,
                                     state['attention'] + '-' + 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint,
                                     state['attention'] + '-' + 'log.txt'),
                        title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    for epoch in range(start_epoch, args.epochs):
        start_time = time.time()
        adjust_learning_rate(optimizer, epoch)

        train_loss, train_acc = train(model, trainloader, criterion, optimizer,
                                      epoch, args.gpu)
        test_loss, test_acc = test(model, testloader, criterion, args.gpu)
        if sys.version[0] == '3':
            train_acc = train_acc.cpu().numpy().tolist()[0]
            test_acc = test_acc.cpu().numpy().tolist()[0]
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'attention': state['attention'],
            },
            is_best,
            checkpoint=args.checkpoint)
        print(time.time() - start_time)
        print(
            "epoch: {:3d}, lr: {:.8f}, train-loss: {:.3f}, test-loss: {:.3f}, train-acc: {:2.3f}, test_acc:, {:2.3f}"
            .format(epoch, state['lr'], train_loss, test_loss, train_acc,
                    test_acc))

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint,
                         state['attention'] + '-' + 'log.eps'))

    print('Best acc:', best_acc)
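
The train()/test() functions referenced above are not included in the listing. As an illustration of the call signature test(model, testloader, criterion, args.gpu) used in this example, here is a hypothetical evaluation loop (a sketch only; the real test() may report additional metrics):

import torch

def evaluate_sketch(model, loader, criterion, use_gpu):
    # Run one pass over the loader and return (average loss, top-1 accuracy in %).
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            if use_gpu:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item() * targets.size(0)
            correct += outputs.argmax(dim=1).eq(targets).sum().item()
            total += targets.size(0)
    return total_loss / total, 100.0 * correct / total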
Beispiel #26
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    os.makedirs(args.save_dir, exist_ok=True)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        #frequence_func,
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        #frequence_func,
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    cardinality=args.cardinality,
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

    model.cuda()

    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    title = 'cifar-10-' + args.arch

    if args.init_pth is None:
        save_checkpoint({'state_dict': model.state_dict()}, False, checkpoint=args.save_dir, filename='init.pth.tar')
    else:
        init_checkpoint = torch.load(args.init_pth)
        model.load_state_dict(init_checkpoint['state_dict'])

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.save_dir = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return


    train_losses = []
    train_acces = []
    test_losses = []
    test_acces  = []
    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

        train_losses.append(train_loss)
        train_acces.append(train_acc)
        test_acces.append(test_acc)
        test_losses.append(test_loss)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.save_dir)

    logger.close()
    sns.lineplot(x=range(len(test_acces)), y=test_acces, color='red', dashes=True, label='test accuracy')
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.legend()
    plt.savefig(os.path.join(args.save_dir,"test.png"))

    print('Best acc:')
    print(best_acc)
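
The test() routines in these examples typically report top-1/top-5 values through an accuracy(output, target, topk=...) helper (see also Beispiel #24). A minimal sketch of the usual implementation:

import torch

def accuracy(output, target, topk=(1,)):
    # Precision@k for each requested k, returned as percentages.
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res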
Beispiel #27
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # load data
    print('==> Preparing dataset %s' % args.dataset)
    features, landmarks_g, landmarks_h, labels = pickle_2_img_and_landmark_ag(
        args.dataset_path)
    num_classes = 6

    # Model
    print("==> creating model '{}'".format(args.arch))
    # model = ResNetAndGCN(20, num_classes=num_classes)
    model = ModelAGATT(2, 36, 6, {}, False, dropout=0.3)
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    # print('    resnet params: %.2fM' % (sum(p.numel() for p in model.resnet.parameters())/1000000.0))
    # print('    stgcn params: %.2fM' % (sum(p.numel() for p in model.st_gcn.parameters())/1000000.0))
    criterion = nn.CrossEntropyLoss()

    # layer-wise optimization (per-layer parameter groups, kept for reference)
    # resnet_para = [model.conv1.parameters(), model.layer1.parameters(), model.layer2.parameters(), model.layer3.parameters(), model.layer4.parameters()]
    # optimizer = optim.SGD([
    #     {'params': model.gcn11.parameters()},
    #     {'params': model.gcn12.parameters()},
    #     {'params': model.gcn21.parameters()},
    #     {'params': model.gcn22.parameters()},
    #     {'params': model.gcn31.parameters()},
    #     {'params': model.gcn32.parameters()},
    #     {'params': model.fc.parameters()},
    #     {'params': model.conv1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.bn1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer2.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer3.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     {'params': model.layer4.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
    #     ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # Resume
    title = 'ckp-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log_stat.log'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log_stat.log'),
                        title=title)
        logger.set_names([
            'fold_num', 'Learning Rate', 'Train Loss', 'Valid Loss',
            'Train Acc.', 'Valid Acc.'
        ])

    # logging
    logging.basicConfig(level=logging.DEBUG,
                        filename=os.path.join(args.checkpoint, 'log_info.log'),
                        filemode='a+',
                        format="%(asctime)-15s %(levelname)-8s  %(message)s")
    # log configuration
    logging.info('-' * 10 + 'configuration' + '*' * 10)
    for arg in vars(args):
        logging.info((arg, str(getattr(args, arg))))

    acc_fold = []
    reset_lr = state['lr']
    for f_num in range(args.folds):
        state['lr'] = reset_lr
        model.reset_all_weights()
        # optimizer = optim.SGD([
        # {'params': model.gcn11.parameters()},
        # {'params': model.gcn12.parameters()},
        # {'params': model.gcn21.parameters()},
        # {'params': model.gcn22.parameters()},
        # {'params': model.gcn31.parameters()},
        # {'params': model.gcn32.parameters()},
        # {'params': model.fc.parameters()},
        # {'params': model.conv1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.bn1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer1.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer2.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer3.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # {'params': model.layer4.parameters(), 'lr': 0.005, 'weight_decay': 5e-3},
        # ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
        # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        print(args.lr)
        # save each fold's acc and reset configuration
        average_acc = 0
        best_acc = 0

        # 10-fold cross validation
        train_x, train_lm_g, train_lm_h, train_y = [], [], [], []
        test_x, test_lm_g, test_lm_h, test_y = [], [], [], []
        for id_fold in range(args.folds):
            if id_fold == f_num:
                test_x = features[id_fold]
                test_lm_g = landmarks_g[id_fold]
                test_lm_h = landmarks_h[id_fold]
                test_y = labels[id_fold]
            else:
                train_x = train_x + features[id_fold]
                train_lm_g = train_lm_g + landmarks_g[id_fold]
                train_lm_h = train_lm_h + landmarks_h[id_fold]
                train_y = train_y + labels[id_fold]
        # convert array to tensor
        train_x = torch.tensor(train_x,
                               dtype=torch.float) / 255.0  #(b_s, 128, 128)
        train_x = train_x.unsqueeze(1)  #(b_s, 1, 128, 128)

        train_lm_g = np.stack(train_lm_g)
        train_lm_h = np.stack(train_lm_h)
        # only the raw coordinates are needed, no normalization required
        # train_lm_g = (train_lm_g - np.mean(train_lm_g, axis=0)) / np.std(train_lm_g, axis=0)
        # train_lm_g = (train_lm_g - np.amin(train_lm_g)) / np.amax(train_lm_g) - np.amin(train_lm_g)
        # train_lm_h = (train_lm_h - np.mean(train_lm_h, axis=0)) / np.std(train_lm_h, axis=0)
        train_lm_g = torch.tensor(train_lm_g, dtype=torch.float)
        train_lm_h = torch.tensor(train_lm_h, dtype=torch.float)
        # train_lm = train_lm.unsqueeze(2)

        test_x = torch.tensor(test_x, dtype=torch.float) / 255.0
        test_x = test_x.unsqueeze(1)
        # only the raw coordinates are needed, no normalization required
        # test_lm_g = (test_lm_g - np.mean(test_lm_g, axis=0)) / np.std(test_lm_g, axis=0)
        # test_lm_g = (test_lm_g - np.amin(test_lm_g)) / np.amax(test_lm_g) - np.amin(test_lm_g)
        # test_lm_h = (test_lm_h - np.mean(test_lm_h, axis=0)) / np.std(test_lm_h, axis=0)
        test_lm_g = torch.tensor(test_lm_g, dtype=torch.float)
        test_lm_h = torch.tensor(test_lm_h, dtype=torch.float)
        # test_lm = test_lm.unsqueeze(2)
        train_y, test_y = torch.tensor(train_y), torch.tensor(test_y)

        train_dataset = torch.utils.data.TensorDataset(train_x, train_lm_g,
                                                       train_lm_h, train_y)
        train_iter = torch.utils.data.DataLoader(dataset=train_dataset,
                                                 batch_size=args.train_batch,
                                                 shuffle=True)

        test_dataset = torch.utils.data.TensorDataset(test_x, test_lm_g,
                                                      test_lm_h, test_y)
        test_iter = torch.utils.data.DataLoader(dataset=test_dataset,
                                                batch_size=args.test_batch,
                                                shuffle=False)

        # test for fold order
        print(len(test_dataset))

        if args.evaluate:
            print('\nEvaluation only')
            test_loss, test_acc = test(train_x + test_x, train_y + test_y,
                                       model, criterion, start_epoch, use_cuda)
            print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
            continue

        # show plt
        # plt.show(block=False)

        # Train and val
        for epoch in range(start_epoch, args.epochs):

            # adjust the learning rate at specific epochs
            adjust_learning_rate(optimizer, epoch)
            # print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))
            print('\nEpoch: [%d | %d] LR: %f' %
                  (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

            train_loss, train_acc = train(train_iter, model, criterion,
                                          optimizer, epoch, use_cuda)
            test_loss, test_acc = test(test_iter, model, criterion, epoch,
                                       use_cuda)

            # append logger file
            logger.append([
                f_num, state['lr'], train_loss, test_loss, train_acc, test_acc
            ])

            # save model
            is_best = test_acc > best_acc
            best_acc = max(test_acc, best_acc)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                f_num,
                checkpoint=args.checkpoint)

        # compute average acc
        acc_fold.append(best_acc)
        average_acc = sum(acc_fold) / len(acc_fold)

        logging.info('fold: %d, best_acc: %.2f, average_acc: %.2f' %
                     (f_num, best_acc, average_acc))
    logger.close()
    # logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    logging.info('acc_fold' + str(acc_fold))
    print('average acc:')
    print(average_acc)
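
Beispiel #27 builds each cross-validation split by concatenating every fold except the held-out one. The same selection logic as a small standalone helper (a sketch; the real script additionally carries the two landmark streams through the split):

def build_fold_split(per_fold_data, per_fold_labels, held_out):
    # Return (train_x, train_y, test_x, test_y) for one cross-validation fold.
    train_x, train_y = [], []
    test_x, test_y = [], []
    for fold_id, (x, y) in enumerate(zip(per_fold_data, per_fold_labels)):
        if fold_id == held_out:
            test_x, test_y = x, y
        else:
            train_x += x
            train_y += y
    return train_x, train_y, test_x, test_y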
Beispiel #28
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)



    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    elif args.dataset == 'cifar100':
        dataloader = datasets.CIFAR100
        num_classes = 100


    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=1)

    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=1)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    cardinality=args.cardinality,
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('resnet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    ratio=args.ratio
                )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes, ratio=args.ratio)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    if args.evaluate:
        print('\nEvaluation train')
        checkpoint = torch.load(args.save_path + '/' + 'model_best.pth.tar')
        model.load_state_dict(checkpoint['state_dict'])

        print('\nEvaluation test')
        test_loss, test_top1, test_top5 = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test top1:  %.2f, Test top5:  %.2f' % (test_loss, test_top1, test_top5))
        return

    if args.finetune:
        # Load checkpoint.
        print('==> finetune a pretrained model..')
        print(args.model_path)
        assert os.path.isfile(args.model_path), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.model_path)
        model.load_state_dict(checkpoint['state_dict'], strict=False)

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        print(args.save_path + '/' + 'checkpoint.pth.tar')
        assert os.path.isfile(args.save_path + '/' + 'checkpoint.pth.tar'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.save_path + '/' + 'checkpoint.pth.tar')
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']

        # recover learning rate
        for epoch in args.schedule:
            if start_epoch > epoch:
                state['lr'] *= args.gamma

        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.save_path, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.save_path, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train top1', 'Train top5', 'Valid top1.', 'Valid top5'])


    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_top1, train_top5 = train(trainloader, model, criterion, optimizer, epoch, use_cuda, args)
        test_loss, test_top1, test_top5 = test(testloader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_top1, train_top5, test_top1, test_top5])

        print(args.save_path)

        # save model
        is_best = test_top1 > best_acc
        best_acc = max(test_top1, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_top1,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, save_path=args.save_path)

    logger.close()

    print('Best acc:')
    print(best_acc)
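
On resume, the example above recovers the decayed learning rate by replaying the schedule up to the checkpointed epoch. The same idea as a standalone helper (sketch):

def recover_lr(base_lr, schedule, gamma, start_epoch):
    # Recompute the step-decayed LR for a run resumed at start_epoch.
    lr = base_lr
    for milestone in schedule:
        if start_epoch > milestone:
            lr *= gamma
    return lr

# e.g. recover_lr(0.1, schedule=[150, 225], gamma=0.1, start_epoch=200) returns 0.01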
Beispiel #29
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

        #158 dd
    #My dataset create
    ##############################################
    def make_datapath_list(phase="train"):
        rootpath = "./dataset/CUB-200-2011_rename_fixations"
        #rootpath = "./"
        target_path = os.path.join(rootpath, phase, "**",
                                   "*.jpg")  # 最初の**はクラスのディレクトリ
        path_list = []
        # use glob to collect the file paths down into the subdirectories
        for path in glob.glob(
                "./dataset/CUB-200-2011_rename_fixations/*/*.jpg"):  #階層によって変わる
            #for path in glob.glob("./dataset/CUB-200-2011_rename_fixations/*.jpg"):
            path_list.append(path)
        return path_list

    #001.Black_footed_Albatross
    class ImageTransform():
        def __init__(self):
            self.data_transform = transforms.Compose([
                transforms.Resize((256, 256)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomVerticalFlip(p=0.5),
                transforms.ToTensor(),
                transforms.Normalize((0.1948, 0.2155, 0.1589),
                                     (0.2333, 0.2278, 0.26106))
            ])

        def __call__(self, img):
            return self.data_transform(img)

    ############################################
    class Dataset(data.Dataset):
        def __init__(self, file_list, transform=None):
            self.file_list = file_list
            self.file_transform = transform  # instance of the preprocessing class

        def __len__(self):
            return len(self.file_list)

        def __getitem__(self, index):
            img_path = self.file_list[index]  # path of the index-th sample
            img = Image.open(img_path)  # load the index-th image
            #import pdb;pdb.set_trace()
            img_transformed = self.file_transform(img)  # convert to a tensor with the preprocessing class
            #import pdb;pdb.set_trace
            label = img_path.split("/")[3]  # adjust the list index (here 3) to the directory depth
            label = label.split(".")[0]  # drop everything after the dot
            label = int(label)  # convert the label to an integer
            #import pdb;pdb.set_trace
            #label = convert(label)  # convert the class-directory string to a numeric label
            ################################
            csv_path = img_path.split("/")[4]
            csv_path = csv_path.split(".")[0]
            csv_path = inclusive_index(csv_list, csv_path)

            #return img_transformed, label, img_path, csv_path
            return img_transformed, label, csv_path

    trainval_dataset = Dataset(file_list=make_datapath_list(phase="train"),
                               transform=ImageTransform())

    csv_list = glob.glob(
        "./dataset/CUB-200-2011_rename_fixations/*/*_fixtaions.csv")
    #csv_list = glob.glob("./dataset/CUB-200-2011_rename_fixations/001.Black_footed_Albatross/*_fixtaions.csv")
    n_train = int(len(csv_list) * 0.7)
    n_val = len(csv_list) - n_train

    def inclusive_index(lst, purpose):
        for i, e in enumerate(lst):
            if purpose in e: return i

        raise IndexError

    batch_size = 8
    #train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    # split into train and val sets
    train_dataset, val_dataset = torch.utils.data.random_split(
        trainval_dataset, [n_train, n_val])
    #trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              drop_last=True)
    testloader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             drop_last=True)

    # Model
    print("==> creating model '{}'".format(args.arch))

    model = vgg_11.vgg11(
    )  # instantiate the model; this simplified version consists of three conv layers, one ConvLSTM cell, and a fully connected layer
    model.load_state_dict(
        torch.load("./models/cifar/pretrain/vgg11-bbd30ac9.pth"), strict=False)
    #import pdb;pdb.set_trace()
    #model.load_state_dict(torch.load("./models/cifar/pretrain/vgg19-dcbb9e9d.pth"), strict=False)
    model = torch.nn.DataParallel(model).to(device=device)

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda)
        #import pdb;pdb.set_trace()
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        #is_bets = 0
        #bets_acc = 0
        #########################################
        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
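
Beispiel #29 normalizes with dataset-specific per-channel statistics. A minimal sketch of how such mean/std values can be estimated from an image folder (the root path and the ImageFolder class/subdirectory layout are assumptions):

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def channel_stats(root, size=256, batch_size=64):
    # Estimate per-channel mean and std over all images in `root` (resized to `size`).
    ds = datasets.ImageFolder(root, transforms.Compose([
        transforms.Resize((size, size)),
        transforms.ToTensor(),
    ]))
    loader = DataLoader(ds, batch_size=batch_size, num_workers=2)
    n, mean, sq_mean = 0, torch.zeros(3), torch.zeros(3)
    for imgs, _ in loader:
        b = imgs.size(0)
        flat = imgs.view(b, 3, -1)
        mean += flat.mean(dim=2).sum(dim=0)
        sq_mean += (flat ** 2).mean(dim=2).sum(dim=0)
        n += b
    mean /= n
    std = (sq_mean / n - mean ** 2).sqrt()
    return mean, std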
Beispiel #30
0
def main():
    global best_acc, state
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    # if args.dataset == 'cifar10':
    #     dataloader = datasets.CIFAR10
    #     num_classes = 10
    # else:
    #     dataloader = datasets.CIFAR100
    #     num_classes = 100

    # trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    # trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    # testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    # testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    import cifar_custom

    if args.dataset == 'cifar10':
        dataloader = cifar_custom.CIFAR10
        num_classes = 10
    else:
        dataloader = cifar_custom.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data',
                          train=True,
                          download=True,
                          transform=transform_train,
                          imbalance=True)
    ##############   oversample minority classes  ##################
    import numpy as np
    targets = trainset.train_labels
    class_count = np.unique(targets, return_counts=True)[1]
    weight = 1. / class_count
    sample_weights = torch.from_numpy(weight[list(map(int, targets))])
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        sample_weights, len(sample_weights))
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  num_workers=args.workers,
                                  sampler=sampler)

    ##############################################################

    # trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root='./data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
        )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

################################

    import math

    class GLayer(nn.Module):
        def __init__(self, num_classes=10, feat_dim=2, use_gpu=True, scale=1):
            super(GLayer, self).__init__()
            self.centers = nn.Parameter(torch.Tensor(
                num_classes,
                feat_dim,
            ))
            self.reset_parameters()
            self.sigma = 10
            self.scale = scale
            self.bn = nn.BatchNorm1d(feat_dim)

        def reset_parameters(self):
            stdv = 1. / math.sqrt(self.centers.size(1))
            self.centers.data.uniform_(-stdv, stdv)

        def forward(self, x):
            # Squared Euclidean distance from each feature vector to every
            # class center, computed as ||x||^2 + ||c||^2 - 2 * x @ c^T.
            # x = self.bn(x)
            x_norm = (x**2).sum(1).view(-1, 1)
            y = self.centers
            y_t = torch.transpose(y, 0, 1)
            y_norm = (y**2).sum(1).view(1, -1)
            dist = x_norm + y_norm - 2.0 * torch.mm(x, y_t)
            dist = torch.clamp(dist, 0.0, 1e2)

            # Map distances to Gaussian (RBF) similarities that serve as logits.
            # return 1.0/(1.0+dist)
            sim = self.scale * torch.exp(-dist / self.sigma)
            return sim

    class FullModel(nn.Module):
        def __init__(self, base_model, g_layer, linear_layer):
            super(FullModel, self).__init__()
            self.base = base_model
            # self.lin = linear_layer
            self.gl = g_layer

        def forward(self, x):
            x = self.base(x)
            x = x.view(x.size(0), -1)
            return self.gl(x)

    base_model = nn.Sequential(*list(model.children())[:-1])

    head = GLayer(num_classes=num_classes, feat_dim=342).cuda()
    # model=nn.Sequential(model,HeadNet)
    linear_layer = nn.Linear(342, num_classes).cuda()
    model = FullModel(base_model, head, linear_layer)

    ################################
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    ################################
    class HingeLossSim(nn.Module):
        def __init__(self, num_classes=10, use_gpu=True, margin=1):
            super(HingeLossSim, self).__init__()
            self.num_classes = num_classes
            self.use_gpu = use_gpu
            self.margin = margin

        def forward(self, x, labels):
            batch_size = x.size(0)
            classes = torch.arange(self.num_classes).long()
            if self.use_gpu: classes = classes.cuda()
            labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
            mask = labels.eq(classes.expand(batch_size, self.num_classes))
            # Hinge on similarity scores: every negative-class score should sit
            # at least `margin` below the true-class score.
            neg = x[~mask].view(batch_size, -1)
            pos = x[mask].unsqueeze(1).expand(neg.size())
            return torch.mean(torch.relu(neg - pos + self.margin))

################################

    criterion = nn.CrossEntropyLoss()
    # Despite its name, criterion_xent is the multi-class margin loss actually
    # used for training and testing below; the cross-entropy and HingeLossSim
    # variants are kept only as commented-out alternatives.
    # criterion_xent = nn.CrossEntropyLoss()
    criterion_xent = nn.MultiMarginLoss(p=2, margin=1)
    # criterion_xent = HingeLossSim()
    optimizer_model = torch.optim.SGD(model.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.weight_decay,
                                      momentum=args.momentum)

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer_model.load_state_dict(checkpoint['optimizer_model'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion_xent,
                                   criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return
    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer_model, epoch)

        print('\nEpoch: [%d | %d] LR: %f ' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion_xent,
                                      criterion, optimizer_model, epoch,
                                      use_cuda)
        test_loss, test_acc = test(testloader, model, criterion_xent,
                                   criterion, epoch, use_cuda)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer_model': optimizer_model.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
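
The imbalance handling in Beispiel #30 comes down to inverse class-frequency sample weights fed into torch.utils.data.WeightedRandomSampler. A standalone sketch of that step, with a toy dataset standing in for the cifar_custom labels used above:

# Standalone sketch of inverse-frequency oversampling with WeightedRandomSampler.
# The toy labels are illustrative; the example above derives them from
# cifar_custom's train_labels.
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

labels = np.array([0] * 900 + [1] * 100)          # class 0 is 9x more frequent
features = torch.randn(len(labels), 8)
dataset = TensorDataset(features, torch.from_numpy(labels))

class_count = np.unique(labels, return_counts=True)[1]   # e.g. [900, 100]
class_weight = 1.0 / class_count                          # inverse frequency
sample_weights = torch.from_numpy(class_weight[labels]).double()

# Draw len(dataset) samples per epoch with replacement, so minority-class
# examples are repeated and each batch is roughly balanced in expectation.
sampler = WeightedRandomSampler(sample_weights,
                                num_samples=len(sample_weights),
                                replacement=True)
loader = DataLoader(dataset, batch_size=64, sampler=sampler)

for x, y in loader:
    # y now mixes both classes in roughly equal proportion.
    break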
Beispiel #31
0
def main():
    global best_acc
    start_epoch = args.start_epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if args.data_augmentation:
        transform_train = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            RandomVerticalFlip(),
            transforms.Scale(256),
            transforms.RandomSizedCrop(224),
            ColorJitter(0.5, 0.5, 0.5),
            RandomRotation(30),
            RandomAffine(30),
            transforms.ToTensor(), normalize
        ])
    else:
        transform_train = transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ])

    #trainset = Dataset(root=args.root, filename=args.train_list, transform=transform_train)

    trainset = ImageWeightSampling(args.root,
                                   args.train_list,
                                   transform=transform_train)
    testset = Dataset(root=args.root_val,
                      filename=args.val_list,
                      transform=transforms.Compose([
                          transforms.Scale(256),
                          transforms.CenterCrop(224),
                          transforms.ToTensor(), normalize
                      ]))

    #sampler = torch.utils.data.sampler.WeightedRandomSampler(trainset.sample_weights, len(trainset.sample_weights))

    #train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.train_epoch,sampler=sampler, num_workers=args.workers, pin_memory=True)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=args.train_epoch,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.test_epoch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    #create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        #check if pretrained model exists
        pretrained_filename = os.path.join('./pretrained', args.arch)
        assert os.path.isfile(
            pretrained_filename
        ), 'Error: no pretrained checkpoint directory found!'
        model = models.__dict__[args.arch](pretrained=True,
                                           num_classes=args.num_classes)
    elif args.arch.startswith('resnext') or args.arch.startswith('se_resnext'):
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](baseWidth=args.base_width,
                                           cardinality=args.cardinality,
                                           num_classes=args.num_classes,
                                           attention=args.attention)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](num_classes=args.num_classes)

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    if args.weight_loss:
        assert os.path.isfile(
            args.weight_file
        ), 'weight loss training while weight file not found'
        weights = json.load(open(args.weight_file))
        weight_ = []
        for i in range(len(weights.keys())):
            weight_.append(weights[str(i)])
        weight_ = np.array(weight_, dtype=np.float32)
        weight_ = weight_ / sum(weight_) * (args.num_classes + 0.0) * 0.5 + 0.5
        print(weight_)
        weight_ = torch.from_numpy(weight_)

    if args.weight_loss:
        #criterion = nn.CrossEntropyLoss(weight=weight_,reduce=False).cuda()
        criterion = nn.CrossEntropyLoss(weight=weight_).cuda()
    elif args.mix_loss:
        center = np.loadtxt('center.txt')
        criterion = MixLoss(torch.from_numpy(center).cuda()).cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    #Resume
    title = 'Imagenet-' + args.arch
    if args.resume:
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch,
                                   use_cuda)

        #append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        #save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(epoch, {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        },
                        is_best,
                        checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc: ')
    print(best_acc)
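
The --weight_loss branch above rescales per-class weights loaded from a JSON file before handing them to nn.CrossEntropyLoss. A sketch of that weighting in isolation; the numbers below are made up and the dict layout ({'0': w0, '1': w1, ...}) is assumed to match the weight file:

# Sketch of class-weighted cross entropy. The dict layout {'0': w0, '1': w1, ...}
# is assumed to match the JSON weight file used above; the values are invented.
import numpy as np
import torch
import torch.nn as nn

num_classes = 5
weights = {str(i): w for i, w in enumerate([0.5, 1.0, 2.0, 4.0, 8.0])}

weight_ = np.array([weights[str(i)] for i in range(num_classes)], dtype=np.float32)
# Same squashing as the example: normalize to mean 1, then average with uniform
# weights so no class weight drops below 0.5.
weight_ = weight_ / weight_.sum() * num_classes * 0.5 + 0.5

criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(weight_))

logits = torch.randn(4, num_classes)
targets = torch.tensor([0, 1, 2, 4])
loss = criterion(logits, targets)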
Beispiel #32
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)



    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100


    trainset = dataloader(root=args.data_root, train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    testset = dataloader(root=args.data_root, train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model   
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    cardinality=args.cardinality,
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                )        
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])


    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
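
All of these loops call adjust_learning_rate(optimizer, epoch) and then read state['lr'], but the helper itself is never shown. A sketch of the usual step-decay version, assuming args.schedule lists the decay epochs and args.gamma is the decay factor, as is the convention in scripts of this style:

# Sketch of the step-decay adjust_learning_rate helper these loops assume.
# 'state', 'args.schedule' and 'args.gamma' follow the conventions of the
# scripts above but are defined here only for illustration.
import torch.nn as nn
import torch.optim as optim


class Args:
    lr = 0.1
    schedule = [150, 225]   # epochs at which to decay
    gamma = 0.1             # multiplicative decay factor


args = Args()
state = {'lr': args.lr}


def adjust_learning_rate(optimizer, epoch):
    # At each scheduled epoch, decay the stored learning rate and push it into
    # every parameter group of the optimizer.
    if epoch in args.schedule:
        state['lr'] *= args.gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = state['lr']


model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
for epoch in range(300):
    adjust_learning_rate(optimizer, epoch)
    # ... train(...) and test(...) for this epoch would run here ...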
Beispiel #33
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.save_dir):
        mkdir_p(args.save_dir)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data',
                          train=True,
                          download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)

    testset = dataloader(root='./data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
        model_ref = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
        model_ref = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
        model_ref = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
        )
        model_ref = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
        )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)
        model_ref = models.__dict__[args.arch](num_classes=num_classes)

    model = torch.nn.DataParallel(model).cuda()
    model_ref = torch.nn.DataParallel(model_ref).cuda()

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)  # default is 0.001

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Getting reference model from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        model_ref.load_state_dict(checkpoint['state_dict'])

    logger = Logger(os.path.join(args.save_dir, 'log_scratch.txt'),
                    title=title)
    logger.set_names([
        'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'
    ])

    # set some weights to zero, according to model_ref ---------------------------------
    for m, m_ref in zip(model.modules(), model_ref.modules()):
        if isinstance(m, nn.Conv2d):
            weight_copy = m_ref.weight.data.abs().clone()
            mask = weight_copy.gt(0).float().cuda()
            n = mask.sum() / float(m.in_channels)
            m.weight.data.normal_(0, math.sqrt(2. / n))
            m.weight.data.mul_(mask)

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))
        num_parameters = get_conv_zero_param(model)
        print('Zero parameters: {}'.format(num_parameters))
        num_parameters = sum(
            [param.nelement() for param in model.parameters()])
        print('Parameters: {}'.format(num_parameters))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.save_dir)

    logger.close()

    print('Best acc:')
    print(best_acc)
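
Beispiel #33 re-initializes a network while preserving the sparsity pattern of a pruned reference model: any convolution weight that is exactly zero in model_ref stays zero after the Kaiming-style re-init. A self-contained sketch of that masking step; the get_conv_zero_param counter here mirrors what the helper used above presumably does and is not taken from the original source:

# Sketch of re-initializing a model while keeping a pruned reference model's
# zero pattern. get_conv_zero_param is an assumed re-creation of the counter
# used above, not the original implementation.
import math
import torch
import torch.nn as nn


def reinit_with_mask(model, model_ref):
    # Copy the sparsity pattern of model_ref onto a freshly re-initialized model.
    for m, m_ref in zip(model.modules(), model_ref.modules()):
        if isinstance(m, nn.Conv2d):
            mask = m_ref.weight.data.abs().gt(0).float()
            n = float(mask.sum()) / m.in_channels        # surviving fan estimate
            m.weight.data.normal_(0, math.sqrt(2. / n))  # Kaiming-style re-init
            m.weight.data.mul_(mask)                     # keep pruned weights at zero


def get_conv_zero_param(model):
    # Count exactly-zero convolution weights, i.e. the pruned parameters.
    return sum(int(torch.sum(m.weight.data.eq(0)))
               for m in model.modules() if isinstance(m, nn.Conv2d))


model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
model_ref = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
with torch.no_grad():
    model_ref[0].weight[model_ref[0].weight.abs() < 0.05] = 0.0  # fake pruning

reinit_with_mask(model, model_ref)
print(get_conv_zero_param(model))   # the zeros of model_ref carry over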
Beispiel #34
0
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.train_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.test_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True)
        
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    baseWidth=args.base_width,
                    cardinality=args.cardinality,
                )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])


    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
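
The ImageNet pipelines in these examples use the old torchvision names transforms.Scale and transforms.RandomSizedCrop. Current torchvision releases expose the same operations as transforms.Resize and transforms.RandomResizedCrop, so an updated version of the preprocessing above would look like this sketch:

# The ImageNet preprocessing above, written against the current torchvision
# names (Resize / RandomResizedCrop replaced the deprecated Scale /
# RandomSizedCrop). Behavior is otherwise the same.
import torchvision.transforms as transforms

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])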
Beispiel #35
0
def main():
    global best_top1, best_top5
    
    args.world_size = 1
    
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code    
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    crop_size = 224
    val_size = 256

    pipe = HybridTrainPipe(batch_size=args.train_batch, num_threads=args.workers, device_id=args.local_rank, data_dir=traindir, crop=crop_size, dali_cpu=args.dali_cpu)
    pipe.build()
    train_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    pipe = HybridValPipe(batch_size=args.test_batch, num_threads=args.workers, device_id=args.local_rank, data_dir=valdir, crop=crop_size, size=val_size)
    pipe.build()
    val_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    baseWidth=args.base_width,
                    cardinality=args.cardinality,
                )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'adamw':
        optimizer = AdamW(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay, warmup = 0)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'lsadam': 
        optimizer = LSAdamW(model.parameters(), lr=args.lr*((1.+4.*args.sigma)**(0.25)), 
                           betas=(args.beta1, args.beta2),
                           weight_decay=args.weight_decay, 
                           sigma=args.sigma)
    elif args.optimizer.lower() == 'lsradam':
        sigma = 0.1
        optimizer = LSRAdam(model.parameters(), lr=args.lr*((1.+4.*args.sigma)**(0.25)), 
                           betas=(args.beta1, args.beta2),
                           weight_decay=args.weight_decay, 
                           sigma=args.sigma)
    elif args.optimizer.lower() == 'srsgd':
        iter_count = 1
        optimizer = SGD_Adaptive(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, iter_count=iter_count, restarting_iter=args.restart_schedule[0])
    elif args.optimizer.lower() == 'sradam':
        iter_count = 1
        optimizer = SRNAdam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), iter_count=iter_count, weight_decay=args.weight_decay, restarting_iter=args.restart_schedule[0]) 
    elif args.optimizer.lower() == 'sradamw':
        iter_count = 1
        optimizer = SRAdamW(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), iter_count=iter_count, weight_decay=args.weight_decay, warmup = 0, restarting_iter=args.restart_schedule[0]) 
    elif args.optimizer.lower() == 'srradam':
        #NOTE: need to double-check this
        iter_count = 1
        optimizer = SRRAdam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2), iter_count=iter_count, weight_decay=args.weight_decay, warmup = 0, restarting_iter=args.restart_schedule[0]) 
        
    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_top1 = checkpoint['best_top1']
        best_top5 = checkpoint['best_top5']
        start_epoch = checkpoint['epoch'] - 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        iter_count = optimizer.param_groups[0]['iter_count']
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Top1', 'Valid Top1', 'Train Top5', 'Valid Top5'])
        
    logger.file.write('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))


    if args.evaluate:
        logger.file.write('\nEvaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion, start_epoch, use_cuda, logger)
        logger.file.write(' Test Loss:  %.8f, Test Top1:  %.2f, Test Top5: %.2f' % (test_loss, test_top1, test_top5))
        return

    # Train and val
    schedule_index = 1
    for epoch in range(start_epoch, args.epochs):
        if args.optimizer.lower() == 'srsgd':
            if epoch in args.schedule:
                optimizer = SGD_Adaptive(model.parameters(), lr=args.lr * (args.gamma**schedule_index), weight_decay=args.weight_decay, iter_count=iter_count, restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
                
        elif args.optimizer.lower() == 'sradam':
            if epoch in args.schedule:
                optimizer = SRNAdam(model.parameters(), lr=args.lr * (args.gamma**schedule_index), betas=(args.beta1, args.beta2), iter_count=iter_count, weight_decay=args.weight_decay, restarting_iter=args.restart_schedule[schedule_index]) 
                schedule_index += 1
                
        elif args.optimizer.lower() == 'sradamw':
            if epoch in args.schedule:
                optimizer = SRAdamW(model.parameters(), lr=args.lr * (args.gamma**schedule_index), betas=(args.beta1, args.beta2), iter_count=iter_count, weight_decay=args.weight_decay, warmup = 0, restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
                
        elif args.optimizer.lower() == 'srradam':
            if epoch in args.schedule:
                optimizer = SRRAdam(model.parameters(), lr=args.lr * (args.gamma**schedule_index), betas=(args.beta1, args.beta2), iter_count=iter_count, weight_decay=args.weight_decay, warmup = 0, restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
            
        else:
            adjust_learning_rate(optimizer, epoch)

        logger.file.write('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))
        
        if args.optimizer.lower() in ('srsgd', 'sradam', 'sradamw', 'srradam'):
            train_loss, train_top1, train_top5, iter_count = train(train_loader, model, criterion, optimizer, epoch, use_cuda, logger)
        else:
            train_loss, train_top1, train_top5 = train(train_loader, model, criterion, optimizer, epoch, use_cuda, logger)

        test_loss, test_top1, test_top5 = test(val_loader, model, criterion, epoch, use_cuda, logger)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_top1, test_top1, train_top5, test_top5])

        writer.add_scalars('train_loss', {args.model_name: train_loss}, epoch)
        writer.add_scalars('test_loss', {args.model_name: test_loss}, epoch)
        writer.add_scalars('train_top1', {args.model_name: train_top1}, epoch)
        writer.add_scalars('test_top1', {args.model_name: test_top1}, epoch)
        writer.add_scalars('train_top5', {args.model_name: train_top5}, epoch)
        writer.add_scalars('test_top5', {args.model_name: test_top5}, epoch)

        # save model
        is_best = test_top1 > best_top1
        best_top1 = max(test_top1, best_top1)
        best_top5 = max(test_top5, best_top5)
        save_checkpoint({
                'epoch': epoch + 1,
                'schedule_index': schedule_index,
                'state_dict': model.state_dict(),
                'top1': test_top1,
                'top5': test_top5,
                'best_top1': best_top1,
                'best_top5': best_top5,
                'optimizer' : optimizer.state_dict(),
            }, is_best, epoch, checkpoint=args.checkpoint)
        
        # reset DALI iterators
        train_loader.reset()
        val_loader.reset()
        
    logger.file.write('Best top1: %f'%best_top1)
    logger.file.write('Best top5: %f'%best_top5)
    
    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best top1: %f'%best_top1)
    print('Best top5: %f'%best_top5)
    
    with open("./all_results_imagenet.txt", "a") as f:
        fcntl.flock(f, fcntl.LOCK_EX)
        f.write("%s\n"%args.checkpoint)
        f.write("best_top1 %f, best_top5 %f\n\n"%(best_top1,best_top5))
        fcntl.flock(f, fcntl.LOCK_UN)
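
Beispiel #35 rebuilds its SRSGD/SRAdam-family optimizers at every epoch listed in args.schedule, decaying the base learning rate by args.gamma**schedule_index and threading iter_count through the restarts. Those optimizer classes come from the example's own codebase; the sketch below shows only the generic rebuild-at-scheduled-epochs pattern, with plain torch.optim.SGD standing in for them:

# Generic form of the "rebuild the optimizer at scheduled epochs" pattern used
# above, with torch.optim.SGD standing in for the custom SRSGD/SRAdam variants.
# The schedule values are illustrative only.
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)
base_lr, gamma = 0.1, 0.1
schedule = [30, 60, 80]              # decay/restart epochs

schedule_index = 0
optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9)

for epoch in range(90):
    if epoch in schedule:
        schedule_index += 1
        # Re-create the optimizer with a decayed base learning rate; the real
        # script additionally threads iter_count and
        # restart_schedule[schedule_index] into the new optimizer.
        optimizer = optim.SGD(model.parameters(),
                              lr=base_lr * (gamma ** schedule_index),
                              momentum=0.9)
    # ... one epoch of training and evaluation with `optimizer` goes here ...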