Example 1
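Note: the snippets below are excerpted from a FastAutoAugment-style codebase and omit their imports. A plausible shared preamble, with module paths assumed from that project's layout, would be:

# Assumed import preamble for the excerpts below; exact module paths may
# differ between forks of the project.
import itertools
import math
import os
import time
from collections import OrderedDict, defaultdict

import numpy as np
import torch
from torch import nn, optim
from torch.nn.parallel import DataParallel, DistributedDataParallel

from theconf import Config as C  # global config singleton accessed as C.get()
from FastAutoAugment.data import get_dataloaders
from FastAutoAugment.metrics import Accumulator
from FastAutoAugment.networks import get_model, num_class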
def get_affinity(aug, aff_bases, config, augment):
    C.get()  # instantiate the global config singleton
    C.get().conf = config  # swap in the provided config
    # setup - provided augmentation rules
    C.get()['aug'] = aug
    load_paths = augment['load_paths']
    cv_num = augment["cv_num"]

    aug_loaders = []
    for cv_id in range(cv_num):
        _, tl, validloader, tl2 = get_dataloaders(C.get()['dataset'],
                                                  C.get()['batch'],
                                                  augment['dataroot'],
                                                  augment['cv_ratio_test'],
                                                  split_idx=cv_id)
        aug_loaders.append(validloader)
        del tl, tl2

    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    aug_accs = []
    for cv_id, loader in enumerate(aug_loaders):
        # eval
        model = get_model(C.get()['model'], num_class(C.get()['dataset']))
        ckpt = torch.load(load_paths[cv_id])
        if 'model' in ckpt:
            model.load_state_dict(ckpt['model'])
        else:
            model.load_state_dict(ckpt)
        model.eval()

        metrics = Accumulator()
        for data, label in loader:
            data = data.cuda()
            label = label.cuda()

            pred = model(data)
            loss = loss_fn(pred, label)  # (N)

            _, pred = pred.topk(1, 1, True, True)
            pred = pred.t()
            correct = pred.eq(label.view(
                1, -1).expand_as(pred)).detach().cpu().numpy()  # (1,N)

            metrics.add_dict({
                'minus_loss':
                -1 * np.sum(loss.detach().cpu().numpy()),
                'correct':
                np.sum(correct),
                'cnt':
                len(data)
            })
            del loss, correct, pred, data, label
        aug_accs.append(metrics['correct'] / metrics['cnt'])
    del model
    affs = []
    for aug_valid, clean_valid in zip(aug_accs, aff_bases):
        affs.append(aug_valid - clean_valid)
    return affs
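The top-1 counting idiom used above (topk, transpose, eq against an expanded label row) recurs in every evaluator below; a self-contained illustration with dummy tensors:

import torch

# Dummy batch: 4 samples, 3 classes.
logits = torch.tensor([[2.0, 0.1, 0.3],
                       [0.2, 1.5, 0.1],
                       [0.1, 0.2, 3.0],
                       [1.0, 0.9, 0.8]])
labels = torch.tensor([0, 1, 2, 1])

_, pred = logits.topk(1, 1, True, True)   # (N,1) index of the best class
pred = pred.t()                           # (1,N)
correct = pred.eq(labels.view(1, -1).expand_as(pred))  # (1,N) boolean
print(correct.sum().item())               # 3 (the last sample predicts class 0)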
Example 2
def eval_tta3(config, augment, reporter):
    C.get()
    C.get().conf = config
    save_path = augment['save_path']
    cv_id, gr_id = augment["cv_id"], augment["gr_id"]
    gr_ids = augment["gr_ids"]

    # setup - provided augmentation rules
    C.get()['aug'] = policy_decoder(augment, augment['num_policy'],
                                    augment['num_op'])

    # eval
    model = get_model(C.get()['model'], num_class(C.get()['dataset']))
    ckpt = torch.load(save_path)
    if 'model' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)
    del ckpt
    model.eval()

    loader = get_post_dataloader(C.get()["dataset"],
                                 C.get()['batch'], augment["dataroot"],
                                 augment['cv_ratio_test'], cv_id, gr_id,
                                 gr_ids)

    start_t = time.time()
    metrics = Accumulator()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    for data, label in loader:
        data = data.cuda()
        label = label.cuda()

        pred = model(data)
        loss = loss_fn(pred, label)  # (N)

        _, pred = pred.topk(1, 1, True, True)
        pred = pred.t()
        correct = pred.eq(label.view(
            1, -1).expand_as(pred)).detach().cpu().numpy()  # (1,N)

        metrics.add_dict({
            'loss': np.sum(loss.detach().cpu().numpy()),
            'correct': np.sum(correct),
            'cnt': len(data)
        })
        del loss, correct, pred, data, label
    del model, loader
    metrics = metrics / 'cnt'
    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    reporter(loss=metrics['loss'],
             top1_valid=metrics['correct'],
             elapsed_time=gpu_secs,
             done=True)
    return metrics['correct']
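Accumulator is not defined in these snippets; a minimal stand-in consistent with how it is used here (add_dict, key indexing, division by a key such as 'cnt', and a .metrics dict that gets saved and restored) might look like this sketch:

from collections import defaultdict

class Accumulator:
    """Minimal sketch reconstructed from usage above; not the project's class."""
    def __init__(self):
        self.metrics = defaultdict(float)

    def add_dict(self, d):
        for k, v in d.items():
            self.metrics[k] += v

    def __getitem__(self, k):
        return self.metrics[k]

    def __truediv__(self, other):
        # `metrics / 'cnt'` normalizes every metric by the accumulated count;
        # dividing by a plain number is also supported (see train_controller).
        denom = self.metrics[other] if isinstance(other, str) else other
        out = Accumulator()
        for k, v in self.metrics.items():
            out.metrics[k] = v / denom
        return out

# Example mirroring the evaluators above:
m = Accumulator()
m.add_dict({'correct': 30, 'cnt': 32})
m.add_dict({'correct': 28, 'cnt': 32})
print((m / 'cnt')['correct'])  # 0.90625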
Example 3
def eval_tta2(config, augment, reporter):
    C.get()
    C.get().conf = config
    cv_ratio_test, cv_id, save_path = augment['cv_ratio_test'], augment['cv_id'], augment['save_path']
    gr_id = augment["gr_id"]
    num_repeat = 1

    # setup - provided augmentation rules
    C.get()['aug'] = policy_decoder(augment, augment['num_policy'], augment['num_op'])

    # eval
    model = get_model(C.get()['model'], num_class(C.get()['dataset']))
    ckpt = torch.load(save_path)
    if 'model' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)
    model.eval()

    loaders = []
    for i in range(num_repeat):
        _, tl, validloader, tl2 = get_dataloaders(C.get()['dataset'], C.get()['batch'], augment['dataroot'], cv_ratio_test, split_idx=cv_id, gr_assign=augment["gr_assign"], gr_id=gr_id)
        loaders.append(validloader)
        del tl, tl2

    start_t = time.time()
    metrics = Accumulator()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    for loader in loaders:
        for data, label in loader:
            data = data.cuda()
            label = label.cuda()

            pred = model(data)
            loss = loss_fn(pred, label) # (N)

            _, pred = pred.topk(1, 1, True, True)
            pred = pred.t()
            correct = pred.eq(label.view(1, -1).expand_as(pred)).detach().cpu().numpy() # (1,N)

            metrics.add_dict({
                'minus_loss': -1 * np.sum(loss.detach().cpu().numpy()),
                'correct': np.sum(correct),
                'cnt': len(data)
            })
            del loss, correct, pred, data, label
    del model
    metrics = metrics / 'cnt'
    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    reporter(minus_loss=metrics['minus_loss'], top1_valid=metrics['correct'], elapsed_time=gpu_secs, done=True)
    return metrics['correct']
Example 4
def eval_tta(config, augment, reporter, num_class, get_model, get_dataloaders):
    C.get()
    C.get().conf = config
    cv_ratio_test, cv_fold, save_path = (
        augment["cv_ratio_test"],
        augment["cv_fold"],
        augment["save_path"],
    )

    # setup - provided augmentation rules
    C.get()["aug"] = policy_decoder(augment, augment["num_policy"], augment["num_op"])

    # eval
    model = get_model(C.get()["model"], num_class(C.get()["dataset"]))
    ckpt = torch.load(save_path)
    if "model" in ckpt:
        model.load_state_dict(ckpt["model"])
    else:
        model.load_state_dict(ckpt)
    model.eval()

    loaders = []
    for _ in range(augment["num_policy"]):  # TODO
        _, tl, validloader, tl2 = get_dataloaders(
            C.get()["dataset"],
            C.get()["batch"],
            augment["dataroot"],
            cv_ratio_test,
            split_idx=cv_fold,
        )
        loaders.append(iter(validloader))
        del tl, tl2

    start_t = time.time()
    metrics = Accumulator()
    loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
    try:
        while True:
            losses = []
            corrects = []
            for loader in loaders:
                data, label = next(loader)
                data = data.cuda()
                label = label.cuda()

                pred = model(data)

                loss = loss_fn(pred, label)
                losses.append(loss.detach().cpu().numpy().reshape(1, -1))  # (1,N), so the min below is per-sample

                _, pred = pred.topk(1, 1, True, True)
                pred = pred.t()
                correct = (
                    pred.eq(label.view(1, -1).expand_as(pred)).detach().cpu().numpy()
                )
                corrects.append(correct)
                del loss, correct, pred, data, label

            losses = np.concatenate(losses)
            losses_min = np.min(losses, axis=0).squeeze()

            corrects = np.concatenate(corrects)
            corrects_max = np.max(corrects, axis=0).squeeze()
            metrics.add_dict(
                {
                    "minus_loss": -1 * np.sum(losses_min),
                    "correct": np.sum(corrects_max),
                    "cnt": len(corrects_max),
                }
            )
            del corrects, corrects_max
    except StopIteration:
        pass

    del model
    metrics = metrics / "cnt"
    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    reporter(
        minus_loss=metrics["minus_loss"],
        top1_valid=metrics["correct"],
        elapsed_time=gpu_secs,
        done=True,
    )
    return metrics["correct"]
Example 5
def train_and_eval(tag,
                   dataroot,
                   test_ratio=0.0,
                   cv_fold=0,
                   reporter=None,
                   metric='last',
                   save_path=None,
                   only_eval=False,
                   local_rank=-1,
                   evaluation_interval=5):
    total_batch = C.get()["batch"]
    if local_rank >= 0:
        dist.init_process_group(backend='nccl',
                                init_method='env://',
                                world_size=int(os.environ['WORLD_SIZE']))
        device = torch.device('cuda', local_rank)
        torch.cuda.set_device(device)

        C.get()['lr'] *= dist.get_world_size()
        logger.info(
            f'local batch={C.get()["batch"]} world_size={dist.get_world_size()} ----> total batch={C.get()["batch"] * dist.get_world_size()}'
        )
        total_batch = C.get()["batch"] * dist.get_world_size()

    is_master = local_rank < 0 or dist.get_rank() == 0
    if is_master:
        add_filehandler(logger, 'master.log')

    if not reporter:
        reporter = lambda **kwargs: 0

    max_epoch = C.get()['epoch']
    trainsampler, trainloader, validloader, testloader_ = get_dataloaders(
        C.get()['dataset'],
        C.get()['batch'],
        dataroot,
        test_ratio,
        split_idx=cv_fold,
        multinode=(local_rank >= 0))

    # create a model & an optimizer
    model = get_model(C.get()['model'],
                      num_class(C.get()['dataset']),
                      local_rank=local_rank)
    model_ema = get_model(C.get()['model'],
                          num_class(C.get()['dataset']),
                          local_rank=-1)
    model_ema.eval()

    criterion_ce = criterion = CrossEntropyLabelSmooth(
        num_class(C.get()['dataset']),
        C.get().conf.get('lb_smooth', 0))
    if C.get().conf.get('mixup', 0.0) > 0.0:
        criterion = CrossEntropyMixUpLabelSmooth(
            num_class(C.get()['dataset']),
            C.get().conf.get('lb_smooth', 0))
    if C.get()['optimizer']['type'] == 'sgd':
        optimizer = optim.SGD(
            model.parameters(),
            lr=C.get()['lr'],
            momentum=C.get()['optimizer'].get('momentum', 0.9),
            weight_decay=0.0,
            nesterov=C.get()['optimizer'].get('nesterov', True))
    elif C.get()['optimizer']['type'] == 'rmsprop':
        optimizer = RMSpropTF(model.parameters(),
                              lr=C.get()['lr'],
                              weight_decay=0.0,
                              alpha=0.9,
                              momentum=0.9,
                              eps=0.001)
    else:
        raise ValueError('invalid optimizer type=%s' %
                         C.get()['optimizer']['type'])

    lr_scheduler_type = C.get()['lr_schedule'].get('type', 'cosine')
    if lr_scheduler_type == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=C.get()['epoch'], eta_min=0.)
    elif lr_scheduler_type == 'resnet':
        scheduler = adjust_learning_rate_resnet(optimizer)
    elif lr_scheduler_type == 'efficientnet':
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lambda x: 0.97**int(
                (x + C.get()['lr_schedule']['warmup']['epoch']) / 2.4))
    else:
        raise ValueError('invalid lr_scheduler=%s' % lr_scheduler_type)

    if C.get()['lr_schedule'].get(
            'warmup', None) and C.get()['lr_schedule']['warmup']['epoch'] > 0:
        scheduler = GradualWarmupScheduler(
            optimizer,
            multiplier=C.get()['lr_schedule']['warmup']['multiplier'],
            total_epoch=C.get()['lr_schedule']['warmup']['epoch'],
            after_scheduler=scheduler)

    if not tag or not is_master:
        from FastAutoAugment.metrics import SummaryWriterDummy as SummaryWriter
        logger.warning('tag not provided, no tensorboard log.')
    else:
        from tensorboardX import SummaryWriter
    writers = [
        SummaryWriter(log_dir='./logs/%s/%s' % (tag, x))
        for x in ['train', 'valid', 'test']
    ]

    if C.get()['optimizer']['ema'] > 0.0 and is_master:
        # https://discuss.pytorch.org/t/how-to-apply-exponential-moving-average-decay-for-variables/10856/4?u=ildoonet
        ema = EMA(C.get()['optimizer']['ema'])
    else:
        ema = None

    result = OrderedDict()
    epoch_start = 1
    # TODO: force only_eval=False when no save_path is given?
    if save_path != 'test.pth':  # and is_master: --> should load all data(not able to be broadcasted)
        if save_path and not os.path.exists(save_path):
            import torch.utils.model_zoo as model_zoo
            data = model_zoo.load_url(
                'https://download.pytorch.org/models/resnet50-19c8e357.pth',
                model_dir=os.path.join(os.getcwd(), 'FastAutoAugment/models'))
            if C.get()['dataset'] == 'cifar10':
                data.pop('fc.weight')
                data.pop('fc.bias')
                model_dict = model.state_dict()
                model_dict.update(data)
                model.load_state_dict(model_dict)
                torch.save(model_dict, save_path)

        logger.info('%s file found. loading...' % save_path)
        data = torch.load(save_path)
        key = 'model' if 'model' in data else 'state_dict'

        if 'epoch' not in data:
            model.load_state_dict(data)
        else:
            logger.info('checkpoint epoch@%d' % data['epoch'])
            if not isinstance(model, (DataParallel, DistributedDataParallel)):
                model.load_state_dict({
                    k.replace('module.', ''): v
                    for k, v in data[key].items()
                })
            else:
                model.load_state_dict({
                    k if 'module.' in k else 'module.' + k: v
                    for k, v in data[key].items()
                })
            logger.info('optimizer.load_state_dict+')
            optimizer.load_state_dict(data['optimizer'])
            if data['epoch'] < C.get()['epoch']:
                epoch_start = data['epoch']
            else:
                only_eval = True
            if ema is not None:
                ema.shadow = data.get('ema', {}) if isinstance(
                    data.get('ema', {}), dict) else data['ema'].state_dict()
        del data

    if local_rank >= 0:
        for name, x in model.state_dict().items():
            dist.broadcast(x, 0)
        logger.info(
            f'multinode init. local_rank={dist.get_rank()} is_master={is_master}'
        )
        torch.cuda.synchronize()

    tqdm_disabled = bool(os.environ.get(
        'TASK_NAME', '')) and local_rank != 0  # KakaoBrain Environment

    if only_eval:
        logger.info('evaluation only+')
        model.eval()
        rs = dict()
        rs['train'] = run_epoch(model,
                                trainloader,
                                criterion,
                                None,
                                desc_default='train',
                                epoch=0,
                                writer=writers[0],
                                is_master=is_master)

        with torch.no_grad():
            rs['valid'] = run_epoch(model,
                                    validloader,
                                    criterion,
                                    None,
                                    desc_default='valid',
                                    epoch=0,
                                    writer=writers[1],
                                    is_master=is_master)
            rs['test'] = run_epoch(model,
                                   testloader_,
                                   criterion,
                                   None,
                                   desc_default='*test',
                                   epoch=0,
                                   writer=writers[2],
                                   is_master=is_master)
            if ema is not None and len(ema) > 0:
                model_ema.load_state_dict({
                    k.replace('module.', ''): v
                    for k, v in ema.state_dict().items()
                })
                rs['valid'] = run_epoch(model_ema,
                                        validloader,
                                        criterion_ce,
                                        None,
                                        desc_default='valid(EMA)',
                                        epoch=0,
                                        writer=writers[1],
                                        verbose=is_master,
                                        tqdm_disabled=tqdm_disabled)
                rs['test'] = run_epoch(model_ema,
                                       testloader_,
                                       criterion_ce,
                                       None,
                                       desc_default='*test(EMA)',
                                       epoch=0,
                                       writer=writers[2],
                                       verbose=is_master,
                                       tqdm_disabled=tqdm_disabled)
        for key, setname in itertools.product(['loss', 'top1', 'top5'],
                                              ['train', 'valid', 'test']):
            if setname not in rs:
                continue
            result['%s_%s' % (key, setname)] = rs[setname][key]
        result['epoch'] = 0
        return result

    # train loop
    best_top1 = 0
    for epoch in range(epoch_start, max_epoch + 1):
        if local_rank >= 0:
            trainsampler.set_epoch(epoch)

        model.train()
        rs = dict()
        rs['train'] = run_epoch(model,
                                trainloader,
                                criterion,
                                optimizer,
                                desc_default='train',
                                epoch=epoch,
                                writer=writers[0],
                                verbose=(is_master and local_rank <= 0),
                                scheduler=scheduler,
                                ema=ema,
                                wd=C.get()['optimizer']['decay'],
                                tqdm_disabled=tqdm_disabled)
        model.eval()

        if math.isnan(rs['train']['loss']):
            raise Exception('train loss is NaN.')

        ema_interval = C.get()['optimizer']['ema_interval']
        if ema is not None and ema_interval > 0 and epoch % ema_interval == 0:
            logger.info(f'ema synced+ rank={dist.get_rank()}')
            model.load_state_dict(ema.state_dict())
            for name, x in model.state_dict().items():
                # print(name)
                dist.broadcast(x, 0)
            torch.cuda.synchronize()
            logger.info(f'ema synced- rank={dist.get_rank()}')

        if is_master and (epoch % evaluation_interval == 0
                          or epoch == max_epoch):
            with torch.no_grad():
                rs['valid'] = run_epoch(model,
                                        validloader,
                                        criterion_ce,
                                        None,
                                        desc_default='valid',
                                        epoch=epoch,
                                        writer=writers[1],
                                        verbose=is_master,
                                        tqdm_disabled=tqdm_disabled)
                rs['test'] = run_epoch(model,
                                       testloader_,
                                       criterion_ce,
                                       None,
                                       desc_default='*test',
                                       epoch=epoch,
                                       writer=writers[2],
                                       verbose=is_master,
                                       tqdm_disabled=tqdm_disabled)

                if ema is not None:
                    model_ema.load_state_dict({
                        k.replace('module.', ''): v
                        for k, v in ema.state_dict().items()
                    })
                    rs['valid'] = run_epoch(model_ema,
                                            validloader,
                                            criterion_ce,
                                            None,
                                            desc_default='valid(EMA)',
                                            epoch=epoch,
                                            writer=writers[1],
                                            verbose=is_master,
                                            tqdm_disabled=tqdm_disabled)
                    rs['test'] = run_epoch(model_ema,
                                           testloader_,
                                           criterion_ce,
                                           None,
                                           desc_default='*test(EMA)',
                                           epoch=epoch,
                                           writer=writers[2],
                                           verbose=is_master,
                                           tqdm_disabled=tqdm_disabled)

            logger.info(
                f'epoch={epoch} '
                f'[train] loss={rs["train"]["loss"]:.4f} top1={rs["train"]["top1"]:.4f} '
                f'[valid] loss={rs["valid"]["loss"]:.4f} top1={rs["valid"]["top1"]:.4f} '
                f'[test] loss={rs["test"]["loss"]:.4f} top1={rs["test"]["top1"]:.4f} '
            )

            if metric == 'last' or rs[metric]['top1'] > best_top1:
                if metric != 'last':
                    best_top1 = rs[metric]['top1']
                for key, setname in itertools.product(
                    ['loss', 'top1', 'top5'], ['train', 'valid', 'test']):
                    result['%s_%s' % (key, setname)] = rs[setname][key]
                result['epoch'] = epoch

                writers[1].add_scalar('valid_top1/best', rs['valid']['top1'],
                                      epoch)
                writers[2].add_scalar('test_top1/best', rs['test']['top1'],
                                      epoch)

                reporter(loss_valid=rs['valid']['loss'],
                         top1_valid=rs['valid']['top1'],
                         loss_test=rs['test']['loss'],
                         top1_test=rs['test']['top1'])

                # save checkpoint
                if is_master and save_path:
                    logger.info('save model@%d to %s, err=%.4f' %
                                (epoch, save_path, 1 - best_top1))
                    torch.save(
                        {
                            'epoch': epoch,
                            'log': {
                                'train': rs['train'].get_dict(),
                                'valid': rs['valid'].get_dict(),
                                'test': rs['test'].get_dict(),
                            },
                            'optimizer': optimizer.state_dict(),
                            'model': model.state_dict(),
                            'ema':
                            ema.state_dict() if ema is not None else None,
                        }, save_path)

    del model

    result['top1_test'] = best_top1
    return result
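The EMA helper referenced above is not shown. Below is a minimal sketch consistent with its observed interface (constructed with a decay, exposing a shadow dict, state_dict(), and len()); the update method is an assumption, presumably called from inside run_epoch:

class EMA:
    """Minimal sketch of the EMA helper assumed above; not the project's class."""
    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}          # parameter name -> smoothed tensor

    def __len__(self):
        return len(self.shadow)

    def state_dict(self):
        return self.shadow

    def update(self, name, value):
        # Standard exponential-moving-average update (assumed call site: run_epoch).
        if name not in self.shadow:
            self.shadow[name] = value.clone()
        else:
            self.shadow[name] = self.decay * self.shadow[name] + (1.0 - self.decay) * value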
Example 6
def eval_tta(config, augment):
    C.get()
    C.get().conf = config
    cv_ratio_test, cv_fold, save_path = augment['cv_ratio_test'], augment[
        'cv_fold'], augment['save_path']
    print(augment)
    # setup - provided augmentation rules
    C.get()['aug'] = policy_decoder(augment, augment['num_policy'],
                                    augment['num_op'])

    # eval
    model = get_model(C.get()['model'], num_class(C.get()['dataset']))
    ckpt = torch.load(save_path)
    if 'model' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)
    model.eval()

    loaders = []
    for _ in range(augment['num_policy']):  # TODO
        _, tl, validloader, tl2 = get_dataloaders(C.get()['dataset'],
                                                  C.get()['batch'],
                                                  augment['dataroot'],
                                                  cv_ratio_test,
                                                  split_idx=cv_fold)
        loaders.append(iter(validloader))
        del tl, tl2

    start_t = time.time()
    metrics = Accumulator()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    try:
        while True:
            losses = []
            corrects = []
            for loader in loaders:
                data, label = next(loader)
                data = data.cuda()
                label = label.cuda()

                pred = model(data)

                loss = loss_fn(pred, label)
                losses.append(loss.detach().cpu().numpy().reshape(1, -1))  # (1,N), so the min below is per-sample

                _, pred = pred.topk(1, 1, True, True)
                pred = pred.t()
                correct = pred.eq(label.view(
                    1, -1).expand_as(pred)).detach().cpu().numpy()
                corrects.append(correct)
                del loss, correct, pred, data, label

            losses = np.concatenate(losses)
            losses_min = np.min(losses, axis=0).squeeze()

            corrects = np.concatenate(corrects)
            corrects_max = np.max(corrects, axis=0).squeeze()
            metrics.add_dict({
                'minus_loss': -1 * np.sum(losses_min),
                'correct': np.sum(corrects_max),
                'cnt': len(corrects_max)
            })
            del corrects, corrects_max
    except StopIteration:
        pass

    del model
    metrics = metrics / 'cnt'
    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    # reporter(minus_loss=metrics['minus_loss'], top1_valid=metrics['correct'], elapsed_time=gpu_secs, done=True)
    tune.track.log(minus_loss=metrics['minus_loss'],
                   top1_valid=metrics['correct'],
                   elapsed_time=gpu_secs,
                   done=True)
    return metrics['correct']
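policy_decoder is called in every eval_tta variant but never defined here. Given the flat search-space keys built in the later examples (policy_i_j, prob_i_j, level_i_j), a plausible reconstruction is the sketch below; augment_list(False) is assumed to return (fn, low, high) tuples:

def policy_decoder(augment, num_policy, num_op):
    """Sketch: rebuild [[(op_name, prob, level), ...], ...] from a flat sample."""
    op_list = augment_list(False)
    policies = []
    for i in range(num_policy):
        ops = []
        for j in range(num_op):
            op_idx = augment['policy_%d_%d' % (i, j)]
            op_prob = augment['prob_%d_%d' % (i, j)]
            op_level = augment['level_%d_%d' % (i, j)]
            ops.append((op_list[op_idx][0].__name__, op_prob, op_level))
        policies.append(ops)
    return policies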
Example 7
def train_and_eval(tag, dataroot, test_ratio=0.0, cv_fold=0, reporter=None, metric='last', save_path=None, only_eval=False, horovod=False):
    if horovod:
        import horovod.torch as hvd
        hvd.init()
        device = torch.device('cuda', hvd.local_rank())
        torch.cuda.set_device(device)

    if not reporter:
        reporter = lambda **kwargs: 0

    max_epoch = C.get()['epoch']
    trainsampler, trainloader, validloader, testloader_ = get_dataloaders(C.get()['dataset'], C.get()['batch'], dataroot, test_ratio, split_idx=cv_fold, horovod=horovod)

    # create a model & an optimizer
    model = get_model(C.get()['model'], num_class(C.get()['dataset']), data_parallel=(not horovod))

    criterion = nn.CrossEntropyLoss()
    if C.get()['optimizer']['type'] == 'sgd':
        optimizer = optim.SGD(
            model.parameters(),
            lr=C.get()['lr'],
            momentum=C.get()['optimizer'].get('momentum', 0.9),
            weight_decay=C.get()['optimizer']['decay'],
            nesterov=C.get()['optimizer']['nesterov']
        )
    else:
        raise ValueError('invalid optimizer type=%s' % C.get()['optimizer']['type'])

    is_master = True
    if horovod:
        optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
        optimizer._requires_update = set()  # issue : https://github.com/horovod/horovod/issues/1099
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)
        if hvd.rank() != 0:
            is_master = False
    logger.debug('is_master=%s' % is_master)

    lr_scheduler_type = C.get()['lr_schedule'].get('type', 'cosine')
    if lr_scheduler_type == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=C.get()['epoch'], eta_min=0.)
    elif lr_scheduler_type == 'resnet':
        scheduler = adjust_learning_rate_resnet(optimizer)
    elif lr_scheduler_type == 'pyramid':
        scheduler = adjust_learning_rate_pyramid(optimizer, C.get()['epoch'])
    else:
        raise ValueError('invalid lr_scheduler=%s' % lr_scheduler_type)

    if C.get()['lr_schedule'].get('warmup', None):
        scheduler = GradualWarmupScheduler(
            optimizer,
            multiplier=C.get()['lr_schedule']['warmup']['multiplier'],
            total_epoch=C.get()['lr_schedule']['warmup']['epoch'],
            after_scheduler=scheduler
        )

    if not tag or not is_master:
        from FastAutoAugment.metrics import SummaryWriterDummy as SummaryWriter
        logger.warning('tag not provided, no tensorboard log.')
    else:
        from tensorboardX import SummaryWriter
    writers = [SummaryWriter(log_dir='./logs/%s/%s' % (tag, x)) for x in ['train', 'valid', 'test']]

    result = OrderedDict()
    epoch_start = 1
    if save_path and os.path.exists(save_path):
        logger.info('%s file found. loading...' % save_path)
        data = torch.load(save_path)
        if 'model' in data:
            logger.info('checkpoint epoch@%d' % data['epoch'])
            if not isinstance(model, DataParallel):
                model.load_state_dict({k.replace('module.', ''): v for k, v in data['model'].items()})
            else:
                model.load_state_dict({k if 'module.' in k else 'module.'+k: v for k, v in data['model'].items()})
            optimizer.load_state_dict(data['optimizer'])
            if data['epoch'] < C.get()['epoch']:
                epoch_start = data['epoch']
            else:
                only_eval = True
        else:
            model.load_state_dict({k: v for k, v in data.items()})
        del data
    else:
        logger.info('"%s" file not found. skip to pretrain weights...' % save_path)
        if only_eval:
            logger.warning('model checkpoint not found. only-evaluation mode is off.')
        only_eval = False

    if only_eval:
        logger.info('evaluation only+')
        model.eval()
        rs = dict()
        rs['train'] = run_epoch(model, trainloader, criterion, None, desc_default='train', epoch=0, writer=writers[0])
        rs['valid'] = run_epoch(model, validloader, criterion, None, desc_default='valid', epoch=0, writer=writers[1])
        rs['test'] = run_epoch(model, testloader_, criterion, None, desc_default='*test', epoch=0, writer=writers[2])
        for key, setname in itertools.product(['loss', 'top1', 'top5'], ['train', 'valid', 'test']):
            if setname not in rs:
                continue
            result['%s_%s' % (key, setname)] = rs[setname][key]
        result['epoch'] = 0
        return result

    # train loop
    best_top1 = 0
    for epoch in range(epoch_start, max_epoch + 1):
        if horovod:
            trainsampler.set_epoch(epoch)

        model.train()
        rs = dict()
        rs['train'] = run_epoch(model, trainloader, criterion, optimizer, desc_default='train', epoch=epoch, writer=writers[0], verbose=is_master, scheduler=scheduler)
        model.eval()

        if math.isnan(rs['train']['loss']):
            raise Exception('train loss is NaN.')

        if epoch % 5 == 0 or epoch == max_epoch:
            rs['valid'] = run_epoch(model, validloader, criterion, None, desc_default='valid', epoch=epoch, writer=writers[1], verbose=is_master)
            rs['test'] = run_epoch(model, testloader_, criterion, None, desc_default='*test', epoch=epoch, writer=writers[2], verbose=is_master)

            if metric == 'last' or rs[metric]['top1'] > best_top1:
                if metric != 'last':
                    best_top1 = rs[metric]['top1']
                for key, setname in itertools.product(['loss', 'top1', 'top5'], ['train', 'valid', 'test']):
                    result['%s_%s' % (key, setname)] = rs[setname][key]
                result['epoch'] = epoch

                writers[1].add_scalar('valid_top1/best', rs['valid']['top1'], epoch)
                writers[2].add_scalar('test_top1/best', rs['test']['top1'], epoch)

                reporter(
                    loss_valid=rs['valid']['loss'], top1_valid=rs['valid']['top1'],
                    loss_test=rs['test']['loss'], top1_test=rs['test']['top1']
                )

                # save checkpoint
                if is_master and save_path:
                    logger.info('save model@%d to %s' % (epoch, save_path))
                    torch.save({
                        'epoch': epoch,
                        'log': {
                            'train': rs['train'].get_dict(),
                            'valid': rs['valid'].get_dict(),
                            'test': rs['test'].get_dict(),
                        },
                        'optimizer': optimizer.state_dict(),
                        'model': model.state_dict()
                    }, save_path)
    del model

    result['top1_test'] = best_top1
    return result
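A hypothetical invocation of this train_and_eval (tag, dataroot, and checkpoint path are placeholders, and the global config is assumed to be loaded already):

# Hypothetical call; all arguments shown are placeholders.
result = train_and_eval(
    tag='cifar10_wresnet28x10',
    dataroot='./data',
    test_ratio=0.1,      # hold out 10% of training data for validation
    cv_fold=0,
    save_path='./ckpt.pth',
)
print(result['top1_test'])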
Example 8
    for i in range(args.num_policy):
        for j in range(args.num_op):
            space['policy_%d_%d' % (i, j)] = hp.choice(
                'policy_%d_%d' % (i, j), list(range(0, len(ops))))
            space['prob_%d_%d' % (i, j)] = hp.uniform('prob_%d_%d' % (i, j),
                                                      0.0, 1.0)
            space['level_%d_%d' % (i, j)] = hp.uniform('level_%d_%d' % (i, j),
                                                       0.0, 1.0)

    final_policy_set = defaultdict(list)
    total_computation = 0
    reward_attr = 'top1_valid'  # top1_valid or minus_loss
    for cv_fold in range(cv_num):
        if args.onlycv >= 0 and args.onlycv != cv_fold:
            continue
        for class_idx in range(num_class(C.get()['dataset'])):
            name = "search_%s_%s_fold%d_ratio%.1f_classidx=%d" % (
                C.get()['dataset'], C.get()['model']['type'], cv_fold,
                args.cv_ratio, class_idx)
            logger.info(name)
            register_trainable(
                name,
                lambda augs, rpt: eval_tta(copy.deepcopy(copied_c), augs, rpt))
            algo = HyperOptSearch(space,
                                  max_concurrent=4 * 10,
                                  reward_attr=reward_attr)

            exp_config = {
                name: {
                    'run': name,
                    'num_samples': 4 if args.smoke_test else args.num_search,
Example 9
def train_and_eval(tag, dataroot, test_ratio=0.0, cv_fold=0, reporter=None, metric='last', save_path=None, only_eval=False, horovod=False):
    if horovod:
        import horovod.torch as hvd
        hvd.init()
        device = torch.device('cuda', hvd.local_rank())
        torch.cuda.set_device(device)

    if not reporter:
        reporter = lambda **kwargs: 0

    max_epoch = C.get()['epoch']
    # trainsampler, trainloader, validloader, testloader_ = get_dataloaders(dataroot, C.get()['batch'], horovod=horovod)
    trainsampler, trainloader, validloader, testloader_ = get_dataloaders(C.get()['dataset'], C.get()['batch'], dataroot, test_ratio, split_idx=cv_fold, horovod=horovod)

    # create a model & an optimizer
    model = get_model(C.get()['model'], num_class(C.get()['dataset']), data_parallel=(not horovod))

    criterion = nn.CrossEntropyLoss()
    if C.get()['optimizer']['type'] == 'sgd':
        optimizer = optim.SGD(
            model.parameters(),
            lr=C.get()['lr'],
            momentum=C.get()['optimizer'].get('momentum', 0.9),
            weight_decay=C.get()['optimizer']['decay'],
            nesterov=C.get()['optimizer']['nesterov']
        )
    else:
        raise ValueError('invalid optimizer type=%s' % C.get()['optimizer']['type'])

    is_master = True
    if horovod:
        optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
        optimizer._requires_update = set()  # issue : https://github.com/horovod/horovod/issues/1099
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)
        if hvd.rank() != 0:
            is_master = False
    logger.debug('is_master=%s' % is_master)

    lr_scheduler_type = C.get()['lr_schedule'].get('type', 'cosine')
    if lr_scheduler_type == 'cosine':
        t_max = C.get()['epoch']
        if C.get()['lr_schedule'].get('warmup', None):
            t_max -= C.get()['lr_schedule']['warmup']['epoch']
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=0.)
    elif lr_scheduler_type == 'resnet':
        scheduler = adjust_learning_rate_resnet(optimizer)
    elif lr_scheduler_type == 'pyramid':
        scheduler = adjust_learning_rate_pyramid(optimizer, C.get()['epoch'])
    else:
        raise ValueError('invalid lr_scheduler=%s' % lr_scheduler_type)

    if C.get()['lr_schedule'].get('warmup', None):
        scheduler = GradualWarmupScheduler(
            optimizer,
            multiplier=C.get()['lr_schedule']['warmup']['multiplier'],
            total_epoch=C.get()['lr_schedule']['warmup']['epoch'],
            after_scheduler=scheduler
        )
    if not tag.strip() or not is_master:
        from FastAutoAugment.metrics import SummaryWriterDummy as SummaryWriter
        logger.warning('tag not provided, no tensorboard log.')
    else:
        from tensorboardX import SummaryWriter
    writers = [SummaryWriter(log_dir='/app/results/logs/%s/%s' % (tag, x)) for x in ['train', 'valid', 'test']]

    result = OrderedDict()
    epoch_start = 1
    # if save_path and os.path.exists(save_path):
    #     data = torch.load(save_path)
    #     if 'model' in data:
    #         # TODO : patch, horovod trained checkpoint
    #         new_state_dict = {}
    #         for k, v in data['model'].items():
    #             if not horovod and 'module.' not in k:
    #                 new_state_dict['module.' + k] = v
    #             else:
    #                 new_state_dict[k] = v
    #
    #         model.load_state_dict(new_state_dict)
    #         optimizer.load_state_dict(data['optimizer'])
    #         logger.info('ckpt epoch@%d' % data['epoch'])
    #         if data['epoch'] < C.get()['epoch']:
    #             epoch_start = data['epoch']
    #         else:
    #             only_eval = True
    #         logger.info('epoch=%d' % data['epoch'])
    #     else:
    #         model.load_state_dict(data)
    #     del data

    if only_eval:
        logger.info('evaluation only+')
        model.eval()
        rs = dict()
        rs['train'] = run_epoch(model, trainloader, criterion, None, desc_default='train', epoch=0, writer=writers[0])
        rs['valid'] = run_epoch(model, validloader, criterion, None, desc_default='valid', epoch=0, writer=writers[1])
        rs['test'] = run_epoch(model, testloader_, criterion, None, desc_default='*test', epoch=0, writer=writers[2])
        for key, setname in itertools.product(['loss', 'top1', 'top5'], ['train', 'valid', 'test']):
            result['%s_%s' % (key, setname)] = rs[setname][key]
        result['epoch'] = 0
        return result
    # train loop
    best_valid_loss = float('inf')

    for epoch in range(epoch_start, max_epoch + 1):
        if horovod:
            trainsampler.set_epoch(epoch)
        model.train()
        rs = dict()
        rs['train'] = run_epoch(model, trainloader, criterion, optimizer, desc_default='train', epoch=epoch, writer=writers[0], verbose=is_master, scheduler=scheduler)
        AugmentationPba.epoch += 1
        scheduler.step(epoch)
        model.eval()

        if math.isnan(rs['train']['loss']):
            raise Exception('train loss is NaN.')

        if epoch % (10 if 'cifar' in C.get()['dataset'] else 30) == 0 or epoch == max_epoch:
            rs['valid'] = run_epoch(model, validloader, criterion, None, desc_default='valid', epoch=epoch, writer=writers[1], verbose=is_master)
            rs['test'] = run_epoch(model, testloader_, criterion, None, desc_default='*test', epoch=epoch, writer=writers[2], verbose=is_master)

            if metric == 'last' or rs[metric]['loss'] < best_valid_loss:    # TODO
                if metric != 'last':
                    best_valid_loss = rs[metric]['loss']
                for key, setname in itertools.product(['loss', 'top1', 'top5'], ['train', 'valid', 'test']):
                    result['%s_%s' % (key, setname)] = rs[setname][key]
                result['epoch'] = epoch

                writers[1].add_scalar('valid_top1/best', rs['valid']['top1'], epoch)
                writers[2].add_scalar('test_top1/best', rs['test']['top1'], epoch)

                reporter(
                    loss_valid=rs['valid']['loss'], top1_valid=rs['valid']['top1'],
                    loss_test=rs['test']['loss'], top1_test=rs['test']['top1']
                )

            # save checkpoint
            if is_master and save_path:
                model_name = C.get()['model']['type']
                if 'skip' in model_name:
                    alpha = int(np.log10(1/C.get()['alpha']))
                    filename = '{}/{}_last_epoch_alpha_{}.pth'.format(save_path, model_name, alpha)
                else:
                    filename = '{}/{}_last_epoch.pth'.format(save_path, model_name)
                logger.info('save model@%d to %s' % (epoch, filename))
                torch.save({
                    'epoch': epoch,
                    'log': {
                        'train': rs['train'].get_dict(),
                        'valid': rs['valid'].get_dict(),
                        'test': rs['test'].get_dict(),
                    },
                    'optimizer': optimizer.state_dict(),
                    'model': model.state_dict()
                }, filename)

    del model

    return result
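GradualWarmupScheduler, used in the training examples, appears to follow the interface of the common pytorch-gradual-warmup-lr package; a sketch assuming that implementation:

# Sketch assuming the `warmup_scheduler` package (pip install warmup-scheduler);
# the training loops above pass the same four constructor arguments.
import torch
from torch import optim
from warmup_scheduler import GradualWarmupScheduler

model = torch.nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)
cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200, eta_min=0.)
scheduler = GradualWarmupScheduler(optimizer, multiplier=2.0,
                                   total_epoch=5, after_scheduler=cosine)
for epoch in range(1, 11):
    scheduler.step(epoch)  # linear warmup for 5 epochs, then cosine decay
    print(epoch, optimizer.param_groups[0]['lr'])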
Example 10
 def train(self, policy, config):
     # gr: an assigner that splits samples into groups so that, given each group's optimal policy, the average reward is maximized
     self.model.train()
     gr_num = self.model.module.gr_num
     cv_id = config['cv_id']
     load_path = config["load_path"]
     max_step = config["max_step"]
     childnet = get_model(C.get()['model'],
                          num_class(C.get()['dataset'])).cuda()
     ckpt = torch.load(load_path)
     if 'model' in ckpt:
         childnet.load_state_dict(ckpt['model'])
     else:
         childnet.load_state_dict(ckpt)
     childnet = nn.DataParallel(childnet).cuda()
     childnet.eval()
     pol_losses = []
     ori_aug = C.get()["aug"]
     C.get()["aug"] = "clean"
     _, _, dataloader, _ = get_dataloaders(C.get()['dataset'],
                                           C.get()['batch'],
                                           config['dataroot'],
                                           config['cv_ratio_test'],
                                           split_idx=cv_id,
                                           rand_val=True)
     loader_iter = iter(dataloader)
     reports = []
     for step in range(max_step):
         try:
             data, label = next(loader_iter)
         except StopIteration:
             loader_iter = iter(dataloader)
             data, label = next(loader_iter)
         data = data.cuda()
         label = label.cuda()
         logits = self.model(data, label)
         if self.mode == "supervised":
             with torch.no_grad():
                 losses = torch.zeros(gr_num, data.size(0)).cuda()
                 for i in range(gr_num):
                     aug_data = self.augmentation(
                         data, i * torch.ones(data.size(0)), policy)
                     losses[i] = self.loss_fn(childnet(aug_data), label)
                 optimal_gr_ids = losses.min(0)[1]
             loss = self.loss_fn(logits, optimal_gr_ids).mean()
             loss.backward()
             torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
             report_number = loss
         else:
             m = Categorical(logits)
             gr_ids = m.sample()
             log_probs = m.log_prob(gr_ids)
             entropys = m.entropy()
             with torch.no_grad():
                 probs = m.log_prob(
                     torch.tensor([[i]
                                   for i in range(gr_num)]).cuda()).exp()
                 rewards_list = torch.zeros(gr_num, data.size(0)).cuda()
                 for i in range(gr_num):
                     aug_data = self.augmentation(
                         data, i * torch.ones_like(gr_ids), policy)
                     rewards_list[i] = 1. / (
                         self.loss_fn(childnet(aug_data), label) + self.eps)
                 rewards = torch.tensor([
                     rewards_list[gr_id][idx]
                     for idx, gr_id in enumerate(gr_ids)
                 ]).cuda().detach()
                 # value function as baseline
                 baselines = sum([
                     prob * reward
                     for prob, reward in zip(probs, rewards_list)
                 ])
                 advantages = rewards - baselines
             if self.mode == "reinforce":
                 loss = (-log_probs * advantages).mean()
             elif self.mode == "ppo":
                 old_log_probs = log_probs.detach()
                 gr_ids = m.sample()
                 log_probs = m.log_prob(gr_ids)
                 ratios = (log_probs - old_log_probs).exp()
                 surr1 = ratios * advantages
                 surr2 = torch.clamp(ratios, 1 - self.eps_clip,
                                     1 + self.eps_clip) * advantages
                 loss = -torch.min(surr1, surr2).mean()
             loss -= self.ent_w * entropys.mean()
             loss.backward()
             torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
             report_number = advantages.mean()
         self.optimizer.step()
         self.optimizer.zero_grad()
         reports.append(float(report_number.cpu().detach()))
         if step % self.eval_step == 0 or step == max_step - 1:
             if self.mode == "supervised":
                 entropy = 0.
             else:
                 entropy = (self.ent_w *
                            entropys.mean()).cpu().detach().data
             print(
                 f"[step{step}/{max_step}] objective {np.mean(reports):.4f}, entropy {entropy:.4f}"
             )
     C.get()["aug"] = ori_aug
     return reports
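The "ppo" branch above implements the clipped surrogate objective; a standalone numeric illustration with dummy tensors (eps_clip chosen arbitrarily):

import torch

eps_clip = 0.2
log_probs = torch.tensor([-0.4, -1.2, -0.7])
old_log_probs = torch.tensor([-0.5, -0.9, -0.7])
advantages = torch.tensor([1.0, -1.0, 0.5])

ratios = (log_probs - old_log_probs).exp()            # new/old action probability
surr1 = ratios * advantages
surr2 = torch.clamp(ratios, 1 - eps_clip, 1 + eps_clip) * advantages
loss = -torch.min(surr1, surr2).mean()                # pessimistic (clipped) bound
print(loss.item())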
Example 11
def train_controller(controller, dataloaders, save_path, ctl_save_path):
    dataset = C.get()['test_dataset']
    ctl_train_steps = 1500
    ctl_num_aggre = 10
    ctl_entropy_w = 1e-5
    ctl_ema_weight = 0.95
    metrics = Accumulator()
    cnt = 0

    controller.train()
    test_ratio = 0.
    _, _, dataloader, _ = dataloaders  # validloader
    optimizer = optim.SGD(controller.parameters(),
                          lr=0.00035,
                          momentum=0.9,
                          weight_decay=0.0,
                          nesterov=True)
    # optimizer = optim.Adam(controller.parameters(), lr = 0.00035)
    # create a model & a criterion
    model = get_model(C.get()['model'], num_class(dataset), local_rank=-1)
    criterion = CrossEntropyLabelSmooth(num_class(dataset),
                                        C.get().conf.get('lb_smooth', 0),
                                        reduction="batched_sum").cuda()
    # load model weights
    data = torch.load(save_path)
    key = 'model' if 'model' in data else 'state_dict'

    if 'epoch' not in data:
        model.load_state_dict(data)
    else:
        logger.info('checkpoint epoch@%d' % data['epoch'])
        if not isinstance(model, (DataParallel, DistributedDataParallel)):
            model.load_state_dict(
                {k.replace('module.', ''): v
                 for k, v in data[key].items()})
        else:
            model.load_state_dict({
                k if 'module.' in k else 'module.' + k: v
                for k, v in data[key].items()
            })
    del data

    model.eval()
    loader_iter = iter(dataloader)  # [(image)->ToTensor->Normalize]
    baseline = None
    if os.path.isfile(ctl_save_path):
        logger.info('------Controller load------')
        checkpoint = torch.load(ctl_save_path)
        controller.load_state_dict(checkpoint['ctl_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        cnt = checkpoint['cnt']
        mean_probs = checkpoint['mean_probs']
        accs = checkpoint['accs']
        metrics_dict = checkpoint['metrics']
        metrics.metrics = metrics_dict
        init_step = checkpoint['step']
    else:
        logger.info('------Train Controller from scratch------')
        mean_probs = []
        accs = []
        init_step = 0
    for step in tqdm(range(init_step + 1,
                           ctl_train_steps * ctl_num_aggre + 1)):
        try:
            inputs, labels = next(loader_iter)
        except StopIteration:
            loader_iter = iter(dataloader)
            inputs, labels = next(loader_iter)
        batch_size = len(labels)
        inputs, labels = inputs.cuda(), labels.cuda()
        log_probs, entropys, sampled_policies = controller(inputs)
        # evaluate model with augmented validation dataset
        with torch.no_grad():
            # compare Accuracy before/after augmentation
            # ori_preds = model(inputs)
            # ori_top1, ori_top5 = accuracy(ori_preds, labels, (1, 5))
            batch_policies = batch_policy_decoder(
                sampled_policies
            )  # (list:list:list:tuple) [batch, num_policy, n_op, 3]
            aug_inputs, applied_policy = augment_data(inputs, batch_policies)
            aug_inputs = aug_inputs.cuda()
            # assert type(aug_inputs) == torch.Tensor, "Augmented Input Type Error: {}".format(type(aug_inputs))
            preds = model(aug_inputs)
            model_losses = criterion(preds, labels)  # (tensor)[batch]
            top1, top5 = accuracy(preds, labels, (1, 5))
            # logger.info("Acc B/A Aug, {:.2f}->{:.2f}".format(ori_top1, top1))
        # assert model_losses.shape == entropys.shape == log_probs.shape, \
        #         "[Size miss match] loss: {}, entropy: {}, log_prob: {}".format(model_losses.shape, entropys.shape, log_probs.shape)
        rewards = -model_losses + ctl_entropy_w * entropys  # (tensor)[batch]
        if baseline is None:
            baseline = -model_losses.mean()  # scalar tensor
        else:
            # assert baseline, "len(baseline): {}".format(len(baseline))
            baseline = baseline - (1 - ctl_ema_weight) * (
                baseline - rewards.mean().detach())
        # baseline = 0.
        loss = -1 * (log_probs * (rewards - baseline)).mean()  #scalar tensor
        # Average gradient over controller_num_aggregate samples
        loss = loss / ctl_num_aggre
        loss.backward(retain_graph=True)
        metrics.add_dict({
            'loss': loss.item() * batch_size,
            'top1': top1.item() * batch_size,
            'top5': top5.item() * batch_size,
        })
        cnt += batch_size
        if (step + 1) % ctl_num_aggre == 0:
            torch.nn.utils.clip_grad_norm_(controller.parameters(), 5.0)
            optimizer.step()
            controller.zero_grad()
            # torch.cuda.empty_cache()
            logger.info('\n[Train Controller %03d/%03d] log_prob %02f, %s',
                        step, ctl_train_steps * ctl_num_aggre,
                        log_probs.mean().item(), metrics / cnt)
        if step % 100 == 0 or step == ctl_train_steps * ctl_num_aggre:
            save_pic(inputs, aug_inputs, labels, applied_policy,
                     batch_policies, step)
            ps = []
            for pol in batch_policies:  # (list:list:list:tuple) [batch, num_policy, n_op, 3]
                for ops in pol:
                    for op in ops:
                        p = op[1]
                        ps.append(p)
            mean_prob = np.mean(ps)
            mean_probs.append(mean_prob)
            accs.append(top1.item())
            print("Mean probability: {:.2f}".format(mean_prob))
            torch.save(
                {
                    'step': step,
                    'ctl_state_dict': controller.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'metrics': dict(metrics.metrics),
                    'cnt': cnt,
                    'mean_probs': mean_probs,
                    'accs': accs
                }, ctl_save_path)
    return metrics, None  #baseline.item()
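The running-baseline update above, baseline - (1 - w) * (baseline - reward), is an exponential moving average in disguise: it equals w * baseline + (1 - w) * reward. A quick numeric check:

# The two forms of the EMA baseline update are algebraically identical.
w, b, r = 0.95, 2.0, 3.0
assert abs((b - (1 - w) * (b - r)) - (w * b + (1 - w) * r)) < 1e-12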
Example 12
def eval_tta(config, augment, reporter):
    C.get()
    C.get().conf = config
    save_path = augment['save_path']
    cv_id, gr_id = augment["cv_id"], augment["gr_id"]
    gr_ids = augment["gr_ids"]

    # setup - provided augmentation rules
    C.get()['aug'] = policy_decoder(augment, augment['num_policy'],
                                    augment['num_op'])

    # eval
    model = get_model(C.get()['model'], num_class(C.get()['dataset']))
    ckpt = torch.load(save_path)
    if 'model' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)
    model.eval()

    loaders = []
    for _ in range(augment['num_policy']):  # TODO
        loader = get_post_dataloader(C.get()["dataset"],
                                     C.get()['batch'], augment["dataroot"],
                                     augment['cv_ratio_test'], cv_id, gr_id,
                                     gr_ids)
        loaders.append(iter(loader))

    start_t = time.time()
    metrics = Accumulator()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    try:
        while True:
            losses = []
            corrects = []
            for loader in loaders:
                data, label = next(loader)
                data = data.cuda()
                label = label.cuda()

                pred = model(data)

                loss = loss_fn(pred, label)
                losses.append(loss.detach().cpu().numpy().reshape(1,
                                                                  -1))  # (1,N)

                _, pred = pred.topk(1, 1, True, True)
                pred = pred.t()
                correct = pred.eq(label.view(
                    1, -1).expand_as(pred)).detach().cpu().numpy()  # (1,N)
                corrects.append(correct)
                del loss, correct, pred, data, label

            losses = np.concatenate(losses)
            losses_min = np.min(losses, axis=0).squeeze()  # (N,)

            corrects = np.concatenate(corrects)
            corrects_max = np.max(corrects, axis=0).squeeze()  # (N,)
            metrics.add_dict({
                'loss': np.sum(losses_min),
                'correct': np.sum(corrects_max),
                'cnt': corrects_max.size
            })
            del corrects, corrects_max
    except StopIteration:
        pass

    del model
    metrics = metrics / 'cnt'
    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    reporter(loss=metrics['loss'],
             top1_valid=metrics['correct'],
             elapsed_time=gpu_secs,
             done=True)
    return metrics['correct']
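The per-sample aggregation across policies (min loss, max correct) can be seen in isolation with numpy:

import numpy as np

# Losses for 3 policies x 4 samples: one row per policy, one column per sample.
losses = np.array([[0.9, 0.2, 1.5, 0.4],
                   [0.5, 0.6, 1.1, 0.3],
                   [0.7, 0.1, 2.0, 0.8]])
corrects = np.array([[1, 0, 0, 1],
                     [0, 1, 0, 1],
                     [1, 1, 0, 0]])

print(np.min(losses, axis=0))    # best (lowest) loss per sample: [0.5 0.1 1.1 0.3]
print(np.max(corrects, axis=0))  # correct if any policy got it:  [1 1 0 1]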
Example 13
        ops = augment_list(False)
        space = {}
        for i in range(args.num_policy):
            for j in range(args.num_op):
                space['policy_%d_%d' % (i, j)] = hp.choice(
                    'policy_%d_%d' % (i, j), list(range(0, len(ops))))
                space['prob_%d_%d' % (i, j)] = hp.uniform(
                    'prob_%d_%d' % (i, j), 0.0, 1.0)
                space['level_%d_%d' % (i, j)] = hp.uniform(
                    'level_%d_%d' % (i, j), 0.0, 1.0)

        num_process_per_gpu = 2
        total_computation = 0
        reward_attr = 'top1_valid'  # top1_valid or minus_loss
        # load childnet for g
        childnet = get_model(C.get()['model'], num_class(C.get()['dataset']))
        ckpt = torch.load(paths[0])
        if 'model' in ckpt:
            childnet.load_state_dict(ckpt['model'])
        else:
            childnet.load_state_dict(ckpt)
        # g definition
        gr_spliter = GrSpliter(childnet, gr_num=args.gr_num, mode=args.mode)
        del childnet, ckpt
        gr_results = []
        gr_dist_collector = defaultdict(list)
        # best_configs = defaultdict(lambda: None)
        # result_to_save = ['timestamp', 'top1_valid', 'loss']
        final_policy_group = defaultdict(list)
        for r in range(args.repeat):  # run multiple times.
            for cv_id in range(cv_num):
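For context, spaces like the one above plug into hyperopt's fmin; a toy end-to-end run with a placeholder objective (key names are illustrative only):

from hyperopt import fmin, hp, tpe

space = {
    'policy_0_0': hp.choice('policy_0_0', list(range(16))),
    'prob_0_0': hp.uniform('prob_0_0', 0.0, 1.0),
    'level_0_0': hp.uniform('level_0_0', 0.0, 1.0),
}

def objective(params):
    # Placeholder: stands in for an eval_tta-style evaluation (fmin minimizes).
    return (params['prob_0_0'] - 0.5) ** 2 + params['level_0_0']

best = fmin(objective, space, algo=tpe.suggest, max_evals=20)
print(best)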