Example #1
from torchpack.mtpack.utils.config import Config, configs

from dgc.compression import DGCCompressor
from dgc.memory import DGCSGDMemory
from dgc.optim import DGCSGD

configs.train.dgc = True
configs.train.compression = Config(DGCCompressor)
configs.train.compression.compress_ratio = 0.001
configs.train.compression.sample_ratio = 0.01
configs.train.compression.strided_sample = True
configs.train.compression.compress_upper_bound = 1.3
configs.train.compression.compress_lower_bound = 0.8
configs.train.compression.max_adaptation_iters = 10
configs.train.compression.resample = True

old_optimizer = configs.train.optimizer
configs.train.optimizer = Config(DGCSGD)
for k, v in old_optimizer.items():
    configs.train.optimizer[k] = v

configs.train.compression.memory = Config(DGCSGDMemory)
configs.train.compression.memory.momentum = configs.train.optimizer.momentum
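
A note on the pattern above: Config(SomeClass) appears to record the target callable, and the attributes assigned afterwards are passed as keyword arguments when the config object is eventually called. Example #9 performs that call for both the memory and the compressor; a minimal sketch of the sequence, repeated here only for context:

# Deferred construction (as done in Example #9): the attribute assignments
# above become keyword arguments of the wrapped classes.
configs.train.compression.memory = configs.train.compression.memory()  # -> DGCSGDMemory(momentum=0.9)
compression = configs.train.compression()  # -> DGCCompressor(compress_ratio=0.001, ..., memory=...)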
Example #2
import torch

from torchpack.mtpack.datasets.vision import ImageNet
from torchpack.mtpack.utils.config import Config, configs

# dataset
configs.dataset = Config(ImageNet)
configs.dataset.root = '/dataset/imagenet'
configs.dataset.num_classes = 1000
configs.dataset.image_size = 224

# training
configs.train.num_epochs = 90
configs.train.batch_size = 32

# optimizer
configs.train.optimize_bn_separately = False
configs.train.optimizer.lr = 0.0125
configs.train.optimizer.weight_decay = 5e-5

# scheduler
configs.train.scheduler = Config(torch.optim.lr_scheduler.MultiStepLR)
configs.train.scheduler.milestones = [
    e - configs.train.warmup_lr_epochs for e in [30, 60, 80]
]
configs.train.scheduler.gamma = 0.1
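
Two pieces of arithmetic in this config depend on values set in the other examples (warmup_lr_epochs = 5 from Example #10, the hvd.size() scaling from Example #9); the worker count below is only an assumed illustration:

# Milestones are shifted by the 5 warmup epochs (Example #10):
#   [e - 5 for e in [30, 60, 80]]  ->  [25, 55, 75]
# The base LR of 0.0125 is later multiplied in Example #9 by
# num_batches_per_step * hvd.size(); with the default num_batches_per_step = 1
# and, say, 8 workers, that gives 0.0125 * 8 = 0.1.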
Example #3
from torchvision.models import resnet50

from torchpack.mtpack.utils.config import Config, configs

configs.train.optimizer.weight_decay = 1e-4
configs.train.optimizer.nesterov = True
configs.train.optimize_bn_separately = True

# model
configs.model = Config(resnet50)
configs.model.num_classes = configs.dataset.num_classes
configs.model.zero_init_residual = True
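
Setting optimize_bn_separately = True matters because Example #9 then builds two parameter groups and gives the batch-norm group weight_decay=0. The helpers it relies on (get_bn_parameters, get_common_parameters) belong to the training script and are not shown in these examples; the sketch below is purely illustrative of what such a split could look like, not the script's actual implementation:

import torch.nn as nn

def get_bn_parameters(module):
    # Illustrative only: yield the parameters owned by BatchNorm layers so the
    # optimizer group in Example #9 can exempt them from weight decay.
    for m in module.modules():
        if isinstance(m, nn.modules.batchnorm._BatchNorm):
            yield from m.parameters()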
Example #4
from torchpack.mtpack.models.vision.resnet import resnet20

from torchpack.mtpack.utils.config import Config, configs

# model
configs.model = Config(resnet20)
configs.model.num_classes = configs.dataset.num_classes
Example #5
import torch

from torchpack.mtpack.utils.config import Config, configs

# scheduler
configs.train.scheduler = Config(torch.optim.lr_scheduler.CosineAnnealingLR)
configs.train.scheduler.T_max = configs.train.num_epochs - configs.train.warmup_lr_epochs
Example #6
import torch

from torchpack.mtpack.datasets.vision import CIFAR
from torchpack.mtpack.utils.config import Config, configs

# dataset
configs.dataset = Config(CIFAR)
configs.dataset.root = './data/cifar10'
configs.dataset.num_classes = 10
configs.dataset.image_size = 32

# training
configs.train.num_epochs = 200
configs.train.batch_size = 128

# optimizer
configs.train.optimizer.lr = 0.1
configs.train.optimizer.weight_decay = 1e-4

# scheduler
configs.train.scheduler = Config(torch.optim.lr_scheduler.CosineAnnealingLR)
configs.train.scheduler.T_max = configs.train.num_epochs - configs.train.warmup_lr_epochs
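
With the values above and the warmup length from Example #10, the cosine schedule's horizon works out as follows:

# num_epochs = 200 (this example), warmup_lr_epochs = 5 (Example #10):
# T_max = 200 - 5 = 195 annealing epochs after the warmup phase.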
Example #7
from torchvision.models import vgg16_bn

from torchpack.mtpack.utils.config import Config, configs

# model
configs.model = Config(vgg16_bn)
configs.model.num_classes = configs.dataset.num_classes
Example #8
from torchpack.mtpack.models.vision.resnet import resnet110

from torchpack.mtpack.utils.config import Config, configs

# model
configs.model = Config(resnet110)
configs.model.num_classes = configs.dataset.num_classes
Example #9
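This example is the body of the training script's main(); its module-level imports and local helpers (printr, get_save_path, evaluate, train, get_bn_parameters, get_common_parameters, the DistributedOptimizer wrapper) are defined elsewhere in the script and are not part of the snippet. A plausible set of imports covering the names used below:

import argparse
import os
import random
import shutil

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.multiprocessing as mp
import horovod.torch as hvd

from torchpack.mtpack.utils.config import Config, configs
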
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--configs', nargs='+')
    parser.add_argument('--devices', default='gpu')
    parser.add_argument('--evaluate', action='store_true')
    parser.add_argument('--suffix', default='')
    args, opts = parser.parse_known_args()

    ##################
    # Update configs #
    ##################

    printr(f'==> loading configs from {args.configs}')
    Config.update_from_modules(*args.configs)
    Config.update_from_arguments(*opts)

    if args.devices is not None and args.devices != 'cpu':
        configs.device = 'cuda'
        # Horovod: pin GPU to local rank.
        torch.cuda.set_device(hvd.local_rank())
        cudnn.benchmark = True
    else:
        configs.device = 'cpu'

    if 'seed' in configs and configs.seed is not None:
        random.seed(configs.seed)
        np.random.seed(configs.seed)
        torch.manual_seed(configs.seed)
        if configs.device == 'cuda' and configs.get('deterministic', True):
            cudnn.deterministic = True
            cudnn.benchmark = False

    configs.train.num_batches_per_step = \
        configs.train.get('num_batches_per_step', 1)

    configs.train.save_path = get_save_path(*args.configs) \
                              + f'{args.suffix}.np{hvd.size()}'
    printr(f'[train.save_path] = {configs.train.save_path}')
    checkpoint_path = os.path.join(configs.train.save_path, 'checkpoints')
    configs.train.checkpoint_path = os.path.join(
        checkpoint_path, f'e{"{epoch}"}-r{hvd.rank()}.pth')
    configs.train.latest_pth_path = os.path.join(checkpoint_path,
                                                 f'latest-r{hvd.rank()}.pth')
    configs.train.best_pth_path = os.path.join(checkpoint_path,
                                               f'best-r{hvd.rank()}.pth')
    os.makedirs(checkpoint_path, exist_ok=True)

    if args.evaluate:
        configs.train.latest_pth_path = configs.train.best_pth_path

    printr(configs)

    #####################################################################
    # Initialize DataLoaders, Model, Criterion, LRScheduler & Optimizer #
    #####################################################################

    printr(f'\n==> creating dataset "{configs.dataset}"')
    dataset = configs.dataset()
    # Horovod: limit # of CPU threads to be used per worker.
    torch.set_num_threads(configs.data.num_threads_per_worker)
    loader_kwargs = {
        'num_workers': configs.data.num_threads_per_worker,
        'pin_memory': True
    } if configs.device == 'cuda' else {}
    # When supported, use 'forkserver' to spawn dataloader workers
    # instead of 'fork' to prevent issues with Infiniband implementations
    # that are not fork-safe
    if (loader_kwargs.get('num_workers', 0) > 0
            and hasattr(mp, '_supports_context') and mp._supports_context
            and 'forkserver' in mp.get_all_start_methods()):
        loader_kwargs['multiprocessing_context'] = 'forkserver'
    printr(f'\n==> loading dataset "{loader_kwargs}"')
    samplers, loaders = {}, {}
    for split in dataset:
        # Horovod: use DistributedSampler to partition data among workers.
        # Manually specify `num_replicas=hvd.size()` and `rank=hvd.rank()`.
        samplers[split] = torch.utils.data.distributed.DistributedSampler(
            dataset[split], num_replicas=hvd.size(), rank=hvd.rank())
        loaders[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.train.batch_size *
            (configs.train.num_batches_per_step if split == 'train' else 1),
            sampler=samplers[split],
            drop_last=(configs.train.num_batches_per_step > 1
                       and split == 'train'),
            **loader_kwargs)

    printr(f'\n==> creating model "{configs.model}"')
    model = configs.model()
    model = model.to(configs.device)

    criterion = configs.train.criterion().to(configs.device)
    # Horovod: scale learning rate by the number of GPUs.
    configs.train.base_lr = configs.train.optimizer.lr
    configs.train.optimizer.lr *= (configs.train.num_batches_per_step *
                                   hvd.size())
    printr(f'\n==> creating optimizer "{configs.train.optimizer}"')

    if configs.train.optimize_bn_separately:
        optimizer = configs.train.optimizer([
            dict(params=get_common_parameters(model)),
            dict(params=get_bn_parameters(model), weight_decay=0)
        ])
    else:
        optimizer = configs.train.optimizer(model.parameters())

    # Horovod: (optional) compression algorithm.
    printr(f'\n==> creating compression "{configs.train.compression}"')
    if configs.train.dgc:
        printr(f'\n==> initializing dgc compression')
        configs.train.compression.memory = configs.train.compression.memory()
        compression = configs.train.compression()
        compression.memory.initialize(model.named_parameters())
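        # Register only multi-dimensional tensors (conv/linear weights) with the
        # compressor; biases and batch-norm parameters stay uncompressed.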
        cpr_parameters = {}
        for name, param in model.named_parameters():
            if param.dim() > 1:
                cpr_parameters[name] = param
        compression.initialize(cpr_parameters.items())
    else:
        compression = configs.train.compression()

    # Horovod: wrap optimizer with DistributedOptimizer.
    optimizer = DistributedOptimizer(
        optimizer,
        named_parameters=model.named_parameters(),
        compression=compression,
        backward_passes_per_step=configs.train.num_batches_per_step,
        op=hvd.Average)

    # resume from checkpoint
    last_epoch, best_metric = -1, None
    if os.path.exists(configs.train.latest_pth_path):
        printr(f'\n[resume_path] = {configs.train.latest_pth_path}')
        checkpoint = torch.load(configs.train.latest_pth_path)
        if 'model' in checkpoint:
            model.load_state_dict(checkpoint.pop('model'))
        if 'optimizer' in checkpoint:
            optimizer.load_state_dict(checkpoint.pop('optimizer'))
        if configs.train.dgc and 'compression' in checkpoint:
            compression.memory.load_state_dict(checkpoint.pop('compression'))
        last_epoch = checkpoint.get('epoch', last_epoch)
        best_metric = checkpoint.get('meters',
                                     {}).get(f'{configs.train.metric}_best',
                                             best_metric)
        # Horovod: broadcast parameters.
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    else:
        printr('\n==> train from scratch')
        # Horovod: broadcast parameters & optimizer state.
        printr('\n==> broadcasting parameters and optimizer state')
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    num_steps_per_epoch = len(loaders['train'])
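    # When resuming, rebuild the scheduler at the right position: offset by the
    # warmup epochs, converted to a step count if the LR is scheduled per step.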
    if 'scheduler' in configs.train and configs.train.scheduler is not None:
        if configs.train.schedule_lr_per_epoch:
            last = max(last_epoch - configs.train.warmup_lr_epochs - 1, -1)
        else:
            last = max((last_epoch - configs.train.warmup_lr_epochs + 1) *
                       num_steps_per_epoch - 2, -1)
        scheduler = configs.train.scheduler(optimizer, last_epoch=last)
    else:
        scheduler = None

    ############
    # Training #
    ############

    meters = evaluate(model,
                      device=configs.device,
                      meters=configs.train.meters,
                      loader=loaders['test'],
                      split='test')
    for k, meter in meters.items():
        printr(f'[{k}] = {meter:.2f}')
    if args.evaluate or last_epoch >= configs.train.num_epochs:
        return

    if hvd.rank() == 0:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(configs.train.save_path)
    else:
        writer = None

    for current_epoch in range(last_epoch + 1, configs.train.num_epochs):
        printr(f'\n==> training epoch {current_epoch}'
               f'/{configs.train.num_epochs}')

        if configs.train.dgc:
            compression.warmup_compress_ratio(current_epoch)

        train(model=model,
              loader=loaders['train'],
              device=configs.device,
              epoch=current_epoch,
              sampler=samplers['train'],
              criterion=criterion,
              optimizer=optimizer,
              scheduler=scheduler,
              batch_size=configs.train.batch_size,
              num_batches_per_step=configs.train.num_batches_per_step,
              num_steps_per_epoch=num_steps_per_epoch,
              warmup_lr_epochs=configs.train.warmup_lr_epochs,
              schedule_lr_per_epoch=configs.train.schedule_lr_per_epoch,
              writer=writer,
              quiet=hvd.rank() != 0)

        meters = dict()
        for split, loader in loaders.items():
            if split != 'train':
                meters.update(
                    evaluate(model,
                             loader=loader,
                             device=configs.device,
                             meters=configs.train.meters,
                             split=split,
                             quiet=hvd.rank() != 0))

        best = False
        if 'metric' in configs.train and configs.train.metric is not None:
            if best_metric is None or best_metric < meters[
                    configs.train.metric]:
                best_metric, best = meters[configs.train.metric], True
            meters[configs.train.metric + '_best'] = best_metric

        if writer is not None:
            num_inputs = ((current_epoch + 1) * num_steps_per_epoch *
                          configs.train.num_batches_per_step *
                          configs.train.batch_size * hvd.size())
            print('')
            for k, meter in meters.items():
                print(f'[{k}] = {meter:.2f}')
                writer.add_scalar(k, meter, num_inputs)

        checkpoint = {
            'epoch': current_epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'meters': meters,
            'compression': compression.memory.state_dict() \
                            if configs.train.dgc else None
        }

        # save checkpoint
        checkpoint_path = \
            configs.train.checkpoint_path.format(epoch=current_epoch)
        torch.save(checkpoint, checkpoint_path)
        shutil.copyfile(checkpoint_path, configs.train.latest_pth_path)
        if best:
            shutil.copyfile(checkpoint_path, configs.train.best_pth_path)
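        # Rotate per-epoch checkpoints: keep only the three most recent
        # (the latest/best copies above are kept separately).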
        if current_epoch >= 3:
            os.remove(
                configs.train.checkpoint_path.format(epoch=current_epoch - 3))
        printr(f'[save_path] = {checkpoint_path}')
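
One detail worth spelling out: the checkpoint filename template built near the top of main() embeds a literal {epoch} placeholder (the inner {"{epoch}"} keeps it out of the f-string), which is only filled in at save time via .format(epoch=current_epoch). Illustration with assumed values (rank 0, epoch 42):

# Hypothetical values, for illustration only.
template = f'e{"{epoch}"}-r{0}.pth'   # -> 'e{epoch}-r0.pth'
print(template.format(epoch=42))      # -> 'e42-r0.pth'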
Example #10
import torch

from torchpack.mtpack.utils.config import Config, configs
from torchpack.mtpack.meters import TopKClassMeter

from dgc.horovod.compression import Compression


configs.seed = 42
configs.data = Config()
configs.data.num_threads_per_worker = 4

# criterion
configs.train = Config()
configs.train.dgc = False
configs.train.compression = Compression.none
configs.train.criterion = Config(torch.nn.CrossEntropyLoss)

# optimizer
configs.train.optimizer = Config(torch.optim.SGD)
configs.train.optimizer.momentum = 0.9

# scheduler
configs.train.schedule_lr_per_epoch = True
configs.train.warmup_lr_epochs = 5

# metrics
configs.train.metric = 'acc/test_top1'
configs.train.meters = Config()
configs.train.meters['acc/{}_top1'] = Config(TopKClassMeter)
configs.train.meters['acc/{}_top1'].k = 1
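
The meter key above contains a {} placeholder, which the evaluation code presumably fills with the split name; that is how configs.train.metric = 'acc/test_top1' lines up with the key once the 'test' split is evaluated in Example #9. Illustrative formatting only:

# Hypothetical illustration of how the templated key maps onto the metric name.
meter_key = 'acc/{}_top1'
print(meter_key.format('test'))  # -> 'acc/test_top1'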