Example #1
def create_compressed_model(model, config):
    input_args = (next(model.parameters()).new_empty(
        config['input_sample_size']), )
    if is_main_process():
        print(*get_all_modules(model).keys(), sep="\n")
        ctx = build_graph(model,
                          input_args, {},
                          'create_model',
                          reset_context=True)
        dump_graph(ctx, osp.join(config.log_dir, "original_graph.dot"))

    compression_algo = create_compression_algorithm(model, config)

    if is_main_process():
        if hasattr(compression_algo.model, "build_graph"):
            ctx = compression_algo.model.build_graph()
        else:
            ctx = build_graph(compression_algo.model,
                              input_args, {},
                              "create_model_compressed",
                              reset_context=True)
        dump_graph(ctx, osp.join(config.log_dir, "compressed_graph.dot"))

    model = compression_algo.model
    return compression_algo, model
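
A note on the config argument: it is read both as a mapping (config['input_sample_size']) and through attributes (config.log_dir), so a plain dict will not satisfy this function. A minimal sketch of a compatible object, assuming only these two access patterns (the SampleConfig name and the example values are illustrative, not part of the original API):

class SampleConfig(dict):
    """Dict that also exposes its keys as attributes, matching the dual
    config['input_sample_size'] / config.log_dir access style above."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as exc:
            raise AttributeError(name) from exc

config = SampleConfig(input_sample_size=[1, 3, 224, 224], log_dir='runs/exp0')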
Example #2
def safe_thread_call(main_call_fn, after_barrier_call_fn=None):
    result = None
    if is_dist_avail_and_initialized():
        if is_main_process():
            result = main_call_fn()
        distributed.barrier()
        if not is_main_process():
            result = (after_barrier_call_fn() if after_barrier_call_fn
                      else main_call_fn())
    else:
        result = main_call_fn()
    return result
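
safe_thread_call relies on the is_dist_avail_and_initialized and is_main_process helpers; minimal sketches of both, plus a typical use (downloading pretrained weights exactly once per node while the other ranks wait at the barrier), assuming torch.distributed and the standard torch.hub cache behavior:

import torch
import torch.distributed as distributed

def is_dist_avail_and_initialized():
    return distributed.is_available() and distributed.is_initialized()

def is_main_process():
    # Rank 0 is the designated main process; non-distributed runs qualify too.
    return not is_dist_avail_and_initialized() or distributed.get_rank() == 0

# Typical use: rank 0 downloads the weights; the other ranks hit the barrier
# first and then load from the cache that rank 0 has already populated.
state_dict = safe_thread_call(
    lambda: torch.hub.load_state_dict_from_url(
        'https://download.pytorch.org/models/resnet18-5c106cde.pth'))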
Example #3
def validate(val_loader, model, criterion, config):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input_, target) in enumerate(val_loader):
            input_ = input_.to(config.device)
            target = target.to(config.device)

            # compute output
            output = model(input_)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input_.size(0))
            top1.update(acc1, input_.size(0))
            top5.update(acc5, input_.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.print_freq == 0:
                print('{rank}'
                      'Test: [{0}/{1}] '
                      'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                      'Loss: {loss.val:.4f} ({loss.avg:.4f}) '
                      'Acc@1: {top1.val:.3f} ({top1.avg:.3f}) '
                      'Acc@5: {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5,
                          rank='{}: '.format(config.rank)
                          if config.multiprocessing_distributed else ''))

        if is_main_process():
            config.tb.add_scalar("val/loss", losses.avg,
                                 len(val_loader) * config.get('cur_epoch', 0))
            config.tb.add_scalar("val/top1", top1.avg,
                                 len(val_loader) * config.get('cur_epoch', 0))
            config.tb.add_scalar("val/top5", top5.avg,
                                 len(val_loader) * config.get('cur_epoch', 0))

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return top1.avg, top5.avg
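
validate (and train_epoch in example #6) depend on the AverageMeter and accuracy helpers; minimal sketches with the usual semantics from the PyTorch ImageNet example, in case they are not already in scope:

import torch

class AverageMeter:
    """Tracks the latest value and a running average weighted by batch size."""

    def __init__(self):
        self.val = self.sum = self.count = self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Computes top-k accuracy (in percent) for each requested k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        return [correct[:k].reshape(-1).float().sum(0) * (100.0 / batch_size)
                for k in topk]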
Example #4
def train(config, compression_algo, model, criterion, is_inception,
          lr_scheduler, model_name, optimizer, train_loader, train_sampler,
          val_loader, best_acc1=0):
    for epoch in range(config.start_epoch, config.epochs):
        config.cur_epoch = epoch
        if config.distributed:
            train_sampler.set_epoch(epoch)
        lr_scheduler.step(epoch if not isinstance(
            lr_scheduler, ReduceLROnPlateau) else best_acc1)

        # train for one epoch
        train_epoch(train_loader, model, criterion, optimizer,
                    compression_algo, epoch, config, is_inception)

        # compute compression algo statistics
        stats = compression_algo.statistics()

        acc1 = best_acc1
        if epoch % config.test_every_n_epochs == 0:
            # evaluate on validation set
            acc1, _ = validate(val_loader, model, criterion, config)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # update compression scheduler state at the end of the epoch
        compression_algo.scheduler.epoch_step()

        if is_main_process():
            print_statistics(stats)

            checkpoint_path = osp.join(config.checkpoint_save_dir,
                                       get_name(config) + '_last.pth')
            checkpoint = {
                'epoch': epoch + 1,
                'arch': model_name,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
                'scheduler': compression_algo.scheduler.state_dict()
            }

            torch.save(checkpoint, checkpoint_path)
            make_additional_checkpoints(checkpoint_path, is_best, epoch + 1,
                                        config)

            for key, value in stats.items():
                if isinstance(value, (int, float)):
                    config.tb.add_scalar(
                        "compression/statistics/{0}".format(key), value,
                        len(train_loader) * epoch)
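
The checkpoint written above can be restored with the matching torch.load pattern; a sketch, assuming the same keys and that the compression scheduler exposes load_state_dict symmetrically with the state_dict call above:

# Sketch: restoring the checkpoint saved above (same keys assumed).
checkpoint = torch.load(checkpoint_path, map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
compression_algo.scheduler.load_state_dict(checkpoint['scheduler'])
start_epoch = checkpoint['epoch']
best_acc1 = checkpoint['best_acc1']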
Example #5
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    print(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    weights = config.get('weights')
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_algo.distributed()

    resuming_checkpoint = config.resuming_checkpoint

    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint, config.device,
                                compression_scheduler=compression_algo.scheduler)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.mode.lower() == 'test':
        print(model)
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        print("Trainable argument count:{params}".format(params=params))

        model = model.to(config.device)
        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)
        print_statistics(compression_algo.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        model = \
            train(model, model_without_dp, compression_algo, train_loader, val_loader, w_class, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
Example #6
def train(model, model_without_dp, compression_algo, train_loader, val_loader,
          class_weights, class_encoding, config):
    print("\nTraining...\n")

    # Check if the network architecture is correct
    print(model)

    optim_config = config.get('optimizer', {})
    optim_params = optim_config.get('optimizer_params', {})
    lr = optim_params.get("lr", 1e-4)

    params_to_optimize, criterion = get_aux_loss_dependent_params(model_without_dp,
                                                                  class_weights,
                                                                  lr * 10,
                                                                  config)

    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    # Evaluation metric

    ignore_index = None
    ignore_unlabeled = config.get("ignore_unlabeled", True)
    if ignore_unlabeled and ('unlabeled' in class_encoding):
        ignore_index = list(class_encoding).index('unlabeled')

    metric = IoU(len(class_encoding), ignore_index=ignore_index)

    best_miou = -1
    resuming_checkpoint = config.resuming_checkpoint
    # Optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, optimizer, start_epoch, best_miou, _ = \
            load_checkpoint(
                model, resuming_checkpoint, config.device,
                optimizer, compression_algo.scheduler)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
        config.start_epoch = start_epoch

    # Start Training
    train_obj = Train(model, train_loader, optimizer, criterion, compression_algo, metric, config.device,
                      config.model)
    val_obj = Test(model, val_loader, criterion, metric, config.device,
                   config.model)

    for epoch in range(config.start_epoch, config.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        if config.distributed:
            train_loader.sampler.set_epoch(epoch)

        if not isinstance(lr_scheduler, ReduceLROnPlateau):
            lr_scheduler.step(epoch)

        epoch_loss, (iou, miou) = train_obj.run_epoch(config.print_step)
        compression_algo.scheduler.epoch_step()

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        if is_main_process():
            config.tb.add_scalar("train/loss", epoch_loss, epoch)
            config.tb.add_scalar("train/mIoU", miou, epoch)
            config.tb.add_scalar("train/learning_rate", optimizer.param_groups[0]['lr'], epoch)
            config.tb.add_scalar("train/compression_loss", compression_algo.loss(), epoch)

            for key, value in compression_algo.statistics().items():
                if isinstance(value, (int, float)):
                    config.tb.add_scalar("compression/statistics/{0}".format(key), value, epoch)

        if (epoch + 1) % config.save_freq == 0 or epoch + 1 == config.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val_obj.run_epoch(config.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            if is_main_process():
                config.tb.add_scalar("val/mIoU", miou, epoch)
                config.tb.add_scalar("val/loss", loss, epoch)
                for i, (key, class_iou) in enumerate(zip(class_encoding.keys(), iou)):
                    config.tb.add_scalar("{}/mIoU_Cls{}_{}".format(config.dataset, i, key), class_iou, epoch)

            is_best = miou > best_miou
            best_miou = max(miou, best_miou)

            if isinstance(lr_scheduler, ReduceLROnPlateau):
                lr_scheduler.step(best_miou)

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == config.epochs or is_best:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if is_main_process():
                checkpoint_path = save_checkpoint(model,
                                                  optimizer, epoch + 1, best_miou,
                                                  compression_algo.scheduler, config)

                make_additional_checkpoints(checkpoint_path, is_best, epoch + 1, config)
                print_statistics(compression_algo.statistics())

    return model
def train_epoch(train_loader,
                model,
                criterion,
                optimizer,
                compression_algo,
                epoch,
                config,
                is_inception=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    compression_losses = AverageMeter()
    criterion_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    compression_scheduler = compression_algo.scheduler

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input_, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input_ = input_.to(config.device)
        target = target.to(config.device)

        # compute output
        if is_inception:
            # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
            output, aux_outputs = model(input_)
            loss1 = criterion(output, target)
            loss2 = criterion(aux_outputs, target)
            criterion_loss = loss1 + 0.4 * loss2
        else:
            output = model(input_)
            criterion_loss = criterion(output, target)

        # compute compression loss
        compression_loss = compression_algo.loss()
        loss = criterion_loss + compression_loss

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input_.size(0))
        comp_loss_val = compression_loss.item() if isinstance(
            compression_loss, torch.Tensor) else compression_loss
        compression_losses.update(comp_loss_val, input_.size(0))
        criterion_losses.update(criterion_loss.item(), input_.size(0))
        top1.update(acc1, input_.size(0))
        top5.update(acc5, input_.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        compression_scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.print_freq == 0:
            print('{rank}'
                  'Epoch: [{0}][{1}/{2}] '
                  'Lr: {3:.3} '
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'CE_loss: {ce_loss.val:.4f} ({ce_loss.avg:.4f}) '
                  'CR_loss: {cr_loss.val:.4f} ({cr_loss.avg:.4f}) '
                  'Loss: {loss.val:.4f} ({loss.avg:.4f}) '
                  'Acc@1: {top1.val:.3f} ({top1.avg:.3f}) '
                  'Acc@5: {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(train_loader),
                      get_lr(optimizer),
                      batch_time=batch_time,
                      data_time=data_time,
                      ce_loss=criterion_losses,
                      cr_loss=compression_losses,
                      loss=losses,
                      top1=top1,
                      top5=top5,
                      rank='{}: '.format(config.rank)
                      if config.multiprocessing_distributed else ''))

        if is_main_process():
            global_step = len(train_loader) * epoch
            config.tb.add_scalar("train/learning_rate", get_lr(optimizer),
                                 i + global_step)
            config.tb.add_scalar("train/criterion_loss", criterion_losses.avg,
                                 i + global_step)
            config.tb.add_scalar("train/compression_loss",
                                 compression_losses.avg, i + global_step)
            config.tb.add_scalar("train/loss", losses.avg, i + global_step)
            config.tb.add_scalar("train/top1", top1.avg, i + global_step)
            config.tb.add_scalar("train/top5", top5.avg, i + global_step)

            for stat_name, stat_value in compression_algo.statistics().items():
                if isinstance(stat_value, (int, float)):
                    config.tb.add_scalar(
                        'train/statistics/{}'.format(stat_name), stat_value,
                        i + global_step)
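
train_epoch reads the current learning rate through a get_lr helper; a one-line sketch, assuming the first parameter group is representative:

def get_lr(optimizer):
    # Report the learning rate of the first parameter group.
    return optimizer.param_groups[0]['lr']
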
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))
    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_algo, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        train(config, compression_algo, model, criterion, is_inception,
              lr_scheduler, model_name, optimizer, train_loader, train_sampler,
              val_loader, best_acc1)
def main_worker_binarization(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    original_model = copy.deepcopy(model)
    compression_algo, model = create_compressed_model(model, config)
    if not isinstance(compression_algo, Binarization):
        raise RuntimeError(
            "The binarization sample worker may only be run with the binarization algorithm!"
        )

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    binarization_config = compression_config if isinstance(
        compression_config, dict) else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize,
                                           binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(
        optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_algo, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)

        batch_multiplier = (binarization_config.get("params", {})).get(
            "batch_multiplier", 1)
        train_bin(config, compression_algo, model, criterion, is_inception,
                  optimizer_scheduler, model_name, optimizer, train_loader,
                  train_sampler, val_loader, kd_loss_calculator,
                  batch_multiplier, best_acc1)