Example #1
0
def init_classifier_compression_arg_parser():
    '''Common classifier-compression application command-line arguments.

    Returns an ``argparse.ArgumentParser`` pre-populated with the arguments
    shared by Distiller image-classification compression applications.

    NOTE(review): ``models``, ``distiller`` and ``float_range`` are assumed
    to be defined at module level (``float_range`` presumably aliases
    ``distiller.utils.float_range_argparse_checker``) — confirm against the
    file header.
    '''
    SUMMARY_CHOICES = ['sparsity', 'compute', 'model', 'modules', 'png', 'png_w_params']

    parser = argparse.ArgumentParser(description='Distiller image classification model compression')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', type=lambda s: s.lower(),
                        choices=models.ALL_MODEL_NAMES,
                        help='model architecture: ' +
                        ' | '.join(models.ALL_MODEL_NAMES) +
                        ' (default: resnet18)')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--epochs', type=int, metavar='N', default=90,
                        # Fixed: the help string was missing its closing parenthesis.
                        help='number of total epochs to run (default: 90)')
    parser.add_argument('-b', '--batch-size', default=256, type=int,
                        metavar='N', help='mini-batch size (default: 256)')

    optimizer_args = parser.add_argument_group('Optimizer arguments')
    optimizer_args.add_argument('--lr', '--learning-rate', default=0.1,
                    type=float, metavar='LR', help='initial learning rate')
    optimizer_args.add_argument('--momentum', default=0.9, type=float,
                    metavar='M', help='momentum')
    optimizer_args.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')

    parser.add_argument('--print-freq', '-p', default=10, type=int,
                        metavar='N', help='print frequency (default: 10)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Emit debug log messages')

    # Checkpoint-loading arguments; the three "load" flavors are mutually exclusive.
    load_checkpoint_group = parser.add_argument_group('Resuming arguments')
    load_checkpoint_group_exc = load_checkpoint_group.add_mutually_exclusive_group()
    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    load_checkpoint_group_exc.add_argument('--resume', dest='deprecated_resume', default='', type=str,
                        metavar='PATH', help=argparse.SUPPRESS)
    load_checkpoint_group_exc.add_argument('--resume-from', dest='resumed_checkpoint_path', default='',
                        type=str, metavar='PATH',
                        help='path to latest checkpoint. Use to resume paused training session.')
    load_checkpoint_group_exc.add_argument('--exp-load-weights-from', dest='load_model_path',
                        default='', type=str, metavar='PATH',
                        help='path to checkpoint to load weights from (excluding other fields) (experimental)')
    load_checkpoint_group.add_argument('--pretrained', dest='pretrained', action='store_true',
                        help='use pre-trained model')
    load_checkpoint_group.add_argument('--reset-optimizer', action='store_true',
                        help='Flag to override optimizer if resumed from checkpoint. This will reset epochs count.')

    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                        help='evaluate model on test set')
    parser.add_argument('--activation-stats', '--act-stats', nargs='+', metavar='PHASE', default=list(),
                        help='collect activation statistics on phases: train, valid, and/or test'
                        ' (WARNING: this slows down training)')
    parser.add_argument('--activation-histograms', '--act-hist',
                        type=distiller.utils.float_range_argparse_checker(exc_min=True),
                        metavar='PORTION_OF_TEST_SET',
                        help='Run the model in evaluation mode on the specified portion of the test dataset and '
                             'generate activation histograms. NOTE: This slows down evaluation significantly')
    parser.add_argument('--masks-sparsity', dest='masks_sparsity', action='store_true', default=False,
                        help='print masks sparsity table at end of each epoch')
    parser.add_argument('--param-hist', dest='log_params_histograms', action='store_true', default=False,
                        help='log the parameter tensors histograms to file '
                             '(WARNING: this can use significant disk space)')
    parser.add_argument('--summary', type=lambda s: s.lower(), choices=SUMMARY_CHOICES, action='append',
                        # Fixed: the original dropped the '+' before ' | '.join(...), so
                        # str.join used the entire help text as the separator and produced
                        # a garbled help string instead of appending the choices list.
                        help='print a summary of the model, and exit - options: ' +
                             ' | '.join(SUMMARY_CHOICES))
    parser.add_argument('--export-onnx', action='store', nargs='?', type=str, const='model.onnx', default=None,
                        help='export model to ONNX format')
    parser.add_argument('--compress', dest='compress', type=str, nargs='?', action='store',
                        help='configuration file for pruning the model (default is to use hard-coded schedule)')
    parser.add_argument('--sense', dest='sensitivity', choices=['element', 'filter', 'channel'],
                        type=lambda s: s.lower(), help='test the sensitivity of layers to pruning')
    parser.add_argument('--sense-range', dest='sensitivity_range', type=float, nargs=3, default=[0.0, 0.95, 0.05],
                        help='an optional parameter for sensitivity testing '
                             'providing the range of sparsities to test.\n'
                             'This is equivalent to creating sensitivities = np.arange(start, stop, step)')
    parser.add_argument('--extras', default=None, type=str,
                        help='file with extra configuration information')
    parser.add_argument('--deterministic', '--det', action='store_true',
                        help='Ensure deterministic execution for re-producible results.')
    parser.add_argument('--seed', type=int, default=None,
                        help='seed the PRNG for CPU, CUDA, numpy, and Python')
    parser.add_argument('--gpus', metavar='DEV_ID', default=None,
                        help='Comma-separated list of GPU device IDs to be used '
                             '(default is to use all available devices)')
    parser.add_argument('--cpu', action='store_true', default=False,
                        help='Use CPU only. \n'
                        'Flag not set => uses GPUs according to the --gpus flag value.'
                        'Flag set => overrides the --gpus flag')
    parser.add_argument('--name', '-n', metavar='NAME', default=None, help='Experiment name')
    parser.add_argument('--out-dir', '-o', dest='output_dir', default='logs', help='Path to dump logs and checkpoints')
    parser.add_argument('--validation-split', '--valid-size', '--vs', dest='validation_split',
                        type=float_range(exc_max=True), default=0.1,
                        help='Portion of training dataset to set aside for validation')
    parser.add_argument('--effective-train-size', '--etrs', type=float_range(exc_min=True), default=1.,
                        help='Portion of training dataset to be used in each epoch. '
                             'NOTE: If --validation-split is set, then the value of this argument is applied '
                             'AFTER the train-validation split according to that argument')
    parser.add_argument('--effective-valid-size', '--evs', type=float_range(exc_min=True), default=1.,
                        help='Portion of validation dataset to be used in each epoch. '
                             'NOTE: If --validation-split is set, then the value of this argument is applied '
                             'AFTER the train-validation split according to that argument')
    parser.add_argument('--effective-test-size', '--etes', type=float_range(exc_min=True), default=1.,
                        help='Portion of test dataset to be used in each epoch')
    parser.add_argument('--confusion', dest='display_confusion', default=False, action='store_true',
                        help='Display the confusion matrix')
    parser.add_argument('--num-best-scores', dest='num_best_scores', default=1, type=int,
                        help='number of best scores to track and report (default: 1)')
    parser.add_argument('--load-serialized', dest='load_serialized', action='store_true', default=False,
                        help='Load a model without DataParallel wrapping it')
    parser.add_argument('--thinnify', dest='thinnify', action='store_true', default=False,
                        help='physically remove zero-filters and create a smaller model')

    distiller.quantization.add_post_train_quant_args(parser)
    return parser
Example #2
0
def get_parser():
    """Build the command-line parser for Distiller image classification
    model compression (resnet152 variant).

    Returns an ``argparse.ArgumentParser`` with training, pruning,
    sensitivity-analysis and early-exit arguments.

    NOTE(review): ``SUMMARY_CHOICES``, ``float_range``, ``models``,
    ``distiller`` and ``adc`` are referenced but not defined in this
    function — they are assumed to exist at module level; confirm.
    """
    parser = argparse.ArgumentParser(
        description='Distiller image classification model compression')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet152',
                        type=lambda s: s.lower(),
                        choices=models.ALL_MODEL_NAMES,
                        help='model architecture: ' +
                        ' | '.join(models.ALL_MODEL_NAMES) +
                        ' (default: resnet152)')
    parser.add_argument('-j', '--workers', default=44, type=int, metavar='N',
                        help='number of data loading workers (default: 44)')
    parser.add_argument('--epochs', default=20, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-b', '--batch-size', default=16, type=int, metavar='N',
                        help='mini-batch size (default: 16)')
    parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                        metavar='LR', help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    # Fixed: the actual default is 0e-5 (i.e. 0.0) but the original help text
    # claimed "(default: 1e-4)"; the help now matches the code.
    parser.add_argument('--weight-decay', '--wd', default=0e-5, type=float,
                        metavar='W', help='weight decay (default: 0)')
    # Fixed: the actual default is 500 but the original help text said 50.
    parser.add_argument('--print-freq', '-p', default=500, type=int, metavar='N',
                        help='print frequency (default: 500)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                        help='use pre-trained model')
    parser.add_argument('--activation-stats', '--act-stats', nargs='+',
                        metavar='PHASE', default=list(),
                        help='collect activation statistics on phases: train, valid, and/or test'
                        ' (WARNING: this slows down training)')
    parser.add_argument('--masks-sparsity', dest='masks_sparsity',
                        action='store_true', default=False,
                        help='print masks sparsity table at end of each epoch')
    parser.add_argument('--param-hist', dest='log_params_histograms',
                        action='store_true', default=False,
                        help='log the parameter tensors histograms to file (WARNING: this can use significant disk space)')
    parser.add_argument('--summary', type=lambda s: s.lower(),
                        choices=SUMMARY_CHOICES,
                        help='print a summary of the model, and exit - options: ' +
                        ' | '.join(SUMMARY_CHOICES))
    parser.add_argument('--compress', dest='compress', type=str, nargs='?',
                        action='store',
                        help='configuration file for pruning the model (default is to use hard-coded schedule)')
    # Fixed: the help text said "thinnify" where it means "sensitivity".
    parser.add_argument('--sense', dest='sensitivity',
                        choices=['element', 'filter', 'channel'],
                        type=lambda s: s.lower(),
                        help='test the sensitivity of layers to pruning')
    parser.add_argument('--sense-range', dest='sensitivity_range', type=float,
                        nargs=3, default=[0.4, 0.9, 0.1],
                        help='an optional parameter for sensitivity testing providing the range of sparsities to test.\n'
                        'This is equivalent to creating sensitivities = np.arange(start, stop, step)')
    parser.add_argument('--extras', default=None, type=str,
                        help='file with extra configuration information')
    parser.add_argument('--deterministic', '--det', action='store_true',
                        help='Ensure deterministic execution for re-producible results.')
    parser.add_argument('--gpus', metavar='DEV_ID', default=None,
                        help='Comma-separated list of GPU device IDs to be used (default is to use all available devices)')
    parser.add_argument('--cpu', action='store_true', default=False,
                        help='Use CPU only. \n'
                        'Flag not set => uses GPUs according to the --gpus flag value.'
                        'Flag set => overrides the --gpus flag')
    parser.add_argument('--name', '-n', metavar='NAME', default=None,
                        help='Experiment name')
    parser.add_argument('--out-dir', '-o', dest='output_dir', default='logs',
                        help='Path to dump logs and checkpoints')
    parser.add_argument('--validation-split', '--valid-size', '--vs',
                        dest='validation_split',
                        type=float_range(exc_max=True), default=0.1,
                        help='Portion of training dataset to set aside for validation')
    parser.add_argument('--effective-train-size', '--etrs',
                        type=float_range(exc_min=True), default=1.,
                        help='Portion of training dataset to be used in each epoch. '
                        'NOTE: If --validation-split is set, then the value of this argument is applied '
                        'AFTER the train-validation split according to that argument')
    parser.add_argument('--effective-valid-size', '--evs',
                        type=float_range(exc_min=True), default=1.,
                        help='Portion of validation dataset to be used in each epoch. '
                        'NOTE: If --validation-split is set, then the value of this argument is applied '
                        'AFTER the train-validation split according to that argument')
    parser.add_argument('--effective-test-size', '--etes',
                        type=float_range(exc_min=True), default=1.,
                        help='Portion of test dataset to be used in each epoch')
    parser.add_argument('--confusion', dest='display_confusion', default=False,
                        action='store_true',
                        help='Display the confusion matrix')
    parser.add_argument('--earlyexit_lossweights', type=float, nargs='*',
                        dest='earlyexit_lossweights', default=None,
                        help='List of loss weights for early exits (e.g. --earlyexit_lossweights 0.1 0.3)')
    parser.add_argument('--earlyexit_thresholds', type=float, nargs='*',
                        dest='earlyexit_thresholds', default=None,
                        help='List of EarlyExit thresholds (e.g. --earlyexit_thresholds 1.2 0.9)')
    parser.add_argument('--num-best-scores', dest='num_best_scores', default=1,
                        type=int,
                        help='number of best scores to track and report (default: 1)')
    parser.add_argument('--load-serialized', dest='load_serialized',
                        action='store_true', default=False,
                        help='Load a model without DataParallel wrapping it')
    parser.add_argument('--thinnify', dest='thinnify', action='store_true',
                        default=False,
                        help='physically remove zero-filters and create a smaller model')

    # Register the extra argument groups contributed by Distiller sub-packages.
    distiller.knowledge_distillation.add_distillation_args(
        parser, models.ALL_MODEL_NAMES, True)
    distiller.quantization.add_post_train_quant_args(parser)
    distiller.pruning.greedy_filter_pruning.add_greedy_pruner_args(parser)
    adc.automl_args.add_automl_args(parser)
    return parser
Example #3
0
def get_parser():
    """Build the command-line parser for Distiller image classification
    model compression, with deprecation shims for renamed options.

    Returns an ``argparse.ArgumentParser``.

    NOTE(review): ``SUMMARY_CHOICES``, ``float_range``, ``models``,
    ``distiller``, ``adc``, ``msglogger``, ``DEFAULT_LOADERS_COUNT`` and
    ``DEFAULT_PRINT_FREQUENCY`` are referenced but not defined here —
    presumably module-level names; confirm against the file header.
    """
    parser = argparse.ArgumentParser(
        description='Distiller image classification model compression')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('--arch',
                        '-a',
                        metavar='ARCH',
                        default='resnet18',
                        type=lambda s: s.lower(),
                        choices=models.ALL_MODEL_NAMES,
                        help='model architecture: ' +
                        ' | '.join(models.ALL_MODEL_NAMES) +
                        ' (default: resnet18)')
    parser.add_argument(
        '--loaders',
        type=int,
        metavar='N',
        help=
        'number of data loading workers (default: max({}, {} per GPU). 1 if deterministic is set.)'
        .format(DEFAULT_LOADERS_COUNT, DEFAULT_LOADERS_COUNT))
    parser.add_argument('--epochs',
                        default=90,
                        type=int,
                        metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-b',
                        '--batch-size',
                        default=256,
                        type=int,
                        metavar='N',
                        help='mini-batch size (default: 256)')

    # Optimizer hyper-parameters grouped for `--help` readability.
    optimizer_args = parser.add_argument_group('optimizer_arguments')
    optimizer_args.add_argument('--lr',
                                '--learning-rate',
                                default=0.1,
                                type=float,
                                metavar='LR',
                                help='initial learning rate')
    optimizer_args.add_argument('--momentum',
                                default=0.9,
                                type=float,
                                metavar='M',
                                help='momentum')
    optimizer_args.add_argument('--weight-decay',
                                '--wd',
                                default=1e-4,
                                type=float,
                                metavar='W',
                                help='weight decay (default: 1e-4)')
    parser.add_argument(
        '--reset-optimizer',
        '--reset-lr',
        action='store_true',
        help='Flag to override optimizer if resumed from checkpoint')

    # Only one way of specifying print cadence may be used at a time.
    print_freq_group = parser.add_mutually_exclusive_group()
    print_freq_group.add_argument(
        '--print-frequency',
        type=int,
        metavar='N',
        help='print frequency (default: {} prints per epoch)'.format(
            DEFAULT_PRINT_FREQUENCY))
    print_freq_group.add_argument('--print-period',
                                  type=int,
                                  metavar='N',
                                  help='print every N mini-batches')

    # Full-checkpoint resume and state-dict-only load are mutually exclusive.
    load_checkpoint_group = parser.add_mutually_exclusive_group()
    load_checkpoint_group.add_argument(
        '--resume',
        default='',
        type=str,
        metavar='PATH',
        help='path to latest checkpoint (default: none)')
    load_checkpoint_group.add_argument(
        '--load-state-dict',
        default='',
        type=str,
        metavar='PATH',
        help='load only state dict field from checkpoint at given path')

    parser.add_argument('-e',
                        '--evaluate',
                        dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--pretrained',
                        dest='pretrained',
                        action='store_true',
                        help='use pre-trained model')
    parser.add_argument(
        '--activation-stats',
        '--act-stats',
        nargs='+',
        metavar='PHASE',
        default=list(),
        help='collect activation statistics on phases: train, valid, and/or test'
        ' (WARNING: this slows down training)')
    parser.add_argument('--masks-sparsity',
                        dest='masks_sparsity',
                        action='store_true',
                        default=False,
                        help='print masks sparsity table at end of each epoch')
    parser.add_argument(
        '--param-hist',
        dest='log_params_histograms',
        action='store_true',
        default=False,
        help=
        'log the parameter tensors histograms to file (WARNING: this can use significant disk space)'
    )
    parser.add_argument(
        '--summary',
        type=lambda s: s.lower(),
        choices=SUMMARY_CHOICES,
        help='print a summary of the model, and exit - options: ' +
        ' | '.join(SUMMARY_CHOICES))
    parser.add_argument(
        '--compress',
        dest='compress',
        type=str,
        nargs='?',
        action='store',
        help=
        'configuration file for pruning the model (default is to use hard-coded schedule)'
    )
    parser.add_argument('--sense',
                        dest='sensitivity',
                        choices=['element', 'filter', 'channel'],
                        type=lambda s: s.lower(),
                        help='test the sensitivity of layers to pruning')
    parser.add_argument(
        '--sense-range',
        dest='sensitivity_range',
        type=float,
        nargs=3,
        default=[0.0, 0.95, 0.05],
        help=
        'an optional parameter for sensitivity testing providing the range of sparsities to test.\n'
        'This is equivalent to creating sensitivities = np.arange(start, stop, step)'
    )
    parser.add_argument('--extras',
                        default=None,
                        type=str,
                        help='file with extra configuration information')
    parser.add_argument(
        '--deterministic',
        '--det',
        action='store_true',
        help='Ensure deterministic execution for re-producible results.')

    # GPU selection and CPU-only mode are mutually exclusive.
    device_group = parser.add_mutually_exclusive_group()
    device_group.add_argument(
        '--gpus',
        metavar='DEV_ID',
        default=None,
        help=
        'Comma-separated list of GPU device IDs to be used (default: use all available devices)'
    )
    device_group.add_argument('--use-cpu',
                              action='store_true',
                              default=False,
                              help='Force use of CPU only')

    parser.add_argument('--name',
                        '-n',
                        metavar='NAME',
                        default=None,
                        help='Experiment name')
    parser.add_argument('--out-dir',
                        '-o',
                        dest='output_dir',
                        default='logs',
                        help='Path to dump logs and checkpoints')
    parser.add_argument(
        '--validation-split',
        '--vs',
        type=float_range(exc_max=True),
        default=0,
        metavar='FRACTION',
        help='Portion of training dataset to set aside for validation')
    parser.add_argument(
        '--effective-train-size',
        '--etrs',
        type=float_range(exc_min=True),
        default=1.,
        help='Portion of training dataset to be used in each epoch. '
        'NOTE: If --validation-split is set, then the value of this argument is applied '
        'AFTER the train-validation split according to that argument')
    parser.add_argument(
        '--effective-valid-size',
        '--evs',
        type=float_range(exc_min=True),
        default=1.,
        help='Portion of validation dataset to be used in each epoch. '
        'NOTE: If --validation-split is set, then the value of this argument is applied '
        'AFTER the train-validation split according to that argument')
    parser.add_argument(
        '--effective-test-size',
        '--etes',
        type=float_range(exc_min=True),
        default=1.,
        help='Portion of test dataset to be used in each epoch')
    parser.add_argument('--confusion',
                        dest='display_confusion',
                        default=False,
                        action='store_true',
                        help='Display the confusion matrix')
    parser.add_argument(
        '--earlyexit_lossweights',
        type=float,
        nargs='*',
        dest='earlyexit_lossweights',
        default=None,
        help=
        'List of loss weights for early exits (e.g. --earlyexit_lossweights 0.1 0.3)'
    )
    parser.add_argument(
        '--earlyexit_thresholds',
        type=float,
        nargs='*',
        dest='earlyexit_thresholds',
        default=None,
        help=
        'List of EarlyExit thresholds (e.g. --earlyexit_thresholds 1.2 0.9)')
    parser.add_argument(
        '--num-best-scores',
        dest='num_best_scores',
        default=1,
        type=int,
        help='number of best scores to track and report (default: 1)')
    parser.add_argument('--load-serialized',
                        dest='load_serialized',
                        action='store_true',
                        default=False,
                        help='Load a model without DataParallel wrapping it')
    parser.add_argument(
        '--thinnify',
        dest='thinnify',
        action='store_true',
        default=False,
        help='physically remove zero-filters and create a smaller model')

    # deprecations
    # Deprecated flags below are kept as hidden no-ops: the deprecation
    # warning is abused as the `type=` callable, so it fires once per value
    # parsed and the parsed value itself is discarded (the callable returns
    # None).
    def deprecation_warning(*args, old_keys=None, new_keys=None, **kwargs):
        """Log a deprecation warning naming the old (and replacement) flags."""
        if old_keys and new_keys:
            msglogger.warning(
                '{okey} have been deprecated. Try {nkey} instead.'.format(
                    okey=old_keys, nkey=new_keys))
        elif old_keys:
            msglogger.warning(
                '{okey} have been deprecated.'.format(okey=old_keys))
        else:
            msglogger.warning(
                'Some arguments have been deprecated and ignored.')

    parser.add_argument('--valid-size',
                        '--validation-size',
                        type=functools.partial(
                            deprecation_warning,
                            old_keys=['--valid-size', '--validation-size'],
                            new_keys=['--validation-split', '--vs']),
                        help=argparse.SUPPRESS)
    parser.add_argument('--print-freq',
                        '-p',
                        type=functools.partial(deprecation_warning,
                                               old_keys=['--print-freq', '-p'],
                                               new_keys=['--print-period']),
                        help=argparse.SUPPRESS)
    parser.add_argument('-j',
                        '--workers',
                        type=functools.partial(deprecation_warning,
                                               old_keys=['-j', '--workers'],
                                               new_keys=['--loaders']),
                        help=argparse.SUPPRESS)

    # Register the extra argument groups contributed by Distiller sub-packages.
    distiller.knowledge_distillation.add_distillation_args(
        parser, models.ALL_MODEL_NAMES, True)
    distiller.quantization.add_post_train_quant_args(parser)
    distiller.pruning.greedy_filter_pruning.add_greedy_pruner_args(parser)
    adc.automl_args.add_automl_args(parser)
    return parser
def get_parser(model_names, dataset_names):
    """
    Build and return the command-line argument parser for the image
    classification application.

    Args:
        model_names: valid architecture names; used as ``--arch`` choices
            and forwarded to the distillation argument group.
        dataset_names: valid dataset names; used as ``--dataset`` choices.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    parser = argparse.ArgumentParser(description='Image classification model')
    # `device` (defined elsewhere in this module) presumably converts the
    # numeric ID to a device descriptor (84 -> AI84) -- verify against its
    # definition.
    parser.add_argument('--device',
                        type=device,
                        default=84,
                        help='set device (default: AI84)')
    # Fixed typo in help text: "simluate" -> "simulate".
    parser.add_argument('--8-bit-mode',
                        '-8',
                        dest='act_mode_8bit',
                        action='store_true',
                        default=False,
                        help='simulate device operation (8-bit data)')
    parser.add_argument('--arch',
                        '-a',
                        '--model',
                        metavar='ARCH',
                        required=True,
                        type=lambda s: s.lower(),
                        dest='cnn',
                        choices=model_names,
                        help='model architecture: ' + ' | '.join(model_names))
    parser.add_argument('--dataset',
                        metavar='S',
                        required=True,
                        choices=dataset_names,
                        help="dataset(s) (" + ', '.join(dataset_names) + ")")
    parser.add_argument('--truncate-testset',
                        action='store_true',
                        default=False,
                        help='get only the first image from the test set')
    parser.add_argument('--data',
                        metavar='DIR',
                        default='data',
                        help='path to dataset')
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    # NOTE(review): help text claims a default of 90, but no default is set
    # here (args.epochs is None when omitted) -- presumably the caller
    # substitutes 90; confirm.
    parser.add_argument('--epochs',
                        type=int,
                        metavar='N',
                        help='number of total epochs to run (default: 90)')
    parser.add_argument('-b',
                        '--batch-size',
                        default=256,
                        type=int,
                        metavar='N',
                        help='mini-batch size (default: 256)')
    parser.add_argument('--kernel-stats',
                        action='store_true',
                        default=False,
                        help='compute kernel statistics')
    parser.add_argument(
        '--use-bias',
        action='store_true',
        default=False,
        help='for models that support both bias and no bias, set the '
        '`use bias` flag to true')
    parser.add_argument(
        '--avg-pool-rounding',
        action='store_true',
        default=False,
        help='when simulating, use "round()" in AvgPool operations '
        '(default: use "floor()")')

    qat_args = parser.add_argument_group('Quantization Arguments')
    qat_args.add_argument('--qat-policy',
                          dest='qat_policy',
                          default='qat_policy.yaml',
                          help='path to YAML file that defines the '
                          'QAT (quantization-aware training) policy')

    optimizer_args = parser.add_argument_group('Optimizer Arguments')
    optimizer_args.add_argument('--optimizer',
                                default='SGD',
                                help='optimizer for training (default: SGD)')
    optimizer_args.add_argument('--lr',
                                '--learning-rate',
                                default=0.1,
                                type=float,
                                metavar='LR',
                                help='initial learning rate')
    optimizer_args.add_argument('--momentum',
                                default=0.9,
                                type=float,
                                metavar='M',
                                help='momentum')
    optimizer_args.add_argument('--weight-decay',
                                '--wd',
                                default=1e-4,
                                type=float,
                                metavar='W',
                                help='weight decay (default: 1e-4)')

    parser.add_argument('--print-freq',
                        '-p',
                        default=10,
                        type=int,
                        metavar='N',
                        help='print frequency (default: 10)')

    # --resume-from and --exp-load-weights-from are mutually exclusive:
    # the first restores a full training session, the second only weights.
    load_checkpoint_group = parser.add_argument_group('Resuming Arguments')
    load_checkpoint_group_exc = load_checkpoint_group.add_mutually_exclusive_group(
    )
    load_checkpoint_group_exc.add_argument(
        '--resume-from',
        dest='resumed_checkpoint_path',
        default='',
        type=str,
        metavar='PATH',
        help='path to latest checkpoint. Use to resume paused '
        'training session.')
    load_checkpoint_group_exc.add_argument(
        '--exp-load-weights-from',
        dest='load_model_path',
        default='',
        type=str,
        metavar='PATH',
        help='path to checkpoint to load weights from '
        '(excluding other fields) (experimental)')
    load_checkpoint_group.add_argument('--pretrained',
                                       dest='pretrained',
                                       action='store_true',
                                       help='use pre-trained model')
    load_checkpoint_group.add_argument(
        '--reset-optimizer',
        action='store_true',
        help='Flag to override optimizer if resumed from '
        'checkpoint. This will reset epochs count.')

    parser.add_argument('-e',
                        '--evaluate',
                        dest='evaluate',
                        action='store_true',
                        help='evaluate model on test set')
    mgroup = parser.add_mutually_exclusive_group()
    mgroup.add_argument(
        '--save-csv',
        dest='csv_prefix',
        default=None,
        type=str,
        help='save as CSVs with the given prefix during evaluation')
    mgroup.add_argument(
        '--save-sample',
        dest='generate_sample',
        type=int,
        help='save the sample at given index as NumPy sample data')
    parser.add_argument(
        '--shap',
        default=0,
        type=int,
        help='select # of images from the test set and plot SHAP after evaluation')
    parser.add_argument(
        '--activation-stats',
        '--act-stats',
        nargs='+',
        metavar='PHASE',
        default=list(),
        help='collect activation statistics on phases: train, valid, and/or test'
        ' (WARNING: this slows down training)')
    parser.add_argument('--masks-sparsity',
                        dest='masks_sparsity',
                        action='store_true',
                        default=False,
                        help='print masks sparsity table at end of each epoch')
    parser.add_argument(
        '--param-hist',
        dest='log_params_histograms',
        action='store_true',
        default=False,
        help='log the parameter tensors histograms to file (WARNING: this can use '
        'significant disk space)')
    parser.add_argument(
        '--summary',
        type=lambda s: s.lower(),
        choices=SUMMARY_CHOICES,
        help='print a summary of the model, and exit - options: ' +
        ' | '.join(SUMMARY_CHOICES))
    # Fixed: help string was missing its closing parenthesis.
    parser.add_argument(
        '--summary-filename',
        default='model',
        help='file name (w/o extension) for the model summary (default: "model")'
    )

    parser.add_argument('--compress',
                        dest='compress',
                        type=str,
                        nargs='?',
                        action='store',
                        help='configuration file for pruning the model '
                        '(default is to use hard-coded schedule)')
    parser.add_argument('--sense',
                        dest='sensitivity',
                        choices=['element', 'filter', 'channel'],
                        type=lambda s: s.lower(),
                        help='test the sensitivity of layers to pruning')
    parser.add_argument(
        '--sense-range',
        dest='sensitivity_range',
        type=float,
        nargs=3,
        default=[0.0, 0.95, 0.05],
        help='an optional parameter for sensitivity testing providing the range '
        'of sparsities to test.\n'
        'This is equivalent to creating sensitivities = np.arange(start, '
        'stop, step)')
    parser.add_argument('--extras',
                        default=None,
                        type=str,
                        help='file with extra configuration information')
    parser.add_argument(
        '--deterministic',
        '--det',
        action='store_true',
        help='Ensure deterministic execution for re-producible results.')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='seed the PRNG for CPU, CUDA, numpy, and Python')
    parser.add_argument(
        '--gpus',
        metavar='DEV_ID',
        default=None,
        help='Comma-separated list of GPU device IDs to be used (default is to '
        'use all available devices)')
    parser.add_argument(
        '--cpu',
        action='store_true',
        default=False,
        help='Use CPU only. \n'
        'Flag not set => uses GPUs according to the --gpus flag value.'
        'Flag set => overrides the --gpus flag')
    parser.add_argument('--name',
                        '-n',
                        metavar='NAME',
                        default=None,
                        help='Experiment name')
    parser.add_argument('--out-dir',
                        '-o',
                        dest='output_dir',
                        default='logs',
                        help='Path to dump '
                        'logs and checkpoints')
    parser.add_argument(
        '--validation-split',
        '--valid-size',
        '--vs',
        dest='validation_split',
        type=float_range(exc_max=True),
        default=0.1,
        help='Portion of training dataset to set aside for validation')
    parser.add_argument(
        '--effective-train-size',
        '--etrs',
        type=float_range(exc_min=True),
        default=1.,
        help='Portion of training dataset to be used in each epoch. '
        'NOTE: If --validation-split is set, then the value of this argument '
        'is applied AFTER the train-validation split according to that '
        'argument')
    parser.add_argument(
        '--effective-valid-size',
        '--evs',
        type=float_range(exc_min=True),
        default=1.,
        help='Portion of validation dataset to be used in each epoch. '
        'NOTE: If --validation-split is set, then the value of this argument '
        'is applied AFTER the train-validation split according to that '
        'argument')
    parser.add_argument(
        '--effective-test-size',
        '--etes',
        type=float_range(exc_min=True),
        default=1.,
        help='Portion of test dataset to be used in each epoch')
    parser.add_argument('--confusion',
                        dest='display_confusion',
                        default=False,
                        action='store_true',
                        help='Display the confusion matrix')
    parser.add_argument('--embedding',
                        dest='display_embedding',
                        default=False,
                        action='store_true',
                        help='Display embedding (using projector)')
    parser.add_argument('--pr-curves',
                        dest='display_prcurves',
                        default=False,
                        action='store_true',
                        help='Display the precision-recall curves')
    # store_false with default=True: args.tblog is True unless the user
    # passes --no-tensorboard.
    parser.add_argument('--no-tensorboard',
                        dest='tblog',
                        default=True,
                        action='store_false',
                        help='Disable TensorBoard')
    parser.add_argument('--regression',
                        dest='regression',
                        default=False,
                        action='store_true',
                        help='Force regression output')
    parser.add_argument('--earlyexit_lossweights',
                        type=float,
                        nargs='*',
                        dest='earlyexit_lossweights',
                        default=None,
                        help='List of loss weights for early exits '
                        '(e.g. --earlyexit_lossweights 0.1 0.3)')
    parser.add_argument(
        '--earlyexit_thresholds',
        type=float,
        nargs='*',
        dest='earlyexit_thresholds',
        default=None,
        help='List of EarlyExit thresholds (e.g. --earlyexit_thresholds 1.2 0.9)')
    parser.add_argument(
        '--num-best-scores',
        dest='num_best_scores',
        default=1,
        type=int,
        help='number of best scores to track and report (default: 1)')
    parser.add_argument('--load-serialized',
                        dest='load_serialized',
                        action='store_true',
                        default=False,
                        help='Load a model without DataParallel wrapping it')
    parser.add_argument(
        '--thinnify',
        dest='thinnify',
        action='store_true',
        default=False,
        help='physically remove zero-filters and create a smaller model')
    parser.add_argument(
        '--sparsity-perf',
        action='store_true',
        default=False,
        help='when determining best epoch, use sparsity as primary key')

    # Argument groups contributed by other components (defined elsewhere).
    distiller.knowledge_distillation.add_distillation_args(
        parser, model_names, True)
    distiller.quantization.add_post_train_quant_args(parser)
    distiller.pruning.greedy_filter_pruning.add_greedy_pruner_args(parser)
    adc.add_automl_args(parser)
    return parser
# Example #5
def get_parser():
    """Construct and return the command-line parser for Distiller image
    classification model compression."""
    p = argparse.ArgumentParser(
        description='Distiller image classification model compression')

    # Dataset / model / basic training knobs.
    p.add_argument('data', metavar='DIR', help='path to dataset')
    p.add_argument(
        '--arch', '-a', metavar='ARCH', default='resnet18',
        type=lambda s: s.lower(), choices=models.ALL_MODEL_NAMES,
        help='model architecture: ' + ' | '.join(models.ALL_MODEL_NAMES) +
        ' (default: resnet18)')
    p.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                   help='number of data loading workers (default: 4)')
    p.add_argument('--epochs', default=90, type=int, metavar='N',
                   help='number of total epochs to run')
    p.add_argument('-b', '--batch-size', default=256, type=int, metavar='N',
                   help='mini-batch size (default: 256)')

    # Optimizer hyper-parameters.
    p.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                   metavar='LR', help='initial learning rate')
    p.add_argument('--momentum', default=0.9, type=float, metavar='M',
                   help='momentum')
    p.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                   metavar='W', help='weight decay (default: 1e-4)')

    # Logging, checkpointing, evaluation.
    p.add_argument('--print-freq', '-p', default=10, type=int, metavar='N',
                   help='print frequency (default: 10)')
    p.add_argument('--resume', default='', type=str, metavar='PATH',
                   help='path to latest checkpoint (default: none)')
    p.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                   help='evaluate model on validation set')
    p.add_argument('--pretrained', dest='pretrained', action='store_true',
                   help='use pre-trained model')

    # Statistics collection.
    p.add_argument('--activation-stats', '--act-stats', nargs='+',
                   metavar='PHASE', default=list(),
                   help='collect activation statistics on phases: '
                        'train, valid, and/or test'
                        ' (WARNING: this slows down training)')
    p.add_argument('--masks-sparsity', dest='masks_sparsity',
                   action='store_true', default=False,
                   help='print masks sparsity table at end of each epoch')
    p.add_argument('--param-hist', dest='log_params_histograms',
                   action='store_true', default=False,
                   help='log the parameter tensors histograms to file '
                        '(WARNING: this can use significant disk space)')
    p.add_argument('--summary', type=lambda s: s.lower(),
                   choices=SUMMARY_CHOICES,
                   help='print a summary of the model, and exit - options: ' +
                   ' | '.join(SUMMARY_CHOICES))

    # Compression schedule and sensitivity analysis.
    p.add_argument('--compress', dest='compress', type=str, nargs='?',
                   action='store',
                   help='configuration file for pruning the model '
                        '(default is to use hard-coded schedule)')
    p.add_argument('--sense', dest='sensitivity',
                   choices=['element', 'filter', 'channel'],
                   type=lambda s: s.lower(),
                   help='test the sensitivity of layers to pruning')
    p.add_argument('--sense-range', dest='sensitivity_range', type=float,
                   nargs=3, default=[0.0, 0.95, 0.05],
                   help='an optional parameter for sensitivity testing '
                        'providing the range of sparsities to test.\n'
                        'This is equivalent to creating sensitivities '
                        '= np.arange(start, stop, step)')
    p.add_argument('--extras', default=None, type=str,
                   help='file with extra configuration information')

    # Execution environment.
    p.add_argument('--deterministic', '--det', action='store_true',
                   help='Ensure deterministic execution for re-producible results.')
    p.add_argument('--gpus', metavar='DEV_ID', default=None,
                   help='Comma-separated list of GPU device IDs to be used '
                        '(default is to use all available devices)')
    p.add_argument('--cpu', action='store_true', default=False,
                   help='Use CPU only. \n'
                        'Flag not set => uses GPUs according to the '
                        '--gpus flag value.'
                        'Flag set => overrides the --gpus flag')
    p.add_argument('--name', '-n', metavar='NAME', default=None,
                   help='Experiment name')
    p.add_argument('--out-dir', '-o', dest='output_dir', default='logs',
                   help='Path to dump logs and checkpoints')

    # Dataset split sizing.
    p.add_argument('--validation-split', '--valid-size', '--vs',
                   dest='validation_split', type=float_range(exc_max=True),
                   default=0.1,
                   help='Portion of training dataset to set aside for validation')
    p.add_argument('--effective-train-size', '--etrs',
                   type=float_range(exc_min=True), default=1.,
                   help='Portion of training dataset to be used in each epoch. '
                        'NOTE: If --validation-split is set, then the value of '
                        'this argument is applied '
                        'AFTER the train-validation split according to that argument')
    p.add_argument('--effective-valid-size', '--evs',
                   type=float_range(exc_min=True), default=1.,
                   help='Portion of validation dataset to be used in each epoch. '
                        'NOTE: If --validation-split is set, then the value of '
                        'this argument is applied '
                        'AFTER the train-validation split according to that argument')
    p.add_argument('--effective-test-size', '--etes',
                   type=float_range(exc_min=True), default=1.,
                   help='Portion of test dataset to be used in each epoch')

    # Miscellaneous / experimental.
    p.add_argument('--adc', dest='ADC', action='store_true', help='temp HACK')
    p.add_argument('--adc-params', dest='ADC_params', default=None,
                   help='temp HACK')
    p.add_argument('--confusion', dest='display_confusion', default=False,
                   action='store_true', help='Display the confusion matrix')
    p.add_argument('--earlyexit_lossweights', type=float, nargs='*',
                   dest='earlyexit_lossweights', default=None,
                   help='List of loss weights for early exits '
                        '(e.g. --earlyexit_lossweights 0.1 0.3)')
    p.add_argument('--earlyexit_thresholds', type=float, nargs='*',
                   dest='earlyexit_thresholds', default=None,
                   help='List of EarlyExit thresholds '
                        '(e.g. --earlyexit_thresholds 1.2 0.9)')
    p.add_argument('--num-best-scores', dest='num_best_scores', default=1,
                   type=int,
                   help='number of best scores to track and report (default: 1)')
    p.add_argument('--load-serialized', dest='load_serialized',
                   action='store_true', default=False,
                   help='Load a model without DataParallel wrapping it')
    p.add_argument('--thinnify', dest='thinnify', action='store_true',
                   default=False,
                   help='physically remove zero-filters and create a smaller model')

    # Argument groups contributed by other Distiller components.
    distiller.knowledge_distillation.add_distillation_args(
        p, models.ALL_MODEL_NAMES, True)
    distiller.quantization.add_post_train_quant_args(p)

    return p