Ejemplo n.º 1
0
def set_log_level(model: ORTModule,
                  level: LogLevel = LogLevel.WARNING) -> None:
    '''Configure the logging verbosity used by `model`.

    The requested `level` is stored on the execution manager returned by
    `model._execution_manager(...)` for both values of its mode flag
    (`True` and `False`).

    Args:
        model (ORTModule): ORTModule instance to configure
        level (LogLevel, default is WARNING): Verbosity to apply. It must be one of
            VERBOSE, INFO, WARNING, ERROR, FATAL.

    Raises:
        TypeError: if :attr:`model` is not a :class:`ORTModule` object
        TypeError: if :attr:`level` is not a :class:`LogLevel` object

    '''

    # Validate arguments up front so a bad call never partially updates `model`
    model_is_valid = isinstance(model, ORTModule)
    if not model_is_valid:
        message = f'`model` must be a `ORTModule` object, but `{type(model)}` object was specified.'
        raise TypeError(message)

    level_is_valid = isinstance(level, LogLevel)
    if not level_is_valid:
        message = f'`level` must be a `LogLevel` object, but `{type(level)}` object was specified.'
        raise TypeError(message)

    # Apply the level to the manager of each mode (True first, then False)
    for is_training in (True, False):
        manager = model._execution_manager(is_training)
        manager._loglevel = level
Ejemplo n.º 2
0
 def on_before_accelerator_backend_setup(
         self, trainer: Trainer, pl_module: LightningModule) -> None:
     """Ensure `pl_module.model` is wrapped in an `ORTModule`.

     Args:
         trainer: Trainer passed in by the caller; not used by this hook.
         pl_module: LightningModule whose `model` attribute is wrapped in place.

     Raises:
         MisconfigurationException: if `pl_module` has no `model` attribute.
     """
     if hasattr(pl_module, "model"):
         # Already wrapped models are left untouched to avoid double wrapping
         if isinstance(pl_module.model, ORTModule):
             return
         pl_module.model = ORTModule(pl_module.model)
         return

     raise MisconfigurationException(
         "Torch ORT requires to wrap a single model that defines a forward function "
         "assigned as `model` inside the `LightningModule`.")
Ejemplo n.º 3
0
def test_set_propagate_cast_with_bad_strategy(bad_strategy):
    '''Check that an invalid `strategy` is rejected with a descriptive `TypeError`.'''
    # Wrap a small network placed on the GPU with ORTModule
    input_dim, hidden_dim, output_dim = 784, 500, 10
    net = NeuralNetSinglePositionalArgument(input_dim, hidden_dim, output_dim)
    model = ORTModule(net.to('cuda'))

    expected_fragment = "`strategy` must be a `PropagateCastOpsStrategy` object, but"
    with pytest.raises(TypeError) as exc_info:
        set_propagate_cast_ops_optimization(model=model, strategy=bad_strategy)
    assert expected_fragment in str(exc_info.value)
Ejemplo n.º 4
0
def test_set_loglevel_with_bad_loglevel(bad_level):
    '''Check that an invalid `level` is rejected by `set_log_level` with a `TypeError`.'''
    # Wrap a small network placed on the GPU with ORTModule
    input_dim, hidden_dim, output_dim = 784, 500, 10
    net = NeuralNetSinglePositionalArgument(input_dim, hidden_dim, output_dim)
    model = ORTModule(net.to('cuda'))

    expected_fragment = "`level` must be a `LogLevel` object, but "
    with pytest.raises(TypeError) as exc_info:
        set_log_level(model=model, level=bad_level)
    assert expected_fragment in str(exc_info.value)
Ejemplo n.º 5
0
def test_set_propagate_cast_with_bad_level(bad_level):
    '''Check that an invalid `level` is rejected even when the `strategy` is valid.'''
    # Wrap a small network placed on the GPU with ORTModule
    input_dim, hidden_dim, output_dim = 784, 500, 10
    net = NeuralNetSinglePositionalArgument(input_dim, hidden_dim, output_dim)
    model = ORTModule(net.to('cuda'))

    # A valid strategy, so that only the level can trigger the failure
    call_kwargs = {
        'model': model,
        'strategy': PropagateCastOpsStrategy.INSERT_AND_REDUCE,
        'level': bad_level,
    }
    with pytest.raises(TypeError) as exc_info:
        set_propagate_cast_ops_optimization(**call_kwargs)

    expected_fragment = "`level` must be a `PropagateCastLevel` object"
    assert expected_fragment in str(exc_info.value)
Ejemplo n.º 6
0
def save_intermediate_onnx_graphs(model: ORTModule,
                                  prefix: str,
                                  enable: bool = True) -> None:
    '''Saves all intermediate ONNX graphs produced by `model` at the specified `prefix`

    When `enable` is `False`, ONNX graphs saving is disabled, otherwise
    multiple ONNX graphs will be saved as specified by `prefix` with an ending ".onnx" extension.
    Note that `prefix` is validated (and stored) even when `enable` is `False`.

    Args:
        model (ORTModule): ORTModule instance
        prefix (str): Full path plus a file prefix to be prepended to all ONNX graphs.
                      Prefix must have a directory and file prefix components (e.g. /foo/bar_).
                      The directory must exist and be writable by the user.
        enable (bool, default is True): Toggle to enable or disable ONNX graph saving

    Raises:
        TypeError: if :attr:`model` is not a :class:`ORTModule` object
        TypeError: if :attr:`prefix` is not a string
        TypeError: if :attr:`enable` is not a boolean
        NotADirectoryError: if the directory component of :attr:`prefix` is missing
            or is not an existing directory
        NameError: if :attr:`prefix` does not contain a file prefix component

    '''

    if not isinstance(model, ORTModule):
        raise TypeError(
            f'`model` must be a `ORTModule` object, but `{type(model)}` object was specified.'
        )

    if not isinstance(prefix, str):
        raise TypeError('`prefix` must be a non-empty string.')

    if not isinstance(enable, bool):
        raise TypeError('`enable` must be a boolean.')

    dir_name = os.path.dirname(prefix)
    prefix_name = os.path.basename(prefix)

    # `isdir` (rather than `exists`) also rejects a regular file used as the
    # directory component; an empty `dir_name` is rejected as well.
    if not os.path.isdir(dir_name):
        raise NotADirectoryError(
            f'{dir_name} is not a valid directory to save ONNX graphs.')

    if not prefix_name or prefix_name.isspace():
        raise NameError(
            f'{prefix_name} is not a valid prefix name for the ONNX graph files.'
        )

    # Set flags for both eval and training mode
    for mode in (True, False):
        execution_manager = model._execution_manager(is_training=mode)
        execution_manager._save_onnx = enable
        execution_manager._save_onnx_prefix = prefix
Ejemplo n.º 7
0
def test_set_propagate_cast(strategy, level):
    '''Check that `strategy` and `level` are stored for both training and eval mode.'''
    # Wrap a small network placed on the GPU with ORTModule
    input_dim, hidden_dim, output_dim = 784, 500, 10
    net = NeuralNetSinglePositionalArgument(input_dim, hidden_dim, output_dim)
    model = ORTModule(net.to('cuda'))

    set_propagate_cast_ops_optimization(model=model,
                                        level=level,
                                        strategy=strategy)

    # Read the values back from the manager of each mode (True first, then False)
    for is_training in (True, False):
        stored_strategy = model._execution_manager(
            is_training=is_training)._propagate_cast_ops_strategy
        assert stored_strategy == strategy

        stored_level = model._execution_manager(
            is_training=is_training)._propagate_cast_ops_level
        assert stored_level == level
Ejemplo n.º 8
0
def test_save_onnx(enable):
    '''Check that ONNX graphs are written when saving is enabled and absent otherwise.

    `enable` may be `None`, in which case the default value of
    `save_intermediate_onnx_graphs` is exercised (and expected to be `True`).
    '''
    # Borrow the name of a temporary file as a safe filename prefix; the
    # temporary file itself is gone once the `with` block exits
    with tempfile.NamedTemporaryFile() as tmp_file:
        prefix = tmp_file.name

    # Wrap a small network placed on the GPU with ORTModule
    device = 'cuda'
    batch_size, input_dim, hidden_dim, output_dim = 64, 784, 500, 10
    net = NeuralNetSinglePositionalArgument(input_dim, hidden_dim, output_dim)
    model = ORTModule(net.to(device))

    if enable is not None:
        save_intermediate_onnx_graphs(model=model,
                                      enable=enable,
                                      prefix=prefix)
    else:
        # Rely on the implicit default, then pin it for the assertions below
        save_intermediate_onnx_graphs(model=model, prefix=prefix)
        enable = True

    # Run the model once on a random batch
    _ = model(torch.randn(batch_size, input_dim, device=device))

    # Check saving status
    active_manager = model._execution_manager(model._is_training())
    assert enable == active_manager._save_onnx

    # Every graph file must exist exactly when saving is enabled; whatever
    # was written is removed before the test completes
    expected_suffixes = (
        '_inference_optimized.onnx',
        '_torch_exporter.onnx',
        '_training.onnx',
        '_training_optimized.onnx',
    )
    all_as_expected = True
    for suffix in expected_suffixes:
        graph_path = prefix + suffix
        is_present = os.path.exists(graph_path)
        if is_present != bool(enable):
            all_as_expected = False

        # Clean-up time
        if is_present:
            os.remove(graph_path)

    assert all_as_expected is True
Ejemplo n.º 9
0
def test_save_onnx_with_bad_prefix(bad_prefix, error_type):
    '''Check that each kind of invalid `prefix` raises the matching exception.

    `error_type` selects the expected failure; unknown values perform no check.
    '''
    # Wrap a small network placed on the GPU with ORTModule
    input_dim, hidden_dim, output_dim = 784, 500, 10
    net = NeuralNetSinglePositionalArgument(input_dim, hidden_dim, output_dim)
    model = ORTModule(net.to('cuda'))

    # error_type -> (expected exception class, fragment of its message)
    expectations = {
        'folder_not_exist':
        (NotADirectoryError, "is not a valid directory to save ONNX graphs"),
        'prefix_name_not_valid':
        (NameError, "is not a valid prefix name for the ONNX graph files"),
        'prefix_type_not_valid':
        (TypeError, "`prefix` must be a non-empty string"),
    }

    expectation = expectations.get(error_type)
    if expectation is None:
        return

    expected_exception, expected_fragment = expectation
    with pytest.raises(expected_exception) as exc_info:
        save_intermediate_onnx_graphs(model=model, prefix=bad_prefix)
    assert expected_fragment in str(exc_info.value)
Ejemplo n.º 10
0
def test_set_loglevel(level):
    '''Check that `set_log_level` stores the level for both training and eval mode.

    `level` may be `None`, in which case the default value of `set_log_level`
    is exercised (and expected to be `LogLevel.WARNING`).
    '''
    # Wrap a small network placed on the GPU with ORTModule
    device = 'cuda'
    batch_size, input_dim, hidden_dim, output_dim = 64, 784, 500, 10
    net = NeuralNetSinglePositionalArgument(input_dim, hidden_dim, output_dim)
    model = ORTModule(net.to(device))

    if level is not None:
        set_log_level(model=model, level=level)
    else:
        # Rely on the implicit default, then pin it for the assertions below
        set_log_level(model=model)
        level = LogLevel.WARNING

    # Run the model once on a random batch
    _ = model(torch.randn(batch_size, input_dim, device=device))

    # The level must be set on the manager of each mode (True first, then False)
    for is_training in (True, False):
        manager = model._execution_manager(is_training=is_training)
        assert manager._loglevel == level
Ejemplo n.º 11
0
def set_propagate_cast_ops_optimization(
        model: ORTModule,
        strategy: PropagateCastOpsStrategy = PropagateCastOpsStrategy.FLOOD_FILL,
        level: PropagateCastLevel = PropagateCastLevel.NOT_USED) -> None:
    '''Set Cast Op propagation strategy for ONNX graph optimization in an attempt to achieve higher throughput

    Cast Op propagation allows ONNX graph to be optimized by replacing float32 nodes by their 16-bit counterpart
    without losing precision during computation. To enable cast propagation, user must select `strategy` and a `level`.
    Each combination of strategy and level have predefined lists of allowed nodes that are safe to move float-cast
    operations from inputs to outputs and float16-cast operations from outputs to inputs.

    Args:
        model (ORTModule): ORTModule instance to apply the cast propagation configuration
        strategy (PropagateCastOpsStrategy, default is FLOOD_FILL): specify the choice of cast propagation optimization.
            It must be one of NONE, INSERT_AND_REDUCE, FLOOD_FILL or REMOVE_INPUT_OUTPUT_UP_DOWN_CASTS.
            NONE strategy doesn't perform any cast propagation transformation on the graph, although other optimizations
            locally change cast operations. For example, in order to fuse Transpose and MatMul nodes,
            the Transpose/MatMul fusion optimization could interchange Transpose and Cast if the Cast node exists
            between Transpose and MatMul.
            INSERT_AND_REDUCE strategy inserts and reduces cast operations around nodes with a predefined list of
            allowed nodes, even if that results in changing nodes outside the expanded float16 region.
            FLOOD_FILL strategy expands float16 regions using a predefined allowed list of nodes without modifying
            nodes outside the expanded float16 region.
            REMOVE_INPUT_OUTPUT_UP_DOWN_CASTS strategy removes float Cast on inputs and float16 Casts on outputs for
            nodes of any operations to increase throughput. For example, if both inputs of a node with Add operation,
            happen to be outputs of float Cast operations and the output happen to be input to a float16 Cast operation,
            requiring the Add operation to be performed in float instead of float16, then it is possible to remove casts
            on inputs and output to perform the Add operation in float16 to increase throughput.
            The pattern of up/down casts on inputs/outputs could be due to other cast propagation optimizations.
        level (PropagateCastLevel, default is NOT_USED): NOT_USED does not optimize the graph.
            FASTER_KEEP_PRECISION and AGGRRESSIVE_MIXED_PRECISION use predetermined list of nodes considered safe to
            move before/after cast operation. While FASTER_KEEP_PRECISION guarantees precision is not affected,
            AGGRRESSIVE_MIXED_PRECISION usually affects final precision.

    Raises:
        TypeError: if :attr:`model` is not a :class:`ORTModule` object
        TypeError: if :attr:`strategy` is not a :class:`PropagateCastOpsStrategy` object
        TypeError: if :attr:`level` is not a :class:`PropagateCastLevel` object

    '''

    if not isinstance(model, ORTModule):
        raise TypeError(
            f'`model` must be a `ORTModule` object, but `{type(model)}` object was specified.'
        )

    if not isinstance(strategy, PropagateCastOpsStrategy):
        # Report the type of the offending argument (`strategy`), not of `model`
        raise TypeError(
            f'`strategy` must be a `PropagateCastOpsStrategy` object, but `{type(strategy)}` object was specified.'
        )

    if not isinstance(level, PropagateCastLevel):
        # Same message layout as the other checks; the leading part is unchanged
        raise TypeError(
            f'`level` must be a `PropagateCastLevel` object, but `{type(level)}` object was specified.'
        )

    # Set flags for both eval and training mode
    for mode in (True, False):
        execution_manager = model._execution_manager(is_training=mode)
        execution_manager._propagate_cast_ops_strategy = strategy
        execution_manager._propagate_cast_ops_level = level
Ejemplo n.º 12
0
def main():
    '''Fine-tune `BertForSequenceClassification` and print timing statistics.

    Parses the command line, loads the dataloaders through `load_dataset`,
    builds a "bert-base-uncased" classifier (wrapped in `ORTModule` unless
    `--pytorch-only` is given), then alternates `train` and `test` for
    `--epochs` epochs and prints the accumulated training/validation times.

    Raises:
        ValueError: if `--log-level` does not name a `logging` level.
        AssertionError: if the validation accuracy of the last epoch is not above 0.5.
    '''
    # 1. Basic setup
    parser = argparse.ArgumentParser(
        description='PyTorch BERT sequence classification example')
    parser.add_argument('--pytorch-only',
                        action='store_true',
                        default=False,
                        help='disables ONNX Runtime training')
    parser.add_argument('--batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for training (default: 32)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for testing (default: 64)')
    parser.add_argument('--view-graphs',
                        action='store_true',
                        default=False,
                        help='views forward and backward graphs')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--epochs',
                        type=int,
                        default=4,
                        metavar='N',
                        help='number of epochs to train (default: 4)')
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        metavar='S',
                        help='random seed (default: 42)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=40,
        metavar='N',
        help=
        'how many batches to wait before logging training status (default: 40)'
    )
    parser.add_argument(
        '--train-steps',
        type=int,
        default=-1,
        metavar='N',
        help=
        'number of steps to train. Set -1 to run through whole dataset (default: -1)'
    )
    parser.add_argument(
        '--log-level',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        default='WARNING',
        help='Log level (default: WARNING)')
    parser.add_argument(
        '--num-hidden-layers',
        type=int,
        default=1,
        metavar='H',
        help=
        'Number of hidden layers for the BERT model. A vanila BERT has 12 hidden layers (default: 1)'
    )
    parser.add_argument('--data-dir',
                        type=str,
                        default='./cola_public/raw',
                        help='Path to the bert data directory')

    args = parser.parse_args()

    # Device (CPU vs CUDA): decide once and reuse the decision below
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    if use_cuda:
        device = torch.device("cuda")
        print('There are %d GPU(s) available.' % torch.cuda.device_count())
        print('We will use the GPU:', torch.cuda.get_device_name(0))
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")

    # Set log level
    numeric_level = getattr(logging, args.log_level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % args.log_level)
    logging.basicConfig(level=numeric_level)

    # Seed every random number generator BEFORE the dataloaders and the model
    # are created, so that anything they draw at construction time (e.g. the
    # freshly initialized classification layer) is covered by `--seed`.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    onnxruntime.set_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.seed)

    # 2. Dataloader
    train_dataloader, validation_dataloader = load_dataset(args)

    # 3. Modeling
    # Load BertForSequenceClassification, the pretrained BERT model with a single
    # linear classification layer on top.
    config = AutoConfig.from_pretrained(
        "bert-base-uncased",
        num_labels=2,
        num_hidden_layers=args.num_hidden_layers,
        output_attentions=False,  # Whether the model returns attentions weights.
        output_hidden_states=
        False,  # Whether the model returns all hidden-states.
    )
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
        config=config,
    )

    if not args.pytorch_only:
        model = ORTModule(model)

        # The ONNX-saving switches only make sense on the ORTModule wrapper,
        # so they are no longer set on a plain PyTorch model.
        # TODO: change it to False to stop saving ONNX models
        model._save_onnx = True
        model._save_onnx_prefix = 'BertForSequenceClassification'

    # Tell pytorch to run this model on the GPU.
    if use_cuda:
        model.cuda()

    # Note: AdamW is a class from the huggingface library (as opposed to pytorch)
    optimizer = AdamW(
        model.parameters(),
        lr=2e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon  - default is 1e-8.
    )

    # Authors recommend between 2 and 4 epochs
    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * args.epochs

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)

    # 4. Train loop (fine-tune)
    total_training_time, total_test_time, epoch_0_training, validation_accuracy = 0, 0, 0, 0
    for epoch_i in range(0, args.epochs):
        total_training_time += train(model, optimizer, scheduler,
                                     train_dataloader, epoch_i, device, args)
        # Remember the duration of the first epoch: with ORTModule it is used
        # below to estimate the one-off ONNX export cost.
        if not args.pytorch_only and epoch_i == 0:
            epoch_0_training = total_training_time
        test_time, validation_accuracy = test(model, validation_dataloader,
                                              device, args)
        total_test_time += test_time

    # Sanity check on the accuracy reported for the last epoch
    assert validation_accuracy > 0.5

    print('\n======== Global stats ========')
    if not args.pytorch_only:
        estimated_export = 0
        if args.epochs > 1:
            # Export estimate = first epoch minus the average of the other epochs
            estimated_export = epoch_0_training - (
                total_training_time - epoch_0_training) / (args.epochs - 1)
            print("  Estimated ONNX export took:               {:.4f}s".format(
                estimated_export))
        else:
            print(
                "  Estimated ONNX export took:               Estimate available when epochs > 1 only"
            )
        print("  Accumulated training without export took: {:.4f}s".format(
            total_training_time - estimated_export))
    print("  Accumulated training took:                {:.4f}s".format(
        total_training_time))
    print("  Accumulated validation took:              {:.4f}s".format(
        total_test_time))