コード例 #1
0
    "cls.transform.dense.weight": "BertModel/MLM/LMPrediction/Dense/Weight",
    "cls.transform.dense.bias": "BertModel/MLM/LMPrediction/Dense/Bias",
    "cls.transform.LayerNorm.weight": "BertModel/MLM/LMPrediction/Norm/Gamma",
    "cls.transform.LayerNorm.bias": "BertModel/MLM/LMPrediction/Norm/Beta",
}

# Maps parameter names to the callable ``np.transpose``.
# NOTE(review): presumably these are the weights whose layout differs between
# the ONNX and torch models and must be transposed when they are copied or
# compared -- confirm against the code that consumes this mapping.
onnx_torch_tform = dict.fromkeys(
    (
        "bert.embeddings.word_embeddings.weight",
        "cls.transform.dense.weight",
    ),
    np.transpose,
)


@pytest.mark.parametrize(
    "mode, replication_factor, replicated_weight_sharding",
    [(ExecutionMode.DEFAULT, 1, False),
     requires_remote_buffers(ExecutionMode.PHASED, 1, False),
     requires_remote_buffers(ExecutionMode.PHASED, 4, True),
     requires_remote_buffers(ExecutionMode.PHASED, 4, False)])
def test_pretraining_fwd(custom_ops, mode, replication_factor,
                         replicated_weight_sharding):
    #  ------------------- PopART --------------------
    config = BertConfig(task="PRETRAINING",
                        vocab_length=9728,
                        num_layers=2,
                        batch_size=1,
                        hidden_size=768,
                        sequence_length=128,
                        popart_dtype="FLOAT",
                        activation_type="relu",
                        no_dropout=True,
                        no_attn_dropout=True,
コード例 #2
0
import torch

import popart
import onnx

from bert_model import BertConfig, ExecutionMode, get_model
from tests.torch_bert import BertConfig as TorchBertConfig, BertAttention
from tests.utils import (run_py, copy_weights_to_torch, run_fwd_model,
                         check_tensors, check_model, requires_remote_buffers,
                         sanity)
'''
Tests the attention op.
'''
# Execution modes to exercise: DEFAULT as-is, and PHASED passed through
# `requires_remote_buffers` (imported from tests.utils).
# NOTE(review): presumably `requires_remote_buffers` produces a parametrize
# entry that is skipped when remote buffers are unavailable -- confirm in
# tests/utils.
test_modes = [
    ExecutionMode.DEFAULT,
    requires_remote_buffers(ExecutionMode.PHASED)
]

TORCH_TO_ONNX = {
    ExecutionMode.DEFAULT: {
        "self.query.weight": "QKV",
        "self.key.weight": "QKV",
        "self.value.weight": "QKV",
        "self.query.bias": "QKV_Bias",
        "self.key.bias": "QKV_Bias",
        "self.value.bias": "QKV_Bias",
        "output.dense.weight": "Out",
        "output.dense.bias": "Out_Bias",
        "output.LayerNorm.weight": "Gamma",
        "output.LayerNorm.bias": "Beta"
    },
コード例 #3
0
    expanded_name_map = {}
    remapped_transform_map = {}
    for k, v in torch_to_onnx.items():
        if v in mapping.keys():
            expanded_name_map[k] = mapping[v]
        else:
            expanded_name_map[k] = v
        if v in transform_map.keys():
            remapped_transform_map[k] = transform_map[v]
    return expanded_name_map, remapped_transform_map


@pytest.mark.parametrize(
    "mode",
    [ExecutionMode.DEFAULT,
     requires_remote_buffers(ExecutionMode.PHASED)])
@pytest.mark.parametrize(
    "micro_batch_size, batch_serialization_factor, embedding_serialization_vocab_steps",
    [(1, 1, 1), (2, 2, 1), (2, 2, 2), (2, 1, 1), (2, 1, 2)])
def test_embedding_fwd(custom_ops, mode, micro_batch_size,
                       batch_serialization_factor,
                       embedding_serialization_vocab_steps):
    #  ------------------- PopART --------------------
    config = BertConfig(
        task="SQUAD",
        vocab_length=9728,
        micro_batch_size=micro_batch_size,
        hidden_size=768,
        sequence_length=128,
        activation_type='relu',
        popart_dtype="FLOAT",
コード例 #4
0
    expanded_name_map = {}
    remapped_transform_map = {}
    for k, v in torch_to_onnx.items():
        if v in mapping.keys():
            expanded_name_map[k] = mapping[v]
        else:
            expanded_name_map[k] = v
        if v in transform_map.keys():
            remapped_transform_map[k] = transform_map[v]
    return expanded_name_map, remapped_transform_map


@pytest.mark.parametrize(
    "mode",
    [ExecutionMode.DEFAULT,
     requires_remote_buffers(ExecutionMode.PHASED)])
@pytest.mark.parametrize(
    "batch_size, batch_serialization_factor, embedding_serialization_vocab_steps",
    [(1, 1, 1), (2, 2, 1), (2, 2, 2), (2, 1, 1), (2, 1, 2)])
def test_embedding_fwd(custom_ops, mode, batch_size,
                       batch_serialization_factor,
                       embedding_serialization_vocab_steps):
    #  ------------------- PopART --------------------
    config = BertConfig(
        task="SQUAD",
        vocab_length=9728,
        batch_size=batch_size,
        hidden_size=768,
        sequence_length=128,
        activation_type='relu',
        popart_dtype="FLOAT",
コード例 #5
0
        return run_py(proto,
                      data=data,
                      outputs=x,
                      patterns=patterns,
                      user_options={
                          "enableOutlining": outline,
                          "constantWeights": False
                      },
                      skip_execution=skip_execution)


@pytest.mark.sanity
@pytest.mark.parametrize('splits', (1, 4))
@pytest.mark.parametrize(['phase', 'optimizer'],
                         [("fwd", None), ("bwd", "Sgd"),
                          requires_remote_buffers("bwd", "Lamb")])
def test_tied_gather_pattern_ir(splits, phase, optimizer, custom_ops):
    train = phase == "bwd"

    sess = session(train, skip_execution=True, splits=splits, optim=optimizer)

    ir = json.loads(sess._serializeIr(popart.IrSerializationFormat.JSON))

    ops = ir["maingraph"]

    # The gatherOp should be replaced with TiedGather
    assert len(list(filter(lambda op: op["type"] == "TiedGather",
                           ops))) == splits
    assert len(list(filter(lambda op: op["type"] == "Gather", ops))) == 0

    # The matmuls should have fully_connected_pass disabled