"cls.transform.dense.weight": "BertModel/MLM/LMPrediction/Dense/Weight", "cls.transform.dense.bias": "BertModel/MLM/LMPrediction/Dense/Bias", "cls.transform.LayerNorm.weight": "BertModel/MLM/LMPrediction/Norm/Gamma", "cls.transform.LayerNorm.bias": "BertModel/MLM/LMPrediction/Norm/Beta", } onnx_torch_tform = { "bert.embeddings.word_embeddings.weight": np.transpose, "cls.transform.dense.weight": np.transpose, } @pytest.mark.parametrize( "mode, replication_factor, replicated_weight_sharding", [(ExecutionMode.DEFAULT, 1, False), requires_remote_buffers(ExecutionMode.PHASED, 1, False), requires_remote_buffers(ExecutionMode.PHASED, 4, True), requires_remote_buffers(ExecutionMode.PHASED, 4, False)]) def test_pretraining_fwd(custom_ops, mode, replication_factor, replicated_weight_sharding): # ------------------- PopART -------------------- config = BertConfig(task="PRETRAINING", vocab_length=9728, num_layers=2, batch_size=1, hidden_size=768, sequence_length=128, popart_dtype="FLOAT", activation_type="relu", no_dropout=True, no_attn_dropout=True,
import torch
import popart
import onnx

from bert_model import BertConfig, ExecutionMode, get_model
from tests.torch_bert import BertConfig as TorchBertConfig, BertAttention
from tests.utils import (run_py, copy_weights_to_torch, run_fwd_model,
                         check_tensors, check_model, requires_remote_buffers,
                         sanity)

'''
Tests the attention op.
'''

test_modes = [
    ExecutionMode.DEFAULT,
    requires_remote_buffers(ExecutionMode.PHASED)
]

TORCH_TO_ONNX = {
    ExecutionMode.DEFAULT: {
        "self.query.weight": "QKV",
        "self.key.weight": "QKV",
        "self.value.weight": "QKV",
        "self.query.bias": "QKV_Bias",
        "self.key.bias": "QKV_Bias",
        "self.value.bias": "QKV_Bias",
        "output.dense.weight": "Out",
        "output.dense.bias": "Out_Bias",
        "output.LayerNorm.weight": "Gamma",
        "output.LayerNorm.bias": "Beta"
    },
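# Illustrative sketch of how a fused mapping like the one above can be
# consumed: every torch tensor that maps to the same ONNX name (e.g. "QKV")
# is concatenated and compared against the single ONNX initializer. The
# helper and the concatenation axis are assumptions for illustration only;
# the real copy/check is done by tests.utils (copy_weights_to_torch,
# check_tensors), and the actual layout is fixed by the model.
import numpy as np


def fuse_torch_tensors(torch_state, name_map, onnx_name, axis=-1):
    """Concatenate every torch tensor that `name_map` sends to `onnx_name`."""
    parts = [np.asarray(torch_state[torch_name])
             for torch_name, mapped in name_map.items()
             if mapped == onnx_name]
    return np.concatenate(parts, axis=axis)


# Hypothetical usage, with random data standing in for real weights:
#   default_map = TORCH_TO_ONNX[ExecutionMode.DEFAULT]
#   state = {name: np.random.rand(4, 4) for name in default_map}
#   assert fuse_torch_tensors(state, default_map, "QKV").shape == (4, 12)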
    expanded_name_map = {}
    remapped_transform_map = {}
    for k, v in torch_to_onnx.items():
        if v in mapping.keys():
            expanded_name_map[k] = mapping[v]
        else:
            expanded_name_map[k] = v
        if v in transform_map.keys():
            remapped_transform_map[k] = transform_map[v]
    return expanded_name_map, remapped_transform_map


@pytest.mark.parametrize(
    "mode",
    [ExecutionMode.DEFAULT, requires_remote_buffers(ExecutionMode.PHASED)])
@pytest.mark.parametrize(
    "micro_batch_size, batch_serialization_factor, embedding_serialization_vocab_steps",
    [(1, 1, 1), (2, 2, 1), (2, 2, 2), (2, 1, 1), (2, 1, 2)])
def test_embedding_fwd(custom_ops, mode, micro_batch_size,
                       batch_serialization_factor,
                       embedding_serialization_vocab_steps):
    # ------------------- PopART --------------------
    config = BertConfig(task="SQUAD",
                        vocab_length=9728,
                        micro_batch_size=micro_batch_size,
                        hidden_size=768,
                        sequence_length=128,
                        activation_type='relu',
                        popart_dtype="FLOAT",
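# Illustrative sketch of how the expanded name map and the remapped transform
# map returned by the helper above might be consumed when pulling ONNX
# initializers into a torch model. `onnx_weights` (ONNX name -> numpy array)
# and `gather_torch_weights` are hypothetical stand-ins; in these tests the
# real copy is done by tests.utils.copy_weights_to_torch.
import numpy as np


def gather_torch_weights(onnx_weights, expanded_name_map, remapped_transform_map):
    torch_weights = {}
    for torch_name, onnx_name in expanded_name_map.items():
        w = np.asarray(onnx_weights[onnx_name])
        # Apply the registered layout fix-up (e.g. np.transpose), if any.
        transform = remapped_transform_map.get(torch_name)
        if transform is not None:
            w = transform(w)
        torch_weights[torch_name] = w
    return torch_weights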
    return run_py(proto,
                  data=data,
                  outputs=x,
                  patterns=patterns,
                  user_options={
                      "enableOutlining": outline,
                      "constantWeights": False
                  },
                  skip_execution=skip_execution)


@pytest.mark.sanity
@pytest.mark.parametrize('splits', (1, 4))
@pytest.mark.parametrize(['phase', 'optimizer'],
                         [("fwd", None), ("bwd", "Sgd"),
                          requires_remote_buffers("bwd", "Lamb")])
def test_tied_gather_pattern_ir(splits, phase, optimizer, custom_ops):
    train = phase == "bwd"

    sess = session(train, skip_execution=True, splits=splits, optim=optimizer)

    ir = json.loads(sess._serializeIr(popart.IrSerializationFormat.JSON))

    ops = ir["maingraph"]

    # The gatherOp should be replaced with TiedGather
    assert len(list(filter(lambda op: op["type"] == "TiedGather", ops))) == splits
    assert len(list(filter(lambda op: op["type"] == "Gather", ops))) == 0

    # The matmuls should have fully_connected_pass disabled
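# The op-count assertions above can be factored into a small helper. This is
# a sketch built only on what the test already uses: the serialized IR is a
# dict of graphs, and each op record carries a "type" field.
def count_ops(graph_ops, op_type):
    """Count ops of a given type in one graph of the serialized PopART IR."""
    return sum(1 for op in graph_ops if op["type"] == op_type)


# e.g. the checks above could then read:
#   assert count_ops(ir["maingraph"], "TiedGather") == splits
#   assert count_ops(ir["maingraph"], "Gather") == 0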