import os
import unittest

import onnx

# Assumed import sources (not shown in this excerpt): optimize_model is the
# onnxruntime.transformers optimizer entry point, and create_bert_attention is
# assumed to be a local test helper that builds a BERT-style attention subgraph.
from bert_model_generator import create_bert_attention
from onnxruntime.transformers.optimizer import optimize_model


class TestFusion(unittest.TestCase):
    def test_attention_fusion(self):
        model = create_bert_attention()
        dir = "."
        model_path = os.path.join(dir, "attention.onnx")
        onnx.save(model, model_path)
        optimized_model = optimize_model(model_path)
        os.remove(model_path)
        self.verify_fusion(optimized_model, "attention_opt.onnx")
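
    # The tests call self.verify_fusion(...), which is not part of this excerpt.
    # A minimal sketch of what such a helper could look like, assuming the
    # expected models live under a test_data/models directory next to this file;
    # the graph-to-graph string comparison mirrors what these tests assert.
    def verify_fusion(self, optimized_model, expected_model_filename):
        expected_model_path = os.path.join(os.path.dirname(__file__), "test_data", "models", expected_model_filename)
        expected = onnx.load(expected_model_path)
        self.assertEqual(str(optimized_model.model.graph), str(expected.graph))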

    def test_attention_fusion_pruned_model(self):
        model = create_bert_attention(
            input_hidden_size=16,
            num_heads=2,
            pruned_qk_hidden_size=8,
            pruned_v_hidden_size=8,
        )
        dir = "."
        model_path = os.path.join(dir, "pruned_attention.onnx")
        onnx.save(model, model_path)
        optimized_model = optimize_model(model_path)
        os.remove(model_path)
        self.verify_fusion(optimized_model, "pruned_attention_opt.onnx")

    def test_attention_fusion_for_varied_qkv_dimensions(self):
        model = create_bert_attention(
            input_hidden_size=16,
            num_heads=2,
            pruned_qk_hidden_size=24,
            pruned_v_hidden_size=16,
        )
        dir = "."
        model_path = os.path.join(dir, "attention_with_varied_qkv.onnx")
        onnx.save(model, model_path)
        optimized_model = optimize_model(model_path)
        os.remove(model_path)
        self.verify_fusion(optimized_model, "attention_with_varied_qkv_opt.onnx")

    def test_attention_fusion_reverse_add_order(self):
        model = create_bert_attention(
            input_hidden_size=16,
            num_heads=2,
            pruned_qk_hidden_size=8,
            pruned_v_hidden_size=8,
            switch_add_inputs=True,
        )
        dir = "."
        model_path = os.path.join(dir, "bert_attention_reverse_add_order.onnx")
        onnx.save(model, model_path)
        optimized_model = optimize_model(model_path)
        os.remove(model_path)
        # Reversing the order of the Add inputs should yield the same optimized model.
        self.verify_fusion(optimized_model, "pruned_attention_opt.onnx")

    def test_attention_fusion_for_varied_qkv_dimensions_with_wrong_opt_parameters(self):
        model = create_bert_attention(
            input_hidden_size=16,
            num_heads=2,
            pruned_qk_hidden_size=24,
            pruned_v_hidden_size=16,
        )
        dir = "."
        model_path = os.path.join(dir, "attention_with_varied_qkv.onnx")
        onnx.save(model, model_path)
        # Deliberately wrong num_heads and hidden_size hints: the fusion is still
        # expected to produce the correct model, so it must not depend on these
        # hints for this graph.
        optimized_model = optimize_model(model_path, "bert", num_heads=8, hidden_size=8)
        os.remove(model_path)
        self.verify_fusion(optimized_model, "attention_with_varied_qkv_opt.onnx")
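
# Standard entry point so the tests can be run directly with
# `python <this file>` in addition to a test runner such as pytest.
if __name__ == "__main__":
    unittest.main()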