Example #1
0
def get_modules(params_dict):
    """Build one Hugging Face self-attention module per supported model family.

    The torch RNG is re-seeded with the same value immediately before each
    module is constructed, so every module is created from the same RNG state.

    Args:
        params_dict: Keyword arguments for the model configs. The code reads
            the ``"dropout"``, ``"num_attention_heads"`` and ``"hidden_size"``
            keys; the mapping itself is never mutated (deep copies are used).

    Returns:
        A dict mapping ``"bert"``, ``"roberta"``, ``"electra"`` and
        ``"distilbert"`` to the corresponding attention module instance.
    """
    seed = 1234
    built = {}

    # The BERT-style configs call the attention dropout
    # ``attention_probs_dropout_prob``.
    shared_kwargs = copy.deepcopy(params_dict)
    shared_kwargs["attention_probs_dropout_prob"] = shared_kwargs.pop("dropout")

    # bert, roberta, electra self attentions have the same code.
    same_code_families = (
        ("bert", BertSelfAttention, BertConfig),
        ("roberta", RobertaSelfAttention, RobertaConfig),
        ("electra", ElectraSelfAttention, ElectraConfig),
    )
    for family, attention_cls, config_cls in same_code_families:
        torch.manual_seed(seed)
        built[family] = attention_cls(config_cls(**shared_kwargs))

    # DistilBERT uses its own config key names, so start again from the
    # caller's parameters and rename the relevant entries.
    torch.manual_seed(seed)
    distil_kwargs = copy.deepcopy(params_dict)
    renames = (
        ("n_heads", "num_attention_heads"),
        ("dim", "hidden_size"),
        ("attention_dropout", "dropout"),
    )
    for distil_key, generic_key in renames:
        distil_kwargs[distil_key] = distil_kwargs.pop(generic_key)
    built["distilbert"] = MultiHeadSelfAttention(DistilBertConfig(**distil_kwargs))

    return built
Example #2
0
 def __init__(self, config, opt):
     """Store ``config`` and ``opt`` and create the attention sub-layers.

     Args:
         config: Configuration object handed to ``BertSelfAttention``.
         opt: Stored unchanged on the instance as ``self.opt``.
     """
     super().__init__()
     self.opt, self.config = opt, config
     # Submodules are created in a fixed order: attention first, then Tanh.
     self.SA = BertSelfAttention(self.config)
     self.tanh = torch.nn.Tanh()