Example 1
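Builds matching self-attention modules for BERT, RoBERTa, ELECTRA, and DistilBERT from a single hyperparameter dict, remapping the key names each config expects and re-seeding the RNG before every construction so the initializations are reproducible.
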
import copy

import torch

# Import paths assume the transformers >= 4.x package layout.
from transformers import BertConfig, DistilBertConfig, ElectraConfig, RobertaConfig
from transformers.models.bert.modeling_bert import BertSelfAttention
from transformers.models.distilbert.modeling_distilbert import MultiHeadSelfAttention
from transformers.models.electra.modeling_electra import ElectraSelfAttention
from transformers.models.roberta.modeling_roberta import RobertaSelfAttention


def get_modules(params_dict):
    modules = {}
    params = copy.deepcopy(params_dict)
    params["attention_probs_dropout_prob"] = params.pop("dropout")

    # bert, roberta, and electra self-attention implementations share the same code.

    torch.manual_seed(1234)
    hf_module = BertSelfAttention(BertConfig(**params))
    modules["bert"] = hf_module

    torch.manual_seed(1234)
    hf_module = RobertaSelfAttention(RobertaConfig(**params))
    modules["roberta"] = hf_module

    torch.manual_seed(1234)
    hf_module = ElectraSelfAttention(ElectraConfig(**params))
    modules["electra"] = hf_module

    # DistilBERT's config uses different key names, so remap them separately.
    torch.manual_seed(1234)
    distilparams = copy.deepcopy(params_dict)
    distilparams["n_heads"] = distilparams.pop("num_attention_heads")
    distilparams["dim"] = distilparams.pop("hidden_size")
    distilparams["attention_dropout"] = distilparams.pop("dropout")
    hf_module = MultiHeadSelfAttention(DistilBertConfig(**distilparams))
    modules["distilbert"] = hf_module

    return modules
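A minimal usage sketch, assuming a hypothetical params_dict with hidden_size, num_attention_heads, and dropout keys (hidden_size must be divisible by num_attention_heads):

params_dict = {"hidden_size": 8, "num_attention_heads": 2, "dropout": 0.1}
modules = get_modules(params_dict)

# Re-seeding before each construction makes the BERT-style modules start
# from identical weights.
assert torch.equal(modules["bert"].query.weight, modules["roberta"].query.weight)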
Example 2
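A generator variant that yields (name, module) pairs of full attention blocks (BertAttention, RobertaAttention, ElectraAttention) built from a shared ATTENTION_PARAMS_DICT, each seeded identically and switched to eval mode.
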
import copy

import torch

# Import paths assume the transformers >= 4.x package layout.
from transformers import BertConfig, ElectraConfig, RobertaConfig
from transformers.models.bert.modeling_bert import BertAttention
from transformers.models.electra.modeling_electra import ElectraAttention
from transformers.models.roberta.modeling_roberta import RobertaAttention


def get_attention_modules():
    # ATTENTION_PARAMS_DICT: module-level hyperparameter dict defined elsewhere in the test file.
    params = copy.deepcopy(ATTENTION_PARAMS_DICT)
    params["attention_probs_dropout_prob"] = params.pop("attention_dropout")
    params["hidden_dropout_prob"] = params.pop("hidden_dropout")

    torch.manual_seed(1234)
    yield "bert", BertAttention(BertConfig(**params)).eval()

    torch.manual_seed(1234)
    yield "roberta", RobertaAttention(RobertaConfig(**params)).eval()

    torch.manual_seed(1234)
    yield "electra", ElectraAttention(ElectraConfig(**params)).eval()
Example 3
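Same pattern as Example 2, but yields complete transformer layers (BertLayer, RobertaLayer, ElectraLayer) from LAYER_PARAMS_DICT, which additionally supplies the activation name.
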
import copy

import torch

# Import paths assume the transformers >= 4.x package layout.
from transformers import BertConfig, ElectraConfig, RobertaConfig
from transformers.models.bert.modeling_bert import BertLayer
from transformers.models.electra.modeling_electra import ElectraLayer
from transformers.models.roberta.modeling_roberta import RobertaLayer


def get_layer_modules():
    # LAYER_PARAMS_DICT: module-level hyperparameter dict defined elsewhere in the test file.
    params = copy.deepcopy(LAYER_PARAMS_DICT)
    params["attention_probs_dropout_prob"] = params.pop("attention_dropout")
    params["hidden_dropout_prob"] = params.pop("hidden_dropout")
    params["hidden_act"] = params.pop("activation")

    torch.manual_seed(1234)
    yield "bert", BertLayer(BertConfig(**params)).eval()

    torch.manual_seed(1234)
    yield "roberta", RobertaLayer(RobertaConfig(**params)).eval()

    torch.manual_seed(1234)
    yield "electra", ElectraLayer(ElectraConfig(**params)).eval()
Example 4
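Builds full encoder stacks (BertEncoder, RobertaEncoder, ElectraEncoder) from one params dict and returns them keyed by model name, again re-seeding before each construction.
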
import copy

import torch

# Import paths assume the transformers >= 4.x package layout.
from transformers import BertConfig, ElectraConfig, RobertaConfig
from transformers.models.bert.modeling_bert import BertEncoder
from transformers.models.electra.modeling_electra import ElectraEncoder
from transformers.models.roberta.modeling_roberta import RobertaEncoder


def get_modules(params_dict):
    modules = {}
    params = copy.deepcopy(params_dict)
    params["attention_probs_dropout_prob"] = params.pop("attention_dropout")
    params["hidden_dropout_prob"] = params.pop("hidden_dropout")

    torch.manual_seed(1234)
    hf_module = BertEncoder(BertConfig(**params))
    modules["bert"] = hf_module

    torch.manual_seed(1234)
    hf_module = RobertaEncoder(RobertaConfig(**params))
    modules["roberta"] = hf_module

    torch.manual_seed(1234)
    hf_module = ElectraEncoder(ElectraConfig(**params))
    modules["electra"] = hf_module

    return modules
Example 5
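The dict-returning counterpart of Example 3: one transformer layer per model family, built from a shared params dict.
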
import copy

import torch

# Import paths assume the transformers >= 4.x package layout.
from transformers import BertConfig, ElectraConfig, RobertaConfig
from transformers.models.bert.modeling_bert import BertLayer
from transformers.models.electra.modeling_electra import ElectraLayer
from transformers.models.roberta.modeling_roberta import RobertaLayer


def get_layer_modules(params_dict):
    modules = {}
    params = copy.deepcopy(params_dict)
    params["attention_probs_dropout_prob"] = params.pop("attention_dropout")
    params["hidden_dropout_prob"] = params.pop("hidden_dropout")

    # bert, roberta, electra (and layoutlm) layer implementations share the same code.

    torch.manual_seed(1234)
    hf_module = BertLayer(BertConfig(**params))
    modules["bert"] = hf_module

    torch.manual_seed(1234)
    hf_module = RobertaLayer(RobertaConfig(**params))
    modules["roberta"] = hf_module

    torch.manual_seed(1234)
    hf_module = ElectraLayer(ElectraConfig(**params))
    modules["electra"] = hf_module

    return modules