def __init__(self,
             n_words, n_rels, n_tags=None, n_chars=None,
             encoder='lstm', feat=['char'],
             n_embed=100, n_pretrained=100, n_feat_embed=100,
             n_char_embed=50, n_char_hidden=100, char_pad_index=0,
             elmo='original_5b', elmo_bos_eos=(True, False),
             bert=None, n_bert_layers=4, mix_dropout=.0,
             bert_pooling='mean', bert_pad_index=0, freeze=True,
             embed_dropout=.33,
             n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33,
             n_arc_mlp=500, n_rel_mlp=100, mlp_dropout=.33,
             scale=0, pad_index=0, unk_index=1,
             **kwargs):
    super().__init__(**Config().update(locals()))

    self.arc_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout)
    self.arc_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout)
    self.rel_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout)
    self.rel_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout)

    self.arc_attn = Biaffine(n_in=n_arc_mlp, scale=scale, bias_x=True, bias_y=False)
    self.rel_attn = Biaffine(n_in=n_rel_mlp, n_out=n_rels, bias_x=True, bias_y=True)
    self.criterion = nn.CrossEntropyLoss()
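# Usage sketch (illustrative, not part of the original source). The class name and
# import path follow SuPar's public API and are assumptions here, as are the
# vocabulary sizes; only the keyword arguments come from the signature above.
from supar.models import BiaffineDependencyModel

model = BiaffineDependencyModel(n_words=30000, n_rels=40, n_tags=50, n_chars=500,
                                encoder='lstm', feat=['tag', 'char'])
print(model)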
def __init__(self,
             n_words, n_labels, n_tags=None, n_chars=None,
             encoder='lstm', feat=['char'],
             n_embed=100, n_pretrained=100, n_feat_embed=100,
             n_char_embed=50, n_char_hidden=100, char_pad_index=0,
             bert=None, n_bert_layers=4, mix_dropout=.0,
             bert_pooling='mean', bert_pad_index=0, freeze=True,
             embed_dropout=.33,
             n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33,
             n_span_mlp=500, n_pair_mlp=100, n_label_mlp=100, mlp_dropout=.33,
             inference='mfvi', max_iter=3, interpolation=0.1,
             pad_index=0, unk_index=1,
             **kwargs):
    super().__init__(**Config().update(locals()))

    self.span_mlp_l = MLP(n_in=self.args.n_hidden, n_out=n_span_mlp, dropout=mlp_dropout)
    self.span_mlp_r = MLP(n_in=self.args.n_hidden, n_out=n_span_mlp, dropout=mlp_dropout)
    self.pair_mlp_l = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=mlp_dropout)
    self.pair_mlp_r = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=mlp_dropout)
    self.pair_mlp_b = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=mlp_dropout)
    self.label_mlp_l = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=mlp_dropout)
    self.label_mlp_r = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=mlp_dropout)

    self.span_attn = Biaffine(n_in=n_span_mlp, bias_x=True, bias_y=False)
    self.pair_attn = Triaffine(n_in=n_pair_mlp, bias_x=True, bias_y=False)
    self.label_attn = Biaffine(n_in=n_label_mlp, n_out=n_labels, bias_x=True, bias_y=True)
    self.inference = (MFVIConstituency if inference == 'mfvi' else LBPConstituency)(max_iter)
    self.criterion = nn.CrossEntropyLoss()
def __init__(self,
             n_words, n_feats, n_labels,
             feat='char',
             n_embed=100, n_feat_embed=100, n_char_embed=50,
             bert=None, n_bert_layers=4, max_len=None, mix_dropout=.0,
             embed_dropout=.33,
             n_lstm_hidden=400, n_lstm_layers=3, lstm_dropout=.33,
             n_mlp_span=500, n_mlp_label=100, mlp_dropout=.33,
             feat_pad_index=0, pad_index=0, unk_index=1,
             **kwargs):
    super().__init__()

    self.args = Config().update(locals())

    # the embedding layer
    self.word_embed = nn.Embedding(num_embeddings=n_words,
                                   embedding_dim=n_embed)
    if feat == 'char':
        self.feat_embed = CharLSTM(n_chars=n_feats,
                                   n_embed=n_char_embed,
                                   n_out=n_feat_embed,
                                   pad_index=feat_pad_index)
    elif feat == 'bert':
        self.feat_embed = BertEmbedding(model=bert,
                                        n_layers=n_bert_layers,
                                        n_out=n_feat_embed,
                                        pad_index=feat_pad_index,
                                        max_len=max_len,
                                        dropout=mix_dropout)
        self.n_feat_embed = self.feat_embed.n_out
    elif feat == 'tag':
        self.feat_embed = nn.Embedding(num_embeddings=n_feats,
                                       embedding_dim=n_feat_embed)
    else:
        raise RuntimeError("The feat type should be in ['char', 'bert', 'tag'].")
    self.embed_dropout = IndependentDropout(p=embed_dropout)

    # the lstm layer
    self.lstm = BiLSTM(input_size=n_embed+n_feat_embed,
                       hidden_size=n_lstm_hidden,
                       num_layers=n_lstm_layers,
                       dropout=lstm_dropout)
    self.lstm_dropout = SharedDropout(p=lstm_dropout)

    # the MLP layers
    self.mlp_span_l = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_span, dropout=mlp_dropout)
    self.mlp_span_r = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_span, dropout=mlp_dropout)
    self.mlp_label_l = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_label, dropout=mlp_dropout)
    self.mlp_label_r = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_label, dropout=mlp_dropout)

    # the Biaffine layers
    self.span_attn = Biaffine(n_in=n_mlp_span, bias_x=True, bias_y=False)
    self.label_attn = Biaffine(n_in=n_mlp_label, n_out=n_labels, bias_x=True, bias_y=True)
    self.crf = CRFConstituency()
    self.criterion = nn.CrossEntropyLoss()
    self.pad_index = pad_index
    self.unk_index = unk_index
def __init__(self,
             n_words, n_rels, n_tags=None, n_chars=None,
             encoder='lstm', feat=['char'],
             n_embed=100, n_pretrained=100, n_feat_embed=100,
             n_char_embed=50, n_char_hidden=100, char_pad_index=0,
             bert=None, n_bert_layers=4, mix_dropout=.0,
             bert_pooling='mean', bert_pad_index=0, freeze=True,
             embed_dropout=.33,
             n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33,
             n_arc_mlp=500, n_rel_mlp=100, mlp_dropout=.33,
             use_attentions=True,  # attention
             attention_head=0,     # attention
             attention_layer=6,    # attention
             scale=0, pad_index=0, unk_index=1,
             **kwargs):
    super().__init__(**Config().update(locals()))

    self.arc_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout)
    self.arc_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout)
    self.rel_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout)
    self.rel_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout)

    self.arc_attn = Biaffine(n_in=n_arc_mlp, scale=scale, bias_x=True, bias_y=False)
    self.rel_attn = Biaffine(n_in=n_rel_mlp, n_out=n_rels, bias_x=True, bias_y=True)
    # transformer attention
    if use_attentions:
        self.attn_mix = nn.Parameter(torch.randn(1))
    self.criterion = nn.CrossEntropyLoss()
def __init__(self,
             n_words, n_labels, n_tags=None, n_chars=None, n_lemmas=None,
             encoder='lstm', feat=['tag', 'char', 'lemma'],
             n_embed=100, n_pretrained=125, n_feat_embed=100,
             n_char_embed=50, n_char_hidden=400, char_pad_index=0, char_dropout=0.33,
             bert=None, n_bert_layers=4, mix_dropout=.0,
             bert_pooling='mean', bert_pad_index=0, freeze=True,
             embed_dropout=.2,
             n_lstm_hidden=600, n_lstm_layers=3, encoder_dropout=.33,
             n_edge_mlp=600, n_label_mlp=600,
             edge_mlp_dropout=.25, label_mlp_dropout=.33,
             interpolation=0.1,
             pad_index=0, unk_index=1,
             **kwargs):
    super().__init__(**Config().update(locals()))

    self.edge_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_edge_mlp, dropout=edge_mlp_dropout, activation=False)
    self.edge_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_edge_mlp, dropout=edge_mlp_dropout, activation=False)
    self.label_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=label_mlp_dropout, activation=False)
    self.label_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=label_mlp_dropout, activation=False)

    self.edge_attn = Biaffine(n_in=n_edge_mlp, n_out=2, bias_x=True, bias_y=True)
    self.label_attn = Biaffine(n_in=n_label_mlp, n_out=n_labels, bias_x=True, bias_y=True)
    self.criterion = nn.CrossEntropyLoss()
def __init__(self,
             n_words, n_feats, n_rels,
             encoder='lstm', feat='char',
             n_embed=100, n_feat_embed=100, n_char_embed=50,
             bert=None, n_bert_layers=4, mix_dropout=.0,
             embed_dropout=.33,
             n_lstm_hidden=400, n_lstm_layers=3, n_att_layers=6, lstm_dropout=.33,
             n_mlp_arc=500, n_mlp_rel=100, mlp_dropout=.33,
             feat_pad_index=0, pad_index=0, unk_index=1,
             **kwargs):
    super().__init__()

    self.args = Config().update(locals())
    # trainable word embeddings, learned during training
    self.word_embed = nn.Embedding(num_embeddings=n_words,
                                   embedding_dim=n_embed)
    self.feat_embed = CharLSTM(n_chars=n_feats,
                               n_embed=n_char_embed,
                               n_out=n_feat_embed,
                               pad_index=feat_pad_index)
    # dropout on the input layer, using independent dropout
    self.embed_dropout = IndependentDropout(p=embed_dropout)

    self.encoder_type = encoder
    if encoder == 'lstm':
        # BiLSTM encoder with shared dropout
        self.encoder = BiLSTM(input_size=n_embed + n_feat_embed,
                              hidden_size=n_lstm_hidden,
                              num_layers=n_lstm_layers,
                              dropout=lstm_dropout)
        self.lstm_dropout = SharedDropout(p=lstm_dropout)
    elif encoder == 'att':
        d_input = n_embed + n_feat_embed
        # linear projection applied before the self-attention encoder
        self.linear1 = nn.Linear(d_input, n_lstm_hidden * 2)
        self.encoder = Attention_encoder(d_model=n_lstm_hidden * 2,
                                         n_layers=n_att_layers)
        # projection applied after the encoder (currently unused)
        # self.linear2 = nn.Linear(512, n_lstm_hidden * 2, bias=False)

    # four separate MLPs projecting to the arc/rel dimensions
    self.mlp_arc_d = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_arc, dropout=mlp_dropout)
    self.mlp_arc_h = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_arc, dropout=mlp_dropout)
    self.mlp_rel_d = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_rel, dropout=mlp_dropout)
    self.mlp_rel_h = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_rel, dropout=mlp_dropout)

    self.arc_attn = Biaffine(n_in=n_mlp_arc, bias_x=True, bias_y=False)
    self.rel_attn = Biaffine(n_in=n_mlp_rel, n_out=n_rels, bias_x=True, bias_y=True)
    self.criterion = nn.CrossEntropyLoss()
    self.pad_index = pad_index
    self.unk_index = unk_index
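# Usage sketch (illustrative, not from the original source): the constructor above
# supports either a BiLSTM or a self-attention encoder. "BiaffineParserModel" is a
# placeholder name for whatever class this __init__ belongs to, and the vocabulary
# sizes are made up; only the keyword arguments come from the signature above.
# lstm_model = BiaffineParserModel(n_words=30000, n_feats=500, n_rels=40,
#                                  encoder='lstm')
# attn_model = BiaffineParserModel(n_words=30000, n_feats=500, n_rels=40,
#                                  encoder='att', n_att_layers=6)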
def __init__(self,
             n_words, n_labels, n_tags=None, n_chars=None, n_lemmas=None,
             encoder='lstm', feat=['tag', 'char', 'lemma'],
             n_embed=100, n_pretrained=125, n_feat_embed=100,
             n_char_embed=50, n_char_hidden=100, char_pad_index=0, char_dropout=0,
             elmo='original_5b', elmo_bos_eos=(True, False),
             bert=None, n_bert_layers=4, mix_dropout=.0,
             bert_pooling='mean', bert_pad_index=0, freeze=True,
             embed_dropout=.2,
             n_lstm_hidden=600, n_lstm_layers=3, encoder_dropout=.33,
             n_edge_mlp=600, n_pair_mlp=150, n_label_mlp=600,
             edge_mlp_dropout=.25, pair_mlp_dropout=.25, label_mlp_dropout=.33,
             inference='mfvi', max_iter=3, interpolation=0.1,
             pad_index=0, unk_index=1,
             **kwargs):
    super().__init__(**Config().update(locals()))

    self.edge_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_edge_mlp, dropout=edge_mlp_dropout, activation=False)
    self.edge_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_edge_mlp, dropout=edge_mlp_dropout, activation=False)
    self.pair_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=pair_mlp_dropout, activation=False)
    self.pair_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=pair_mlp_dropout, activation=False)
    self.pair_mlp_g = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=pair_mlp_dropout, activation=False)
    self.label_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=label_mlp_dropout, activation=False)
    self.label_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=label_mlp_dropout, activation=False)

    self.edge_attn = Biaffine(n_in=n_edge_mlp, bias_x=True, bias_y=True)
    self.sib_attn = Triaffine(n_in=n_pair_mlp, bias_x=True, bias_y=True)
    self.cop_attn = Triaffine(n_in=n_pair_mlp, bias_x=True, bias_y=True)
    self.grd_attn = Triaffine(n_in=n_pair_mlp, bias_x=True, bias_y=True)
    self.label_attn = Biaffine(n_in=n_label_mlp, n_out=n_labels, bias_x=True, bias_y=True)
    self.inference = (SemanticDependencyMFVI if inference == 'mfvi' else SemanticDependencyLBP)(max_iter)
    self.criterion = nn.CrossEntropyLoss()
def __init__(self,
             n_words, n_labels, n_tags=None, n_chars=None, n_lemmas=None,
             feat='tag,char,lemma',
             n_embed=100, n_embed_proj=125, n_feat_embed=100, n_char_embed=50,
             char_pad_index=0,
             bert=None, n_bert_layers=4, mix_dropout=.0, bert_pad_index=0,
             embed_dropout=.2,
             n_lstm_hidden=600, n_lstm_layers=3, lstm_dropout=.33,
             n_mlp_edge=600, n_mlp_label=600,
             edge_mlp_dropout=.25, label_mlp_dropout=.33,
             interpolation=0.1,
             pad_index=0, unk_index=1,
             **kwargs):
    super().__init__()

    self.args = Config().update(locals())

    # the embedding layer
    self.word_embed = nn.Embedding(num_embeddings=n_words,
                                   embedding_dim=n_embed)
    self.embed_proj = nn.Linear(n_embed, n_embed_proj)

    self.n_input = n_embed + n_embed_proj
    if 'tag' in feat:
        self.tag_embed = nn.Embedding(num_embeddings=n_tags,
                                      embedding_dim=n_feat_embed)
        self.n_input += n_feat_embed
    if 'char' in feat:
        self.char_embed = CharLSTM(n_chars=n_chars,
                                   n_embed=n_char_embed,
                                   n_out=n_feat_embed,
                                   pad_index=char_pad_index)
        self.n_input += n_feat_embed
    if 'lemma' in feat:
        self.lemma_embed = nn.Embedding(num_embeddings=n_lemmas,
                                        embedding_dim=n_feat_embed)
        self.n_input += n_feat_embed
    if 'bert' in feat:
        self.bert_embed = BertEmbedding(model=bert,
                                        n_layers=n_bert_layers,
                                        pad_index=bert_pad_index,
                                        dropout=mix_dropout)
        self.n_input += self.bert_embed.n_out
    self.embed_dropout = IndependentDropout(p=embed_dropout)

    # the lstm layer
    self.lstm = LSTM(input_size=self.n_input,
                     hidden_size=n_lstm_hidden,
                     num_layers=n_lstm_layers,
                     bidirectional=True,
                     dropout=lstm_dropout)
    self.lstm_dropout = SharedDropout(p=lstm_dropout)

    # the MLP layers
    self.mlp_edge_d = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_edge, dropout=edge_mlp_dropout, activation=False)
    self.mlp_edge_h = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_edge, dropout=edge_mlp_dropout, activation=False)
    self.mlp_label_d = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_label, dropout=label_mlp_dropout, activation=False)
    self.mlp_label_h = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_label, dropout=label_mlp_dropout, activation=False)

    # the Biaffine layers
    self.edge_attn = Biaffine(n_in=n_mlp_edge, n_out=2, bias_x=True, bias_y=True)
    self.label_attn = Biaffine(n_in=n_mlp_label, n_out=n_labels, bias_x=True, bias_y=True)
    self.criterion = nn.CrossEntropyLoss()
    self.interpolation = interpolation
    self.pad_index = pad_index
    self.unk_index = unk_index
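# Usage sketch (illustrative, not from the original source): in this constructor
# feat is a comma-separated string, so feat='tag,char,lemma' switches on the tag,
# char and lemma embeddings above. "BiaffineSemanticDependencyModel" is a
# placeholder name for the class this __init__ belongs to, and the vocabulary
# sizes are made up.
# model = BiaffineSemanticDependencyModel(n_words=30000, n_labels=60,
#                                         n_tags=50, n_chars=500, n_lemmas=20000,
#                                         feat='tag,char,lemma')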
def __init__(self,
             n_words, n_feats, n_labels,
             feat='char',
             n_embed=100, n_feat_embed=100, n_char_embed=50,
             bert=None, n_bert_layers=4, mix_dropout=.0,
             embed_dropout=.33,
             n_lstm_hidden=600, n_lstm_layers=3, lstm_dropout=.33,
             n_mlp_edge=600, n_mlp_label=600, mlp_dropout=.33,
             feat_pad_index=0, pad_index=0, unk_index=1,
             **kwargs):
    super().__init__()

    self.args = Config().update(locals())

    # the embedding layer
    self.word_embed = nn.Embedding(num_embeddings=n_words,
                                   embedding_dim=n_embed)
    # linear projection for the pretrained GloVe 100d vectors
    self.glove_linear = nn.Linear(100, n_embed)
    if feat == 'char':
        self.feat_embed = CharLSTM(n_chars=n_feats,
                                   n_embed=n_char_embed,
                                   n_out=n_feat_embed,
                                   pad_index=feat_pad_index)
    elif feat == 'bert':
        self.feat_embed = BertEmbedding(model=bert,
                                        n_layers=n_bert_layers,
                                        n_out=n_feat_embed,
                                        pad_index=feat_pad_index,
                                        dropout=mix_dropout)
        self.n_feat_embed = self.feat_embed.n_out
    elif feat == 'tag':
        self.feat_embed = nn.Embedding(num_embeddings=n_feats,
                                       embedding_dim=n_feat_embed)
    else:
        raise RuntimeError("The feat type should be in ['char', 'bert', 'tag'].")
    self.embed_dropout = IndependentDropout(p=embed_dropout)

    # the lstm layer
    self.lstm = LSTM(input_size=n_embed + n_feat_embed,
                     hidden_size=n_lstm_hidden,
                     num_layers=n_lstm_layers,
                     bidirectional=True,
                     dropout=lstm_dropout)
    self.lstm_dropout = SharedDropout(p=lstm_dropout)

    # the MLP layers
    self.mlp_edge_d = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_edge, dropout=mlp_dropout)
    self.mlp_edge_h = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_edge, dropout=mlp_dropout)
    self.mlp_label_d = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_label, dropout=mlp_dropout)
    self.mlp_label_h = MLP(n_in=n_lstm_hidden * 2, n_out=n_mlp_label, dropout=mlp_dropout)

    # the Biaffine layers
    self.edge_attn = Biaffine(n_in=n_mlp_edge, n_out=2, bias_x=True, bias_y=True)
    self.label_attn = Biaffine(n_in=n_mlp_label, n_out=n_labels, bias_x=True, bias_y=True)
    self.criterion = nn.CrossEntropyLoss()
    self.pad_index = pad_index
    self.unk_index = unk_index