Example #1
    def __init__(self,
                 embed,
                 hidden_size,
                 num_layers,
                 tag_vocab,
                 dropout=0.5,
                 encoding_type='bioes'):
        super().__init__()
        self.embedding = embed
        self.lstm = LSTM(input_size=self.embedding.embedding_dim,
                         hidden_size=hidden_size // 2,
                         num_layers=num_layers,
                         bidirectional=True,
                         batch_first=True)
        self.fc = nn.Linear(hidden_size, len(tag_vocab))

        transitions = allowed_transitions(tag_vocab.idx2word,
                                          encoding_type=encoding_type,
                                          include_start_end=True)
        self.crf = ConditionalRandomField(len(tag_vocab),
                                          include_start_end_trans=True,
                                          allowed_transitions=transitions)

        self.dropout = nn.Dropout(dropout, inplace=True)

        for name, param in self.named_parameters():
            if 'fc' in name:
                if param.data.dim() > 1:
                    nn.init.xavier_uniform_(param)
                else:
                    nn.init.constant_(param, 0)
            if 'crf' in name:
                nn.init.zeros_(param)
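
Example #1 pairs a bidirectional LSTM with a CRF: each direction gets hidden_size // 2 units, so the concatenated output is exactly hidden_size wide and feeds nn.Linear(hidden_size, len(tag_vocab)). A minimal sketch of that wiring in plain torch.nn (CRF omitted; the toy sizes and num_tags are assumptions, not values from the example):

# Minimal BiLSTM -> Linear sketch in plain torch.nn; CRF omitted, sizes are assumptions.
import torch
from torch import nn

hidden_size, num_tags = 200, 9
lstm = nn.LSTM(input_size=100, hidden_size=hidden_size // 2,
               num_layers=2, bidirectional=True, batch_first=True)
fc = nn.Linear(hidden_size, num_tags)

x = torch.randn(4, 30, 100)   # (batch, seq_len, embed_dim)
out, _ = lstm(x)              # (batch, seq_len, hidden_size): both directions concatenated
logits = fc(out)              # (batch, seq_len, num_tags), the per-tag scores fed to the CRF above
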
Example #2
    def __init__(self, args, embedding, hid_dim):
        super(GCN, self).__init__()

        self.args = args
        self.layers = args.num_layers
        self.mem_dim = hid_dim
        self.in_dim = args.tok_dim + args.pos_dim + args.post_dim
        self.tok_emb, self.pos_emb, self.post_emb = embedding
        # dropout
        self.rnn_drop = nn.Dropout(args.rnn_dropout)
        self.in_drop = nn.Dropout(args.input_dropout)
        self.gcn_drop = nn.Dropout(args.gcn_dropout)

        # lstm
        input_size = self.in_dim
        self.rnn = LSTM(
            input_size,
            args.rnn_hidden,
            args.rnn_layers,
            batch_first=True,
            dropout=args.rnn_dropout,
            bidirectional=args.bidirect,
        )
        if args.bidirect:
            self.in_dim = args.rnn_hidden * 2
        else:
            self.in_dim = args.rnn_hidden

        # gcn layer
        self.G = nn.ModuleList()
        for layer in range(self.layers):
            input_dim = [self.in_dim, self.mem_dim][layer != 0]
            self.G.append(MLP([input_dim, self.mem_dim]))
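
The indexing trick `[self.in_dim, self.mem_dim][layer != 0]` picks `in_dim` only for the first GCN layer; every later layer maps `mem_dim` to `mem_dim`. A minimal sketch of the same sizing logic, with `nn.Linear` standing in for the repository's `MLP` and the dimensions chosen as assumptions:

# First-layer-vs-later-layers sizing; nn.Linear stands in for MLP, sizes are assumptions.
from torch import nn

in_dim, mem_dim, num_layers = 360, 100, 3
layers = nn.ModuleList()
for layer in range(num_layers):
    input_dim = in_dim if layer == 0 else mem_dim   # same selection as [in_dim, mem_dim][layer != 0]
    layers.append(nn.Linear(input_dim, mem_dim))
# layers: Linear(360 -> 100), Linear(100 -> 100), Linear(100 -> 100)
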
Example #3
    def __init__(self,
                 char_embed,
                 hidden_size,
                 num_layers,
                 target_vocab=None,
                 bigram_embed=None,
                 trigram_embed=None,
                 dropout=0.5):
        super().__init__()

        embed_size = char_embed.embed_size
        self.char_embed = char_embed
        if bigram_embed:
            embed_size += bigram_embed.embed_size
        self.bigram_embed = bigram_embed
        if trigram_embed:
            embed_size += trigram_embed.embed_size
        self.trigram_embed = trigram_embed

        self.lstm = LSTM(embed_size,
                         hidden_size=hidden_size // 2,
                         bidirectional=True,
                         batch_first=True,
                         num_layers=num_layers)
        self.dropout = nn.Dropout(p=dropout)
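        # NOTE: despite the None default, target_vocab is effectively required; len(None) below would raise a TypeError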
        self.fc = nn.Linear(hidden_size, len(target_vocab))

        transitions = None
        if target_vocab:
            transitions = allowed_transitions(target_vocab,
                                              include_start_end=True,
                                              encoding_type='bmes')

        self.crf = ConditionalRandomField(num_tags=len(target_vocab),
                                          allowed_transitions=transitions)
Example #4
    def __init__(self,
                 char_embed,
                 num_classes,
                 bigram_embed=None,
                 trigram_embed=None,
                 num_layers=1,
                 hidden_size=100,
                 dropout=0.5,
                 target_vocab=None,
                 encoding_type=None):
        super().__init__()

        self.char_embed = get_embeddings(char_embed)
        embed_size = self.char_embed.embedding_dim
        if bigram_embed:
            self.bigram_embed = get_embeddings(bigram_embed)
            embed_size += self.bigram_embed.embedding_dim
        if trigram_embed:
            self.trigram_embed = get_embeddings(trigram_embed)
            embed_size += self.trigram_embed.embedding_dim

        if num_layers > 1:
            self.lstm = LSTM(embed_size,
                             num_layers=num_layers,
                             hidden_size=hidden_size // 2,
                             bidirectional=True,
                             batch_first=True,
                             dropout=dropout)
        else:
            self.lstm = LSTM(embed_size,
                             num_layers=num_layers,
                             hidden_size=hidden_size // 2,
                             bidirectional=True,
                             batch_first=True)

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, num_classes)

        trans = None
        if target_vocab is not None and encoding_type is not None:
            trans = allowed_transitions(target_vocab.idx2word,
                                        encoding_type=encoding_type,
                                        include_start_end=True)

        self.crf = ConditionalRandomField(num_classes,
                                          include_start_end_trans=True,
                                          allowed_transitions=trans)
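
`torch.nn.LSTM` applies `dropout` only between stacked layers and warns when `dropout > 0` is combined with `num_layers=1`, which is why Example #4 branches on `num_layers`. A hedged sketch of building the keyword arguments once instead (the helper name and the use of plain `nn.LSTM` are assumptions):

# Sketch: assemble the LSTM kwargs once instead of duplicating the call; helper name is hypothetical.
from torch import nn

def make_bilstm(embed_size, hidden_size, num_layers, dropout):
    kwargs = dict(num_layers=num_layers, hidden_size=hidden_size // 2,
                  bidirectional=True, batch_first=True)
    if num_layers > 1:   # nn.LSTM uses dropout only between stacked layers
        kwargs['dropout'] = dropout
    return nn.LSTM(embed_size, **kwargs)
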
Example #5
    def __init__(self, bert_embedding, label_size, vocabs, after_bert):
        super().__init__()
        self.after_bert = after_bert
        self.bert_embedding = bert_embedding
        self.label_size = label_size
        self.vocabs = vocabs
        self.hidden_size = bert_embedding._embed_size
        self.output = nn.Linear(self.hidden_size, self.label_size)
        self.crf = get_crf_zero_init(self.label_size)
        if self.after_bert == 'lstm':
            # hidden_size // 2 per direction keeps the BiLSTM output width equal to the BERT embedding size
            self.lstm = LSTM(bert_embedding._embed_size, bert_embedding._embed_size // 2,
                             bidirectional=True)
        self.dropout = MyDropout(0.5)
Example #6
    def __init__(self,
                 char_embed,
                 bigram_embed,
                 word_embed,
                 hidden_size,
                 label_size,
                 bias=True,
                 bidirectional=False,
                 device=None,
                 embed_dropout=0,
                 output_dropout=0,
                 use_bigram=True):

        super().__init__()
        if device is None:
            self.device = torch.device('cpu')
        else:
            self.device = torch.device(device)
        self.char_embed_size = char_embed.embedding.weight.size(1)
        self.bigram_embed_size = bigram_embed.embedding.weight.size(1)
        self.word_embed_size = word_embed.embedding.weight.size(1)
        self.hidden_size = hidden_size
        self.label_size = label_size
        self.bidirectional = bidirectional
        self.use_bigram = use_bigram

        self.char_embed = char_embed
        self.bigram_embed = bigram_embed
        self.word_embed = word_embed

        if self.use_bigram:
            self.input_size = self.char_embed_size + self.bigram_embed_size
        else:
            self.input_size = self.char_embed_size

        self.encoder = LSTM(self.input_size,
                            self.hidden_size,
                            bidirectional=self.bidirectional)

        better_init_rnn(self.encoder.lstm)

        self.output = nn.Linear(
            self.hidden_size * (2 if self.bidirectional else 1),
            self.label_size)

        self.debug = False
        self.loss_func = nn.CrossEntropyLoss()
        self.embed_dropout = nn.Dropout(embed_dropout)
        self.output_dropout = nn.Dropout(output_dropout)
        self.crf = ConditionalRandomField(label_size, True)
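
With `use_bigram=True`, the encoder's `input_size` is the sum of the character and bigram embedding widths, which is consistent with concatenating the two feature streams along the last dimension before the LSTM (the forward pass is not shown, so this is an assumption). A shape sketch with toy sizes:

# Shape sketch of char + bigram feature concatenation; all sizes are assumptions.
import torch

char_feats = torch.randn(4, 30, 50)     # (batch, seq_len, char_embed_size)
bigram_feats = torch.randn(4, 30, 50)   # (batch, seq_len, bigram_embed_size)
inputs = torch.cat([char_feats, bigram_feats], dim=-1)   # (4, 30, 100) == input_size of the encoder
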
Example #7
    def __init__(self, config):
        super(Encoder, self).__init__()
        self.config = config
        self.embedding = nn.Embedding(config.vocab_size, config.emb_dim)
        init_wt_normal(config, self.embedding.weight)
        self.join = nn.Linear(4 * config.hidden_dim, 2 * config.hidden_dim)
        init_linear_wt(config, self.join)
        self.lstm = LSTM(config.emb_dim,
                         config.hidden_dim,
                         num_layers=1,
                         batch_first=True,
                         bidirectional=True)
        self.graph_feature_lstm = LSTM(config.emb_dim,
                                       config.hidden_dim,
                                       num_layers=1,
                                       batch_first=True,
                                       bidirectional=True)
        self.mlp = MLP(size_layer=[
            config.hidden_dim * 4, config.hidden_dim * 2,
            config.hidden_dim * 2, 1
        ],
                       activation="tanh")

        self.criterion = nn.MSELoss(reduction="sum")
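
Both LSTMs in Example #7 are bidirectional, so each emits `2 * hidden_dim` features per step; the `join` layer's `4 * hidden_dim -> 2 * hidden_dim` shape suggests their outputs are concatenated before being fused (the forward pass is not shown, so this is an assumption). A shape sketch:

# Shape sketch for the `join` projection; the concatenation and toy sizes are assumptions.
import torch
from torch import nn

hidden_dim = 128
seq_out = torch.randn(4, 30, 2 * hidden_dim)      # output of the bidirectional `lstm`
graph_out = torch.randn(4, 30, 2 * hidden_dim)    # output of the bidirectional `graph_feature_lstm`
join = nn.Linear(4 * hidden_dim, 2 * hidden_dim)
fused = join(torch.cat([seq_out, graph_out], dim=-1))   # (4, 30, 2 * hidden_dim)
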
Example #8
    def __init__(self,
                 char_embed,
                 bigram_embed,
                 hidden_size: int = 400,
                 num_layers: int = 1,
                 L: int = 6,
                 drop_p: float = 0.2):
        super().__init__()
        self.char_embedding = char_embed
        self.bigram_embedding = bigram_embed
        self.lstm = LSTM(char_embed.embed_size + bigram_embed.embed_size,
                         hidden_size // 2,
                         num_layers=num_layers,
                         bidirectional=True,
                         batch_first=True)
        self.feature_fn = FeatureFunMax(hidden_size, L)
        self.semi_crf_relay = SemiCRFShiftRelay(L)
        self.feat_drop = nn.Dropout(drop_p)
        self.reset_param()
Example #9
    def __init__(self,
                 char_embed: Embedding,
                 bigram_embed: Embedding,
                 hidden_size: int = 400,
                 num_layers: int = 1,
                 L: int = 6,
                 num_bigram_per_char: int = 1,
                 drop_p: float = 0.2):
        super().__init__()
        self.char_embedding = Embedding(char_embed, dropout=drop_p)
        self._pretrained_embed = False
        if isinstance(char_embed, np.ndarray):
            self._pretrained_embed = True
        self.bigram_embedding = Embedding(bigram_embed, dropout=drop_p)
        # input size assumes 100-dim vectors: one char plus num_bigram_per_char bigrams per position
        self.lstm = LSTM(100 * (num_bigram_per_char + 1),
                         hidden_size // 2,
                         num_layers=num_layers,
                         bidirectional=True,
                         batch_first=True)
        self.feature_fn = FeatureFunMax(hidden_size, L)
        self.semi_crf_relay = SemiCRFShiftRelay(L)
        self.feat_drop = nn.Dropout(drop_p)
        self.reset_param()
Example #10
    def __init__(self, args, dep_tag_num, pos_tag_num):
        super(Aspect_Text_GAT_ours, self).__init__()
        self.args = args

        num_embeddings, embed_dim = args.glove_embedding.shape
        self.embed = nn.Embedding(num_embeddings, embed_dim, padding_idx=0)
        self.embed.weight = nn.Parameter(args.glove_embedding,
                                         requires_grad=False)

        self.dropout = nn.Dropout(args.dropout)

        if args.highway:
            self.highway_dep = Highway(args.num_layers, args.embedding_dim)
            self.highway = Highway(args.num_layers, args.embedding_dim)
        if args.num_layers > 1:
            self.bilstm = LSTM(input_size=args.embedding_dim,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               num_layers=args.num_layers,
                               dropout=0.5)
        else:
            self.bilstm = LSTM(input_size=args.embedding_dim,
                               hidden_size=args.hidden_size,
                               bidirectional=True,
                               batch_first=True,
                               num_layers=args.num_layers)
        gcn_input_dim = args.hidden_size * 2

        # if args.gat:
        self.gat_dep = [
            RelationAttention(in_dim=args.embedding_dim).to(args.device)
            for i in range(args.num_heads)
        ]
        if args.gat_attention_type == 'linear':
            self.gat = [
                LinearAttention(in_dim=gcn_input_dim,
                                mem_dim=gcn_input_dim).to(args.device)
                for i in range(args.num_heads)
            ]  # we prefer to keep the dimension unchanged
        elif args.gat_attention_type == 'dotprod':
            self.gat = [DotprodAttention() for i in range(args.num_heads)]
        else:
            # reshaped gcn
            self.gat = nn.Linear(gcn_input_dim, gcn_input_dim)

        self.dep_embed = nn.Embedding(dep_tag_num,
                                      args.embedding_dim,
                                      padding_idx=0)
        torch.nn.init.uniform_(self.dep_embed.weight,
                               a=-1. / math.sqrt(args.embedding_dim),
                               b=1. / math.sqrt(args.embedding_dim))

        last_hidden_size = args.hidden_size * 4

        layers = [
            nn.Linear(last_hidden_size, args.final_hidden_size),
            nn.Dropout(0.5),
            nn.ReLU()
        ]
        for _ in range(args.num_mlps - 1):
            layers += [
                nn.Linear(args.final_hidden_size, args.final_hidden_size),
                nn.Dropout(0.5),
                nn.ReLU()
            ]
        self.fcs = nn.Sequential(*layers)
        self.fc_final = nn.Linear(args.final_hidden_size, args.num_classes)
        self._reset_params()
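
One caveat in Example #10: `self.gat_dep` and the `'linear'`/`'dotprod'` variants of `self.gat` are plain Python lists, so their submodules are not registered with the parent `nn.Module` — their parameters are missing from `model.parameters()` and `state_dict()` and do not follow `model.to(...)`, which is presumably why each head is moved to `args.device` by hand. A minimal, self-contained sketch of the difference (the toy class and sizes are assumptions):

# Plain list vs nn.ModuleList: only the latter registers child parameters with the parent module.
import torch
from torch import nn

class ToyHead(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

class WithList(nn.Module):
    def __init__(self):
        super().__init__()
        self.heads = [ToyHead(8) for _ in range(3)]                 # NOT registered

class WithModuleList(nn.Module):
    def __init__(self):
        super().__init__()
        self.heads = nn.ModuleList(ToyHead(8) for _ in range(3))    # registered

print(sum(p.numel() for p in WithList().parameters()))        # 0
print(sum(p.numel() for p in WithModuleList().parameters()))  # 216 = 3 * (8*8 + 8)
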
Example #11
    def __init__(self,
                 lattice_embed,
                 bigram_embed,
                 hidden_size,
                 label_size,
                 num_heads,
                 num_layers,
                 use_abs_pos,
                 use_rel_pos,
                 learnable_position,
                 add_position,
                 layer_preprocess_sequence,
                 layer_postprocess_sequence,
                 ff_size=-1,
                 scaled=True,
                 dropout=None,
                 use_bigram=True,
                 mode=collections.defaultdict(bool),
                 dvc=None,
                 vocabs=None,
                 rel_pos_shared=True,
                 max_seq_len=-1,
                 k_proj=True,
                 q_proj=True,
                 v_proj=True,
                 r_proj=True,
                 self_supervised=False,
                 attn_ff=True,
                 pos_norm=False,
                 ff_activate='relu',
                 rel_pos_init=0,
                 abs_pos_fusion_func='concat',
                 embed_dropout_pos='0',
                 four_pos_shared=True,
                 four_pos_fusion=None,
                 four_pos_fusion_shared=True,
                 bert_embedding=None,
                 use_pos_tag=False,
                 after_bert='mlp'):
        '''
        :param rel_pos_init: if 0, the relative position embedding matrix covering -max_len..max_len
            is initialized over the index range 0..2*max_len; if 1, it is initialized over
            -max_len..max_len directly.

        :param embed_dropout_pos: if '0', dropout is applied right after the embedding lookup;
            if '1', after the embedding has been projected to hidden size; if '2', after the
            absolute position encoding has been added.
        '''
        super().__init__()

        self.use_bert = False
        if bert_embedding is not None:
            self.use_bert = True
            self.bert_embedding = bert_embedding
        self.after_bert = after_bert
        self.four_pos_fusion_shared = four_pos_fusion_shared
        self.mode = mode
        self.four_pos_shared = four_pos_shared
        self.abs_pos_fusion_func = abs_pos_fusion_func
        self.lattice_embed = lattice_embed
        self.bigram_embed = bigram_embed
        self.hidden_size = hidden_size
        self.label_size = label_size
        self.num_heads = num_heads
        self.num_layers = num_layers
        # self.relative_position = relative_position
        self.use_abs_pos = use_abs_pos
        self.use_rel_pos = use_rel_pos
        if self.use_rel_pos:
            assert four_pos_fusion is not None
        self.four_pos_fusion = four_pos_fusion
        self.learnable_position = learnable_position
        self.add_position = add_position
        self.rel_pos_shared = rel_pos_shared
        self.self_supervised = self_supervised
        self.vocabs = vocabs
        self.attn_ff = attn_ff
        self.pos_norm = pos_norm
        self.ff_activate = ff_activate
        self.rel_pos_init = rel_pos_init
        self.embed_dropout_pos = embed_dropout_pos
        self.use_pos_tag = use_pos_tag

        if self.use_rel_pos and max_seq_len < 0:
            print_info('max_seq_len must be set when relative position encoding is used')
            exit(1208)

        self.max_seq_len = max_seq_len

        self.k_proj = k_proj
        self.q_proj = q_proj
        self.v_proj = v_proj
        self.r_proj = r_proj

        self.pe = None

        if self.use_abs_pos:
            self.abs_pos_encode = Absolute_SE_Position_Embedding(
                self.abs_pos_fusion_func,
                self.hidden_size,
                learnable=self.learnable_position,
                mode=self.mode,
                pos_norm=self.pos_norm)

        if self.use_rel_pos:
            pe = get_embedding(max_seq_len,
                               hidden_size,
                               rel_pos_init=self.rel_pos_init)
            pe_sum = pe.sum(dim=-1, keepdim=True)
            if self.pos_norm:
                with torch.no_grad():
                    pe = pe / pe_sum
            self.pe = nn.Parameter(pe, requires_grad=self.learnable_position)
            if self.four_pos_shared:
                self.pe_ss = self.pe
                self.pe_se = self.pe
                self.pe_es = self.pe
                self.pe_ee = self.pe
            else:
                self.pe_ss = nn.Parameter(
                    copy.deepcopy(pe), requires_grad=self.learnable_position)
                self.pe_se = nn.Parameter(
                    copy.deepcopy(pe), requires_grad=self.learnable_position)
                self.pe_es = nn.Parameter(
                    copy.deepcopy(pe), requires_grad=self.learnable_position)
                self.pe_ee = nn.Parameter(
                    copy.deepcopy(pe), requires_grad=self.learnable_position)
        else:
            self.pe = None
            self.pe_ss = None
            self.pe_se = None
            self.pe_es = None
            self.pe_ee = None

        self.layer_preprocess_sequence = layer_preprocess_sequence
        self.layer_postprocess_sequence = layer_postprocess_sequence
        if ff_size == -1:
            ff_size = self.hidden_size
        self.ff_size = ff_size
        self.scaled = scaled
        if dvc is None:
            dvc = 'cpu'
        self.dvc = torch.device(dvc)
        if dropout is None:
            self.dropout = collections.defaultdict(int)
        else:
            self.dropout = dropout
        self.use_bigram = use_bigram

        if self.use_bigram:
            self.bigram_size = self.bigram_embed.embedding.weight.size(1)
            self.char_input_size = self.lattice_embed.embedding.weight.size(
                1) + self.bigram_embed.embedding.weight.size(1)
        else:
            self.char_input_size = self.lattice_embed.embedding.weight.size(1)

        if self.use_bert:
            self.char_input_size += self.bert_embedding._embed_size

        self.lex_input_size = self.lattice_embed.embedding.weight.size(1)

        self.embed_dropout = MyDropout(self.dropout['embed'])
        self.gaz_dropout = MyDropout(self.dropout['gaz'])

        self.char_proj = nn.Linear(self.char_input_size, self.hidden_size)
        self.lex_proj = nn.Linear(self.lex_input_size, self.hidden_size)

        self.encoder = Transformer_Encoder(
            self.hidden_size,
            self.num_heads,
            self.num_layers,
            relative_position=self.use_rel_pos,
            learnable_position=self.learnable_position,
            add_position=self.add_position,
            layer_preprocess_sequence=self.layer_preprocess_sequence,
            layer_postprocess_sequence=self.layer_postprocess_sequence,
            dropout=self.dropout,
            scaled=self.scaled,
            ff_size=self.ff_size,
            mode=self.mode,
            dvc=self.dvc,
            max_seq_len=self.max_seq_len,
            pe=self.pe,
            pe_ss=self.pe_ss,
            pe_se=self.pe_se,
            pe_es=self.pe_es,
            pe_ee=self.pe_ee,
            k_proj=self.k_proj,
            q_proj=self.q_proj,
            v_proj=self.v_proj,
            r_proj=self.r_proj,
            attn_ff=self.attn_ff,
            ff_activate=self.ff_activate,
            lattice=True,
            four_pos_fusion=self.four_pos_fusion,
            four_pos_fusion_shared=self.four_pos_fusion_shared)

        self.output_dropout = MyDropout(self.dropout['output'])

        print('pos_tag len...', len(list(vocabs['pos_tag'])))

        self.pos_feature_size = 10
        self.pos_embed_size = 28
        if self.use_pos_tag:
            self.pos_embedding = nn.Embedding(len(list(vocabs['pos_tag'])),
                                              self.pos_embed_size)
            self.pos_pj = nn.Linear(self.pos_embed_size, self.pos_feature_size)
        else:
            self.pos_feature_size = 0
        self.output = nn.Linear(self.hidden_size + self.pos_feature_size,
                                self.label_size)
        if self.after_bert == 'lstm':
            self.pj_after_bert = LSTM(
                self.hidden_size + self.pos_feature_size,
                (self.hidden_size + self.pos_feature_size) // 2,
                bidirectional=True,
                num_layers=2,
            )
        if self.self_supervised:
            self.output_self_supervised = nn.Linear(self.hidden_size,
                                                    len(vocabs['char']))
            print('self.output_self_supervised:{}'.format(
                self.output_self_supervised.weight.size()))
        self.crf = get_crf_zero_init(self.label_size)
        self.loss_func = nn.CrossEntropyLoss(ignore_index=-100)
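
When `dropout` is `None`, Example #11 falls back to `collections.defaultdict(int)`, so every per-component lookup (`'embed'`, `'gaz'`, `'output'`) silently yields a dropout rate of 0. A one-line illustration of that fallback:

# defaultdict(int) returns 0 for any missing key, so unspecified dropout rates default to 0.
import collections

dropout = collections.defaultdict(int)
print(dropout['embed'], dropout['gaz'], dropout['output'])   # 0 0 0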