    def __init__(self, config, params):
        super(Context_att, self).__init__()
        self.word_num = params.word_num
        self.label_num = params.label_num
        self.char_num = params.char_num

        self.id2word = params.word_alphabet.id2word
        self.word2id = params.word_alphabet.word2id
        self.padID = params.word_alphabet.word2id['<pad>']
        self.unkID = params.word_alphabet.word2id['<unk>']

        self.use_cuda = params.use_cuda
        self.add_char = params.add_char
        self.static = params.static

        self.feature_count = config.shrink_feature_thresholds
        self.word_dims = config.word_dims
        self.char_dims = config.char_dims

        self.lstm_hiddens = config.lstm_hiddens
        self.attention_size = config.attention_size

        self.dropout_emb = nn.Dropout(p=config.dropout_emb)
        self.dropout_lstm = nn.Dropout(p=config.dropout_lstm)

        self.lstm_layers = config.lstm_layers
        self.batch_size = config.train_batch_size

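        # trainable word-embedding channel; when static is set, a second frozen
        # copy is created (presumably concatenated with it in forward(), hence
        # the doubled LSTM input size below)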
        self.embedding = nn.Embedding(self.word_num, self.word_dims)
        self.embedding.weight.requires_grad = True
        if self.static:
            self.embedding_static = nn.Embedding(self.word_num, self.word_dims)
            self.embedding_static.weight.requires_grad = False

        if params.pretrain_word_embedding is not None:
            # pretrain_weight = np.array(params.pretrain_word_embedding)
            # self.embedding.weight.data.copy_(torch.from_numpy(pretrain_weight))
            # pretrain_weight = np.array(params.pretrain_embed)
            pretrain_weight = torch.FloatTensor(params.pretrain_word_embedding)
            self.embedding.weight.data.copy_(pretrain_weight)
            if self.static:
                # assumed intent: the frozen static channel should start from the
                # same pretrained vectors rather than stay randomly initialized
                self.embedding_static.weight.data.copy_(pretrain_weight)

        # for id in range(self.word_dims):
        #     self.embedding.weight.data[self.eofID][id] = 0

        # with a static channel the LSTM input is word_dims * 2
        # (trainable + frozen embeddings, presumably concatenated in forward())
        if self.static:
            self.lstm = nn.LSTM(self.word_dims * 2,
                                self.lstm_hiddens // 2,
                                num_layers=self.lstm_layers,
                                bidirectional=True,
                                dropout=config.dropout_lstm)
        else:
            self.lstm = nn.LSTM(self.word_dims,
                                self.lstm_hiddens // 2,
                                num_layers=self.lstm_layers,
                                bidirectional=True,
                                dropout=config.dropout_lstm)

        self.hidden2label = nn.Linear(self.lstm_hiddens, self.label_num)
        self.hidden = self.init_hidden(self.batch_size, self.lstm_layers)

        # self.attention = Attention(self.lstm_hiddens, self.attention_size, self.use_cuda)
        # self.attention_l = Attention(self.lstm_hiddens, self.attention_size, self.use_cuda)
        # self.attention_r = Attention(self.lstm_hiddens, self.attention_size, self.use_cuda)

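        # Attention_b modules over what the names suggest are the full sentence,
        # the left context, and the right context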
        self.attention = Attention_b(self.lstm_hiddens, self.attention_size,
                                     self.use_cuda)
        self.attention_l = Attention_b(self.lstm_hiddens, self.attention_size,
                                       self.use_cuda)
        self.attention_r = Attention_b(self.lstm_hiddens, self.attention_size,
                                       self.use_cuda)

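        # presumably one label classifier per attention branch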
        self.linear = nn.Linear(self.lstm_hiddens, self.label_num, bias=True)
        self.linear_l = nn.Linear(self.lstm_hiddens, self.label_num, bias=True)
        self.linear_r = nn.Linear(self.lstm_hiddens, self.label_num, bias=True)

class Context_att_gate(nn.Module):
    def __init__(self, config, params):
        super(Context_att_gate, self).__init__()
        self.word_num = params.word_num
        self.label_num = params.label_num
        self.char_num = params.char_num
        self.category_num = params.category_num

        self.id2word = params.word_alphabet.id2word
        self.word2id = params.word_alphabet.word2id
        self.padID = params.word_alphabet.word2id['<pad>']
        self.unkID = params.word_alphabet.word2id['<unk>']

        self.use_cuda = params.use_cuda
        self.add_char = params.add_char
        self.static = params.static

        self.feature_count = config.shrink_feature_thresholds
        self.word_dims = config.word_dims
        self.char_dims = config.char_dims

        self.lstm_hiddens = config.lstm_hiddens
        self.attention_size = config.attention_size

        self.dropout_emb = nn.Dropout(p=config.dropout_emb)
        self.dropout_lstm = nn.Dropout(p=config.dropout_lstm)

        self.lstm_layers = config.lstm_layers
        self.batch_size = config.train_batch_size

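        # trainable word and label embeddings of equal size; the LSTM input below
        # is word_dims * 2, so the two are presumably concatenated in forward()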
        self.embedding = nn.Embedding(self.word_num, self.word_dims)
        self.embedding.weight.requires_grad = True
        self.embedding_label = nn.Embedding(self.label_num, self.word_dims)
        self.embedding_label.weight.requires_grad = True

        if params.pretrain_word_embedding is not None:
            # pretrain_weight = np.array(params.pretrain_word_embedding)
            # self.embedding.weight.data.copy_(torch.from_numpy(pretrain_weight))
            # pretrain_weight = np.array(params.pretrain_embed)
            pretrain_weight = torch.FloatTensor(params.pretrain_word_embedding)
            self.embedding.weight.data.copy_(pretrain_weight)

        # for id in range(self.word_dims):
        #     self.embedding.weight.data[self.eofID][id] = 0

        self.lstm = nn.LSTM(self.word_dims * 2,
                            self.lstm_hiddens // 2,
                            num_layers=self.lstm_layers,
                            bidirectional=True,
                            dropout=config.dropout_lstm)

        # nn.init.xavier_uniform(self.lstm.all_weights[0][0])
        # nn.init.xavier_uniform(self.lstm.all_weights[0][1])
        # nn.init.xavier_uniform(self.lstm.all_weights[1][0])
        # nn.init.xavier_uniform(self.lstm.all_weights[1][1])

        self.hidden2label = nn.Linear(self.lstm_hiddens, self.category_num)
        self.hidden = self.init_hidden(self.batch_size, self.lstm_layers)

        self.attention = Attention_b(self.lstm_hiddens, self.attention_size,
                                     self.use_cuda)
        self.attention_l = Attention_b(self.lstm_hiddens, self.attention_size,
                                       self.use_cuda)
        self.attention_r = Attention_b(self.lstm_hiddens, self.attention_size,
                                       self.use_cuda)

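        # w*/u*/b* parameterize what appears to be a gating combination of the
        # three attention outputs (e.g. gate = sigmoid(w·x + u·y + b) per branch)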
        self.w1 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
        self.w2 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
        self.w3 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))

        self.u1 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
        self.u2 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))
        self.u3 = Parameter(torch.randn(self.lstm_hiddens, self.lstm_hiddens))

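        # note: the bias terms are shaped (lstm_hiddens, batch_size), which ties
        # them to the configured train batch size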
        self.b1 = Parameter(torch.randn(self.lstm_hiddens, self.batch_size))
        self.b2 = Parameter(torch.randn(self.lstm_hiddens, self.batch_size))
        self.b3 = Parameter(torch.randn(self.lstm_hiddens, self.batch_size))

        nn.init.xavier_uniform_(self.w1)
        nn.init.xavier_uniform_(self.w2)
        nn.init.xavier_uniform_(self.w3)

        nn.init.xavier_uniform_(self.u1)
        nn.init.xavier_uniform_(self.u2)
        nn.init.xavier_uniform_(self.u3)

        nn.init.xavier_uniform_(self.b1)
        nn.init.xavier_uniform_(self.b2)
        nn.init.xavier_uniform_(self.b3)

        self.linear_2 = nn.Linear(self.lstm_hiddens,
                                  self.category_num,
                                  bias=True)
        nn.init.xavier_uniform_(self.linear_2.weight)