Example #1
    def __call__(self, inputs, state, scope=None):
        """Updates the state using the previous @state and @inputs.
        
        Args:
            inputs: is the input vector of size [None, self.input_size]
            state: is the previous state vector of size [None, self.state_size]
            scope: is the name of the scope to be used when defining the variables inside.
        Returns:
            a pair of the output vector and the new state vector.
        """
        scope = scope or type(self).__name__

        with tf.variable_scope(scope):

            W_x = tf.get_variable('W_x',
                                  shape=[self.input_size, self.state_size],
                                  dtype=tf.float32,
                                  initializer=xavier_weight_init())
            W_h = tf.get_variable('W_h',
                                  shape=[self.state_size, self.state_size],
                                  dtype=tf.float32,
                                  initializer=xavier_weight_init())
            b = tf.get_variable('b',
                                shape=[
                                    self.state_size,
                                ],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))

            new_state = tf.sigmoid(
                tf.matmul(inputs, W_x) + tf.matmul(state, W_h) + b)

        output = new_state
        return output, new_state
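
The cell above is meant to be applied repeatedly over time steps while sharing its weights. Below is a minimal unrolling sketch, assuming a `cell` built from this class, an `inputs` tensor of shape (batch, max_length, input_size), and a `max_length` known in advance; all names here are illustrative rather than taken from the original code (Example #9 further down shows the same pattern inside a full model).

import tensorflow as tf

def unroll(cell, inputs, max_length):
    # inputs: (batch, max_length, input_size); returns (batch, max_length, state_size)
    batch_size = tf.shape(inputs)[0]
    state = tf.zeros([batch_size, cell.state_size], dtype=tf.float32)
    outputs = []
    with tf.variable_scope("unroll"):
        for t in range(max_length):
            if t > 0:
                # reuse W_x, W_h and b across time steps
                tf.get_variable_scope().reuse_variables()
            output, state = cell(inputs[:, t, :], state)
            outputs.append(output)
    return tf.stack(outputs, axis=1)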
Example #2
    def add_prediction_op(self):
        """
        Returns:
            pred: tf.Tensor of shape (batch_size, n_classes)
        """

        x = self.add_embedding()
        dropout_rate = self.dropout_placeholder

        weight_initializer = xavier_weight_init()
        self.W = tf.Variable(
            weight_initializer(
                (self.config.n_window_features * self.config.embed_size,
                 self.config.hidden_size)))
        self.b1 = tf.Variable(tf.zeros(self.config.hidden_size, tf.float32))
        self.U = tf.Variable(
            weight_initializer(
                (self.config.hidden_size, self.config.n_classes)))
        self.b2 = tf.Variable(tf.zeros(self.config.n_classes, tf.float32))

        h = tf.nn.relu(tf.matmul(x, self.W) + self.b1)
        h_drop = tf.nn.dropout(h, 1 - dropout_rate)  # use the dropout placeholder (drop probability) rather than the config constant, so dropout can be switched off at eval time
        pred = tf.matmul(h_drop, self.U) + self.b2

        return pred
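
Every snippet on this page assumes `xavier_weight_init` imported from the assignment's initialization.py. Below is a minimal sketch of what that factory commonly looks like, assuming the standard Glorot/Xavier uniform scheme with epsilon = sqrt(6 / sum(shape)); the assignment's actual implementation may differ.

import numpy as np
import tensorflow as tf

def xavier_weight_init():
    """Returns an initializer usable either directly (wrapped in tf.Variable)
    or as the `initializer=` argument of tf.get_variable."""
    def _xavier_initializer(shape, **kwargs):
        # draw from U(-epsilon, epsilon) with epsilon = sqrt(6 / (fan_in + fan_out))
        epsilon = np.sqrt(6.0 / np.sum(shape))
        return tf.random_uniform(shape, minval=-epsilon, maxval=epsilon)
    return _xavier_initializer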
    def add_embeddings(self):
        """Creates embeddings that map word, tag, deprels to vectors

        Embedding layers convert (sparse) ID representations to dense,
        lower-dimensional representations. Inputs are integers, outputs
        are floats.

         - Create 3 embedding matrices, one for each of the input types.
           Input values index the rows of the matrices to extract. The
           max bound (exclusive) on the values in the input can be found
           in {n_word_ids, n_tag_ids, n_deprel_ids}
           After lookup, the resulting tensors should each be of shape
           (None, n, embed_size), where n is one of
           {n_word_features, n_tag_features, n_deprel_features}.
         - Initialize the word_id embedding matrix with
           self.word_embeddings. Initialize the other two matrices
           with the Xavier initialization you implemented
         - Reshape the embedding tensors into shapes
           (None, n * embed_size)

        ** Embedding matrices should be variables, not constants! **

        Use tf.nn.embedding_lookup. Also take a look at tf.reshape

        Returns:
            word_embeddings : tf.Tensor of type tf.float32 and shape
                (None, n_word_features * embed_size)
            tag_embeddings : tf.float32 (None, n_tag_features * embed_size)
            deprel_embeddings : tf.float32
                (None, n_deprel_features * embed_size)
        """
        ### BEGIN YOUR CODE
        xavier_initializer = xavier_weight_init()

        word_embedding_matrix = tf.get_variable("word_embedding_matrix", 
            [self.config.n_word_ids, self.config.embed_size], 
            initializer=tf.constant_initializer(self.word_embeddings))

        tag_embedding_matrix = tf.get_variable("tag_embedding_matrix", 
            [self.config.n_tag_ids, self.config.embed_size], 
            initializer=xavier_initializer)

        deprel_embedding_matrix = tf.get_variable("deprel_embedding_matrix", 
            [self.config.n_deprel_ids, self.config.embed_size], 
            initializer=xavier_initializer)


        word_lookup = tf.nn.embedding_lookup(word_embedding_matrix, self.word_id_placeholder)
        tag_lookup = tf.nn.embedding_lookup(tag_embedding_matrix, self.tag_id_placeholder)
        deprel_lookup = tf.nn.embedding_lookup(deprel_embedding_matrix, self.deprel_id_placeholder)

        word_embeddings = tf.reshape(word_lookup, [-1, self.config.n_word_features * self.config.embed_size])
        tag_embeddings = tf.reshape(tag_lookup, [-1, self.config.n_tag_features * self.config.embed_size])
        deprel_embeddings = tf.reshape(deprel_lookup, [-1, self.config.n_deprel_features * self.config.embed_size])
        ### END YOUR CODE
        return word_embeddings, tag_embeddings, deprel_embeddings
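
For reference, the lookup-then-reshape shape flow used above can be checked on toy data; the sizes below are made up for illustration and are not from the original code.

import tensorflow as tf

n_word_ids, embed_size, n_word_features = 100, 4, 3

embedding_matrix = tf.Variable(tf.random_uniform([n_word_ids, embed_size]))
ids = tf.constant([[1, 5, 7], [2, 0, 99]])  # (batch=2, n_word_features=3)
looked_up = tf.nn.embedding_lookup(embedding_matrix, ids)              # (2, 3, 4)
flattened = tf.reshape(looked_up, [-1, n_word_features * embed_size])  # (2, 12)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.shape(flattened)))  # [ 2 12]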
Example #4
    def add_prediction_op(self):
        x = self.add_embedding()
        xavier_initializer = xavier_weight_init()

        W1 = tf.Variable(
            xavier_initializer([
                self.config.n_window_features * self.config.embed_size,
                self.config.hidden_size_1
            ]))
        b1 = tf.Variable(
            tf.zeros([self.config.hidden_size_1], dtype=tf.float32))
        U = tf.Variable(
            xavier_initializer(
                [self.config.hidden_size_2, self.config.n_classes]))
        W2 = tf.Variable(
            xavier_initializer(
                [self.config.hidden_size_1, self.config.hidden_size_2]))
        b2 = tf.Variable(
            tf.zeros([self.config.hidden_size_2], dtype=tf.float32))
        W3 = tf.Variable(
            xavier_initializer(
                [self.config.hidden_size_2, self.config.hidden_size_2]))  # layer_2 has width hidden_size_2
        b3 = tf.Variable(
            tf.zeros([self.config.hidden_size_2], dtype=tf.float32))
        W4 = tf.Variable(
            xavier_initializer(
                [self.config.hidden_size_2, self.config.hidden_size_2]))  # layer_3 has width hidden_size_2
        b4 = tf.Variable(
            tf.zeros([self.config.hidden_size_2], dtype=tf.float32))
        b5 = tf.Variable(tf.zeros([self.config.n_classes], dtype=tf.float32))

        layer_1 = tf.add(tf.matmul(x, W1), b1)
        layer_1 = tf.nn.relu(layer_1)

        layer_2 = tf.add(tf.matmul(layer_1, W2), b2)
        layer_2 = tf.nn.relu(layer_2)

        layer_3 = tf.add(tf.matmul(layer_2, W3), b3)
        layer_3 = tf.nn.relu(layer_3)

        layer_4 = tf.add(tf.matmul(layer_3, W4), b4)
        layer_4 = tf.nn.softmax(layer_4)

        # distrib holds the logits; it is not yet a probability distribution because
        # the softmax is applied by softmax_cross_entropy in the loss op
        distrib = tf.matmul(layer_4, U) + b5
        pred = tf.nn.softmax(distrib)

        # self.regularization = self.config.lr*tf.nn.l2_loss(U) + self.config.lr*tf.nn.l2_loss(W)

        return pred
    def add_prediction_op(self):

        x = self.add_embedding()

        init = xavier_weight_init()
        with tf.variable_scope("transformation"):
            b1 = tf.Variable(tf.zeros([self.config.hidden_size]))
            b2 = tf.Variable(tf.zeros([self.config.n_classes]))
            # wrap the Xavier-initialized tensors in tf.Variable so they are trainable
            self.W = tf.Variable(init([
                self.config.n_features * self.config.embed_size,
                self.config.hidden_size
            ]))
            U = tf.Variable(init([self.config.hidden_size, self.config.n_classes]))
            h = tf.nn.relu(tf.matmul(x, self.W) + b1)
            h_drop = tf.nn.dropout(h, 1 - self.dropout_placeholder)
            pred = tf.matmul(h_drop, U) + b2

        return pred
Example #6
    def add_prediction_op(self):
        """Adds the single layer neural network

        The layer is:
            h = Relu(W_w x_w + W_t x_t + W_d x_d + b1)
            h_drop = Dropout(h, dropout_rate)
            pred = h_drop U + b2

        Note that we are not applying a softmax to pred. The softmax
        will instead be done in the add_loss_op function, which improves
        efficiency because we can use
            tf.nn.softmax_cross_entropy_with_logits
        Excluding the softmax in predictions won't change the expected
        transition.

        Use the Xavier initializer from initialization.py for W_ and
        U. Initialize b1 and b2 with zeros.

        The dimensions of the various variables you will need to create
        are:
            W_w : (n_word_features * embed_size, hidden_size)
            W_t : (n_tag_features * embed_size, hidden_size)
            W_d : (n_deprel_features * embed_size, hidden_size)
            b1: (hidden_size,)
            U:  (hidden_size, n_classes)
            b2: (n_classes)

        Use the value self.dropout_placeholder in tf.nn.dropout directly

        Returns:
            pred: tf.Tensor of shape (batch_size, n_classes)
        """
        x_w, x_t, x_d = self.add_embeddings()
        ### BEGIN YOUR CODE
        xavier_initialization = xavier_weight_init()

        #Initialize variables
        W_w = tf.Variable(
            xavier_initialization(
                (self.config.n_word_features * self.config.embed_size,
                 self.config.hidden_size)))
        W_t = tf.Variable(
            xavier_initialization(
                (self.config.n_tag_features * self.config.embed_size,
                 self.config.hidden_size)))
        W_d = tf.Variable(
            xavier_initialization(
                (self.config.n_deprel_features * self.config.embed_size,
                 self.config.hidden_size)))

        U = tf.Variable(
            xavier_initialization(
                (self.config.hidden_size, self.config.n_classes)))

        b1 = tf.Variable(tf.zeros((self.config.hidden_size, )))
        b2 = tf.Variable(tf.zeros((self.config.n_classes)))

        h = tf.nn.relu(
            tf.matmul(x_w, W_w) + tf.matmul(x_t, W_t) + tf.matmul(x_d, W_d) +
            b1)
        h_drop = tf.nn.dropout(h, self.dropout_placeholder)

        pred = tf.matmul(h_drop, U) + b2
        # pred = tf.reshape(pred, (self.config.batch_size, self.config.n_classes))
        ### END YOUR CODE
        return pred
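
The docstring notes that the softmax is deferred to add_loss_op so that tf.nn.softmax_cross_entropy_with_logits can be used. Below is a sketch of such a loss op, assuming a one-hot label placeholder named self.class_placeholder; the actual attribute name in the assignment may differ.

    def add_loss_op(self, pred):
        # pred holds the un-softmaxed logits returned by add_prediction_op;
        # self.class_placeholder is an assumed name for the one-hot labels.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=pred, labels=self.class_placeholder))
        return loss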
Example #7
    def add_embeddings(self):
        """Creates embeddings that map word, tag, deprels to vectors

        Embedding layers convert (sparse) ID representations to dense,
        lower-dimensional representations. Inputs are integers, outputs
        are floats.

         - Create 3 embedding matrices, one for each of the input types.
           Input values index the rows of the matrices to extract. The
           max bound (exclusive) on the values in the input can be found
           in {n_word_ids, n_tag_ids, n_deprel_ids}
           After lookup, the resulting tensors should each be of shape
           (None, n, embed_size), where n is one of
           {n_word_features, n_tag_features, n_deprel_features}.
         - Initialize the word_id embedding matrix with
           self.word_embeddings. Initialize the other two matrices
           with the Xavier initialization you implemented
         - Reshape the embedding tensors into shapes
           (None, n * embed_size)

        ** Embedding matrices should be variables, not constants! **

        Use tf.nn.embedding_lookup. Also take a look at tf.reshape

        Returns:
            word_embeddings : tf.Tensor of type tf.float32 and shape
                (None, n_word_features * embed_size)
            tag_embeddings : tf.float32 (None, n_tag_features * embed_size)
            deprel_embeddings : tf.float32
                (None, n_deprel_features * embed_size)
        """
        ### BEGIN YOUR CODE

        # TA:
        # The add_embeddings() function should be used to not only create and initialize the embeddings,
        # but to also lookup embeddings corresponding to the deprel_ids, word_ids, and tag_ids that you feed in through create_feed_dict().
        # The return value should therefore be the word_embeddings, tag_embeddings, and deprel_embeddings corresponding to the ids provided by the placeholders.

        # - Initialize the word_id embedding matrix with self.word_embeddings.
        word_embeddings = tf.Variable(self.word_embeddings, dtype=tf.float32)
        word_embeddings = tf.nn.embedding_lookup(word_embeddings,
                                                 self.word_id_placeholder)

        #   Input values index the rows of the matrices to extract.
        # The max bound (exclusive) on the values in the input can be found in {n_word_ids, n_tag_ids, n_deprel_ids}

        #   After lookup, the resulting tensors should each be of shape (None, n, embed_size),
        # where n is one of {n_word_features, n_tag_features, n_deprel_features}.

        # Initialize the other two matrices with the Xavier initialization you implemented
        xavier_initializer = xavier_weight_init()

        # embeddings for tags
        shape = (self.config.n_tag_ids, self.config.embed_size)
        xavier_mat = xavier_initializer(shape)
        tag_embeddings = tf.Variable(xavier_mat, dtype=tf.float32)
        tag_embeddings = tf.nn.embedding_lookup(tag_embeddings,
                                                self.tag_id_placeholder)

        # embeddings for arc-labels
        shape = (self.config.n_deprel_ids, self.config.embed_size)
        xavier_mat = xavier_initializer(shape)
        deprel_embeddings = tf.Variable(xavier_mat, dtype=tf.float32)
        deprel_embeddings = tf.nn.embedding_lookup(deprel_embeddings,
                                                   self.deprel_id_placeholder)

        # - Reshape the embedding tensors into shapes (None, n * embed_size)
        word_embeddings = tf.reshape(
            word_embeddings,
            [-1, self.config.n_word_features * self.config.embed_size])
        tag_embeddings = tf.reshape(
            tag_embeddings,
            [-1, self.config.n_tag_features * self.config.embed_size])
        deprel_embeddings = tf.reshape(
            deprel_embeddings,
            [-1, self.config.n_deprel_features * self.config.embed_size])

        ### END YOUR CODE
        return word_embeddings, tag_embeddings, deprel_embeddings
Example #8
    def add_prediction_op(self):
        """Adds the single layer neural network

        The layer is:
            h = Relu(W_w x_w + W_t x_t + W_d x_d + b1)
            h_drop = Dropout(h, dropout_rate)
            pred = h_drop U + b2

        Note that we are not applying a softmax to pred. The softmax
        will instead be done in the add_loss_op function, which improves
        efficiency because we can use
            tf.nn.softmax_cross_entropy_with_logits
        Excluding the softmax in predictions won't change the expected
        transition.

        Use the Xavier initializer from initialization.py for W_ and
        U. Initialize b1 and b2 with zeros.

        The dimensions of the various variables you will need to create
        are:
            W_w : (n_word_features * embed_size, hidden_size)
            W_t : (n_tag_features * embed_size, hidden_size)
            W_d : (n_deprel_features * embed_size, hidden_size)
            b1: (hidden_size,)
            U:  (hidden_size, n_classes)
            b2: (n_classes)

        Use the value self.dropout_placeholder in tf.nn.dropout directly

        Returns:
            pred: tf.Tensor of shape (batch_size, n_classes)
        """
        x_w, x_t, x_d = self.add_embeddings()

        xavier_initializer = xavier_weight_init()
        W_w = tf.Variable(xavier_initializer((self.config.n_word_features * self.config.embed_size, self.config.hidden_size)))
        W_t = tf.Variable(xavier_initializer((self.config.n_tag_features * self.config.embed_size, self.config.hidden_size)))
        W_d = tf.Variable(xavier_initializer((self.config.n_deprel_features* self.config.embed_size, self.config.hidden_size)))
        b1 = tf.Variable(tf.zeros((self.config.hidden_size,)))
        U = tf.Variable(xavier_initializer((self.config.hidden_size, self.config.n_classes)))
        b2 = tf.Variable(tf.zeros((self.config.n_classes)))

        x = tf.matmul(x_w, W_w) + tf.matmul(x_t, W_t) + tf.matmul(x_d, W_d) + b1

        print("\n\t" + FLAGS.activation + " activation function")

        # compute first hidden layer
        if FLAGS.activation == 'cube':
            # cube activation function
            h = tf.pow(x, tf.constant(3, dtype=tf.float32))
        else:
            h = tf.nn.relu(x)

        # add all weights and biases for l2 regularization
        if self.config.l2_beta:
            print("\tl2 regularization with beta " + str(self.config.l2_beta))
            self.config.l2_loss += tf.nn.l2_loss(W_w) + tf.nn.l2_loss(W_t) + \
                                  tf.nn.l2_loss(W_d) + tf.nn.l2_loss(b1) + \
                                  tf.nn.l2_loss(U) + tf.nn.l2_loss(b2)

        print("\t" + str(FLAGS.hidden + 1) + " hidden layer(s) with size " + str(FLAGS.hidden_size))

        if FLAGS.hidden:
            # initialize weights and biases for hidden layers
            w, b = {}, {}
            for i in range(FLAGS.hidden):
                w[i] = tf.Variable(xavier_initializer((self.config.hidden_size, self.config.hidden_size)))
                b[i] = tf.Variable(tf.random_normal([self.config.hidden_size]))

            def hidden_layers(x):
                layer = tf.nn.relu(tf.matmul(x, w[0]) + b[0])
                for i in range(1, FLAGS.hidden):
                    layer = tf.nn.relu(tf.matmul(layer, w[i]) + b[i])
                return layer
            
            # apply dropout then compute additional hidden layers
            h_drop = tf.nn.dropout(h, self.dropout_placeholder)
            layers = hidden_layers(h_drop)
            pred = tf.matmul(layers, U) + b2

            # add l2 loss for hidden weights and biases
            if self.config.l2_beta:
                for i in range(FLAGS.hidden):
                    self.config.l2_loss += tf.nn.l2_loss(w[i]) + tf.nn.l2_loss(b[i])
        else:
            h_drop = tf.nn.dropout(h, self.dropout_placeholder)
            pred = tf.matmul(h_drop, U) + b2

        return pred
Example #9
    def add_prediction_op(self):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2


        Remember:
            * Use the Xavier initialization for matrices.
            * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
            The keep probability should be set to the value of self.dropout_placeholder

        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)
        """

        x = self.add_embedding()
        dropout_rate = self.dropout_placeholder

        preds = []  # Predicted output at each timestep should go here!

        if self.config.cell == "rnn":
            cell = RNNCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        elif self.config.cell == "gru":
            cell = GRUCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        else:
            raise ValueError("Unsuppported cell type: " + self.config.cell)

        U = tf.get_variable(
            "U",
            shape=[self.config.hidden_size, self.config.n_classes],
            dtype=tf.float32,
            initializer=xavier_weight_init())
        b2 = tf.get_variable("b2",
                             shape=[
                                 self.config.n_classes,
                             ],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.0))
        h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size],
                       dtype=tf.float32)
        x = tf.transpose(x, [1, 0, 2])

        with tf.variable_scope("RNN"):
            for time_step in range(self.max_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                o_t, h_t = cell(x[time_step], h_t)
                o_drop_t = tf.nn.dropout(o_t, dropout_rate)  # keep probability is the dropout placeholder value, as the docstring specifies
                y_t = tf.matmul(o_drop_t, U) + b2
                preds.append(y_t)

        preds = tf.transpose(tf.stack(preds), [1, 0, 2])

        assert preds.get_shape().as_list() == [
            None, self.max_length, self.config.n_classes
        ], "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())
        return preds
    def add_prediction_op(self):
        """Adds the single layer neural network

        The layer is:
            h = Relu(x_w W_w + x_t W_t + x_d W_d + b1)
            h_drop = Dropout(h, dropout_rate)
            pred = h_drop U + b2

        Note that we are not applying a softmax to pred. The softmax
        will instead be done in the add_loss_op function, which improves
        efficiency because we can use
            tf.nn.softmax_cross_entropy_with_logits
        Excluding the softmax in predictions won't change the expected
        transition.

        Use the Xavier initializer from initialization.py for W_ and
        U. Initialize b1 and b2 with zeros.

        The dimensions of the various variables you will need to create
        are:
            W_w : (n_word_features * embed_size, hidden_size)
            W_t : (n_tag_features * embed_size, hidden_size)
            W_d : (n_deprel_features * embed_size, hidden_size)
            b1: (hidden_size,)
            U:  (hidden_size, n_classes)
            b2: (n_classes)

        Use the value self.dropout_placeholder in tf.nn.dropout directly

        Returns:
            pred: tf.Tensor of shape (batch_size, n_classes)
        """
        ### BEGIN YOUR CODE
        x_w, x_t, x_d = self.add_embeddings()
        
        # Code for Part3
        """
        if (self.config.addHidden2):
            w_w = tf.get_variable("W_w", [self.config.n_word_features * self.config.embed_size, self.config.hidden_size])
            w_t = tf.get_variable("W_t", [self.config.n_tag_features * self.config.embed_size, self.config.hidden_size])
            w_d = tf.get_variable("W_d", [self.config.n_deprel_features * self.config.embed_size, self.config.hidden_size])
            wh = tf.get_variable("wh", [self.config.hidden_size, self.config.hidden2_size])
            b1 = tf.get_variable("b1", [self.config.hidden_size])
            b2 = tf.get_variable("b2", [self.config.hidden2_size])
            b3 = tf.get_variable("b3", [self.config.n_classes])
            u1 = tf.get_variable("U1", [self.config.hidden_size, self.config.hidden2_size])
            u2 = tf.get_variable("U2", [self.config.hidden2_size, self.config.n_classes])

            h1 = tf.nn.relu(tf.matmul(x_w, w_w) + tf.matmul(x_t, w_t) + tf.matmul(x_d, w_d) + b1, name="relu1")

            h2 = tf.nn.relu(tf.matmul(h1, wh) + b2, name="relu2")

            h2_drop = tf.nn.dropout(h2, self.dropout_placeholder, name="dropout2")
            h2_output = tf.matmul(h2_drop, u2) + b3
            pred = h2_output
        else:
        """
        xavier_initializer = xavier_weight_init()
        w_w = tf.get_variable("W_w", [self.config.n_word_features * self.config.embed_size, self.config.hidden_size], initializer=xavier_initializer)
        w_t = tf.get_variable("W_t", [self.config.n_tag_features * self.config.embed_size, self.config.hidden_size], initializer=xavier_initializer)
        w_d = tf.get_variable("W_d", [self.config.n_deprel_features * self.config.embed_size, self.config.hidden_size], initializer=xavier_initializer)
        b1 = tf.get_variable("b1", [self.config.hidden_size], initializer=tf.zeros_initializer())
        b2 = tf.get_variable("b2", [self.config.n_classes], initializer=tf.zeros_initializer())
        u = tf.get_variable("U", [self.config.hidden_size, self.config.n_classes], initializer=xavier_initializer)

        h = tf.nn.relu(tf.matmul(x_w, w_w) + tf.matmul(x_t, w_t) + tf.matmul(x_d, w_d) + b1, name="relu")
        h_drop = tf.nn.dropout(h, self.dropout_placeholder, name="dropout")
        pred = tf.matmul(h_drop, u) + b2
        ### END YOUR CODE
        return pred
Example #11
File: NER.py  Project: pzjc/nlp
    def add_model(self, window):
        """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and
          another variable_scope (e.g. "Softmax") for the linear transformation
          preceding the softmax. Make sure to use the xavier_weight_init you
          defined in the previous part to initialize weights.  # Xavier initialization is provided in the assignment
    Hint: Make sure to add in regularization and dropout to this network.
          Regularization should be an addition to the cost function, while  # add regularization to the cost function
          dropout should be added after both variable scopes.  # i.e. after both the first hidden layer and the softmax layer
    Hint: You might consider using a tensorflow Graph Collection (e.g
          "total_loss") to collect the regularization and loss terms (which you		# 用 tensorflow Graph 收集regularization and loss
          will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to
          create

          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, label_size)
          b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections
    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, label_size)
    """
        ### YOUR CODE HERE
        with tf.variable_scope('Layer1', initializer=xavier_weight_init()
                               ) as scope:  # initialize the first layer's weights with the Xavier initializer
            W = tf.get_variable(  # first layer: W, b1, h
                'W', [
                    self.config.window_size * self.config.embed_size,
                    self.config.hidden_size
                ])
            b1 = tf.get_variable('b1', [self.config.hidden_size])
            h = tf.nn.sigmoid(tf.matmul(window, W) + b1)
            if self.config.l2:  # L2 regularization for W
                tf.add_to_collection(
                    'total_loss', 0.5 * self.config.l2 * tf.nn.l2_loss(W)
                )  # 0.5 * self.config.l2 * tf.nn.l2_loss(W)

        # with tf.variable_scope('Layer2', initializer=xavier_weight_init()) as scope:
        # 	W2 = tf.get_variable('W2',[self.config.hidden_size,
        # 							   self.config.hidden_size])
        # 	b2 = tf.get_variable('b2',[self.config.hidden_size])
        # 	h2 = tf.nn.tanh(tf.matmul(h, W2) + b2)
        # 	if self.config.l2:																# L2 regularization for W
        #       tf.add_to_collection('total_loss', 0.5 * self.config.l2 * tf.nn.l2_loss(W2))	# 0.5 * self.config.l2 * tf.nn.l2_loss(W)

        with tf.variable_scope('Output_Layer',
                               initializer=xavier_weight_init()) as scope:
            U = tf.get_variable(
                'U', [self.config.hidden_size, self.config.label_size])
            b2 = tf.get_variable('b2', [self.config.label_size])
            y = tf.matmul(h, U) + b2
            if self.config.l2:
                tf.add_to_collection('total_loss',
                                     0.5 * self.config.l2 * tf.nn.l2_loss(U))
        output = tf.nn.dropout(
            y, self.dropout_placeholder)  # return output; dropout applied after both variable scopes
        ### END YOUR CODE
        return output
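
The hints above suggest gathering the regularization terms in a "total_loss" graph collection. Below is a minimal sketch of how add_loss_op might combine them with the cross-entropy; the attribute self.labels_placeholder is an assumed name, not taken from the original code.

    def add_loss_op(self, y):
        # y: output of add_model; self.labels_placeholder is an assumed name.
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=y, labels=self.labels_placeholder))
        tf.add_to_collection('total_loss', cross_entropy)
        # sum the cross-entropy with every term added to the collection in add_model
        return tf.add_n(tf.get_collection('total_loss'))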