Example #1
    def __init__(self,
                 X,
                 y,
                 filename='lstm_cell',
                 inspect_rate=50,
                 iterations=1000,
                 learning_rate=0.000025,
                 input_nodes=3,
                 hidden_nodes=3,
                 output_nodes=1):
        self.X = X
        self.y = y
        self.filename = filename
        self.inspect_rate = inspect_rate
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # initialize placeholder activation nodes; the hidden layer is intended to
        # hold one LSTMCell per hidden node (note: the cell sizes and learning rate
        # passed here are hard-coded rather than taken from the constructor arguments)
        self.activation_input = np.atleast_2d(np.ones(self.input_nodes))
        self.activation_hidden = np.apply_along_axis(
            lambda x: LSTMCell(5, 5, 5, 0.000025), 0,
            np.atleast_2d(np.ones(self.hidden_nodes)))
        self.activation_output = np.atleast_2d(np.ones(self.output_nodes))

        # initialize weights
        self.ih_weights = np.random.randn(self.input_nodes, self.hidden_nodes)
        self.ho_weights = np.random.randn(self.hidden_nodes, self.output_nodes)

        # initialize placeholder deltas
        self.ih_deltas = np.zeros_like(self.ih_weights)
        self.ho_deltas = np.zeros_like(self.ho_weights)
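
For orientation, here is a hedged usage sketch of the constructor above; the wrapper class name Network and the toy X/y arrays are assumptions for illustration, since only __init__ is shown.

    import numpy as np

    # toy regression data: 4 samples, 3 input features, 1 target each
    X = np.random.randn(4, 3)
    y = np.random.randn(4, 1)

    # hypothetical wrapper class containing the __init__ above
    net = Network(X, y,
                  iterations=500,
                  learning_rate=0.000025,
                  input_nodes=3,
                  hidden_nodes=3,
                  output_nodes=1)
    print(net.ih_weights.shape)  # (3, 3)
    print(net.ho_weights.shape)  # (3, 1)
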
Example #2

    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size

        # Our own LSTM implementation
        self.lstm = LSTMCell(input_size, hidden_size)

        # Fully-connected output layer
        self.fc = torch.nn.Linear(hidden_size, output_size)
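
A forward pass is not shown in this snippet; the sketch below illustrates how the wrapper above is typically wired up, assuming the custom LSTMCell follows a cell(x_t, (h, c)) -> (h, c) calling convention (an assumption, not something the snippet confirms).

    def forward(self, x):
        # x: (batch, seq_len, input_size)
        batch_size, seq_len, _ = x.shape
        h = torch.zeros(batch_size, self.hidden_size)
        c = torch.zeros(batch_size, self.hidden_size)
        # unroll the custom cell over the sequence
        for t in range(seq_len):
            h, c = self.lstm(x[:, t, :], (h, c))
        # project the final hidden state with the fully-connected layer
        return self.fc(h)
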
Example #3

    def add_prediction_op(self):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2
        TODO: There are quite a few things you'll need to do in this function:
            - Define the variables U, b_2.
            - Define the vector h as a constant and initialize it with
              zeros. See tf.zeros and tf.shape for information on how
              to initialize this variable to be of the right shape.
              https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
              https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
            - In a for loop, begin to unroll the RNN sequence. Collect
              the predictions in a list.
            - When unrolling the loop, from the second iteration
              onwards, you will HAVE to call
              tf.get_variable_scope().reuse_variables() so that you do
              not create new variables in the RNN cell.
              See https://www.tensorflow.org/versions/master/how_tos/variable_scope/
            - Concatenate and reshape the predictions into a predictions
              tensor.
        Hint: You will find the function tf.pack (similar to np.asarray)
              useful to assemble a list of tensors into a larger tensor.
              https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
        Hint: You will find the function tf.transpose and the perm
              argument useful to shuffle the indices of the tensor.
              https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose
        Remember:
            * Use Xavier initialization for matrices.
            * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
              The keep probability should be set to the value of self.dropout_placeholder.
        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)
        """
        x1, x2 = self.add_embedding()
        dropout_rate = self.dropout_placeholder

        # choose cell type
        if self.config.cell == "rnn":
            cell = RNNCell(self.config.embed_size, self.config.hidden_size)
        elif self.config.cell == "gru":
            cell = GRUCell(self.config.embed_size, self.config.hidden_size)
        elif self.config.cell == "lstm":
            cell = LSTMCell(self.config.embed_size, self.config.hidden_size)
        else:
            raise ValueError("Unsuppported cell type: " + self.config.cell)

        # Initialize hidden states to zero vectors of shape (num_examples, hidden_size)
        h1 = tf.zeros((tf.shape(x1)[0], self.config.hidden_size), tf.float32)
        h2 = tf.zeros((tf.shape(x2)[0], self.config.hidden_size), tf.float32)

        with tf.variable_scope("RNN1") as scope:
            for time_step in range(self.helper.max_length):
                if time_step != 0:
                    scope.reuse_variables()
                o1_t, h1 = cell(x1[:, time_step, :], h1, scope)
        with tf.variable_scope("RNN2") as scope:
            for time_step in range(self.helper.max_length):
                if time_step != 0:
                    scope.reuse_variables()
                o2_t, h2 = cell(x2[:, time_step, :], h2, scope)

        # h_drop1 = tf.nn.dropout(h1, dropout_rate)
        # h_drop2 = tf.nn.dropout(h2, dropout_rate)

        # use L2-regularization: sum of squares of all parameters

        if self.config.distance_measure == "l2":
            # perform logistic regression on l2-distance between h1 and h2
            distance = norm(h1 - h2 + 0.000001)
            logistic_a = tf.Variable(0.0, dtype=tf.float32, name="logistic_a")
            logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
            self.regularization_term = tf.square(logistic_a) + tf.square(
                logistic_b)
            preds = tf.sigmoid(logistic_a * distance + logistic_b)

        elif self.config.distance_measure == "cosine":
            # perform logistic regression on cosine distance between h1 and h2
            distance = cosine_distance(h1 + 0.000001, h2 + 0.000001)
            logistic_a = tf.Variable(1.0, dtype=tf.float32, name="logistic_a")
            logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
            self.regularization_term = tf.square(logistic_a) + tf.square(
                logistic_b)
            preds = tf.sigmoid(logistic_a * distance + logistic_b)

        elif self.config.distance_measure == "custom_coef":
            # perform logistic regression on the vector |h1-h2|,
            # equivalent to logistic regression on the (scalar) weighted Manhattan distance between h1 and h2,
            # ie. weighted sum of |h1-h2|
            logistic_a = tf.get_variable(
                "coef", [self.config.hidden_size], tf.float32,
                tf.contrib.layers.xavier_initializer())
            logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
            self.regularization_term = tf.reduce_sum(
                tf.square(logistic_a)) + tf.square(logistic_b)
            preds = tf.sigmoid(
                tf.reduce_sum(logistic_a * tf.abs(h1 - h2), axis=1) +
                logistic_b)

        elif self.config.distance_measure == "concat":
            # use softmax for prediction
            U = tf.get_variable(
                "U", (4 * self.config.hidden_size, self.config.n_classes),
                tf.float32, tf.contrib.layers.xavier_initializer())
            b = tf.get_variable("b", (self.config.n_classes, ), tf.float32,
                                tf.constant_initializer(0))
            v = tf.nn.relu(tf.concat([h1, h2, tf.square(h1 - h2), h1 * h2], 1))
            self.regularization_term = tf.reduce_sum(
                tf.square(U)) + tf.reduce_sum(tf.square(b))
            preds = tf.matmul(v, U) + b

        elif self.config.distance_measure == "concat_steroids":
            # use softmax for prediction
            W1 = tf.get_variable(
                "W1", (4 * self.config.hidden_size, self.config.hidden_size),
                tf.float32, tf.contrib.layers.xavier_initializer())
            b1 = tf.get_variable("b1", (self.config.hidden_size, ), tf.float32,
                                 tf.constant_initializer(0))

            W2 = tf.get_variable(
                "W2", (self.config.hidden_size, self.config.n_classes),
                tf.float32, tf.contrib.layers.xavier_initializer())
            b2 = tf.get_variable("b2", (self.config.n_classes, ), tf.float32,
                                 tf.constant_initializer(0))

            v1 = tf.nn.relu(tf.concat(
                [h1, h2, tf.square(h1 - h2), h1 * h2], 1))
            v2 = tf.nn.relu(tf.matmul(v1, W1) + b1)

            self.regularization_term = tf.reduce_sum(
                tf.square(W1)) + tf.reduce_sum(tf.square(b1)) + tf.reduce_sum(
                    tf.square(W2)) + tf.reduce_sum(tf.square(b2))
            preds = tf.matmul(v2, W2) + b2

        else:
            raise ValueError("Unsuppported distance type: " +
                             self.config.distance_measure)

        return preds
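
The docstring's unrolling pattern (h_0 = 0, a per-step cell call, dropout on the output, then y_t = o_drop_t U + b_2) can be shown framework-free. The NumPy sketch below is illustrative only: a plain tanh cell stands in for the RNNCell/GRUCell/LSTMCell used above, and all shapes are assumptions.

    import numpy as np

    def unrolled_rnn(x, W_x, W_h, U, b_2, keep_prob=0.8, seed=0):
        """x: (batch, T, embed) -> predictions of shape (batch, T, n_classes)."""
        rng = np.random.default_rng(seed)
        batch, T, _ = x.shape
        h_t = np.zeros((batch, W_h.shape[0]))      # h_0 = 0
        preds = []
        for t in range(T):
            # a plain tanh cell stands in for the recurrent cell
            o_t = h_t = np.tanh(x[:, t, :] @ W_x + h_t @ W_h)
            # inverted dropout with the given keep probability
            mask = (rng.random(o_t.shape) < keep_prob) / keep_prob
            o_drop_t = o_t * mask
            preds.append(o_drop_t @ U + b_2)       # y_t = o_drop_t U + b_2
        return np.stack(preds, axis=1)
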
Example #4
    def add_prediction_op(self):
        """
        Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2

        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, non_terminal_vocab)
        """

        x = self.add_embedding()
        dropout_rate = self.dropout_placeholder

        preds = []  # Predicted output at each timestep should go here!
        hidden = []

        cell = LSTMCell(Config.n_token_features * Config.embed_size,
                        Config.hidden_size)

        # Define U and b2 as variables.
        # Initialize state as vector of zeros.
        xinit = tf.contrib.layers.xavier_initializer(dtype=tf.float64)
        if not self.config.terminal_pred:
            output_size = self.config.non_terminal_vocab
        else:
            output_size = self.config.terminal_vocab
        U = tf.get_variable('U',
                            shape=[self.config.hidden_size, output_size],
                            initializer=xinit,
                            dtype=tf.float64)
        b2 = tf.get_variable('b2',
                             shape=[output_size],
                             initializer=tf.constant_initializer(0.0),
                             dtype=tf.float64)
        c_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size],
                       dtype=tf.float64)
        h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size],
                       dtype=tf.float64)
        state_tuple = (c_t, h_t)

        scope = "LSTM_terminal" if self.config.terminal_pred else "LSTM_non_terminal"
        # superseded draft of the attention ("lstmA") unrolling loop, kept commented out for reference
        '''
        if self.config.cell == "lstmA":
            W_a = tf.get_variable('W_a', shape = [self.config.hidden_size, self.config.hidden_size], dtype = tf.float64, initializer = xinit)
            W_o = tf.get_variable('W_o', shape = [2*self.config.hidden_size, self.config.hidden_size], dtype = tf.float64, initializer = xinit)
            W_s = tf.get_variable('W_s', shape = [self.config.hidden_size, self.config.hidden_size], dtype = tf.float64, initializer = xinit)
            b_o = tf.get_variable('b_o', shape = [self.config.hidden_size], dtype = tf.float64, initializer = tf.constant_initializer(0.0))
            b_s = tf.get_variable('b_s', shape = [self.config.hidden_size], dtype = tf.float64, initializer = tf.constant_initializer(0.0))
            with tf.variable_scope(scope):
            for time_step in range(self.max_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                o_t, h_t= cell(x[:,time_step,:], state_tuple)

                ht = tf.reshape(tf.matmul(h_t, W_a), (tf.shape(x)[0], -1, self.config.hidden_size))
                weights = tf.reduce_sum(ht * hidden, axis=2) * self.attn_mask_placeholder
                weights = tf.nn.softmax(weights)

                context = tf.reduce_sum(tf.reshape(weights, (tf.shape(weights)[0], tf.shape(weights)[1], -1)) * hidden, axis = 1)
                o_drop_t = tf.nn.dropout(o_t, dropout_rate)
                preds.append(tf.matmul(o_drop_t, U) + b2)
        preds = tf.stack(preds, 1)
        final_preds = tf.boolean_mask(preds, self.mask_placeholder)
        '''

        with tf.variable_scope(scope):
            for time_step in range(self.max_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                o_t, h_t = cell(x[:, time_step, :], state_tuple)
                o_drop_t = tf.nn.dropout(o_t, dropout_rate)
                preds.append(tf.matmul(o_drop_t, U) + b2)

                if self.config.cell == "lstmA":
                    W_a = tf.get_variable('W_a',
                                          shape=[
                                              self.config.hidden_size,
                                              self.config.hidden_size
                                          ],
                                          dtype=tf.float64,
                                          initializer=xinit)
                    W_o = tf.get_variable(
                        'W_o',
                        shape=[2 * self.config.hidden_size, output_size],
                        dtype=tf.float64,
                        initializer=xinit)
                    W_s = tf.get_variable('W_s',
                                          shape=[output_size, output_size],
                                          dtype=tf.float64,
                                          initializer=xinit)
                    b_o = tf.get_variable(
                        'b_o',
                        shape=[output_size],
                        dtype=tf.float64,
                        initializer=tf.constant_initializer(0.0))
                    b_s = tf.get_variable(
                        'b_s',
                        shape=[output_size],
                        dtype=tf.float64,
                        initializer=tf.constant_initializer(0.0))

                    hidden.append(h_t[1])
                    hidden_stack = tf.stack(hidden, 1)
                    ht = tf.reshape(
                        tf.matmul(h_t[1], W_a),
                        (tf.shape(x)[0], -1, self.config.hidden_size))
                    #print "ht shape: ", ht.get_shape().as_list()
                    #print "hidden shape: ", hidden_stack.get_shape().as_list()
                    #print "time step: ", time_step
                    #print "original mask: ", self.attn_mask_placeholder.get_shape().as_list()
                    #print "mask: ", tf.slice(self.attn_mask_placeholder, [0,0], [-1,time_step + 1])
                    weights = tf.reduce_sum(
                        ht * hidden_stack, axis=2) * tf.slice(
                            self.attn_mask_placeholder, [0, 0],
                            [-1, time_step + 1])
                    weights = tf.nn.softmax(weights)

                    context = tf.reduce_sum(tf.reshape(
                        weights,
                        (tf.shape(weights)[0], tf.shape(weights)[1], -1)) *
                                            hidden_stack,
                                            axis=1)
                    #print "context shape: ", context.get_shape().as_list()
                    #print "weights: ", weights.get_shape().as_list()
                    #replace last hidden state with context
                    hidden = hidden[:-1] + [context]

            # stack the per-timestep outputs and mask out padded positions
            preds = tf.stack(preds, 1)
            hidden = tf.stack(hidden, 1)
            final_preds = tf.boolean_mask(preds, self.mask_placeholder)
            final_hidden = tf.boolean_mask(hidden, self.mask_placeholder)

        if self.config.cell == "lstmA":
            W_a = tf.get_variable(
                'W_a',
                shape=[self.config.hidden_size, self.config.hidden_size],
                dtype=tf.float64,
                initializer=xinit)
            W_o = tf.get_variable(
                'W_o',
                shape=[2 * self.config.hidden_size, output_size],
                dtype=tf.float64,
                initializer=xinit)
            W_s = tf.get_variable('W_s',
                                  shape=[output_size, output_size],
                                  dtype=tf.float64,
                                  initializer=xinit)
            b_o = tf.get_variable('b_o',
                                  shape=[output_size],
                                  dtype=tf.float64,
                                  initializer=tf.constant_initializer(0.0))
            b_s = tf.get_variable('b_s',
                                  shape=[output_size],
                                  dtype=tf.float64,
                                  initializer=tf.constant_initializer(0.0))
            ht = tf.reshape(tf.matmul(final_hidden, W_a),
                            (tf.shape(x)[0], -1, self.config.hidden_size))
            weights = tf.reduce_sum(ht * hidden,
                                    axis=2) * self.attn_mask_placeholder
            weights = tf.nn.softmax(weights)

            context = tf.reduce_sum(
                tf.reshape(weights,
                           (tf.shape(weights)[0], tf.shape(weights)[1], -1)) *
                hidden,
                axis=1)

            # combine the attention context with the masked hidden states, then project
            final_preds = tf.tanh(
                tf.matmul(tf.concat([context, final_hidden], 1), W_o) + b_o)
            final_preds = tf.matmul(final_preds, W_s) + b_s

        if self.config.terminal_pred:
            nt = tf.nn.embedding_lookup(
                self.embeddings, self.next_non_terminal_input_placeholder)
            nt = tf.reshape(
                nt,
                [-1, self.config.n_token_features * self.config.embed_size])
            U_nt = tf.get_variable(
                'U_nt',
                shape=[self.config.hidden_size, output_size],
                initializer=xinit,
                dtype=tf.float64)
            b_t = tf.get_variable('b_t',
                                  shape=[output_size],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float64)
            final_preds = final_preds + tf.matmul(nt, U_nt) + b_t

        return final_preds
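
The "lstmA" branches above compute a dot-product attention context over the hidden states collected so far (project the current state with W_a, score it against every stored state, mask, softmax, then take the weighted sum). The NumPy sketch below isolates that step; the shapes and names are illustrative assumptions.

    import numpy as np

    def attention_context(h_t, hidden_stack, W_a, mask):
        """h_t: (batch, hidden); hidden_stack: (batch, t+1, hidden); mask: (batch, t+1)."""
        # score every stored hidden state against the projected current state
        scores = np.einsum('bh,bth->bt', h_t @ W_a, hidden_stack) * mask
        # softmax over the time axis
        weights = np.exp(scores - scores.max(axis=1, keepdims=True))
        weights /= weights.sum(axis=1, keepdims=True)
        # weighted sum of the stored hidden states
        return np.einsum('bt,bth->bh', weights, hidden_stack)
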
Example #5
    def add_prediction_op(self):
        """
        Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2

        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, non_terminal_vocab)
        """

        x = self.add_embedding()
        dropout_rate = self.dropout_placeholder

        preds = []  # Predicted output at each timestep should go here!
        hidden = []

        cell = LSTMCell(Config.n_token_features * Config.embed_size,
                        Config.hidden_size)

        # Define U and b2 as variables.
        # Initialize state as vector of zeros.
        xinit = tf.contrib.layers.xavier_initializer(dtype=tf.float64)
        if not self.config.terminal_pred:
            output_size = self.config.non_terminal_vocab
        else:
            output_size = self.config.terminal_vocab

        U = tf.get_variable('U',
                            shape=[self.config.hidden_size, output_size],
                            initializer=xinit,
                            dtype=tf.float64)
        b2 = tf.get_variable('b2',
                             shape=[output_size],
                             initializer=tf.constant_initializer(0.0),
                             dtype=tf.float64)
        c_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size],
                       dtype=tf.float64)
        h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size],
                       dtype=tf.float64)
        state_tuple = (c_t, h_t)

        scope = "LSTM_terminal" if self.config.terminal_pred else "LSTM_non_terminal"

        with tf.variable_scope(scope):
            for time_step in range(self.max_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                o_t, h_t = cell(x[:, time_step, :], state_tuple)
                o_drop_t = tf.nn.dropout(o_t, dropout_rate)
                preds.append(tf.matmul(o_drop_t, U) + b2)
                hidden.append(h_t[1])

                if self.config.cell not in ("lstmAend", "lstm"):
                    W_a = tf.get_variable('W_a',
                                          shape=[
                                              self.config.hidden_size,
                                              self.config.hidden_size
                                          ],
                                          dtype=tf.float64,
                                          initializer=xinit)
                    W_o = tf.get_variable(
                        'W_o',
                        shape=[2 * self.config.hidden_size, output_size],
                        dtype=tf.float64,
                        initializer=xinit)
                    W_s = tf.get_variable('W_s',
                                          shape=[output_size, output_size],
                                          dtype=tf.float64,
                                          initializer=xinit)
                    b_o = tf.get_variable(
                        'b_o',
                        shape=[output_size],
                        dtype=tf.float64,
                        initializer=tf.constant_initializer(0.0))
                    b_s = tf.get_variable(
                        'b_s',
                        shape=[output_size],
                        dtype=tf.float64,
                        initializer=tf.constant_initializer(0.0))

                    hidden_stack = tf.stack(hidden, 1)
                    ht = tf.reshape(
                        tf.matmul(h_t[1], W_a),
                        (tf.shape(x)[0], -1, self.config.hidden_size))
                    weights = tf.reduce_sum(
                        ht * hidden_stack, axis=2) * tf.slice(
                            self.attn_mask_placeholder, [0, 0],
                            [-1, time_step + 1])
                    weights = tf.nn.softmax(weights)
                    context = tf.reduce_sum(tf.reshape(
                        weights,
                        (tf.shape(weights)[0], tf.shape(weights)[1], -1)) *
                                            hidden_stack,
                                            axis=1)
                    context_hidden_sum = tf.add(context, h_t[1])

                    if (self.config.cell == "lstmAcont"):
                        hidden = hidden[:-1] + [context]

                    if (self.config.cell == "lstmAsum_fn"):
                        W_alpha = tf.get_variable('W_alpha',
                                                  shape=[
                                                      self.config.hidden_size,
                                                      self.config.hidden_size
                                                  ],
                                                  dtype=tf.float64,
                                                  initializer=xinit)
                        W_beta = tf.get_variable('W_beta',
                                                 shape=[
                                                     self.config.hidden_size,
                                                     self.config.hidden_size
                                                 ],
                                                 dtype=tf.float64,
                                                 initializer=xinit)
                        U_alpha = tf.get_variable('U_alpha',
                                                  shape=[
                                                      self.config.embed_size,
                                                      self.config.hidden_size
                                                  ],
                                                  dtype=tf.float64,
                                                  initializer=xinit)
                        U_beta = tf.get_variable('U_beta',
                                                 shape=[
                                                     self.config.embed_size,
                                                     self.config.hidden_size
                                                 ],
                                                 dtype=tf.float64,
                                                 initializer=xinit)
                        alpha = tf.sigmoid(
                            tf.matmul(context, W_alpha) +
                            tf.matmul(x[:, time_step, :], U_alpha))
                        beta = tf.sigmoid(
                            tf.matmul(h_t[1], W_beta) +
                            tf.matmul(x[:, time_step, :], U_beta))
                        straightSum = alpha * context + beta * h_t[1]
                        hidden = hidden[:-1] + [straightSum]

                    if (self.config.cell == "lstmAsum"):
                        alpha = tf.get_variable(
                            'alpha',
                            shape=(),
                            dtype=tf.float64,
                            initializer=tf.random_uniform_initializer(
                                -1.0, 2.0))
                        beta = tf.get_variable(
                            'beta',
                            shape=(),
                            dtype=tf.float64,
                            initializer=tf.random_uniform_initializer(
                                -1.0, 2.0))
                        straightSum = tf.add(tf.scalar_mul(alpha, context),
                                             tf.scalar_mul(beta, h_t[1]))
                        hidden = hidden[:-1] + [straightSum]

                    if (self.config.cell == "lstmAwsum_fn"):
                        W_ph = tf.get_variable('W_ph',
                                               shape=[
                                                   self.config.hidden_size,
                                                   self.config.hidden_size
                                               ],
                                               dtype=tf.float64,
                                               initializer=xinit)
                        W_pc = tf.get_variable('W_pc',
                                               shape=[
                                                   self.config.hidden_size,
                                                   self.config.hidden_size
                                               ],
                                               dtype=tf.float64,
                                               initializer=xinit)
                        W_px = tf.get_variable('W_px',
                                               shape=[
                                                   self.config.embed_size,
                                                   self.config.hidden_size
                                               ],
                                               dtype=tf.float64,
                                               initializer=xinit)
                        hTerm = tf.matmul(h_t[1], W_ph)
                        cTerm = tf.matmul(context, W_pc)
                        xTerm = tf.matmul(x[:, time_step, :], W_px)
                        p_arr = tf.sigmoid(hTerm + cTerm + xTerm)
                        weightedSum = (p_arr * context) + (
                            (1 - p_arr) * h_t[1])
                        hidden = hidden[:-1] + [weightedSum]

            preds = tf.stack(preds, 1)
            hidden = tf.stack(hidden, 1)
            final_preds = tf.boolean_mask(preds, self.mask_placeholder)
            final_hidden = tf.boolean_mask(hidden, self.mask_placeholder)

        if not (self.config.cell == "lstm"):
            W_a = tf.get_variable(
                'W_a',
                shape=[self.config.hidden_size, self.config.hidden_size],
                dtype=tf.float64,
                initializer=xinit)
            W_o = tf.get_variable(
                'W_o',
                shape=[2 * self.config.hidden_size, output_size],
                dtype=tf.float64,
                initializer=xinit)
            W_s = tf.get_variable('W_s',
                                  shape=[output_size, output_size],
                                  dtype=tf.float64,
                                  initializer=xinit)
            b_o = tf.get_variable('b_o',
                                  shape=[output_size],
                                  dtype=tf.float64,
                                  initializer=tf.constant_initializer(0.0))
            b_s = tf.get_variable('b_s',
                                  shape=[output_size],
                                  dtype=tf.float64,
                                  initializer=tf.constant_initializer(0.0))
            ht = tf.reshape(tf.matmul(final_hidden, W_a),
                            (tf.shape(x)[0], -1, self.config.hidden_size))
            weights = tf.reduce_sum(ht * hidden,
                                    axis=2) * self.attn_mask_placeholder
            weights = tf.nn.softmax(weights)

            context = tf.reduce_sum(
                tf.reshape(weights,
                           (tf.shape(weights)[0], tf.shape(weights)[1], -1)) *
                hidden,
                axis=1)
            final_preds = tf.tanh(
                tf.matmul(tf.concat([context, final_hidden], 1), W_o) + b_o)
            final_preds = tf.matmul(final_preds, W_s) + b_s

        if self.config.terminal_pred:
            nt = tf.nn.embedding_lookup(
                self.embeddings, self.next_non_terminal_input_placeholder)
            nt = tf.reshape(
                nt,
                [-1, self.config.n_token_features * self.config.embed_size])
            U_nt = tf.get_variable(
                'U_nt',
                shape=[self.config.hidden_size, output_size],
                initializer=xinit,
                dtype=tf.float64)
            b_t = tf.get_variable('b_t',
                                  shape=[output_size],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float64)
            final_preds = final_preds + tf.matmul(nt, U_nt) + b_t

        return final_preds
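
The "lstmAwsum_fn" branch combines the attention context and the current hidden state through a learned sigmoid gate, p * context + (1 - p) * h_t. A minimal NumPy sketch of that combination follows; the weight names mirror the ones above but the shapes are illustrative assumptions.

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def gated_context_sum(context, h_t, x_t, W_ph, W_pc, W_px):
        """All inputs are (batch, dim) matrices with compatible shapes."""
        # gate computed from the hidden state, the context, and the current input
        p = sigmoid(h_t @ W_ph + context @ W_pc + x_t @ W_px)
        # gated mix of attention context and hidden state
        return p * context + (1.0 - p) * h_t
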
Example #6
    def encode(self,
               inputs,
               masks,
               dropout,
               scope,
               lstm_size,
               encoder_state_input=None):
        """
        In a generalized encode function, you pass in your inputs,
        masks, and an initial
        hidden state input into this function.

        :param inputs: Symbolic representations of your input
        :param masks: this is to make sure tf.nn.dynamic_rnn doesn't iterate
                      through masked steps
        :param encoder_state_input: (Optional) pass this as initial hidden state
                                    to tf.nn.dynamic_rnn to build conditional representations
        :return: an encoded representation of your input.
                 It can be context-level representation, word-level representation,
                 or both.
        """
        """
        # print('\n')
        # print(inputs.get_shape())
        # print('\n')
        """
        #print(tf.shape(inputs)[0])
        batch_size = tf.shape(inputs)[0]
        passage_length = tf.shape(inputs)[1]
        embedding_size = inputs.get_shape().as_list()[2]

        lstm = LSTMCell(lstm_size=lstm_size)

        # LSTM for encoding the question
        if encoder_state_input is not None:
            state = encoder_state_input
        else:
            h = tf.zeros(shape=[batch_size, lstm_size], dtype=tf.float32)
            c = tf.zeros(shape=[batch_size, lstm_size], dtype=tf.float32)
            state = [h, c]

        with tf.variable_scope(scope):
            input_length = inputs.get_shape()[1]
            encoded = None
            for word_step in xrange(input_length):
                if word_step >= 1:
                    tf.get_variable_scope().reuse_variables()
                # run one LSTM step for this word position
                # hidden_mask = tf.tile(masks[:, word_step], [1, lstm_size])
                output, state = lstm(inputs[:, word_step], state, scope=scope)
                """print('\n ~ ~ ~ Output shape' )
                print(output.get_shape())
                print('\n ~ ~ ~ Hidden mask' )
                print(hidden_mask)"""
                # print('~ ~ ~  word_step      ',word_step )
                """
                print('Iinputs.get_shape()[1]\n')
                print(inputs.get_shape()[1])s
                print(hidden_mask[:,word_step-1])"""
                # print(output.get_shape())
                #print('SIZE HIDDEN MASK',masks[:,word_step].get_shape(),'-----------------------')
                #print('SIZE OUTPUT',output.get_shape(),'-----------------------')

                # output = tf.boolean_mask(output,masks[:,word_step],name='boolean_mask')
                #print('output bolean mask  ',output.get_shape().as_list())
                # apply dropout
                output = tf.nn.dropout(output, dropout)
                #print('output dropout ',output.get_shape().as_list())
                #print('batch size   ',batch_size.get_shape(), '        lstm size        ', lstm_size )
                output = tf.reshape(
                    output, [batch_size, 1, lstm_size
                             ])  #tf.reshape(output,[batch_size,1,lstm_size])
                #print('output reshape ',output.get_shape().as_list())

                # print(output.get_shape())
                #print('\n ~ ~ ~ Output shape' )
                #print(output.get_shape())
                # accumulate the per-step outputs along the time axis
                if word_step == 0:
                    encoded = output
                else:
                    encoded = tf.concat_v2([encoded, output], 1)
        return (encoded, state)
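
A hedged sketch of how encode might be invoked; the Encoder class name, placeholder shapes, and hyperparameters below are assumptions based only on the signature above.

    import tensorflow as tf

    # hypothetical placeholders: a batch of embedded questions and their masks
    question = tf.placeholder(tf.float32, shape=[None, 30, 100])  # (batch, length, embed)
    question_mask = tf.placeholder(tf.float32, shape=[None, 30])

    encoder = Encoder()  # hypothetical class that contains encode()
    encoded_q, final_state = encoder.encode(question,
                                            question_mask,
                                            dropout=0.8,
                                            scope="question_encoder",
                                            lstm_size=64)
    # encoded_q: (batch, length, lstm_size); final_state: [h, c]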