Example #1
    def __init__(self,
            num_embed_units,  # pretrained wordvec size
            num_units,        # RNN units size
            num_layers,       # number of RNN layers
            num_vocabs,       # vocabulary size
            wordvec,            # pretrained wordvec matrix
            dataloader):      # dataloader

        super().__init__()

        # load pretrained wordvec
        self.wordvec = wordvec
        # the dataloader
        self.dataloader = dataloader

        # TODO START
        # fill the parameter for multi-layer RNN
        self.cells = nn.Sequential(
            RNNCell(num_embed_units, num_units),
            *[RNNCell(num_units, num_units) for _ in range(num_layers - 1)]
        )
        # TODO END

        # initialize other layers
        self.linear = nn.Linear(num_units, num_vocabs)
Example #2
    def __init__(
        self,
        num_embed_units,  # pretrained wordvec size
        num_units,  # RNN units size
        num_layers,  # number of RNN layers
        num_vocabs,  # vocabulary size
        wordvec,  # pretrained wordvec matrix
        dataloader,  # dataloader
        cell_type="GRU",  # cell type,
        layer_norm=False,
        residual=False,
    ):

        super().__init__()

        # load pretrained wordvec
        self.num_vocabs = num_vocabs
        self.num_units = num_units
        self.wordvec = nn.Embedding.from_pretrained(wordvec)
        # the dataloader
        self.dataloader = dataloader

        # TODO START
        # fill the parameter for multi-layer RNN
        assert (num_layers >= 1)
        if cell_type == "RNN":
            self.cells = nn.Sequential(
                RNNCell(num_embed_units, num_units), *[
                    RNNCell(num_units, num_units)
                    for _ in range(num_layers - 1)
                ])
        elif cell_type == "GRU":
            self.cells = nn.Sequential(
                GRUCell(num_embed_units, num_units), *[
                    GRUCell(num_units, num_units)
                    for _ in range(num_layers - 1)
                ])
        elif cell_type == "LSTM":
            self.cells = nn.Sequential(
                LSTMCell(num_embed_units, num_units), *[
                    LSTMCell(num_units, num_units)
                    for _ in range(num_layers - 1)
                ])
        else:
            raise NotImplementedError("Unknown Cell Type")
        self.cell_type = cell_type
        # TODO END

        # initialize other layers
        self.linear = nn.Linear(num_units, num_vocabs)
        self.layer_norms = nn.Sequential(
            nn.LayerNorm(num_embed_units),
            *[nn.LayerNorm(num_units) for _ in range(num_layers - 1)]
        ) if layer_norm else None
        self.residual = residual
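
For context, a minimal sketch (not part of the original example) of how the stacked cells, optional layer norms, and residual flag initialized above could be combined for one decoding step. The cell signature cell(input, hidden) -> new_hidden is an assumption about the custom RNNCell/GRUCell used here; an LSTM cell's tuple state would need different handling.

    def _step_sketch(self, token_ids, hiddens):
        # token_ids: (batch,) word indices; hiddens: list of per-layer hidden states
        h_in = self.wordvec(token_ids)              # (batch, num_embed_units)
        new_hiddens = []
        for i, cell in enumerate(self.cells):
            if self.layer_norms is not None:
                h_in = self.layer_norms[i](h_in)    # normalize this layer's input
            h_out = cell(h_in, hiddens[i])          # assumed cell interface
            if self.residual and i > 0:             # shapes only match after layer 0
                h_out = h_out + h_in
            new_hiddens.append(h_out)
            h_in = h_out                            # feed the next layer
        logits = self.linear(h_in)                  # (batch, num_vocabs)
        return logits, new_hiddens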
Example #3
    def add_prediction_op(self):
        """Runs an rnn on the input using TensorFlows's
        @tf.nn.dynamic_rnn function, and returns the final state as a prediction.
        TODO:
            - Call tf.nn.dynamic_rnn using @cell below. See:
              https://www.tensorflow.org/api_docs/python/nn/recurrent_neural_networks
            - Apply a sigmoid transformation on the final state to
              normalize the inputs between 0 and 1.
        Returns:
            preds: tf.Tensor of shape (batch_size, 1)
        """

        # Pick out the cell to use here.
        if self.config.cell == "rnn":
            cell = RNNCell(1, 1)
        elif self.config.cell == "gru":
            cell = GRUCell(1, 1)
        elif self.config.cell == "lstm":
            cell = tf.nn.rnn_cell.LSTMCell(1)
        else:
            raise ValueError("Unsupported cell type.")

        x = self.inputs_placeholder
        ### YOUR CODE HERE (~2-3 lines)
        _, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
        preds = tf.nn.sigmoid(state)
        ### END YOUR CODE

        return preds
Example #4
    def add_prediction_op(self): 
        """Runs an rnn on the input

        Returns:
            preds: tf.Tensor of shape (batch_size, 1)
        """

        # Pick out the cell to use here.
        if self.config.cell == "rnn":
            cell = RNNCell(1, 1)
        elif self.config.cell == "gru":
            cell = GRUCell(1, 1)
        elif self.config.cell == "lstm":
            cell = tf.nn.rnn_cell.LSTMCell(1)
        else:
            raise ValueError("Unsupported cell type.")

        x = self.inputs_placeholder

        preds, _ = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
        preds = tf.transpose(preds, [1, 0, 2])  # (max_length, batch_size, 1)
        preds = tf.sigmoid(preds[-1])           # last time step: (batch_size, 1)


        return preds 
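
As a side note, for GRU/vanilla RNN cells the final state returned by tf.nn.dynamic_rnn equals the last entry of its outputs (when sequences are not padded), so Example #3 and Example #4 compute the same prediction. A minimal TF1-style sketch with assumed shapes:

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 20, 1])    # (batch, max_length, 1), assumed
cell = tf.nn.rnn_cell.GRUCell(1)
outputs, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
last_output = outputs[:, -1, :]        # equals `state` for GRU/vanilla RNN cells
preds = tf.nn.sigmoid(last_output)     # same normalization as both examples above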
Example #5
    def __init__(
            self,
            num_embed_units,  # pretrained wordvec size
            num_units,  # RNN units size
            num_layers,  # number of RNN layers
            num_vocabs,  # vocabulary size
            wordvec,  # pretrained wordvec matrix [len(vocab_list), n_dims]
            dataloader,  # dataloader
            cell_kind):

        super().__init__()

        # load pretrained wordvec
        self.wordvec = wordvec
        # the dataloader
        self.dataloader = dataloader

        # TODO START
        self.num_vocabs = num_vocabs
        self.cross_entropy = nn.CrossEntropyLoss()
        # fill the parameter for multi-layer RNN
        self.cell_kind = cell_kind
        if self.cell_kind == "RNN":
            self.cells = nn.Sequential(
                RNNCell(num_embed_units, num_units), *[
                    RNNCell(num_units, num_units)
                    for _ in range(num_layers - 1)
                ])
        elif self.cell_kind == "GRU":
            self.cells = nn.Sequential(
                GRUCell(num_embed_units, num_units), *[
                    GRUCell(num_units, num_units)
                    for _ in range(num_layers - 1)
                ])
        elif self.cell_kind == "LSTM":
            self.cells = nn.Sequential(
                LSTMCell(num_embed_units, num_units), *[
                    LSTMCell(num_units, num_units)
                    for _ in range(num_layers - 1)
                ])
        else:
            raise NotImplementedError("Unknown cell kind: " + cell_kind)
        # TODO END

        # initialize other layers
        self.linear = nn.Linear(num_units, num_vocabs)
Example #6
    def add_prediction_op_rnn(self, x, dropout_rate):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2
    
        TODO: There are quite a few things you'll need to do in this function:
            - Define the variables U, b_2.
            - Define the vector h as a constant and initialize it with
              zeros. See tf.zeros and tf.shape for information on how
              to initialize this variable to be of the right shape.
              https://www.tensorflow.org/api_docs/python/tf/zeros
              https://www.tensorflow.org/api_docs/python/tf/shape
            - In a for loop, begin to unroll the RNN sequence. Collect
              the predictions in a list.
            - When unrolling the loop, from the second iteration
              onwards, you will HAVE to call
              tf.get_variable_scope().reuse_variables() so that you do
              not create new variables in the RNN cell.
              See https://www.tensorflow.org/api_guides/python/state_ops#Sharing_Variables
            - Concatenate and reshape the predictions into a predictions
              tensor.
        Hint: You will find the function tf.stack (similar to np.asarray)
              useful to assemble a list of tensors into a larger tensor.
              https://www.tensorflow.org/api_docs/python/tf/stack
        Hint: You will find the function tf.transpose and the perms
              argument useful to shuffle the indices of the tensor.
              https://www.tensorflow.org/api_docs/python/tf/transpose
    
        Remember:
            * Use the xavier initialization for matrices.
            * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
            The keep probability should be set to the value of self.dropout_placeholder
    
        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)
        """

        cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
        preds = []  # Predicted output at each timestep should go here!

        # Define U and b2 as variables.
        # Initialize state as vector of zeros.
        ### YOUR CODE HERE (~4-6 lines)
        U = tf.get_variable('U', shape=[self.config.hidden_size, self.config.n_classes],
                            initializer=tf.contrib.layers.xavier_initializer())
        b_2 = tf.get_variable('b_2', shape=[self.config.n_classes],
                              initializer=tf.constant_initializer(0))
        h = tf.zeros((tf.shape(x)[0], self.config.hidden_size))
        ### END YOUR CODE

        with tf.variable_scope("RNN"):
            for time_step in range(self.max_length):
                ### YOUR CODE HERE (~6-10 lines)                
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                o, h   = cell(x[:, time_step, :], h)
                o_drop = tf.nn.dropout(o, dropout_rate)
                y      = tf.matmul(o_drop, U) + b_2
                preds.append(y)
                ### END YOUR CODE

        # Make sure to reshape @preds here.
        ### YOUR CODE HERE (~2-4 lines)
        preds = tf.stack(preds, axis=1)        
        ### END YOUR CODE

        return preds
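
For illustration only (not from the original assignment), the (batch_size, max_length, n_classes) logits returned above would typically feed a masked cross-entropy loss; the labels and mask arguments below are assumed names and shapes:

def add_loss_op_sketch(preds, labels, mask):
    # Hypothetical companion to add_prediction_op_rnn above.
    # preds: (batch, max_length, n_classes) logits
    # labels: (batch, max_length) int class ids; mask: (batch, max_length) bool
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=preds)
    losses = tf.boolean_mask(losses, mask)   # keep only real (non-padded) timesteps
    return tf.reduce_mean(losses)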
Example #7
    def add_prediction_op(self):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2
        TODO: There are quite a few things you'll need to do in this function:
            - Define the variables U, b_2.
            - Define the vector h as a constant and initialize it with
              zeros. See tf.zeros and tf.shape for information on how
              to initialize this variable to be of the right shape.
              https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
              https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
            - In a for loop, begin to unroll the RNN sequence. Collect
              the predictions in a list.
            - When unrolling the loop, from the second iteration
              onwards, you will HAVE to call
              tf.get_variable_scope().reuse_variables() so that you do
              not create new variables in the RNN cell.
              See https://www.tensorflow.org/versions/master/how_tos/variable_scope/
            - Concatenate and reshape the predictions into a predictions
              tensor.
        Hint: You will find the function tf.pack (similar to np.asarray)
              useful to assemble a list of tensors into a larger tensor.
              https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
        Hint: You will find the function tf.transpose and the perms
              argument useful to shuffle the indices of the tensor.
              https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose
        Remember:
            * Use the xavier initialization for matrices.
            * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
            The keep probability should be set to the value of self.dropout_placeholder
        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)
        """

        x = self.add_embedding()

        dropout_rate = self.dropout_placeholder

        if Config.cnn:
            repr_ = tf.expand_dims(tf.transpose(x, perm=[0, 2, 1]), -1)

            cnn_output1 = self.cnn_layer(repr_, Config.stepsize[0])
            cnn_output2 = self.cnn_layer(repr_, Config.stepsize[1])
            #cnn_output1_trunc = tf.slice(cnn_output1,[0,]*3,[-1,(self.max_length-Config.stepsize[1]+1)//2, Config.m])
            #assert cnn_output1_trunc.get_shape().as_list() == [None, (self.max_length-Config.stepsize[1]+1)//2, Config.m], "truncated results are not of the right shape. Expected {}, got {}".format([None, (dim1-Config.step_size[1]+1)//2, self.config.m], cnn_output1_trunc.get_shape().as_list())
            x = tf.concat([x, cnn_output1, cnn_output2], 2)
            #x = self.fully_connected_layer(x)

        preds = []  # Predicted output at each timestep should go here!

        # Use the cell defined below. For Q2, we will just be using the
        # RNNCell you defined, but for Q3, we will run this code again
        # with a GRU cell!
        if self.config.cell == "rnn":
            cell = RNNCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        elif self.config.cell == "gru":
            cell = GRUCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        elif self.config.cell == "lstm":

            rnn_inputs = tf.nn.dropout(x, dropout_rate)
            lstm_features = self.bilstm_layer(rnn_inputs)
            preds = self.project_layer(lstm_features, Config.n_classes)

            return preds
        else:
            raise ValueError("Unsuppported cell type: " + self.config.cell)

        # Define U and b2 as variables.
        # Initialize state as vector of zeros.
        ### YOUR CODE HERE (~4-6 lines)
        self.U = tf.get_variable(
            'U', [Config.hidden_size, Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b2 = tf.get_variable(
            'b2', [Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        h = tf.zeros([tf.shape(x)[0], Config.hidden_size])
        ### END YOUR CODE

        with tf.variable_scope("RNN"):
            for time_step in range(self.max_length):
                ### YOUR CODE HERE (~6-10 lines)
                if time_step >= 1:
                    tf.get_variable_scope().reuse_variables()
                o, h = cell(x[:, time_step, :], h)
                o_drop = tf.nn.dropout(o, dropout_rate)
                curPred = tf.matmul(o_drop, self.U) + self.b2
                preds.append(curPred)
                ### END YOUR CODE

        # Make sure to reshape @preds here.
        ### YOUR CODE HERE (~2-4 lines)
        preds = tf.stack(preds)
        preds = tf.transpose(preds, perm=[1, 0, 2])

        preds = tf.reshape(preds, [-1, Config.max_length, Config.n_classes])
        ### END YOUR CODE

        assert preds.get_shape().as_list() == [
            None, self.max_length, Config.n_classes
        ], "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())

        return preds
Example #8
    def add_prediction_op(self):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2
        TODO: There are quite a few things you'll need to do in this function:
            - Define the variables U, b_2.
            - Define the vector h as a constant and initialize it with
              zeros. See tf.zeros and tf.shape for information on how
              to initialize this variable to be of the right shape.
              https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
              https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
            - In a for loop, begin to unroll the RNN sequence. Collect
              the predictions in a list.
            - When unrolling the loop, from the second iteration
              onwards, you will HAVE to call
              tf.get_variable_scope().reuse_variables() so that you do
              not create new variables in the RNN cell.
              See https://www.tensorflow.org/versions/master/how_tos/variable_scope/
            - Concatenate and reshape the predictions into a predictions
              tensor.
        Hint: You will find the function tf.pack (similar to np.asarray)
              useful to assemble a list of tensors into a larger tensor.
              https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
        Hint: You will find the function tf.transpose and the perms
              argument useful to shuffle the indices of the tensor.
              https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose
        Remember:
            * Use the xavier initialization for matrices.
            * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
            The keep probability should be set to the value of self.dropout_placeholder
        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)
        """
        x1, x2 = self.add_embedding()
        dropout_rate = self.dropout_placeholder

        # choose cell type
        if self.config.cell == "rnn":
            cell = RNNCell(self.config.embed_size, self.config.hidden_size)
        elif self.config.cell == "gru":
            cell = GRUCell(self.config.embed_size, self.config.hidden_size)
        elif self.config.cell == "lstm":
            cell = LSTMCell(self.config.embed_size, self.config.hidden_size)
        else:
            raise ValueError("Unsuppported cell type: " + self.config.cell)

        # Initialize hidden states to zero vectors of shape (num_examples, hidden_size)
        h1 = tf.zeros((tf.shape(x1)[0], self.config.hidden_size), tf.float32)
        h2 = tf.zeros((tf.shape(x2)[0], self.config.hidden_size), tf.float32)

        with tf.variable_scope("RNN1") as scope:
            for time_step in range(self.helper.max_length):
                if time_step != 0:
                    scope.reuse_variables()
                o1_t, h1 = cell(x1[:, time_step, :], h1, scope)
        with tf.variable_scope("RNN2") as scope:
            for time_step in range(self.helper.max_length):
                if time_step != 0:
                    scope.reuse_variables()
                o2_t, h2 = cell(x2[:, time_step, :], h2, scope)

        # h_drop1 = tf.nn.dropout(h1, dropout_rate)
        # h_drop2 = tf.nn.dropout(h2, dropout_rate)

        # use L2-regularization: sum of squares of all parameters

        if self.config.distance_measure == "l2":
            # perform logistic regression on l2-distance between h1 and h2
            distance = norm(h1 - h2 + 0.000001)
            logistic_a = tf.Variable(0.0, dtype=tf.float32, name="logistic_a")
            logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
            self.regularization_term = tf.square(logistic_a) + tf.square(
                logistic_b)
            preds = tf.sigmoid(logistic_a * distance + logistic_b)

        elif self.config.distance_measure == "cosine":
            # perform logistic regression on cosine distance between h1 and h2
            distance = cosine_distance(h1 + 0.000001, h2 + 0.000001)
            logistic_a = tf.Variable(1.0, dtype=tf.float32, name="logistic_a")
            logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
            self.regularization_term = tf.square(logistic_a) + tf.square(
                logistic_b)
            preds = tf.sigmoid(logistic_a * distance + logistic_b)

        elif self.config.distance_measure == "custom_coef":
            # perform logistic regression on the vector |h1-h2|,
            # equivalent to logistic regression on the (scalar) weighted Manhattan distance between h1 and h2,
            # ie. weighted sum of |h1-h2|
            logistic_a = tf.get_variable(
                "coef", [self.config.hidden_size], tf.float32,
                tf.contrib.layers.xavier_initializer())
            logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
            self.regularization_term = tf.reduce_sum(
                tf.square(logistic_a)) + tf.square(logistic_b)
            preds = tf.sigmoid(
                tf.reduce_sum(logistic_a * tf.abs(h1 - h2), axis=1) +
                logistic_b)

        elif self.config.distance_measure == "concat":
            # use softmax for prediction
            U = tf.get_variable(
                "U", (4 * self.config.hidden_size, self.config.n_classes),
                tf.float32, tf.contrib.layers.xavier_initializer())
            b = tf.get_variable("b", (self.config.n_classes, ), tf.float32,
                                tf.constant_initializer(0))
            v = tf.nn.relu(tf.concat([h1, h2, tf.square(h1 - h2), h1 * h2], 1))
            self.regularization_term = tf.reduce_sum(
                tf.square(U)) + tf.reduce_sum(tf.square(b))
            preds = tf.matmul(v, U) + b

        elif self.config.distance_measure == "concat_steroids":
            # use softmax for prediction
            W1 = tf.get_variable(
                "W1", (4 * self.config.hidden_size, self.config.hidden_size),
                tf.float32, tf.contrib.layers.xavier_initializer())
            b1 = tf.get_variable("b1", (self.config.hidden_size, ), tf.float32,
                                 tf.constant_initializer(0))

            W2 = tf.get_variable(
                "W2", (self.config.hidden_size, self.config.n_classes),
                tf.float32, tf.contrib.layers.xavier_initializer())
            b2 = tf.get_variable("b2", (self.config.n_classes, ), tf.float32,
                                 tf.constant_initializer(0))

            v1 = tf.nn.relu(tf.concat(
                [h1, h2, tf.square(h1 - h2), h1 * h2], 1))
            v2 = tf.nn.relu(tf.matmul(v1, W1) + b1)

            self.regularization_term = tf.reduce_sum(
                tf.square(W1)) + tf.reduce_sum(tf.square(b1)) + tf.reduce_sum(
                    tf.square(W2)) + tf.reduce_sum(tf.square(b2))
            preds = tf.matmul(v2, W2) + b2

        else:
            raise ValueError("Unsuppported distance type: " +
                             self.config.distance_measure)

        return preds
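
The helpers norm() and cosine_distance() used in the "l2" and "cosine" branches above are not shown in this snippet; a plausible sketch consistent with how they are called (an assumption, not the original code):

def norm(x):
    # row-wise L2 norm: (batch, hidden) -> (batch,)
    return tf.sqrt(tf.reduce_sum(tf.square(x), axis=1))

def cosine_distance(a, b):
    # row-wise cosine similarity between two (batch, hidden) tensors
    return tf.reduce_sum(a * b, axis=1) / (norm(a) * norm(b))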
Example #9
    def add_prediction_op(self):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2


        Remember:
            * Use the xavier initialization for matrices.
            * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
            The keep probability should be set to the value of self.dropout_placeholder

        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)
        """

        x = self.add_embedding()
        dropout_rate = self.dropout_placeholder

        preds = []  # Predicted output at each timestep should go here!

        if self.config.cell == "rnn":
            cell = RNNCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        elif self.config.cell == "gru":
            cell = GRUCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        else:
            raise ValueError("Unsuppported cell type: " + self.config.cell)

        U = tf.get_variable(
            "U",
            shape=[self.config.hidden_size, self.config.n_classes],
            dtype=tf.float32,
            initializer=xavier_weight_init())
        b2 = tf.get_variable("b2",
                             shape=[
                                 self.config.n_classes,
                             ],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.0))
        h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size],
                       dtype=tf.float32)
        x = tf.transpose(x, [1, 0, 2])

        with tf.variable_scope("RNN"):
            for time_step in range(self.max_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                o_t, h_t = cell(x[time_step], h_t)
                o_drop_t = tf.nn.dropout(o_t, dropout_rate)  # keep probability from self.dropout_placeholder
                y_t = tf.matmul(o_drop_t, U) + b2
                preds.append(y_t)

        preds = tf.transpose(tf.stack(preds), [1, 0, 2])

        assert preds.get_shape().as_list() == [
            None, self.max_length, self.config.n_classes
        ], "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())
        return preds