Example #1
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        #print("x", x.shape)
        #print("y", y.shape)
        graph = nn.Graph(
            [self.W1, self.W2, self.W3, self.W4, self.W5, self.W6])
        input_x = nn.Input(graph, x)

        #first term
        xW1mult = nn.MatrixMultiply(graph, input_x, self.W1)
        #second term
        xW2mult = nn.MatrixMultiply(graph, input_x, self.W2)
        addW1W2 = nn.Add(graph, xW1mult, xW2mult)
        relu1 = nn.ReLU(graph, addW1W2)
        reluMult = nn.MatrixMultiply(graph, relu1, self.W3)

        xW4mult = nn.MatrixMultiply(graph, input_x, self.W4)
        W4W5mult = nn.MatrixMultiply(graph, xW4mult, self.W5)

        per2Add = nn.Add(graph, reluMult, W4W5mult)
        totalMult = nn.MatrixMultiply(graph, per2Add, self.W6)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss_node = nn.SoftmaxLoss(graph, totalMult, input_y)
            return graph

        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(totalMult)
Example #2
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph(self.w1_list + self.b1_list + self.w2_list + self.b2_list + self.w3_list + self.b3_list)
        digit_losses = []
        for digit in range(0,10):
            xInput = nn.Input(graph,x)
            layer1 = nn.MatrixMultiply(graph, xInput, self.w1_list[digit])
            layer2 = nn.MatrixVectorAdd(graph, layer1, self.b1_list[digit])
            layer3 = nn.ReLU(graph, layer2)
            layer4 = nn.MatrixMultiply(graph, layer3, self.w2_list[digit])
            layer5 = nn.MatrixVectorAdd(graph, layer4, self.b2_list[digit])
            layer6 = nn.ReLU(graph, layer5)
            layer7 = nn.MatrixMultiply(graph, layer6, self.w3_list[digit])
            layer8 = nn.MatrixVectorAdd(graph, layer7, self.b3_list[digit])
            basis_vector = np.zeros((1, 10))
            basis_vector[0][digit] = 1
            basis_vector_input = nn.Input(graph, basis_vector)
            digit_losses.append(nn.MatrixMultiply(graph, layer8, basis_vector_input))
            if digit == 1:
                digit_losses_matrix = nn.Add(graph, digit_losses[0], digit_losses[1])
            if digit > 1:
                digit_losses_matrix = nn.Add(graph, digit_losses_matrix, digit_losses[digit])
 
        if y is not None:
            "*** YOUR CODE HERE ***"
            yInput = nn.Input(graph, y)
            soft_max_layer = nn.SoftmaxLoss(graph, digit_losses_matrix, yInput)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(digit_losses_matrix)
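A minimal numpy sketch of the basis-vector trick used above (shapes are illustrative, assuming each w3_list[digit] maps the hidden layer to a single score column): every per-digit score column of shape (batch_size x 1) is multiplied by a one-hot row to place it in column `digit` of a (batch_size x 10) matrix, and the ten results are summed into the full score matrix.

import numpy as np

batch_size = 4
per_digit_scores = [np.random.randn(batch_size, 1) for _ in range(10)]
scores = np.zeros((batch_size, 10))
for digit, s in enumerate(per_digit_scores):
    basis_vector = np.zeros((1, 10))
    basis_vector[0, digit] = 1
    scores += s @ basis_vector  # contributes only to column `digit`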
Example #3
    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        h = nn.ReLU(nn.AddBias(nn.Linear(xs[0], self.w), self.b))
        for x in xs[1:]:
            h = nn.ReLU(nn.Add(nn.AddBias(nn.Linear(x, self.w), self.b),
                               nn.AddBias(nn.Linear(h, self.w_hidden), self.b_hidden)))
        return nn.AddBias(nn.Linear(h, self.w_last), self.b_last)
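In equation form, the recurrence built above is (writing $W, b$ for self.w, self.b; $W_h, b_h$ for self.w_hidden, self.b_hidden; and $W_{last}, b_{last}$ for self.w_last, self.b_last):

$$h_1 = \mathrm{ReLU}(x_1 W + b), \qquad h_t = \mathrm{ReLU}(x_t W + b + h_{t-1} W_h + b_h) \text{ for } t > 1, \qquad \text{logits} = h_L W_{last} + b_{last}.$$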
Example #4
    def f(h, x):
        if h is None:
            result = nn.Linear(x, self.w)
            temp = nn.Constant(numpy.ones([x.data.shape[0], result.data.shape[0]]))
            return nn.Linear(temp, result)
        else:
            return nn.Add(nn.Linear(x, self.w), nn.Linear(h, self.w_hidden))
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """

        # set up the graph
        oddRegressionGraph = nn.Graph([self.W1, self.b1, self.W2, self.b2])
        input_x = nn.Input(oddRegressionGraph, x)
        xW1 = nn.MatrixMultiply(oddRegressionGraph, input_x, self.W1)
        xW1_plus_b1 = nn.MatrixVectorAdd(oddRegressionGraph, xW1, self.b1)
        ReLU_1 = nn.ReLU(oddRegressionGraph, xW1_plus_b1)
        R1W2 = nn.MatrixMultiply(oddRegressionGraph, ReLU_1, self.W2)
        R1W2_plus_b2 = nn.MatrixVectorAdd(oddRegressionGraph, R1W2, self.b2)

        negx = nn.Input(oddRegressionGraph, x * -1)
        negxW1 = nn.MatrixMultiply(oddRegressionGraph, negx, self.W1)
        negxW1_plus_b1 = nn.MatrixVectorAdd(oddRegressionGraph, negxW1,
                                            self.b1)
        ReLU_2 = nn.ReLU(oddRegressionGraph, negxW1_plus_b1)
        R2W2 = nn.MatrixMultiply(oddRegressionGraph, ReLU_2, self.W2)
        R2W2_plus_b2 = nn.MatrixVectorAdd(oddRegressionGraph, R2W2, self.b2)
        # Negate the g(-x) branch with a constant -1 input node so gradients
        # still flow through it during backprop (feeding get_output() back in
        # as a fresh nn.Input would detach this branch from the graph).
        neg_one = nn.Input(oddRegressionGraph, np.array([[-1.0]]))
        negR2W2_plus_b2 = nn.MatrixMultiply(oddRegressionGraph, R2W2_plus_b2,
                                            neg_one)

        sumMatrix = nn.Add(oddRegressionGraph, R1W2_plus_b2, negR2W2_plus_b2)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            input_y = nn.Input(oddRegressionGraph, y)
            sumMatrix_SL_y = nn.SquareLoss(oddRegressionGraph, sumMatrix,
                                           input_y)
            return oddRegressionGraph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            return oddRegressionGraph.get_output(sumMatrix)
Example #6
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the inital (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"
        h = nn.Variable(batch_size, self.dimensionality)
        h.data = np.zeros((batch_size, self.dimensionality))
        g = nn.Graph([h, self.w1, self.w2, self.w3, self.b])
        for x in xs:
            h1 = nn.MatrixMultiply(g, h, self.w1)
            x2 = nn.MatrixMultiply(g, nn.Input(g, x), self.w2)
            h1_add_x2 = nn.Add(g, h1, x2)
            add_b = nn.MatrixVectorAdd(g, h1_add_x2, self.b)
            relu = nn.ReLU(g, add_b)
            h = relu
        result = nn.MatrixMultiply(g, h, self.w3)

        if y is not None:
            "*** YOUR CODE HERE ***"
            nn.SoftmaxLoss(g, result, nn.Input(g, y))
            return g
        else:
            "*** YOUR CODE HERE ***"
            return g.get_output(result)
Example #7
    def f(h, c):
        cw = nn.MatrixMultiply(graph, c, self.w_one)  # (batch_size x hidden_size)
        hw = nn.MatrixMultiply(graph, h, self.w_four)
        add = nn.Add(graph, cw, hw)  # (batch_size x hidden_size)
        relu = nn.ReLU(graph, add)  # (batch_size x hidden_size)
        reluw = nn.MatrixMultiply(graph, relu, self.w_three)  # (batch_size x 5)
        return reluw
Example #8
    def run(self, xs):
        layer = nn.Linear(nn.DataNode(xs[0].data), self.weight[0])
        for x in xs:
            layer = nn.ReLU(
                nn.AddBias(
                    nn.Linear(nn.Add(nn.Linear(x, self.weight[0]), layer),
                              self.weight[1]), self.bias[1]))
        return nn.AddBias(nn.Linear(layer, self.weight[2]), self.bias[2])
Example #9
    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        xw1 = nn.Linear(xs[0], self.w1)  # (batch_size x 47) * (47 x 100) = (batch_size x 100)
        xw1b1 = nn.AddBias(xw1, self.b1)  # (batch_size x 100) + (1 x 100)
        reluxw1b1 = nn.ReLU(xw1b1)
        last_node = reluxw1b1
        for i in range(1, len(xs)):
            hw = nn.Linear(last_node, self.w2)
            loop_xw1 = nn.Linear(xs[i], self.w1)
            loop_xw1b1 = nn.AddBias(loop_xw1, self.b1)
            loop_reluxw1b1 = nn.ReLU(loop_xw1b1)
            hw_plus_loop_reluxw1b1 = nn.Add(hw, loop_reluxw1b1)
            last_node = hw_plus_loop_reluxw1b1
        end_xw1 = nn.Linear(last_node, self.end_w1)
        end_xw1b1 = nn.AddBias(end_xw1, self.end_b1)  # 1x100 + 1x100
        end_reluxw1b1 = nn.ReLU(end_xw1b1)
        end_reluxw1b1w2 = nn.Linear(end_reluxw1b1, self.end_w2)
        end_reluxw1b1w2b2 = nn.AddBias(end_reluxw1b1w2,
                                       self.end_b2)  # 1x100 + 1x100
        end_reluxw1b1w2b2last = nn.ReLU(end_reluxw1b1w2b2)
        shrunkyclunk = nn.Linear(end_reluxw1b1w2b2last, self.shrinker)
        return shrunkyclunk
Example #10
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the inital (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]
        graph = nn.Graph([self.C_training, self.H_traing, self.m, self.b])

        H = np.zeros((batch_size, self.hidden_size))
        inputH = nn.Input(graph, H)
        for X in xs:
            inputX = nn.Input(graph, X)
            CWx = nn.MatrixMultiply(graph, inputX, self.C_training)
            HWh = nn.MatrixMultiply(graph, inputH, self.H_traing)
            inputH = nn.ReLU(graph, nn.Add(graph, CWx, HWh))

        xm = nn.MatrixMultiply(graph, inputH, self.m)
        xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b)

        if y is not None:
            input_y = nn.Input(graph, y)
            nn.SoftmaxLoss(graph, xm_plus_b, input_y)
            return graph
        else:
            return graph.get_output(xm_plus_b)
Example #11
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph(self.variables)
        negative1 = -1*np.ones((1,1))
        input_x = nn.Input(graph, x)
        neg_1 = nn.Input(graph, negative1)
        """First we do the positives"""
        xw1 = nn.MatrixMultiply(graph, input_x, self.variables[0])
        sumxw1b1 = nn.MatrixVectorAdd(graph, xw1, self.variables[1])
        relu = nn.ReLU(graph, sumxw1b1)
        reluW2 = nn.MatrixMultiply(graph, relu, self.variables[2])
        """Now we do the negatives"""
        negx = nn.MatrixMultiply(graph, input_x, neg_1)
        nxw1 = nn.MatrixMultiply(graph, negx, self.variables[0])
        sumnxw1 = nn.MatrixVectorAdd(graph, nxw1, self.variables[1])
        nrelu = nn.ReLU(graph, sumnxw1)
        nreluW2 = nn.MatrixMultiply(graph, nrelu, self.variables[2])
        """Set the negative value of negative x to negative"""
        nsumNRW2b2 = nn.MatrixMultiply(graph, nreluW2, neg_1)
        """Add the two sums together"""
        totalSum = nn.Add(graph, reluW2, nsumNRW2b2)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, totalSum, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(totalSum)
Example #12
    def run(self, x, y=None):
        """
        TODO: Question 5 - [Application] OddRegression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        # calculates g(x)
        graph = nn.Graph([self.w_one, self.b_one, self.w_two, self.b_two])
        input_x = nn.Input(graph, x)
        xw = nn.MatrixMultiply(graph, input_x, self.w_one)
        xw_plus_b = nn.MatrixVectorAdd(graph, xw, self.b_one)
        relu = nn.ReLU(graph, xw_plus_b)
        reluw = nn.MatrixMultiply(graph, relu, self.w_two)
        reluw_plus_b = nn.MatrixVectorAdd(graph, reluw, self.b_two)
        # calculates g(-x)
        negone = nn.Input(graph, np.array([[-1.0]]))
        neg_x = nn.MatrixMultiply(graph, input_x, negone)
        negxw = nn.MatrixMultiply(graph, neg_x, self.w_one)
        negxw_plus_b = nn.MatrixVectorAdd(graph, negxw, self.b_one)
        negrelu = nn.ReLU(graph, negxw_plus_b)
        negreluw = nn.MatrixMultiply(graph, negrelu, self.w_two)
        negreluw_plus_b = nn.MatrixVectorAdd(graph, negreluw, self.b_two)
        #g(x)-(g(-x))
        negG = nn.MatrixMultiply(graph, negreluw_plus_b, negone)
        oddFunc = nn.Add(graph, reluw_plus_b, negG)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, oddFunc, input_y)
            return graph

        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(oddFunc)
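The two blocks above implement the standard symmetrization trick for the OddRegression question: writing $g$ for the two-layer network $g(x) = \mathrm{ReLU}(x \, w_{one} + b_{one}) \, w_{two} + b_{two}$, the returned node is $f(x) = g(x) - g(-x)$, which is odd by construction:

$$f(-x) = g(-x) - g(x) = -\bigl(g(x) - g(-x)\bigr) = -f(x).$$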
Example #13
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"

        graph = nn.Graph([self.W1, self.W2, self.b1, self.b2])
        input_x = nn.Input(graph, x)
        xW1mult = nn.MatrixMultiply(graph, input_x, self.W1)
        b1add = nn.Add(graph, xW1mult, self.b1)
        relu = nn.ReLU(graph, b1add)
        W2reluMult = nn.MatrixMultiply(graph, relu, self.W2)
        lastAdd = nn.Add(graph, W2reluMult, self.b2)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss_node = nn.SquareLoss(graph, lastAdd, input_y)
            return graph

        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(lastAdd)
Example #14
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

        #pos
        a_input_x = nn.Input(graph, x)
        a_mult1 = nn.MatrixMultiply(graph, a_input_x, self.w1)
        a_add1 = nn.MatrixVectorAdd(graph, a_mult1, self.b1)
        a_relu1 = nn.ReLU(graph, a_add1)
        a_mult2 = nn.MatrixMultiply(graph, a_relu1, self.w2)
        a_add2 = nn.MatrixVectorAdd(graph, a_mult2, self.b2)

        #neg
        b_input_x = nn.Input(graph, np.dot(-1, x))
        b_mult1 = nn.MatrixMultiply(graph, b_input_x, self.w1)
        b_add1 = nn.MatrixVectorAdd(graph, b_mult1, self.b1)
        b_relu1 = nn.ReLU(graph, b_add1)
        b_mult2 = nn.MatrixMultiply(graph, b_relu1, self.w2)
        b_add2 = nn.MatrixVectorAdd(graph, b_mult2, self.b2)
        b_output = graph.get_output(b_add2)

        neg = np.negative(np.identity(np.shape(b_output)[1]))

        neg_b_add2 = nn.Input(graph, neg)
        b_mult3 = nn.MatrixMultiply(graph, b_add2, neg_b_add2)
        result = nn.Add(graph, a_add2, b_mult3)

        if y is not None:
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, result, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            return graph.get_output(result)
Example #15
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph([self.m1, self.b1, self.m2, self.b2])
        input_x = nn.Input(graph, x)
        xm1 = nn.MatrixMultiply(graph, input_x, self.m1)
        xm_plus_b1 = nn.MatrixVectorAdd(graph, xm1, self.b1)
        rel = nn.ReLU(graph, xm_plus_b1)
        xm2 = nn.MatrixMultiply(graph, rel, self.m2)
        f = nn.MatrixVectorAdd(graph, xm2, self.b2)

        input_x_neg = nn.Input(graph, -x)
        xm1_neg = nn.MatrixMultiply(graph, input_x_neg, self.m1)
        xm_plus_b1_neg = nn.MatrixVectorAdd(graph, xm1_neg, self.b1)
        rel_neg = nn.ReLU(graph, xm_plus_b1_neg)
        xm2_neg = nn.MatrixMultiply(graph, rel_neg, self.m2)
        xm_plus_b2_neg = nn.MatrixVectorAdd(graph, xm2_neg, self.b2)

        minus_one = nn.Input(graph, np.array([[-1.0]]))
        minus_f = nn.MatrixMultiply(graph, xm_plus_b2_neg, minus_one)
        lastone = nn.Add(graph, f, minus_f)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            nn.SquareLoss(graph, lastone, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(lastone)
Example #16
    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        # Batch_size x feature_size * feature_size x layer_size = batch_size x layer_size
        latest_prediction = self.getInitialPrediction(xs[0])
        latest_prediction = nn.ReLU(latest_prediction)
        for x in xs[1:]:
            # (Batch_size x feature_size * feature_size x layer_size) +
            # + (batch_size x layer_size * self.network_weights[1]) =
            # = batch_size x layer_size because for add function both matrices should have same dimensions
            # from that we can see that (batch_size x layer_size * self.network_weights[1]) = batch_size x layer_size
            # and self.network_weights[1] should be layer_size x layer_size
            # after that there are two ways to return batch_size x 5.
            # first way is that first and second layer weights should be 47x5 and 5x5
            # that means that we cannot have neither more nor less than 5 perceptrons in each layer which isnt optimal
            # second way is that we have one more layer that converts previous matrices to batch_size x 5
            # in this way we can have first layer weights of size 47xN, second layer NxN and third layer Nx5
            initial_prediction = self.getInitialPrediction(x)
            latest_prediction = nn.Add(
                initial_prediction,
                nn.Linear(latest_prediction, self.network_weights[1]))
            latest_prediction = nn.ReLU(latest_prediction)

        final_prediction = nn.Linear(latest_prediction,
                                     self.network_weights[2])
        return final_prediction
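In shorthand, the dimension argument in the comment above chains as: $(\text{batch} \times 47)(47 \times N) = \text{batch} \times N$ for the input layer, $\text{batch} \times N + (\text{batch} \times N)(N \times N) = \text{batch} \times N$ for the recurrent update, and $(\text{batch} \times N)(N \times 5) = \text{batch} \times 5$ for the output layer, where $N$ is the hidden layer size.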
Example #17
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        # set the trainable variables here
        # in the first try, we will do 2 layers
        # f(x) = W2 * ReLU(W1 * x + b1) + b2
        # size of each variable:
        # x: i * 1
        # W1: h * i, b1: h * 1
        # W2: i * h, b2: i * 1

        graph = nn.Graph([self.W1, self.W2, self.b1, self.b2])
        input_x = nn.Input(graph, x)
        mul_1 = nn.MatrixMultiply(graph, self.W1, input_x)
        add_1 = nn.MatrixVectorAdd(graph, mul_1, self.b1)
        # add_1 = nn.Add(graph, mul_1, self.b1)
        relu_1 = nn.ReLU(graph, add_1)
        mul_2 = nn.MatrixMultiply(graph, self.W2, relu_1)
        add_2 = nn.Add(graph, mul_2, self.b2)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, add_2, input_y)
            return graph

        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(add_2)
Example #18
    def run(self, xs):
        current_output = nn.AddBias(nn.Linear(xs[0], self.W), self.b)
        current_output = nn.ReLU(current_output)

        for i in range(1, len(xs)):
            current_output = nn.AddBias(
                nn.Add(nn.Linear(xs[i], self.W),
                       nn.Linear(current_output, self.W_hidden)), self.b)
            current_output = nn.ReLU(current_output)

        output = nn.AddBias(nn.Linear(current_output, self.W_last),
                            self.b_last)
        return output
Example #19
    def run(self, x, y=None):
        """
        TODO: Question 4 - [Application] Regression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"

        if y is not None:
            "*** YOUR CODE HERE ***"
            w1 = nn.Variable(len(x), len(x))
            w2 = nn.Variable(len(x), len(x))

            b1 = nn.Variable(len(x), 1)
            b2 = nn.Variable(len(x), 1)

            self.nodes = nn.Graph([w1, w2, b1, b2])
            input_x = nn.Input(self.nodes, x)

            xw1 = nn.MatrixMultiply(self.nodes, w1, input_x)
            xw1_plus_b1 = nn.MatrixVectorAdd(self.nodes, xw1, b1)
            relu_xw1b1 = nn.ReLU(self.nodes, xw1_plus_b1)
            input_y = nn.Input(self.nodes, y)
            loss1 = nn.SquareLoss(self.nodes, relu_xw1b1, input_y)

            xw2 = nn.MatrixMultiply(self.nodes, w2, input_x)
            xw2_plus_b2 = nn.MatrixVectorAdd(self.nodes, xw2, b2)
            relu_xw2b2 = nn.ReLU(self.nodes, xw2_plus_b2)
            loss2 = nn.SquareLoss(self.nodes, relu_xw2b2, input_y)

            nn.Add(self.nodes, loss1, loss2)
            return self.nodes
        else:
            "*** YOUR CODE HERE ***"
            pred1 = self.nodes.get_output(self.nodes.get_nodes()[-7])
            return pred1
Example #20
    def run(self, xs, y=None):
        batch_size = xs[0].shape[0]

        graph = nn.Graph([
            self.w1, self.b1, self.w2, self.b2, self.w3, self.w3_f, self.b2_f
        ])
        h = nn.Input(graph, np.tile(self.h, (batch_size, 1)))

        "*** YOUR CODE HERE ***"
        for element in xs[:len(xs) - 1]:
            x_graph = nn.Input(graph, element)
            mm_x = nn.MatrixMultiply(graph, x_graph, self.w1)
            hm = nn.MatrixMultiply(graph, h, self.w2)
            hm_x = nn.Add(graph, mm_x, hm)
            mva_b1 = nn.MatrixVectorAdd(graph, hm_x, self.b1)
            rl_b1 = nn.ReLU(graph, mva_b1)
            mm_w3 = nn.MatrixMultiply(graph, rl_b1, self.w3)
            xm_plus_b_w2_b2 = nn.MatrixVectorAdd(graph, mm_w3, self.b2)
            h = nn.ReLU(graph, xm_plus_b_w2_b2)

        # for the last element, use w3_final and b2_final to converge to batch size X 5
        x_graph = nn.Input(graph, xs[len(xs) - 1])
        mm_x = nn.MatrixMultiply(graph, x_graph, self.w1)
        hm = nn.MatrixMultiply(graph, h, self.w2)
        hm_x = nn.Add(graph, mm_x, hm)
        mva_b1 = nn.MatrixVectorAdd(graph, hm_x, self.b1)
        rl_b1 = nn.ReLU(graph, mva_b1)
        mm_w3 = nn.MatrixMultiply(graph, rl_b1, self.w3_f)
        xm_plus_b_w2_b2 = nn.MatrixVectorAdd(graph, mm_w3, self.b2_f)
        h = xm_plus_b_w2_b2

        if y is not None:
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, h, input_y)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(h)
Example #21
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        ide = nn.Variable(1)
        ide.data = -np.identity(1)
        g = nn.Graph([self.w1, self.b1, self.w2, self.b2, ide])
        x1 = nn.MatrixMultiply(g, nn.Input(g, x), self.w1)
        x1_add_b1 = nn.MatrixVectorAdd(g, x1, self.b1)
        relu = nn.ReLU(g, x1_add_b1)
        x2 = nn.MatrixMultiply(g, relu, self.w2)
        x2_add_b2 = nn.MatrixVectorAdd(g, x2, self.b2)

        n_x1 = nn.MatrixMultiply(g, nn.Input(g, -x), self.w1)
        n_x1_add_b1 = nn.MatrixVectorAdd(g, n_x1, self.b1)
        n_relu = nn.ReLU(g, n_x1_add_b1)
        n_x2 = nn.MatrixMultiply(g, n_relu, self.w2)
        n_x2_add_b2 = nn.MatrixVectorAdd(g, n_x2, self.b2)
        n_x2_add_b2 = nn.MatrixMultiply(g, n_x2_add_b2, ide)
        f = nn.Add(g, x2_add_b2, n_x2_add_b2)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            nn.SquareLoss(g, f, nn.Input(g, y))
            return g
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return g.get_output(f)
Example #22
    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        initialx = xs[0]
        rest = xs[1:]

        def firstNet(xi):
            xw = nn.Linear(xi, self.w)
            xwb0 = nn.AddBias(xw, self.b0)
            return nn.ReLU(xwb0)

        def secondNet(hi):
            hWhidden = nn.Linear(hi, self.wHidden)
            return nn.ReLU(nn.AddBias(hWhidden, self.b1))

        hi = firstNet(initialx)

        for xi in rest:
            hi = nn.Add(firstNet(xi), secondNet(hi))  # could also ReLU this sum and add a (1 x h) bias
            # hi is of size (batch_size x hidden_size)

        return nn.ReLU(nn.AddBias(nn.Linear(hi, self.wOutput), self.b2))
Example #23
    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        x = xs[0]
        x_w1 = nn.Linear(
            x, self.weights1
        )  # (bs x numchars) * (numchars x hidden) = (bs x hidden)
        xw1_b1_sum = nn.AddBias(x_w1, self.bias1)  # (bs x hidden)
        relu = nn.ReLU(xw1_b1_sum)  # (bs x hs)
        h_n = nn.AddBias(
            nn.Linear(relu, self.weights2),
            self.bias2)  # (bs x hs) * (hs x 5) = (bs x 5) + (bs x 5)

        for x in xs[1:]:
            x_w1 = nn.Add(
                nn.Linear(x, self.weights1),
                nn.Linear(h_n,
                          self.weights3))  # bs x hs + bsx5 * 5xhs = bs x hs
            xw1_b1_sum = nn.AddBias(x_w1, self.bias1)  # bs x hs
            relu = nn.ReLU(xw1_b1_sum)
            h_n = nn.AddBias(nn.Linear(relu, self.weights2), self.bias2)

        return h_n
Example #24
    def helper_function(self, graph, h, c):
        # this function helps to calculate the feature f(h, c)
        # f = ReLU(h * W1 + c * W2 + b1) * W3 + b2
        # size: (h has size batch_size x h1, c has size batch_size x 47)
        # W1: h1 x h2, W2: 47 x h2, b1: 1 x h2, W3: h2 x h1, b2: 1 x h1
        mul_1 = nn.MatrixMultiply(graph, h, self.W1)
        mul_2 = nn.MatrixMultiply(graph, c, self.W2)
        add_1 = nn.Add(graph, mul_1, mul_2)
        add_2 = nn.MatrixVectorAdd(graph, add_1, self.b1)
        relu_1 = nn.ReLU(graph, add_2)

        mul_3 = nn.MatrixMultiply(graph, relu_1, self.W3)
        add_3 = nn.MatrixVectorAdd(graph, mul_3, self.b2)
        return add_3
Example #25
        def f(h, c):
            if h is None:
                ones = nn.Input(graph, np.ones([batch_size, 1]))
                in_h = nn.MatrixMultiply(graph, ones, self.h0)
            else:
                in_h = h
            input_c = nn.Input(graph, c)
            c_mul_w = nn.MatrixMultiply(graph, input_c,
                                        self.w)  # batchsize x d
            h_mul_v = nn.MatrixMultiply(graph, in_h, self.v)
            relu1 = nn.ReLU(graph, nn.Add(graph, c_mul_w, h_mul_v))
            return relu1
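A small numpy check of the broadcast trick used in the `h is None` branch above: multiplying a (batch_size x 1) column of ones by the (1 x d) initial hidden state self.h0 tiles that row across the batch (the sizes below are made up for illustration).

import numpy as np

batch_size, d = 3, 4
h0 = np.arange(d, dtype=float).reshape(1, d)   # stands in for self.h0
ones = np.ones((batch_size, 1))
assert np.array_equal(ones @ h0, np.tile(h0, (batch_size, 1)))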
Example #26
    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        word_length = len(xs)
        hidden_output = None
        current_index = 0
        
        while current_index < word_length:
            initial_output = self.initial_network.predict(xs[current_index])
            
            if hidden_output is None:
                hidden_output = initial_output
            else:
                intermediate_output = self.hidden_network.predict(hidden_output)
                hidden_output = nn.Add(initial_output, intermediate_output)
                
            current_index += 1 
        
        final_prediction = self.final_network.predict(hidden_output)
        return final_prediction
Example #27
    def run(self, xs):
        """
        Runs the model for a batch of examples.
        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).
        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.
        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.
        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        # one layer
        # based on architecture, f(x) = relu(x*w1 + b1)
        # except z_i = x_i*w + h_i * w_hidden
        # so it's more like f(z) = relu(z + b1)
        # z0 = x0 * w
        z_0 = nn.Linear(xs[0], self.w)
        z_0PlusB1 = nn.AddBias(z_0, self.b)
        relu = nn.ReLU(z_0PlusB1)
        # compute first h
        h_i = relu

        for x in xs[1:]:
            xw = nn.Linear(x, self.w)
            hw = nn.Linear(h_i, self.w_hidden)
            # z_i = x_i*w + h_i * w_hidden
            z_i = nn.Add(xw, hw)
            addBias = nn.AddBias(z_i, self.b)
            h_i = nn.ReLU(addBias)
        return nn.Linear(h_i, self.w_final)
Example #28
def check_graph_accumulator(tracker):
    # A more thorough test that now requires gradient accumulators to be working
    import nn

    v1 = nn.Variable(1, 5)
    v1_data = np.ones_like(v1.data) / 10
    v1.data = v1_data
    graph = nn.Graph([v1])
    adder = nn.Add(graph, v1, v1)
    assert graph.get_nodes() == [v1, adder], \
        "Not all nodes are present after adding a node."
    assert graph.get_inputs(v1) == [], \
        "Graph.get_inputs should return no inputs for a Variable node"
    assert np.allclose(graph.get_output(v1), v1_data), \
        "Graph.get_output for a Variable should be its data:\n{}\n" \
        "Student returned:\n{}".format(v1_data, graph.get_output(v1))
    expected = [graph.get_output(v1)] * 2
    student = graph.get_inputs(adder)
    for a, b in zip(student, expected):
        assert np.allclose(a, b), "Graph.get_inputs returned incorrect value for an Add node\nStudent returned:\n{}\n" \
                                  "Expected:\n{}".format(a, b)
    assert np.allclose(graph.get_output(adder), 2 * graph.get_output(v1)), \
        "Graph.get_output returned incorrect value for an Add node\nStudent returned:\n{}\nExpected:\n{}"\
        .format(graph.get_output(adder), 2 * graph.get_output(v1))
    loss = nn.SoftmaxLoss(graph, adder, adder)
    for node in [v1, adder]:
        output_shape = graph.get_output(node).shape
        node_grad = graph.get_gradient(node)
        assert node_grad is not None, \
            "Graph.get_gradient returned None, instead of an all-zero value"
        assert np.shape(node_grad) == output_shape, \
            "Graph.get_gradient returned gradient of wrong shape, {0}; expected, {1}".format(np.shape(node_grad),
                                                                                             output_shape)
        assert np.allclose(node_grad, np.zeros_like(node_grad)), "Graph.get_gradient should return all-zero values" \
                                                                 " before backprop is called, instead returned:\n{}"\
            .format(node_grad)

    expected_loss = 1.60943791243
    graph.backprop()
    v1_grad = graph.get_gradient(v1)
    assert np.allclose(v1_grad, np.ones_like(v1_grad) * expected_loss * 2), \
        "Incorrect gradient after running Graph.backprop().\nStudent returned:\n{}\nExpected:\n{}\nMake sure you are" \
        " correctly accumulating your gradients.".format(v1_grad, np.ones_like(v1_grad) * expected_loss * 2)
    tracker.add_points(3)
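For reference, the constant in this test is ln 5: adder outputs five entries of 0.2, and with both the logits and the labels equal to that vector the softmax is uniform, so the loss is -sum_i 0.2 * ln(1/5) = ln 5 ≈ 1.60943791243. The expected v1 gradient is 2 * ln 5 per entry because the upstream gradient reaching adder (ln 5 per entry, arriving through the labels argument, since the logits-path term softmax(logits) - labels vanishes) must accumulate into v1 through both inputs of the Add node. A quick numpy check of the loss value, independent of the student's nn implementation:

import numpy as np

logits = np.full((1, 5), 0.2)
labels = np.full((1, 5), 0.2)
log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
loss = -(labels * log_probs).sum(axis=1).mean()
assert np.isclose(loss, np.log(5))  # 1.60943791243...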
Example #29
    def run(self, xs):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        node with shape (batch_size x self.num_chars), where every row in the
        array is a one-hot vector encoding of a character. For example, if we
        have a batch of 8 three-letter words where the last word is "cat", then
        xs[1] will be a node that contains a 1 at position (7, 0). Here the
        index 7 reflects the fact that "cat" is the last word in the batch, and
        the index 0 reflects the fact that the letter "a" is the inital (0th)
        letter of our combined alphabet for this task.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node of shape (batch_size x hidden_size), for your
        choice of hidden_size. It should then calculate a node of shape
        (batch_size x 5) containing scores, where higher scores correspond to
        greater probability of the word originating from a particular language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
        Returns:
            A node with shape (batch_size x 5) containing predicted scores
                (also called logits)
        """
        "*** YOUR CODE HERE ***"
        z = nn.Linear(xs[0], self.w)
        for x in xs[1:]:
            z = nn.Add(nn.Linear(x, self.w), nn.Linear(z, self.wh))

        return nn.Linear(z, self.wf)
Example #30
    def hidden(self, h, x):
        return nn.Add(nn.Linear(x, self.w1), nn.Linear(h, self.w3))