Example #1
    def __init__(self):
        Model.__init__(self)
        self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

        # Our dataset contains words from five different languages, and the
        # combined alphabets of the five languages contain a total of 47 unique
        # characters.
        # You can refer to self.num_chars or len(self.languages) in your code
        self.num_chars = 47
        self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

        # Remember to set self.learning_rate!
        # You may use any learning rate that works well for your architecture
        "*** YOUR CODE HERE ***"
        self.learning_rate = .007

        c = self.num_chars
        # Hidden-state size; tune as needed.
        d = 160
        self.hidden_size = d

        # w1 transforms the previous hidden state, w2 transforms the current
        # one-hot character batch, w3/b1 form the post-combination layer, and
        # `output` projects the final hidden state onto the five language scores.
        self.w1 = nn.Variable(d, c)
        self.w2 = nn.Variable(c, c)
        self.w3 = nn.Variable(c, d)
        self.b1 = nn.Variable(d)
        self.output = nn.Variable(d, 5)

    def run(self, xs, y=None):
        batch_size = xs[0].shape[0]
        d = self.hidden_size

        graph = nn.Graph([self.w1, self.w2, self.w3, self.b1, self.output])

        # Initial hidden state: an all-zero (batch_size x d) array.
        h0 = np.zeros((batch_size, d), dtype=float)
        input_nodeH = nn.Input(graph, h0)

        # One recurrent step per character: combine the transformed hidden
        # state with the transformed character batch, then apply a ReLU layer.
        for x in xs:
            input_nodeC = nn.Input(graph, x)
            multiply1 = nn.MatrixMultiply(graph, input_nodeH, self.w1)
            multiply2 = nn.MatrixMultiply(graph, input_nodeC, self.w2)
            combine = nn.MatrixVectorAdd(graph, multiply1, multiply2)
            multiply3 = nn.MatrixMultiply(graph, combine, self.w3)
            add1 = nn.MatrixVectorAdd(graph, multiply3, self.b1)
            input_nodeH = nn.ReLU(graph, add1)
        final = nn.MatrixMultiply(graph, input_nodeH, self.output)

        if y is not None:
            # At training time, the correct output `y` is known. Construct the
            # loss node; its constructor registers it with the graph, so it is
            # the last node added, as required.
            input_nodeY = nn.Input(graph, y)
            nn.SoftmaxLoss(graph, final, input_nodeY)
            return graph
        else:
            # At test time, the correct output is unknown, so return the
            # model's prediction as a numpy array.
            return graph.get_output(final)
Example #2
    def run(self, x, y=None):
        """
        TODO: Question 5 - [Application] OddRegression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        # "*** YOUR CODE HERE ***"
        graph = nn.Graph(
            [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3])
        # pos input
        pinput_x = nn.Input(graph, x)
        # layer 1
        pxm = nn.MatrixMultiply(graph, pinput_x, self.W1)
        pxm_plus_b = nn.MatrixVectorAdd(graph, pxm, self.b1)
        pa1 = nn.ReLU(graph, pxm_plus_b)
        # layer 2
        pa1m = nn.MatrixMultiply(graph, pa1, self.W2)
        pa1m_plus_b = nn.MatrixVectorAdd(graph, pa1m, self.b2)
        pa2 = nn.ReLU(graph, pa1m_plus_b)
        # layer 3
        pa2m = nn.MatrixMultiply(graph, pa2, self.W3)
        pa2m_plus_b = nn.MatrixVectorAdd(graph, pa2m, self.b3)

        # neg input
        ninput_x = nn.Input(graph, -x)
        # layer 1
        nxm = nn.MatrixMultiply(graph, ninput_x, self.W1)
        nxm_plus_b = nn.MatrixVectorAdd(graph, nxm, self.b1)
        na1 = nn.ReLU(graph, nxm_plus_b)
        # layer 2
        na1m = nn.MatrixMultiply(graph, na1, self.W2)
        na1m_plus_b = nn.MatrixVectorAdd(graph, na1m, self.b2)
        na2 = nn.ReLU(graph, na1m_plus_b)
        # layer 3
        na2m = nn.MatrixMultiply(graph, na2, self.W3)
        na2m_plus_b = nn.MatrixVectorAdd(graph, na2m, self.b3)

        # output: f(x) = g(x) - g(-x); negate the g(-x) branch inside the
        # graph so gradients flow through it during training
        neg_op = nn.Input(graph, -np.ones((1, 1)))
        neg_na2m_plus_b = nn.MatrixMultiply(graph, na2m_plus_b, neg_op)
        output = nn.Add(graph, pa2m_plus_b, neg_na2m_plus_b)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            # "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, output, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            # "*** YOUR CODE HERE ***"
            return graph.get_output(output)
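Since the network computes f(x) = g(x) - g(-x), its predictions are odd by construction: f(-x) = -f(x). A quick numerical spot-check, assuming `model` is a trained instance of this class (a placeholder name):

import numpy as np

x = np.array([[0.5], [1.0], [2.0]])  # (batch_size x 1) inputs
pred_pos = model.run(x)              # f(x)
pred_neg = model.run(-x)             # f(-x)
# Odd symmetry should hold up to floating-point error:
assert np.allclose(pred_neg, -pred_pos)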
Example #3
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"
        graph = nn.Graph([
            self.m0, self.b0, self.m1, self.b1, self.m2, self.b2, self.m3,
            self.b3
        ])

        # d sized vector h0
        batch = np.zeros((batch_size, self.hidden_layer))
        H0 = nn.Input(graph, batch)
        H = nn.MatrixVectorAdd(graph, H0, self.b0)

        for x in xs:
            input_x = nn.Input(graph, x)
            xm0 = nn.MatrixMultiply(graph, input_x, self.m0)
            xm0_plus_h = nn.MatrixVectorAdd(graph, H, xm0)

            xm1 = nn.MatrixMultiply(graph, xm0_plus_h, self.m1)
            xm_plus_b1 = nn.MatrixVectorAdd(graph, xm1, self.b1)
            rel = nn.ReLU(graph, xm_plus_b1)
            xm2 = nn.MatrixMultiply(graph, rel, self.m2)
            H = nn.MatrixVectorAdd(graph, xm2, self.b2)

        last_xm = nn.MatrixMultiply(graph, H, self.m3)
        lastone = nn.MatrixVectorAdd(graph, last_xm, self.b3)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            nn.SoftmaxLoss(graph, lastone, input_y)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(lastone)
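The docstring's "cat" example pins down the encoding: `xs[t][b]` is the one-hot vector for character `t` of word `b`. A minimal sketch of building `xs` for a batch of equal-length words, assuming `alphabet` is the combined 47-character alphabet with "a" at index 0 (the ordering is an assumption here):

import numpy as np

def encode_batch(words, alphabet):
    # All words in a batch share the same length L; xs has one
    # (batch_size x len(alphabet)) array per character position.
    L, batch_size = len(words[0]), len(words)
    xs = [np.zeros((batch_size, len(alphabet))) for _ in range(L)]
    for b, word in enumerate(words):
        for t, ch in enumerate(word):
            xs[t][b, alphabet.index(ch)] = 1
    return xs

# e.g. with "cat" last in a batch of 8 three-letter words and "a" at
# index 0, xs[1][7, 0] == 1, matching the docstring's example.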
Example #4
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Shape notes: batch_size = xs[0].shape[0]; each element of xs holds the
        characters' one-hot rows; the result is the (batch_size x d) hidden
        state multiplied by a (d x 5) weight matrix, plus a bias.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here xs will be a list of length L. Each element of xs will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, y is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        xs into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use nn.SoftmaxLoss as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "* YOUR CODE HERE *"
        graph = nn.Graph([
            self.b1, self.b2, self.b3, self.w1, self.w2, self.w3, self.add0,
            self.c
        ])
        add1 = nn.Input(graph, np.zeros((batch_size, 200)))
        add2 = nn.MatrixVectorAdd(graph, add1, self.add0)

        for var in xs:
            input_xs = nn.Input(graph, var)
            c = nn.MatrixMultiply(graph, input_xs, self.c)
            h_update = nn.MatrixVectorAdd(graph, add2, c)

            mul1 = nn.MatrixMultiply(graph, h_update, self.w1)
            addmul1 = nn.MatrixVectorAdd(graph, mul1, self.b1)
            hidden_output = nn.ReLU(graph, addmul1)
            mul2 = nn.MatrixMultiply(graph, hidden_output, self.w2)
            add2 = nn.MatrixVectorAdd(graph, mul2, self.b2)

        xmul = nn.MatrixMultiply(graph, add2, self.w3)
        addmul2 = nn.MatrixVectorAdd(graph, xmul, self.b3)

        if y is not None:
            inputY = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, addmul2, inputY)
            return graph
        else:
            return graph.get_output(addmul2)
Example #5
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here xs will be a list of length L. Each element of xs will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, y is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        xs into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use nn.SoftmaxLoss as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]
        if not self.graph:
            dim = 128
            w1 = nn.Variable(47, 47)
            w2 = nn.Variable(47, 47)
            b1 = nn.Variable(1, 47)
            b2 = nn.Variable(1, 47)
            h0 = nn.Variable(1, 47)

            w3 = nn.Variable(47, 47)
            w4 = nn.Variable(47, dim)
            w6 = nn.Variable(dim, 5)
            b3 = nn.Variable(1, 47)
            b4 = nn.Variable(1, dim)
            b6 = nn.Variable(1, 5)

            w5 = nn.Variable(47, 47)
            b5 = nn.Variable(1, 47)

            # Index map for self.l: 0=w1, 1=w2, 2=b1, 3=b2, 4=h0, 5=w3, 6=w4,
            # 7=b3, 8=b4, 9=w5, 10=b5, 11=w6, 12=b6
            self.l = [w1, w2, b1, b2, h0, w3, w4, b3, b4, w5, b5, w6, b6]
        self.graph = nn.Graph(self.l)

        "*** YOUR CODE HERE ***"
        char_inputs = []
        # Broadcast the learned (1 x 47) initial state h0 across the batch by
        # adding it to a zero (batch_size x 47) input.
        zero = np.zeros((batch_size, 47))
        zeroInput = nn.Input(self.graph, zero)
        h = nn.MatrixVectorAdd(self.graph, zeroInput, self.l[4])
        for i in range(len(xs)):
            char_inputs.append(nn.Input(self.graph, xs[i]))
            incorporate = nn.MatrixVectorAdd(self.graph, h, char_inputs[i])  # batch x 47
            mult = nn.MatrixMultiply(self.graph, incorporate, self.l[0])     # batch x 47
            add = nn.MatrixVectorAdd(self.graph, mult, self.l[2])
            h = nn.ReLU(self.graph, add)
        # Project the final hidden state onto the five language scores.
        mult2 = nn.MatrixMultiply(self.graph, h, self.l[6])       # batch x dim
        add2 = nn.MatrixVectorAdd(self.graph, mult2, self.l[8])
        relu2 = nn.ReLU(self.graph, add2)
        mult3 = nn.MatrixMultiply(self.graph, relu2, self.l[11])  # batch x 5
        add3 = nn.MatrixVectorAdd(self.graph, mult3, self.l[12])
        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(self.graph, y)
            loss = nn.SoftmaxLoss(self.graph, add3, input_y)
            return self.graph
        else:
            "*** YOUR CODE HERE ***"
            return self.graph.get_output(add3)
Example #6
    def run(self, x, y=None):
        """
        TODO: Question 5 - [Application] OddRegression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        if not self.nodes:
            w1 = nn.Variable(1, 50)
            w2 = nn.Variable(50, 50)
            w3 = nn.Variable(50, 1)
            b1 = nn.Variable(1, 50)
            b2 = nn.Variable(1, 50)
            b3 = nn.Variable(1, 1)
            self.nodes = nn.Graph([w1, w2, w3, b1, b2, b3])
            self.inputs = [w1, w2, w3, b1, b2, b3]
        w1 = self.inputs[0]
        w2 = self.inputs[1]
        w3 = self.inputs[2]
        b1 = self.inputs[3]
        b2 = self.inputs[4]
        b3 = self.inputs[5]
        self.nodes = nn.Graph(self.inputs)
        input_x = nn.Input(self.nodes, x)
        if y is not None:
            input_y = nn.Input(self.nodes, y)

        negation = nn.Input(self.nodes, np.array([[-1.0]]))

        xw1 = nn.MatrixMultiply(self.nodes, input_x, w1)
        xw1_plus_b1 = nn.MatrixVectorAdd(self.nodes, xw1, b1)
        relu_xw1b1 = nn.ReLU(self.nodes, xw1_plus_b1)

        xw2 = nn.MatrixMultiply(self.nodes, relu_xw1b1, w2)
        xw2_plus_b2 = nn.MatrixVectorAdd(self.nodes, xw2, b2)
        relu_xw2b2 = nn.ReLU(self.nodes, xw2_plus_b2)

        xw3 = nn.MatrixMultiply(self.nodes, relu_xw2b2, w3)
        final1 = nn.MatrixVectorAdd(self.nodes, xw3, b3)

        # deep breath, now the same layers for negative x (could be a loop)
        x_neg = nn.MatrixMultiply(self.nodes, input_x, negation)
        nxw1 = nn.MatrixMultiply(self.nodes, x_neg, w1)
        nxw1_plus_b1 = nn.MatrixVectorAdd(self.nodes, nxw1, b1)
        relu_nxw1b1 = nn.ReLU(self.nodes, nxw1_plus_b1)

        nxw2 = nn.MatrixMultiply(self.nodes, relu_nxw1b1, w2)
        nxw2_b2 = nn.MatrixVectorAdd(self.nodes, nxw2, b2)
        relu_nxw2b2 = nn.ReLU(self.nodes, nxw2_b2)

        nxw3 = nn.MatrixMultiply(self.nodes, relu_nxw2b2, w3)
        nxw3_b3 = nn.MatrixVectorAdd(self.nodes, nxw3, b3)
        # negate g(-x) so that final = g(x) - g(-x)
        final2 = nn.MatrixMultiply(self.nodes, nxw3_b3, negation)

        final = nn.MatrixVectorAdd(self.nodes, final1, final2)

        if y is not None:
            "*** YOUR CODE HERE ***"
            loss = nn.SquareLoss(self.nodes, final, input_y)
            return self.nodes

        else:
            "*** YOUR CODE HERE ***"
            return self.nodes.get_output(final)
Example #7
    def run(self, states, Q_target=None):
        """
        TODO: Question 7 - [Application] Reinforcement Learning

        Runs the DQN for a batch of states.

        The DQN takes the state and computes Q-values for all possible actions
        that can be taken. That is, if there are two actions, the network takes
        as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

        When Q_target == None, return the matrix of Q-values currently computed
        by the network for the input states.

        When Q_target is passed, it will contain the Q-values which the network
        should be producing for the current states. You must return a nn.Graph
        which computes the training loss between your current Q-value
        predictions and these target values, using nn.SquareLoss.

        Inputs:
            states: a (batch_size x 4) numpy array
            Q_target: a (batch_size x 2) numpy array, or None
        Output:
            (if Q_target is not None) A nn.Graph instance, where the last added
                node is the loss
            (if Q_target is None) A (batch_size x 2) numpy array of Q-value
                scores, for the two actions
        """
        "*** YOUR CODE HERE ***"
        if not self.nodes:
            # The hidden width is taken from the first batch's size; the
            # Variables are created only once, so later calls may use any
            # batch size.
            hidden = states.shape[0]
            w1 = nn.Variable(states.shape[1], hidden)
            w2 = nn.Variable(hidden, hidden)
            w3 = nn.Variable(hidden, 2)
            b1 = nn.Variable(1, hidden)
            b2 = nn.Variable(1, hidden)
            b3 = nn.Variable(1, 2)
            self.nodes = nn.Graph([w1, w2, w3, b1, b2, b3])
            self.inputs = [w1, w2, w3, b1, b2, b3]
        w1 = self.inputs[0]
        w2 = self.inputs[1]
        w3 = self.inputs[2]
        b1 = self.inputs[3]
        b2 = self.inputs[4]
        b3 = self.inputs[5]
        self.nodes = nn.Graph([w1, w2, w3, b1, b2, b3])
        input_x = nn.Input(self.nodes, states)

        if Q_target is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(self.nodes, Q_target)

        xw1 = nn.MatrixMultiply(self.nodes, input_x, w1)
        xw1b1 = nn.MatrixVectorAdd(self.nodes, xw1, b1)
        xw1relu = nn.ReLU(self.nodes, xw1b1)

        xw2 = nn.MatrixMultiply(self.nodes, xw1relu, w2)
        xw2b2 = nn.MatrixVectorAdd(self.nodes, xw2, b2)
        xw2relu = nn.ReLU(self.nodes, xw2b2)

        xw3 = nn.MatrixMultiply(self.nodes, xw2relu, w3)
        final = nn.MatrixVectorAdd(self.nodes, xw3, b3)

        if Q_target is not None:
            "*** YOUR CODE HERE ***"
            loss = nn.SquareLoss(self.nodes, final, input_y)
            return self.nodes
        else:
            "*** YOUR CODE HERE ***"
            return self.nodes.get_output(final)
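Once `run` returns the (batch_size x 2) Q-value matrix, action selection is a row-wise argmax, with epsilon-greedy exploration layered on top during training. A minimal sketch, assuming `model.run(state)` returns Q-values as above (the `get_action` name and epsilon handling are assumptions, not part of these examples):

import numpy as np

def get_action(model, state, eps=0.05):
    # state: a (1 x 4) numpy array; returns 0 or 1 (the two actions).
    if np.random.rand() < eps:
        return np.random.choice(2)   # explore
    q_values = model.run(state)      # (1 x 2) array of [Q(s, a_1), Q(s, a_2)]
    return int(np.argmax(q_values))  # exploit: best current action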
Example #8
    def run(self, xs, y=None):
        """
        TODO: Question 8 - [Application] Language Identification

        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        self.graph = nn.Graph([
            self.w1, self.w2, self.w3, self.w4, self.h0, self.b1, self.b2,
            self.b3, self.b4
        ])
        char = []
        zero = nn.Input(self.graph, np.zeros((batch_size, self.num_chars)))
        h = zero

        for i in range(len(xs)):
            char.append(nn.Input(self.graph, xs[i]))
            incorporate = nn.MatrixVectorAdd(self.graph, h, char[i])
            xm1 = nn.MatrixMultiply(self.graph, incorporate, self.w1)
            xm1_plus_b1 = nn.MatrixVectorAdd(self.graph, xm1, self.b1)
            h = nn.ReLU(self.graph, xm1_plus_b1)

        xm2 = nn.MatrixMultiply(self.graph, h, self.w2)
        xm2_plus_b2 = nn.MatrixVectorAdd(self.graph, xm2, self.b2)
        relu2 = nn.ReLU(self.graph, xm2_plus_b2)
        relu2w3 = nn.MatrixMultiply(self.graph, relu2, self.w3)
        relu2w3_plus_b3 = nn.MatrixVectorAdd(self.graph, relu2w3, self.b3)
        relu3 = nn.ReLU(self.graph, relu2w3_plus_b3)
        relu3w4 = nn.MatrixMultiply(self.graph, relu3, self.w4)
        relu3w4_plus_b4 = nn.MatrixVectorAdd(self.graph, relu3w4, self.b4)

        if y is not None:
            input_y = nn.Input(self.graph, y)
            loss = nn.SoftmaxLoss(self.graph, relu3w4_plus_b4, input_y)
            return self.graph

        else:
            return self.graph.get_output(relu3w4_plus_b4)
Example #9
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        # At each iteration, we first calculate a loss that measures how
        # good our network is. The graph keeps track of all operations used
        graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])

        input_x = nn.Input(graph, x)
        neg_x = nn.Input(graph, -1 * x)

        xW1 = nn.MatrixMultiply(graph, input_x, self.W1)
        neg_xW1 = nn.MatrixMultiply(graph, neg_x, self.W1)

        xW1_plusb1 = nn.MatrixVectorAdd(graph, xW1, self.b1)
        neg_xW1_plusb1 = nn.MatrixVectorAdd(graph, neg_xW1, self.b1)

        afterReLU = nn.ReLU(graph, xW1_plusb1)
        neg_afterReLU = nn.ReLU(graph, neg_xW1_plusb1)

        x2W2 = nn.MatrixMultiply(graph, afterReLU, self.W2)
        neg_x2W2 = nn.MatrixMultiply(graph, neg_afterReLU, self.W2)

        x2W2_plusb2 = nn.MatrixVectorAdd(graph, x2W2, self.b2)
        neg_x2W2_plusb2 = nn.MatrixVectorAdd(graph, neg_x2W2, self.b2)

        # Negate the g(-x) branch *inside* the graph (multiplying by a -1
        # input node) so gradients flow through it; converting the branch's
        # output to a fresh constant Input would cut off backpropagation.
        neg_one = nn.Input(graph, np.array([[-1.0]]))
        negated_neg = nn.MatrixMultiply(graph, neg_x2W2_plusb2, neg_one)

        sum_terms = nn.Add(graph, x2W2_plusb2, negated_neg)


        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, sum_terms, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(sum_terms)

Example #10
    def run(self, xs, y=None):
        """
        TODO: Question 8 - [Application] Language Identification

        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]
        self.iteration += 1
        # Step down the learning rate as training progresses.
        if self.iteration == 10000:
            self.learning_rate = 0.015
        elif self.iteration == 12000:
            self.learning_rate = 0.010
        elif self.iteration == 14000:
            self.learning_rate = 0.005

        "*** YOUR CODE HERE ***"
        if not self.graph:
            dim = 80
            w1 = nn.Variable(self.num_chars, self.num_chars)
            w2 = nn.Variable(self.num_chars, self.num_chars)
            b1 = nn.Variable(1, self.num_chars)
            b2 = nn.Variable(1, self.num_chars)
            h0 = nn.Variable(1, self.num_chars)

            w3 = nn.Variable(self.num_chars, self.num_chars)
            w4 = nn.Variable(self.num_chars, dim)
            w6 = nn.Variable(dim, 5)
            b3 = nn.Variable(1, self.num_chars)
            b4 = nn.Variable(1, dim)
            b6 = nn.Variable(1, 5)

            w5 = nn.Variable(self.num_chars, self.num_chars)
            b5 = nn.Variable(1, self.num_chars)

            # Index map: 0=w1, 1=w2, 2=b1, 3=b2, 4=h0, 5=w3, 6=w4, 7=b3,
            # 8=b4, 9=w5, 10=b5, 11=w6, 12=b6
            self.vars = [w1, w2, b1, b2, h0, w3, w4, b3, b4, w5, b5, w6, b6]

        self.graph = nn.Graph(self.vars)

        char_inputs = []
        zeroInput = nn.Input(self.graph, np.zeros(
            (batch_size, self.num_chars)))
        h_vec = nn.MatrixVectorAdd(self.graph, zeroInput, self.vars[4])

        for i in range(len(xs)):
            char_inputs.append(nn.Input(self.graph, xs[i]))
            incorporate = nn.MatrixVectorAdd(self.graph, h_vec, char_inputs[i])
            mult = nn.MatrixMultiply(self.graph, incorporate, self.vars[0])
            add = nn.MatrixVectorAdd(self.graph, mult, self.vars[2])
            h_vec = nn.ReLU(self.graph, add)

        mult2 = nn.MatrixMultiply(self.graph, h_vec, self.vars[6])
        add2 = nn.MatrixVectorAdd(self.graph, mult2, self.vars[8])
        relu2 = nn.ReLU(self.graph, add2)
        mult3 = nn.MatrixMultiply(self.graph, relu2, self.vars[11])
        add3 = nn.MatrixVectorAdd(self.graph, mult3, self.vars[12])

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(self.graph, y)
            loss = nn.SoftmaxLoss(self.graph, add3, input_y)
            return self.graph
        else:
            "*** YOUR CODE HERE ***"
            return self.graph.get_output(add3)
Example #11
    def run(self, x, y=None):
        """
        TODO: Question 4 - [Application] Regression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"

        len_x_quater = len(x) // 4

        if not self.graph:
            # Build the variables once so training updates persist across
            # calls (assumes self.graph starts as None in __init__, as in the
            # other examples). Note: `[nn.Variable(...)] * 8` would repeat ONE
            # Variable eight times, because list multiplication copies the
            # reference, not the object; use comprehensions instead.
            self.weights = [
                nn.Variable(len_x_quater, len_x_quater) for _ in range(8)
            ]
            self.backs = [nn.Variable(len_x_quater, 1) for _ in range(8)]
        weights, backs = self.weights, self.backs

        # This architecture splits the batch into four quarters and runs each
        # through its own two-layer network; it assumes the batch size is
        # constant across calls and divisible by 4.
        self.graph = nn.Graph(weights + backs)

        xs = [
            nn.Input(self.graph,
                     x[i * len_x_quater:(i + 1) * len_x_quater])
            for i in range(4)
        ]

        mults = [
            nn.MatrixMultiply(self.graph, weights[i], xs[i])
            for i in range(4)
        ]
        adds = [
            nn.MatrixVectorAdd(self.graph, mults[i], mults[i + 1])
            for i in range(0, 4, 2)
        ] + [
            nn.MatrixVectorAdd(self.graph, mults[i + 1], mults[i])
            for i in range(0, 4, 2)
        ]
        adds_in = [
            nn.MatrixVectorAdd(self.graph, adds[i], backs[i])
            for i in range(4)
        ]
        relus = [nn.ReLU(self.graph, add) for add in adds_in]

        mults2 = [
            nn.MatrixMultiply(self.graph, weights[i + 4], relus[i])
            for i in range(4)
        ]
        adds2 = [
            nn.MatrixVectorAdd(self.graph, mults2[i], mults2[i + 1])
            for i in range(0, 4, 2)
        ] + [
            nn.MatrixVectorAdd(self.graph, mults2[i + 1], mults2[i])
            for i in range(0, 4, 2)
        ]
        adds_in2 = [
            nn.MatrixVectorAdd(self.graph, adds2[i], backs[i + 4])
            for i in range(4)
        ]

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            len_y_quater = len(y) // 4
            ys = [
                nn.Input(self.graph,
                         y[i * len_y_quater:(i + 1) * len_y_quater])
                for i in range(4)
            ]
            losses = [
                nn.SquareLoss(self.graph, adds_in2[i], ys[i])
                for i in range(4)
            ]
            # reduce is the Python 2 builtin (functools.reduce in Python 3)
            add_end = reduce(lambda a, b: nn.Add(self.graph, a, b), losses)

            return self.graph

        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            vecs = [self.graph.get_output(node) for node in adds_in2]
            out = reduce(lambda a, b: np.concatenate((a, b), axis=0), vecs)
            return out
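The comprehension-based construction above matters because Python's list multiplication repeats a reference rather than creating distinct objects. A quick demonstration with plain lists:

a = [[0]] * 3                # three references to ONE inner list
a[0].append(1)
print(a)                     # [[0, 1], [0, 1], [0, 1]]

b = [[0] for _ in range(3)]  # three distinct inner lists
b[0].append(1)
print(b)                     # [[0, 1], [0], [0]]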
Example #12
    def run(self, x, y=None):
        """
        TODO: Question 5 - [Application] OddRegression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"
        n = 4
        if not self.graph:
            w1 = nn.Variable(1, 50)
            w2 = nn.Variable(50, 50)
            w3 = nn.Variable(50, 1)
            b1 = nn.Variable(1, 50)
            b2 = nn.Variable(1, 50)
            b3 = nn.Variable(1, 1)
            self.vars = [w1, w2, w3, b1, b2, b3]
            self.weights = self.vars[:3]
            self.backs = self.vars[3:]

        self.graph = nn.Graph(self.vars)
        input_x = nn.Input(self.graph, x)
        if y is not None:
            input_y = nn.Input(self.graph, y)

        input_negati = nn.Input(self.graph, np.array([[-1.]]))
        negati = nn.MatrixMultiply(self.graph, input_x, input_negati)
        add = add_three_edges(negati, self.graph, self.vars)
        sub = nn.MatrixMultiply(self.graph, add, input_negati)

        sub0 = add_three_edges(input_x, self.graph, self.vars)

        subend = nn.MatrixVectorAdd(self.graph, sub0, sub)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            loss = nn.SquareLoss(self.graph, subend, input_y)
            return self.graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return self.graph.get_output(subend)
Example #13
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"

        graph = nn.Graph([self.w1, self.b1, self.w2, self.b2, self.w3])

        # Bag-of-characters: sum the one-hot encodings over all positions.
        # Copy first so we don't mutate the caller's xs[0] in place.
        # (Note: this summarizes the word as a bag of characters rather than
        # a true recurrence.)
        summed = xs[0].copy()
        for i in range(1, len(xs)):
            summed = summed + xs[i]

        input_xs = nn.Input(graph, summed)
        mul1 = nn.MatrixMultiply(graph, input_xs, self.w1)
        add1 = nn.MatrixVectorAdd(graph, mul1, self.b1)
        reLU = nn.ReLU(graph, add1)
        mul2 = nn.MatrixMultiply(graph, reLU, self.w2)
        add2 = nn.MatrixVectorAdd(graph, mul2, self.b2)
        mul3 = nn.MatrixMultiply(graph, add2, self.w3)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, mul3, input_y)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(mul3)
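Summing one-hot rows across positions turns `xs` into per-word character counts, which is what the network above actually sees. For instance:

import numpy as np

# Two three-letter words over a toy 3-character alphabet (a, b, c):
xs = [
    np.array([[1, 0, 0], [0, 0, 1]]),  # position 0: 'a', 'c'
    np.array([[0, 1, 0], [1, 0, 0]]),  # position 1: 'b', 'a'
    np.array([[1, 0, 0], [0, 1, 0]]),  # position 2: 'a', 'b'
]
counts = sum(xs[1:], xs[0].copy())
print(counts)  # "aba" -> [2 1 0], "cab" -> [1 1 1]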
Example #14
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"

        #function nodes are, multiply, add vector, relu, matrix multiply, add vector
        #variables are w1, w2, b1, b2
        #size of the input vector
        i = x.shape[1]
        #to test and modify
        h = 100

        if not self.w1:
            self.w1 = nn.Variable(i, h)
        if not self.w2:
            self.w2 = nn.Variable(h, i)
        if not self.b1:
            self.b1 = nn.Variable(h)
        if not self.b2:
            self.b2 = nn.Variable(i)

        graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

        input_nodeX = nn.Input(graph, x)

        # xm = MatrixMultiply(graph, input_x, m)
        # xm_plus_b = MatrixVectorAdd(graph, xm, b)

        multiply1 = nn.MatrixMultiply(graph, input_nodeX, self.w1)
        add1 = nn.MatrixVectorAdd(graph, multiply1, self.b1)
        relu = nn.ReLU(graph, add1)
        multiply2 = nn.MatrixMultiply(graph, relu, self.w2)
        add2 = nn.MatrixVectorAdd(graph, multiply2, self.b2)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            input_nodeY = nn.Input(graph, y)
            # The SquareLoss constructor already registers the node with the
            # graph, so no extra graph.add(...) call is needed.
            loss_node = nn.SquareLoss(graph, add2, input_nodeY)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            return graph.get_output(add2)
Example #15
    def run(self, x, y=None):
        """
        TODO: Question 5 - [Application] OddRegression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_y = nn.Input(graph, y)
            input_x = nn.Input(graph, x)

            # initialize -x
            inv = nn.Input(graph, np.array([[-1.0]]))
            inv_input_x = nn.MatrixMultiply(graph, input_x, inv)

            # calculate g(x)
            graph, m = self.execute_layer(input_x, y, graph)

            # calculate -g(-x)
            graph, inv_m = self.execute_layer(inv_input_x, y, graph)
            inv_m = nn.MatrixMultiply(graph, inv_m, inv)

            # f(x) = g(x) - g(-x)
            odd = nn.MatrixVectorAdd(graph, m, inv_m)

            loss = nn.SquareLoss(graph, odd, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_x = nn.Input(graph, x)

            # initialize -x
            inv = nn.Input(graph, np.array([[-1.0]]))
            inv_input_x = nn.MatrixMultiply(graph, input_x, inv)

            # calculate g(x)
            graph, m = self.execute_layer(input_x, y, graph)

            # calculate -g(-x)
            graph, inv_m = self.execute_layer(inv_input_x, y, graph)
            inv_m = nn.MatrixMultiply(graph, inv_m, inv)

            # f(x) = g(x) - g(-x)
            odd = nn.MatrixVectorAdd(graph, m, inv_m)

            return graph.get_output(odd)
Example #16
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        # At each iteration, we first calculate a loss that measures how
        # good our network is. The graph keeps track of all operations used
        graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])
        input_x = nn.Input(graph, x)
        xW1 = nn.MatrixMultiply(graph, input_x, self.W1)
        xW1_plusb1 = nn.MatrixVectorAdd(graph, xW1, self.b1)
        afterReLU = nn.ReLU(graph, xW1_plusb1)
        x2W2 = nn.MatrixMultiply(graph, afterReLU, self.W2)
        x2W2_plusb2 = nn.MatrixVectorAdd(graph, x2W2, self.b2)
        # afterReLU2 = nn.ReLU(graph, x2W2_plusb2)
        # x3W3 = nn.MatrixMultiply(graph, afterReLU2, self.W3)
        # x3W3_plusb3 = nn.MatrixVectorAdd(graph, afterReLU2, self.b3)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, x2W2_plusb2, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            # The spec asks for raw scores (logits), so return them directly
            # rather than converting each row into a one-hot argmax vector.
            return graph.get_output(x2W2_plusb2)
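When class labels rather than raw scores are needed, the conversion from logits is a row-wise argmax; the row maximum never has to be materialized element by element. A small sketch, where `model` and `batch_x` are placeholder names:

import numpy as np

logits = model.run(batch_x)                    # (batch_size x 10) scores
predicted_digits = np.argmax(logits, axis=1)   # (batch_size,) class indices

# If a one-hot matrix is really required:
one_hot = np.eye(10)[predicted_digits]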
Example #17
    def run(self, xs, y=None):
        """
        TODO: Question 8 - [Application] Language Identification

        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"
        graph = nn.Graph([self.t, self.w, self.h0, self.v])

        def f(h, c):
            # One recurrent step. On the first step, broadcast the learned
            # (1 x d) initial state h0 across the batch by left-multiplying
            # it by a (batch_size x 1) column of ones.
            if h is None:
                ones = nn.Input(graph, np.ones([batch_size, 1]))
                in_h = nn.MatrixMultiply(graph, ones, self.h0)
            else:
                in_h = h
            input_c = nn.Input(graph, c)
            c_mul_w = nn.MatrixMultiply(graph, input_c, self.w)  # batch_size x d
            h_mul_v = nn.MatrixMultiply(graph, in_h, self.v)
            relu1 = nn.ReLU(graph, nn.Add(graph, c_mul_w, h_mul_v))
            return relu1

        # Run the recurrence over all characters, then project the final
        # hidden state onto the language scores.
        h = None
        for i in range(len(xs)):
            h = f(h, xs[i])
        mul = nn.MatrixMultiply(graph, h, self.t)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, mul, input_y)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(mul)
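The ones-times-h0 trick in `f` is just matrix-broadcasting a learned row vector: a (batch_size x 1) column of ones times a (1 x d) row replicates that row for every batch element. In plain numpy:

import numpy as np

batch_size, d = 4, 3
h0 = np.array([[0.1, 0.2, 0.3]])  # learned (1 x d) initial state
ones = np.ones((batch_size, 1))
print(ones.dot(h0))               # (4 x 3): h0 repeated in each row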
Example #18
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"
        graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])

        h, xW1, xW1_plusb1, xW1_plusb1c, afterReLU, x2W2 = [], [], [], [], [], []
        # The hidden state is (batch_size x 5), matching the W1/W2 shapes.
        # (The original used np.zeros_like(y), which crashes at test time
        # when y is None.)
        h.append(nn.Input(graph, np.zeros((batch_size, 5))))

        # Unroll over the actual word length, wrapping each character
        # array as a graph node and feeding the previous node (not the
        # whole list) into each operation.
        for i in range(len(xs)):
            c = nn.Input(graph, xs[i])
            xW1.append(nn.MatrixMultiply(graph, h[-1], self.W1))
            xW1_plusb1.append(nn.MatrixVectorAdd(graph, xW1[-1], self.b1))
            xW1_plusb1c.append(nn.Add(graph, xW1_plusb1[-1], c))
            afterReLU.append(nn.ReLU(graph, xW1_plusb1c[-1]))
            x2W2.append(nn.MatrixMultiply(graph, afterReLU[-1], self.W2))
            h.append(nn.MatrixVectorAdd(graph, x2W2[-1], self.b2))

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, h[-1], input_y)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            output = graph.get_output(h[-1])
            # Vectorized, behavior-preserving rewrite of the original
            # elementwise loops: mark every entry equal to its row maximum
            # with 1 and everything else with 0. (The docstring asks for
            # raw scores, but the argmax is unchanged either way.)
            return (output == output.max(axis=1, keepdims=True)).astype(output.dtype)
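As a side note on the encoding the docstring describes, here is a small self-contained numpy sketch of building one-hot character rows; the alphabet argument is a stand-in for the dataset's real 47-character combined alphabet.

import numpy as np

def one_hot_rows(word, alphabet):
    # One (1 x len(alphabet)) one-hot row per character of `word`.
    rows = []
    for ch in word:
        row = np.zeros((1, len(alphabet)))
        row[0, alphabet.index(ch)] = 1.0
        rows.append(row)
    return rows

# With alphabet[0] == "a", the row for "a" has its 1 in column 0, which
# is exactly the docstring's xs[1][7, 0] == 1 situation for "cat".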
Ejemplo n.º 19
0
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"

        if not self.nodes:
            w1 = nn.Variable(x.shape[1], x.shape[0])
            w2 = nn.Variable(x.shape[0], x.shape[0])
            w3 = nn.Variable(x.shape[0], 10)
            b1 = nn.Variable(1, x.shape[0])
            b2 = nn.Variable(1, x.shape[0])
            b3 = nn.Variable(1, 10)
            self.nodes = nn.Graph([w1, w2, w3, b1, b2, b3])
            self.inputs = [w1, w2, w3, b1, b2, b3]
        w1 = self.inputs[0]
        w2 = self.inputs[1]
        w3 = self.inputs[2]
        b1 = self.inputs[3]
        b2 = self.inputs[4]
        b3 = self.inputs[5]
        self.nodes = nn.Graph([w1, w2, w3, b1, b2, b3])
        input_x = nn.Input(self.nodes, x)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(self.nodes, y)

        xw1 = nn.MatrixMultiply(self.nodes, input_x, w1)
        xw1b1 = nn.MatrixVectorAdd(self.nodes, xw1, b1)
        xw1relu = nn.ReLU(self.nodes, xw1b1)

        xw2 = nn.MatrixMultiply(self.nodes, xw1relu, w2)
        xw2b2 = nn.MatrixVectorAdd(self.nodes, xw2, b2)
        xw2relu = nn.ReLU(self.nodes, xw2b2)

        xw3 = nn.MatrixMultiply(self.nodes, xw2relu, w3)
        final = nn.MatrixVectorAdd(self.nodes, xw3, b3)

        if y is not None:
            "*** YOUR CODE HERE ***"
            loss = nn.SoftmaxLoss(self.nodes, final, input_y)
            return self.nodes
        else:
            "*** YOUR CODE HERE ***"
            return self.nodes.get_output(self.nodes.get_nodes()[-1])
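For reference, what nn.SoftmaxLoss computes can be written directly in numpy. This is a sketch of standard batched softmax cross-entropy, not the framework's exact implementation.

import numpy as np

def softmax_cross_entropy(logits, y_onehot):
    # Shift for numerical stability, take the log-softmax, then average
    # the negative log-probability of the correct class over the batch.
    shifted = logits - logits.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -np.mean((y_onehot * log_probs).sum(axis=1))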
Ejemplo n.º 20
0
    def run(self, x, y=None):
        """
		Runs the model for a batch of examples.

		The correct labels are known during training, but not at test time.
		When correct labels are available, `y` is a (batch_size x 10) numpy
		array. Each row in the array is a one-hot vector encoding the correct
		class.

		Your model should predict a (batch_size x 10) numpy array of scores,
		where higher scores correspond to greater probability of the image
		belonging to a particular class. You should use `nn.SoftmaxLoss` as your
		training loss.

		Inputs:
			x: a (batch_size x 784) numpy array
			y: a (batch_size x 10) numpy array, or None
		Output:
			(if y is not None) A nn.Graph instance, where the last added node is
				the loss
			(if y is None) A (batch_size x 10) numpy array of scores (aka logits)
		"""
        "*** YOUR CODE HERE ***"

        if y is not None:
            "*** YOUR CODE HERE ***"
            if not self.w1:
                h = 200
                self.w1 = nn.Variable(np.shape(x)[1], h)
                self.w2 = nn.Variable(h, 10)
                self.b1 = nn.Variable(h)
                self.b2 = nn.Variable(10)

            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_x = nn.Input(graph, x)
            input_y = nn.Input(graph, y)

            xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
            xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
            relu = nn.ReLU(graph, xw1_plus_b1)
            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)

            loss = nn.SoftmaxLoss(graph, reluw2_plus_b2, input_y)

            self.learning_rate = max(self.learning_rate * 0.999, 0.001)

            return graph
        else:
            "*** YOUR CODE HERE ***"
            if not self.w1:
                h = 200
                self.w1 = nn.Variable(np.shape(x)[1], h)
                self.w2 = nn.Variable(h, 10)
                self.b1 = nn.Variable(h)
                self.b2 = nn.Variable(10)

            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_x = nn.Input(graph, x)

            xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
            xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
            relu = nn.ReLU(graph, xw1_plus_b1)
            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)

            return graph.get_output(reluw2_plus_b2)
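The learning-rate line above implements exponential decay with a floor; a standalone sketch of the schedule it produces:

lr = 0.1
for step in range(10000):
    lr = max(lr * 0.999, 0.001)
# 0.1 * 0.999**n falls below the 0.001 floor after roughly 4600 steps,
# after which lr stays pinned at 0.001.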
Ejemplo n.º 21
0
    def run(self, xs, y=None):
        """
        TODO: Question 8 - [Application] Language Identification

        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"
        if not self.nodes:
            w1 = nn.Variable(self.num_chars, self.num_chars)
            w2 = nn.Variable(self.num_chars, self.num_chars)
            w3 = nn.Variable(self.num_chars, self.num_chars)
            w4 = nn.Variable(self.num_chars, 5)
            b1 = nn.Variable(1, self.num_chars)
            b2 = nn.Variable(1, self.num_chars)
            b3 = nn.Variable(1, self.num_chars)
            b4 = nn.Variable(1, 5)
            h = nn.Variable(1, self.num_chars)
            bonusw = nn.Variable(self.num_chars, self.num_chars)
            bonusb = nn.Variable(1, self.num_chars)
            self.nodes = nn.Graph(
                [w1, w2, w3, w4, b1, b2, b3, b4, h, bonusw, bonusb])
            self.inputs = [w1, w2, w3, w4, b1, b2, b3, b4, h, bonusw, bonusb]
        (w1, w2, w3, w4, b1, b2, b3, b4, h,
         bonusw, bonusb) = self.inputs
        # bonusw/bonusb belong to a commented-out extra layer and are
        # deliberately left out of the per-batch graph below.
        self.nodes = nn.Graph([w1, w2, w3, w4, b1, b2, b3, b4, h])
        zeros = nn.Input(self.nodes, np.zeros((batch_size, self.num_chars)))
        h = nn.MatrixVectorAdd(self.nodes, zeros, h)

        for s in xs:
            ch = nn.Input(self.nodes, s)
            h_sum = nn.MatrixVectorAdd(self.nodes, h, ch)
            hw1 = nn.MatrixMultiply(self.nodes, h_sum, w1)
            hw1b1 = nn.MatrixVectorAdd(self.nodes, hw1, b1)
            h = nn.ReLU(self.nodes, hw1b1)

        hw2 = nn.MatrixMultiply(self.nodes, h, w2)
        hw2b2 = nn.MatrixVectorAdd(self.nodes, hw2, b2)
        hw2relu = nn.ReLU(self.nodes, hw2b2)

        hw3 = nn.MatrixMultiply(self.nodes, hw2relu, w3)
        hw3b3 = nn.MatrixVectorAdd(self.nodes, hw3, b3)
        hw3relu = nn.ReLU(self.nodes, hw3b3)

        hw4 = nn.MatrixMultiply(self.nodes, hw3relu, w4)
        final = nn.MatrixVectorAdd(self.nodes, hw4, b4)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(self.nodes, y)
            finalloss = nn.SoftmaxLoss(self.nodes, final, input_y)
            return self.nodes
        else:
            "*** YOUR CODE HERE ***"
            return self.nodes.get_output(final)
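nn.MatrixVectorAdd above is used both to add a (1 x num_chars) row to a whole batch and to add two equal-shaped matrices. In numpy terms both are ordinary broadcasting; a sketch of the semantics, not the framework's code:

import numpy as np

batch = np.ones((8, 47))             # (batch_size x num_chars)
row = np.arange(47).reshape(1, 47)   # a (1 x 47) bias row

out_row = batch + row                # row broadcast across all 8 rows
out_eq = batch + batch               # equal shapes: elementwise add
assert out_row.shape == out_eq.shape == (8, 47)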
Ejemplo n.º 22
0
    def run(self, states, Q_target=None):
        """
		Runs the DQN for a batch of states.

		The DQN takes the state and computes Q-values for all possible actions
		that can be taken. That is, if there are two actions, the network takes
		as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

		When Q_target == None, return the matrix of Q-values currently computed
		by the network for the input states.

		When Q_target is passed, it will contain the Q-values which the network
		should be producing for the current states. You must return a nn.Graph
		which computes the training loss between your current Q-value
		predictions and these target values, using nn.SquareLoss.

		Inputs:
			states: a (batch_size x 4) numpy array
			Q_target: a (batch_size x 2) numpy array, or None
		Output:
			(if Q_target is not None) A nn.Graph instance, where the last added
				node is the loss
			(if Q_target is None) A (batch_size x 2) numpy array of Q-value
				scores, for the two actions
		"""
        "*** YOUR CODE HERE ***"

        if Q_target is not None:
            "*** YOUR CODE HERE ***"
            if not self.w1:
                h = 100
                self.w1 = nn.Variable(self.state_size, h)
                self.w2 = nn.Variable(h, self.num_actions)
                self.b1 = nn.Variable(h)
                self.b2 = nn.Variable(self.num_actions)

            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_x = nn.Input(graph, states)
            input_y = nn.Input(graph, Q_target)

            xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
            xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
            relu = nn.ReLU(graph, xw1_plus_b1)
            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)

            loss = nn.SquareLoss(graph, reluw2_plus_b2, input_y)

            return graph
        else:
            "*** YOUR CODE HERE ***"
            if not self.w1:
                h = 100
                self.w1 = nn.Variable(self.state_size, h)
                self.w2 = nn.Variable(h, self.num_actions)
                self.b1 = nn.Variable(h)
                self.b2 = nn.Variable(self.num_actions)

            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_x = nn.Input(graph, states)

            xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
            xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
            relu = nn.ReLU(graph, xw1_plus_b1)
            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)

            return graph.get_output(reluw2_plus_b2)
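Once run returns the (batch_size x 2) Q-value matrix, the agent still has to pick an action, which is typically handled outside run. A hedged sketch of the usual epsilon-greedy rule:

import numpy as np

def choose_action(q_row, eps, rng=np.random):
    # q_row: Q-values for one state, shape (num_actions,).
    if rng.rand() < eps:
        return rng.randint(len(q_row))   # explore uniformly at random
    return int(np.argmax(q_row))         # exploit the best known action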
Ejemplo n.º 23
0
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs y are known during training, but not at test time.
        If correct outputs y are provided, this method must construct and
        return a nn.Graph for computing the training loss. If y is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "* YOUR CODE HERE *"
        n = 4
        if not self.graph:
                w1 = nn.Variable(1, 50)          
                w2 = nn.Variable(50, 50)
                w3 = nn.Variable(50, 1)
                b1 = nn.Variable(1, 50)
                b2 = nn.Variable(1, 50)
                b3 = nn.Variable(1, 1)
                self.l = [w1,w2,w3,b1,b2,b3]
                self.graph = nn.Graph(self.l)
        self.graph = nn.Graph(self.l)
        input_x = nn.Input(self.graph,x)
        if y is not None: #<--- THIS LITTLE CONDITIONAL SO IMPORTANT HFS
            input_y = nn.Input(self.graph,y)
        input_neg = nn.Input(self.graph, np.matrix([-1.])) #Tx1
        # g(x): two ReLU hidden layers
        mult = nn.MatrixMultiply(self.graph, input_x, self.l[0])   # batch x 50
        add = nn.MatrixVectorAdd(self.graph, mult, self.l[3])
        relu = nn.ReLU(self.graph, add)
        mult2 = nn.MatrixMultiply(self.graph, relu, self.l[1])     # batch x 50
        add2 = nn.MatrixVectorAdd(self.graph, mult2, self.l[4])
        relu2 = nn.ReLU(self.graph, add2)
        mult3 = nn.MatrixMultiply(self.graph, relu2, self.l[2])    # batch x 1
        add3 = nn.MatrixVectorAdd(self.graph, mult3, self.l[5])
        g_pos = add3

        # g(-x): the same weights applied to -x
        neg = nn.MatrixMultiply(self.graph, input_x, input_neg)    # -x
        mult = nn.MatrixMultiply(self.graph, neg, self.l[0])
        add = nn.MatrixVectorAdd(self.graph, mult, self.l[3])
        relu = nn.ReLU(self.graph, add)
        mult2 = nn.MatrixMultiply(self.graph, relu, self.l[1])
        add2 = nn.MatrixVectorAdd(self.graph, mult2, self.l[4])
        relu2 = nn.ReLU(self.graph, add2)
        mult3 = nn.MatrixMultiply(self.graph, relu2, self.l[2])
        add3 = nn.MatrixVectorAdd(self.graph, mult3, self.l[5])
        g_neg = nn.MatrixMultiply(self.graph, add3, input_neg)     # -g(-x)
        sub = nn.MatrixVectorAdd(self.graph, g_pos, g_neg)         # g(x) - g(-x)
            
        if y is not None:
            # At training time, the correct output y is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            loss = nn.SquareLoss(self.graph, sub, input_y)
            return self.graph
        else:
            # At test time, the correct output is unknown.
            # Return the model's prediction as a numpy array instead.
            return self.graph.get_output(sub)
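The g(x) - g(-x) construction above is odd by algebra; a quick numpy check of the property with a toy stand-in for the network:

import numpy as np

def g(x):
    return np.maximum(0, 3.0 * x + 1.0)   # any fixed function works here

def f(x):
    return g(x) - g(-x)

x = np.linspace(-2, 2, 9)
assert np.allclose(f(-x), -f(x))          # f is odd by construction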
Ejemplo n.º 24
0
    def run(self, xs, y=None):
        """
		Runs the model for a batch of examples.

		Although words have different lengths, our data processing guarantees
		that within a single batch, all words will be of the same length (L).

		Here `xs` will be a list of length L. Each element of `xs` will be a
		(batch_size x self.num_chars) numpy array, where every row in the array
		is a one-hot vector encoding of a character. For example, if we have a
		batch of 8 three-letter words where the last word is "cat", we will have
		xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
		is the initial (0th) letter of our combined alphabet for this task.

		The correct labels are known during training, but not at test time.
		When correct labels are available, `y` is a (batch_size x 5) numpy
		array. Each row in the array is a one-hot vector encoding the correct
		class.

		Your model should use a Recurrent Neural Network to summarize the list
		`xs` into a single node that represents a (batch_size x hidden_size)
		array, for your choice of hidden_size. It should then calculate a
		(batch_size x 5) numpy array of scores, where higher scores correspond
		to greater probability of the word originating from a particular
		language. You should use `nn.SoftmaxLoss` as your training loss.

		Inputs:
			xs: a list with L elements (one per character), where each element
				is a (batch_size x self.num_chars) numpy array
			y: a (batch_size x 5) numpy array, or None
		Output:
			(if y is not None) A nn.Graph instance, where the last added node is
				the loss
			(if y is None) A (batch_size x 5) numpy array of scores (aka logits)

		Hint: you may use the batch_size variable in your code
		"""
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"

        if y is not None:
            "*** YOUR CODE HERE ***"
            if not self.w1:
                self.setup()

            # h0, graph = self.run_helper(xs, batch_size)

            graph = nn.Graph([
                self.p1, self.p2, self.q1, self.q2, self.r1, self.s1, self.w1,
                self.w2, self.b1, self.b2
            ])
            # graph = nn.Graph([self.p1, self.p2, self.q1, self.q2, self.r1, self.s1, self.w1, self.w2, self.w3, self.b1, self.b2, self.b3])
            h0 = np.zeros([batch_size, 47])
            input_h = nn.Input(graph, h0)

            for i in range(len(xs)):
                # graph = nn.Graph([self.p1, self.p2, self.q1, self.q2, self.w1, self.w2, self.b1, self.b2])
                # input_h = nn.Input(graph, h0)
                input_c = nn.Input(graph, xs[i])

                cr1 = nn.MatrixMultiply(graph, input_c, self.r1)
                cr1_plus_s1 = nn.MatrixVectorAdd(graph, cr1, self.s1)
                h_plus_cr1 = nn.Add(graph, input_h, cr1_plus_s1)

                hp1 = nn.MatrixMultiply(graph, h_plus_cr1, self.p1)
                hp1_plus_q1 = nn.MatrixVectorAdd(graph, hp1, self.q1)
                relu = nn.ReLU(graph, hp1_plus_q1)

                relup2 = nn.MatrixMultiply(graph, relu, self.p2)
                input_h = nn.MatrixVectorAdd(graph, relup2, self.q2)

                # h0 += graph.get_output(relup2_plus_q2)
                # h0 += xs[i] * (i + 1)

            # graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            # input_h = nn.Input(graph, h0)
            input_y = nn.Input(graph, y)

            hw1 = nn.MatrixMultiply(graph, input_h, self.w1)
            hw1_plus_b1 = nn.MatrixVectorAdd(graph, hw1, self.b1)
            relu = nn.ReLU(graph, hw1_plus_b1)

            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)
            # relulu = nn.ReLU(graph, reluw2_plus_b2)

            # reluluw3 = nn.MatrixMultiply(graph, relulu, self.w3)
            # reluluw3_plus_b3 = nn.MatrixVectorAdd(graph, reluluw3, self.b3)
            # relululu = nn.ReLU(graph, reluluw3_plus_b3)

            # relululuw4 = nn.MatrixMultiply(graph, relululu, self.w4)
            # relululuw4_plus_b4 = nn.MatrixVectorAdd(graph, relululuw4, self.b4)

            loss = nn.SoftmaxLoss(graph, reluw2_plus_b2, input_y)

            # self.learning_rate = max(self.learning_rate * 0.9999, 0.000001)

            return graph
        else:
            "*** YOUR CODE HERE ***"
            if not self.w1:
                self.setup()

            # h0 = self.run_helper(xs, batch_size)

            graph = nn.Graph([
                self.p1, self.p2, self.q1, self.q2, self.r1, self.s1, self.w1,
                self.w2, self.b1, self.b2
            ])
            # graph = nn.Graph([self.p1, self.p2, self.q1, self.q2, self.r1, self.s1, self.w1, self.w2, self.w3, self.b1, self.b2, self.b3])
            h0 = np.zeros([batch_size, 47])
            input_h = nn.Input(graph, h0)

            for i in range(len(xs)):
                # graph = nn.Graph([self.p1, self.p2, self.q1, self.q2, self.w1, self.w2, self.b1, self.b2])
                # input_h = nn.Input(graph, h0)
                input_c = nn.Input(graph, xs[i])

                cr1 = nn.MatrixMultiply(graph, input_c, self.r1)
                cr1_plus_s1 = nn.MatrixVectorAdd(graph, cr1, self.s1)
                h_plus_cr1 = nn.Add(graph, input_h, cr1_plus_s1)

                hp1 = nn.MatrixMultiply(graph, h_plus_cr1, self.p1)
                hp1_plus_q1 = nn.MatrixVectorAdd(graph, hp1, self.q1)
                relu = nn.ReLU(graph, hp1_plus_q1)

                relup2 = nn.MatrixMultiply(graph, relu, self.p2)
                input_h = nn.MatrixVectorAdd(graph, relup2, self.q2)

                # h0 += graph.get_output(relup2_plus_q2)
                # h0 += xs[i] * (i + 1)

            # graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            # input_h = nn.Input(graph, h0)

            hw1 = nn.MatrixMultiply(graph, input_h, self.w1)
            hw1_plus_b1 = nn.MatrixVectorAdd(graph, hw1, self.b1)
            relu = nn.ReLU(graph, hw1_plus_b1)

            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)
            # relulu = nn.ReLU(graph, reluw2_plus_b2)

            # reluluw3 = nn.MatrixMultiply(graph, relulu, self.w3)
            # reluluw3_plus_b3 = nn.MatrixVectorAdd(graph, reluluw3, self.b3)
            # relululu = nn.ReLU(graph, reluluw3_plus_b3)

            # relululuw4 = nn.MatrixMultiply(graph, relululu, self.w4)
            # relululuw4_plus_b4 = nn.MatrixVectorAdd(graph, relululuw4, self.b4)

            return graph.get_output(reluw2_plus_b2)
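Stripped of the graph bookkeeping, the recurrence built above is the following numpy computation; this is a sketch in which the parameter names mirror the Variables and the shapes (hidden width equal to num_chars) are assumptions.

import numpy as np

def rnn_forward(xs, r1, s1, p1, q1, p2, q2):
    # xs: list of (batch x num_chars) arrays; hidden state starts at zero.
    h = np.zeros((xs[0].shape[0], r1.shape[1]))
    for c in xs:
        mixed = h + (c @ r1 + s1)                      # inject the character
        h = np.maximum(0, mixed @ p1 + q1) @ p2 + q2   # one recurrent step
    return h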
Ejemplo n.º 25
0
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs y are known during training, but not at test time.
        If correct outputs y are provided, this method must construct and
        return a nn.Graph for computing the training loss. If y is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "* YOUR CODE HERE *"
        if y is not None:
            # At training time, the correct output y is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            n = 5
            if not self.graph:
                self.l = []
                for i in range(0, n):
                    self.l.append(nn.Variable(len(x), len(x)))
                for i in range(0, n):
                    self.l.append(nn.Variable(len(x), 1))
                self.graph = nn.Graph(self.l)
                input_x = nn.Input(self.graph,x)
                input_y = nn.Input(self.graph,y)
                mult = nn.MatrixMultiply(self.graph, self.l[0], input_x)
                add = nn.MatrixVectorAdd(self.graph, mult, self.l[n])
                for i in range(0, n):
                    relu = nn.ReLU(self.graph, add)
                    mult = nn.MatrixMultiply(self.graph, self.l[i], relu)
                    add = nn.MatrixVectorAdd(self.graph, self.l[n + i], mult)
                loss = nn.SquareLoss(self.graph, add, input_y)
                return self.graph
            else:
                self.graph = nn.Graph(self.l)
                input_x = nn.Input(self.graph,x)
                input_y = nn.Input(self.graph,y)
                mult = nn.MatrixMultiply(self.graph, self.l[0], input_x)
                add = nn.MatrixVectorAdd(self.graph, mult, self.l[n])
                for i in range(0, n):
                    relu = nn.ReLU(self.graph, add)
                    mult = nn.MatrixMultiply(self.graph, self.l[i], relu)
                    add = nn.MatrixVectorAdd(self.graph, self.l[n + i], mult)
                loss = nn.SquareLoss(self.graph, add, input_y)
                return self.graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array

#             top_vec = self.graph.get_output(self.graph.get_nodes()[-4])
#             bot_vec = self.graph.get_output(self.graph.get_nodes()[-5])
#             # print(top_vec,bot_vec)
#             return np.concatenate((top_vec, bot_vec), axis=0)

            # top_add = self.graph.get_output(self.graph.get_nodes()[-4])
            # bot_add = self.graph.get_output(self.graph.get_nodes()[-5])
            # return (top_add + bot_add) * (0.5)

            return self.graph.get_output(self.graph.get_nodes()[-2])
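Why tying weight shapes to len(x) is fragile: the model only accepts the batch size it was first called with. A short sketch of the failure mode:

import numpy as np

W = np.random.randn(4, 4)        # created when the first batch had 4 rows
x_ok = np.random.randn(4, 1)
x_bad = np.random.randn(8, 1)    # a batch of a different size

_ = W @ x_ok                     # fine: (4x4) @ (4x1)
try:
    _ = W @ x_bad                # (4x4) @ (8x1) -> shape mismatch
except ValueError as exc:
    print("shape error:", exc)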
Ejemplo n.º 26
0
    def run(self, x, y=None):
        """
		Runs the model for a batch of examples.

		The correct outputs `y` are known during training, but not at test time.
		If correct outputs `y` are provided, this method must construct and
		return a nn.Graph for computing the training loss. If `y` is None, this
		method must instead return predicted y-values.

		Inputs:
			x: a (batch_size x 1) numpy array
			y: a (batch_size x 1) numpy array, or None
		Output:
			(if y is not None) A nn.Graph instance, where the last added node is
				the loss
			(if y is None) A (batch_size x 1) numpy array of predicted y-values

		Note: DO NOT call backprop() or step() inside this method!
		"""
        "*** YOUR CODE HERE ***"

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            if not self.w1:
                h = 50
                self.w1 = nn.Variable(np.shape(x)[0], h)
                self.w2 = nn.Variable(h, np.shape(x)[0])
                self.b1 = nn.Variable(h)
                self.b2 = nn.Variable(np.shape(x)[0])

            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_x = nn.Input(graph, x.T)
            input_y = nn.Input(graph, y.T)

            xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
            xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
            relu = nn.ReLU(graph, xw1_plus_b1)
            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)

            loss = nn.SquareLoss(graph, reluw2_plus_b2, input_y)

            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            if not self.w1:
                h = 50
                self.w1 = nn.Variable(np.shape(x)[0], h)
                self.w2 = nn.Variable(h, np.shape(x)[0])
                self.b1 = nn.Variable(h)
                self.b2 = nn.Variable(np.shape(x)[0])

            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_x = nn.Input(graph, x.T)

            xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
            xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
            relu = nn.ReLU(graph, xw1_plus_b1)
            reluw2 = nn.MatrixMultiply(graph, relu, self.w2)
            reluw2_plus_b2 = nn.MatrixVectorAdd(graph, reluw2, self.b2)

            return graph.get_output(reluw2_plus_b2).T
Ejemplo n.º 27
0
    def run(self, xs, y=None):
        """
        TODO: Question 8 - [Application] Language Identification

        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]
        # "*** YOUR CODE HERE ***"
        word_length = len(xs)  # xs is a list of L per-character arrays
        graph = nn.Graph([self.Whh, self.Wch, self.bh, self.W1, self.b1])
        ht_1 = nn.Input(graph, np.zeros((batch_size, self.hidden_size)))
        # RNN
        for i in range(word_length):
            input_x = nn.Input(graph, xs[i])
            wct = nn.MatrixMultiply(graph, input_x, self.Wch)
            wht_1 = nn.MatrixMultiply(graph, ht_1, self.Whh)
            comb = nn.Add(graph, wct, wht_1)
            add_bias = nn.MatrixVectorAdd(graph, comb, self.bh)
            ht = nn.ReLU(graph, add_bias)
            ht_1 = ht
        # classification
        comb_features = nn.MatrixMultiply(graph, ht, self.W1)
        outputs = nn.MatrixVectorAdd(graph, comb_features, self.b1)

        if y is not None:
            # "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, outputs, input_y)
            return graph
        else:
            # "*** YOUR CODE HERE ***"
            return graph.get_output(outputs)
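Turning the returned logits into language names is a one-liner per row; a sketch using the model's language list:

import numpy as np

languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

def predict_language(logits):
    # logits: (batch_size x 5); pick the highest-scoring class per row.
    return [languages[i] for i in np.argmax(logits, axis=1)]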
Ejemplo n.º 28
0
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """

        if not self.graph:
            for i in range(self.depth):
                # weight matrix for every layer, (len(x) x len(x))
                self.weights.append(nn.Variable(len(x), len(x)))
                # bias vector for every layer, (len(x) x 1)
                self.bias.append(nn.Variable(len(x), 1))

        # Rebuild a fresh graph over the weight and bias variables for
        # each batch. (The original test branch built its graph only on
        # the first call -- crashing on nn.Input(graph, None) when y was
        # missing -- and otherwise returned stale outputs.)
        self.graph = nn.Graph(self.weights + self.bias)

        input_x = nn.Input(self.graph, x)
        odd_input_x = nn.Input(self.graph, -x)
        # nn.MatrixMultiply needs graph nodes, not raw scalars, so the
        # -1 used to negate a branch is wrapped as a (1 x 1) Input.
        neg_one = nn.Input(self.graph, np.array([[-1.0]]))

        # first layer, shared weights applied to x and to -x:
        xm = nn.MatrixMultiply(self.graph, self.weights[0], input_x)
        xm_plus_b = nn.MatrixVectorAdd(self.graph, xm, self.bias[0])
        odd_xm = nn.MatrixMultiply(self.graph, self.weights[0], odd_input_x)
        odd_xm_plus_b = nn.MatrixVectorAdd(self.graph, odd_xm, self.bias[0])

        # remaining layers, keeping the two branches parallel (the
        # original fed the negated branch back into the main path and
        # never advanced the odd branch past the first layer):
        for i in range(1, self.depth):
            relu = nn.ReLU(self.graph, xm_plus_b)
            odd_relu = nn.ReLU(self.graph, odd_xm_plus_b)
            xm = nn.MatrixMultiply(self.graph, self.weights[i], relu)
            xm_plus_b = nn.MatrixVectorAdd(self.graph, xm, self.bias[i])
            odd_xm = nn.MatrixMultiply(self.graph, self.weights[i], odd_relu)
            odd_xm_plus_b = nn.MatrixVectorAdd(self.graph, odd_xm, self.bias[i])

        # f(x) = g(x) - g(-x), odd by construction:
        neg_branch = nn.MatrixMultiply(self.graph, odd_xm_plus_b, neg_one)
        output = nn.MatrixVectorAdd(self.graph, xm_plus_b, neg_branch)

        if y is not None:
            # At training time, the loss node must be added last.
            input_y = nn.Input(self.graph, y)
            loss = nn.SquareLoss(self.graph, output, input_y)
            return self.graph
        else:
            # At test time, return the prediction as a numpy array.
            return self.graph.get_output(output)
Ejemplo n.º 29
0
    def run(self, x, y=None):
        """
        TODO: Question 5 - [Application] OddRegression

        Runs the model for a batch of examples.

        The correct outputs `y` are known during training, but not at test time.
        If correct outputs `y` are provided, this method must construct and
        return a nn.Graph for computing the training loss. If `y` is None, this
        method must instead return predicted y-values.

        Inputs:
            x: a (batch_size x 1) numpy array
            y: a (batch_size x 1) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 1) numpy array of predicted y-values

        Note: DO NOT call backprop() or step() inside this method!
        """
        "*** YOUR CODE HERE ***"

        #------------------------------the f(x)-----------------------#
        #to implement f(x) = relu(x.w1+b1).w2 + b2
        graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])
        input_x = nn.Input(graph, x)
        #input_y = Input(graph, y)
        #a = x.w1
        a = nn.MatrixMultiply(graph, input_x, self.w1)
        #relu(a+b1).w2 + b2
        #b = a + b1
        b = nn.MatrixVectorAdd(graph, a, self.b1)
        #relu(b).w2 + b2
        two_layer_relu = nn.ReLU(graph, b)
        #c = relu(b).w2
        c = nn.MatrixMultiply(graph, two_layer_relu, self.w2)
        #d = c + b2
        d = nn.MatrixVectorAdd(graph, c, self.b2)

        #------------------------------the -f(-x)-----------------------#
        #f(-x) = relu(-x.w1 + b1).w2 + b2
        #to implement -f(-x) = -[relu(-x.w1 + b1).w2 + b2]
        # graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])
        neg_input_x = nn.Input(graph, x * -1)
        #input_y = Input(graph, y)
        #a = -x.w1
        neg_a = nn.MatrixMultiply(graph, neg_input_x, self.w1)
        #relu(a+b1).w2 + b2
        #b = a + b1
        neg_b = nn.MatrixVectorAdd(graph, neg_a, self.b1)
        #relu(b).w2 + b2
        neg_two_layer_relu = nn.ReLU(graph, neg_b)
        #c = relu(b).w2
        neg_c = nn.MatrixMultiply(graph, neg_two_layer_relu, self.w2)
        #d = c + b2
        neg_d = nn.MatrixVectorAdd(graph, neg_c, self.b2)
        # Negate inside the graph so gradients can flow through this
        # branch: calling graph.get_output(neg_d) mid-construction and
        # re-wrapping the value as an Input freezes it and detaches it
        # from backprop.
        neg_one = nn.Input(graph, np.array([[-1.0]]))
        real_neg_d = nn.MatrixMultiply(graph, neg_d, neg_one)

        #---------------------hint2------addition--------------------------#
        d_plus_real_neg_d = nn.Add(graph, real_neg_d, d)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SquareLoss(graph, d_plus_real_neg_d, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(d_plus_real_neg_d)
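The in-graph negation used above relies on a simple identity: right-multiplying a (batch x 1) column by a (1 x 1) matrix holding -1 is exactly elementwise negation. A numpy check:

import numpy as np

v = np.array([[1.0], [-2.0], [3.5]])   # a (batch x 1) column of outputs
neg_one = np.array([[-1.0]])           # the (1 x 1) constant node's value
assert np.allclose(v @ neg_one, -v)    # matrix multiply == negation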
Ejemplo n.º 30
0
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        i = x.shape[1]  # input size (784 pixels)
        h = 200         # hidden layer width; tune as needed

        if not self.w1:
            self.w1 = nn.Variable(i, h)
        if not self.w2:
            self.w2 = nn.Variable(h, 10)
        if not self.b1:
            self.b1 = nn.Variable(h)
        if not self.b2:
            self.b2 = nn.Variable(10)

        graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

        input_nodeX = nn.Input(graph, x)
        multiply1 = nn.MatrixMultiply(graph, input_nodeX, self.w1)
        add1 = nn.MatrixVectorAdd(graph, multiply1, self.b1)
        relu = nn.ReLU(graph, add1)
        multiply2 = nn.MatrixMultiply(graph, relu, self.w2)
        add2 = nn.MatrixVectorAdd(graph, multiply2, self.b2)

        if y is not None:
            # At training time, the correct output `y` is known. Construct
            # a loss node and return the nn.Graph it belongs to; the loss
            # node is the last node added to the graph. (nn node
            # constructors register themselves with the graph, so an
            # explicit graph.add(loss_node) would add it a second time.)
            input_nodeY = nn.Input(graph, y)
            loss_node = nn.SoftmaxLoss(graph, add2, input_nodeY)
            return graph
        else:
            # At test time, the correct output is unknown; return the
            # model's prediction (logits) as a numpy array.
            return graph.get_output(add2)