def __init__(self):
    """Configure the language-identification model and allocate its weights.

    Attributes set here:
        get_data_and_monitor: data/monitoring hook taken from ``backend``.
        num_chars: size of the combined alphabet of the five languages (47).
        languages: the five language labels, one per output class.
        learning_rate: step size for this architecture.
        hidden_size: width of the recurrent hidden state.
        w1, w2, w3, b1, output: trainable ``nn.Variable`` parameters.
    """
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

    self.learning_rate = .007
    self.hidden_size = 160

    chars = self.num_chars
    hidden = self.hidden_size
    # Allocation order is kept (w1, w2, w3, b1, output) so that variable
    # initialisation consumes random numbers in the same sequence as before.
    self.w1 = nn.Variable(hidden, chars)   # hidden state -> char space
    self.w2 = nn.Variable(chars, chars)    # one-hot character -> char space
    self.w3 = nn.Variable(chars, hidden)   # combined signal -> hidden state
    self.b1 = nn.Variable(hidden)          # hidden-state bias vector
    self.output = nn.Variable(hidden, len(self.languages))


def run(self, xs, y=None):
    """Run the recurrent network over one batch of equal-length words.

    The graph-building code used to sit inside ``__init__``, where
    ``xs``, ``y`` and ``batch_size`` do not exist and where returning a
    value is an error; it lives here so it can actually be called.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node
            is the loss
        (if y is None) A (batch_size x 5) numpy array of scores
    """
    batch_size = xs[0].shape[0]
    graph = nn.Graph([self.w1, self.w2, self.w3, self.b1, self.output])

    # The hidden state starts as all zeros.  The builtin ``float`` is used
    # because the ``np.float`` alias no longer exists in current NumPy.
    hidden = nn.Input(
        graph, np.zeros((batch_size, self.hidden_size), dtype=float))

    for chars in xs:
        char_node = nn.Input(graph, chars)
        from_hidden = nn.MatrixMultiply(graph, hidden, self.w1)
        from_char = nn.MatrixMultiply(graph, char_node, self.w2)
        # Both operands are (batch_size x num_chars) matrices, so this is
        # an element-wise Add, not a matrix-plus-vector broadcast.
        combined = nn.Add(graph, from_hidden, from_char)
        projected = nn.MatrixMultiply(graph, combined, self.w3)
        shifted = nn.MatrixVectorAdd(graph, projected, self.b1)
        hidden = nn.ReLU(graph, shifted)

    scores = nn.MatrixMultiply(graph, hidden, self.output)

    if y is None:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(scores)

    # Constructing the node already registers it with the graph (the
    # prediction branch reads `scores` without any explicit add), so the
    # loss is the last node without a second graph.add() call.
    label_node = nn.Input(graph, y)
    nn.SoftmaxLoss(graph, scores, label_node)
    return graph
def run(self, x, y=None):
    """
    Question 5 - [Application] OddRegression

    Evaluates the model on a batch. The prediction is f(x) - f(-x), where f
    is the same three-layer network (W1/b1, W2/b2, W3/b3) applied to both
    the batch and its negation.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph(
        [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3])

    def three_layers(data):
        # Feed `data` through two ReLU layers followed by a linear layer.
        node = nn.Input(graph, data)
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            node = nn.MatrixMultiply(graph, node, weight)
            node = nn.MatrixVectorAdd(graph, node, bias)
            node = nn.ReLU(graph, node)
        node = nn.MatrixMultiply(graph, node, self.W3)
        return nn.MatrixVectorAdd(graph, node, self.b3)

    f_of_x = three_layers(x)
    f_of_minus_x = three_layers(-x)

    # Multiplying by a constant 1x1 matrix holding -1 negates f(-x) while
    # keeping the operation inside the graph.
    minus_one = nn.Input(graph, -np.ones((1, 1)))
    negated = nn.MatrixMultiply(graph, f_of_minus_x, minus_one)
    prediction = nn.Add(graph, f_of_x, negated)

    if y is None:
        # Test time: return the prediction as a numpy array.
        return graph.get_output(prediction)

    # Training time: the loss node is the last node added to the graph.
    target = nn.Input(graph, y)
    nn.SquareLoss(graph, prediction, target)
    return graph
def run(self, xs, y=None):
    """
    Runs the recurrent model for a batch of equal-length words.

    Within a single batch all words have the same length L. `xs` is a list
    of length L; each element is a (batch_size x self.num_chars) numpy array
    whose rows are one-hot character encodings. When labels are available,
    `y` is a (batch_size x 5) numpy array of one-hot class rows.

    The hidden state starts as zeros plus the bias `b0`; for every character
    it is combined with the character's projection through `m0` and passed
    through a two-layer block (m1/b1 with ReLU, then m2/b2). The final state
    is mapped to scores by m3/b3.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]

    "*** YOUR CODE HERE ***"
    trainable = [
        self.m0, self.b0, self.m1, self.b1, self.m2, self.b2, self.m3,
        self.b3,
    ]
    graph = nn.Graph(trainable)

    # Initial hidden state: zeros shifted by the bias b0.
    blank = nn.Input(graph, np.zeros((batch_size, self.hidden_layer)))
    state = nn.MatrixVectorAdd(graph, blank, self.b0)

    for letters in xs:
        letter_node = nn.Input(graph, letters)
        embedded = nn.MatrixMultiply(graph, letter_node, self.m0)
        merged = nn.MatrixVectorAdd(graph, state, embedded)
        pre_activation = nn.MatrixVectorAdd(
            graph, nn.MatrixMultiply(graph, merged, self.m1), self.b1)
        activated = nn.ReLU(graph, pre_activation)
        state = nn.MatrixVectorAdd(
            graph, nn.MatrixMultiply(graph, activated, self.m2), self.b2)

    scores = nn.MatrixVectorAdd(
        graph, nn.MatrixMultiply(graph, state, self.m3), self.b3)

    if y is None:
        "*** YOUR CODE HERE ***"
        return graph.get_output(scores)

    "*** YOUR CODE HERE ***"
    nn.SoftmaxLoss(graph, scores, nn.Input(graph, y))
    return graph
def run(self, xs, y=None):
    """
    Runs the recurrent model for a batch of equal-length words.

    Within a single batch all words have the same length L. xs is a list of
    length L; each element is a (batch_size x self.num_chars) numpy array
    whose rows are one-hot character encodings. When labels are available,
    y is a (batch_size x 5) numpy array of one-hot class rows.

    The hidden state is 200 wide. It starts as zeros plus the bias `add0`;
    each character is projected through `c`, added to the state, and sent
    through w1/b1 (ReLU) and w2/b2. The final state is mapped to scores by
    w3/b3.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]

    "* YOUR CODE HERE *"
    trainable = [
        self.b1, self.b2, self.b3, self.w1, self.w2, self.w3, self.add0,
        self.c,
    ]
    graph = nn.Graph(trainable)

    # Initial hidden state: zeros shifted by the bias add0.
    blank = nn.Input(graph, np.zeros((batch_size, 200)))
    state = nn.MatrixVectorAdd(graph, blank, self.add0)

    for letters in xs:
        letter_node = nn.Input(graph, letters)
        embedded = nn.MatrixMultiply(graph, letter_node, self.c)
        merged = nn.MatrixVectorAdd(graph, state, embedded)
        pre_activation = nn.MatrixVectorAdd(
            graph, nn.MatrixMultiply(graph, merged, self.w1), self.b1)
        activated = nn.ReLU(graph, pre_activation)
        state = nn.MatrixVectorAdd(
            graph, nn.MatrixMultiply(graph, activated, self.w2), self.b2)

    scores = nn.MatrixVectorAdd(
        graph, nn.MatrixMultiply(graph, state, self.w3), self.b3)

    if y is None:
        return graph.get_output(scores)

    nn.SoftmaxLoss(graph, scores, nn.Input(graph, y))
    return graph
def run(self, xs, y=None):
    """
    Runs the recurrent model for a batch of equal-length words.

    Within a single batch all words have the same length L. xs is a list of
    length L; each element is a (batch_size x self.num_chars) numpy array
    whose rows are one-hot character encodings. When labels are available,
    y is a (batch_size x 5) numpy array of one-hot class rows.

    The trainable variables are created on the first call (while
    `self.graph` is still falsy) and stored in `self.l`; a fresh graph is
    built around them on every call.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]
    num_chars = 47

    if not self.graph:
        dim = 128
        w1 = nn.Variable(num_chars, num_chars)
        w2 = nn.Variable(num_chars, num_chars)
        b1 = nn.Variable(1, num_chars)
        b2 = nn.Variable(1, num_chars)
        h0 = nn.Variable(1, num_chars)
        w3 = nn.Variable(num_chars, num_chars)
        w4 = nn.Variable(num_chars, dim)
        w6 = nn.Variable(dim, 5)
        b3 = nn.Variable(1, num_chars)
        b4 = nn.Variable(1, dim)
        b6 = nn.Variable(1, 5)
        w5 = nn.Variable(num_chars, num_chars)
        b5 = nn.Variable(1, num_chars)
        # The list layout is unchanged so positional users of self.l still
        # find every variable at the same index.  Only w1, b1, h0, w4, b4,
        # w6 and b6 take part in the computation below.
        self.l = [w1, w2, b1, b2, h0, w3, w4, b3, b4, w5, b5, w6, b6]

    # A new graph per call: nodes from earlier batches must not pile up.
    self.graph = nn.Graph(self.l)
    graph = self.graph
    w1, _, b1, _, h0, _, w4, _, b4, _, _, w6, b6 = self.l

    # Initial hidden state: the learned row h0 broadcast over the batch.
    zeros = nn.Input(graph, np.zeros((batch_size, num_chars)))
    hidden = nn.MatrixVectorAdd(graph, zeros, h0)

    for chars in xs:
        char_node = nn.Input(graph, chars)
        # Both operands are (batch_size x 47) matrices -> element-wise Add.
        mixed = nn.Add(graph, hidden, char_node)
        product = nn.MatrixMultiply(graph, mixed, w1)
        shifted = nn.MatrixVectorAdd(graph, product, b1)
        hidden = nn.ReLU(graph, shifted)

    # Read-out: one ReLU layer of width `dim`, then a linear map to 5 scores.
    readout = nn.MatrixMultiply(graph, hidden, w4)
    readout = nn.MatrixVectorAdd(graph, readout, b4)
    readout = nn.ReLU(graph, readout)
    scores = nn.MatrixMultiply(graph, readout, w6)
    scores = nn.MatrixVectorAdd(graph, scores, b6)

    if y is None:
        return graph.get_output(scores)

    label_node = nn.Input(graph, y)
    nn.SoftmaxLoss(graph, scores, label_node)
    return graph
def run(self, x, y=None):
    """
    Question 5 - [Application] OddRegression

    Runs the model for a batch of examples. The prediction is
    f(x) - f(-x), where f is a three-layer network (1 -> 50 -> 50 -> 1)
    whose variables are created on the first call and kept in
    `self.inputs`.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    if not self.nodes:
        self.inputs = [
            nn.Variable(1, 50),    # w1
            nn.Variable(50, 50),   # w2
            nn.Variable(50, 1),    # w3
            nn.Variable(1, 50),    # b1
            nn.Variable(1, 50),    # b2
            nn.Variable(1, 1),     # b3
        ]
    w1, w2, w3, b1, b2, b3 = self.inputs

    # A fresh graph is built around the persistent variables on every call.
    self.nodes = nn.Graph(self.inputs)
    graph = self.nodes

    def network(node):
        # Apply the shared three-layer network f to `node`.
        node = nn.MatrixMultiply(graph, node, w1)
        node = nn.MatrixVectorAdd(graph, node, b1)
        node = nn.ReLU(graph, node)
        node = nn.MatrixMultiply(graph, node, w2)
        node = nn.MatrixVectorAdd(graph, node, b2)
        node = nn.ReLU(graph, node)
        node = nn.MatrixMultiply(graph, node, w3)
        return nn.MatrixVectorAdd(graph, node, b3)

    input_x = nn.Input(graph, x)
    # A plain 1x1 ndarray is used instead of np.matrix: with np.matrix the
    # products become matrix objects, for which `*` means matrix product.
    minus_one = nn.Input(graph, np.array([[-1.0]]))

    f_pos = network(input_x)

    # The negative branch previously called MatrixMultiply with a single
    # operand; it must multiply -x by w1 just like the positive branch.
    x_neg = nn.MatrixMultiply(graph, input_x, minus_one)
    f_neg = network(x_neg)
    neg_f_neg = nn.MatrixMultiply(graph, f_neg, minus_one)

    # Both terms are (batch_size x 1) matrices -> element-wise Add.
    prediction = nn.Add(graph, f_pos, neg_f_neg)

    if y is None:
        return graph.get_output(prediction)

    input_y = nn.Input(graph, y)
    nn.SquareLoss(graph, prediction, input_y)
    return graph
def run(self, states, Q_target=None):
    """
    Question 7 - [Application] Reinforcement Learning

    Runs the DQN for a batch of states. The DQN takes the state and
    computes Q-values for all possible actions that can be taken. That is,
    if there are two actions, the network takes as input the state s and
    computes the vector [Q(s, a_1), Q(s, a_2)].

    When Q_target is None, return the matrix of Q-values currently computed
    by the network for the input states. When Q_target is passed, return a
    nn.Graph which computes the nn.SquareLoss between the current Q-value
    predictions and these target values.

    The variables are created on the first call and kept in `self.inputs`.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # Width of the two hidden layers.  It used to be states.shape[0], which
    # tied the architecture to the batch size of whichever call came first
    # (a single-state call would yield a one-unit network).
    # NOTE(review): 50 is a tunable hyper-parameter - confirm the value.
    hidden_size = 50

    if not self.nodes:
        state_size = states.shape[1]
        self.inputs = [
            nn.Variable(state_size, hidden_size),    # w1
            nn.Variable(hidden_size, hidden_size),   # w2
            nn.Variable(hidden_size, 2),             # w3
            nn.Variable(1, hidden_size),             # b1
            nn.Variable(1, hidden_size),             # b2
            nn.Variable(1, 2),                       # b3
        ]
    w1, w2, w3, b1, b2, b3 = self.inputs

    # A fresh graph is built around the persistent variables on every call.
    self.nodes = nn.Graph(self.inputs)
    graph = self.nodes

    node = nn.Input(graph, states)
    for weight, bias in ((w1, b1), (w2, b2)):
        node = nn.MatrixMultiply(graph, node, weight)
        node = nn.MatrixVectorAdd(graph, node, bias)
        node = nn.ReLU(graph, node)
    node = nn.MatrixMultiply(graph, node, w3)
    q_values = nn.MatrixVectorAdd(graph, node, b3)

    if Q_target is None:
        return graph.get_output(q_values)

    target = nn.Input(graph, Q_target)
    nn.SquareLoss(graph, q_values, target)
    return graph
def run(self, xs, y=None):
    """
    Question 8 - [Application] Language Identification

    Runs the model for a batch of equal-length words.

    Within a single batch all words have the same length L. `xs` is a list
    of length L; each element is a (batch_size x self.num_chars) numpy array
    whose rows are one-hot character encodings. When labels are available,
    `y` is a (batch_size x 5) numpy array of one-hot class rows.

    The hidden state starts as zeros; each character matrix is added to it
    and the sum goes through w1/b1 with a ReLU. After the last character the
    state passes through w2/b2 and w3/b3 (both with ReLU) and finally w4/b4.
    The graph is stored on `self.graph`.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]

    trainable = [
        self.w1, self.w2, self.w3, self.w4, self.h0, self.b1, self.b2,
        self.b3, self.b4,
    ]
    self.graph = nn.Graph(trainable)
    graph = self.graph

    state = nn.Input(graph, np.zeros((batch_size, self.num_chars)))
    for letters in xs:
        letter_node = nn.Input(graph, letters)
        merged = nn.MatrixVectorAdd(graph, state, letter_node)
        pre_activation = nn.MatrixVectorAdd(
            graph, nn.MatrixMultiply(graph, merged, self.w1), self.b1)
        state = nn.ReLU(graph, pre_activation)

    # Read-out head: two ReLU layers followed by a linear layer.
    head = state
    for weight, bias in ((self.w2, self.b2), (self.w3, self.b3)):
        head = nn.MatrixMultiply(graph, head, weight)
        head = nn.MatrixVectorAdd(graph, head, bias)
        head = nn.ReLU(graph, head)
    scores = nn.MatrixVectorAdd(
        graph, nn.MatrixMultiply(graph, head, self.w4), self.b4)

    if y is None:
        # The scores node is the most recently added node at this point.
        return self.graph.get_output(self.graph.get_nodes()[-1])

    nn.SoftmaxLoss(graph, scores, nn.Input(graph, y))
    return self.graph
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The prediction is f(x) - f(-x), where f is the two-layer network
    (W1/b1 with ReLU, then W2/b2) applied to the batch and to its negation.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])

    def network(data):
        # Feed `data` through the shared two-layer network f.
        node = nn.Input(graph, data)
        node = nn.MatrixMultiply(graph, node, self.W1)
        node = nn.MatrixVectorAdd(graph, node, self.b1)
        node = nn.ReLU(graph, node)
        node = nn.MatrixMultiply(graph, node, self.W2)
        return nn.MatrixVectorAdd(graph, node, self.b2)

    f_pos = network(x)
    f_neg = network(-1 * x)

    # Negate f(-x) inside the graph.  Reading its value with get_output()
    # and re-inserting it as a constant Input would detach the whole
    # negative branch from backpropagation, so only f(x) would be trained.
    minus_one = nn.Input(graph, -np.ones((1, 1)))
    neg_f_neg = nn.MatrixMultiply(graph, f_neg, minus_one)
    prediction = nn.Add(graph, f_pos, neg_f_neg)

    if y is None:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(prediction)

    # At training time the loss node must be the last node added.
    input_y = nn.Input(graph, y)
    nn.SquareLoss(graph, prediction, input_y)
    return graph
def run(self, xs, y=None):
    """
    Question 8 - [Application] Language Identification

    Runs the model for a batch of equal-length words.

    Within a single batch all words have the same length L. `xs` is a list
    of length L; each element is a (batch_size x self.num_chars) numpy array
    whose rows are one-hot character encodings. When labels are available,
    `y` is a (batch_size x 5) numpy array of one-hot class rows.

    Side effects: increments `self.iteration` and lowers
    `self.learning_rate` at calls 10000, 12000 and 14000. The trainable
    variables are created on the first call (while `self.graph` is still
    falsy) and stored in `self.vars`; a fresh graph is built on every call.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]
    num_chars = self.num_chars

    # Step-wise learning-rate decay keyed on the number of run() calls.
    self.iteration += 1
    schedule = {10000: 0.015, 12000: 0.010, 14000: 0.005}
    if self.iteration in schedule:
        self.learning_rate = schedule[self.iteration]

    if not self.graph:
        dim = 80
        w1 = nn.Variable(num_chars, num_chars)
        w2 = nn.Variable(num_chars, num_chars)
        b1 = nn.Variable(1, num_chars)
        b2 = nn.Variable(1, num_chars)
        h0 = nn.Variable(1, num_chars)
        w3 = nn.Variable(num_chars, num_chars)
        w4 = nn.Variable(num_chars, dim)
        w6 = nn.Variable(dim, 5)
        b3 = nn.Variable(1, num_chars)
        b4 = nn.Variable(1, dim)
        b6 = nn.Variable(1, 5)
        w5 = nn.Variable(num_chars, num_chars)
        b5 = nn.Variable(1, num_chars)
        # The list layout is unchanged so positional users of self.vars
        # still find every variable at the same index.  Only w1, b1, h0,
        # w4, b4, w6 and b6 take part in the computation below.
        self.vars = [w1, w2, b1, b2, h0, w3, w4, b3, b4, w5, b5, w6, b6]

    # A new graph per call: nodes from earlier batches must not pile up.
    self.graph = nn.Graph(self.vars)
    graph = self.graph
    w1, _, b1, _, h0, _, w4, _, b4, _, _, w6, b6 = self.vars

    # Initial hidden state: the learned row h0 broadcast over the batch.
    zeros = nn.Input(graph, np.zeros((batch_size, num_chars)))
    hidden = nn.MatrixVectorAdd(graph, zeros, h0)

    for chars in xs:
        char_node = nn.Input(graph, chars)
        # Both operands are (batch_size x num_chars) matrices -> Add.
        mixed = nn.Add(graph, hidden, char_node)
        product = nn.MatrixMultiply(graph, mixed, w1)
        shifted = nn.MatrixVectorAdd(graph, product, b1)
        hidden = nn.ReLU(graph, shifted)

    # Read-out: one ReLU layer of width `dim`, then a linear map to 5 scores.
    readout = nn.MatrixMultiply(graph, hidden, w4)
    readout = nn.MatrixVectorAdd(graph, readout, b4)
    readout = nn.ReLU(graph, readout)
    scores = nn.MatrixMultiply(graph, readout, w6)
    scores = nn.MatrixVectorAdd(graph, scores, b6)

    if y is None:
        return graph.get_output(scores)

    input_y = nn.Input(graph, y)
    nn.SoftmaxLoss(graph, scores, input_y)
    return graph
def run(self, x, y=None):
    """
    Question 4 - [Application] Regression

    Runs the model for a batch of examples.

    With `y` given, builds a new graph (stored on `self.graph`) whose last
    added node is an `nn.Add` that sums four `nn.SquareLoss` nodes, and
    returns that graph. With `y` omitted, `x` is not read at all: the
    result is assembled from nodes of the graph left behind by the most
    recent training call, so a training call must have happened first.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance
        (if y is None) A numpy array made by concatenating four node
            outputs along axis 0

    Note: DO NOT call backprop() or step() inside this method!
    """
    "*** YOUR CODE HERE ***"
    if y is not None:
        # At training time, the correct output `y` is known.
        # Here, you should construct a loss node, and return the nn.Graph
        # that the node belongs to. The loss node must be the last node
        # added to the graph.
        "*** YOUR CODE HERE ***"
        len_x, len_y = len(x), len(y)
        # The batch is cut into four equal slices; the *_half values are
        # computed but never used below.
        len_x_quater, len_x_half = len_x // 4, len_x // 2
        len_y_quater, len_y_half = len_y // 4, len_y // 2
        # NOTE(review): `[v] * 8` repeats a reference, so each list holds
        # the SAME Variable eight times (two distinct variables in total).
        # They are also re-created on every training call and sized by the
        # batch length rather than by the feature dimension - confirm that
        # any of this is intended.
        weights, backs = [nn.Variable(len_x_quater, len_x_quater)
                          ] * 8, [nn.Variable(len_x_quater, 1)] * 8
        self.graph = nn.Graph(weights + backs)
        input_x = nn.Input(self.graph, x)
        input_y = nn.Input(self.graph, y)
        # One Input node per quarter of the batch.
        xs = [
            nn.Input(self.graph, x[i * len_x_quater:(i + 1) * len_x_quater])
            for i in range(4)
        ]
        # Weights are applied on the LEFT of each slice.
        mults = [
            nn.MatrixMultiply(self.graph, weights[i], xs[i])
            for i in range(4)
        ]
        # Pairwise sums (0+1, 2+3) followed by the swapped pairs (1+0, 3+2).
        adds = [
            nn.MatrixVectorAdd(self.graph, mults[i], mults[i + 1])
            for i in range(0, 4, 2)
        ] + [
            nn.MatrixVectorAdd(self.graph, mults[i + 1], mults[i])
            for i in range(0, 4, 2)
        ]
        adds_in = [
            nn.MatrixVectorAdd(self.graph, adds[i], backs[i])
            for i in range(4)
        ]
        relus = [nn.ReLU(self.graph, add) for add in adds_in]
        # Second layer, same pairing scheme. None of the losses below reads
        # mults2 / adds2 / adds_in2; they only add nodes to the graph.
        mults2 = [
            nn.MatrixMultiply(self.graph, weights[i + 4], relus[i])
            for i in range(4)
        ]
        adds2 = [
            nn.MatrixVectorAdd(self.graph, mults2[i], mults2[i + 1])
            for i in range(0, 4, 2)
        ] + [
            nn.MatrixVectorAdd(self.graph, mults2[i + 1], mults2[i])
            for i in range(0, 4, 2)
        ]
        adds_in2 = [
            nn.MatrixVectorAdd(self.graph, adds2[i], backs[i + 4])
            for i in range(4)
        ]
        # One Input node per quarter of the targets.
        ys = [
            nn.Input(self.graph, y[i * len_y_quater:(i + 1) * len_y_quater])
            for i in range(4)
        ]
        # The first loss compares adds_in[2] with ys[0]; the other three all
        # compare adds_in[3] with the remaining target slices.
        losses = [nn.SquareLoss(self.graph, adds_in[2], ys[0])] + [
            nn.SquareLoss(self.graph, adds_in[3], y) for y in ys[1:]
        ]
        # Fold the four losses into one node with three nn.Add nodes.
        # NOTE(review): bare `reduce` is a builtin only on Python 2; on
        # Python 3 it has to come from functools.
        add_end = reduce(lambda x, y: nn.Add(self.graph, x, y), losses)
        return self.graph
    else:
        # At test time, the correct output is unknown.
        # You should instead return your model's prediction as a numpy array
        "*** YOUR CODE HERE ***"
        # NOTE(review): counting back from the end of the training graph
        # (3 Add nodes, then 4 SquareLoss nodes, then the 4 `ys` Inputs),
        # positions -11..-8 are the `ys` Input nodes - assuming get_nodes()
        # lists nodes in insertion order. If so, this returns the targets
        # of the last training batch, not a prediction for `x` - confirm.
        vecs = [
            self.graph.get_output(self.graph.get_nodes()[-11 + i])
            for i in range(4)
        ]
        out = reduce(lambda x, y: np.concatenate((x, y), axis=0), vecs)
        return out
def run(self, x, y=None):
    """
    Question 5 - [Application] OddRegression

    Runs the model for a batch of examples. The prediction is
    g(x) - g(-x), where g is whatever `add_three_edges` builds from the
    shared variables (presumably the three-layer network - verify against
    that helper). The variables are created on the first call (while
    `self.graph` is still falsy) and kept in `self.vars`, with
    `self.weights` / `self.backs` holding the first and last three.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    if not self.graph:
        self.vars = [
            nn.Variable(1, 50),    # w1
            nn.Variable(50, 50),   # w2
            nn.Variable(50, 1),    # w3
            nn.Variable(1, 50),    # b1
            nn.Variable(1, 50),    # b2
            nn.Variable(1, 1),     # b3
        ]
        self.weights = self.vars[:3]
        self.backs = self.vars[3:]

    # A fresh graph is built around the persistent variables on every call.
    self.graph = nn.Graph(self.vars)
    graph = self.graph

    input_x = nn.Input(graph, x)
    # A plain 1x1 ndarray is used instead of np.matrix: with np.matrix the
    # products become matrix objects, for which `*` means matrix product.
    minus_one = nn.Input(graph, np.array([[-1.0]]))

    # Negative branch first, matching the original node order: -g(-x).
    neg_x = nn.MatrixMultiply(graph, input_x, minus_one)
    g_of_neg_x = add_three_edges(neg_x, graph, self.vars)
    neg_term = nn.MatrixMultiply(graph, g_of_neg_x, minus_one)

    # Positive branch: g(x).
    pos_term = add_three_edges(input_x, graph, self.vars)

    # Both terms are (batch_size x 1) matrices -> element-wise Add.
    prediction = nn.Add(graph, pos_term, neg_term)

    if y is None:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(prediction)

    # At training time the loss node must be the last node added.
    input_y = nn.Input(graph, y)
    nn.SquareLoss(graph, prediction, input_y)
    return graph
def run(self, xs, y=None):
    """
    Runs the language-identification model for a batch of words.

    All words in a batch have the same length L. `xs` is a list of length L
    whose elements are (batch_size x self.num_chars) arrays of one-hot
    character encodings.

    This implementation does not recur over the characters: it sums the
    one-hot arrays over all positions (giving per-word character counts)
    and feeds that sum through affine -> ReLU -> affine -> linear layers
    to produce the scores.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2, self.w3])

    # Accumulate into a copy: adding in place to xs[0] would overwrite the
    # caller's first character array.
    char_counts = xs[0].copy()
    for chars in xs[1:]:
        char_counts += chars

    input_xs = nn.Input(graph, char_counts)
    mul1 = nn.MatrixMultiply(graph, input_xs, self.w1)
    add1 = nn.MatrixVectorAdd(graph, mul1, self.b1)
    relu = nn.ReLU(graph, add1)
    mul2 = nn.MatrixMultiply(graph, relu, self.w2)
    add2 = nn.MatrixVectorAdd(graph, mul2, self.b2)
    scores = nn.MatrixMultiply(graph, add2, self.w3)

    if y is not None:
        # The loss node must be the last node added to the graph.
        input_y = nn.Input(graph, y)
        nn.SoftmaxLoss(graph, scores, input_y)
        return graph

    return graph.get_output(scores)
def run(self, x, y=None):
    """
    Runs the regression model for a batch of examples.

    The network is affine -> ReLU -> affine, with a hidden width of 100.
    Its four parameters are created lazily, each one the first time it is
    found to be unset.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    in_width = x.shape[1]
    hidden_width = 100

    if not self.w1:
        self.w1 = nn.Variable(in_width, hidden_width)
    if not self.w2:
        self.w2 = nn.Variable(hidden_width, in_width)
    if not self.b1:
        self.b1 = nn.Variable(hidden_width)
    if not self.b2:
        self.b2 = nn.Variable(in_width)

    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

    features = nn.Input(graph, x)
    hidden_linear = nn.MatrixMultiply(graph, features, self.w1)
    hidden_biased = nn.MatrixVectorAdd(graph, hidden_linear, self.b1)
    hidden_active = nn.ReLU(graph, hidden_biased)
    out_linear = nn.MatrixMultiply(graph, hidden_active, self.w2)
    prediction = nn.MatrixVectorAdd(graph, out_linear, self.b2)

    if y is None:
        # Test time: hand back the prediction as a numpy array.
        return graph.get_output(prediction)

    # Training time: the loss node must be the last node in the graph.
    targets = nn.Input(graph, y)
    loss_node = nn.SquareLoss(graph, prediction, targets)
    # NOTE(review): other run() implementations in this file never call
    # graph.add() themselves, which suggests node constructors register
    # with the graph on their own -- confirm whether this call is needed.
    graph.add(loss_node)
    return graph
def run(self, x, y=None):
    """
    Question 5 - [Application] OddRegression

    Runs the odd-regression model for a batch of examples.

    The prediction is f(x) = g(x) - g(-x), where g is the sub-network that
    `self.execute_layer` appends to the graph. Negation is done by
    multiplying with a 1x1 constant holding -1.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    training = y is not None
    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

    if training:
        # Registered before the x input, so the node order is the same
        # as when the target is known.
        targets = nn.Input(graph, y)
    features = nn.Input(graph, x)

    # Constant used to flip signs: first of x, later of g(-x).
    minus_one = nn.Input(graph, np.array([[-1.0]]))
    negated_features = nn.MatrixMultiply(graph, features, minus_one)

    # g(x)
    graph, g_of_x = self.execute_layer(features, y, graph)
    # -g(-x)
    graph, g_of_neg_x = self.execute_layer(negated_features, y, graph)
    neg_g_of_neg_x = nn.MatrixMultiply(graph, g_of_neg_x, minus_one)

    odd_output = nn.MatrixVectorAdd(graph, g_of_x, neg_g_of_neg_x)

    if not training:
        return graph.get_output(odd_output)

    # The loss node must be the last node added to the graph.
    nn.SquareLoss(graph, odd_output, targets)
    return graph
def run(self, x, y=None):
    """
    Runs the digit-classification model for a batch of examples.

    The network is affine -> ReLU -> affine over (W1, b1, W2, b2).

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array of one-hot labels, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss loss
        (if y is None) A (batch_size x 10) numpy array in which, for every
            row, the position(s) holding that row's maximum score are 1
            and all other positions are 0

    NOTE(review): the assignment contract asks for raw scores (logits) at
    test time; this method returns the one-hot form instead. The arg-max
    of each row is unchanged by that conversion.
    """
    graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])
    input_x = nn.Input(graph, x)
    xW1 = nn.MatrixMultiply(graph, input_x, self.W1)
    xW1_plusb1 = nn.MatrixVectorAdd(graph, xW1, self.b1)
    afterReLU = nn.ReLU(graph, xW1_plusb1)
    x2W2 = nn.MatrixMultiply(graph, afterReLU, self.W2)
    x2W2_plusb2 = nn.MatrixVectorAdd(graph, x2W2, self.b2)

    if y is not None:
        # The loss node must be the last node added to the graph.
        input_y = nn.Input(graph, y)
        nn.SoftmaxLoss(graph, x2W2_plusb2, input_y)
        return graph

    scores = graph.get_output(x2W2_plusb2)
    # Compare every entry with its row maximum in one vectorised step.
    # This builds a new array, so the graph's stored output is untouched.
    row_max = np.max(scores, axis=1, keepdims=True)
    return (scores == row_max).astype(scores.dtype)
def run(self, xs, y=None):
    """
    Question 8 - [Application] Language Identification

    Runs the recurrent model for a batch of words.

    All words in a batch have the same length L. `xs` is a list of length L
    whose elements are (batch_size x self.num_chars) arrays of one-hot
    character encodings.

    Recurrence, applied once per character c:
        h <- ReLU(c * self.w + h * self.v)
    The initial h is the learned row `self.h0`, replicated for every word
    in the batch. The final h is mapped to scores with `self.t`.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]
    graph = nn.Graph([self.t, self.w, self.h0, self.v])

    def step(prev_h, chars):
        """Append one recurrence step to the graph; return the new state."""
        if prev_h is None:
            # Multiplying a (batch_size x 1) column of ones by h0 copies
            # the learned initial state into every row of the batch.
            ones = nn.Input(graph, np.ones([batch_size, 1]))
            prev_h = nn.MatrixMultiply(graph, ones, self.h0)
        input_c = nn.Input(graph, chars)
        c_mul_w = nn.MatrixMultiply(graph, input_c, self.w)
        h_mul_v = nn.MatrixMultiply(graph, prev_h, self.v)
        return nn.ReLU(graph, nn.Add(graph, c_mul_w, h_mul_v))

    h = None
    for chars in xs:
        h = step(h, chars)

    if y is not None:
        # The loss node must be the last node added to the graph.
        input_y = nn.Input(graph, y)
        scores = nn.MatrixMultiply(graph, h, self.t)
        nn.SoftmaxLoss(graph, scores, input_y)
        return graph

    scores = nn.MatrixMultiply(graph, h, self.t)
    return graph.get_output(scores)
def run(self, xs, y=None):
    """
    Runs the recurrent language-identification model for a batch of words.

    All words in a batch have the same length L. `xs` is a list of length L
    whose elements are (batch_size x self.num_chars) arrays of one-hot
    character encodings.

    Recurrence, applied once per character c, starting from h = 0:
        h <- ReLU(h * W1 + b1 + c) * W2 + b2
    The final h is used directly as the score array.

    NOTE(review): the parameter shapes are not visible from this method.
    The code assumes W1 maps the 5-wide state to num_chars columns and W2
    maps back to 5 columns -- confirm against the constructor.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss loss
        (if y is None) A (batch_size x 5) numpy array in which, for every
            row, the position(s) holding that row's maximum score are 1
            and all other positions are 0
    """
    batch_size = xs[0].shape[0]
    graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])

    # The initial state is sized from the batch, not from y, because y is
    # None at test time. 5 is the score width stated in the contract above.
    h = nn.Input(graph, np.zeros((batch_size, 5)))

    for chars in xs:
        # Every character array is wrapped in an Input node before use.
        input_c = nn.Input(graph, chars)
        hW1 = nn.MatrixMultiply(graph, h, self.W1)
        hW1_plusb1 = nn.MatrixVectorAdd(graph, hW1, self.b1)
        # Same-shape addition, so nn.Add rather than MatrixVectorAdd.
        hW1_plusb1c = nn.Add(graph, hW1_plusb1, input_c)
        afterReLU = nn.ReLU(graph, hW1_plusb1c)
        x2W2 = nn.MatrixMultiply(graph, afterReLU, self.W2)
        h = nn.MatrixVectorAdd(graph, x2W2, self.b2)

    if y is not None:
        # The loss node must be the last node added to the graph.
        input_y = nn.Input(graph, y)
        nn.SoftmaxLoss(graph, h, input_y)
        return graph

    scores = graph.get_output(h)
    # Mark each row's maximum with 1 and everything else with 0.
    row_max = np.max(scores, axis=1, keepdims=True)
    return (scores == row_max).astype(scores.dtype)
def run(self, x, y=None):
    """
    Question 6 - [Application] Digit Classification

    Runs the model for a batch of examples.

    The network is affine -> ReLU -> affine -> ReLU -> affine. Parameters
    are created on the first call and kept in `self.inputs`; a new graph
    over them is stored in `self.nodes` on every call.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array of one-hot labels, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    if not self.nodes:
        # NOTE(review): the hidden width is the row count of the first
        # batch seen (x.shape[0]), not a fixed hyper-parameter.
        hidden = x.shape[0]
        self.inputs = [
            nn.Variable(x.shape[1], hidden),  # w1
            nn.Variable(hidden, hidden),      # w2
            nn.Variable(hidden, 10),          # w3
            nn.Variable(1, hidden),           # b1
            nn.Variable(1, hidden),           # b2
            nn.Variable(1, 10),               # b3
        ]

    w1, w2, w3, b1, b2, b3 = self.inputs
    self.nodes = nn.Graph([w1, w2, w3, b1, b2, b3])

    input_x = nn.Input(self.nodes, x)
    if y is not None:
        input_y = nn.Input(self.nodes, y)

    xw1 = nn.MatrixMultiply(self.nodes, input_x, w1)
    xw1b1 = nn.MatrixVectorAdd(self.nodes, xw1, b1)
    xw1relu = nn.ReLU(self.nodes, xw1b1)
    xw2 = nn.MatrixMultiply(self.nodes, xw1relu, w2)
    xw2b2 = nn.MatrixVectorAdd(self.nodes, xw2, b2)
    xw2relu = nn.ReLU(self.nodes, xw2b2)
    xw3 = nn.MatrixMultiply(self.nodes, xw2relu, w3)
    final = nn.MatrixVectorAdd(self.nodes, xw3, b3)

    if y is not None:
        # The loss node must be the last node added to the graph.
        nn.SoftmaxLoss(self.nodes, final, input_y)
        return self.nodes

    # Ask for the score node by reference, not by position in the graph.
    return self.nodes.get_output(final)
def run(self, x, y=None):
    """
    Runs the digit-classification model for a batch of examples.

    The network is affine -> ReLU -> affine, with a hidden width of 200.
    Parameters are created the first time `self.w1` is found unset. Every
    training call also decays `self.learning_rate` by a factor of 0.999,
    never letting it drop below 0.001.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array of one-hot labels, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    training = y is not None

    if not self.w1:
        hidden_width = 200
        self.w1 = nn.Variable(np.shape(x)[1], hidden_width)
        self.w2 = nn.Variable(hidden_width, 10)
        self.b1 = nn.Variable(hidden_width)
        self.b2 = nn.Variable(10)

    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

    features = nn.Input(graph, x)
    if training:
        labels = nn.Input(graph, y)

    hidden_linear = nn.MatrixMultiply(graph, features, self.w1)
    hidden_biased = nn.MatrixVectorAdd(graph, hidden_linear, self.b1)
    hidden_active = nn.ReLU(graph, hidden_biased)
    out_linear = nn.MatrixMultiply(graph, hidden_active, self.w2)
    logits = nn.MatrixVectorAdd(graph, out_linear, self.b2)

    if not training:
        return graph.get_output(logits)

    # The loss node must be the last node added to the graph.
    nn.SoftmaxLoss(graph, logits, labels)
    self.learning_rate = max(self.learning_rate * 0.999, 0.001)
    return graph
def run(self, xs, y=None):
    """
    Question 8 - [Application] Language Identification

    Runs the recurrent model for a batch of words.

    All words in a batch have the same length L. `xs` is a list of length L
    whose elements are (batch_size x self.num_chars) arrays of one-hot
    character encodings.

    Recurrence, applied once per character c:
        h <- ReLU((h + c) * w1 + b1)
    The initial h is a learned (1 x num_chars) row, replicated for every
    word by adding it to a zero matrix. The final h then goes through two
    affine+ReLU layers and a last affine layer that gives the 5 scores.

    Parameters are created on the first call and kept in `self.inputs`; a
    new graph is stored in `self.nodes` on every call. Two extra variables
    (the last two entries of `self.inputs`) are still created, so that
    list keeps its length, but they are not part of the graph.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]

    if not self.nodes:
        chars = self.num_chars
        w1 = nn.Variable(chars, chars)
        w2 = nn.Variable(chars, chars)
        w3 = nn.Variable(chars, chars)
        w4 = nn.Variable(chars, 5)
        b1 = nn.Variable(1, chars)
        b2 = nn.Variable(1, chars)
        b3 = nn.Variable(1, chars)
        b4 = nn.Variable(1, 5)
        h0 = nn.Variable(1, chars)
        bonusw = nn.Variable(chars, chars)
        bonusb = nn.Variable(1, chars)
        self.inputs = [w1, w2, w3, w4, b1, b2, b3, b4, h0, bonusw, bonusb]

    # Only the first nine variables take part in the computation.
    w1, w2, w3, w4, b1, b2, b3, b4, h0 = self.inputs[:9]
    self.nodes = nn.Graph([w1, w2, w3, w4, b1, b2, b3, b4, h0])

    # Replicate the learned initial state across the batch.
    zeros = nn.Input(self.nodes, np.zeros((batch_size, self.num_chars)))
    h = nn.MatrixVectorAdd(self.nodes, zeros, h0)

    for s in xs:
        ch = nn.Input(self.nodes, s)
        # NOTE(review): both operands are (batch_size x num_chars) here,
        # so nn.Add looks like the intended node type -- confirm.
        h_sum = nn.MatrixVectorAdd(self.nodes, h, ch)
        hw1 = nn.MatrixMultiply(self.nodes, h_sum, w1)
        hw1b1 = nn.MatrixVectorAdd(self.nodes, hw1, b1)
        h = nn.ReLU(self.nodes, hw1b1)

    hw2 = nn.MatrixMultiply(self.nodes, h, w2)
    hw2b2 = nn.MatrixVectorAdd(self.nodes, hw2, b2)
    hw2relu = nn.ReLU(self.nodes, hw2b2)
    hw3 = nn.MatrixMultiply(self.nodes, hw2relu, w3)
    hw3b3 = nn.MatrixVectorAdd(self.nodes, hw3, b3)
    hw3relu = nn.ReLU(self.nodes, hw3b3)
    hw4 = nn.MatrixMultiply(self.nodes, hw3relu, w4)
    final = nn.MatrixVectorAdd(self.nodes, hw4, b4)

    if y is not None:
        # The loss node must be the last node added to the graph.
        input_y = nn.Input(self.nodes, y)
        nn.SoftmaxLoss(self.nodes, final, input_y)
        return self.nodes

    # Ask for the score node by reference, not by position in the graph.
    return self.nodes.get_output(final)
def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states.

    The network takes a state and produces one Q-value per action, e.g.
    [Q(s, a_1), Q(s, a_2)] when there are two actions. It is affine ->
    ReLU -> affine, with a hidden width of 100; its parameters are created
    the first time `self.w1` is found unset.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the nn.SquareLoss between the predicted Q-values and
            Q_target
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    training = Q_target is not None

    if not self.w1:
        hidden_width = 100
        self.w1 = nn.Variable(self.state_size, hidden_width)
        self.w2 = nn.Variable(hidden_width, self.num_actions)
        self.b1 = nn.Variable(hidden_width)
        self.b2 = nn.Variable(self.num_actions)

    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

    state_node = nn.Input(graph, states)
    if training:
        target_node = nn.Input(graph, Q_target)

    hidden_linear = nn.MatrixMultiply(graph, state_node, self.w1)
    hidden_biased = nn.MatrixVectorAdd(graph, hidden_linear, self.b1)
    hidden_active = nn.ReLU(graph, hidden_biased)
    out_linear = nn.MatrixMultiply(graph, hidden_active, self.w2)
    q_values = nn.MatrixVectorAdd(graph, out_linear, self.b2)

    if not training:
        return graph.get_output(q_values)

    # The loss node must be the last node added to the graph.
    nn.SquareLoss(graph, q_values, target_node)
    return graph
def run(self, x, y=None):
    """
    Runs the odd-regression model for a batch of examples.

    The prediction is f(x) = g(x) - g(-x), where g is a three-layer network
    (affine -> ReLU -> affine -> ReLU -> affine) over the six variables
    held in `self.l`. Both passes share the same variables.

    The variables are created on the first call; a new graph over them is
    stored in `self.graph` on every call.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    if not self.graph:
        w1 = nn.Variable(1, 50)
        w2 = nn.Variable(50, 50)
        w3 = nn.Variable(50, 1)
        b1 = nn.Variable(1, 50)
        b2 = nn.Variable(1, 50)
        b3 = nn.Variable(1, 1)
        self.l = [w1, w2, w3, b1, b2, b3]

    self.graph = nn.Graph(self.l)
    graph = self.graph
    w1, w2, w3, b1, b2, b3 = self.l[:6]

    def g(node):
        """Append the three-layer network applied to `node`; return its output."""
        mult = nn.MatrixMultiply(graph, node, w1)
        add = nn.MatrixVectorAdd(graph, mult, b1)
        relu = nn.ReLU(graph, add)
        mult2 = nn.MatrixMultiply(graph, relu, w2)
        add2 = nn.MatrixVectorAdd(graph, mult2, b2)
        relu2 = nn.ReLU(graph, add2)
        mult3 = nn.MatrixMultiply(graph, relu2, w3)
        return nn.MatrixVectorAdd(graph, mult3, b3)

    input_x = nn.Input(graph, x)
    # The target input is only registered when a target actually exists.
    if y is not None:
        input_y = nn.Input(graph, y)

    # A 1x1 constant used to negate a node through a matrix product.
    # A plain ndarray is used (not np.matrix) so results stay ndarrays.
    input_neg = nn.Input(graph, np.array([[-1.]]))

    g_of_x = g(input_x)
    neg_x = nn.MatrixMultiply(graph, input_x, input_neg)
    g_of_neg_x = g(neg_x)
    neg_g_of_neg_x = nn.MatrixMultiply(graph, g_of_neg_x, input_neg)  # -g(-x)
    prediction = nn.MatrixVectorAdd(graph, g_of_x, neg_g_of_neg_x)    # g(x) - g(-x)

    if y is not None:
        # The loss node must be the last node added to the graph.
        nn.SquareLoss(graph, prediction, input_y)
        return graph

    # Ask for the prediction node by reference, not by position in the graph.
    return graph.get_output(prediction)
def run(self, xs, y=None):
    """
    Runs the recurrent language-identification model for a batch of words.

    All words in a batch have the same length L. `xs` is a list of length L
    whose elements are (batch_size x self.num_chars) arrays of one-hot
    character encodings.

    Recurrence, applied once per character c, starting from h = 0 (a
    (batch_size x 47) zero array):
        h <- ReLU((h + (c * r1 + s1)) * p1 + q1) * p2 + q2
    The final h goes through affine -> ReLU -> affine (w1, b1, w2, b2) to
    give the scores. Parameters are created by `self.setup()` the first
    time `self.w1` is found unset.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]
    training = y is not None

    if not self.w1:
        self.setup()

    graph = nn.Graph([
        self.p1, self.p2, self.q1, self.q2, self.r1, self.s1,
        self.w1, self.w2, self.b1, self.b2,
    ])

    # The hidden state starts at zero and is replaced after each character.
    state = nn.Input(graph, np.zeros([batch_size, 47]))
    for chars in xs:
        char_node = nn.Input(graph, chars)
        embedded = nn.MatrixMultiply(graph, char_node, self.r1)
        embedded_biased = nn.MatrixVectorAdd(graph, embedded, self.s1)
        mixed = nn.Add(graph, state, embedded_biased)
        mixed_linear = nn.MatrixMultiply(graph, mixed, self.p1)
        mixed_biased = nn.MatrixVectorAdd(graph, mixed_linear, self.q1)
        mixed_active = nn.ReLU(graph, mixed_biased)
        projected = nn.MatrixMultiply(graph, mixed_active, self.p2)
        state = nn.MatrixVectorAdd(graph, projected, self.q2)

    if training:
        labels = nn.Input(graph, y)

    # Read-out head applied to the final hidden state.
    head_linear = nn.MatrixMultiply(graph, state, self.w1)
    head_biased = nn.MatrixVectorAdd(graph, head_linear, self.b1)
    head_active = nn.ReLU(graph, head_biased)
    out_linear = nn.MatrixMultiply(graph, head_active, self.w2)
    logits = nn.MatrixVectorAdd(graph, out_linear, self.b2)

    if not training:
        return graph.get_output(logits)

    # The loss node must be the last node added to the graph.
    nn.SoftmaxLoss(graph, logits, labels)
    return graph
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs y are known during training, but not at test time.
    If correct outputs y are provided, this method constructs and returns
    a nn.Graph for computing the training loss. If y is None, this method
    instead runs a forward pass on x and returns the predicted y-values.

    The trainable variables are created lazily on the first call and are
    sized from len(x), so every later call has to use a batch of the same
    length.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    n = 5  # number of weight matrices, and also of bias columns, in self.l

    if not getattr(self, "l", None):
        # First call: self.l[0:n] hold the weights, self.l[n:2n] the biases.
        size = len(x)
        self.l = [nn.Variable(size, size) for _ in range(n)]
        self.l += [nn.Variable(size, 1) for _ in range(n)]

    # A fresh graph is built on every call so that the forward pass always
    # reflects the current variable values and the x that was passed in.
    graph = nn.Graph(self.l)
    input_x = nn.Input(graph, x)

    # Operand order of every node below is kept exactly as it was written.
    mult = nn.MatrixMultiply(graph, self.l[0], input_x)
    add = nn.MatrixVectorAdd(graph, mult, self.l[n])
    # The first pass of the loop (i == 0) reuses self.l[0] and self.l[n].
    for i in range(n):
        relu = nn.ReLU(graph, add)
        mult = nn.MatrixMultiply(graph, self.l[i], relu)
        add = nn.MatrixVectorAdd(graph, self.l[n + i], mult)

    if y is None:
        # Test time: return the prediction computed for this x instead of
        # reading a stale node out of the last training graph.
        return graph.get_output(add)

    # Training time: the loss has to be the last node added to the graph.
    input_y = nn.Input(graph, y)
    nn.SquareLoss(graph, add, input_y)
    self.graph = graph
    return graph
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    When the correct outputs `y` are supplied (training), a nn.Graph whose
    last added node is the square loss is built and returned. When `y` is
    None (test time), the predicted y-values are returned instead.

    The batch is transposed before it enters the network, and the layer
    sizes are taken from np.shape(x)[0] on the first call, so later calls
    have to use a batch with the same number of rows.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    training = y is not None

    if not self.w1:
        # Lazily create the parameters of the two-layer network.
        rows = np.shape(x)[0]
        hidden_units = 50
        self.w1 = nn.Variable(rows, hidden_units)
        self.w2 = nn.Variable(hidden_units, rows)
        self.b1 = nn.Variable(hidden_units)
        self.b2 = nn.Variable(rows)

    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
    features = nn.Input(graph, x.T)
    # The target node is created right after the input node, before the
    # layers, but only when a target actually exists.
    target = nn.Input(graph, y.T) if training else None

    # prediction = relu(x.T * w1 + b1) * w2 + b2
    pre_activation = nn.MatrixVectorAdd(
        graph, nn.MatrixMultiply(graph, features, self.w1), self.b1)
    activation = nn.ReLU(graph, pre_activation)
    prediction = nn.MatrixVectorAdd(
        graph, nn.MatrixMultiply(graph, activation, self.w2), self.b2)

    if training:
        # The loss node must be the last node added to the graph.
        nn.SquareLoss(graph, prediction, target)
        return graph

    # Undo the transpose applied to the input.
    return graph.get_output(prediction).T
def run(self, xs, y=None):
    """
    Question 8 - [Application] Language Identification

    Runs the recurrent model for a batch of words.

    Words may differ in length across batches, but the data processing
    guarantees that all words within one batch share the same length L.
    `xs` is a list of length L; each element is a
    (batch_size x self.num_chars) numpy array whose rows are one-hot
    encodings of a character. For example, for a batch of 8 three-letter
    words whose last word is "cat", xs[1][7, 0] == 1, because "a" is the
    initial (0th) letter of the combined alphabet.

    The hidden state starts as a (batch_size x self.hidden_size) array of
    zeros and is updated once per character as
    relu(c * Wch + h * Whh + bh). The final hidden state is mapped to a
    (batch_size x 5) array of scores with W1 and b1; higher scores
    correspond to a greater probability of that language.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array of one-hot labels, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)
    """
    batch_size = xs[0].shape[0]

    graph = nn.Graph([self.Whh, self.Wch, self.bh, self.W1, self.b1])
    hidden = nn.Input(graph, np.zeros((batch_size, self.hidden_size)))

    # Recurrent part: fold the characters into the hidden state one by one.
    for chars in xs:
        char_node = nn.Input(graph, chars)
        from_char = nn.MatrixMultiply(graph, char_node, self.Wch)
        from_hidden = nn.MatrixMultiply(graph, hidden, self.Whh)
        summed = nn.Add(graph, from_char, from_hidden)
        biased = nn.MatrixVectorAdd(graph, summed, self.bh)
        hidden = nn.ReLU(graph, biased)

    # Classification head on top of the last hidden state.
    projected = nn.MatrixMultiply(graph, hidden, self.W1)
    logits = nn.MatrixVectorAdd(graph, projected, self.b1)

    if y is None:
        return graph.get_output(logits)

    # Training: the loss node is the last node added to the graph.
    labels = nn.Input(graph, y)
    nn.SoftmaxLoss(graph, logits, labels)
    return graph
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method constructs and returns
    a nn.Graph for computing the training loss. If `y` is None, this method
    instead returns predicted y-values; no target or loss node is created
    in that case.

    One weight matrix and one bias column per layer (self.depth layers) are
    created on the first call, sized from len(x), so later calls have to
    use a batch of the same length.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    if not self.graph:
        # First call: every layer gets an X-by-X weight matrix and an
        # X-by-1 bias, where X = len(x).
        for _ in range(self.depth):
            self.weights.append(nn.Variable(len(x), len(x)))
            self.bias.append(nn.Variable(len(x), 1))

    # The variable list of the graph is all weights followed by all biases.
    self.graph = nn.Graph(self.weights + self.bias)
    graph = self.graph

    input_x = nn.Input(graph, x)
    odd_input_x = nn.Input(graph, -x)

    # First layer, evaluated on x and on -x with the same parameters.
    xm = nn.MatrixMultiply(graph, self.weights[0], input_x)
    xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.bias[0])
    odd_xm = nn.MatrixMultiply(graph, self.weights[0], odd_input_x)
    odd_xm_plus_b = nn.MatrixVectorAdd(graph, odd_xm, self.bias[0])

    # Remaining layers.
    for i in range(1, self.depth):
        # Nonlinearity for the previous layer.
        relu = nn.ReLU(graph, xm_plus_b)
        # NOTE(review): -1 is passed where every other call passes a graph
        # node; confirm that nn.MatrixMultiply accepts a plain scalar.
        odd_relu = nn.MatrixMultiply(
            graph, nn.ReLU(graph, odd_xm_plus_b), -1)
        # NOTE(review): odd_func is added to the graph but never consumed;
        # the next layer reads odd_relu. Presumably odd_func was intended
        # as its input -- left unchanged pending confirmation.
        odd_func = nn.MatrixVectorAdd(graph, relu, odd_relu)
        # New hidden layer.
        xm = nn.MatrixMultiply(graph, self.weights[i], odd_relu)
        xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.bias[i])

    if y is None:
        # Test time: there is no target, so return the prediction node's
        # output directly instead of building a loss from y=None.
        return graph.get_output(xm_plus_b)

    # Training time: the loss has to be the last node added to the graph.
    input_y = nn.Input(graph, y)
    nn.SquareLoss(graph, xm_plus_b, input_y)
    return graph
def run(self, x, y=None):
    """
    Question 5 - [Application] OddRegression

    Runs the model for a batch of examples.

    The prediction is g(x) = f(x) - f(-x), where
    f(x) = relu(x * w1 + b1) * w2 + b2, which makes g an odd function.
    Both evaluations of f share the same parameters and live in the same
    graph. The negation of f(-x) is itself a graph node (a multiplication
    by a constant negated identity matrix), so backprop reaches the
    parameters through both branches.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method constructs and returns
    a nn.Graph for computing the training loss. If `y` is None, this method
    instead returns predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    import numpy as np  # local import keeps this method self-contained

    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])

    def forward(data):
        # Adds the nodes of f(data) = relu(data * w1 + b1) * w2 + b2.
        node = nn.Input(graph, data)
        hidden = nn.MatrixVectorAdd(
            graph, nn.MatrixMultiply(graph, node, self.w1), self.b1)
        activated = nn.ReLU(graph, hidden)
        return nn.MatrixVectorAdd(
            graph, nn.MatrixMultiply(graph, activated, self.w2), self.b2)

    f_pos = forward(x)        # f(x)
    f_neg = forward(x * -1)   # f(-x)

    # -f(-x), computed inside the graph. Feeding a copied array back in as
    # a new Input would turn this branch into a constant for backprop.
    width = graph.get_output(f_neg).shape[1]
    negate = nn.Input(graph, -np.eye(width))
    minus_f_neg = nn.MatrixMultiply(graph, f_neg, negate)

    prediction = nn.Add(graph, minus_f_neg, f_pos)

    if y is None:
        # Test time: return the prediction as a numpy array.
        return graph.get_output(prediction)

    # Training time: the loss has to be the last node added to the graph.
    input_y = nn.Input(graph, y)
    nn.SquareLoss(graph, prediction, input_y)
    return graph
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    The model is a two-layer network, relu(x * w1 + b1) * w2 + b2, with 200
    hidden units. It produces a (batch_size x 10) array of scores, where
    higher scores correspond to greater probability of the image belonging
    to a particular class. The parameters are created on first use, with
    the input width taken from x.shape[1].

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None

    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the nn.SoftmaxLoss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    num_features = x.shape[1]
    hidden_size = 200
    num_classes = 10

    # Create each parameter only if it has not been created yet.
    if not self.w1:
        self.w1 = nn.Variable(num_features, hidden_size)
    if not self.w2:
        self.w2 = nn.Variable(hidden_size, num_classes)
    if not self.b1:
        self.b1 = nn.Variable(hidden_size)
    if not self.b2:
        self.b2 = nn.Variable(num_classes)

    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
    input_x = nn.Input(graph, x)
    multiply1 = nn.MatrixMultiply(graph, input_x, self.w1)
    add1 = nn.MatrixVectorAdd(graph, multiply1, self.b1)
    relu = nn.ReLU(graph, add1)
    multiply2 = nn.MatrixMultiply(graph, relu, self.w2)
    add2 = nn.MatrixVectorAdd(graph, multiply2, self.b2)

    if y is None:
        # Test time: return the scores as a numpy array.
        return graph.get_output(add2)

    # Training time: the loss has to be the last node added to the graph.
    # No explicit graph.add() is needed: all nodes above are usable through
    # the graph without one, so a second registration of the loss node
    # would only duplicate it.
    input_y = nn.Input(graph, y)
    nn.SoftmaxLoss(graph, add2, input_y)
    return graph