def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # Two-layer network: Q(s) = ReLU(s W1 + b1) W2 + b2
    graph = nn.Graph([self.W1, self.W2, self.b1, self.b2])
    input_x = nn.Input(graph, states)
    xW1 = nn.MatrixMultiply(graph, input_x, self.W1)
    xW1_plus_b1 = nn.MatrixVectorAdd(graph, xW1, self.b1)
    relu = nn.ReLU(graph, xW1_plus_b1)
    reluW2 = nn.MatrixMultiply(graph, relu, self.W2)
    total = nn.MatrixVectorAdd(graph, reluW2, self.b2)

    if Q_target is not None:
        # The loss node must be the last node added to the graph.
        input_y = nn.Input(graph, Q_target)
        nn.SquareLoss(graph, total, input_y)
        return graph
    else:
        return graph.get_output(total)
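# A hedged usage sketch (not part of the original code): with Q_target=None,
# run() returns a (batch_size x 2) numpy array of Q-values, so a caller can
# pick greedy actions with an argmax over the action axis. `model` and
# `states` are assumed names here, not from the original source.
import numpy as np

def greedy_actions(model, states):
    Q_values = model.run(states)        # (batch_size x 2), one column per action
    return np.argmax(Q_values, axis=1)  # index of the highest-valued action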
def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # Hidden layer size; tune as needed.
    h = 200
    # Lazily create the parameters on the first call.
    if not self.w1:
        self.w1 = nn.Variable(self.state_size, h)
    if not self.w2:
        self.w2 = nn.Variable(h, self.num_actions)
    if not self.b1:
        self.b1 = nn.Variable(h)
    if not self.b2:
        self.b2 = nn.Variable(self.num_actions)

    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
    inputNodeX = nn.Input(graph, states)
    mult1 = nn.MatrixMultiply(graph, inputNodeX, self.w1)
    add1 = nn.MatrixVectorAdd(graph, mult1, self.b1)
    relu = nn.ReLU(graph, add1)
    mult2 = nn.MatrixMultiply(graph, relu, self.w2)
    add2 = nn.MatrixVectorAdd(graph, mult2, self.b2)

    if Q_target is not None:
        inputNodeY = nn.Input(graph, Q_target)
        # Constructing the loss node adds it to the graph; no explicit
        # graph.add() call is needed.
        nn.SquareLoss(graph, add2, inputNodeY)
        return graph
    else:
        return graph.get_output(add2)
def createNN(self, x, y):
    # Build the variables and graph lazily on the first call.
    if not self.graph:
        for i in range(0, self.depth):
            # Weight matrix for every layer, len(x) by len(x)
            self.weights.append(nn.Variable(len(x), len(x)))
            # Bias for each layer: a len(x) by 1 vector
            self.bias.append(nn.Variable(len(x), 1))
        # Create the graph over the initialized weight and bias variables
        self.graph = nn.Graph(self.weights + self.bias)

    # Create input nodes
    input_x = nn.Input(self.graph, x)
    input_y = nn.Input(self.graph, y)

    # First layer
    xm = nn.MatrixMultiply(self.graph, self.weights[0], input_x)
    xm_plus_b = nn.MatrixVectorAdd(self.graph, xm, self.bias[0])

    # Remaining layers
    for i in range(1, self.depth):
        # Nonlinearity for the previous layer
        relu = nn.ReLU(self.graph, xm_plus_b)
        # New hidden layer
        xm = nn.MatrixMultiply(self.graph, self.weights[i], relu)
        xm_plus_b = nn.MatrixVectorAdd(self.graph, xm, self.bias[i])

    # Loss node (must be the last node added to the graph)
    loss = nn.SquareLoss(self.graph, xm_plus_b, input_y)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    graph = nn.Graph([self.m, self.b, self.m2, self.b2])
    input_x = nn.Input(graph, x)

    # ============= LAYER 01 =============== #
    xm = nn.MatrixMultiply(graph, input_x, self.m)
    xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b)

    # ============= LAYER 02 =============== #
    relu = nn.ReLU(graph, xm_plus_b)
    xm2 = nn.MatrixMultiply(graph, relu, self.m2)
    xm_plus_b2 = nn.MatrixVectorAdd(graph, xm2, self.b2)

    if y is not None:
        input_y = nn.Input(graph, y)
        loss = nn.SoftmaxLoss(graph, xm_plus_b2, input_y)
        return graph
    else:
        return graph.get_output(xm_plus_b2)
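# A hedged sketch of the surrounding training loop (assumed, not from the
# original): run() builds a fresh graph whose last node is the loss, and the
# caller then backpropagates and takes a gradient step. The names backprop()
# and step() come from this project's own docstrings ("DO NOT call backprop()
# or step() inside this method!"); the exact step() signature is an assumption.
def train_step(model, x_batch, y_batch, learning_rate=0.1):
    graph = model.run(x_batch, y_batch)  # last added node is the loss
    graph.backprop()                     # compute gradients for all variables
    graph.step(learning_rate)            # gradient-descent update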
def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states, returning either a nn.Graph whose
    last node is the training loss (if Q_target is given) or the
    (batch_size x 2) array of Q-value scores (if Q_target is None).
    """
    # Three-layer network over the state vector.
    graph = nn.Graph([self.m1, self.b1, self.m2, self.b2, self.m3, self.b3])
    input_x = nn.Input(graph, states)
    xm1 = nn.MatrixMultiply(graph, input_x, self.m1)
    xm1_plus_b1 = nn.MatrixVectorAdd(graph, xm1, self.b1)
    relu1 = nn.ReLU(graph, xm1_plus_b1)
    xm2 = nn.MatrixMultiply(graph, relu1, self.m2)
    xm2_plus_b2 = nn.MatrixVectorAdd(graph, xm2, self.b2)
    relu2 = nn.ReLU(graph, xm2_plus_b2)
    xm3 = nn.MatrixMultiply(graph, relu2, self.m3)
    xm3_plus_b3 = nn.MatrixVectorAdd(graph, xm3, self.b3)

    if Q_target is not None:
        input_y = nn.Input(graph, Q_target)
        # The SquareLoss constructor already adds the node to the graph, so
        # no explicit graph.add() call is needed.
        nn.SquareLoss(graph, xm3_plus_b3, input_y)
        return graph
    else:
        return graph.get_output(xm3_plus_b3)
def run(self, xs, y=None):
    """
    Runs the model for a batch of examples.

    Although words have different lengths, our data processing guarantees
    that within a single batch, all words will be of the same length (L).

    Here `xs` will be a list of length L. Each element of `xs` will be a
    (batch_size x self.num_chars) numpy array, where every row in the array
    is a one-hot vector encoding of a character. For example, if we have a
    batch of 8 three-letter words where the last word is "cat", we will have
    xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
    is the initial (0th) letter of our combined alphabet for this task.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 5) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should use a Recurrent Neural Network to summarize the list
    `xs` into a single node that represents a (batch_size x hidden_size)
    array, for your choice of hidden_size. It should then calculate a
    (batch_size x 5) numpy array of scores, where higher scores correspond
    to greater probability of the word originating from a particular
    language. You should use `nn.SoftmaxLoss` as your training loss.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

    Hint: you may use the batch_size variable in your code
    """
    batch_size = xs[0].shape[0]
    g = nn.Graph([self.w1, self.w2, self.w3, self.b])
    # Initial hidden state: a constant zero input, not a trainable variable,
    # so that gradient steps never touch it.
    h = nn.Input(g, np.zeros((batch_size, self.dimensionality)))
    # Recurrence: h <- ReLU(h w1 + x w2 + b)
    for x in xs:
        h1 = nn.MatrixMultiply(g, h, self.w1)
        x2 = nn.MatrixMultiply(g, nn.Input(g, x), self.w2)
        h1_add_x2 = nn.Add(g, h1, x2)
        add_b = nn.MatrixVectorAdd(g, h1_add_x2, self.b)
        h = nn.ReLU(g, add_b)
    result = nn.MatrixMultiply(g, h, self.w3)

    if y is not None:
        nn.SoftmaxLoss(g, result, nn.Input(g, y))
        return g
    else:
        return g.get_output(result)
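# A hedged smoke-test helper (assumed, not from the original): build the list
# `xs` of L one-hot arrays that run() expects from a batch of same-length
# words. num_chars=47 is a placeholder alphabet size, not a value from the
# original source.
import numpy as np

def make_onehot_batch(char_indices, num_chars=47):
    # char_indices: (batch_size x L) integer array of character ids
    batch_size, L = char_indices.shape
    xs = []
    for pos in range(L):
        onehot = np.zeros((batch_size, num_chars))
        onehot[np.arange(batch_size), char_indices[:, pos]] = 1.0
        xs.append(onehot)
    return xs  # list of L arrays, each (batch_size x num_chars)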
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # Three-layer network: f(x) = ReLU(ReLU(x W1 + b1) W2 + b2) W3 + b3
    graph = nn.Graph([self.W1, self.b1, self.W2, self.b2, self.W3, self.b3])
    input_x = nn.Input(graph, x)
    xW1 = nn.MatrixMultiply(graph, input_x, self.W1)
    xW1_plus_b1 = nn.MatrixVectorAdd(graph, xW1, self.b1)
    relu = nn.ReLU(graph, xW1_plus_b1)
    xW2 = nn.MatrixMultiply(graph, relu, self.W2)
    xW2_plus_b2 = nn.MatrixVectorAdd(graph, xW2, self.b2)
    relu_second = nn.ReLU(graph, xW2_plus_b2)
    xW3 = nn.MatrixMultiply(graph, relu_second, self.W3)
    prediction = nn.MatrixVectorAdd(graph, xW3, self.b3)

    if y is not None:
        # At training time, the correct output `y` is known. Construct a
        # loss node; it must be the last node added to the graph.
        input_y = nn.Input(graph, y)
        nn.SquareLoss(graph, prediction, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(prediction)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # Odd function: f(x) = g(x) - g(-x), where g is a two-layer network.
    oddRegressionGraph = nn.Graph([self.W1, self.b1, self.W2, self.b2])

    input_x = nn.Input(oddRegressionGraph, x)
    xW1 = nn.MatrixMultiply(oddRegressionGraph, input_x, self.W1)
    xW1_plus_b1 = nn.MatrixVectorAdd(oddRegressionGraph, xW1, self.b1)
    ReLU_1 = nn.ReLU(oddRegressionGraph, xW1_plus_b1)
    R1W2 = nn.MatrixMultiply(oddRegressionGraph, ReLU_1, self.W2)
    R1W2_plus_b2 = nn.MatrixVectorAdd(oddRegressionGraph, R1W2, self.b2)

    negx = nn.Input(oddRegressionGraph, x * -1)
    negxW1 = nn.MatrixMultiply(oddRegressionGraph, negx, self.W1)
    negxW1_plus_b1 = nn.MatrixVectorAdd(oddRegressionGraph, negxW1, self.b1)
    ReLU_2 = nn.ReLU(oddRegressionGraph, negxW1_plus_b1)
    R2W2 = nn.MatrixMultiply(oddRegressionGraph, ReLU_2, self.W2)
    R2W2_plus_b2 = nn.MatrixVectorAdd(oddRegressionGraph, R2W2, self.b2)

    # Negate g(-x) inside the graph by multiplying with a constant [[-1.]]
    # input so gradients still flow through this branch; reading get_output()
    # mid-construction and re-feeding it as an Input would cut the branch off
    # from backprop.
    neg_one = nn.Input(oddRegressionGraph, np.array([[-1.0]]))
    negR2W2_plus_b2 = nn.MatrixMultiply(oddRegressionGraph, R2W2_plus_b2, neg_one)
    sumMatrix = nn.Add(oddRegressionGraph, R1W2_plus_b2, negR2W2_plus_b2)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        input_y = nn.Input(oddRegressionGraph, y)
        nn.SquareLoss(oddRegressionGraph, sumMatrix, input_y)
        return oddRegressionGraph
    else:
        # At test time, return the model's prediction as a numpy array.
        return oddRegressionGraph.get_output(sumMatrix)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should predict a (batch_size x 10) numpy array of scores,
    where higher scores correspond to greater probability of the image
    belonging to a particular class. You should use `nn.SoftmaxLoss` as your
    training loss.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    # The forward pass is identical at train and test time, so build it once
    # and only branch for the loss node at the end.
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2, self.w3, self.b3])
    input_x = nn.Input(graph, x)
    xw_1 = nn.MatrixMultiply(graph, input_x, self.w1)
    xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw_1, self.b1)
    relu_l1 = nn.ReLU(graph, xw1_plus_b1)
    l1w2 = nn.MatrixMultiply(graph, relu_l1, self.w2)
    l1w2_plus_b2 = nn.MatrixVectorAdd(graph, l1w2, self.b2)
    relu_l2 = nn.ReLU(graph, l1w2_plus_b2)
    l2w3 = nn.MatrixMultiply(graph, relu_l2, self.w3)
    l2w3_plus_b3 = nn.MatrixVectorAdd(graph, l2w3, self.b3)

    if y is not None:
        input_y = nn.Input(graph, y)
        nn.SoftmaxLoss(graph, l2w3_plus_b3, input_y)
        return graph
    else:
        return graph.get_output(l2w3_plus_b3)
def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # Lazily create the parameters on the first call.
    if not self.graph:
        w1 = nn.Variable(4, 50)
        w2 = nn.Variable(50, 50)
        w3 = nn.Variable(50, 2)
        b1 = nn.Variable(1, 50)
        b2 = nn.Variable(1, 50)
        b3 = nn.Variable(1, 2)
        self.list = [w1, w2, w3, b1, b2, b3]
    # Rebuild the graph for every batch, reusing the same variables.
    self.graph = nn.Graph(self.list)
    input_x = nn.Input(self.graph, states)

    mult = nn.MatrixMultiply(self.graph, input_x, self.list[0])
    add = nn.MatrixVectorAdd(self.graph, mult, self.list[3])
    relu = nn.ReLU(self.graph, add)
    mult2 = nn.MatrixMultiply(self.graph, relu, self.list[1])
    add2 = nn.MatrixVectorAdd(self.graph, mult2, self.list[4])
    relu2 = nn.ReLU(self.graph, add2)
    mult3 = nn.MatrixMultiply(self.graph, relu2, self.list[2])
    add3 = nn.MatrixVectorAdd(self.graph, mult3, self.list[5])

    if Q_target is not None:
        input_y = nn.Input(self.graph, Q_target)
        nn.SquareLoss(self.graph, add3, input_y)
        return self.graph
    else:
        return self.graph.get_output(add3)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])

    # positive branch: g(x)
    input_x = nn.Input(graph, x)
    mul1 = nn.MatrixMultiply(graph, input_x, self.w1)
    add1 = nn.MatrixVectorAdd(graph, mul1, self.b1)
    hidden_output = nn.ReLU(graph, add1)
    mul2 = nn.MatrixMultiply(graph, hidden_output, self.w2)
    add2 = nn.MatrixVectorAdd(graph, mul2, self.b2)

    # negative branch: g(-x)
    neg_input_x = nn.Input(graph, -x)
    neg_mul1 = nn.MatrixMultiply(graph, neg_input_x, self.w1)
    neg_add1 = nn.MatrixVectorAdd(graph, neg_mul1, self.b1)
    neg_hidden_output = nn.ReLU(graph, neg_add1)
    neg_mul2 = nn.MatrixMultiply(graph, neg_hidden_output, self.w2)
    neg_add2 = nn.MatrixVectorAdd(graph, neg_mul2, self.b2)

    # f(x) = g(x) - g(-x), negating inside the graph via a constant input
    neg_mul3 = nn.MatrixMultiply(graph, neg_add2,
                                 nn.Input(graph, np.array([[-1.0]])))
    multiLayer = nn.MatrixVectorAdd(graph, add2, neg_mul3)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        inputY = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, multiLayer, inputY)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(multiLayer)
def run(self, x, y=None):
    """
    TODO: Question 6 - [Application] Digit Classification

    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should predict a (batch_size x 10) numpy array of scores,
    where higher scores correspond to greater probability of the image
    belonging to a particular class. You should use `nn.SoftmaxLoss` as your
    training loss.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    # Implements f(x) = relu(x.w1 + b1).w2 + b2
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])
    input_x = nn.Input(graph, x)
    a = nn.MatrixMultiply(graph, input_x, self.w1)          # a = x.w1
    b = nn.MatrixVectorAdd(graph, a, self.b1)               # b = a + b1
    two_layer_relu = nn.ReLU(graph, b)
    c = nn.MatrixMultiply(graph, two_layer_relu, self.w2)   # c = relu(b).w2
    d = nn.MatrixVectorAdd(graph, c, self.b2)               # d = c + b2

    if y is not None:
        # At training time, the correct output `y` is known: construct a
        # loss node and return the graph. The loss node must be the last
        # node added to the graph.
        input_y = nn.Input(graph, y)
        loss = nn.SoftmaxLoss(graph, d, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(d)
def run(self, x, y=None):
    """
    TODO: Question 5 - [Application] OddRegression

    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # Odd function f(x) = g(x) - g(-x), built in a single graph so that
    # gradients flow through both branches. Building the negative branch in
    # a second graph and feeding its output back in as an Input would cut
    # that branch off from backprop.
    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

    input_x = nn.Input(graph, x)
    xm = nn.MatrixMultiply(graph, input_x, self.w1)
    xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1)
    relu = nn.ReLU(graph, xm_plus_b)
    w2mul = nn.MatrixMultiply(graph, relu, self.w2)
    b2add = nn.MatrixVectorAdd(graph, w2mul, self.b2)

    input_x2 = nn.Input(graph, x * -1)
    xm2 = nn.MatrixMultiply(graph, input_x2, self.w1)
    xm_plus_b2 = nn.MatrixVectorAdd(graph, xm2, self.b1)
    relu2 = nn.ReLU(graph, xm_plus_b2)
    w2mul2 = nn.MatrixMultiply(graph, relu2, self.w2)
    b2add2 = nn.MatrixVectorAdd(graph, w2mul2, self.b2)

    # Negate g(-x) by multiplying with a constant [[-1.]] input.
    neg_one = nn.Input(graph, np.array([[-1.0]]))
    neg_branch = nn.MatrixMultiply(graph, b2add2, neg_one)
    realfinal = nn.Add(graph, b2add, neg_branch)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        input_y = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, realfinal, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(realfinal)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

    # positive branch: g(x)
    input_x = nn.Input(graph, x)
    xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
    xw1_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
    relu = nn.ReLU(graph, xw1_b1)
    xw2 = nn.MatrixMultiply(graph, relu, self.w2)
    xw2_b2 = nn.MatrixVectorAdd(graph, xw2, self.b2)

    # negative branch: g(-x), with the negation done inside the graph
    negate = nn.Input(graph, np.array([[-1.]]))
    neg_x = nn.MatrixMultiply(graph, input_x, negate)
    neg_xw1 = nn.MatrixMultiply(graph, neg_x, self.w1)
    neg_xw1_b1 = nn.MatrixVectorAdd(graph, neg_xw1, self.b1)
    neg_relu = nn.ReLU(graph, neg_xw1_b1)
    neg_xw2 = nn.MatrixMultiply(graph, neg_relu, self.w2)
    neg_xw2_b2 = nn.MatrixVectorAdd(graph, neg_xw2, self.b2)

    # f(x) = g(x) - g(-x)
    neg_neg = nn.MatrixMultiply(graph, neg_xw2_b2, negate)
    final = nn.MatrixVectorAdd(graph, xw2_b2, neg_neg)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        input_y = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, final, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(final)
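# A hedged sanity-check sketch (assumed, not from the original): a function
# built as f(x) = g(x) - g(-x) satisfies f(-x) = -f(x), so predictions at x
# and -x should sum to approximately zero. `model` is an assumed name.
import numpy as np

def check_odd(model, xs):
    xs = xs.reshape(-1, 1)                  # (batch_size x 1)
    total = model.run(xs) + model.run(-xs)  # should be ~0 everywhere
    return np.max(np.abs(total))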
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should predict a (batch_size x 10) numpy array of scores,
    where higher scores correspond to greater probability of the image
    belonging to a particular class. You should use `nn.SoftmaxLoss` as your
    training loss.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    # Lazily create the parameters on the first call.
    if not self.graph:
        w1 = nn.Variable(784, 500)
        w2 = nn.Variable(500, 500)
        w3 = nn.Variable(500, 10)
        b1 = nn.Variable(1, 500)
        b2 = nn.Variable(1, 500)
        b3 = nn.Variable(1, 10)
        self.l = [w1, w2, w3, b1, b2, b3]
    # Rebuild the graph for every batch, reusing the same variables.
    self.graph = nn.Graph(self.l)
    input_x = nn.Input(self.graph, x)                         # (batch_size x 784)

    mult = nn.MatrixMultiply(self.graph, input_x, self.l[0])  # (batch_size x 500)
    add = nn.MatrixVectorAdd(self.graph, mult, self.l[3])
    relu = nn.ReLU(self.graph, add)
    mult2 = nn.MatrixMultiply(self.graph, relu, self.l[1])    # (batch_size x 500)
    add2 = nn.MatrixVectorAdd(self.graph, mult2, self.l[4])
    relu2 = nn.ReLU(self.graph, add2)
    mult3 = nn.MatrixMultiply(self.graph, relu2, self.l[2])   # (batch_size x 10)
    add3 = nn.MatrixVectorAdd(self.graph, mult3, self.l[5])

    if y is not None:
        # At training time, the correct output `y` is known: construct the
        # loss node last and return the graph.
        input_y = nn.Input(self.graph, y)
        loss = nn.SoftmaxLoss(self.graph, add3, input_y)
        return self.graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return self.graph.get_output(add3)
def run(self, states, Q_target=None):
    """
    TODO: Question 7 - [Application] Reinforcement Learning

    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # Implements f(x) = relu(x.w1 + b1).w2 + b2
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])
    input_x = nn.Input(graph, states)
    a = nn.MatrixMultiply(graph, input_x, self.w1)          # a = x.w1
    b = nn.MatrixVectorAdd(graph, a, self.b1)               # b = a + b1
    two_layer_relu = nn.ReLU(graph, b)
    c = nn.MatrixMultiply(graph, two_layer_relu, self.w2)   # c = relu(b).w2
    d = nn.MatrixVectorAdd(graph, c, self.b2)               # d = c + b2

    if Q_target is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        input_y = nn.Input(graph, Q_target)
        loss = nn.SquareLoss(graph, d, input_y)
        return graph
    else:
        return graph.get_output(d)
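# A hedged sketch of how a caller might form Q_target (assumed, not from the
# original): a standard Bellman backup. All names here (`rewards`,
# `next_states`, `done`, `discount`) are placeholders, not from the source.
import numpy as np

def bellman_targets(model, states, actions, rewards, next_states, done,
                    discount=0.9):
    Q_target = model.run(states).copy()   # start from current predictions
    next_Q = model.run(next_states)       # (batch_size x 2)
    # Backup: r + gamma * max_a' Q(s', a'), zeroed at terminal transitions.
    backup = rewards + discount * np.max(next_Q, axis=1) * (1.0 - done)
    Q_target[np.arange(len(actions)), actions] = backup
    return Q_target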
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    self.graph = nn.Graph([self.m1, self.m2, self.b1, self.b2])

    # positive branch: g(x)
    input_x = nn.Input(self.graph, x)
    matmult1 = nn.MatrixMultiply(self.graph, input_x, self.m1)
    matadd = nn.MatrixVectorAdd(self.graph, matmult1, self.b1)
    relu = nn.ReLU(self.graph, matadd)
    matmult2 = nn.MatrixMultiply(self.graph, relu, self.m2)
    matadd1 = nn.MatrixVectorAdd(self.graph, matmult2, self.b2)

    # negative branch: g(-x)
    matrixNeg = nn.Input(self.graph, np.array([[-1.]]))
    matmultNeg = nn.MatrixMultiply(self.graph, input_x, matrixNeg)
    neg_matmult1 = nn.MatrixMultiply(self.graph, matmultNeg, self.m1)
    neg_matadd = nn.MatrixVectorAdd(self.graph, neg_matmult1, self.b1)
    neg_relu = nn.ReLU(self.graph, neg_matadd)
    neg_matmult2 = nn.MatrixMultiply(self.graph, neg_relu, self.m2)
    matadd2 = nn.MatrixVectorAdd(self.graph, neg_matmult2, self.b2)

    # f(x) = g(x) - g(-x)
    negMat = nn.MatrixMultiply(self.graph, matadd2, matrixNeg)
    func = nn.MatrixVectorAdd(self.graph, matadd1, negMat)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        input_y = nn.Input(self.graph, y)
        loss = nn.SquareLoss(self.graph, func, input_y)
        return self.graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return self.graph.get_output(func)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph([self.w_one, self.w_two, self.b_one, self.b_two])

    # relu(x w1 + b1)
    input_i = nn.Input(graph, x)
    mul = nn.MatrixMultiply(graph, input_i, self.w_one)
    add = nn.MatrixVectorAdd(graph, mul, self.b_one)
    relu = nn.ReLU(graph, add)

    # relu(-x w1 + b1)
    input_i2 = nn.Input(graph, np.negative(x))
    multiply = nn.MatrixMultiply(graph, input_i2, self.w_one)
    addition = nn.MatrixVectorAdd(graph, multiply, self.b_one)
    relu2 = nn.ReLU(graph, addition)

    # -f(-x): negate the second branch by multiplying with -I
    arr = np.negative(np.identity(self.hidden_size))
    negativeOne = nn.Input(graph, arr)
    negatedRelu = nn.MatrixMultiply(graph, relu2, negativeOne)

    # (f(x) + (-f(-x))) w2. The output bias b_two is deliberately left off:
    # adding it would break the oddness of the overall function, since an
    # odd function must satisfy f(0) = 0.
    inside = nn.MatrixVectorAdd(graph, relu, negatedRelu)
    mul2 = nn.MatrixMultiply(graph, inside, self.w_two)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        y_in = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, mul2, y_in)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(mul2)
def run(self, xs, y=None):
    """
    Runs the model for a batch of examples.

    Although words have different lengths, our data processing guarantees
    that within a single batch, all words will be of the same length (L).

    Here `xs` will be a list of length L. Each element of `xs` will be a
    (batch_size x self.num_chars) numpy array, where every row in the array
    is a one-hot vector encoding of a character. For example, if we have a
    batch of 8 three-letter words where the last word is "cat", we will have
    xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
    is the initial (0th) letter of our combined alphabet for this task.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 5) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should use a Recurrent Neural Network to summarize the list
    `xs` into a single node that represents a (batch_size x hidden_size)
    array, for your choice of hidden_size. It should then calculate a
    (batch_size x 5) numpy array of scores, where higher scores correspond
    to greater probability of the word originating from a particular
    language. You should use `nn.SoftmaxLoss` as your training loss.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

    Hint: you may use the batch_size variable in your code
    """
    batch_size = xs[0].shape[0]
    graph = nn.Graph([self.C_training, self.H_traing, self.m, self.b])
    # Initial hidden state: zeros, fed in as a constant input.
    H = np.zeros((batch_size, self.hidden_size))
    inputH = nn.Input(graph, H)
    # Recurrence: H <- ReLU(X Wc + H Wh)
    for X in xs:
        inputX = nn.Input(graph, X)
        CWx = nn.MatrixMultiply(graph, inputX, self.C_training)
        HWh = nn.MatrixMultiply(graph, inputH, self.H_traing)
        inputH = nn.ReLU(graph, nn.Add(graph, CWx, HWh))
    xm = nn.MatrixMultiply(graph, inputH, self.m)
    xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b)

    if y is not None:
        # This is a 5-way classification task, so the training loss is
        # nn.SoftmaxLoss, as the docstring requires.
        input_y = nn.Input(graph, y)
        nn.SoftmaxLoss(graph, xm_plus_b, input_y)
        return graph
    else:
        return graph.get_output(xm_plus_b)
def run(self, x, y=None):
    """
    TODO: Question 5 - [Application] OddRegression

    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # calculates g(x)
    graph = nn.Graph([self.w_one, self.b_one, self.w_two, self.b_two])
    input_x = nn.Input(graph, x)
    xw = nn.MatrixMultiply(graph, input_x, self.w_one)
    xw_plus_b = nn.MatrixVectorAdd(graph, xw, self.b_one)
    relu = nn.ReLU(graph, xw_plus_b)
    reluw = nn.MatrixMultiply(graph, relu, self.w_two)
    reluw_plus_b = nn.MatrixVectorAdd(graph, reluw, self.b_two)

    # calculates g(-x)
    negone = nn.Input(graph, np.array([[-1.0]]))
    neg_x = nn.MatrixMultiply(graph, input_x, negone)
    negxw = nn.MatrixMultiply(graph, neg_x, self.w_one)
    negxw_plus_b = nn.MatrixVectorAdd(graph, negxw, self.b_one)
    negrelu = nn.ReLU(graph, negxw_plus_b)
    negreluw = nn.MatrixMultiply(graph, negrelu, self.w_two)
    negreluw_plus_b = nn.MatrixVectorAdd(graph, negreluw, self.b_two)

    # f(x) = g(x) - g(-x)
    negG = nn.MatrixMultiply(graph, negreluw_plus_b, negone)
    oddFunc = nn.Add(graph, reluw_plus_b, negG)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        input_y = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, oddFunc, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(oddFunc)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # The forward pass is the same in both branches, so build it once.
    graph = nn.Graph([self.w1, self.b1, self.w2])
    xInput = nn.Input(graph, x)
    negXInput = nn.Input(graph, -x)
    layer1 = nn.MatrixMultiply(graph, xInput, self.w1)
    negLayer1 = nn.MatrixMultiply(graph, negXInput, self.w1)
    layer2 = nn.MatrixVectorAdd(graph, layer1, self.b1)
    negLayer2 = nn.MatrixVectorAdd(graph, negLayer1, self.b1)
    layer3 = nn.ReLU(graph, layer2)
    negLayer3 = nn.ReLU(graph, negLayer2)
    combinedLayer = nn.Subtract(graph, layer3, negLayer3)
    layer4 = nn.MatrixMultiply(graph, combinedLayer, self.w2)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        yInput = nn.Input(graph, y)
        layer5 = nn.SquareLoss(graph, layer4, yInput)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(layer4)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should predict a (batch_size x 10) numpy array of scores,
    where higher scores correspond to greater probability of the image
    belonging to a particular class. You should use `nn.SoftmaxLoss` as your
    training loss.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    # Five-layer network with ReLU between layers.
    graph = nn.Graph([
        self.weight1, self.bias1, self.weight2, self.bias2, self.weight3,
        self.bias3, self.weight4, self.bias4, self.weight5, self.bias5
    ])
    input_x = nn.Input(graph, x)
    xw1 = nn.MatrixMultiply(graph, input_x, self.weight1)
    plus1b1 = nn.MatrixVectorAdd(graph, xw1, self.bias1)
    relu1 = nn.ReLU(graph, plus1b1)
    relu1_2 = nn.MatrixMultiply(graph, relu1, self.weight2)
    plus2b2 = nn.MatrixVectorAdd(graph, relu1_2, self.bias2)
    relu2 = nn.ReLU(graph, plus2b2)
    relu2_3 = nn.MatrixMultiply(graph, relu2, self.weight3)
    plus3b3 = nn.MatrixVectorAdd(graph, relu2_3, self.bias3)
    relu3 = nn.ReLU(graph, plus3b3)
    relu3_4 = nn.MatrixMultiply(graph, relu3, self.weight4)
    plus4b4 = nn.MatrixVectorAdd(graph, relu3_4, self.bias4)
    relu4 = nn.ReLU(graph, plus4b4)
    relu4_5 = nn.MatrixMultiply(graph, relu4, self.weight5)
    plus5b5 = nn.MatrixVectorAdd(graph, relu4_5, self.bias5)

    if y is not None:
        input_y = nn.Input(graph, y)
        loss = nn.SoftmaxLoss(graph, plus5b5, input_y)
        return graph
    else:
        return graph.get_output(plus5b5)
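# A hedged evaluation sketch (assumed, not from the original): with y=None the
# model returns logits, so accuracy can be computed by comparing argmaxes of
# the logits with the one-hot labels. `model` is an assumed name.
import numpy as np

def accuracy(model, x, y_onehot):
    logits = model.run(x)  # (batch_size x 10) scores
    return np.mean(np.argmax(logits, axis=1) == np.argmax(y_onehot, axis=1))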
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])

    # positive branch: g(x)
    input_x = nn.Input(graph, x)
    nn_mm_w1 = nn.MatrixMultiply(graph, input_x, self.w1)
    nn_mm_b1 = nn.MatrixVectorAdd(graph, nn_mm_w1, self.b1)
    nn_rl_b1 = nn.ReLU(graph, nn_mm_b1)
    nn_mm_w2 = nn.MatrixMultiply(graph, nn_rl_b1, self.w2)
    nn_mm_b2 = nn.MatrixVectorAdd(graph, nn_mm_w2, self.b2)

    # negative branch: g(-x)
    nn_mx = nn.Input(graph, -1 * x)
    mm_mx = nn.MatrixMultiply(graph, nn_mx, self.w1)
    mm_mx_b1 = nn.MatrixVectorAdd(graph, mm_mx, self.b1)
    rl_mx_b1 = nn.ReLU(graph, mm_mx_b1)
    mm_mx_w2 = nn.MatrixMultiply(graph, rl_mx_b1, self.w2)
    mm_mx_b2 = nn.MatrixVectorAdd(graph, mm_mx_w2, self.b2)

    # f(x) = g(x) - g(-x)
    new_x = nn.Subtract(graph, nn_mm_b2, mm_mx_b2)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        nn_y = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, new_x, nn_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(new_x)
def run(self, states, Q_target=None):
    """
    TODO: Question 7 - [Application] Reinforcement Learning

    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    hidden_layer_size = 150
    # Lazily create the parameters on the first call.
    if not self.w1:
        self.w1 = nn.Variable(self.state_size, hidden_layer_size)
    if not self.w2:
        self.w2 = nn.Variable(hidden_layer_size, self.num_actions)
    if not self.b1:
        self.b1 = nn.Variable(hidden_layer_size)
    if not self.b2:
        self.b2 = nn.Variable(self.num_actions)

    # Two-layer network: Q(s) = ReLU(s w1 + b1) w2 + b2
    g = nn.Graph([self.w1, self.w2, self.b1, self.b2])
    input_x = nn.Input(g, states)
    xw1 = nn.MatrixMultiply(g, input_x, self.w1)
    hidden = nn.ReLU(g, nn.MatrixVectorAdd(g, xw1, self.b1))
    hw2 = nn.MatrixMultiply(g, hidden, self.w2)
    result = nn.MatrixVectorAdd(g, hw2, self.b2)

    if Q_target is not None:
        # The SquareLoss constructor already adds the node to the graph, so
        # no explicit g.add() call is needed.
        nn.SquareLoss(g, result, nn.Input(g, Q_target))
        return g
    else:
        return g.get_output(result)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph(self.variables)
    input_x = nn.Input(graph, x)
    neg_1 = nn.Input(graph, -1 * np.ones((1, 1)))

    # First we do the positive branch: g(x)
    xw1 = nn.MatrixMultiply(graph, input_x, self.variables[0])
    sumxw1b1 = nn.MatrixVectorAdd(graph, xw1, self.variables[1])
    relu = nn.ReLU(graph, sumxw1b1)
    reluW2 = nn.MatrixMultiply(graph, relu, self.variables[2])

    # Now we do the negative branch: g(-x)
    negx = nn.MatrixMultiply(graph, input_x, neg_1)
    nxw1 = nn.MatrixMultiply(graph, negx, self.variables[0])
    sumnxw1 = nn.MatrixVectorAdd(graph, nxw1, self.variables[1])
    nrelu = nn.ReLU(graph, sumnxw1)
    nreluW2 = nn.MatrixMultiply(graph, nrelu, self.variables[2])

    # Negate g(-x) and add the two branches together: f(x) = g(x) - g(-x)
    nsumNRW2b2 = nn.MatrixMultiply(graph, nreluW2, neg_1)
    totalSum = nn.Add(graph, reluW2, nsumNRW2b2)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        input_y = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, totalSum, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(totalSum)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should predict a (batch_size x 10) numpy array of scores,
    where higher scores correspond to greater probability of the image
    belonging to a particular class. You should use `nn.SoftmaxLoss` as your
    training loss.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    graph = nn.Graph([
        self.m0, self.b0, self.m1, self.b1, self.m2, self.b2,
        self.m3, self.b3, self.m4, self.b4, self.m5, self.b5
    ])
    layers = [(self.m0, self.b0), (self.m1, self.b1), (self.m2, self.b2),
              (self.m3, self.b3), (self.m4, self.b4), (self.m5, self.b5)]
    t = nn.Input(graph, x)
    for i, (m, b) in enumerate(layers):
        t = nn.MatrixMultiply(graph, t, m)
        t = nn.MatrixVectorAdd(graph, t, b)
        # ReLU between layers, but not after the final (logit) layer.
        if i < len(layers) - 1:
            t = nn.ReLU(graph, t)

    if y is not None:
        input_y = nn.Input(graph, y)
        loss = nn.SoftmaxLoss(graph, t, input_y)
        return graph
    else:
        return graph.get_output(t)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # The target is an odd function, so fold every input onto x >= 0, train
    # on the folded data, and flip the sign of the prediction back afterward.
    sign = x < 0
    x = np.where(sign, -x, x)

    graph = nn.Graph([self.m0, self.b0, self.m1, self.b1, self.m2, self.b2])
    input_x = nn.Input(graph, x)
    t = nn.MatrixMultiply(graph, input_x, self.m0)
    t = nn.MatrixVectorAdd(graph, t, self.b0)
    t = nn.ReLU(graph, t)
    t = nn.MatrixMultiply(graph, t, self.m1)
    t = nn.MatrixVectorAdd(graph, t, self.b1)
    t = nn.ReLU(graph, t)
    t = nn.MatrixMultiply(graph, t, self.m2)
    t = nn.MatrixVectorAdd(graph, t, self.b2)

    if y is not None:
        # At training time, fold the labels the same way as the inputs, then
        # construct the loss node; it must be the last node in the graph.
        y = np.where(sign, -y, y)
        input_y = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, t, input_y)
        return graph
    else:
        # At test time, unfold the sign of the prediction and pin f(0) to 0.
        res = graph.get_output(t)
        res = np.where(sign, -res, res)
        res = np.where(x == 0, np.zeros_like(res), res)
        return res
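# A hedged numpy demo of the sign-mirroring trick above (assumed, not from the
# original): an odd target satisfies y(-x) = -y(x), so training pairs can be
# folded onto x >= 0 and predictions unfolded the same way. np.sin stands in
# for the trained network here.
import numpy as np

def demo_fold_unfold():
    x = np.array([[-2.0], [-0.5], [1.0]])
    sign = x < 0
    x_folded = np.where(sign, -x, x)     # inputs folded onto x >= 0
    res = np.sin(x_folded)               # stand-in for the network output
    res = np.where(sign, -res, res)      # unfold the sign
    return np.allclose(res, np.sin(x))   # True: matches the odd target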
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should predict a (batch_size x 10) numpy array of scores,
    where higher scores correspond to greater probability of the image
    belonging to a particular class. You should use `nn.SoftmaxLoss` as your
    training loss.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    graph = nn.Graph([self.W1, self.W2, self.W3, self.W4, self.W5, self.W6])
    input_x = nn.Input(graph, x)

    # first term: ReLU(x W1 + x W2) W3
    xW1mult = nn.MatrixMultiply(graph, input_x, self.W1)
    xW2mult = nn.MatrixMultiply(graph, input_x, self.W2)
    addW1W2 = nn.Add(graph, xW1mult, xW2mult)
    relu1 = nn.ReLU(graph, addW1W2)
    reluMult = nn.MatrixMultiply(graph, relu1, self.W3)

    # second term: (x W4) W5
    xW4mult = nn.MatrixMultiply(graph, input_x, self.W4)
    W4W5mult = nn.MatrixMultiply(graph, xW4mult, self.W5)

    # combine both terms and project to the 10 logits
    per2Add = nn.Add(graph, reluMult, W4W5mult)
    totalMult = nn.MatrixMultiply(graph, per2Add, self.W6)

    if y is not None:
        input_y = nn.Input(graph, y)
        loss_node = nn.SoftmaxLoss(graph, totalMult, input_y)
        return graph
    else:
        return graph.get_output(totalMult)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # f(x) = g(x) - g(-x): run the same stack of layers on x and on -x.
    graph = nn.Graph(self.param_w + self.param_b)
    branches = [nn.Input(graph, x), nn.Input(graph, -1 * x)]
    for i in range(self.num_layers):
        for j in range(len(branches)):
            multNode = nn.MatrixMultiply(graph, branches[j], self.param_w[i])
            addNode = nn.MatrixVectorAdd(graph, multNode, self.param_b[i])
            if i != self.num_layers - 1:
                # ReLU nonlinearity between layers, but not after the last.
                branches[j] = nn.ReLU(graph, addNode)
            else:
                branches[j] = addNode

    # f(x) = g(x) + -1 * g(-x); the constant must be a 2-D array so that
    # MatrixMultiply accepts it.
    neg = nn.Input(graph, np.array([[-1.0]]))
    multNode = nn.MatrixMultiply(graph, branches[1], neg)
    final = nn.MatrixVectorAdd(graph, branches[0], multNode)

    if y is not None:
        # At training time, construct the loss node; it must be the last
        # node added to the graph.
        inY = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, final, inY)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(final)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

    # positive branch: g(x)
    a_input_x = nn.Input(graph, x)
    a_mult1 = nn.MatrixMultiply(graph, a_input_x, self.w1)
    a_add1 = nn.MatrixVectorAdd(graph, a_mult1, self.b1)
    a_relu1 = nn.ReLU(graph, a_add1)
    a_mult2 = nn.MatrixMultiply(graph, a_relu1, self.w2)
    a_add2 = nn.MatrixVectorAdd(graph, a_mult2, self.b2)

    # negative branch: g(-x)
    b_input_x = nn.Input(graph, -1 * x)
    b_mult1 = nn.MatrixMultiply(graph, b_input_x, self.w1)
    b_add1 = nn.MatrixVectorAdd(graph, b_mult1, self.b1)
    b_relu1 = nn.ReLU(graph, b_add1)
    b_mult2 = nn.MatrixMultiply(graph, b_relu1, self.w2)
    b_add2 = nn.MatrixVectorAdd(graph, b_mult2, self.b2)

    # f(x) = g(x) - g(-x): negate the second branch inside the graph by
    # multiplying with a constant [[-1.]] input so gradients flow through it.
    neg_one = nn.Input(graph, np.array([[-1.0]]))
    b_mult3 = nn.MatrixMultiply(graph, b_add2, neg_one)
    result = nn.Add(graph, a_add2, b_mult3)

    if y is not None:
        input_y = nn.Input(graph, y)
        loss = nn.SquareLoss(graph, result, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(result)