def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # Three-layer network: two ReLU hidden layers, then a final linear map
    # (no bias on the output layer) producing one Q-value per action.
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2, self.w3])
    input_states = nn.Input(graph, states)
    mul1 = nn.MatrixMultiply(graph, input_states, self.w1)
    add1 = nn.MatrixVectorAdd(graph, mul1, self.b1)
    relu1 = nn.ReLU(graph, add1)
    mul2 = nn.MatrixMultiply(graph, relu1, self.w2)
    add2 = nn.MatrixVectorAdd(graph, mul2, self.b2)
    relu2 = nn.ReLU(graph, add2)
    q_values = nn.MatrixMultiply(graph, relu2, self.w3)
    if Q_target is not None:
        input_Q_target = nn.Input(graph, Q_target)
        nn.SquareLoss(graph, q_values, input_Q_target)  # loss is the last node added
        return graph
    else:
        return graph.get_output(q_values)
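# A minimal sketch (not part of the original solution) of how the graph
# returned above might be consumed during training. It relies on nn.Graph
# exposing backprop() and step(), which docstrings elsewhere in these snippets
# reference ("DO NOT call backprop() or step() inside this method!"); the
# names `model`, `states`, `Q_target`, and `learning_rate` are hypothetical.
def train_step(model, states, Q_target, learning_rate=0.01):
    graph = model.run(states, Q_target)  # last added node is the loss
    graph.backprop()                     # gradients of the loss w.r.t. all variables
    graph.step(learning_rate)            # one gradient-descent update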
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    # A three-layer network (two hidden ReLU layers), as suggested on the
    # project website; the nested expression is broken into steps for
    # readability.
    h1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.W1), self.B1))
    h2 = nn.ReLU(nn.AddBias(nn.Linear(h1, self.W2), self.B2))
    return nn.AddBias(nn.Linear(h2, self.W3), self.B3)
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    relu1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.w1), self.b1))
    relu2 = nn.ReLU(nn.AddBias(nn.Linear(relu1, self.w2), self.b2))
    return nn.AddBias(nn.Linear(relu2, self.w3), self.b3)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    # The model is forced to be an odd function by computing
    # f(x) = g(x) - g(-x), where g is a two-layer network. The constant
    # variable `ide` holds -1 and is used to negate g(-x); it is recreated
    # on every call, so gradient updates to it do not persist.
    ide = nn.Variable(1)
    ide.data = -np.identity(1)
    g = nn.Graph([self.w1, self.b1, self.w2, self.b2, ide])

    # g(x)
    x1 = nn.MatrixMultiply(g, nn.Input(g, x), self.w1)
    x1_add_b1 = nn.MatrixVectorAdd(g, x1, self.b1)
    relu = nn.ReLU(g, x1_add_b1)
    x2 = nn.MatrixMultiply(g, relu, self.w2)
    x2_add_b2 = nn.MatrixVectorAdd(g, x2, self.b2)

    # -g(-x)
    n_x1 = nn.MatrixMultiply(g, nn.Input(g, -x), self.w1)
    n_x1_add_b1 = nn.MatrixVectorAdd(g, n_x1, self.b1)
    n_relu = nn.ReLU(g, n_x1_add_b1)
    n_x2 = nn.MatrixMultiply(g, n_relu, self.w2)
    n_x2_add_b2 = nn.MatrixVectorAdd(g, n_x2, self.b2)
    n_x2_add_b2 = nn.MatrixMultiply(g, n_x2_add_b2, ide)

    f = nn.Add(g, x2_add_b2, n_x2_add_b2)
    if y is not None:
        # At training time, the correct output `y` is known. Construct a
        # loss node; it must be the last node added to the graph.
        nn.SquareLoss(g, f, nn.Input(g, y))
        return g
    else:
        # At test time, return the model's prediction as a numpy array.
        return g.get_output(f)
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    u1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.w1), self.b1))
    u2 = nn.ReLU(nn.AddBias(nn.Linear(u1, self.w2), self.b2))
    u3 = nn.AddBias(nn.Linear(u2, self.w3), self.b3)
    return u3
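# A hedged sketch (not from the original) of a single training step for the
# node-based nn API used above. It assumes nn.SquareLoss(prediction, target),
# nn.gradients(loss, params), and Parameter.update(direction, multiplier) with
# a negative multiplier for gradient descent, as in CS188-style nn libraries;
# the parameter list and learning rate are hypothetical.
def train_step(model, x, y, learning_rate=0.01):
    params = [model.w1, model.b1, model.w2, model.b2, model.w3, model.b3]
    loss = nn.SquareLoss(model.run(x), y)
    grads = nn.gradients(loss, params)
    for param, grad in zip(params, grads):
        param.update(grad, -learning_rate)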
def encode(self, x, share=None):
    with tf.variable_scope("encoder", reuse=share):
        # kernel shape is [filter_height, filter_width, in_channels, out_channels]
        conv1 = nn.conv2d(x, [5, 5, 6, 64], 'conv1')
        pool1 = nn.max_pool_2x2(tf.nn.relu(conv1))
        conv2 = nn.conv2d(pool1, [5, 5, 64, 32], 'conv2')
        pool2 = nn.max_pool_2x2(tf.nn.relu(conv2))
        conv3 = nn.conv2d(pool2, [5, 5, 32, 32], 'conv3')
        pool3 = nn.max_pool_2x2(tf.nn.relu(conv3))
        pool3 = tf.reshape(pool3, [self.batch_size, -1])  # flatten
        l1 = nn.ReLU(pool3, 512, "fc1")    # fully connected layer with ReLU
        h_enc = nn.ReLU(l1, 512, "h_enc")
        return h_enc
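# The nn.conv2d and nn.max_pool_2x2 calls above are project-local wrappers, not
# tf.nn primitives. A plausible implementation sketch (an assumption, written
# in the same variable-initialization style the decoder below uses):
def conv2d(x, kernel_shape, name):
    with tf.variable_scope(name):
        kernel = tf.get_variable(
            'kernel', kernel_shape,
            initializer=tf.truncated_normal_initializer(stddev=1e-3))
        return tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')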
def run(self, xs):
    """
    Runs the model for a batch of examples.

    Although words have different lengths, our data processing guarantees
    that within a single batch, all words will be of the same length (L).

    Here `xs` will be a list of length L. Each element of `xs` will be a
    node with shape (batch_size x self.num_chars), where every row in the
    array is a one-hot vector encoding of a character. For example, if we
    have a batch of 8 three-letter words where the last word is "cat", then
    xs[1] will be a node that contains a 1 at position (7, 0). Here the
    index 7 reflects the fact that "cat" is the last word in the batch, and
    the index 0 reflects the fact that the letter "a" is the initial (0th)
    letter of our combined alphabet for this task.

    Your model should use a Recurrent Neural Network to summarize the list
    `xs` into a single node of shape (batch_size x hidden_size), for your
    choice of hidden_size. It should then calculate a node of shape
    (batch_size x 5) containing scores, where higher scores correspond to
    greater probability of the word originating from a particular language.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a node with shape (batch_size x self.num_chars)
    Returns:
        A node with shape (batch_size x 5) containing predicted scores
            (also called logits)
    """
    # First character: initialize the recurrent output from xs[0] alone.
    x = xs[0]
    x_w1 = nn.Linear(x, self.weights1)         # (bs x num_chars) * (num_chars x hidden) = (bs x hidden)
    xw1_b1_sum = nn.AddBias(x_w1, self.bias1)  # (bs x hidden)
    relu = nn.ReLU(xw1_b1_sum)                 # (bs x hidden)
    h_n = nn.AddBias(nn.Linear(relu, self.weights2), self.bias2)  # (bs x hidden) * (hidden x 5) = (bs x 5)

    # Remaining characters: combine each input with the previous output,
    # which is fed back through weights3 of shape (5 x hidden).
    for x in xs[1:]:
        x_w1 = nn.Add(nn.Linear(x, self.weights1),
                      nn.Linear(h_n, self.weights3))  # (bs x hidden) + (bs x 5) * (5 x hidden) = (bs x hidden)
        xw1_b1_sum = nn.AddBias(x_w1, self.bias1)     # (bs x hidden)
        relu = nn.ReLU(xw1_b1_sum)
        h_n = nn.AddBias(nn.Linear(relu, self.weights2), self.bias2)  # (bs x 5)
    return h_n
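# A short companion sketch (an assumption, not part of the original): computing
# the training loss for the language-ID logits produced by run() above, using
# nn.SoftmaxLoss(logits, labels) from the same node-based nn API.
def get_loss(self, xs, y):
    return nn.SoftmaxLoss(self.run(xs), y)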
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    first = nn.AddBias(nn.Linear(x, self.first_weights), self.fb)
    second = nn.AddBias(nn.Linear(nn.ReLU(first), self.second_weights), self.sb)
    third = nn.AddBias(nn.Linear(nn.ReLU(second), self.tw), self.tb)
    return third
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    layer1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.m1), self.b1))
    layer2 = nn.ReLU(nn.AddBias(nn.Linear(layer1, self.m2), self.b2))
    layer3 = nn.ReLU(nn.AddBias(nn.Linear(layer2, self.m3), self.b3))
    output_layer = nn.AddBias(nn.Linear(layer3, self.m4), self.b4)
    return output_layer
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    z1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.weights[0]), self.bias[0]))
    z2 = nn.ReLU(nn.AddBias(nn.Linear(z1, self.weights[1]), self.bias[1]))
    return nn.AddBias(nn.Linear(z2, self.weights[2]), self.bias[2])
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    hiddenlayer1 = nn.AddBias(nn.Linear(x, self.w1), self.b1)
    hiddenlayeractive1 = nn.ReLU(hiddenlayer1)
    hiddenlayer2 = nn.AddBias(nn.Linear(hiddenlayeractive1, self.w2), self.b2)
    hiddenlayeractive2 = nn.ReLU(hiddenlayer2)
    out = nn.AddBias(nn.Linear(hiddenlayeractive2, self.wo), self.bo)
    return out
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    lay1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.w0), self.b0))
    lay2 = nn.ReLU(nn.AddBias(nn.Linear(lay1, self.w1), self.b1))
    output = nn.AddBias(nn.Linear(lay2, self.w2), self.b2)
    return output
def decode(self, z, share=None):
    # Reverse of the encoder architecture; h_dec should have the same size
    # as the image.
    H, W, C = self.x_dim
    with tf.variable_scope("decoder", reuse=share):
        l1 = nn.ReLU(z, 512, "l1")
        l2 = nn.ReLU(l1, 512, "l2")
        in_channels = 512
        # Treat each example as a single pixel with 512 channels, then
        # transpose-convolve up to the full image size. The stride of
        # (H, W) upsamples the 1x1 input to H x W with SAME padding.
        l2 = tf.reshape(l2, [self.batch_size, 1, 1, in_channels])
        kernel = tf.get_variable(
            'kernel', [5, 5, C, in_channels],
            initializer=tf.truncated_normal_initializer(stddev=1e-3))
        output_size = [self.batch_size, H, W, C]
        deconv1 = tf.nn.conv2d_transpose(l2, kernel, output_size,
                                         [1, H, W, 1], padding='SAME')
        h_dec = tf.reshape(deconv1, [self.batch_size, -1])
        return h_dec
def run(self, states, Q_target=None):
    """
    TODO: Question 7 - [Application] Reinforcement Learning

    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    graph = nn.Graph([self.W1, self.b1, self.W2, self.b2, self.W3, self.b3])
    input_x = nn.Input(graph, states)
    W1x = nn.MatrixMultiply(graph, input_x, self.W1)
    W1b = nn.MatrixVectorAdd(graph, W1x, self.b1)
    W1Relu = nn.ReLU(graph, W1b)
    W2x = nn.MatrixMultiply(graph, W1Relu, self.W2)
    W2b = nn.MatrixVectorAdd(graph, W2x, self.b2)
    W2Relu = nn.ReLU(graph, W2b)
    W3x = nn.MatrixMultiply(graph, W2Relu, self.W3)
    W3b = nn.MatrixVectorAdd(graph, W3x, self.b3)
    yHat = W3b
    if Q_target is not None:
        input_y = nn.Input(graph, Q_target)
        Loss = nn.SquareLoss(graph, yHat, input_y)
        return graph
    else:
        return graph.get_output(yHat)
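# A hedged sketch of epsilon-greedy action selection built on top of run()
# above; not part of the original solution. `eps` is the exploration
# probability, the two actions follow the docstring, and `random` and
# `numpy as np` are assumed to be imported.
def get_action(self, state, eps):
    if random.random() < eps:
        return random.choice([0, 1])      # explore: pick a random action
    scores = self.run(np.array([state]))  # exploit: greedy w.r.t. Q-values
    return int(np.argmax(scores[0]))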
def run(self, x, y=None):
    """
    TODO: Question 6 - [Application] Digit Classification

    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should predict a (batch_size x 10) numpy array of scores,
    where higher scores correspond to greater probability of the image
    belonging to a particular class. You should use `nn.SoftmaxLoss` as your
    training loss.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    graph = nn.Graph([self.W1, self.b1, self.W2, self.b2, self.W3, self.b3])
    input_x = nn.Input(graph, x)

    # layer 1
    xm = nn.MatrixMultiply(graph, input_x, self.W1)
    xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1)
    a1 = nn.ReLU(graph, xm_plus_b)

    # layer 2
    a1m = nn.MatrixMultiply(graph, a1, self.W2)
    a1m_plus_b = nn.MatrixVectorAdd(graph, a1m, self.b2)
    a2 = nn.ReLU(graph, a1m_plus_b)

    # layer 3
    a2m = nn.MatrixMultiply(graph, a2, self.W3)
    a2m_plus_b = nn.MatrixVectorAdd(graph, a2m, self.b3)

    if y is not None:
        input_y = nn.Input(graph, y)
        loss = nn.SoftmaxLoss(graph, a2m_plus_b, input_y)
        return graph
    else:
        return graph.get_output(a2m_plus_b)
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct outputs `y` are known during training, but not at test time.
    If correct outputs `y` are provided, this method must construct and
    return a nn.Graph for computing the training loss. If `y` is None, this
    method must instead return predicted y-values.

    Inputs:
        x: a (batch_size x 1) numpy array
        y: a (batch_size x 1) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 1) numpy array of predicted y-values

    Note: DO NOT call backprop() or step() inside this method!
    """
    graph = nn.Graph([self.m1, self.b1, self.m2, self.b2])
    input_x = nn.Input(graph, x)

    # Two-layer network; the forward pass is the same at train and test
    # time, so it is built once instead of being duplicated in each branch.
    xm = nn.MatrixMultiply(graph, input_x, self.m1)
    xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1)
    hidden = nn.ReLU(graph, xm_plus_b)
    hm = nn.MatrixMultiply(graph, hidden, self.m2)
    prediction = nn.MatrixVectorAdd(graph, hm, self.b2)

    if y is not None:
        # At training time, the correct output `y` is known. Construct a
        # loss node; it must be the last node added to the graph.
        input_y = nn.Input(graph, y)
        nn.SquareLoss(graph, prediction, input_y)
        return graph
    else:
        # At test time, return the model's prediction as a numpy array.
        return graph.get_output(prediction)
def execute_layer(self, input_x, y, graph):
    xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
    xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
    relu = nn.ReLU(graph, xw1_plus_b1)
    relu_w2 = nn.MatrixMultiply(graph, relu, self.w2)
    relu_w2_plus_b2 = nn.MatrixVectorAdd(graph, relu_w2, self.b2)
    return graph, relu_w2_plus_b2
def run(self, x):
    """
    Runs the model for a batch of examples.

    Your model should predict a node with shape (batch_size x 10),
    containing scores. Higher scores correspond to greater probability of
    the image belonging to a particular class.

    Inputs:
        x: a node with shape (batch_size x 784)
    Output:
        A node with shape (batch_size x 10) containing predicted scores
            (also called logits)
    """
    # Apply each linear layer in turn; ReLU after every layer but the last.
    out = x
    for i in range(self.number_layers):
        out = nn.AddBias(nn.Linear(out, self.weights[i]), self.biases[i])
        if i < self.number_layers - 1:
            out = nn.ReLU(out)
    return out
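# One possible __init__ for the variable-depth classifier above (a sketch
# under assumed layer widths, not the original code): hidden layers of width
# 200 between the 784-dimensional input and the 10 output classes.
def __init__(self):
    sizes = [784, 200, 200, 10]  # assumed widths; any hidden sizes would do
    self.number_layers = len(sizes) - 1
    self.weights = [nn.Parameter(n_in, n_out)
                    for n_in, n_out in zip(sizes[:-1], sizes[1:])]
    self.biases = [nn.Parameter(1, n_out) for n_out in sizes[1:]]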
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    xm_1 = nn.Linear(x, self.m_1)
    layer_1 = nn.AddBias(xm_1, self.b_1)
    non_lin_1 = nn.ReLU(layer_1)
    xm_2 = nn.Linear(non_lin_1, self.m_2)
    layer_2 = nn.AddBias(xm_2, self.b_2)
    return layer_2
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    layer1_values = nn.ReLU(nn.AddBias(nn.Linear(x, self.w01), self.b1))
    layer2_values = nn.ReLU(nn.AddBias(nn.Linear(layer1_values, self.w12), self.b2))
    predicted_y = nn.AddBias(nn.Linear(layer2_values, self.w23), self.b3)
    return predicted_y
def run(self, x):
    """
    Runs the model for a batch of examples.

    Your model should predict a node with shape (batch_size x 10),
    containing scores. Higher scores correspond to greater probability of
    the image belonging to a particular class.

    Inputs:
        x: a node with shape (batch_size x 784)
    Output:
        A node with shape (batch_size x 10) containing predicted scores
            (also called logits)
    """
    # self.layers alternates [weight, bias, weight, bias, ...]. Apply ReLU
    # after every layer except the last.
    for i in range(0, len(self.layers) - 2, 2):
        x = nn.ReLU(nn.AddBias(nn.Linear(x, self.layers[i]), self.layers[i + 1]))
    # For the last layer, no ReLU: just multiply and add the bias.
    output = nn.AddBias(nn.Linear(x, self.layers[-2]), self.layers[-1])
    return output
def run(self, x):
    """
    Runs the model for a batch of examples.

    Your model should predict a node with shape (batch_size x 10),
    containing scores. Higher scores correspond to greater probability of
    the image belonging to a particular class.

    Inputs:
        x: a node with shape (batch_size x 784)
    Output:
        A node with shape (batch_size x 10) containing predicted scores
            (also called logits)
    """
    xm_1 = nn.Linear(x, self.m_1)
    layer_1 = nn.AddBias(xm_1, self.b_1)
    non_lin_1 = nn.ReLU(layer_1)
    xm_2 = nn.Linear(non_lin_1, self.m_2)
    layer_2 = nn.AddBias(xm_2, self.b_2)
    return layer_2
def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # Lazily initialize the parameters on the first call. h is the hidden
    # layer size (chosen empirically).
    h = 200
    if self.w1 is None:
        self.w1 = nn.Variable(self.state_size, h)
    if self.w2 is None:
        self.w2 = nn.Variable(h, self.num_actions)
    if self.b1 is None:
        self.b1 = nn.Variable(h)
    if self.b2 is None:
        self.b2 = nn.Variable(self.num_actions)

    graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
    inputNodeX = nn.Input(graph, states)
    mult1 = nn.MatrixMultiply(graph, inputNodeX, self.w1)
    add1 = nn.MatrixVectorAdd(graph, mult1, self.b1)
    relu = nn.ReLU(graph, add1)
    mult2 = nn.MatrixMultiply(graph, relu, self.w2)
    add2 = nn.MatrixVectorAdd(graph, mult2, self.b2)

    if Q_target is not None:
        inputNodeY = nn.Input(graph, Q_target)
        # Constructing the loss node registers it with the graph, so it is
        # already the last node added; no explicit graph.add() is needed.
        nn.SquareLoss(graph, add2, inputNodeY)
        return graph
    else:
        return graph.get_output(add2)
def build_bottom_up(self, pretrained):
    backbone = self.params['backbone']
    if backbone == "resnet50":
        model = models.resnet50(pretrained=pretrained)
    elif backbone == "resnet101":
        model = models.resnet101(pretrained=pretrained)
    else:
        raise Exception("unimplemented backbone %s" % backbone)

    # p3 ~ p5 are extracted from the backbone
    p3 = nn.Sequential(model.conv1, model.bn1, model.relu, model.maxpool,
                       model.layer1, model.layer2)
    p4 = model.layer3
    p5 = model.layer4

    # build the remaining layers
    in_channels = self.calc_in_channel_width(p5)
    p6 = nn.Conv2d(in_channels, 256, 3, stride=2, padding=1)
    p7 = nn.Sequential(nn.ReLU(), nn.Conv2d(256, 256, 3, stride=2, padding=1))

    # register the bottom-up layers
    self.bottom_up_layers = nn.ModuleList((p3, p4, p5, p6, p7))
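# A hedged usage sketch (not from the original): running the registered
# bottom-up layers in order on an image batch of shape (N, 3, H, W) yields
# the five feature maps that feed the rest of the FPN.
def extract_features(self, x):
    features = []
    for layer in self.bottom_up_layers:
        x = layer(x)
        features.append(x)
    return features  # [p3, p4, p5, p6, p7]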
def run(self, states, Q_target=None):
    """
    Runs the DQN for a batch of states.

    The DQN takes the state and computes Q-values for all possible actions
    that can be taken. That is, if there are two actions, the network takes
    as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)]

    When Q_target == None, return the matrix of Q-values currently computed
    by the network for the input states.

    When Q_target is passed, it will contain the Q-values which the network
    should be producing for the current states. You must return a nn.Graph
    which computes the training loss between your current Q-value
    predictions and these target values, using nn.SquareLoss.

    Inputs:
        states: a (batch_size x 4) numpy array
        Q_target: a (batch_size x 2) numpy array, or None
    Output:
        (if Q_target is not None) A nn.Graph instance, where the last added
            node is the loss
        (if Q_target is None) A (batch_size x 2) numpy array of Q-value
            scores, for the two actions
    """
    # One hidden ReLU layer followed by a linear output layer.
    graph = nn.Graph([self.W1, self.W2, self.b1, self.b2])
    input_x = nn.Input(graph, states)
    xW1 = nn.MatrixMultiply(graph, input_x, self.W1)
    xW1_plus_b1 = nn.MatrixVectorAdd(graph, xW1, self.b1)
    relu = nn.ReLU(graph, xW1_plus_b1)
    reluW2 = nn.MatrixMultiply(graph, relu, self.W2)
    total = nn.MatrixVectorAdd(graph, reluW2, self.b2)

    if Q_target is not None:
        input_y = nn.Input(graph, Q_target)
        nn.SquareLoss(graph, total, input_y)
        return graph
    else:
        return graph.get_output(total)
def createNN(self, x, y):
    # Build the variables and graph lazily on the first call.
    if not self.graph:
        for i in range(0, self.depth):
            # weight matrix for each layer, len(x) by len(x)
            self.weights.append(nn.Variable(len(x), len(x)))
            # bias for each layer, a vector of size len(x) by 1
            self.bias.append(nn.Variable(len(x), 1))
        # create the graph over the initialized weight and bias variables
        self.graph = nn.Graph(self.weights + self.bias)

    # create input nodes
    input_x = nn.Input(self.graph, x)
    input_y = nn.Input(self.graph, y)

    # create the first layer
    xm = nn.MatrixMultiply(self.graph, self.weights[0], input_x)
    xm_plus_b = nn.MatrixVectorAdd(self.graph, xm, self.bias[0])

    # create the remaining layers
    for i in range(1, self.depth):
        # add nonlinearity for the previous layer
        relu = nn.ReLU(self.graph, xm_plus_b)
        # create the next hidden layer
        xm = nn.MatrixMultiply(self.graph, self.weights[i], relu)
        xm_plus_b = nn.MatrixVectorAdd(self.graph, xm, self.bias[i])

    # create the loss node (the last node added to the graph) and return
    loss = nn.SquareLoss(self.graph, xm_plus_b, input_y)
    return self.graph
def run(self, x, y=None):
    """
    Runs the model for a batch of examples.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 10) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Inputs:
        x: a (batch_size x 784) numpy array
        y: a (batch_size x 10) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node is
            the loss
        (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
    """
    graph = nn.Graph([self.m, self.b, self.m2, self.b2])
    input_x = nn.Input(graph, x)

    # ============= LAYER 1 =============== #
    xm = nn.MatrixMultiply(graph, input_x, self.m)
    xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b)

    # ============= LAYER 2 =============== #
    relu = nn.ReLU(graph, xm_plus_b)
    xm2 = nn.MatrixMultiply(graph, relu, self.m2)
    xm_plus_b2 = nn.MatrixVectorAdd(graph, xm2, self.b2)

    if y is not None:
        input_y = nn.Input(graph, y)
        loss = nn.SoftmaxLoss(graph, xm_plus_b2, input_y)
        return graph
    else:
        return graph.get_output(xm_plus_b2)
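# A small helper sketch (an assumption, not in the original): turning the
# (batch_size x 10) logits returned by run(x) with y=None into predicted
# digit classes. Assumes numpy is imported as np, as elsewhere in these
# solutions.
def predict(self, x):
    scores = self.run(x)              # (batch_size x 10) array of logits
    return np.argmax(scores, axis=1)  # index of the highest-scoring class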
def run(self, x):
    """
    Runs the model for a batch of examples.

    Inputs:
        x: a node with shape (batch_size x 1)
    Returns:
        A node with shape (batch_size x 1) containing predicted y-values
    """
    x = nn.AddBias(nn.Linear(x, self.w0), self.b0)
    x = nn.ReLU(x)
    x = nn.AddBias(nn.Linear(x, self.w1), self.b1)
    x = nn.ReLU(x)
    x = nn.AddBias(nn.Linear(x, self.w2), self.b2)
    return x
def __init__(self):
    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    # You can refer to self.num_chars or len(self.languages) in your code
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

    # Initialize your model parameters here
    self.hidden_lsize = 250
    self.learning_rate = -.01  # used as a negative multiplier in Parameter.update
    self.batch_size = 200

    # Only parameters are created here; the forward pass (the
    # nn.Linear / nn.AddBias / nn.ReLU chain) belongs in run(), where the
    # input x is actually available.
    self.m_1f = nn.Parameter(self.num_chars, self.hidden_lsize)
    self.b_1f = nn.Parameter(1, self.hidden_lsize)
    # Output layer: one score per language, so the width must be
    # len(self.languages), and the bias must match it.
    self.m_2f = nn.Parameter(self.hidden_lsize, len(self.languages))
    self.b_2f = nn.Parameter(1, len(self.languages))

    self.w = nn.Parameter(self.batch_size, self.num_chars)
    self.w_hidden = nn.Parameter(self.hidden_lsize, 1)
def run(self, xs):
    """
    Runs the model for a batch of examples.

    Although words have different lengths, our data processing guarantees
    that within a single batch, all words will be of the same length (L).

    Here `xs` will be a list of length L. Each element of `xs` will be a
    node with shape (batch_size x self.num_chars), where every row in the
    array is a one-hot vector encoding of a character. For example, if we
    have a batch of 8 three-letter words where the last word is "cat", then
    xs[1] will be a node that contains a 1 at position (7, 0). Here the
    index 7 reflects the fact that "cat" is the last word in the batch, and
    the index 0 reflects the fact that the letter "a" is the initial (0th)
    letter of our combined alphabet for this task.

    Your model should use a Recurrent Neural Network to summarize the list
    `xs` into a single node of shape (batch_size x hidden_size), for your
    choice of hidden_size. It should then calculate a node of shape
    (batch_size x 5) containing scores, where higher scores correspond to
    greater probability of the word originating from a particular language.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a node with shape (batch_size x self.num_chars)
    Returns:
        A node with shape (batch_size x 5) containing predicted scores
            (also called logits)
    """
    # The first character initializes the recurrent state; each later
    # character mixes the new input with the previous state through
    # self.whidden.
    h = None
    for x in xs:
        if h is None:
            h1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.winput), self.b1))
        else:
            h1 = nn.ReLU(nn.AddBias(nn.Add(nn.Linear(x, self.winput),
                                           nn.Linear(h, self.whidden)),
                                    self.b1))
        h2 = nn.ReLU(nn.AddBias(nn.Linear(h1, self.w2), self.b2))
        h = nn.AddBias(nn.Linear(h2, self.w3), self.b3)

    # Final classification head on top of the summary vector.
    hinf1 = nn.ReLU(nn.AddBias(nn.Linear(h, self.winf1), self.binf1))
    out = nn.AddBias(nn.Linear(hinf1, self.winf2), self.binf2)
    return out