def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = .01
    hidden_layer = 100
    self.w1 = nn.Variable(4, hidden_layer)
    self.w2 = nn.Variable(hidden_layer, 2)
    self.b1 = nn.Variable(hidden_layer)
    self.b2 = nn.Variable(2)
def run(self, xs, y=None):
    """
    Runs the model for a batch of examples.

    Although words have different lengths, our data processing guarantees
    that within a single batch, all words will be of the same length (L).

    Here `xs` will be a list of length L. Each element of `xs` will be a
    (batch_size x self.num_chars) numpy array, where every row in the array
    is a one-hot vector encoding of a character. For example, if we have a
    batch of 8 three-letter words where the last word is "cat", we will
    have xs[1][7,0] == 1. Here the index 0 reflects the fact that the
    letter "a" is the initial (0th) letter of our combined alphabet for
    this task.

    The correct labels are known during training, but not at test time.
    When correct labels are available, `y` is a (batch_size x 5) numpy
    array. Each row in the array is a one-hot vector encoding the correct
    class.

    Your model should use a Recurrent Neural Network to summarize the list
    `xs` into a single node that represents a (batch_size x hidden_size)
    array, for your choice of hidden_size. It should then calculate a
    (batch_size x 5) numpy array of scores, where higher scores correspond
    to greater probability of the word originating from a particular
    language. You should use `nn.SoftmaxLoss` as your training loss.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a (batch_size x self.num_chars) numpy array
        y: a (batch_size x 5) numpy array, or None
    Output:
        (if y is not None) A nn.Graph instance, where the last added node
            is the loss
        (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

    Hint: you may use the batch_size variable in your code
    """
    batch_size = xs[0].shape[0]

    "*** YOUR CODE HERE ***"
    # Initialize the hidden state to zeros, then unroll the RNN one
    # character at a time: h' = relu(h.w1 + x.w2 + b)
    h = nn.Variable(batch_size, self.dimensionality)
    h.data = np.zeros((batch_size, self.dimensionality))
    g = nn.Graph([h, self.w1, self.w2, self.w3, self.b])
    for x in xs:
        h1 = nn.MatrixMultiply(g, h, self.w1)
        x2 = nn.MatrixMultiply(g, nn.Input(g, x), self.w2)
        h1_add_x2 = nn.Add(g, h1, x2)
        add_b = nn.MatrixVectorAdd(g, h1_add_x2, self.b)
        h = nn.ReLU(g, add_b)
    # Project the final hidden state to per-language scores
    result = nn.MatrixMultiply(g, h, self.w3)
    if y is not None:
        "*** YOUR CODE HERE ***"
        nn.SoftmaxLoss(g, result, nn.Input(g, y))
        return g
    else:
        "*** YOUR CODE HERE ***"
        return g.get_output(result)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    self.learning_rate = 0.01
    self.hidden_size = 100
    self.xinput_dim = self.state_size
    self.yinput_dim = self.num_actions
    self.m1 = nn.Variable(self.xinput_dim, self.hidden_size)
    self.b1 = nn.Variable(1, self.hidden_size)
    self.m2 = nn.Variable(self.hidden_size, self.yinput_dim)
    self.b2 = nn.Variable(1, self.yinput_dim)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_regression

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    # Start off with the learning rate and hidden layers from the previous
    # problem, then adjust if the tests fail.
    self.learning_rate = 0.05
    self.hidden_layers = 150
    # Start off with the same parameters as the previous problem.
    self.W1 = nn.Variable(1, self.hidden_layers)
    self.b1 = nn.Variable(self.hidden_layers)
    self.W2 = nn.Variable(self.hidden_layers, 1)
    self.b2 = nn.Variable(1)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_regression

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    self.learning_rate = 0.05
    # hidden layer size
    self.hidden_layer_size = 100
    # these four parameters are going to be trained
    self.W1 = nn.Variable(1, self.hidden_layer_size)
    self.b1 = nn.Variable(self.hidden_layer_size)
    self.W2 = nn.Variable(self.hidden_layer_size, 1)
    self.b2 = nn.Variable(1)
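For reference, a minimal sketch of the run() method these regression parameters imply, f(x) = relu(x.W1 + b1).W2 + b2, following the graph-construction pattern used in the check_graph_linear_regression sample code further down. The method body and the choice of nn.SquareLoss are assumptions for the regression task, not code taken from this submission.

def run(self, x, y=None):
    # Sketch (assumed, not from this submission): two-layer regression
    # forward pass using the nn graph API seen elsewhere in this file.
    graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])
    input_x = nn.Input(graph, x)
    xW1 = nn.MatrixMultiply(graph, input_x, self.W1)
    hidden = nn.ReLU(graph, nn.MatrixVectorAdd(graph, xW1, self.b1))
    xW2 = nn.MatrixMultiply(graph, hidden, self.W2)
    predictions = nn.MatrixVectorAdd(graph, xW2, self.b2)
    if y is not None:
        # During training, attach a square loss as the final node
        nn.SquareLoss(graph, predictions, nn.Input(graph, y))
        return graph
    # At test time, return the raw predictions as a numpy array
    return graph.get_output(predictions)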
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4
    self.learning_rate = 0.03

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.w1 = nn.Variable(self.state_size, 400)
    self.w2 = nn.Variable(400, self.num_actions)
    self.b1 = nn.Variable(400)
    self.b2 = nn.Variable(self.num_actions)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

    self.hidden_size = 2100
    self.learning_rate = .01
    self.W1 = nn.Variable(self.num_chars, self.hidden_size)
    self.b1 = nn.Variable(self.hidden_size)
    self.W2 = nn.Variable(self.hidden_size, 47)
    self.b2 = nn.Variable(47)
    self.W3 = nn.Variable(47, 5)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_digit_classification

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.55        # between 0.001 and 1.0
    self.hidden_layer_size = 300     # between 10 and 400
    # To implement f(x) = relu(x.w1 + b1).w2 + b2.
    # Each digit is 28x28 pixels, the values of which are stored in a
    # 784-dimensional vector of floating point numbers. Each output we
    # provide is a 10-dimensional vector which has zeros in all positions,
    # except for a one in the position corresponding to the correct class
    # of the digit.
    self.w1 = nn.Variable(784, self.hidden_layer_size)
    self.w2 = nn.Variable(self.hidden_layer_size, 10)
    self.b1 = nn.Variable(self.hidden_layer_size)
    self.b2 = nn.Variable(10)
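The comments above describe f(x) = relu(x.w1 + b1).w2 + b2 without showing the forward pass, so here is a minimal sketch of what the matching run() could look like, reusing the graph API seen in the rest of this file. Using nn.SoftmaxLoss for classification follows the convention named in the language-ID docstring and is an assumption here, not this submission's code.

def run(self, x, y=None):
    # Sketch (assumed): f(x) = relu(x.w1 + b1).w2 + b2 over 784-dim inputs,
    # producing 10 per-class scores (logits).
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])
    input_x = nn.Input(graph, x)
    xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
    hidden = nn.ReLU(graph, nn.MatrixVectorAdd(graph, xw1, self.b1))
    xw2 = nn.MatrixMultiply(graph, hidden, self.w2)
    scores = nn.MatrixVectorAdd(graph, xw2, self.b2)
    if y is not None:
        # Softmax loss over the 10 digit classes during training
        nn.SoftmaxLoss(graph, scores, nn.Input(graph, y))
        return graph
    return graph.get_output(scores)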
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    self.learning_rate = 0.01
    hidden_size = 250
    self.w_one = nn.Variable(4, hidden_size)
    self.w_two = nn.Variable(hidden_size, 2)
    self.b_one = nn.Variable(1, hidden_size)
    # The second bias must match the output width (num_actions = 2);
    # the original nn.Variable(1) was a shape mismatch.
    self.b_two = nn.Variable(1, 2)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.03
    layerSize = 100
    self.W1 = nn.Variable(self.state_size, layerSize)
    self.b1 = nn.Variable(layerSize)
    self.W2 = nn.Variable(layerSize, self.num_actions)
    self.b2 = nn.Variable(self.num_actions)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_digit_classification

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = .09
    hidden_layers = 600
    self.w1 = nn.Variable(784, hidden_layers)
    self.b1 = nn.Variable(hidden_layers)
    self.w2 = nn.Variable(hidden_layers, 784)
    self.b2 = nn.Variable(784)
    self.w3 = nn.Variable(784, 10)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_digit_classification

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = .75
    self.hidden_size = 200
    self.num_layers = 2
    self.param_w = []
    self.param_b = []
    start_size = 784
    end_size = 10
    curr_size = start_size
    # Alternate between (input -> hidden) and (hidden -> input) shaped
    # layers, with the final layer always projecting to end_size.
    for i in range(self.num_layers):
        if i == self.num_layers - 1:
            if i % 2 == 0:
                self.param_w.append(nn.Variable(curr_size, end_size))
            else:
                self.param_w.append(nn.Variable(self.hidden_size, end_size))
            curr_size = end_size
            self.param_b.append(nn.Variable(curr_size))
            break
        elif i % 2 == 0:
            self.param_b.append(nn.Variable(self.hidden_size))
        else:
            self.param_b.append(nn.Variable(curr_size))
        if i % 2 == 0:
            self.param_w.append(nn.Variable(curr_size, self.hidden_size))
        else:
            self.param_w.append(nn.Variable(self.hidden_size, curr_size))
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    # You can refer to self.num_chars or len(self.languages) in your code
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

    self.hidden = 325
    self.learning_rate = 0.20
    self.j = 120

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.w1 = nn.Variable(self.num_chars, self.j)  # input.w1 => batch_size x self.j
    self.b1 = nn.Variable(self.j)                  # broadcast to batch_size x self.j
    self.w2 = nn.Variable(self.hidden, self.j)
    self.b2 = nn.Variable(self.hidden)
    self.w3 = nn.Variable(self.j, self.hidden)
    self.h = np.zeros(self.hidden)
    self.w3_f = nn.Variable(self.j, 5)
    self.b2_f = nn.Variable(5)
def setup(self):
    h = 100
    self.p1 = nn.Variable(47, h)
    self.p2 = nn.Variable(h, 47)
    # self.p3 = nn.Variable(47, h)
    self.q1 = nn.Variable(h)
    self.q2 = nn.Variable(47)
    # self.q3 = nn.Variable(47, h)
    self.r1 = nn.Variable(47, 47)
    self.s1 = nn.Variable(47)

    self.w1 = nn.Variable(47, h)
    # self.w2 = nn.Variable(h, h)
    self.w2 = nn.Variable(h, len(self.languages))
    self.b1 = nn.Variable(h)
    # self.b2 = nn.Variable(h)
    self.b2 = nn.Variable(len(self.languages))
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_digit_classification

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    # Start off with the learning rate and hidden layers from the previous
    # problem, then adjust if the tests fail.
    self.learning_rate = 0.2
    self.hidden_layers = 200
    # Start off with the same parameters as the previous problem, but
    # W1 now takes 784 inputs (the flattened pixel vector), and W2 and b2
    # produce 10 outputs (one score per digit class).
    self.W1 = nn.Variable(784, self.hidden_layers)
    self.b1 = nn.Variable(self.hidden_layers)
    self.W2 = nn.Variable(self.hidden_layers, 10)
    self.b2 = nn.Variable(10)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    # You can refer to self.num_chars or len(self.languages) in your code
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.09
    self.h0 = nn.Variable(1, 47)
    self.w1 = nn.Variable(47, 16)
    # Note: the (batch_size x num_chars) initial hidden state cannot be
    # built here, because batch_size is only known inside run(); there,
    # broadcast self.h0 against np.zeros((batch_size, xs[0].shape[1])).
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_regression

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.2
    # hidden layer size
    self.h = 200
    # Peek at the first batch of data to size the parameters for this
    # question; only the batch size is needed, so stop after one batch.
    batch_size = 0
    for x, y in self.get_data_and_monitor(self):
        batch_size, _ = x.shape
        break
    self.W1 = nn.Variable(self.h, batch_size)
    self.W2 = nn.Variable(batch_size, self.h)
    self.b1 = nn.Variable(self.h, 1)
    self.b2 = nn.Variable(batch_size, 1)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.005       # between 0.001 and 1.0
    self.hidden_layer_size = 80      # between 10 and 400
    # To implement f(x) = relu(x.w1 + b1).w2 + b2.
    # Inputs:
    #     states: a (batch_size x 4) numpy array
    #     Q_target: a (batch_size x 2) numpy array, or None
    self.w1 = nn.Variable(self.state_size, self.hidden_layer_size)
    self.w2 = nn.Variable(self.hidden_layer_size, self.num_actions)
    self.b1 = nn.Variable(self.hidden_layer_size)
    self.b2 = nn.Variable(self.num_actions)
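Since the comments above already spell out the inputs (states, Q_target) and the form f(x) = relu(x.w1 + b1).w2 + b2, a minimal sketch of the corresponding run() follows. Regressing the predicted Q-values onto Q_target with nn.SquareLoss is an assumption about the intended training setup, not code from this submission.

def run(self, states, Q_target=None):
    # Sketch (assumed): map a (batch_size x 4) state batch to
    # (batch_size x 2) Q-values, one per action.
    graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])
    input_states = nn.Input(graph, states)
    sw1 = nn.MatrixMultiply(graph, input_states, self.w1)
    hidden = nn.ReLU(graph, nn.MatrixVectorAdd(graph, sw1, self.b1))
    sw2 = nn.MatrixMultiply(graph, hidden, self.w2)
    q_values = nn.MatrixVectorAdd(graph, sw2, self.b2)
    if Q_target is not None:
        # Train by regressing predicted Q-values onto the provided targets
        nn.SquareLoss(graph, q_values, nn.Input(graph, Q_target))
        return graph
    return graph.get_output(q_values)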
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    # You can refer to self.num_chars or len(self.languages) in your code
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.hidden_size = 300
    self.learning_rate = 0.3
    self.w_one = nn.Variable(self.num_chars, self.hidden_size)
    self.w_two = nn.Variable(self.hidden_size, 5)
    self.w_three = nn.Variable(self.hidden_size, 5)
    self.w_four = nn.Variable(5, self.hidden_size)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_regression

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.01
    self.variables = []
    # Tuning notes:
    #   try1: lr = .01,  h = 10: loss = .041577
    #   try2: lr = .01,  h = 50: loss = .026561
    #   try3: lr = .005, h = 50: loss = not good
    #   try4: lr = .01,  h = 75: loss = .015910
    i = 1
    h = 150
    self.variables.append(nn.Variable(i, h))
    self.variables.append(nn.Variable(h))
    self.variables.append(nn.Variable(h, i))
    self.variables.append(nn.Variable(i))
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_regression

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    # Earlier attempts:
    # self.learning_rate = .05
    # self.learning_rate = .1
    # self.hidden = 50
    self.learning_rate = .05
    self.hidden = 200
    self.m1 = nn.Variable(1, self.hidden)
    self.b1 = nn.Variable(1, self.hidden)
    self.m2 = nn.Variable(self.hidden, 1)
    self.b2 = nn.Variable(1, 1)
    # self.graph = nn.Graph([self.m1, self.b1, self.m2, self.b2])
    self.graph = None
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    # You can refer to self.num_chars or len(self.languages) in your code
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]
    self.learning_rate = 0.03

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    self.w1 = nn.Variable(self.num_chars, 400)
    self.w2 = nn.Variable(400, len(self.languages))
    self.b1 = nn.Variable(400)
    self.b2 = nn.Variable(len(self.languages))
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    self.learning_rate = 0.04166
    # hidden layer size
    self.hidden_layer_size = 50
    # these four parameters are going to be trained
    self.W1 = nn.Variable(self.state_size, self.hidden_layer_size)
    self.b1 = nn.Variable(self.hidden_layer_size)
    self.W2 = nn.Variable(self.hidden_layer_size, self.num_actions)
    self.b2 = nn.Variable(self.num_actions)
def check_graph_linear_regression(tracker):
    # Runs the Graph sample code, and makes sure that the Graph and the
    # nodes work well together for linear regression.
    import nn

    # This is our data, where x is a 4x2 matrix and y is a 4x1 matrix
    x = np.array([[0., 0., 1., 1.], [0., 1., 0., 1.]]).T
    y = np.dot(x, np.array([[7., 8.]]).T) + 3

    # Let's construct a simple model to approximate a function from 2D
    # points to numbers, f(x) = x_0 * m_0 + x_1 * m_1 + b
    # Here m and b are variables (trainable parameters):
    m = nn.Variable(2, 1)
    b = nn.Variable(1)

    # Instead of fixing a random seed, just set .data directly
    m.data[0, 0] = -1.
    m.data[1, 0] = -1.
    b.data[0] = -1.

    # We train our network using batch gradient descent on our data
    for iteration in range(500):
        # At each iteration, we first calculate a loss that measures how
        # good our network is. The graph keeps track of all operations used
        graph = nn.Graph([m, b])
        input_x = nn.Input(graph, x)
        input_y = nn.Input(graph, y)
        xm = nn.MatrixMultiply(graph, input_x, m)
        xm_plus_b = nn.MatrixVectorAdd(graph, xm, b)
        loss = nn.SquareLoss(graph, xm_plus_b, input_y)
        # Then we use the graph to perform backprop and update our variables
        graph.backprop()
        graph.step(1.0)

    # After training, we should have recovered m=[[7],[8]] and b=[3]
    actual_values = [m.data[0, 0], m.data[1, 0], b.data[0]]
    expected_values = [7, 8, 3]
    assert np.allclose(actual_values, expected_values), \
        "Linear regression sample code did not run correctly. Final parameters {}. Expected: {}".format(
            actual_values, expected_values)

    tracker.add_points(3)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.01
    # hidden layer sizes
    self.h1 = 400
    self.h2 = self.num_actions
    # self.h3 = 300
    # self.h4 = 300
    # set parameters for this question
    self.W1 = nn.Variable(self.state_size, self.h1)
    self.b1 = nn.Variable(1, self.h1)
    self.W2 = nn.Variable(self.h1, self.h2)
    self.b2 = nn.Variable(1, self.h2)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    # Note: optimal hyperparameters found - learning rate: 0.005, hidden layer: 400
    self.learning_rate = 0.01
    # input shape is (64, 4); output shape is (n, 2)
    self.H1 = 400
    self.W1 = nn.Variable(4, self.H1)
    self.W2 = nn.Variable(self.H1, 2)
    self.b1 = nn.Variable(self.H1)
    self.b2 = nn.Variable(2)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_regression

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.1
    hidden_layers = 100
    self.weight1 = nn.Variable(1, hidden_layers)
    self.weight2 = nn.Variable(hidden_layers, hidden_layers)
    self.weight3 = nn.Variable(hidden_layers, hidden_layers)
    self.weight4 = nn.Variable(hidden_layers, hidden_layers)
    self.weight5 = nn.Variable(hidden_layers, 1)
    self.bias1 = nn.Variable(hidden_layers)
    self.bias2 = nn.Variable(hidden_layers)
    self.bias3 = nn.Variable(hidden_layers)
    self.bias4 = nn.Variable(hidden_layers)
    self.bias5 = nn.Variable(1)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_regression

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.06
    self.hidden_size = 200
    self.num_layers = 2
    self.param_w = [
        nn.Variable(1, self.hidden_size) if i % 2 == 0
        else nn.Variable(self.hidden_size, 1)
        for i in range(self.num_layers)
    ]
    self.param_b = [
        nn.Variable(self.hidden_size) if i % 2 == 0
        else nn.Variable(1)
        for i in range(self.num_layers)
    ]
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_rl

    self.num_actions = 2
    self.state_size = 4

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    "*** YOUR CODE HERE ***"
    self.learning_rate = 0.05
    self.hidden_layers = 100
    # We are replacing the Q function mentioned in the spec, which takes
    # the form Q(s, a) = w.T * f(s, a) = w0*f0(s, a) + ... + wn*fn(s, a),
    # with a neural network defined by the following parameters.
    self.W1 = nn.Variable(4, self.hidden_layers)
    self.b1 = nn.Variable(self.hidden_layers)
    self.W2 = nn.Variable(self.hidden_layers, 2)
    self.b2 = nn.Variable(2)
def __init__(self):
    Model.__init__(self)
    self.get_data_and_monitor = backend.get_data_and_monitor_lang_id

    # Our dataset contains words from five different languages, and the
    # combined alphabets of the five languages contain a total of 47 unique
    # characters.
    # You can refer to self.num_chars or len(self.languages) in your code
    self.num_chars = 47
    self.languages = ["English", "Spanish", "Finnish", "Dutch", "Polish"]

    # Remember to set self.learning_rate!
    # You may use any learning rate that works well for your architecture
    self.learning_rate = .01
    d = 160
    c = self.num_chars
    self.w1 = nn.Variable(d, c)
    self.w2 = nn.Variable(c, c)
    self.w3 = nn.Variable(c, d)
    self.b1 = nn.Variable(d)
    self.output = nn.Variable(d, 5)