def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    For every parameter the new velocity is
    ``momentum * velocity - learning_rate * gradient`` and the new parameter
    value is the current parameter plus that new velocity.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples: first one entry per parameter
        variable, then one entry per velocity variable. At runtime each
        ``var`` should be updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    # One zero-filled velocity variable per parameter, initialised through
    # the session returned by tfu.get_session().
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    new_velocities = [
        self.momentum * vel_var - self.learning_rate * grad
        for vel_var, grad in zip(vel_vars, grad_tensors)
    ]
    param_updates = [
        (param_var, param_var + new_vel)
        for param_var, new_vel in zip(param_vars, new_velocities)
    ]
    # list() is required: on Python 3 zip() returns an iterator, which
    # cannot be concatenated to a list.
    velocity_updates = list(zip(vel_vars, new_velocities))
    return param_updates + velocity_updates
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples; all velocity updates come
        first, followed by all parameter updates. At runtime each ``var``
        should be updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = []
    for param_var in param_vars:
        zeros = np.zeros(param_var.get_shape(), dtype=np.float32)
        vel_vars.append(tf.Variable(zeros))
    tfu.get_session().run([velocity.initializer for velocity in vel_vars])

    # New velocity: momentum * v - learning_rate * gradient.
    next_velocities = [
        self.momentum * velocity - self.learning_rate * grad
        for velocity, grad in zip(vel_vars, grad_tensors)
    ]
    velocity_updates = list(zip(vel_vars, next_velocities))
    # Each weight moves by its freshly computed velocity.
    weight_updates = [
        (weight, weight + next_velocity)
        for weight, next_velocity in zip(param_vars, next_velocities)
    ]
    return velocity_updates + weight_updates
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples, interleaved per parameter as
        (velocity update, parameter update). At runtime each ``var`` should
        be updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    updates = []
    for velocity, param, grad in zip(vel_vars, param_vars, grad_tensors):
        # v <- momentum * v - learning_rate * grad;  w <- w + v
        next_velocity = self.momentum * velocity - self.learning_rate * grad
        updates.extend([
            (velocity, next_velocity),
            (param, param + next_velocity),
        ])
    return updates
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples, interleaved per parameter as
        (velocity update, parameter update). At runtime each ``var`` should
        be updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    updates = []
    for index, param_var in enumerate(param_vars):
        velocity = vel_vars[index]
        decayed_velocity = self.momentum * velocity
        gradient_step = self.learning_rate * grad_tensors[index]
        new_velocity = decayed_velocity - gradient_step
        updates += [
            (velocity, new_velocity),
            (param_var, param_var + new_velocity),
        ]
    return updates
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples, interleaved per parameter as
        (parameter update, velocity update). At runtime each ``var`` should
        be updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    updates = []
    for param_var, vel_var, grad in zip(param_vars, vel_vars, grad_tensors):
        stepped_velocity = self.momentum * vel_var - self.learning_rate * grad
        updates.extend((
            (param_var, param_var + stepped_velocity),
            (vel_var, stepped_velocity),
        ))
    return updates
def main():
    """Run one momentum update on a tiny linear regression problem.

    Prints the gradients and parameter values before the update, then the
    parameter values after the update ops have been run once.
    """
    dataset = tinyDataset()
    input_data, target_data = dataset[:2]

    initial_weights = np.array(
        [[0.4, 0.0], [0.0, -0.2], [0.1, 0.0]], dtype=np.float32)
    initial_bias = np.array([-0.5, 0.3], dtype=np.float32)
    model = models.LinearRegressionModel(
        x_shape=(None, 3), W=initial_weights, b=initial_bias)
    solver = solvers.GradientDescentSolver(
        learning_rate=0.1, iterations=1, momentum=0.9)

    target_ph = tf.placeholder(tf.float32, shape=(None, 2))
    loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
    param_vars = model.get_param_vars(trainable=True)
    update_ops = []
    for variable, new_value in solver.get_updates(loss_tensor, param_vars):
        update_ops.append(tf.assign(variable, new_value))

    # Gradient and parameter values before the updates are applied.
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    feed_dict = {model.input_ph: input_data, target_ph: target_data}
    grads = []
    for grad_tensor in grad_tensors:
        grads.append(
            grad_tensor.eval(session=tfu.get_session(), feed_dict=feed_dict))
    param_values = model.get_param_values()
    print(grads)
    print(param_values)

    tfu.get_session().run([loss_tensor] + update_ops, feed_dict=feed_dict)
    print(model.get_param_values())
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Each velocity becomes ``momentum * velocity - learning_rate * gradient``
    and each parameter is moved by its new velocity.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples, parameter updates first and
        velocity updates after them. At runtime each ``var`` should be
        updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    next_velocities = [
        self.momentum * velocity - self.learning_rate * gradient
        for velocity, gradient in zip(vel_vars, grad_tensors)
    ]
    updates = [
        (param, param + next_velocity)
        for param, next_velocity in zip(param_vars, next_velocities)
    ]
    # extend() accepts any iterable, so this works whether zip() returns a
    # list (Python 2) or an iterator (Python 3); ``list + zip(...)`` does not.
    updates.extend(zip(vel_vars, next_velocities))
    return updates
def main():
    """Demonstrate a single momentum update on the tiny dataset.

    Builds a small linear regression model and a gradient descent solver,
    prints the gradients and parameters before the update, applies the
    update ops once and prints the resulting parameters.
    """
    dataset = tinyDataset()
    input_data, target_data = dataset[:2]

    W_init = np.array([[0.4, 0.0],
                       [0.0, -0.2],
                       [0.1, 0.0]], dtype=np.float32)
    b_init = np.array([-0.5, 0.3], dtype=np.float32)
    model = models.LinearRegressionModel(x_shape=(None, 3), W=W_init, b=b_init)
    solver = solvers.GradientDescentSolver(
        learning_rate=0.1, iterations=1, momentum=0.9)

    target_ph = tf.placeholder(tf.float32, shape=(None, 2))
    loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
    param_vars = model.get_param_vars(trainable=True)
    updates = solver.get_updates(loss_tensor, param_vars)
    update_ops = [tf.assign(target, value) for target, value in updates]

    # Snapshot the gradients and parameters before anything is updated.
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    feed_dict = {}
    feed_dict[model.input_ph] = input_data
    feed_dict[target_ph] = target_data
    grads = [
        tensor.eval(session=tfu.get_session(), feed_dict=feed_dict)
        for tensor in grad_tensors
    ]
    values_before = model.get_param_values()
    print(grads)
    print(values_before)

    fetches = [loss_tensor] + update_ops
    tfu.get_session().run(fetches, feed_dict=feed_dict)
    values_after = model.get_param_values()
    print(values_after)
def get_update_values(self, moduleDict):
    """Build a model and solver, apply the solver's updates and validate them.

    Args:
        moduleDict: mapping from module name to the loaded student module;
            used to look up the solver class and, unless the model module is
            'models', the model class.

    Returns:
        A tuple ``(updates, update_values, grads)``. ``updates`` is whatever
        the solver's ``get_updates`` returned, ``grads`` are the gradient
        values evaluated after ``self.update_iterations`` runs of the update
        ops, and ``update_values`` is the evaluated value of every update, or
        None when the updates failed a format check (the reason is recorded
        with ``self.addMessage``).
    """
    # dataset
    dataset = getattr(datasets, self.dataset)()
    input_data, target_data = dataset[:2]

    # model
    if self.model_module == 'models':
        # need to check for this since this is not a student file
        module = models
    else:
        module = moduleDict[self.model_module]
    model_class = getattr(module, self.model_class)
    model_kwargs = dict(num_labels=dataset[1].shape[-1])
    if self.model_class == 'ConvNetModel':
        model_kwargs['x_shape'] = (None,) + dataset[0].shape[1:]
    else:
        model_kwargs['num_features'] = dataset[0].shape[-1]
    model = model_class(**model_kwargs)

    # solver
    solver_class = getattr(moduleDict[self.solver_module], self.solver_class)
    solver = solver_class(learning_rate=self.learning_rate, iterations=0,
                          momentum=self.momentum)

    target_ph = tf.placeholder(tf.float32, shape=(None, 2))
    loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
    param_vars = model.get_param_vars(trainable=True)

    updates = solver.get_updates(loss_tensor, param_vars)
    # NOTE(review): the assign ops are built before the format checks below,
    # so badly malformed updates fail here rather than producing a message.
    update_ops = [tf.assign(old, new) for (old, new) in updates]
    feed_dict = dict(zip([model.input_ph, target_ph], [input_data, target_data]))
    for _ in range(self.update_iterations):
        tfu.get_session().run(update_ops, feed_dict=feed_dict)

    grad_tensors = tf.gradients(loss_tensor, param_vars)
    grads = [grad_tensor.eval(session=tfu.get_session(), feed_dict=feed_dict)
             for grad_tensor in grad_tensors]

    len_messages = len(self.messages)
    # Every ``%`` argument below is wrapped in a 1-tuple: formatting with a
    # bare tuple value would be unpacked as several arguments and raise.
    if not isinstance(updates, (list, tuple)):
        self.addMessage('updates should be a list, %r given' % (updates,))
        return updates, None, grads
    # Check updates are in the right format
    for update in updates:
        try:
            old, new = update
        except ValueError:
            self.addMessage('Each update in updates should be of length 2, but it is of length %d' % len(update))
            continue
        if not isinstance(old, tf.Variable):
            self.addMessage('The first element in the tuple update should be a tf.Variable, %r given' % (old,))
        if not isinstance(new, (tf.Variable, tf.Tensor)):
            self.addMessage('The second element in the tuple update should be a tf.Variable or a tf.Tensor, %r given' % (new,))
    if len(self.messages) > len_messages:
        return updates, None, grads
    # Check for repeated variables. The variables are collected explicitly:
    # zip() is not subscriptable on Python 3 and zip(*[])[0] fails when there
    # are no updates at all.
    updated_vars = tuple(old for old, _ in updates)
    if len(set(updated_vars)) != len(updates):
        self.addMessage('There are some repeated variables being updated: %r' % (updated_vars,))
        return updates, None, grads
    update_values = [tfu.get_session().run(update, feed_dict=feed_dict)
                     for update in updates]
    return updates, update_values, grads
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples, interleaved per parameter as
        (velocity update, parameter update). At runtime each ``var`` should
        be updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    alpha = self.learning_rate
    mu = self.momentum
    updates = []
    # The debug print() calls and the list copies of the inputs were removed;
    # the tensors built below are the same as before.
    for velocity, weight, grad in zip(vel_vars, param_vars, grad_tensors):
        scaled_grad = tf.math.scalar_mul(alpha, grad)
        decayed_velocity = tf.math.multiply(mu, velocity)
        v_t_plus_1 = tf.math.subtract(decayed_velocity, scaled_grad)
        w_t_plus_1 = tf.math.add(weight, v_t_plus_1)
        updates.extend([(velocity, v_t_plus_1), (weight, w_t_plus_1)])
    return updates
def set_param_values(self, param_values, **tags):
    """Assign new values to the parameter variables selected by ``tags``.

    Args:
        param_values: sequence of values, one per selected parameter
            variable, in the order returned by ``self.get_param_vars``.
        **tags: forwarded unchanged to ``self.get_param_vars``.

    Raises:
        ValueError: if the number of values differs from the number of
            selected parameter variables.
    """
    param_vars = self.get_param_vars(**tags)
    if len(param_values) != len(param_vars):
        # The trailing space matters: adjacent literals are concatenated
        # verbatim, and without it the message read "...tagsbut...".
        raise ValueError(
            'there are %d parameter variables with the given tags '
            'but %d parameter values were given'
            % (len(param_vars), len(param_values)))
    assign_ops = [
        tf.assign(param_var, param_value)
        for param_var, param_value in zip(param_vars, param_values)
    ]
    tfu.get_session().run(assign_ops)
def __init__(self, num_features=784, num_labels=10):
    """Create the placeholder, parameters and prediction tensor of the model.

    Args:
        num_features: size of the last axis of the input placeholder.
        num_labels: number of columns of ``W`` and length of ``b``.
    """
    super(SoftmaxRegressionModel, self).__init__()

    # Input placeholder, exposed under both names.
    input_ph = tf.placeholder(tf.float32, shape=(None, num_features))
    self.x = input_ph
    self.input_ph = input_ph

    # Parameter variables: the bias is registered as not regularizable.
    initial_W = truncated_normal([num_features, num_labels],
                                 stddev=0.1,
                                 fixed_random=self._fixed_random)
    self.W = self.add_param_var(initial_W, name='W')
    initial_b = tf.constant(0.1, shape=[num_labels])
    self.b = self.add_param_var(initial_b, name='b', regularizable=False)

    # Prediction tensor: softmax over the affine transform of the input.
    affine = tf.matmul(self.x, self.W) + self.b
    self.y = tf.nn.softmax(affine)
    self.prediction_tensor = self.y

    # Initialize every parameter variable of the model.
    session = tfu.get_session()
    session.run([var.initializer for var in self.get_param_vars()])
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples, interleaved per parameter as
        (parameter update, velocity update). At runtime each ``var`` should
        be updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    updates = []
    for param_var, vel_var, grad in zip(param_vars, vel_vars, grad_tensors):
        new_velocity = self.momentum * vel_var - self.learning_rate * grad
        # Append real tuples: the previous code called tuple(each) but threw
        # the result away, so the returned updates were two-element lists
        # rather than the documented tuples.
        updates.append((param_var, param_var + new_velocity))
        updates.append((vel_var, new_velocity))
    return updates
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """Question 6.a: Optimize the model with full-batch gradient descent.

    Runs the variable updates for ``self.iterations`` iterations, recording
    the training and validation loss of each iteration.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with ``model`` after every
            iteration.

    Returns:
        A tuple of lists ``(train_losses, val_losses)`` with one entry per
        iteration. The training loss is evaluated before that iteration's
        updates are applied, the validation loss after them.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(
        tf.float32, shape=(None,) + target_train_data.shape[1:])

    loss_tensor = self.get_loss_tensor(
        model.prediction_tensor, target_ph,
        model.get_param_vars(regularizable=True))
    update_ops = []
    for variable, new_value in self.get_updates(
            loss_tensor, model.get_param_vars(trainable=True)):
        update_ops.append(tf.assign(variable, new_value))

    # The same full datasets are fed on every iteration.
    train_feed = {model.input_ph: input_train_data,
                  target_ph: target_train_data}
    val_feed = {model.input_ph: input_val_data,
                target_ph: target_val_data}

    train_losses = []
    val_losses = []
    for iter_ in range(self.iterations):
        train_losses.append(session.run(loss_tensor, feed_dict=train_feed))
        session.run(update_ops, feed_dict=train_feed)
        val_losses.append(session.run(loss_tensor, feed_dict=val_feed))

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    For each parameter ``w`` with velocity ``v`` and gradient ``g``:
    ``v_new = momentum * v - learning_rate * g`` and ``w_new = w + v_new``.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples, interleaved per parameter as
        (weight update, velocity update). At runtime each ``var`` should be
        updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    updates = []
    for weight, velocity, grad in zip(param_vars, vel_vars, grad_tensors):
        next_velocity = self.momentum * velocity - self.learning_rate * grad
        next_weight = weight + next_velocity
        updates += [(weight, next_weight), (velocity, next_velocity)]
    return updates
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """Question 6.a: Optimize the model with full-batch gradient descent.

    Runs the variable updates for ``self.iterations`` iterations, recording
    the training and validation loss of each iteration.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with ``model`` after every
            iteration.

    Returns:
        A tuple of lists ``(train_losses, val_losses)`` with one entry per
        iteration. The training loss is evaluated before that iteration's
        updates are applied, the validation loss after them.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_feed = dict(zip(placeholders, [input_train_data, target_train_data]))
    val_feed = dict(zip(placeholders, [input_val_data, target_val_data]))

    # The loss tensor is symbolic; it only yields a value once data is fed.
    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True))
    updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
    update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]

    train_losses = []
    val_losses = []
    for iter_ in range(self.iterations):
        # Evaluate the loss and apply the updates in two separate run()
        # calls. Fetching both in a single call leaves their relative order
        # undefined, so the reported loss could be from before or after the
        # parameter assignment.
        train_loss = session.run(loss_tensor, feed_dict=train_feed)
        session.run(update_ops, feed_dict=train_feed)
        val_loss = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """Question 6.a: Optimize the model with full-batch gradient descent.

    Runs the variable updates for ``self.iterations`` iterations, recording
    the training and validation loss of each iteration.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with ``model`` after every
            iteration.

    Returns:
        A tuple of lists ``(train_losses, val_losses)`` with one entry per
        iteration. The training loss is evaluated before that iteration's
        updates are applied, the validation loss after them.
    """
    session = tfu.get_session()
    target_shape = (None,) + target_train_data.shape[1:]
    target_ph = tf.placeholder(tf.float32, shape=target_shape)
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    regularizable_vars = model.get_param_vars(regularizable=True)
    loss_tensor = self.get_loss_tensor(
        model.prediction_tensor, target_ph, regularizable_vars)
    trainable_vars = model.get_param_vars(trainable=True)
    update_ops = [
        tf.assign(variable, new_value)
        for variable, new_value in self.get_updates(loss_tensor, trainable_vars)
    ]

    # Whole-dataset feeds, shared by every iteration.
    train_feed = dict(zip(placeholders, train_data))
    val_feed = dict(zip(placeholders, val_data))

    train_losses, val_losses = [], []
    for iter_ in range(self.iterations):
        loss_on_train = session.run(loss_tensor, feed_dict=train_feed)
        session.run(update_ops, feed_dict=train_feed)
        loss_on_val = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(loss_on_train)
        val_losses.append(loss_on_val)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def __init__(self, num_features=784, num_labels=10):
    """Set up the input placeholder, parameters and softmax prediction.

    Args:
        num_features: size of the last axis of the input placeholder.
        num_labels: number of columns of ``W`` and length of ``b``.
    """
    super(SoftmaxRegressionModel, self).__init__()

    # input placeholder (also available as ``input_ph``)
    self.x = tf.placeholder(tf.float32, shape=(None, num_features))
    self.input_ph = self.x

    # parameter variables
    weight_shape = [num_features, num_labels]
    weight_init = truncated_normal(
        weight_shape, stddev=0.1, fixed_random=self._fixed_random)
    self.W = self.add_param_var(weight_init, name='W')
    bias_init = tf.constant(0.1, shape=[num_labels])
    self.b = self.add_param_var(bias_init, name='b', regularizable=False)

    # prediction tensor
    scores = tf.matmul(self.x, self.W) + self.b
    self.y = tf.nn.softmax(scores)
    self.prediction_tensor = self.y

    # initialize parameters
    session = tfu.get_session()
    initializers = []
    for param_var in self.get_param_vars():
        initializers.append(param_var.initializer)
    session.run(initializers)
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """Question 6.b: Optimize the model with stochastic gradient descent.

    The parameters are meant to be updated from minibatches drawn with
    MinibatchIndefinitelyGenerator, one generator for the training data and
    one for the validation data, both created with ``self.shuffle``.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable; not used by the visible part of the
            body.

    Returns:
        Intended to be a tuple ``(train_losses, val_losses)`` with one entry
        per iteration; the visible part of the body ends before any return
        statement.
    """
    session = tfu.get_session()
    target_shape = (None,) + target_train_data.shape[1:]
    target_ph = tf.placeholder(tf.float32, shape=target_shape)
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    # One example per minibatch, for both the training and validation data.
    batch_size = 1
    train_data_generator = MinibatchIndefinitelyGenerator(
        train_data, batch_size, self.shuffle)
    val_data_generator = MinibatchIndefinitelyGenerator(
        val_data, batch_size, self.shuffle)
    # NOTE(review): the training loop and the return statement are not
    # present in this snippet; as written the function returns None.
def get_updates_with_momentum(self, loss_tensor, param_vars):
    """Question 5: Build the gradient descent updates when momentum is used.

    Args:
        loss_tensor: loss tensor used to compute the gradients.
        param_vars: list of parameter variables.

    Returns:
        A list of ``(var, new_tensor)`` tuples: all parameter updates first,
        followed by all velocity updates. At runtime each ``var`` should be
        updated with ``new_tensor``.
    """
    grad_tensors = tf.gradients(loss_tensor, param_vars)
    vel_vars = [
        tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32))
        for param_var in param_vars
    ]
    tfu.get_session().run([vel_var.initializer for vel_var in vel_vars])

    param_updates = []
    velocity_updates = []
    for grad, velocity, param in zip(grad_tensors, vel_vars, param_vars):
        gradient_step = tf.multiply(self.learning_rate, grad)
        decayed_velocity = tf.multiply(self.momentum, velocity)
        new_velocity = tf.subtract(decayed_velocity, gradient_step)
        new_param = tf.add(param, new_velocity)
        param_updates.append((param, new_param))
        velocity_updates.append((velocity, new_velocity))
    return param_updates + velocity_updates
def predict(self, input_):
    """Evaluate the prediction tensor on ``input_`` in bounded-size chunks.

    Args:
        input_: input array. When ``self.get_batch_size(input_)`` returns 0
            a leading axis is added before evaluation and removed from the
            result (presumably 0 marks a single unbatched example; verify
            against get_batch_size).

    Returns:
        The predictions for all rows, concatenated along axis 0; the leading
        axis is squeezed out again when it was added here.
    """
    batch_size = self.get_batch_size(input_)
    unbatched = batch_size == 0
    if unbatched:
        input_ = input_[None, :]
    # After the leading axis is added there is exactly one row to evaluate.
    # Looping over range(0, 0, ...) would skip it and leave np.concatenate
    # with an empty list, which raises ValueError.
    num_rows = 1 if unbatched else batch_size

    # do the computation in smaller chunks because some GPUs don't have too much memory
    # the following block of code is equivalent to this line
    # prediction = self.prediction_tensor.eval(session=tfu.get_session(), feed_dict=dict([(self.input_ph, input_)]))
    predictions = []
    for start in range(0, num_rows, self._max_eval_batch_size):
        excerpt = slice(start, min(start + self._max_eval_batch_size, num_rows))
        prediction = self.prediction_tensor.eval(
            session=tfu.get_session(),
            feed_dict={self.input_ph: input_[excerpt]})
        predictions.append(prediction)
    prediction = np.concatenate(predictions, axis=0)
    if unbatched:
        prediction = np.squeeze(prediction, axis=0)
    return prediction
def predict(self, input_):
    """Evaluate the prediction tensor on ``input_`` chunk by chunk.

    Args:
        input_: input array. When ``self.get_batch_size(input_)`` returns 0
            a leading axis is added before evaluation and removed from the
            result (presumably 0 marks a single unbatched example; verify
            against get_batch_size).

    Returns:
        The predictions for all rows, concatenated along axis 0; the leading
        axis is squeezed out again when it was added here.
    """
    batch_size = self.get_batch_size(input_)
    single_example = batch_size == 0
    if single_example:
        input_ = input_[None, :]
        # Exactly one row exists once the leading axis is added. Keeping the
        # count at 0 would make the loop below a no-op and np.concatenate
        # would raise ValueError on the empty list.
        row_count = 1
    else:
        row_count = batch_size

    # do the computation in smaller chunks because some GPUs don't have too much memory
    # the following block of code is equivalent to this line
    # prediction = self.prediction_tensor.eval(session=tfu.get_session(), feed_dict=dict([(self.input_ph, input_)]))
    chunk_size = self._max_eval_batch_size
    predictions = []
    for i in range(0, row_count, chunk_size):
        excerpt = slice(i, min(i + chunk_size, row_count))
        predictions.append(
            self.prediction_tensor.eval(
                session=tfu.get_session(),
                feed_dict={self.input_ph: input_[excerpt]}))
    prediction = np.concatenate(predictions, axis=0)
    if single_example:
        prediction = np.squeeze(prediction, axis=0)
    return prediction
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """Question 6.b: Optimize the model with stochastic gradient descent.

    On every iteration one minibatch is drawn from the training data and one
    from the validation data. The training loss is evaluated on the training
    minibatch, the updates are applied using that same minibatch, and the
    validation loss is then evaluated on the validation minibatch.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with ``model`` after every
            iteration.

    Returns:
        A tuple of lists ``(train_losses, val_losses)`` with one entry per
        iteration; both losses of an iteration are computed on minibatches
        of the same size.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(
        tf.float32, shape=(None,) + target_train_data.shape[1:])
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    loss_tensor = self.get_loss_tensor(
        model.prediction_tensor, target_ph,
        model.get_param_vars(regularizable=True))
    updates = self.get_updates(loss_tensor,
                               model.get_param_vars(trainable=True))
    update_ops = [
        tf.assign(old_var, new_var_or_tensor)
        for (old_var, new_var_or_tensor) in updates
    ]

    # Honour the solver's shuffle setting instead of always shuffling; the
    # assignment requires self.shuffle here so results match the autograder.
    train_generator = MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle)
    val_generator = MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle)

    train_losses = []
    val_losses = []
    # range() instead of xrange(): xrange does not exist on Python 3.
    for iter_ in range(self.iterations):
        # Draw each minibatch exactly once per iteration and reuse it, so
        # the loss and the update see the same data.
        train_inputs, train_targets = train_generator.next()
        val_inputs, val_targets = val_generator.next()
        train_feed = {model.input_ph: train_inputs, target_ph: train_targets}
        val_feed = {model.input_ph: val_inputs, target_ph: val_targets}

        train_loss = session.run(loss_tensor, feed_dict=train_feed)
        session.run(update_ops, feed_dict=train_feed)
        val_loss = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def get_update_values(self, moduleDict):
    """Run the configured solver's updates on a fresh model and validate them.

    A dataset, a model and a solver are instantiated from the names stored
    on ``self``. The solver's updates are applied ``self.update_iterations``
    times, the gradients of the loss are evaluated afterwards, and the
    updates are then checked for format problems. Every problem found is
    reported through ``self.addMessage``.

    Args:
        moduleDict: mapping from module name to module; used to look up the
            solver module and, unless ``self.model_module`` is ``'models'``,
            the model module.

    Returns:
        A tuple ``(updates, update_values, grads)`` where ``updates`` is the
        raw result of ``solver.get_updates``, ``update_values`` is the
        result of running each update in the session (``None`` when a
        validation check failed), and ``grads`` are the evaluated gradients
        of the loss with respect to the trainable parameters.
    """
    # dataset
    dataset = getattr(datasets, self.dataset)()
    input_data, target_data = dataset[:2]

    # model
    if self.model_module == 'models':
        # 'models' is not a student file, so it is not looked up in moduleDict.
        module = models
    else:
        module = moduleDict[self.model_module]
    model_class = getattr(module, self.model_class)
    model_kwargs = dict(num_labels=dataset[1].shape[-1])
    if self.model_class == 'ConvNetModel':
        model_kwargs['x_shape'] = (None, ) + dataset[0].shape[1:]
    else:
        model_kwargs['num_features'] = dataset[0].shape[-1]
    model = model_class(**model_kwargs)

    # solver
    solver_class = getattr(moduleDict[self.solver_module], self.solver_class)
    solver = solver_class(learning_rate=self.learning_rate,
                          iterations=0,
                          momentum=self.momentum)

    target_ph = tf.placeholder(tf.float32, shape=(None, 2))
    loss_tensor = solvers.squared_error(model.prediction_tensor, target_ph)
    param_vars = model.get_param_vars(trainable=True)

    updates = solver.get_updates(loss_tensor, param_vars)
    # NOTE(review): the updates are unpacked and run here, before the format
    # checks below, so a badly shaped ``updates`` raises at this point
    # instead of producing a message. Order kept as in the original.
    update_ops = [tf.assign(old, new) for (old, new) in updates]
    feed_dict = dict(zip([model.input_ph, target_ph],
                         [input_data, target_data]))
    for _ in range(self.update_iterations):
        tfu.get_session().run(update_ops, feed_dict=feed_dict)

    grad_tensors = tf.gradients(loss_tensor, param_vars)
    grads = [
        grad_tensor.eval(session=tfu.get_session(), feed_dict=feed_dict)
        for grad_tensor in grad_tensors
    ]

    len_messages = len(self.messages)
    if not isinstance(updates, (list, tuple)):
        # Arguments are wrapped in 1-tuples so that a tuple-valued argument
        # is formatted as one value rather than expanded by the % operator.
        self.addMessage('updates should be a list, %r given' % (updates,))
        return updates, None, grads

    # Check updates are in the right format
    for update in updates:
        try:
            old, new = update
        except ValueError:
            self.addMessage(
                'Each update in updates should be of length 2, but it is of length %d'
                % len(update))
            continue
        if not isinstance(old, tf.Variable):
            self.addMessage(
                'The first element in the tuple update should be a tf.Variable, %r given'
                % (old,))
        if not isinstance(new, (tf.Variable, tf.Tensor)):
            self.addMessage(
                'The second element in the tuple update should be a tf.Variable or a tf.Tensor, %r given'
                % (new,))
    if len(self.messages) > len_messages:
        return updates, None, grads

    # Check for repeated variables. Every update is known to be a pair here.
    # A zip object cannot be subscripted on Python 3, so the targets are
    # collected explicitly.
    updated_vars = tuple(old for old, _ in updates)
    if len(set(updated_vars)) != len(updates):
        self.addMessage(
            'There are some repeated variables being updated: %r'
            % (updated_vars,))
        return updates, None, grads

    update_values = [
        tfu.get_session().run(update, feed_dict=feed_dict)
        for update in updates
    ]
    return updates, update_values, grads
def set_param_values(self, param_values, **tags):
    """Assign new values to the parameter variables selected by ``tags``.

    Args:
        param_values: sequence of values, one per selected variable, in the
            order returned by ``self.get_param_vars(**tags)``.
        **tags: forwarded unchanged to ``self.get_param_vars``.

    Raises:
        ValueError: if the number of values differs from the number of
            selected parameter variables.
    """
    param_vars = self.get_param_vars(**tags)
    if len(param_values) != len(param_vars):
        # The trailing space keeps the two literals from running together.
        raise ValueError(
            'there are %d parameter variables with the given tags '
            'but %d parameter values were given'
            % (len(param_vars), len(param_values)))
    assign_ops = [
        tf.assign(param_var, param_value)
        for param_var, param_value in zip(param_vars, param_values)
    ]
    tfu.get_session().run(assign_ops)
def get_param_values(self, **tags):
    """Return the current value of each parameter variable matching ``tags``.

    The variables come from ``self.get_param_vars(**tags)``; each one is
    evaluated in the session returned by ``tfu.get_session()``. The result
    is a list in the same order as the variables.
    """
    values = []
    for variable in self.get_param_vars(**tags):
        values.append(variable.eval(session=tfu.get_session()))
    return values
def __init__(self, use_batchnorm=False, use_dropout=False, x_shape=(None, 28, 28, 1), num_labels=10):
    """Build the graph of a small convolutional softmax classifier.

    The network is conv(5x5, 32) -> pool -> conv(5x5, 64) -> pool ->
    fully connected(512) -> fully connected(num_labels) -> softmax. All
    weights and biases are registered through ``self.add_param_var`` and
    initialized in the shared session before returning.

    Args:
        use_batchnorm: if true, normalize each convolution output with
            ``tf.nn.moments`` / ``tf.nn.batch_normalization`` before the ReLU.
        use_dropout: if true, apply dropout with keep probability 0.5 to
            the hidden fully connected layer.
        x_shape: shape of the input placeholder; the second and third
            entries (the image sides) must be equal and the last entry is
            the number of channels.
        num_labels: number of output classes.
    """
    super(ConvNetModel, self).__init__()
    _, image_size, _, num_channels = x_shape
    assert x_shape[2] == image_size
    self.x = tf.placeholder(tf.float32, shape=x_shape)
    self.input_ph = self.x

    # Local switches kept from the original implementation.
    is_train = True
    init_symmetry = False
    var_eps = 1e-20
    use_global_bn = True
    bn_axes = [0, 1, 2] if use_global_bn else [0]

    # Each stride-2 SAME max-pool maps a side of n to ceil(n / 2), so the two
    # pools below leave ceil(image_size / 4) per side. The previous
    # expression `image_size // 4 * image_size // 4 * 64` only agreed with
    # that when image_size was a multiple of 4.
    pooled_size = (image_size + 3) // 4
    fc1_inputs = pooled_size * pooled_size * 64

    # Variables are created in a fixed order (conv1, conv2, fc1, fc2) so
    # that initialization stays unchanged.
    if init_symmetry:
        conv1_weights = tf.Variable(
            tf.zeros([5, 5, num_channels, 32]))  # 5x5 filter, depth 32.
        conv1_biases = tf.Variable(tf.zeros([32]))
        conv2_weights = tf.Variable(tf.zeros([5, 5, 32, 64]))
        conv2_biases = tf.Variable(tf.zeros([64]))
        fc1_weights = tf.Variable(  # fully connected, depth 512.
            tf.constant(0.1, shape=[fc1_inputs, 512]))
        fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
        fc2_weights = tf.Variable(tf.constant(0.1, shape=[512, num_labels]))
        fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))
    else:
        conv1_weights = tf.Variable(
            truncated_normal([5, 5, num_channels, 32],  # 5x5 filter, depth 32.
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        conv1_biases = tf.Variable(tf.zeros([32]))
        conv2_weights = tf.Variable(
            truncated_normal([5, 5, 32, 64],
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        conv2_biases = tf.Variable(tf.constant(0., shape=[64]))
        fc1_weights = tf.Variable(  # fully connected, depth 512.
            truncated_normal([fc1_inputs, 512],
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        fc1_biases = tf.Variable(tf.constant(0., shape=[512]))
        fc2_weights = tf.Variable(
            truncated_normal([512, num_labels],
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        fc2_biases = tf.Variable(tf.constant(0., shape=[num_labels]))

    # Add parameter variables for solvers
    self.conv1_weights = self.add_param_var(conv1_weights)
    self.conv1_biases = self.add_param_var(conv1_biases)
    self.conv2_weights = self.add_param_var(conv2_weights)
    self.conv2_biases = self.add_param_var(conv2_biases)
    self.fc1_weights = self.add_param_var(fc1_weights)
    self.fc1_biases = self.add_param_var(fc1_biases)
    self.fc2_weights = self.add_param_var(fc2_weights)
    self.fc2_biases = self.add_param_var(fc2_biases)

    def conv_stage(inputs, weights, biases):
        # One convolution stage: conv -> bias -> optional batch norm -> ReLU -> 2x2 max-pool.
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        conv = tf.nn.bias_add(conv, biases)
        if use_batchnorm:
            mean, variance = tf.nn.moments(conv, bn_axes)
            conv = tf.nn.batch_normalization(conv, mean, variance, None, None, var_eps)
        relu = tf.nn.relu(conv)
        return tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    # Run inference
    pool = conv_stage(self.x, conv1_weights, conv1_biases)
    pool = conv_stage(pool, conv2_weights, conv2_biases)

    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool_shape = pool.get_shape().as_list()
    reshape = tf.reshape(
        pool, [-1, pool_shape[1] * pool_shape[2] * pool_shape[3]])

    # Fully connected layer. The '+' broadcasts the biases over the batch.
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    # Dropout rescales the kept activations, so no rescaling is needed at
    # evaluation time.
    if is_train and use_dropout:
        hidden = tf.nn.dropout(hidden, 0.5, seed=_SEED)
    logits = tf.matmul(hidden, fc2_weights) + fc2_biases
    self.prediction_tensor = tf.nn.softmax(logits)

    tfu.get_session().run(
        [param_var.initializer for param_var in self.get_param_vars()])
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.c: Optimize the model with minibatch stochastic gradient
    descent and return the intermediate losses.

    The variable updates are run for self.iterations iterations. Each
    iteration draws one minibatch from a MinibatchIndefinitelyGenerator over
    the training data and one from a generator over the validation data,
    both built with self.batch_size and self.shuffle.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with the model after every
            iteration

    Returns:
        A tuple (train_losses, val_losses) of lists holding one loss per
        iteration. The validation loss is computed on a validation
        minibatch of the same size as the training minibatch.

    N and M are the numbers of training and validation points, and R and S
    are the dimensions of each input and target data point.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    feed_keys = [model.input_ph, target_ph]

    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph,
                                       model.get_param_vars(regularizable=True))
    update_ops = []
    for variable, new_value in self.get_updates(loss_tensor, model.get_param_vars(trainable=True)):
        update_ops.append(tf.assign(variable, new_value))

    train_batches = MinibatchIndefinitelyGenerator(
        [input_train_data, target_train_data], self.batch_size, self.shuffle)
    val_batches = MinibatchIndefinitelyGenerator(
        [input_val_data, target_val_data], self.batch_size, self.shuffle)

    train_losses, val_losses = [], []
    for iter_ in range(self.iterations):
        # Draw the training batch first, then the validation batch.
        train_feed = {ph: batch for ph, batch in zip(feed_keys, train_batches.next())}
        val_feed = {ph: batch for ph, batch in zip(feed_keys, val_batches.next())}

        # The loss and the parameter updates are fetched in a single run.
        train_loss, _ = session.run([loss_tensor, update_ops], feed_dict=train_feed)
        train_losses.append(train_loss)
        val_losses.append(session.run(loss_tensor, feed_dict=val_feed))

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.a: Optimize the model and return the intermediate losses.

    Optimize the model using (full-batch) gradient descent by running the
    variable updates for self.iterations iterations.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with the model after every
            iteration

    Returns:
        A tuple of lists, where the first list contains the training loss of
        each iteration and the second list contains the validation loss of
        each iteration.

    N and M are the numbers of training and validation points, and R and S
    are the dimensions of each input and target data point.
    """
    session = tfu.get_session()
    # The input placeholder is defined by the model; only the target
    # placeholder has to be created here.
    target_ph = tf.placeholder(tf.float32, shape=(None, ) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]

    # The whole data sets are fed on every iteration, so the feed
    # dictionaries are built once.
    training_dict = dict(zip(placeholders, [input_train_data, target_train_data]))
    validation_dict = dict(zip(placeholders, [input_val_data, target_val_data]))

    loss_tensor = self.get_loss_tensor(
        model.prediction_tensor, target_ph,
        model.get_param_vars(regularizable=True))
    updates = self.get_updates(loss_tensor,
                               model.get_param_vars(trainable=True))
    # Each update is a (variable, new value) pair; turn it into an assign op.
    update_ops = [
        tf.assign(old_var, new_var_or_tensor)
        for (old_var, new_var_or_tensor) in updates
    ]

    train_losses = []
    val_losses = []
    for iter_ in range(self.iterations):
        # Training loss on all of the training data, fetched together with
        # the parameter updates.
        # NOTE(review): loss and updates share one run call; confirm the
        # fetched loss is the pre-update value if that distinction matters.
        train_loss, _ = session.run([loss_tensor, update_ops],
                                    feed_dict=training_dict)
        # Validation loss on all of the validation data.
        val_loss = session.run(loss_tensor, feed_dict=validation_dict)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.b: Optimize the model and return the intermediate losses.

    Optimize the model using stochastic gradient descent by running the
    variable updates for self.iterations iterations. Each iteration uses a
    single training point drawn from a MinibatchIndefinitelyGenerator
    (batch size 1, shuffling controlled by self.shuffle), so the parameters
    are updated while iterating over the data instead of after a full pass.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with the model after every
            iteration

    Returns:
        A tuple of lists, where the first list contains the training loss of
        each iteration and the second list contains the validation loss of
        each iteration. The validation loss is computed using the same
        amount of data as the training loss, but using the validation data.

    N and M are the numbers of training and validation points, and R and S
    are the dimensions of each input and target data point.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    # One data point per step; both generators are created before the loop
    # so that they keep their position across iterations.
    train_generator = tfu.MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle)
    val_generator = tfu.MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle)

    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph,
                                       model.get_param_vars(regularizable=True))
    updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
    update_ops = [tf.assign(old_var, new_var_or_tensor)
                  for (old_var, new_var_or_tensor) in updates]

    train_losses = []
    val_losses = []
    for iter_ in range(self.iterations):
        # Loss on the training point used for this update, evaluated before
        # the parameters change.
        train_feed = dict(zip(placeholders, train_generator.next()))
        train_loss = session.run(loss_tensor, feed_dict=train_feed)
        session.run(update_ops, feed_dict=train_feed)

        # Loss on the same amount of validation data.
        val_feed = dict(zip(placeholders, val_generator.next()))
        val_loss = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.a: Optimize the model and return the intermediate losses.

    Optimize the model using (full-batch) gradient descent by running the
    variable updates for self.iterations iterations.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with the model after every
            iteration

    Returns:
        A tuple of lists, where the first list contains the training loss of
        each iteration and the second list contains the validation loss of
        each iteration.

    N and M are the numbers of training and validation points, and R and S
    are the dimensions of each input and target data point.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None, ) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    # The full data sets are fed on every iteration.
    train_feed = dict(zip(placeholders, train_data))
    val_feed = dict(zip(placeholders, val_data))

    loss_tensor = self.get_loss_tensor(
        model.prediction_tensor, target_ph,
        model.get_param_vars(regularizable=True))
    updates = self.get_updates(loss_tensor,
                               model.get_param_vars(trainable=True))
    update_ops = [
        tf.assign(old_var, new_var_or_tensor)
        for (old_var, new_var_or_tensor) in updates
    ]

    train_losses = []
    val_losses = []
    for iter_ in range(self.iterations):
        # Loss of this iteration using all of the training data.
        train_loss = session.run(loss_tensor, feed_dict=train_feed)
        # Apply every update in one run. Running the ops one by one would
        # recompute the gradients after each partial update, which is not a
        # gradient-descent step, and would fail with no update ops at all.
        session.run(update_ops, feed_dict=train_feed)
        # Loss of this iteration using all of the validation data.
        val_loss = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def __init__(self, use_batchnorm=False, use_dropout=False, x_shape=(None, 28, 28, 1), num_labels=10):
    """Build the graph of a small convolutional softmax classifier.

    The network is conv(5x5, 32) -> pool -> conv(5x5, 64) -> pool ->
    fully connected(512) -> fully connected(num_labels) -> softmax. All
    weights and biases are registered through ``self.add_param_var`` and
    initialized in the shared session before returning.

    Args:
        use_batchnorm: if true, normalize each convolution output with
            ``tf.nn.moments`` / ``tf.nn.batch_normalization`` before the ReLU.
        use_dropout: if true, apply dropout with keep probability 0.5 to
            the hidden fully connected layer.
        x_shape: shape of the input placeholder; the second and third
            entries (the image sides) must be equal and the last entry is
            the number of channels.
        num_labels: number of output classes.
    """
    super(ConvNetModel, self).__init__()
    _, image_size, _, num_channels = x_shape
    assert x_shape[2] == image_size
    self.x = tf.placeholder(tf.float32, shape=x_shape)
    self.input_ph = self.x

    # Local switches kept from the original implementation.
    is_train = True
    init_symmetry = False
    var_eps = 1e-20
    use_global_bn = True
    bn_axes = [0, 1, 2] if use_global_bn else [0]

    # Each stride-2 SAME max-pool maps a side of n to ceil(n / 2), so the two
    # pools below leave ceil(image_size / 4) per side. The previous
    # expression `image_size // 4 * image_size // 4 * 64` only agreed with
    # that when image_size was a multiple of 4.
    pooled_size = (image_size + 3) // 4
    fc1_inputs = pooled_size * pooled_size * 64

    # Variables are created in a fixed order (conv1, conv2, fc1, fc2) so
    # that initialization stays unchanged.
    if init_symmetry:
        conv1_weights = tf.Variable(
            tf.zeros([5, 5, num_channels, 32]))  # 5x5 filter, depth 32.
        conv1_biases = tf.Variable(tf.zeros([32]))
        conv2_weights = tf.Variable(tf.zeros([5, 5, 32, 64]))
        conv2_biases = tf.Variable(tf.zeros([64]))
        fc1_weights = tf.Variable(  # fully connected, depth 512.
            tf.constant(0.1, shape=[fc1_inputs, 512]))
        fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
        fc2_weights = tf.Variable(tf.constant(0.1, shape=[512, num_labels]))
        fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))
    else:
        conv1_weights = tf.Variable(
            truncated_normal([5, 5, num_channels, 32],  # 5x5 filter, depth 32.
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        conv1_biases = tf.Variable(tf.zeros([32]))
        conv2_weights = tf.Variable(
            truncated_normal([5, 5, 32, 64],
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        conv2_biases = tf.Variable(tf.constant(0., shape=[64]))
        fc1_weights = tf.Variable(  # fully connected, depth 512.
            truncated_normal([fc1_inputs, 512],
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        fc1_biases = tf.Variable(tf.constant(0., shape=[512]))
        fc2_weights = tf.Variable(
            truncated_normal([512, num_labels],
                             stddev=0.1,
                             fixed_random=self._fixed_random))
        fc2_biases = tf.Variable(tf.constant(0., shape=[num_labels]))

    # Add parameter variables for solvers
    self.conv1_weights = self.add_param_var(conv1_weights)
    self.conv1_biases = self.add_param_var(conv1_biases)
    self.conv2_weights = self.add_param_var(conv2_weights)
    self.conv2_biases = self.add_param_var(conv2_biases)
    self.fc1_weights = self.add_param_var(fc1_weights)
    self.fc1_biases = self.add_param_var(fc1_biases)
    self.fc2_weights = self.add_param_var(fc2_weights)
    self.fc2_biases = self.add_param_var(fc2_biases)

    def conv_stage(inputs, weights, biases):
        # One convolution stage: conv -> bias -> optional batch norm -> ReLU -> 2x2 max-pool.
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        conv = tf.nn.bias_add(conv, biases)
        if use_batchnorm:
            mean, variance = tf.nn.moments(conv, bn_axes)
            conv = tf.nn.batch_normalization(conv, mean, variance, None, None, var_eps)
        relu = tf.nn.relu(conv)
        return tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    # Run inference
    pool = conv_stage(self.x, conv1_weights, conv1_biases)
    pool = conv_stage(pool, conv2_weights, conv2_biases)

    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool_shape = pool.get_shape().as_list()
    reshape = tf.reshape(
        pool, [-1, pool_shape[1] * pool_shape[2] * pool_shape[3]])

    # Fully connected layer. The '+' broadcasts the biases over the batch.
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    # Dropout rescales the kept activations, so no rescaling is needed at
    # evaluation time.
    if is_train and use_dropout:
        hidden = tf.nn.dropout(hidden, 0.5, seed=_SEED)
    logits = tf.matmul(hidden, fc2_weights) + fc2_biases
    self.prediction_tensor = tf.nn.softmax(logits)

    tfu.get_session().run(
        [param_var.initializer for param_var in self.get_param_vars()])
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.c: Optimize the model with minibatch stochastic gradient
    descent and return the intermediate losses.

    The variable updates are run for self.iterations iterations. Minibatches
    come from two tfu.MinibatchIndefinitelyGenerator instances (training and
    validation), both created before the loop with self.batch_size and
    self.shuffle.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with the model after every
            iteration

    Returns:
        A tuple (train_losses, val_losses) of lists with one entry per
        iteration. The validation loss uses a validation minibatch of the
        same size as the training minibatch.

    N and M are the numbers of training and validation points, and R and S
    are the dimensions of each input and target data point.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    input_ph = model.input_ph

    "*** YOUR CODE HERE ***"
    # Minibatch sources; each .next() call yields one [inputs, targets] pair.
    train_batches = tfu.MinibatchIndefinitelyGenerator(
        [input_train_data, target_train_data], self.batch_size, self.shuffle)
    val_batches = tfu.MinibatchIndefinitelyGenerator(
        [input_val_data, target_val_data], self.batch_size, self.shuffle)

    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph,
                                       model.get_param_vars(regularizable=True))
    update_ops = []
    for variable, new_value in self.get_updates(loss_tensor, model.get_param_vars(trainable=True)):
        update_ops.append(tf.assign(variable, new_value))

    train_losses, val_losses = [], []
    for iter_ in range(self.iterations):
        "*** YOUR CODE HERE ***"
        # Training step: evaluate the loss on the minibatch, then apply the
        # parameter updates computed from that same minibatch.
        batch_inputs, batch_targets = train_batches.next()
        train_feed = {input_ph: batch_inputs, target_ph: batch_targets}
        train_losses.append(session.run(loss_tensor, train_feed))
        session.run(update_ops, train_feed)

        # Validation step: loss on a validation minibatch of the same size.
        val_inputs, val_targets = val_batches.next()
        val_feed = {input_ph: val_inputs, target_ph: val_targets}
        val_losses.append(session.run(loss_tensor, val_feed))

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.a: Optimize the model with full-batch gradient descent and
    return the intermediate losses.

    The variable updates are run for self.iterations iterations, feeding
    all of the training data on every iteration.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with the model after every
            iteration

    Returns:
        A tuple (train_losses, val_losses) of lists with one entry per
        iteration.

    N and M are the numbers of training and validation points, and R and S
    are the dimensions of each input and target data point.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    feed_keys = [model.input_ph, target_ph]

    "*** YOUR CODE HERE ***"
    # The feeds never change between iterations, so build them once.
    train_feed = dict(zip(feed_keys, [input_train_data, target_train_data]))
    val_feed = dict(zip(feed_keys, [input_val_data, target_val_data]))

    # Graph construction only; nothing is executed until session.run.
    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph,
                                       model.get_param_vars(regularizable=True))
    update_ops = []
    for variable, new_value in self.get_updates(loss_tensor, model.get_param_vars(trainable=True)):
        update_ops.append(tf.assign(variable, new_value))

    train_losses, val_losses = [], []
    for iter_ in range(self.iterations):
        "*** YOUR CODE HERE ***"
        # Loss on all of the training data, then the parameter update.
        train_losses.append(session.run(loss_tensor, train_feed))
        session.run(update_ops, train_feed)
        # Loss on all of the validation data with the updated parameters.
        val_losses.append(session.run(loss_tensor, val_feed))

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.c: Optimize the model and return the intermediate losses.

    Optimize the model using minibatch stochastic gradient descent by
    running the variable updates for self.iterations iterations. Minibatches
    are drawn from two MinibatchIndefinitelyGenerator instances (training
    and validation) created before the loop with self.batch_size and
    self.shuffle.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked with the model after every
            iteration

    Returns:
        A tuple of lists, where the first list contains the training loss of
        each iteration and the second list contains the validation loss of
        each iteration. The validation loss is computed using the same
        amount of data as the training loss, but using the validation data.

    N and M are the numbers of training and validation points, and R and S
    are the dimensions of each input and target data point.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None, ) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    loss_tensor = self.get_loss_tensor(
        model.prediction_tensor, target_ph,
        model.get_param_vars(regularizable=True))
    updates = self.get_updates(loss_tensor,
                               model.get_param_vars(trainable=True))
    update_ops = [
        tf.assign(old_var, new_var_or_tensor)
        for (old_var, new_var_or_tensor) in updates
    ]

    train_losses = []
    val_losses = []
    # Shuffling follows self.shuffle; it was previously forced to True,
    # which ignored the solver's configuration.
    train_generator = MinibatchIndefinitelyGenerator(
        train_data, self.batch_size, self.shuffle)
    val_generator = MinibatchIndefinitelyGenerator(
        val_data, self.batch_size, self.shuffle)

    for iter_ in range(self.iterations):
        # Draw the training batch first, then the validation batch.
        train_batch = train_generator.next()
        val_batch = val_generator.next()

        # Loss on the training minibatch used for this update, evaluated
        # before the parameters change.
        train_feed = dict(zip(placeholders, train_batch))
        train_loss = session.run(loss_tensor, train_feed)
        session.run(update_ops, train_feed)

        # Loss on a validation minibatch of the same size.
        val_feed = dict(zip(placeholders, val_batch))
        val_loss = session.run(loss_tensor, val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def get_param_values(self, **tags):
    """
    Evaluate the parameter variables selected by `tags`.

    Args:
        **tags: keyword filters forwarded unchanged to self.get_param_vars.

    Returns:
        A list with one evaluated value per variable, in the order in which
        self.get_param_vars returned the variables.
    """
    values = []
    for variable in self.get_param_vars(**tags):
        # The session is looked up for every variable, exactly as needed
        # for its evaluation.
        values.append(variable.eval(session=tfu.get_session()))
    return values
def solve(self, input_train_data, target_train_data, input_val_data,
          target_val_data, model, callback=None):
    """
    Question 6.a: Optimize the model and return the intermediate losses.

    Optimize the model using gradient descent by running the variable
    updates for self.iterations iterations. Every iteration uses all of the
    training data for the update and all of the validation data for the
    validation loss.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable invoked as callback(model) once per
            iteration, after the losses of that iteration were recorded.

    Returns:
        A tuple of lists, where the first list contains the training loss
        of each iteration and the second list contains the validation loss
        of each iteration.

    N and M are the numbers of training and validation points,
    respectively, and R and S are the dimensions for each input and target
    data point, respectively.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(
        tf.float32, shape=(None, ) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    # Graph pieces shared across iterations: the loss and one assign op per
    # (variable, new_value) update pair.
    loss_tensor = self.get_loss_tensor(
        model.prediction_tensor, target_ph,
        model.get_param_vars(regularizable=True))
    updates = self.get_updates(loss_tensor,
                               model.get_param_vars(trainable=True))
    update_ops = [
        tf.assign(old_var, new_var_or_tensor)
        for (old_var, new_var_or_tensor) in updates
    ]

    train_losses = []
    val_losses = []

    # The complete data sets are fed on every iteration, so the feed dicts
    # never change and are built once up front.
    train_feed = dict(zip(placeholders, train_data))
    val_feed = dict(zip(placeholders, val_data))

    for iter_ in range(self.iterations):
        # The training loss is measured *before* applying this iteration's
        # updates; the validation loss is measured after them.
        train_loss = session.run(loss_tensor, feed_dict=train_feed)
        session.run(update_ops, feed_dict=train_feed)
        val_loss = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)

    return train_losses, val_losses