def build_optimizer(loss, update_ops=[], scope=None, reuse=None):
    with tf.variable_scope(scope, 'gradients', reuse=reuse):
        print("Building optimizer")
        optimizer = AdamaxOptimizer(args.lr) if args.adamax else AdamOptimizer(
            args.lr)
        # max clip and max norm hyperparameters from Sonderby's LVAE code
        clipped, grad_norm = clip_gradients(optimizer, loss,
                                            max_clip=0.9, max_norm=4)
        with tf.control_dependencies(update_ops):
            train_step = optimizer.apply_gradients(clipped)
    return train_step
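
# Hypothetical sketch of the clip_gradients helper used above; its real
# definition lives elsewhere in the repo and may differ. Assumed behaviour,
# matching the max_clip/max_norm hyperparameters from Sonderby's LVAE code:
# clip each (dense) gradient element-wise to [-max_clip, max_clip], then
# rescale the whole set so its global norm does not exceed max_norm, and
# return the (gradient, variable) pairs plus the pre-clipping global norm.
import tensorflow as tf  # assumed to already be imported at the top of the module


def clip_gradients_sketch(optimizer, loss, max_clip=0.9, max_norm=4):
    """Illustrative stand-in for clip_gradients (assumes dense gradients)."""
    grads_and_vars = [(g, v) for g, v in optimizer.compute_gradients(loss)
                      if g is not None]
    grads = [g for g, _ in grads_and_vars]
    tvars = [v for _, v in grads_and_vars]
    grad_norm = tf.global_norm(grads)  # pre-clipping norm, useful for logging
    grads = [tf.clip_by_value(g, -max_clip, max_clip) for g in grads]
    grads, _ = tf.clip_by_global_norm(grads, max_norm)
    return list(zip(grads, tvars)), grad_norm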
def createGraph(self):
    """Creates graph for training"""
    self.base_cost = 0.0
    self.accuracy = 0
    num_sizes = len(self.bins)
    self.cost_list = []
    sum_weight = 0
    self.bin_losses = []
    saturation_loss = []

    # Create all bins and calculate losses for them
    with vs.variable_scope("var_lengths"):
        for seqLength, itemCount, ind in zip(self.bins, self.count_list,
                                             range(num_sizes)):
            x_in = tf.placeholder("int32", [itemCount, seqLength])
            y_in = tf.placeholder("int64", [itemCount, seqLength])
            self.x_input.append(x_in)
            self.y_input.append(y_in)
            self.saturation_costs = []
            c, a, _, _, perItemCost, _ = self.createLoss(x_in, y_in, seqLength)

            weight = 1.0  # /seqLength
            sat_cost = tf.add_n(self.saturation_costs) / (
                (seqLength**2) * itemCount)
            saturation_loss.append(sat_cost * weight)

            self.bin_losses.append(perItemCost)
            self.base_cost += c * weight
            sum_weight += weight
            self.accuracy += a
            self.cost_list.append(c)
            tf.get_variable_scope().reuse_variables()

    # calculate the total loss
    self.base_cost /= sum_weight
    self.accuracy /= num_sizes
    self.sat_loss = tf.reduce_sum(
        tf.stack(saturation_loss)) * self.saturation_weight / sum_weight
    cost = self.base_cost + self.sat_loss

    # add gradient noise proportional to learning rate
    tvars = tf.trainable_variables()
    grads_0 = tf.gradients(cost, tvars)
    grads = []
    for grad in grads_0:
        grad1 = grad + tf.truncated_normal(
            tf.shape(grad)) * self.learning_rate * 1e-4
        grads.append(grad1)

    # optimizer
    optimizer = AdamaxOptimizer(self.learning_rate,
                                beta1=0.9,
                                beta2=1.0 - self.beta2_rate,
                                epsilon=1e-8)
    self.optimizer = optimizer.apply_gradients(
        zip(grads, tvars), global_step=self.global_step)

    # some values for printout
    max_vals = []
    for var in tvars:
        varV = optimizer.get_slot(var, "m")
        max_vals.append(varV)
    self.gnorm = tf.global_norm(max_vals)

    self.cost_list = tf.stack(self.cost_list)
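
# Minimal training-step sketch (an illustration, not part of the original
# file): `model` is an object on which createGraph() has already run, `sess`
# is a tf.Session, and `batches` is a hypothetical list holding one (x, y)
# pair of integer arrays per bin, shaped [itemCount, seqLength] to match the
# placeholders created above. It assumes learning_rate and beta2_rate are
# graph variables/constants rather than placeholders (otherwise feed them too).
def run_train_step_sketch(sess, model, batches):
    feed = {}
    for x_in, y_in, (x, y) in zip(model.x_input, model.y_input, batches):
        feed[x_in] = x
        feed[y_in] = y
    # One optimizer step; also fetch the weighted cost and mean accuracy.
    _, cost, accuracy = sess.run(
        [model.optimizer, model.base_cost, model.accuracy], feed_dict=feed)
    return cost, accuracy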