def testNonTrainable(self):
    """Tests the network doesn't contain trainable variables.

    Builds an `Adam` network, connects it once to a random [10, 5] input and
    checks that the module reports zero variables.
    """
    grads = tf.random_normal([10, 5])
    adam = networks.Adam()

    # Connect the module to the graph before asking for its variables.
    adam(grads, adam.initial_state_for_inputs(grads))

    self.assertEqual(len(nn.get_variables_in_module(adam)), 0)
def testTrainable(self):
    """Tests the network contains trainable variables.

    Builds a single-layer `CoordinateWiseDeepLSTM`, connects it once to a
    random [10, 5] input and checks how many variables the module reports.
    """
    grads = tf.random_normal([10, 5])
    lstm = networks.CoordinateWiseDeepLSTM(layers=(1,))

    # Connect the module to the graph before asking for its variables.
    lstm(grads, lstm.initial_state_for_inputs(grads))

    # Weights and biases for two layers.
    expected_count = 4
    self.assertEqual(len(nn.get_variables_in_module(lstm)), expected_count)
def testTrainable(self):
    """Tests the network contains trainable variables.

    Builds a single-layer `KernelDeepLSTM` for a 5x5 kernel, connects it once
    to a random input and checks how many variables the module reports.
    """
    kernel_shape = [5, 5]
    # The input has to be 4-dimensional.
    input_shape = kernel_shape + [2, 2]
    grads = tf.random_normal(input_shape)
    lstm = networks.KernelDeepLSTM(layers=(1,), kernel_shape=kernel_shape)

    # Connect the module to the graph before asking for its variables.
    lstm(grads, lstm.initial_state_for_inputs(grads))

    # Weights and biases for two layers.
    expected_count = 4
    self.assertEqual(len(nn.get_variables_in_module(lstm)), expected_count)
def save(network, sess, filename=None):
    """Evaluate a network's variables and optionally pickle them to disk.

    Args:
      network: Module whose variables are collected with
          `nn.get_variables_in_module`.
      sess: Session passed to each variable's `eval` call.
      filename: Optional path. When truthy, the collected values are pickled
          to this file (opened in binary write mode).

    Returns:
      A `defaultdict` mapping module name -> {variable name -> value}, where
      the two names are the last two "/"-separated components of the variable
      name with its ":<suffix>" removed.
    """
    snapshot = collections.defaultdict(dict)

    for var in nn.get_variables_in_module(network):
        # Strip everything from the first ":" onwards, then split the scope.
        scope_path = var.name.partition(":")[0].split("/")
        owner = scope_path[-2]
        leaf = scope_path[-1]
        snapshot[owner][leaf] = var.eval(sess)

    if not filename:
        return snapshot

    with open(filename, "wb") as out:
        pickle.dump(snapshot, out)
    return snapshot
def meta_loss(self,
              make_loss,
              len_unroll,
              net_assignments=None,
              second_derivatives=False):
    """Returns an operator computing the meta-loss.

    The optimizee loss is unrolled for `len_unroll` steps inside a
    `tf.while_loop`; at every step the optimizer networks turn the gradients
    of the loss into parameter deltas. The meta-loss is the sum of the
    optimizee loss over all `len_unroll + 1` evaluations.

    Args:
      make_loss: Callable which returns the optimizee loss; note that this
          should create its ops in the default graph.
      len_unroll: Number of steps to unroll.
      net_assignments: variable to optimizer mapping. If not None, it should be
          a list of (k, names) tuples, where k is a valid key in the kwargs
          passed at construction time and names is a list of variable names.
      second_derivatives: Use second derivatives (default is false).

    Returns:
      namedtuple containing (loss, update, reset, fx, x)
    """

    # Construct an instance of the problem only to grab the variables. This
    # loss will never be evaluated.
    x, constants = _get_variables(make_loss)

    print("Optimizee variables")
    print([op.name for op in x])
    print("Problem variables")
    print([op.name for op in constants])

    # Create the optimizer networks and find the subsets of variables to assign
    # to each optimizer.
    nets, net_keys, subsets = _make_nets(x, self._config, net_assignments)

    # Store the networks so we can save them later.
    self._nets = nets

    # Create hidden state for each subset of variables.
    state = []
    with tf.name_scope("states"):
        for i, (subset, key) in enumerate(zip(subsets, net_keys)):
            net = nets[key]
            with tf.name_scope("state_{}".format(i)):
                state.append(
                    _nested_variable(
                        [
                            net.initial_state_for_inputs(x[j], dtype=tf.float32)
                            for j in subset
                        ],
                        name="state",
                        trainable=False))

    def update(net, fx, x, state):
        """Parameter and RNN state update."""
        with tf.name_scope("gradients"):
            gradients = tf.gradients(fx, x)

            # Stopping the gradient here corresponds to what was done in the
            # original L2L NIPS submission. However it looks like things like
            # BatchNorm, etc. don't support second-derivatives so we still need
            # this term.
            if not second_derivatives:
                gradients = [tf.stop_gradient(g) for g in gradients]

        with tf.name_scope("deltas"):
            deltas, state_next = zip(
                *[net(g, s) for g, s in zip(gradients, state)])
            state_next = list(state_next)

        return deltas, state_next

    def time_step(t, fx_array, x, state):
        """While loop body."""
        x_next = list(x)
        state_next = []

        with tf.name_scope("fx"):
            fx = _make_with_custom_variables(make_loss, x)
            fx_array = fx_array.write(t, fx)

        with tf.name_scope("dx"):
            for subset, key, s_i in zip(subsets, net_keys, state):
                x_i = [x[j] for j in subset]
                deltas, s_i_next = update(nets[key], fx, x_i, s_i)

                for idx, j in enumerate(subset):
                    x_next[j] += deltas[idx]
                state_next.append(s_i_next)

        with tf.name_scope("t_next"):
            t_next = t + 1

        return t_next, fx_array, x_next, state_next

    # Define the while loop.
    fx_array = tf.TensorArray(tf.float32, size=len_unroll + 1,
                              clear_after_read=False)
    _, fx_array, x_final, s_final = tf.while_loop(
        cond=lambda t, *_: t < len_unroll,
        body=time_step,
        loop_vars=(0, fx_array, x, state),
        parallel_iterations=1,
        swap_memory=True,
        name="unroll")

    # Evaluate the loss once more at the final parameters.
    with tf.name_scope("fx"):
        fx_final = _make_with_custom_variables(make_loss, x_final)
        fx_array = fx_array.write(len_unroll, fx_final)

    # NOTE(review): `TensorArray.pack` was renamed to `stack` in newer
    # TensorFlow releases -- confirm the minimum supported version and switch.
    loss = tf.reduce_sum(fx_array.pack(), name="loss")

    # Reset the state; should be called at the beginning of an epoch.
    with tf.name_scope("reset"):
        variables = (nest.flatten(state) + x + constants)
        # Empty array as part of the reset process.
        # NOTE(review): `tf.initialize_variables` is superseded by
        # `tf.variables_initializer` in newer TensorFlow -- confirm and switch.
        reset = [tf.initialize_variables(variables), fx_array.close()]

    # Operator to update the parameters and the RNN state after our loop, but
    # during an epoch. Kept under a distinct local name so the nested `update`
    # helper that `time_step` closes over is never rebound.
    with tf.name_scope("update"):
        update_ops = (nest.flatten(_nested_assign(x, x_final)) +
                      nest.flatten(_nested_assign(state, s_final)))

    # Log internal variables. `items()` (rather than the Python 2 only
    # `iteritems()`) keeps this working on both Python 2 and Python 3.
    for k, net in nets.items():
        print("Optimizer '{}' variables".format(k))
        print([op.name for op in nn.get_variables_in_module(net)])

    return MetaLoss(loss, update_ops, reset, fx_final, x_final)