Example 1
def testNonTrainable(self):
    """Tests the network doesn't contain trainable variables."""
    shape = [10, 5]
    gradients = tf.random_normal(shape)
    net = networks.Adam()
    state = net.initial_state_for_inputs(gradients)
    net(gradients, state)
    variables = nn.get_variables_in_module(net)
    self.assertEqual(len(variables), 0)
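
The assertion relies on nn.get_variables_in_module, which collects the variables created inside the module's own variable scope. A rough equivalent using plain TF1 collections is sketched below; the scope name "adam" is an assumption for illustration only, not taken from the excerpt.

# Rough TF1 equivalent of the check above; the module's variable-scope name
# ("adam") is an assumption, not taken from the excerpt.
import tensorflow as tf

trainable_in_scope = tf.get_collection(
    tf.GraphKeys.TRAINABLE_VARIABLES, scope="adam")
assert not trainable_in_scope  # the fixed Adam network creates no trainable variables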
Example 2
def testTrainable(self):
    """Tests the network contains trainable variables."""
    shape = [10, 5]
    gradients = tf.random_normal(shape)
    net = networks.CoordinateWiseDeepLSTM(layers=(1,))
    state = net.initial_state_for_inputs(gradients)
    net(gradients, state)
    # Weights and biases for the LSTM layer and the linear output layer.
    variables = nn.get_variables_in_module(net)
    self.assertEqual(len(variables), 4)
Example 3
def testTrainable(self):
    """Tests the network contains trainable variables."""
    kernel_shape = [5, 5]
    shape = kernel_shape + [2, 2]  # The input has to be 4-dimensional.
    gradients = tf.random_normal(shape)
    net = networks.KernelDeepLSTM(layers=(1,), kernel_shape=kernel_shape)
    state = net.initial_state_for_inputs(gradients)
    net(gradients, state)
    # Weights and biases for the LSTM layer and the linear output layer.
    variables = nn.get_variables_in_module(net)
    self.assertEqual(len(variables), 4)
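
The test methods in Examples 1 to 3 are excerpts from a tf.test.TestCase subclass. A minimal sketch of the harness they assume is shown below; the import paths for networks and nn are assumptions based on the identifiers used in the snippets.

# Sketch of the surrounding test harness for Examples 1-3 (assumed layout).
import tensorflow as tf

import networks
import nn


class NetworksTest(tf.test.TestCase):

    def testNonTrainable(self):
        """The fixed Adam network should expose no trainable variables."""
        gradients = tf.random_normal([10, 5])
        net = networks.Adam()
        net(gradients, net.initial_state_for_inputs(gradients))
        self.assertEqual(len(nn.get_variables_in_module(net)), 0)


if __name__ == "__main__":
    tf.test.main()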
Example 4
def save(network, sess, filename=None):
    """Save the variables contained by a network to disk."""
    to_save = collections.defaultdict(dict)
    variables = nn.get_variables_in_module(network)

    for v in variables:
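        # v.name has the form ".../module_name/variable_name:0"; drop the
        # output-index suffix and keep the last two path components.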
        split = v.name.split(":")[0].split("/")
        module_name = split[-2]
        variable_name = split[-1]
        to_save[module_name][variable_name] = v.eval(sess)

    if filename:
        with open(filename, "wb") as f:
            pickle.dump(to_save, f)

    return to_save
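
A hypothetical usage sketch for save() follows; the network configuration and filename are illustrative assumptions rather than part of the excerpt.

# Hypothetical usage of save(); configuration and filename are assumptions.
import tensorflow as tf

import networks  # assumed module path

net = networks.CoordinateWiseDeepLSTM(layers=(20, 20))
gradients = tf.random_normal([10, 5])
net(gradients, net.initial_state_for_inputs(gradients))  # build the variables

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Returns {module_name: {variable_name: ndarray}} and also writes a pickle.
    saved = save(net, sess, filename="cw_deep_lstm.l2l")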
Example 5
    def meta_loss(self,
                  make_loss,
                  len_unroll,
                  net_assignments=None,
                  second_derivatives=False):
        """Returns an operator computing the meta-loss.

    Args:
      make_loss: Callable which returns the optimizee loss; note that this
          should create its ops in the default graph.
      len_unroll: Number of steps to unroll.
      net_assignments: variable to optimizer mapping. If not None, it should be
          a list of (k, names) tuples, where k is a valid key in the kwargs
          passed at at construction time and names is a list of variable names.
      second_derivatives: Use second derivatives (default is false).

    Returns:
      namedtuple containing (loss, update, reset, fx, x)
    """

        # Construct an instance of the problem only to grab the variables. This
        # loss will never be evaluated.
        x, constants = _get_variables(make_loss)

        print("Optimizee variables")
        print([op.name for op in x])
        print("Problem variables")
        print([op.name for op in constants])

        # Create the optimizer networks and find the subsets of variables to assign
        # to each optimizer.
        nets, net_keys, subsets = _make_nets(x, self._config, net_assignments)

        # Store the networks so we can save them later.
        self._nets = nets

        # Create hidden state for each subset of variables.
        state = []
        with tf.name_scope("states"):
            for i, (subset, key) in enumerate(zip(subsets, net_keys)):
                net = nets[key]
                with tf.name_scope("state_{}".format(i)):
                    state.append(_nested_variable(
                        [net.initial_state_for_inputs(x[j], dtype=tf.float32)
                         for j in subset],
                        name="state",
                        trainable=False))

        def update(net, fx, x, state):
            """Parameter and RNN state update."""
            with tf.name_scope("gradients"):
                gradients = tf.gradients(fx, x)

                # Stopping the gradient here corresponds to what was done in the
                # original L2L NIPS submission. However it looks like things like
                # BatchNorm, etc. don't support second-derivatives so we still need
                # this term.
                if not second_derivatives:
                    gradients = [tf.stop_gradient(g) for g in gradients]

            with tf.name_scope("deltas"):
                deltas, state_next = zip(
                    *[net(g, s) for g, s in zip(gradients, state)])
                state_next = list(state_next)

            return deltas, state_next

        def time_step(t, fx_array, x, state):
            """While loop body."""
            x_next = list(x)
            state_next = []

            with tf.name_scope("fx"):
                fx = _make_with_custom_variables(make_loss, x)
                fx_array = fx_array.write(t, fx)

            with tf.name_scope("dx"):
                for subset, key, s_i in zip(subsets, net_keys, state):
                    x_i = [x[j] for j in subset]
                    deltas, s_i_next = update(nets[key], fx, x_i, s_i)

                    for idx, j in enumerate(subset):
                        x_next[j] += deltas[idx]
                    state_next.append(s_i_next)

            with tf.name_scope("t_next"):
                t_next = t + 1

            return t_next, fx_array, x_next, state_next

        # Define the while loop.
        fx_array = tf.TensorArray(tf.float32,
                                  size=len_unroll + 1,
                                  clear_after_read=False)
        _, fx_array, x_final, s_final = tf.while_loop(
            cond=lambda t, *_: t < len_unroll,
            body=time_step,
            loop_vars=(0, fx_array, x, state),
            parallel_iterations=1,
            swap_memory=True,
            name="unroll")

        with tf.name_scope("fx"):
            fx_final = _make_with_custom_variables(make_loss, x_final)
            fx_array = fx_array.write(len_unroll, fx_final)

        loss = tf.reduce_sum(fx_array.pack(), name="loss")

        # Reset the state; should be called at the beginning of an epoch.
        with tf.name_scope("reset"):
            variables = (nest.flatten(state) + x + constants)
            # Empty array as part of the reset process.
            reset = [tf.initialize_variables(variables), fx_array.close()]

        # Operator to update the parameters and the RNN state after our loop, but
        # during an epoch.
        with tf.name_scope("update"):
            update = (nest.flatten(_nested_assign(x, x_final)) +
                      nest.flatten(_nested_assign(state, s_final)))

        # Log internal variables.
        for k, net in nets.items():
            print("Optimizer '{}' variables".format(k))
            print([op.name for op in nn.get_variables_in_module(net)])

        return MetaLoss(loss, update, reset, fx_final, x_final)
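
A hypothetical driver for meta_loss is sketched below; the MetaOptimizer construction and the quadratic optimizee are assumptions used only to show the returned ops in action.

# Hypothetical driver for meta_loss(); the optimizer construction and the
# quadratic optimizee below are illustrative assumptions.
import tensorflow as tf


def make_quadratic_loss():
    """Optimizee: the variables created here are the ones the meta-loss updates."""
    x = tf.get_variable(
        "x", shape=[10], initializer=tf.random_normal_initializer())
    return tf.reduce_sum(tf.square(x))


optimizer = MetaOptimizer()  # assumed constructor; network config omitted
loss, update, reset, fx_final, x_final = optimizer.meta_loss(
    make_quadratic_loss, len_unroll=20)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(reset)                  # start of an epoch: reset x and RNN state
    for _ in range(100):
        cost, _ = sess.run([loss, update])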