Example #1
0
def _four_layer_convnet(inputs,
                        is_training,
                        scope,
                        weight_decay,
                        reuse=tf.AUTO_REUSE,
                        params=None,
                        moments=None,
                        depth_multiplier=1.0,
                        backprop_through_moments=True,
                        use_bounded_activation=False,
                        keep_spatial_dims=False):
  """Embeds `inputs` with four conv_bn -> activation -> 2x2 max-pool blocks.

  Gradients are stopped at `inputs`. Every block uses
  `int(64 * depth_multiplier)` output channels and lives in its own
  `layer_<i>` variable scope nested under `scope`.

  Args:
    inputs: Input tensor fed to the first block.
    is_training: Forwarded to `conv_bn`.
    scope: Name of the enclosing variable scope.
    weight_decay: Forwarded to `conv_bn`.
    reuse: Reuse mode for `scope` and for each per-layer scope.
    params: Forwarded to `conv_bn` (optional externally supplied parameters).
    moments: Forwarded to `conv_bn` (optional externally supplied moments).
    depth_multiplier: Scales the 64 base channels of every block.
    backprop_through_moments: Forwarded to `conv_bn`.
    use_bounded_activation: If True use `tf.nn.relu6`, otherwise `tf.nn.relu`.
    keep_spatial_dims: If False the final feature map is flattened.

  Returns:
    A dict with keys 'embeddings' (the output tensor), 'params' and 'moments'
    (ordered dicts merging what each `conv_bn` call returned, in layer order).
  """
  num_filters = int(64 * depth_multiplier)
  activation = tf.nn.relu6 if use_bounded_activation else tf.nn.relu
  collected_params = collections.OrderedDict()
  collected_moments = collections.OrderedDict()

  net = tf.stop_gradient(inputs)
  with tf.variable_scope(scope, reuse=reuse):
    for block_idx in range(4):
      with tf.variable_scope('layer_{}'.format(block_idx), reuse=reuse):
        net, block_params, block_moments = conv_bn(
            net, [3, 3],
            num_filters,
            stride=1,
            weight_decay=weight_decay,
            params=params,
            moments=moments,
            is_training=is_training,
            backprop_through_moments=backprop_through_moments)
        collected_params.update(block_params)
        collected_moments.update(block_moments)

      # Activation and pooling sit outside the per-layer variable scope.
      net = activation(net)
      net = tf.layers.max_pooling2d(net, [2, 2], 2)
      logging.info('Output of block %d: %s', block_idx, net.shape)

    if not keep_spatial_dims:
      net = tf.layers.flatten(net)

    return {
        'embeddings': net,
        'params': collected_params,
        'moments': collected_moments
    }
Example #2
0
def conv1d(x, filters, kernel_size, strides=1, padding='causal', dilation_rate=1, act=None,
           init=None, scope="conv1d", use_bias=True):
    """Applies a 1-D (optionally causal, dilated) convolution to `x`.

    Args:
        x: Tensor with a rank-3 static shape; the last dimension is used as
            the number of input channels (presumably (batch, time, channels)
            -- TODO confirm against callers).
        filters: Number of output channels.
        kernel_size: Width of the convolution kernel.
        strides: Convolution stride.
        padding: 'causal' left-pads by `dilation_rate * (kernel_size - 1)` and
            then convolves with 'VALID'; any other value is passed straight to
            `tf.nn.convolution`.
        dilation_rate: Dilation of the kernel.
        act: Optional callable applied to the output.
        init: Initializer for the kernel variable.
        scope: Variable scope name (opened with `tf.AUTO_REUSE`).
        use_bias: Whether to add a zero-initialised bias.

    Returns:
        The convolution output, after bias and activation if requested.
    """
    # Unpacking enforces a rank-3 static shape; only the channel count is used.
    _, _, in_channels = x.get_shape().as_list()
    # Taken from keras, there is a faster version from magenta
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable('kernel', shape=(kernel_size, in_channels, filters),
                                 dtype=tf.float32, initializer=init)

        conv_padding = padding
        if padding == 'causal':
            # Pad only on the left so an output step never sees later inputs.
            left_pad = dilation_rate * (kernel_size - 1)
            x = tf.pad(x, [[0, 0], [left_pad, 0], [0, 0]])
            conv_padding = 'VALID'

        result = tf.nn.convolution(
            input=x,
            filter=kernel,
            dilation_rate=(dilation_rate,),
            strides=(strides,),
            padding=conv_padding)
        if use_bias:
            bias = tf.get_variable('bias', shape=(filters,), dtype=tf.float32,
                                   initializer=tf.initializers.zeros)
            result = tf.add(result, bias)
        if act is not None:
            result = act(result)
    return result
Example #3
0
 def _fc_layer(self, embedding):
   """Returns logits from the linear classifier in the 'fc_finetune' scope.

   The classifier is configured from `self.logit_dim`,
   `self.cosine_classifier`, `self.cosine_logits_multiplier` and
   `self.use_weight_norm`; its variables are shared across calls because the
   scope is opened with `tf.AUTO_REUSE`.
   """
   classifier_args = (embedding, self.logit_dim, self.cosine_classifier,
                      self.cosine_logits_multiplier, self.use_weight_norm)
   with tf.variable_scope('fc_finetune', reuse=tf.AUTO_REUSE):
     return functional_classifiers.linear_classifier(*classifier_args)
Example #4
0
    def get_train_op():
      """Returns the mean squared error between `q_clicked` and `target_clicked`.

      NOTE: despite its name, this returns the loss tensor, not an optimizer
      op. When `self.summary_writer` is set, the loss is also recorded as the
      scalar summary 'Loss' under the 'Losses' scope.
      """
      td_error = q_clicked - target_clicked
      loss = tf.reduce_mean(input_tensor=tf.square(td_error))
      if self.summary_writer is None:
        return loss

      with tf.variable_scope('Losses'):
        tf.summary.scalar('Loss', loss)
      return loss
Example #5
0
def recsim_dqn_network(user, doc, scope):
    """Scores a (user, doc) pair with a small fully connected Q-network.

    `user` and `doc` are concatenated along axis 1 and passed through two
    ReLU dense layers (256 and 32 units) followed by a single linear unit
    named 'output'. Variables live under `scope`, opened with
    `tf.AUTO_REUSE`.

    Returns:
        The output of the final one-unit dense layer.
    """
    features = tf.concat([user, doc], axis=1)
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        net = features
        for width in (256, 32):
            net = tf.layers.dense(net, width, activation=tf.nn.relu)
        return tf.layers.dense(net, 1, name='output')
Example #6
0
    def build_graph(self):
        """Builds the last-layer classifier graph and restores its weights.

        Creates a fresh `tf.Graph` with its own session, one dense layer under
        the 'last_layer' scope, a softmax cross-entropy loss, a plain
        gradient-descent training op, gradient histograms and a summary
        writer, then restores the layer's variables from `self.model_dir`.
        """
        graph = tf.Graph()
        self.g = graph
        with graph.as_default():
            # Created while `graph` is the default, so the session is bound to it.
            self.sess = tf.Session()

            # Inputs and targets of the softmax classifier.
            self.x = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.dim_input])
            self.y = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.num_classes])

            # The model is a single dense layer producing the logits.
            with tf.variable_scope('last_layer'):
                logits = tf.layers.dense(inputs=self.x, units=self.num_classes)
            self.z = logits

            per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.y, logits=logits)
            self.loss = tf.reduce_mean(per_example_loss)

            self.output_probs = tf.nn.softmax(logits)

            # Every trainable variable of this fresh graph belongs to the layer.
            # Indices 0 and 1 are stacked below with the second one expanded to
            # a row (presumably kernel then bias -- TODO confirm ordering).
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.ll_vars = trainable
            second_as_row = tf.expand_dims(trainable[1], axis=0)
            self.ll_vars_concat = tf.concat([trainable[0], second_as_row], 0)

            _variable_summaries(self.ll_vars_concat)

            # Saver used for the last-layer weights while running bootstrap.
            self.saver = tf.train.Saver(var_list=trainable)

            self.gd_opt = tf.train.GradientDescentOptimizer(self.step_size)

            # SGD step on the last layer.
            grads_and_vars = self.gd_opt.compute_gradients(self.loss)
            self.train_op = self.gd_opt.apply_gradients(grads_and_vars)

            for grad, variable in grads_and_vars:
                if grad is None:
                    continue
                # Replace the last ':' of the variable name (e.g. in ':0') with
                # '_' to build the summary tag.
                var_name = variable.name
                colon_at = var_name.rindex(':')
                tag = var_name[:colon_at] + '_' + var_name[colon_at + 1:]
                tf.summary.histogram(tag + '/grad_hist_boot_sgd', grad)

            # Merge all the summaries and write them out.
            self.all_summaries = tf.summary.merge_all()
            log_dir = os.path.join(self.working_dir, 'logs')
            self.writer = tf.summary.FileWriter(log_dir, graph=graph)

            restore_saver = tf.train.Saver(var_list=trainable)
            print('Loading the network...')
            # Restores from checkpoint
            restore_saver.restore(self.sess, self.model_dir)
            print('Graph successfully loaded.')
Example #7
0
    def compute_logits_for_episode(self, support_embeddings, query_embeddings,
                                   data):
        """Computes CrossTransformer logits for one episode.

        Support and query embeddings are each passed through
        `functional_backbones.conv` twice: once under 'tformer_keys' (output
        size `self.query_dim`) and once under 'tformer_values' (output size
        `self.val_dim`). Within each scope the query call reuses the
        parameters returned by the support call. Support tensors and the
        one-hot support labels go through `distribute_utils.aggregate`
        before distances are computed.

        Returns:
          The negated, transposed distances, also stored in
          `self.test_logits`.
        """

        def _shared_projection(scope_name, out_dim):
            """Projects support then query embeddings with the same params."""
            with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
                support_out, shared_params = functional_backbones.conv(
                    support_embeddings, [1, 1],
                    out_dim,
                    1,
                    weight_decay=self.tformer_weight_decay)
                query_out, _ = functional_backbones.conv(
                    query_embeddings, [1, 1],
                    out_dim,
                    1,
                    params=shared_params,
                    weight_decay=self.tformer_weight_decay)
            return support_out, query_out

        support_keys, query_queries = _shared_projection(
            'tformer_keys', self.query_dim)
        support_values, query_values = _shared_projection(
            'tformer_values', self.val_dim)

        onehot_support_labels = distribute_utils.aggregate(
            data.onehot_support_labels)
        support_keys = distribute_utils.aggregate(support_keys)
        support_values = distribute_utils.aggregate(support_values)

        labels = tf.argmax(onehot_support_labels, axis=1)

        # Both distance functions take the same arguments; pick by flag.
        if self.rematerialize:
            distance_fn = self._get_dist_rematerialize
        else:
            distance_fn = self._get_dist
        distances = distance_fn(query_queries, query_values, support_keys,
                                support_values, labels)

        self.test_logits = -tf.transpose(distances)
        return self.test_logits
Example #8
0
 def forward_pass_fc(self, embeddings, source):
     """Returns logits from the separate-head linear classifier for `source`.

     The start index for `source` is gathered from
     `self._start_inds_for_sources`; classifier variables live under the 'fc'
     scope, opened with `tf.AUTO_REUSE`.
     """
     source_start = tf.gather(self._start_inds_for_sources, source)
     with tf.variable_scope('fc', reuse=tf.AUTO_REUSE):
         # self.logit_dim is a list of the datasets' numbers of classes.
         return functional_classifiers.separate_head_linear_classifier(
             embeddings, self.logit_dim, source, source_start,
             self.cosine_classifier, self.cosine_logits_multiplier)
Example #9
0
def sn_block(x, filters, kernel_size, dilation, scope="sn_block"):
    """Runs one dilated-convolution block with residual and skip outputs.

    A bias-free dilated `conv1d` followed by leaky ReLU (alpha=0.1) produces
    a hidden activation. Two bias-free single-filter, width-1 `conv1d`
    layers then map it to the skip output and to a term added onto `x`.

    Returns:
        A `(residual, skip_out)` pair, where `residual` is `x` plus the
        'network_in' projection.
    """
    with tf.variable_scope(scope):
        pre_activation = conv1d(x, filters=filters, kernel_size=kernel_size,
                                dilation_rate=dilation, scope="conv_1", use_bias=False)
        hidden = tf.nn.leaky_relu(pre_activation, alpha=0.1)

        skip_out = conv1d(hidden, filters=1, kernel_size=1, scope="skip", use_bias=False)
        to_residual = conv1d(hidden, filters=1, kernel_size=1, scope="network_in", use_bias=False)

        block_out = x
        block_out += to_residual

    return block_out, skip_out
Example #10
0
 def embedding(cls, h, units=None, kernel_size=None):
     """Embeds `h` with a tanh layer inside the 'embedding' variable scope.

     Returns `h` unchanged when `units == 0`. Otherwise applies a `conv1d`
     with `units` filters when `kernel_size` is given, or a dense layer with
     `units` outputs when it is not; both use a tanh activation.
     """
     with tf.variable_scope("embedding", reuse=tf.AUTO_REUSE):
         if units == 0:
             return h
         if kernel_size is None:
             return tf.layers.dense(
                 h,
                 units=units,
                 activation=tf.nn.tanh,
                 kernel_initializer=tf.variance_scaling_initializer)
         return conv1d(h,
                       filters=units,
                       kernel_size=kernel_size,
                       act=tf.nn.tanh)
Example #11
0
    def forward_pass_fc(self, embeddings):
        """Maps embeddings to logits with the linear classifier in scope 'fc'.

        Args:
          embeddings: A Tensor of the penultimate layer activations as computed
            by BaselineLearner.forward_pass.

        Returns:
          The fc layer activations.
        """
        # The output dimensionality is always the number of classes at
        # meta-training time (self.logit_dim).
        head_args = (embeddings, self.logit_dim, self.cosine_classifier,
                     self.cosine_logits_multiplier, self.use_weight_norm)
        with tf.variable_scope('fc', reuse=tf.AUTO_REUSE):
            return functional_classifiers.linear_classifier(*head_args)
Example #12
0
    def _build_op(self):
        """Builds the encoder/latent/decoder ops under `self._scope`.

        `self._split()` supplies one (x, y) pair; a second pair is sliced
        from `self.x` (y is the first slice of the last axis, x is the rest).
        Each pair is encoded by a `NeuralEncoder` with scope "encoder" and
        turned into a latent via `get_z`, stored as `self.c_z` and `self.z`.
        Samples of `self.c_z` are decoded into `self.d`, and `self.h` is its
        `loc`.
        """
        with tf.variable_scope(self._scope):
            # Presumably the context set -- TODO confirm what _split returns.
            split_x, split_y = self._split()
            full_x = self.x[:, :, 1:]
            full_y = self.x[:, :, :1]

            split_repr = NeuralEncoder(scope="encoder")(split_x, split_y)
            self.c_z = get_z(split_repr, self.latent, dist="normal")

            full_repr = NeuralEncoder(scope="encoder")(full_x, full_y)
            self.z = get_z(full_repr, latent_shape=self.latent)

            # Only self.c_z is sampled for decoding.
            latent_samples = self.c_z.sample(self.z_samples)

            decoder = StochasticNeuralDecoder(scope="decoder")
            self.d = decoder(latent_samples, full_x)
            self.h = self.d.loc
Example #13
0
def kaf(linear, name, kernel='rbf', D=None, gamma=None):
    """Applies a kernel activation function to `linear`.

    Args:
        linear: Input tensor of pre-activations.
        name: Name of the mixing-coefficient variable created under 'kaf'.
        kernel: 'rbf' (uses `gauss_kernel`) or 'rbf2d' (uses
            `gauss_kernel2D` on a meshgrid of `D`).
        D: Dictionary points; defaults to 20 values evenly spaced in [-2, 2].
        gamma: Forwarded to the kernel function.

    Returns:
        The kernel values weighted by the coefficients, summed over the last
        axis.

    Raises:
        NotImplementedError: If `kernel` is neither 'rbf' nor 'rbf2d'.
    """
    if D is None:
        D = tf.linspace(start=-2., stop=2., num=20)

    with tf.variable_scope('kaf', reuse=tf.AUTO_REUSE):
        if kernel == "rbf":
            kernel_values = gauss_kernel(linear, D, gamma=gamma)
            alpha_shape = (1, linear.get_shape()[-1], D.get_shape()[0])
        elif kernel == 'rbf2d':
            grid_x, grid_y = tf.meshgrid(D, D)
            kernel_values = gauss_kernel2D(linear, grid_x, grid_y, gamma=gamma)
            # Half the last input dimension, and a squared dictionary size.
            alpha_shape = (1, linear.get_shape()[-1] // 2,
                           D.get_shape()[0] * D.get_shape()[0])
        else:
            raise NotImplementedError()

        alpha = tf.get_variable(name,
                                shape=alpha_shape,
                                initializer=tf.random_normal_initializer(stddev=0.1))
        weighted = tf.multiply(kernel_values, alpha)
        activation = tf.reduce_sum(weighted, axis=-1)
    return activation
Example #14
0
def get_embeddings_vars_copy_ops(embedding_vars_dict, make_copies):
    """Gets copies of the embedding variables or returns those variables.

    This is useful at meta-test time for MAML and the finetuning baseline. In
    particular, at meta-test time, we don't want to make permanent updates to
    the model's variables, but only modifications that persist in the given
    episode. This can be achieved by creating copies of each variable and
    modifying and using these copies instead of the variables themselves.

    Each copy is a zero-initialised local variable with the same shape and
    dtype as its source, so the returned assign op is valid for variables of
    any dtype, not only float32.

    Args:
      embedding_vars_dict: A dict mapping each variable name to the
        corresponding Variable.
      make_copies: A bool. Whether to copy the given variables. If not, those
        variables themselves will be returned. Typically, this is True at
        meta-test time and False at meta-training time.

    Returns:
      embedding_vars_keys: A list of variable names.
      embeddings_vars: A corresponding list of Variables.
      embedding_vars_copy_ops: A (possibly empty) list of operations, each of
        which assigns the value of one of the provided Variables to a new
        Variable which is its copy.
    """
    embedding_vars_keys = []
    embedding_vars = []
    embedding_vars_copy_ops = []
    for name, var in embedding_vars_dict.items():
        embedding_vars_keys.append(name)
        if not make_copies:
            embedding_vars.append(var)
            continue
        with tf.variable_scope('weight_copy'):
            # base_dtype strips the "_ref" flavour that reference variables
            # report, so tf.zeros gets a plain dtype matching the source.
            var_copy = tf.Variable(
                tf.zeros(var.shape.as_list(), dtype=var.dtype.base_dtype),
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            embedding_vars_copy_ops.append(tf.assign(var_copy, var))
        embedding_vars.append(var_copy)
    return embedding_vars_keys, embedding_vars, embedding_vars_copy_ops
Example #15
0
    def _build_train_op(self):
        """Builds a training op.

        The regression target is the reward, reduced by `self.alpha` times the
        gap between the target network's best Q-value and its Q-value for the
        action actually taken, plus the discounted best next-state target
        Q-value (zeroed on terminal transitions). The loss is the Huber loss
        between that target and the online network's Q-value for the taken
        action.

        Returns:
          train_op: An op performing one step of training from replay data.
        """
        replay_next_target_value = tf.reduce_max(
            self._replay_next_target_net_outputs.q_values, 1)
        replay_target_value = tf.reduce_max(
            self._replay_target_net_outputs.q_values, 1)

        replay_action_one_hot = tf.one_hot(self._replay.actions,
                                           self.num_actions,
                                           1.,
                                           0.,
                                           name='action_one_hot')
        replay_chosen_q = tf.reduce_sum(self._replay_net_outputs.q_values *
                                        replay_action_one_hot,
                                        axis=1,
                                        name='replay_chosen_q')
        # Named distinctly from the online-network op above; the original
        # reused 'replay_chosen_q' here, leaving the graph to uniquify it.
        replay_target_chosen_q = tf.reduce_sum(
            self._replay_target_net_outputs.q_values * replay_action_one_hot,
            axis=1,
            name='replay_target_chosen_q')

        augmented_rewards = self._replay.rewards - self.alpha * (
            replay_target_value - replay_target_chosen_q)

        target = (augmented_rewards +
                  self.cumulative_gamma * replay_next_target_value *
                  (1. - tf.cast(self._replay.terminals, tf.float32)))
        # The target is treated as a constant when differentiating the loss.
        target = tf.stop_gradient(target)

        loss = tf.losses.huber_loss(target,
                                    replay_chosen_q,
                                    reduction=tf.losses.Reduction.NONE)
        # Build the mean once and share it between the summary and the
        # optimizer.
        mean_loss = tf.reduce_mean(loss)
        if self.summary_writer is not None:
            with tf.variable_scope('Losses'):
                tf.summary.scalar('HuberLoss', mean_loss)
        return self.optimizer.minimize(mean_loss)
Example #16
0
def get_fc_vars_copy_ops(fc_weights, fc_bias, make_copies):
  """Gets copies of the classifier layer variables or returns those variables.

  At meta-test time, a copy is created for each of the given Variables, and
  these copies will be used in place of the original ones. Each copy is a
  zero-initialised local variable with the same shape and dtype as its
  source, so the assign ops are valid for variables of any dtype, not only
  float32.

  Args:
    fc_weights: A Variable for the weights of the fc layer.
    fc_bias: A Variable for the bias of the fc layer.
    make_copies: A bool. Whether to copy the given variables. If not, those
      variables themselves are returned.

  Returns:
    fc_weights: A Variable for the weights of the fc layer. Might be the same as
      the input fc_weights or a copy of it.
    fc_bias: Analogously, a Variable for the bias of the fc layer.
    fc_vars_copy_ops: A (possibly empty) list of operations for assigning the
      value of each of fc_weights and fc_bias to a respective copy variable.
  """
  if not make_copies:
    return fc_weights, fc_bias, []

  copies = []
  fc_vars_copy_ops = []
  with tf.variable_scope('weight_copy'):
    # Weights first, then bias, matching the order of the returned ops.
    for source_var in (fc_weights, fc_bias):
      # base_dtype strips the "_ref" flavour reported by reference variables.
      var_copy = tf.Variable(
          tf.zeros(source_var.shape.as_list(),
                   dtype=source_var.dtype.base_dtype),
          collections=[tf.GraphKeys.LOCAL_VARIABLES])
      fc_vars_copy_ops.append(tf.assign(var_copy, source_var))
      copies.append(var_copy)

  fc_weights_copy, fc_bias_copy = copies
  return fc_weights_copy, fc_bias_copy, fc_vars_copy_ops
Example #17
0
def dense(x, output_size, weight_decay, activation_fn=tf.nn.relu, params=None):
  """Fully connected layer implementation.

  Inputs with more than two dimensions are flattened first.

  Args:
    x: tf.Tensor, input.
    output_size: int, number features in  the fully connected layer.
    weight_decay: float, scaling constant for L2 weight decay on weight
      variables.
    activation_fn: function, to process pre-activations, namely x*w+b.
    params: None or a dict containing the values of the weight and bias params,
      keyed by '<scope name>/kernel' and '<scope name>/bias'. If None, default
      variables are used.

  Returns:
    output: The result of applying `activation_fn` to x*w+b.
    params: dict, that includes parameters used during the calculation.
  """
  with tf.variable_scope('dense'):
    scope_name = tf.get_variable_scope().name

    if len(x.shape) > 2:
      # No trailing comma here: it would wrap the tensor in a 1-tuple and
      # break the shape lookup below.
      x = tf.layers.flatten(x)
    input_size = x.get_shape().as_list()[-1]

    w_name = scope_name + '/kernel'
    b_name = scope_name + '/bias'
    if params is None:
      w = weight_variable([input_size, output_size], weight_decay)
      b = bias_variable([output_size])
    else:
      w = params[w_name]
      b = params[b_name]

  x = tf.nn.xw_plus_b(x, w, b)
  params = collections.OrderedDict(zip([w_name, b_name], [w, b]))
  x = activation_fn(x)
  return x, params
Example #18
0
    def build_graph(self):
        """Builds the neural network graph.

        Creates a fresh graph and session, a linear last layer whose stacked
        kernel and bias are left-multiplied by `self.sigma_half_inv` before
        use, a cross-entropy loss scaled by `self.n`, and the training op
        selected by `self.sampler` ('sgld'/'lmc' add Gaussian noise to the
        gradients, 'sgd' does not). The last-layer variables are finally
        restored from `self.model_dir`.

        NOTE(review): `self.train_op` is only assigned when `self.sampler` is
        one of 'sgld', 'lmc' or 'sgd'.
        """

        # define graph
        self.g = tf.Graph()
        with self.g.as_default():

            # create and store a new session for the graph
            self.sess = tf.Session()

            # define placeholders
            self.x = tf.placeholder(shape=[None, self.dim_input],
                                    dtype=tf.float32)
            self.y = tf.placeholder(shape=[None, self.num_classes],
                                    dtype=tf.float32)

            # linear layer(WX + b)
            with tf.variable_scope('last_layer/dense') as scope:
                weights = tf.get_variable('kernel',
                                          [self.dim_input, self.num_classes],
                                          dtype=tf.float32)
                biases = tf.get_variable('bias', [self.num_classes],
                                         dtype=tf.float32)
                # Stack the bias as an extra last row under the kernel, giving
                # a (dim_input + 1, num_classes) matrix.
                wb = tf.concat([weights, tf.expand_dims(biases, axis=0)], 0)
                # Preconditioning: assumes self.sigma_half_inv is a
                # (dim_input + 1, dim_input + 1) matrix -- TODO confirm.
                wb_renorm = tf.matmul(self.sigma_half_inv, wb)
                # Split the transformed matrix back into kernel rows and the
                # final bias row.
                weights_renorm = wb_renorm[:self.dim_input, :]
                biases_renorm = wb_renorm[-1, :]
                self.z = tf.add(tf.matmul(self.x, weights_renorm),
                                biases_renorm,
                                name=scope.name)

            # Gaussian prior
            # prior = tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases)

            # Non normalized loss, because of the preconditioning
            # (mean cross-entropy multiplied by self.n; presumably the dataset
            # size -- TODO confirm).
            self.loss = self.n * tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y,
                                                           logits=self.z))

            # Bayesian loss
            self.bayesian_loss = self.loss  # + prior

            self.output_probs = tf.nn.softmax(self.z)

            # Variables of the last layer
            # (all trainable variables of this fresh graph: the kernel and
            # bias created above, in creation order).
            self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.ll_vars_concat = tf.concat(
                [self.ll_vars[0],
                 tf.expand_dims(self.ll_vars[1], axis=0)], 0)

            # Summary
            _variable_summaries(self.ll_vars_concat)

            # saving the weights of last layer when running SGLD/SGD/MCMC algorithm
            self.saver = tf.train.Saver(var_list=self.ll_vars,
                                        max_to_keep=self.num_samples)

            self.gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
            # SGLD optimizer for the last layer
            if self.sampler in ['sgld', 'lmc']:
                grads_vars = self.gd_opt.compute_gradients(self.bayesian_loss)
                grads_vars_sgld = []

                for g, v in grads_vars:
                    if g is not None:
                        # Summary tag: the variable name with its last ':'
                        # replaced by '_'.
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        # Adding Gaussian noise to the gradient
                        # The sqrt(2 / step_size) factor is applied before the
                        # optimizer multiplies the gradient by step_size.
                        gaussian_noise = (np.sqrt(2. / self.step_size) *
                                          tf.random_normal(tf.shape(g)))
                        g_sgld = g + gaussian_noise
                        tf.summary.histogram(''.join(s) + '/grad_hist_mcmc', g)
                        tf.summary.histogram(
                            ''.join(s) + '/gaussian_noise_hist_mcmc',
                            gaussian_noise)
                        tf.summary.histogram(
                            ''.join(s) + '/grad_total_hist_mcmc', g_sgld)
                        grads_vars_sgld.append((g_sgld, v))

                self.train_op = self.gd_opt.apply_gradients(grads_vars_sgld)

            # SGD optimizer for the last layer
            if self.sampler == 'sgd':
                grads_vars_sgd = self.gd_opt.compute_gradients(self.loss)
                self.train_op = self.gd_opt.apply_gradients(grads_vars_sgd)

                for g, v in grads_vars_sgd:
                    if g is not None:
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        tf.summary.histogram(''.join(s) + '/grad_hist_sgd', g)

            # Merge all the summaries and write them out
            self.all_summaries = tf.summary.merge_all()
            location = os.path.join(self.working_dir, 'logs')
            self.writer = tf.summary.FileWriter(location, graph=self.g)

            # A separate saver (without max_to_keep) is used only to restore
            # the initial weights.
            saver_network = tf.train.Saver(var_list=self.ll_vars)
            print('loading the network ...')
            # Restores from checkpoint
            saver_network.restore(self.sess, self.model_dir)
            print('Graph successfully loaded.')
Example #19
0
def _wide_resnet(x,
                 is_training,
                 scope,
                 n,
                 k,
                 weight_decay,
                 reuse=tf.AUTO_REUSE,
                 params=None,
                 moments=None,
                 backprop_through_moments=True,
                 use_bounded_activation=False,
                 keep_spatial_dims=False):
  """Builds a wide ResNet embedding network.

  The network is a 3x3 stem convolution with 16 channels ('conv1'), three
  stages ('conv2_x', 'conv3_x', 'conv4_x') of widths 16*k, 32*k and 64*k,
  a batch-norm + activation 'embedding_layer', and an average pool over the
  whole remaining spatial extent. Each stage always builds 'wide_block_0',
  followed by 'wide_block_1' .. 'wide_block_{n-1}'.

  Args:
    x: Input tensor.
    is_training: Forwarded to the residual blocks and the final batch norm.
    scope: Name of the enclosing variable scope.
    n: Number of residual blocks per stage.
    k: Width multiplier applied to the base widths (16, 32, 64).
    weight_decay: Forwarded to `conv` and `wide_resnet_block`.
    reuse: Reuse mode for `scope`.
    params: Optional externally supplied parameters, forwarded to all layers.
    moments: Optional externally supplied moments, forwarded to all
      batch-norm-carrying layers.
    backprop_through_moments: Forwarded to the blocks and the final batch norm.
    use_bounded_activation: Forwarded to the blocks and the final activation.
    keep_spatial_dims: If False the pooled output is reshaped to
      [-1, 64 * k].

  Returns:
    A dict with keys 'embeddings', 'params' and 'moments'; the latter two are
    ordered dicts merging what every layer returned, in construction order.
  """
  widths = [i * k for i in (16, 32, 64)]
  all_params = collections.OrderedDict()
  all_moments = collections.OrderedDict()

  def _run_block(net, depth, stride, use_project):
    """Calls `wide_resnet_block` with the settings shared by every block."""
    return wide_resnet_block(
        net,
        depth,
        stride,
        weight_decay,
        params=params,
        moments=moments,
        use_project=use_project,
        is_training=is_training,
        backprop_through_moments=backprop_through_moments,
        use_bounded_activation=use_bounded_activation)

  # The first block of the first stage projects only when its width differs
  # from the 16-channel stem. The same flag is reused by every non-first block
  # of all three stages.
  base_project = widths[0] != 16
  # (scope name, width, stride of block 0, use_project of block 0)
  stage_specs = (
      ('conv2_x', widths[0], 1, base_project),
      ('conv3_x', widths[1], 2, True),
      ('conv4_x', widths[2], 2, True),
  )

  with tf.variable_scope(scope, reuse=reuse):
    with tf.variable_scope('conv1'):
      x, stem_params = conv(x, [3, 3], 16, 1, weight_decay, params=params)
      all_params.update(stem_params)

    for stage_name, width, first_stride, first_project in stage_specs:
      with tf.variable_scope(stage_name):
        # Block 0 is built unconditionally, even when n < 1.
        for block_idx in [0] + list(range(1, n)):
          if block_idx == 0:
            stride, project = first_stride, first_project
          else:
            stride, project = 1, base_project
          with tf.variable_scope('wide_block_%d' % block_idx):
            x, block_params, block_moments = _run_block(
                x, width, stride, project)
            all_params.update(block_params)
            all_moments.update(block_moments)

    with tf.variable_scope('embedding_layer'):
      x, bn_params, bn_moments = bn(
          x,
          params=params,
          moments=moments,
          is_training=is_training,
          backprop_through_moments=backprop_through_moments)
      all_params.update(bn_params)
      all_moments.update(bn_moments)

      x = relu(x, use_bounded_activation=use_bounded_activation)

    # Pool over the full spatial extent (static dims 1 and 2 of x).
    spatial_dim1, spatial_dim2 = x.get_shape().as_list()[1:3]
    x = tf.nn.avg_pool(
        x,
        ksize=[1, spatial_dim1, spatial_dim2, 1],
        strides=[1, 1, 1, 1],
        padding='VALID')
    if not keep_spatial_dims:
      x = tf.reshape(x, [-1, widths[2]])

    return {'embeddings': x, 'params': all_params, 'moments': all_moments}
Example #20
0
    def build_graph(self):
        """Builds the neural network graph.

        Creates a fresh graph and session, a dense last layer under the
        'last_layer' scope, a cross-entropy loss, a Bayesian loss
        (`self.n * loss` plus an L2 regularization term on the last-layer
        variables), and the training op selected by `self.sampler`
        ('sgld'/'lmc' add Gaussian noise to the gradients of the Bayesian
        loss, 'sgd' minimizes the plain loss). The last-layer variables are
        finally restored from `self.model_dir`.

        NOTE(review): `self.train_op` is only assigned when `self.sampler` is
        one of 'sgld', 'lmc' or 'sgd'.
        """

        # define graph
        self.g = tf.Graph()
        with self.g.as_default():

            # create and store a new session for the graph
            self.sess = tf.Session()

            # define placeholders
            self.x = tf.placeholder(shape=[None, self.dim_input],
                                    dtype=tf.float32)
            self.y = tf.placeholder(shape=[None, self.num_classes],
                                    dtype=tf.float32)

            # define simple model
            with tf.variable_scope('last_layer'):
                self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y,
                                                           logits=self.z))

            self.output_probs = tf.nn.softmax(self.z)

            # Variables of the last layer
            # (all trainable variables of this fresh graph; indices 0 and 1
            # are stacked below -- presumably kernel then bias, TODO confirm).
            self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.ll_vars_concat = tf.concat(
                [self.ll_vars[0],
                 tf.expand_dims(self.ll_vars[1], axis=0)], 0)

            # Summary
            _variable_summaries(self.ll_vars_concat)

            # add regularization that acts as a unit Gaussian prior on the last layer
            regularizer = tf.contrib.layers.l2_regularizer(1.0)

            # regularization
            prior = tf.contrib.layers.apply_regularization(
                regularizer, self.ll_vars)
            # The data term is scaled by self.n (presumably the dataset size
            # -- TODO confirm) while the prior is added unscaled.
            self.bayesian_loss = self.n * self.loss + prior

            # saving the weights of last layer when running SGLD/SGD/MCMC algorithm
            self.saver = tf.train.Saver(var_list=self.ll_vars,
                                        max_to_keep=self.num_samples)

            # SGLD optimizer for the last layer
            if self.sampler in ['sgld', 'lmc']:
                # The loss above was multiplied by self.n, so the step size is
                # divided by self.n here.
                step = self.step_size / self.n
                gd_opt = tf.train.GradientDescentOptimizer(step)
                grads_vars = gd_opt.compute_gradients(self.bayesian_loss)
                grads_vars_sgld = []

                for g, v in grads_vars:
                    if g is not None:
                        # Summary tag: the variable name with its last ':'
                        # replaced by '_'.
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        # Adding Gaussian noise to the gradient
                        # The sqrt(2 / step) factor is applied before the
                        # optimizer multiplies the gradient by step.
                        gaussian_noise = (np.sqrt(2. / step) *
                                          tf.random_normal(tf.shape(g)))
                        g_sgld = g + gaussian_noise
                        # Histograms are divided by self.n for logging only;
                        # the applied gradient is g_sgld itself.
                        tf.summary.histogram(''.join(s) + '/grad_hist_mcmc',
                                             g / self.n)
                        tf.summary.histogram(
                            ''.join(s) + '/gaussian_noise_hist_mcmc',
                            gaussian_noise / self.n)
                        tf.summary.histogram(
                            ''.join(s) + '/grad_total_hist_mcmc',
                            g_sgld / self.n)
                        grads_vars_sgld.append((g_sgld, v))

                self.train_op = gd_opt.apply_gradients(grads_vars_sgld)

            # SGD optimizer for the last layer
            if self.sampler == 'sgd':
                # Plain SGD uses the unscaled loss and the unscaled step size.
                gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
                grads_vars_sgd = gd_opt.compute_gradients(self.loss)
                self.train_op = gd_opt.apply_gradients(grads_vars_sgd)

                for g, v in grads_vars_sgd:
                    if g is not None:
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        tf.summary.histogram(''.join(s) + '/grad_hist_sgd', g)

            # Merge all the summaries and write them out
            self.all_summaries = tf.summary.merge_all()
            location = os.path.join(self.working_dir, 'logs')
            self.writer = tf.summary.FileWriter(location, graph=self.g)

            # A separate saver (without max_to_keep) is used only to restore
            # the initial weights.
            saver_network = tf.train.Saver(var_list=self.ll_vars)
            print('loading the network ...')
            # Restores from checkpoint
            # self.sess.run(tf.global_variables_initializer())
            saver_network.restore(self.sess, self.model_dir)
            print('Graph successfully loaded.')
Example #21
0
def relationnet_convnet(inputs,
                        is_training,
                        weight_decay,
                        params=None,
                        moments=None,
                        depth_multiplier=1.0,
                        reuse=tf.AUTO_REUSE,
                        scope='relationnet_convnet',
                        backprop_through_moments=True,
                        use_bounded_activation=False,
                        keep_spatial_dims=False):
  """Four conv-bn blocks used as the embedding backbone for RelationNet.

  Each block is `conv_bn` followed by `relu`. The first three blocks use
  VALID padding and are followed by a 2x2 max-pool with stride 2; the fourth
  block uses SAME padding and is not pooled. Gradients are stopped at the
  inputs. Unless `keep_spatial_dims` is True, the final activations are
  flattened.

  Paper: https://arxiv.org/abs/1711.06025
  Code:
  https://github.com/floodsung/LearningToCompare_FSL/blob/master/miniimagenet/miniimagenet_train_few_shot.py

  Args:
    inputs: Tensors of shape [None, ] + image shape, e.g. [15, 84, 84, 3]
    is_training: Whether we are in the training phase.
    weight_decay: float, scaling constant for L2 weight decay on weight
      variables.
    params: None will create new params (or reuse from scope), otherwise an
      ordered dict of convolutional kernels and biases; forwarded unchanged to
      every `conv_bn` call.
    moments: A dict of the means and vars of the different layers to use for
      batch normalization; forwarded unchanged to every `conv_bn` call.
    depth_multiplier: The depth multiplier for the convnet channels; every
      block has `int(64 * depth_multiplier)` output channels.
    reuse: Whether to reuse the network's weights.
    scope: An optional scope for the tf operations.
    backprop_through_moments: Whether to allow gradients to flow through the
      given support set moments. Forwarded to `conv_bn`.
    use_bounded_activation: Whether to enable bounded activation. Forwarded to
      `relu`.
    keep_spatial_dims: bool, if True the output is not flattened.

  Returns:
    A dict with three entries: 'embeddings' (the output of the last block,
    flattened unless `keep_spatial_dims`), 'params' and 'moments' (ordered
    dicts merging what `conv_bn` returned for each block, in block order).
  """
  num_blocks = 4
  num_channels = int(64 * depth_multiplier)
  net = tf.stop_gradient(inputs)
  collected_params = collections.OrderedDict()
  collected_moments = collections.OrderedDict()

  with tf.variable_scope(scope, reuse=reuse):
    for i in range(num_blocks):
      is_last_block = i == num_blocks - 1
      with tf.variable_scope('layer_%d' % i, reuse=reuse):
        # The original implementation had VALID padding only for its first two
        # (pooled) layers. Here, to avoid OOM, the first three blocks are
        # pooled with VALID padding and only the last one uses SAME padding.
        net, block_params, block_moments = conv_bn(
            net, [3, 3],
            num_channels,
            stride=1,
            weight_decay=weight_decay,
            padding='SAME' if is_last_block else 'VALID',
            params=params,
            moments=moments,
            is_training=is_training,
            backprop_through_moments=backprop_through_moments)
        collected_params.update(block_params)
        collected_moments.update(block_moments)

      net = relu(net, use_bounded_activation=use_bounded_activation)
      if not is_last_block:
        net = tf.layers.max_pooling2d(net, [2, 2], 2)
      tf.logging.info('Output of block %d: %s' % (i, net.shape))

    if not keep_spatial_dims:
      net = tf.layers.flatten(net)

    return {
        'embeddings': net,
        'params': collected_params,
        'moments': collected_moments
    }
Example #22
0
 def _graph_op(self, *args, **kwargs):
     """Runs `self._build_op` inside this object's variable scope.

     The scope is entered with `reuse=False`; all positional and keyword
     arguments are forwarded to `_build_op` unchanged. Nothing is returned.
     """
     scope_ctx = tf.variable_scope(self._scope, reuse=False)
     with scope_ctx:
         self._build_op(*args, **kwargs)
Example #23
0
def fully_connected_network(inputs,
                            is_training,
                            weight_decay,
                            params=None,
                            moments=None,
                            n_hidden_units=(64,),
                            use_batchnorm=False,
                            reuse=tf.AUTO_REUSE,
                            scope='fully_connected',
                            use_bounded_activation=False,
                            backprop_through_moments=None,
                            keep_spatial_dims=None):
  """A fully connected network: a stack of dense layers with optional batchnorm.

  Args:
    inputs: Tensor of shape [None, num_features], where `num_features` is the
      number of input features.
    is_training: whether it's train or test mode (for batch norm).
    weight_decay: float, scaling constant for L2 weight decay on weight
      variables.
    params: None will create new params (or reuse from scope), otherwise an
      ordered dict of fully connected weights and biases such that
      params['weight_0'] stores the kernel of the first fully-connected layer,
      etc. Forwarded to both `dense` and `bn`.
    moments: forwarded to `bn`; only used when `use_batchnorm` is True.
    n_hidden_units: tuple, Number of hidden units for each layer. If empty, it
      is the identity mapping.
    use_batchnorm: bool, Whether to apply `bn` after each dense layer.
    reuse: Whether to reuse the network's weights.
    scope: An optional scope for the tf operations.
    use_bounded_activation: Whether to enable bounded activation. This is useful
      for post-training quantization.
    backprop_through_moments: Whether to allow gradients to flow through the
      given support set moments. Forwarded to `bn`.
    keep_spatial_dims: is there only to match the interface.  This backbone
      cannot keep spatial dimensions, so it will fail if it's True.

  Returns:
    A dict with three entries: 'embeddings' (the output of the last layer, or
    `inputs` itself when `n_hidden_units` is empty), 'params' (ordered dict of
    the params returned by `dense` and `bn`, in layer order) and 'moments'
    (ordered dict of the moments returned by `bn`; empty without batchnorm).

  Raises:
    AssertionError: if `keep_spatial_dims` is truthy.
  """
  assert not keep_spatial_dims, (
      'fully_connected_network cannot keep spatial dimensions.')
  layer = inputs
  model_params_keys, model_params_vars = [], []
  moments_keys, moments_vars = [], []
  activation_fn = functools.partial(
      relu, use_bounded_activation=use_bounded_activation)
  with tf.variable_scope(scope, reuse=reuse):
    for i, n_unit in enumerate(n_hidden_units):
      with tf.variable_scope('layer_%d' % i, reuse=reuse):
        layer, dense_params = dense(
            layer,
            n_unit,
            weight_decay,
            activation_fn=activation_fn,
            params=params)
        model_params_keys.extend(dense_params.keys())
        model_params_vars.extend(dense_params.values())
        if use_batchnorm:
          layer, bn_params, bn_moments = bn(
              layer,
              params=params,
              moments=moments,
              is_training=is_training,
              backprop_through_moments=backprop_through_moments)
          model_params_keys.extend(bn_params.keys())
          # The values must go into the vars list (not the keys list): the two
          # lists are zipped below, so they have to stay the same length and
          # in the same order or params get dropped / paired with wrong keys.
          model_params_vars.extend(bn_params.values())
          moments_keys.extend(bn_moments.keys())
          moments_vars.extend(bn_moments.values())

  model_params = collections.OrderedDict(
      zip(model_params_keys, model_params_vars))
  moments = collections.OrderedDict(zip(moments_keys, moments_vars))
  return_dict = {
      'embeddings': layer,
      'params': model_params,
      'moments': moments
  }
  return return_dict
Example #24
0
    def forward_pass(self, data):
        """Computes the query logits for the given episode `data`.

        The graph is built in three stages:

        1. `self.film_selector` is set from `self.film_init`: None for
           'scratch', 0 for 'imagenet', and for 'blender' / 'blender_hard' a
           softmax / one-hot-argmax over the logits that
           `functional_backbones.dataset_classifier` produces for the support
           images. Any other `film_init` value leaves `film_selector` as is.
        2. If `self.num_steps` is truthy, `self.num_steps` training steps are
           chained with control dependencies. Before the first step, the
           variables whose name contains '_for_film_learner' and the
           optimizer's own variables are re-initialized.
        3. The losses are recomputed with `compute_on_query=True` (after the
           last train op, if any) and the query logits are returned.

        Args:
          data: The episode; `data.support_images` is read here and `data` is
            forwarded to `_compute_losses` and `_get_train_op`.

        Returns:
          The query logits tensor (`results['query_logits']` passed through
          `tf.identity`).
        """

        if self.film_init == 'scratch':
            self.film_selector = None
        elif self.film_init == 'imagenet':
            # Note: this makes the assumption that the first set of learned FiLM
            # parameters corresponds to the ImageNet dataset. Otherwise, the
            # following line should be changed appropriately.
            self.film_selector = 0
        elif self.film_init in ['blender', 'blender_hard']:
            dataset_logits = functional_backbones.dataset_classifier(
                data.support_images)
            if self.film_init == 'blender_hard':
                # Select only the argmax entry.
                self.film_selector = tf.one_hot(
                    tf.math.argmax(dataset_logits, axis=-1),
                    depth=tf.shape(dataset_logits)[1])
            else:
                # Take a convex combination.
                self.film_selector = tf.nn.softmax(dataset_logits, axis=-1)

        if self.num_steps:
            # Initial forward pass, required for the `unused_op` below and for placing
            # variables in tf.trainable_variables() for the below block to pick up.
            loss = self._compute_losses(data, compute_on_query=False)['loss']

            # Pick out the variables to optimize.
            self.opt_vars = []
            for var in tf.trainable_variables():
                if '_for_film_learner' in var.name:
                    self.opt_vars.append(var)
            tf.logging.info('FiLMLearner will optimize vars: {}'.format(
                self.opt_vars))

        # Each iteration adds one train op to the graph; the control
        # dependencies force step i to run only after step i - 1 has finished.
        for i in range(self.num_steps):
            if i == 0:
                # Re-initialize the variables to optimize for the new episode, to ensure
                # the FiLM parameters aren't re-used across tasks of a given dataset.
                vars_reset = tf.variables_initializer(var_list=self.opt_vars)
                # Adam related variables are created when minimize() is called.
                # We create an unused op here to put all adam variables under
                # the 'adam_opt' namescope and create a reset op to reinitialize
                # these variables before the first finetune step.
                with tf.variable_scope('adam_opt', reuse=tf.AUTO_REUSE):
                    unused_op = self.opt.minimize(loss, var_list=self.opt_vars)
                adam_reset = tf.variables_initializer(self.opt.variables())

                # The first train op waits for both resets and the initial loss.
                with tf.control_dependencies([vars_reset, adam_reset, loss] +
                                             self.opt_vars):
                    print_op = tf.no_op()
                    if self.debug_log:
                        print_op = tf.print([
                            'step: %d' % i, self.opt_vars[0][0], 'loss:', loss
                        ],
                                            summarize=-1)

                    with tf.control_dependencies([print_op]):
                        # Get the train op.
                        results = self._get_train_op(data)
                        (train_op, loss, query_loss, acc,
                         query_acc) = (results['train_op'], results['loss'],
                                       results['query_loss'], results['acc'],
                                       results['query_acc'])

            else:
                # Later steps wait for the previous step's train op and metrics.
                # The query metrics are only added as dependencies when
                # debug logging is on (the list is multiplied by 0 or 1).
                with tf.control_dependencies([train_op, loss, acc] +
                                             self.opt_vars +
                                             [query_loss, query_acc] *
                                             int(self.debug_log)):

                    print_op = tf.no_op()
                    if self.debug_log:
                        print_list = [
                            '################',
                            'step: %d' % i,
                            self.opt_vars[0][0],
                            'support loss:',
                            loss,
                            'query loss:',
                            query_loss,
                            'support acc:',
                            acc,
                            'query acc:',
                            query_acc,
                        ]
                        print_op = tf.print(print_list)

                    with tf.control_dependencies([print_op]):
                        # Get the train op (the loss is returned just for printing).
                        results = self._get_train_op(data)
                        (train_op, loss, query_loss, acc,
                         query_acc) = (results['train_op'], results['loss'],
                                       results['query_loss'], results['acc'],
                                       results['query_acc'])

        # Training is now over, compute the final query logits.
        # With no training steps there is nothing to wait for.
        dependency_list = [] if not self.num_steps else [train_op
                                                         ] + self.opt_vars
        with tf.control_dependencies(dependency_list):
            results = self._compute_losses(data, compute_on_query=True)
            (loss, query_loss, query_logits, acc,
             query_acc) = (results['loss'], results['query_loss'],
                           results['query_logits'], results['acc'],
                           results['query_acc'])

            print_op = tf.no_op()
            if self.debug_log:
                print_op = tf.print([
                    'Done training',
                    'support loss:',
                    loss,
                    'query loss:',
                    query_loss,
                    'support acc:',
                    acc,
                    'query acc:',
                    query_acc,
                ])
            # The identity op ties the (optional) debug print to the returned
            # tensor, so evaluating the logits also triggers the print.
            with tf.control_dependencies([print_op]):
                query_logits = tf.identity(query_logits)

        return query_logits
Example #25
0
def relation_module(inputs,
                    is_training,
                    weight_decay,
                    scope='relation_module',
                    reuse=tf.AUTO_REUSE,
                    params=None,
                    moments=None,
                    depth_multiplier=1.0,
                    backprop_through_moments=True,
                    use_bounded_activation=False):
  """Two conv-bn blocks followed by two dense layers ending in a sigmoid.

  Each conv block is `conv_bn` -> `relu`, followed by a 2x2 max-pool with
  stride 2 whenever dimension 1 of the activation is still larger than 1. The
  result is flattened and fed through an 8-unit dense layer (relu) and a
  1-unit dense layer (sigmoid).

  Args:
    inputs: The input tensor.
    is_training: Forwarded to `conv_bn`.
    weight_decay: Forwarded to `conv_bn` and `dense`.
    scope: Name of the enclosing variable scope.
    reuse: Reuse flag for all variable scopes opened here.
    params: Forwarded to `conv_bn` (not to the dense layers).
    moments: Forwarded to `conv_bn`.
    depth_multiplier: Each conv block has `int(64 * depth_multiplier)` output
      channels.
    backprop_through_moments: Forwarded to `conv_bn`.
    use_bounded_activation: Forwarded to `relu`.

  Returns:
    A dict with three entries: 'output' (the result of the last dense layer),
    'params' (ordered dict merging the params returned by `conv_bn` and
    `dense`, in layer order) and 'moments' (ordered dict merging the moments
    returned by `conv_bn`).
  """
  gathered_params = collections.OrderedDict()
  gathered_moments = collections.OrderedDict()
  num_channels = int(64 * depth_multiplier)
  hidden_activation = functools.partial(
      relu, use_bounded_activation=use_bounded_activation)
  net = inputs

  with tf.variable_scope(scope, reuse=reuse):
    for i in range(2):
      with tf.variable_scope('layer_%d' % i, reuse=reuse):
        # Note that original has `valid` padding where we use `same`.
        net, block_params, block_moments = conv_bn(
            net, [3, 3],
            num_channels,
            1,
            weight_decay,
            params=params,
            moments=moments,
            is_training=is_training,
            backprop_through_moments=backprop_through_moments)
        gathered_params.update(block_params)
        gathered_moments.update(block_moments)

      net = hidden_activation(net)
      # This is a hacky way preventing max pooling if the spatial dimensions
      # are already reduced.
      if net.shape[1] > 1:
        net = tf.layers.max_pooling2d(net, [2, 2], 2)
      tf.logging.info('Output of block %d: %s' % (i, net.shape))

    net = tf.layers.flatten(net)

    with tf.variable_scope('layer_2_fc', reuse=reuse):
      net, fc_params = dense(
          net, 8, weight_decay, activation_fn=hidden_activation)
      tf.logging.info('Output layer_2_fc: %s' % net.shape)
      gathered_params.update(fc_params)

    with tf.variable_scope('layer_3_fc', reuse=reuse):
      output, fc_params = dense(
          net, 1, weight_decay, activation_fn=tf.nn.sigmoid)
      tf.logging.info('Output layer_3_fc: %s' % output.shape)
      gathered_params.update(fc_params)

    return {
        'output': output,
        'params': gathered_params,
        'moments': gathered_moments
    }
Example #26
0
def _resnet(x,
            is_training,
            weight_decay,
            scope,
            reuse=tf.AUTO_REUSE,
            params=None,
            moments=None,
            backprop_through_moments=True,
            use_bounded_activation=False,
            blocks=(2, 2, 2, 2),
            max_stride=None,
            deeplab_alignment=True,
            keep_spatial_dims=False):
  """A ResNet network; ResNet18 by default.

  Layout: optional bilinear resize (`deeplab_alignment`), a 7x7 stride-2
  `conv_bn` + relu ('conv1'), a 3x3 stride-2 max-pool, then four stages
  ('conv2_x' .. 'conv5_x') of `bottleneck` units with depths 64, 128, 256 and
  512. Stage k contains `blocks[k]` units. Gradients are stopped at the input.

  Args:
    x: The input image tensor; dimension 1 is used as the image size when
      `deeplab_alignment` is on.
    is_training: Forwarded to `conv_bn` and `bottleneck`.
    weight_decay: Forwarded to `conv_bn` and `bottleneck`.
    scope: Name of the enclosing variable scope.
    reuse: Reuse flag for the enclosing variable scope.
    params: Forwarded to `conv_bn` and `bottleneck`.
    moments: Forwarded to `conv_bn` and `bottleneck`.
    backprop_through_moments: Forwarded to `conv_bn` and `bottleneck`.
    use_bounded_activation: Forwarded to the `relu` after 'conv1'.
    blocks: Number of bottleneck units in each of the four stages.
    max_stride: One of None, 4, 8, 16, 32. When set, a stage whose stride would
      push the accumulated network stride above this value runs with stride 1
      and multiplies the output rate by that stride instead.
    deeplab_alignment: If True, the input is resized to
      `ceil(size / 32) * 32 + 1` on both spatial sides before 'conv1'.
    keep_spatial_dims: If False, the output is averaged over axes 1 and 2 and
      reshaped to [-1, 512].

  Returns:
    A dict with three entries: 'embeddings', 'params' and 'moments', the
    latter two being ordered dicts built from what `conv_bn` and `bottleneck`
    returned, in layer order.
  """
  x = tf.stop_gradient(x)
  all_param_names, all_param_values = [], []
  all_moment_names, all_moment_values = [], []

  def _collect(layer_params, layer_moments):
    """Appends one layer's params and moments to the running lists."""
    all_param_names.extend(layer_params.keys())
    all_param_values.extend(layer_params.values())
    all_moment_names.extend(layer_moments.keys())
    all_moment_values.extend(layer_moments.values())

  assert max_stride in [None, 4, 8, 16,
                        32], 'max_stride must be 4, 8, 16, 32, or None'
  with tf.variable_scope(scope, reuse=reuse):
    # We use DeepLab feature alignment rule to determine the input size.
    # Since the image size in the meta-dataset pipeline is a multiplier of 42,
    # e.g., [42, 84, 168], we align them to the closest sizes that conform to
    # the alignment rule and at the same time are larger. They are [65, 97, 193]
    # respectively. The aligned image size for 224 used in the ResNet work is
    # 225.
    #
    # References:
    # 1. ResNet https://arxiv.org/abs/1512.03385
    # 2. DeepLab https://arxiv.org/abs/1606.00915
    if deeplab_alignment:
      size = tf.cast(tf.shape(x)[1], tf.float32)
      aligned_size = tf.cast(tf.ceil(size / 32.0), tf.int32) * 32 + 1
      x = tf.image.resize_bilinear(
          x, size=[aligned_size, aligned_size], align_corners=True)

    with tf.variable_scope('conv1'):
      x, stem_params, stem_moments = conv_bn(
          x, [7, 7],
          64,
          2,
          weight_decay,
          params=params,
          moments=moments,
          is_training=is_training,
          backprop_through_moments=backprop_through_moments)
      _collect(stem_params, stem_moments)

      x = relu(x, use_bounded_activation=use_bounded_activation)

    def _bottleneck(x,
                    i,
                    depth,
                    stride,
                    params,
                    moments,
                    net_stride=1,
                    net_rate=1):
      """Wrapper for bottleneck.

      Only the first unit of a stage (i == 0) may stride and uses a projection
      shortcut. If striding would exceed `max_stride`, the unit keeps stride 1
      and the output rate is multiplied by the stride instead.
      """
      output_rate = net_rate
      output_stride = 1
      if i == 0:
        if max_stride and stride * net_stride > max_stride:
          output_rate *= stride
        else:
          output_stride = stride

      x, unit_params, unit_moments = bottleneck(
          x, (depth, depth),
          output_stride,
          weight_decay,
          params=params,
          moments=moments,
          input_rate=net_rate,
          output_rate=output_rate,
          use_project=(i == 0),
          is_training=is_training,
          backprop_through_moments=backprop_through_moments)
      return (x, unit_params, unit_moments, net_stride * output_stride,
              output_rate)

    # 'conv1' (stride 2) and the max-pool below (stride 2) give stride 4.
    net_stride = 4
    net_rate = 1

    # (variable scope, bottleneck depth, stride of the stage's first unit).
    stage_specs = (
        ('conv2_x', 64, 1),
        ('conv3_x', 128, 2),
        ('conv4_x', 256, 2),
        ('conv5_x', 512, 2),
    )
    for stage_idx, (stage_scope, stage_depth,
                    stage_stride) in enumerate(stage_specs):
      with tf.variable_scope(stage_scope):
        if stage_idx == 0:
          # The max-pool lives inside the first stage's scope.
          x = tf.nn.max_pool(
              x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
        for i in range(blocks[stage_idx]):
          with tf.variable_scope('bottleneck_%d' % i):
            (x, unit_params, unit_moments, net_stride,
             net_rate) = _bottleneck(x, i, stage_depth, stage_stride, params,
                                     moments, net_stride, net_rate)
            _collect(unit_params, unit_moments)

    if not keep_spatial_dims:
      # x.shape: [?, 1, 1, 512]
      x = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
      x = tf.reshape(x, [-1, 512])
    params = collections.OrderedDict(zip(all_param_names, all_param_values))
    moments = collections.OrderedDict(
        zip(all_moment_names, all_moment_values))

    return {'embeddings': x, 'params': params, 'moments': moments}
Example #27
0
  def forward_pass(self, data):
    """Computes the test logits of MAML.

    The embedding and linear-classifier variables are adapted on the support
    set inside a `tf.while_loop`, each iteration calling
    `gradient_descent_step` once for the embedding variables and once for the
    classifier variables. The adapted values are then used to embed the query
    images and compute their logits, truncated to the first `data.way`
    columns.

    Args:
      data: A `meta_dataset.providers.Episode` containing the data for the
        episode.

    Returns:
      The output logits for the query data in this episode.
    """
    # Have to use one-hot labels since sparse softmax doesn't allow
    # second derivatives.
    support_embeddings_ = self.embedding_fn(
        data.support_images, self.is_training, reuse=tf.AUTO_REUSE)
    support_embeddings = support_embeddings_['embeddings']
    embedding_vars_dict = support_embeddings_['params']

    # TODO(eringrant): Refactor to make use of
    # `functional_backbones.linear_classifier`, which allows Gin-configuration.
    with tf.variable_scope('linear_classifier', reuse=tf.AUTO_REUSE):
      embedding_depth = support_embeddings.shape.as_list()[-1]
      fc_weights = functional_backbones.weight_variable(
          [embedding_depth, self.logit_dim],
          weight_decay=self.classifier_weight_decay)
      fc_bias = functional_backbones.bias_variable([self.logit_dim])

    # A list of variable names, a list of corresponding Variables, and a list
    # of operations (possibly empty) that creates a copy of each Variable.
    (embedding_vars_keys, embedding_vars,
     embedding_vars_copy_ops) = get_embeddings_vars_copy_ops(
         embedding_vars_dict, make_copies=not self.is_training)

    # A Variable for the weights of the fc layer, a Variable for the bias of the
    # fc layer, and a list of operations (possibly empty) that copies them.
    (fc_weights, fc_bias, fc_vars_copy_ops) = get_fc_vars_copy_ops(
        fc_weights, fc_bias, make_copies=not self.is_training)

    fc_vars = [fc_weights, fc_bias]
    # These counts are used to split the flat tuple of loop variables back
    # into the embedding group and the classifier group.
    num_embedding_vars = len(embedding_vars)
    num_fc_vars = len(fc_vars)

    def _cond(step, *args):
      """Loop condition: keep going until the configured number of steps."""
      del args
      num_steps = self.num_update_steps
      # Outside of training, extra adaptation steps are added on top.
      if not self.is_training:
        num_steps += self.additional_evaluation_update_steps
      return step < num_steps

    def _body(step, *args):
      """The inner update loop body."""
      # `args` is laid out as [embedding vars..., fc weights, fc bias].
      updated_embedding_vars = args[0:num_embedding_vars]
      updated_fc_vars = args[num_embedding_vars:num_embedding_vars +
                             num_fc_vars]
      support_embeddings = self.embedding_fn(
          data.support_images,
          self.is_training,
          params=collections.OrderedDict(
              zip(embedding_vars_keys, updated_embedding_vars)),
          reuse=True)['embeddings']

      updated_fc_weights, updated_fc_bias = updated_fc_vars
      support_logits = tf.matmul(support_embeddings,
                                 updated_fc_weights) + updated_fc_bias

      # Only the first `data.way` logit columns are used for this episode.
      support_logits = support_logits[:, 0:data.way]
      loss = tf.losses.softmax_cross_entropy(data.onehot_support_labels,
                                             support_logits)

      print_op = tf.no_op()
      if self.debug_log:
        print_op = tf.print(['step: ', step, updated_fc_bias[0], 'loss:', loss])

      with tf.control_dependencies([print_op]):
        # Both variable groups are updated from the same support loss.
        updated_embedding_vars = gradient_descent_step(
            loss, updated_embedding_vars, self.first_order,
            self.adapt_batch_norm, self.alpha, False)['updated_vars']
        updated_fc_vars = gradient_descent_step(loss, updated_fc_vars,
                                                self.first_order,
                                                self.adapt_batch_norm,
                                                self.alpha,
                                                False)['updated_vars']

        step = step + 1
      # Same flat layout as the loop variables passed to tf.while_loop.
      return tuple([step] + list(updated_embedding_vars) +
                   list(updated_fc_vars))

    # MAML meta updates using query set examples from an episode.
    if self.zero_fc_layer:
      # To account for variable class sizes, we initialize the output
      # weights to zero. See if truncated normal initialization will help.
      zero_weights_op = tf.assign(fc_weights, tf.zeros_like(fc_weights))
      zero_bias_op = tf.assign(fc_bias, tf.zeros_like(fc_bias))
      fc_vars_init_ops = [zero_weights_op, zero_bias_op]
    else:
      fc_vars_init_ops = fc_vars_copy_ops

    if self.proto_maml_fc_layer_init:
      # The classifier's starting point is derived from the class prototypes
      # of the support embeddings; it replaces `fc_vars` as the loop input.
      support_embeddings = self.embedding_fn(
          data.support_images,
          self.is_training,
          params=collections.OrderedDict(
              zip(embedding_vars_keys, embedding_vars)),
          reuse=True)['embeddings']

      prototypes = metric_learners.compute_prototypes(
          support_embeddings, data.onehot_support_labels)
      pmaml_fc_weights = self.proto_maml_fc_weights(
          prototypes, zero_pad_to_max_way=True)
      pmaml_fc_bias = self.proto_maml_fc_bias(
          prototypes, zero_pad_to_max_way=True)
      fc_vars = [pmaml_fc_weights, pmaml_fc_bias]

    # These control dependencies assign the value of each variable to a new copy
    # variable that corresponds to it. This is required at test time for
    # initializing the copies as they are used in place of the original vars.
    with tf.control_dependencies(fc_vars_init_ops + embedding_vars_copy_ops):
      # Make step a local variable as we don't want to save and restore it.
      step = tf.Variable(
          0,
          trainable=False,
          name='inner_step_counter',
          collections=[tf.GraphKeys.LOCAL_VARIABLES])
      loop_vars = [step] + embedding_vars + fc_vars
      step_and_all_updated_vars = tf.while_loop(
          _cond, _body, loop_vars, swap_memory=True)
      # Unpack the loop result: the step counter first, then the two groups.
      step = step_and_all_updated_vars[0]
      all_updated_vars = step_and_all_updated_vars[1:]
      updated_embedding_vars = all_updated_vars[0:num_embedding_vars]
      updated_fc_weights, updated_fc_bias = all_updated_vars[
          num_embedding_vars:num_embedding_vars + num_fc_vars]

    # Forward pass the training images with the updated weights in order to
    # compute the means and variances, to use for the query's batch norm.
    support_set_moments = None
    if not self.transductive_batch_norm:
      support_set_moments = self.embedding_fn(
          data.support_images,
          self.is_training,
          params=collections.OrderedDict(
              zip(embedding_vars_keys, updated_embedding_vars)),
          reuse=True)['moments']

    query_embeddings = self.embedding_fn(
        data.query_images,
        self.is_training,
        params=collections.OrderedDict(
            zip(embedding_vars_keys, updated_embedding_vars)),
        moments=support_set_moments,  # Use support set stats for batch norm.
        reuse=True,
        backprop_through_moments=self.backprop_through_moments)['embeddings']

    # As for the support logits, keep only the first `data.way` columns.
    query_logits = (tf.matmul(query_embeddings, updated_fc_weights) +
                    updated_fc_bias)[:, 0:data.way]

    return query_logits
Example #28
0
def wide_resnet_block(x,
                      depth,
                      stride,
                      weight_decay,
                      params=None,
                      moments=None,
                      use_project=False,
                      backprop_through_moments=True,
                      is_training=True,
                      use_bounded_activation=False):
  """Builds one Wide ResNet residual block.

  The main branch applies bn -> relu -> 3x3 conv (using `stride`) under the
  'conv1' scope, then bn -> relu -> 3x3 conv (stride 1) under the 'conv2'
  scope. Its result is added to the shortcut, which is the input `x` itself
  or, when `use_project` is set, a 1x1 conv (using `stride`) of the first
  activation.

  Args:
    x: Input to the block.
    depth: Third argument of every `conv` call (presumably the number of
      output channels -- confirm against `conv`).
    stride: Stride of the first 3x3 conv and of the projection conv.
    weight_decay: Forwarded to every `conv` call.
    params: Forwarded unchanged to every `bn` and `conv` call.
    moments: Forwarded unchanged to every `bn` call.
    use_project: Whether the shortcut is a 1x1 projection conv instead of `x`.
    backprop_through_moments: Forwarded to every `bn` call.
    is_training: Forwarded to every `bn` call.
    use_bounded_activation: Forwarded to `relu`; when true, the main branch
      and the block output are additionally clipped to [-6, 6].

  Returns:
    A tuple `(out, params, moments)`: the block output, an OrderedDict of the
    params reported by the `bn`/`conv` calls (in call order), and an
    OrderedDict of the moments reported by the `bn` calls.
  """
  block_params = collections.OrderedDict()
  block_moments = collections.OrderedDict()

  def _absorb(target, source):
    # Pair keys with values exactly as zip(keys, values) would; a repeated key
    # keeps its first position and takes the latest value.
    target.update(zip(source.keys(), source.values()))

  bn_kwargs = dict(
      params=params,
      moments=moments,
      is_training=is_training,
      backprop_through_moments=backprop_through_moments)

  with tf.variable_scope('conv1'):
    normed_1, layer_params, layer_moments = bn(x, **bn_kwargs)
    _absorb(block_params, layer_params)
    _absorb(block_moments, layer_moments)

    out_1 = relu(normed_1, use_bounded_activation=use_bounded_activation)

    h_1, layer_params = conv(
        out_1, [3, 3], depth, stride, weight_decay, params=params)
    _absorb(block_params, layer_params)

  with tf.variable_scope('conv2'):
    normed_2, layer_params, layer_moments = bn(h_1, **bn_kwargs)
    _absorb(block_params, layer_params)
    _absorb(block_moments, layer_moments)

    out_2 = relu(normed_2, use_bounded_activation=use_bounded_activation)

    h_2, layer_params = conv(
        out_2, [3, 3],
        depth,
        stride=1,
        weight_decay=weight_decay,
        params=params)
    _absorb(block_params, layer_params)

  residual = h_2
  if use_bounded_activation:
    residual = tf.clip_by_value(residual, -6, 6)

  # The 'identity' scope is entered even when no projection is built.
  shortcut = x
  with tf.variable_scope('identity'):
    if use_project:
      with tf.variable_scope('projection_conv'):
        shortcut, layer_params = conv(
            out_1, [1, 1], depth, stride, weight_decay, params=params)
        _absorb(block_params, layer_params)

  out = shortcut + residual
  if use_bounded_activation:
    out = tf.clip_by_value(out, -6, 6)
  return out, block_params, block_moments
Example #29
0
  def compute_logits(self, data):
    """Computes the class logits for the episode.

    Builds `self.num_finetune_steps` finetuning steps on the support set as an
    unrolled chain of ops (each step is control-dependent on the previous
    step's `finetune_op`), then embeds the query images and applies the fc
    layer under a control dependency on the last finetune op.

    Reads `data.support_images`, `data.query_images`, `data.way`,
    `data.onehot_support_labels` and `data.onehot_query_labels`.

    Args:
      data: A `meta_dataset.providers.Episode`.

    Returns:
      The query set logits as a [num_query_images, way] matrix.

    Raises:
      ValueError: Distance must be one of l2 or cosine.
        NOTE(review): no `raise` appears in this method body itself;
        presumably this originates in a helper it calls -- confirm or drop.
    """
    # ------------------------ Finetuning -------------------------------
    # Possibly make copies of embedding variables, if they will get modified.
    # This is for making temporary-only updates to the embedding network
    # which will not persist after the end of the episode.
    make_copies = self.finetune_all_layers

    # TODO(eringrant): Reduce the number of times the embedding function graph
    # is built with the same input.
    support_embeddings_params_moments = self.embedding_fn(
        data.support_images, self.is_training)
    support_embeddings = support_embeddings_params_moments['embeddings']
    support_embeddings_var_dict = support_embeddings_params_moments['params']

    (embedding_vars_keys, embedding_vars,
     embedding_vars_copy_ops) = get_embeddings_vars_copy_ops(
         support_embeddings_var_dict, make_copies)
    # Bundle the copy ops into one op so it can serve as a single control
    # dependency of the first finetune step below.
    embedding_vars_copy_op = tf.group(*embedding_vars_copy_ops)

    # Compute the initial training loss (only for printing purposes). This
    # line is also needed for adding the fc variables to the graph so that the
    # tf.trainable_variables() loop below detects them.
    logits = self._fc_layer(support_embeddings)[:, 0:data.way]
    finetune_loss = self.compute_loss(
        onehot_labels=data.onehot_support_labels,
        predictions=logits,
    )

    # Decide which variables to finetune.
    # The fc variables are recognized by the 'fc_finetune' substring in their
    # name; they are always finetuned.
    fc_vars, vars_to_finetune = [], []
    for var in tf.trainable_variables():
      if 'fc_finetune' in var.name:
        fc_vars.append(var)
        vars_to_finetune.append(var)
    if self.finetune_all_layers:
      vars_to_finetune.extend(embedding_vars)
    logging.info('Finetuning will optimize variables: %s', vars_to_finetune)

    # This Python loop unrolls the finetuning steps into the graph. `logits`,
    # `finetune_loss` and `finetune_op` are rebound on every iteration, so each
    # step's control dependencies refer to the previous step's tensors/ops.
    for i in range(self.num_finetune_steps):
      if i == 0:
        # Randomly initialize the fc layer.
        fc_reset = tf.variables_initializer(var_list=fc_vars)
        # Adam related variables are created when minimize() is called.
        # We create an unused op here to put all adam variables under
        # the 'adam_opt' namescope and create a reset op to reinitialize
        # these variables before the first finetune step.
        adam_reset = tf.no_op()
        if self.finetune_with_adam:
          with tf.variable_scope('adam_opt'):
            unused_op = self.finetune_opt.minimize(
                finetune_loss, var_list=vars_to_finetune)
          adam_reset = tf.variables_initializer(self.finetune_opt.variables())
        # The first step must run after the fc/Adam resets and after the
        # embedding variable copies have been made.
        with tf.control_dependencies(
            [fc_reset, adam_reset, finetune_loss, embedding_vars_copy_op] +
            vars_to_finetune):
          print_op = tf.no_op()
          if self.debug_log:
            print_op = tf.print([
                'step: %d' % i, vars_to_finetune[0][0, 0], 'loss:',
                finetune_loss
            ])

          with tf.control_dependencies([print_op]):
            # Get the operation for finetuning.
            # (The logits and loss are returned just for printing).
            logits, finetune_loss, finetune_op = self._get_finetune_op(
                data, embedding_vars_keys, embedding_vars, vars_to_finetune,
                support_embeddings if not self.finetune_all_layers else None)

            if self.debug_log:
              # Test logits are computed only for printing logs.
              query_embeddings = self.embedding_fn(
                  data.query_images,
                  self.is_training,
                  params=collections.OrderedDict(
                      zip(embedding_vars_keys, embedding_vars)),
                  reuse=True)['embeddings']
              query_logits = (self._fc_layer(query_embeddings)[:, 0:data.way])

      else:
        # Later steps run only after the previous step's finetune op.
        with tf.control_dependencies([finetune_op, finetune_loss] +
                                     vars_to_finetune):
          print_op = tf.no_op()
          if self.debug_log:
            # `logits` and `query_logits` here are the ones built in the
            # previous iteration (the latter exists only under debug_log).
            print_op = tf.print([
                'step: %d' % i,
                vars_to_finetune[0][0, 0],
                'loss:',
                finetune_loss,
                'accuracy:',
                self.compute_accuracy(
                    labels=data.onehot_support_labels, predictions=logits),
                'query accuracy:',
                self.compute_accuracy(
                    labels=data.onehot_query_labels, predictions=query_logits),
            ])

          with tf.control_dependencies([print_op]):
            # Get the operation for finetuning.
            # (The logits and loss are returned just for printing).
            logits, finetune_loss, finetune_op = self._get_finetune_op(
                data, embedding_vars_keys, embedding_vars, vars_to_finetune,
                support_embeddings if not self.finetune_all_layers else None)

            if self.debug_log:
              # Test logits are computed only for printing logs.
              query_embeddings = self.embedding_fn(
                  data.query_images,
                  self.is_training,
                  params=collections.OrderedDict(
                      zip(embedding_vars_keys, embedding_vars)),
                  reuse=True)['embeddings']
              query_logits = (self._fc_layer(query_embeddings)[:, 0:data.way])

    # Finetuning is now over, compute the query performance using the updated
    # fc layer, and possibly the updated embedding network.
    # NOTE(review): `finetune_op` is only bound inside the loop above, so this
    # line fails with an unbound name if self.num_finetune_steps is 0.
    with tf.control_dependencies([finetune_op] + vars_to_finetune):
      query_embeddings = self.embedding_fn(
          data.query_images,
          self.is_training,
          params=collections.OrderedDict(
              zip(embedding_vars_keys, embedding_vars)),
          reuse=True)['embeddings']
      query_logits = self._fc_layer(query_embeddings)[:, 0:data.way]

      if self.debug_log:
        # The train logits are computed only for printing.
        support_embeddings = self.embedding_fn(
            data.support_images,
            self.is_training,
            params=collections.OrderedDict(
                zip(embedding_vars_keys, embedding_vars)),
            reuse=True)['embeddings']
        logits = self._fc_layer(support_embeddings)[:, 0:data.way]

      print_op = tf.no_op()
      if self.debug_log:
        print_op = tf.print([
            'accuracy:',
            self.compute_accuracy(
                labels=data.onehot_support_labels, predictions=logits),
            'query accuracy:',
            self.compute_accuracy(
                labels=data.onehot_query_labels, predictions=query_logits),
        ])
      # Rebuild the returned logits under the print op so that fetching them
      # also triggers the (optional) debug print.
      with tf.control_dependencies([print_op]):
        query_logits = self._fc_layer(query_embeddings)[:, 0:data.way]

    return query_logits
Example #30
0
def bottleneck(x,
               depth,
               stride,
               weight_decay,
               params=None,
               moments=None,
               use_project=False,
               backprop_through_moments=True,
               is_training=True,
               input_rate=1,
               output_rate=1,
               use_bounded_activation=False):
  """Builds a ResNet18 residual block from two 3x3 conv_bn layers.

  The main branch is conv_bn (3x3, `stride`, `input_rate`) -> relu under the
  'conv1' scope, then conv_bn (3x3, stride 1, `output_rate`) under 'conv2'.
  The shortcut is the input `x`, or a 1x1 conv_bn projection of it when
  `use_project` is set. The block output is relu(shortcut + main branch).

  Args:
    x: Input to the block.
    depth: Indexable; `depth[0]` is passed to the first conv_bn and `depth[1]`
      to the second conv_bn and to the projection.
    stride: Stride of the first conv_bn and of the projection.
    weight_decay: Forwarded to every `conv_bn` call.
    params: Forwarded unchanged to every `conv_bn` call.
    moments: Forwarded unchanged to every `conv_bn` call.
    use_project: Whether the shortcut is a 1x1 conv_bn projection of `x`.
    backprop_through_moments: Forwarded to every `conv_bn` call.
    is_training: Forwarded to every `conv_bn` call.
    input_rate: Passed as `rate` to the first conv_bn.
    output_rate: Passed as `rate` to the second conv_bn.
    use_bounded_activation: Forwarded to `relu`; when true, the second conv_bn
      output is additionally clipped to [-6.0, 6.0].

  Returns:
    A tuple `(x, params, moments)`: the block output and OrderedDicts of the
    params and moments reported by the `conv_bn` calls, in call order.
  """
  block_params = collections.OrderedDict()
  block_moments = collections.OrderedDict()  # means and vars of the layers.

  def _absorb(layer_params, layer_moments):
    # Pair keys with values exactly as zip(keys, values) would; a repeated key
    # keeps its first position and takes the latest value.
    block_params.update(zip(layer_params.keys(), layer_params.values()))
    block_moments.update(zip(layer_moments.keys(), layer_moments.values()))

  shared_kwargs = dict(
      params=params,
      moments=moments,
      is_training=is_training,
      backprop_through_moments=backprop_through_moments)

  with tf.variable_scope('conv1'):
    h, layer_params, layer_moments = conv_bn(
        x, [3, 3],
        depth[0],
        stride,
        weight_decay,
        rate=input_rate,
        **shared_kwargs)
    _absorb(layer_params, layer_moments)

    h = relu(h, use_bounded_activation=use_bounded_activation)

  with tf.variable_scope('conv2'):
    h, layer_params, layer_moments = conv_bn(
        h, [3, 3],
        depth[1],
        stride=1,
        weight_decay=weight_decay,
        rate=output_rate,
        **shared_kwargs)
    if use_bounded_activation:
      h = tf.clip_by_value(h, -6.0, 6.0)
    _absorb(layer_params, layer_moments)

  shortcut = x
  with tf.variable_scope('identity'):
    if use_project:
      with tf.variable_scope('projection_conv'):
        shortcut, layer_params, layer_moments = conv_bn(
            shortcut, [1, 1],
            depth[1],
            stride,
            weight_decay,
            rate=1,
            **shared_kwargs)
        _absorb(layer_params, layer_moments)
    # The final activation is built inside the 'identity' scope.
    block_output = relu(
        shortcut + h, use_bounded_activation=use_bounded_activation)

  return block_output, block_params, block_moments