def tf_simhash_decompose(matrix, inner_dimension, seed=0):
    """Approximately decompose matrix as a product R S D.

    R is drawn from a private, seeded NumPy generator, so the result is
    reproducible for a given seed without reseeding NumPy's global random
    state as a side effect.

    Args:
      matrix: the matrix to be decomposed, given as a tensorflow matrix. Its
        static shape must have exactly two dimensions (it is unpacked below).
      inner_dimension: the number of rows in S.
      seed: a seed for the pseudorandom matrix R.

    Returns:
      Tensorflow matrices R, S, and D, where:
      R is iid normal distributed with inner_dimension columns,
      S is a +/-1 sign matrix, and
      D is a diagonal matrix.
    """
    rows, _ = matrix.get_shape().as_list()
    # A local RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.normal, but leaves the global generator untouched.
    rng = np.random.RandomState(seed)
    r = tf.convert_to_tensor(value=rng.normal(size=(rows, inner_dimension)),
                             dtype=tf.float32)
    # sign() yields 0 for exact zeros; map those to +1 so S is strictly +/-1.
    s_with_zeros = tf.math.sign(tf.linalg.matmul(r, matrix, transpose_a=True))
    s = tf.compat.v1.where(tf.math.equal(s_with_zeros, tf.constant(0.)),
                           tf.ones(tf.shape(input=s_with_zeros)), s_with_zeros)
    # D rescales each column of R S to the norm of the matching matrix column.
    rs_column_norms = tf.norm(tensor=tf.matmul(r, s), axis=0)
    matrix_column_norms = tf.norm(tensor=matrix, axis=0)
    d = tf.linalg.tensor_diag(
        tf.math.divide(matrix_column_norms, rs_column_norms))
    return r, s, d
Esempio n. 2
0
    def _build_train_ops(self):
        """Build critic/actor train ops and summaries, then initialize variables.

        Creates learning-rate placeholders (`lr_c`, `lr_a`), one Adam optimizer
        per network, optionally norm-clipped gradients, scalar summaries for
        the losses, the episode reward and each gradient norm, and finally
        runs the global variable initializer in `self.sess`.

        Reads `self.td_error`, `self.actor`, `self.a`, `self.critic_vars`,
        `self.actor_vars`, `self.clip_norm` and `self.sess`.
        """

        def _clip_present_grads(grads_and_vars):
            # Gradients may be None (the summary code below already guards
            # for that); tf.clip_by_norm cannot take None, so pass those
            # pairs through unchanged.
            return [(grad if grad is None else
                     tf.clip_by_norm(grad, self.clip_norm), var)
                    for grad, var in grads_and_vars]

        self.lr_c = tf.placeholder(tf.float32,
                                   shape=None,
                                   name='learning_rate_c')
        self.lr_a = tf.placeholder(tf.float32,
                                   shape=None,
                                   name='learning_rate_a')

        with tf.variable_scope('critic_train'):
            # Critic loss: mean squared TD error.
            self.loss_c = tf.reduce_mean(tf.square(self.td_error))
            self.optim_c = tf.train.AdamOptimizer(self.lr_c)
            self.grads_c = self.optim_c.compute_gradients(
                self.loss_c, self.critic_vars)
            if self.clip_norm:
                self.grads_c = _clip_present_grads(self.grads_c)

            self.train_op_c = self.optim_c.apply_gradients(self.grads_c)

        with tf.variable_scope('actor_train'):
            # Actor loss: cross-entropy of the taken action weighted by the
            # TD error, which is treated as a constant (stop_gradient).
            self.loss_a = tf.reduce_mean(
                tf.stop_gradient(self.td_error) *
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.actor, labels=self.a),
                name='loss_actor')
            self.optim_a = tf.train.AdamOptimizer(self.lr_a)
            self.grads_a = self.optim_a.compute_gradients(
                self.loss_a, self.actor_vars)
            if self.clip_norm:
                self.grads_a = _clip_present_grads(self.grads_a)

            self.train_op_a = self.optim_a.apply_gradients(self.grads_a)

        with tf.variable_scope('summary'):
            self.ep_reward = tf.placeholder(tf.float32, name='episode_reward')
            self.summary = [
                tf.summary.scalar('loss/critic', self.loss_c),
                tf.summary.scalar('loss/actor', self.loss_a),
                tf.summary.scalar('episode_reward', self.ep_reward)
            ]
            self.summary += [
                tf.summary.scalar('grads/a_' + var.name, tf.norm(grad))
                for grad, var in self.grads_a if grad is not None
            ]
            self.summary += [
                tf.summary.scalar('grads/c_' + var.name, tf.norm(grad))
                for grad, var in self.grads_c if grad is not None
            ]
            self.merged_summary = tf.summary.merge_all(
                key=tf.GraphKeys.SUMMARIES)

        self.train_ops = [self.train_op_a, self.train_op_c]

        self.sess.run(tf.global_variables_initializer())
Esempio n. 3
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Build the grouped op that applies one momentum step per parameter.

        Each parameter gets a non-trainable "<param>/Momentum" variable. The
        step is optionally weight-decayed and optionally scaled by a
        layer-wise trust ratio `eeta * ||param|| / ||direction||`.

        Args:
          grads_and_vars: iterable of (gradient, parameter) pairs; pairs with
            a None member are skipped.
          global_step: optional variable incremented by one as part of the op.
          name: optional name for the returned grouped op.

        Returns:
          A `tf.group` of all parameter, momentum and global-step assignments.
        """

        def _trust_ratio(weights, direction):
            # Falls back to 1.0 whenever either norm is zero.
            w_norm = tf.norm(weights, ord=2)
            d_norm = tf.norm(direction, ord=2)
            return tf.where(
                tf.greater(w_norm, 0),
                tf.where(tf.greater(d_norm, 0),
                         (self.eeta * w_norm / d_norm), 1.0), 1.0)

        update_ops = []
        for grad, param in grads_and_vars:
            if grad is None or param is None:
                continue

            param_name = param.op.name
            velocity = tf.get_variable(name=param_name + "/Momentum",
                                       shape=param.shape.as_list(),
                                       dtype=tf.float32,
                                       trainable=False,
                                       initializer=tf.zeros_initializer())

            if self._use_weight_decay(param_name):
                grad += self.weight_decay * param

            if self.classic_momentum:
                # Classic form: the learning rate is folded into the velocity.
                if self._do_layer_adaptation(param_name):
                    ratio = _trust_ratio(param, grad)
                else:
                    ratio = 1.0
                step_size = self.learning_rate * ratio

                new_velocity = (tf.multiply(self.momentum, velocity) +
                                step_size * grad)
                if self.use_nesterov:
                    step = (tf.multiply(self.momentum, new_velocity) +
                            step_size * grad)
                else:
                    step = new_velocity
                new_param = param - step
            else:
                # Otherwise the velocity tracks raw gradients and the
                # (trust-ratio-scaled) learning rate is applied at the end.
                new_velocity = tf.multiply(self.momentum, velocity) + grad
                if self.use_nesterov:
                    step = tf.multiply(self.momentum, new_velocity) + grad
                else:
                    step = new_velocity

                if self._do_layer_adaptation(param_name):
                    ratio = _trust_ratio(param, step)
                else:
                    ratio = 1.0
                step_size = ratio * self.learning_rate
                new_param = param - step_size * step

            update_ops.append(param.assign(new_param))
            update_ops.append(velocity.assign(new_velocity))

        if global_step is not None:
            update_ops.append(global_step.assign(global_step + 1))
        return tf.group(*update_ops, name=name)
Esempio n. 4
0
 def normalize_grad_fn(grads_and_vars):
     """Rescale every gradient relative to the norm of its variable.

     Each gradient is divided by its own norm (plus 1e-10 to avoid a zero
     denominator), then multiplied by the variable's norm and by
     `self.config['alpha']`. `self` comes from the enclosing scope.

     Returns:
       A list of (rescaled_gradient, variable) pairs in the input order.
     """
     return [
         (grad / (tf.norm(grad) + tf.constant(1e-10)) *
          tf.norm(var) * self.config['alpha'], var)
         for grad, var in grads_and_vars
     ]
Esempio n. 5
0
 def add_compression_summaries(self):
   """Registers scalar summaries for this compression op.

   Logged under the `<spec name>_summaries` name scope: the last alpha update
   step, the alpha value, and the norms of the a and b matrices.
   """
   with tf.name_scope(self._spec.name + '_summaries'):
     tf.summary.scalar('last_alpha_update_step', self._last_alpha_update_step)
     tf.summary.scalar(self.alpha.op.name + '/alpha', self.alpha)
     matrices = (('/a_matrix_norm', self.a_matrix_tfvar),
                 ('/b_matrix_norm', self.b_matrix_tfvar))
     for tag_suffix, matrix_var in matrices:
       tf.summary.scalar(matrix_var.op.name + tag_suffix,
                         tf.norm(matrix_var))
Esempio n. 6
0
def image_encoder(image_feat,
                  hparams,
                  name="image_encoder",
                  save_weights_to=None,
                  make_image_summary=True):
    """A stack of self attention layers over image features.

    Each layer applies a self-attention sub-layer followed by a feed-forward
    sub-layer, each wrapped in layer_preprocess / layer_postprocess. Norms of
    the intermediate tensors are recorded in the "norms" collection.

    Args:
      image_feat: input tensor fed to the first layer.
      hparams: hyperparameters; the layer count is `num_encoder_layers`, or
        `num_hidden_layers` when the former is falsy.
      name: variable scope name for the whole stack.
      save_weights_to: forwarded to `vqa_layers.multihead_attention`.
      make_image_summary: forwarded to `vqa_layers.multihead_attention`.

    Returns:
      The output of the last layer after a final `layer_preprocess`.
    """

    x = image_feat
    with tf.variable_scope(name):
        for layer in range(hparams.num_encoder_layers
                           or hparams.num_hidden_layers):
            with tf.variable_scope("layer_%d" % layer):
                with tf.variable_scope("self_attention"):
                    y = vqa_layers.multihead_attention(
                        common_layers.layer_preprocess(x, hparams),
                        None,
                        None,
                        hparams.attention_key_channels
                        or hparams.image_hidden_size,
                        hparams.attention_value_channels
                        or hparams.image_hidden_size,
                        hparams.image_hidden_size,
                        hparams.num_heads,
                        hparams.attention_dropout,
                        attention_type=hparams.self_attention_type,
                        save_weights_to=save_weights_to,
                        max_relative_position=None,
                        make_image_summary=make_image_summary,
                        dropout_broadcast_dims=None,
                        max_length=None,
                        vars_3d=False,
                        # Was misspelled `scale_otproduct`, so the hparam
                        # never reached the intended keyword.
                        scale_dotproduct=hparams.scale_dotproduct)
                    utils.collect_named_outputs("norms",
                                                "image_feat_self_attention",
                                                tf.norm(y, axis=-1))
                    x = common_layers.layer_postprocess(x, y, hparams)
                    utils.collect_named_outputs(
                        "norms", "image_feat_self_attention_zero_add",
                        tf.norm(x, axis=-1))
                with tf.variable_scope("ffn"):
                    y = common_layers.dense_relu_dense(
                        common_layers.layer_preprocess(x, hparams),
                        hparams.image_filter_size,
                        hparams.image_hidden_size,
                        dropout=hparams.relu_dropout,
                        dropout_broadcast_dims=None)
                    utils.collect_named_outputs("norms", "image_feat_ffn",
                                                tf.norm(y, axis=-1))
                    x = common_layers.layer_postprocess(x, y, hparams)
                    utils.collect_named_outputs("norms",
                                                "image_feat_ffn_zero_add",
                                                tf.norm(x, axis=-1))
        # if normalization is done in layer_preprocess, then it should also be done
        # on the output, since the output can grow very large, being the sum of
        # a whole stack of unnormalized layer outputs.
        return common_layers.layer_preprocess(x, hparams)
def question_encoder(question,
                     question_self_attention_bias,
                     hparams,
                     name="question_encoder",
                     save_weights_to=None,
                     make_image_summary=True):
    """Encode the question with a stack of self-attention + FFN layers.

    Args:
      question: input tensor fed to the first layer.
      question_self_attention_bias: bias passed to every attention call.
      hparams: hyperparameters; the layer count is `num_encoder_layers`, or
        `num_hidden_layers` when the former is falsy.
      name: variable scope name for the whole stack.
      save_weights_to: forwarded to `vqa_layers.multihead_attention`.
      make_image_summary: forwarded to `vqa_layers.multihead_attention`.

    Returns:
      The output of the last layer after a final `layer_preprocess`.
    """
    num_layers = hparams.num_encoder_layers or hparams.num_hidden_layers
    hidden = question
    with tf.variable_scope(name):
        for layer_idx in range(num_layers):
            with tf.variable_scope("layer_%d" % layer_idx):
                with tf.variable_scope("self_attention"):
                    attn_input = common_layers.layer_preprocess(
                        hidden, hparams)
                    key_depth = (hparams.attention_key_channels
                                 or hparams.hidden_size)
                    value_depth = (hparams.attention_value_channels
                                   or hparams.hidden_size)
                    attn_output = vqa_layers.multihead_attention(
                        attn_input,
                        None,
                        question_self_attention_bias,
                        key_depth,
                        value_depth,
                        hparams.hidden_size,
                        hparams.num_heads,
                        hparams.attention_dropout,
                        attention_type=hparams.question_self_attention_type,
                        block_length=hparams.block_length,
                        save_weights_to=save_weights_to,
                        make_image_summary=make_image_summary,
                        scale_dotproduct=hparams.scale_dotproduct,
                    )
                    utils.collect_named_outputs(
                        "norms", "query_self_attention_%d" % layer_idx,
                        tf.norm(attn_output, axis=-1))
                    hidden = common_layers.layer_postprocess(
                        hidden, attn_output, hparams)
                    utils.collect_named_outputs(
                        "norms",
                        "query_self_attention_postprocess_%d" % layer_idx,
                        tf.norm(hidden, axis=-1))
                with tf.variable_scope("ffn"):
                    ffn_input = common_layers.layer_preprocess(
                        hidden, hparams)
                    ffn_output = common_layers.dense_relu_dense(
                        ffn_input,
                        hparams.filter_size,
                        hparams.hidden_size,
                        dropout=hparams.relu_dropout,
                    )
                    utils.collect_named_outputs(
                        "norms", "query_ffn_%d" % layer_idx,
                        tf.norm(ffn_output, axis=-1))
                    hidden = common_layers.layer_postprocess(
                        hidden, ffn_output, hparams)
                    utils.collect_named_outputs(
                        "norms", "query_ffn_postprocess_%d" % layer_idx,
                        tf.norm(hidden, axis=-1))
        # The stack output is a sum of unnormalized layer outputs and can grow
        # large, so apply the same preprocessing (normalization, if enabled)
        # to the final result as well.
        return common_layers.layer_preprocess(hidden, hparams)
Esempio n. 8
0
 def pca_error(self, y, z):
     """Mean reconstruction error between `y` and `z` projected by `self.A`.

     `z` is multiplied by the transpose of `self.A`, and the row-wise
     distance to `y` is averaged. The distance depends on `self.norm_type`:

       'MSE'/'mse'/'Frob'/'F': squared L2 norm of each row,
       'L1'/'l1': L1 norm of each row,
       'LAD'/'lad'/'L21'/'l21'/'L2'/'l2': L2 norm of each row.

     Raises:
       ValueError: if `self.norm_type` is none of the names above.
     """
     norm_type = self.norm_type
     z = tf.matmul(z, tf.transpose(self.A))
     residual = y - z
     if norm_type in ('MSE', 'mse', 'Frob', 'F'):
         # Sum the squares directly: squaring tf.norm goes through a sqrt
         # whose gradient is NaN when a residual row is exactly zero.
         return tf.reduce_mean(tf.reduce_sum(tf.square(residual), axis=1))
     if norm_type in ('L1', 'l1'):
         return tf.reduce_mean(tf.norm(residual, ord=1, axis=1))
     if norm_type in ('LAD', 'lad', 'L21', 'l21', 'L2', 'l2'):
         return tf.reduce_mean(tf.norm(residual, ord=2, axis=1))
     raise ValueError("Norm type error!")
Esempio n. 9
0
    def build_network(self):
        """Build the rating-prediction graph, its loss and its optimizer op.

        Two pairs of user/item embedding tables are created. The first pair
        is concatenated as-is with the element-wise product of the second
        pair, and the result goes through four sigmoid dense layers and a
        linear output unit. The loss is the summed squared rating error plus
        the dense-layer regularization loss plus `reg_rate` times the sum of
        the embedding-table norms; it is minimized with RMSProp.
        """
        print("num_factor_1=%d, num_factor_2=%d, hidden_dimension=%d" % (
            self.num_factor_1, self.num_factor_2, self.hidden_dimension))

        # Placeholders for a batch of (user, item, rating) triples.
        self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id')
        self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id')
        self.y = tf.placeholder("float", [None], 'rating')

        def _embedding_table(num_rows, num_cols):
            return tf.Variable(
                tf.random_normal([num_rows, num_cols], stddev=0.01))

        # Embedding tables (created in the order P, Q, U, V).
        P = _embedding_table(self.n_users, self.num_factor_1)
        Q = _embedding_table(self.n_items, self.num_factor_1)
        U = _embedding_table(self.n_users, self.num_factor_2)
        V = _embedding_table(self.n_items, self.num_factor_2)

        # Forward pass.
        user_p = tf.nn.embedding_lookup(P, self.user_id)
        item_q = tf.nn.embedding_lookup(Q, self.item_id)
        user_u = tf.nn.embedding_lookup(U, self.user_id)
        item_v = tf.nn.embedding_lookup(V, self.item_id)
        features = tf.concat(
            values=[user_p, item_q, tf.multiply(user_u, item_v)], axis=1)

        # tf.keras regularizer used in place of tf.contrib's l2_regularizer.
        regularizer = tf.keras.regularizers.l2(self.reg_rate)

        def _dense(inputs, units, activation):
            return tf.layers.dense(
                inputs=inputs, units=units, activation=activation,
                bias_initializer=tf.random_normal_initializer,
                kernel_initializer=tf.random_normal_initializer,
                kernel_regularizer=regularizer)

        hidden = _dense(features,
                        2 * self.num_factor_1 + self.num_factor_2,
                        tf.sigmoid)
        for _ in range(3):
            hidden = _dense(hidden, self.hidden_dimension, tf.sigmoid)
        output = _dense(hidden, 1, None)
        self.pred_rating = tf.reshape(output, [-1])

        # Loss and training op.
        squared_error = tf.reduce_sum(tf.square(self.y - self.pred_rating))
        layer_penalty = tf.losses.get_regularization_loss()
        embedding_penalty = self.reg_rate * (
            tf.norm(U) + tf.norm(V) + tf.norm(P) + tf.norm(Q))
        self.loss = squared_error + layer_penalty + embedding_penalty
        self.optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)
 def add_compression_summaries(self):
   """Registers scalar summaries for alpha and the a, b and c matrices.

   Under the `<spec name>_summaries` name scope this logs: alpha, the norm
   of the a matrix, the L1 norm of the flattened b matrix (tagged
   `d_matrix_norm`), and the sum of the c matrix entries (tagged
   `c_matrix_norm`).
   """
   spec_name = self._spec.name
   with tf.name_scope(spec_name + '_summaries'):
     logging.info('add_compression_summaries scope name is %s', spec_name)
     tf.summary.scalar(self.alpha.op.name + '/alpha', self.alpha)

     a_matrix = self.a_matrix_tfvar
     tf.summary.scalar(a_matrix.op.name + '/a_matrix_norm',
                       tf.norm(a_matrix))

     b_matrix = self.b_matrix_tfvar
     b_flat = tf.reshape(b_matrix, [-1])
     tf.summary.scalar(b_matrix.op.name + '/d_matrix_norm',
                       tf.norm(b_flat, ord=1))

     c_matrix = self.c_matrix_tfvar
     tf.summary.scalar(c_matrix.op.name + '/c_matrix_norm',
                       tf.reduce_sum(c_matrix))
Esempio n. 11
0
def compute_Fl(flow_gt, flow_est, mask):
    """Fraction of masked positions whose flow error counts as an outlier.

    A position is an outlier when the norm (over the last axis) of the masked
    error exceeds 3 and also exceeds 5% of the ground-truth flow norm. The
    count of outliers with `mask > 0` is divided by `sum(mask) + 1e-6`.

    Args:
      flow_gt: ground-truth flow tensor.
      flow_est: estimated flow tensor.
      mask: validity mask multiplied into the error; presumably it has a
        trailing singleton axis so it broadcasts against the flow — confirm
        against callers.

    Returns:
      A scalar tensor with the outlier fraction.
    """
    masked_error = tf.multiply(flow_gt - flow_est, mask)
    error_norm = tf.norm(masked_error, axis=-1)

    # Clamp the ground-truth norm so the relative error never divides by zero.
    gt_norm = tf.maximum(tf.norm(flow_gt, axis=-1), 1e-12)
    large_absolute = error_norm > 3
    large_relative = tf.divide(error_norm, gt_norm) > 0.05
    is_outlier = tf.logical_and(large_absolute, large_relative)

    counted = tf.logical_and(tf.expand_dims(is_outlier, -1), mask > 0)
    outlier_count = tf.reduce_sum(tf.cast(counted, tf.float32))
    return outlier_count / (tf.reduce_sum(mask) + 1e-6)
Esempio n. 12
0
 def reconstruction_loss(self, x, x_tilde):
     """Per-example reconstruction error between `x` and `x_tilde`.

     Both inputs are flattened to (first_dim, -1) and compared row by row.
     The distance depends on `self.loss_norm_type`:

       'MSE'/'mse'/'Frob'/'F': squared L2 norm of each row,
       'L1'/'l1': L1 norm of each row,
       'LAD'/'lad'/'L21'/'l21'/'L2'/'l2': L2 norm of each row.

     Returns:
       A 1-D tensor with one error value per row (no reduction over rows).

     Raises:
       ValueError: if `self.loss_norm_type` is none of the names above.
     """
     norm_type = self.loss_norm_type
     x = tf.reshape(x, (tf.shape(x)[0], -1))
     x_tilde = tf.reshape(x_tilde, (tf.shape(x_tilde)[0], -1))
     residual = x - x_tilde
     if norm_type in ('MSE', 'mse', 'Frob', 'F'):
         # Sum the squares directly: squaring tf.norm goes through a sqrt
         # whose gradient is NaN when a residual row is exactly zero.
         return tf.reduce_sum(tf.square(residual), axis=1)
     if norm_type in ('L1', 'l1'):
         return tf.norm(residual, ord=1, axis=1)
     if norm_type in ('LAD', 'lad', 'L21', 'l21', 'L2', 'l2'):
         return tf.norm(residual, ord=2, axis=1)
     raise ValueError("Norm type error!")
Esempio n. 13
0
    def body(self, features):
        """Encode image and question jointly, then decode with a learned query.

        Image features (optionally computed from raw images) are projected to
        `hidden_size`, combined with the question by
        `prepare_image_question_encoder`, encoded, and attended to by a single
        learned query vector in the decoder. Norms of the main intermediate
        tensors are collected under "norms" and summarized.

        Args:
          features: dict with at least "inputs" and "question".

        Returns:
          The decoder output with a singleton axis inserted at position 1.
        """
        hp = self.hparams
        if hp.image_input_type != "image":
            image_feat = features["inputs"]
        else:
            # NOTE(review): eval resolves the model function from an hparam
            # string; make sure hp.image_model_fn only comes from trusted
            # configuration.
            # pylint: disable=eval-used
            image_feat = vqa_layers.image_embedding(
                features["inputs"],
                model_fn=eval(hp.image_model_fn),
                trainable=hp.train_resnet,
                is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)

        image_feat = common_layers.flatten4d3d(image_feat)
        image_feat = common_layers.dense(image_feat, hp.hidden_size)
        utils.collect_named_outputs("norms", "image_feat_after_proj",
                                    tf.norm(image_feat, axis=-1))

        question = common_layers.flatten4d3d(features["question"])
        utils.collect_named_outputs("norms", "question_embedding",
                                    tf.norm(question, axis=-1))

        prepared = prepare_image_question_encoder(image_feat, question, hp)
        (encoder_input, encoder_self_attention_bias,
         encoder_decoder_attention_bias) = prepared

        keep_prob = 1. - hp.layer_prepostprocess_dropout
        encoder_input = tf.nn.dropout(encoder_input, keep_prob=keep_prob)
        encoder_output = image_question_encoder(
            encoder_input, encoder_self_attention_bias, hp)
        utils.collect_named_outputs("norms", "encoder_output",
                                    tf.norm(encoder_output, axis=-1))

        # Learned query vector, scaled by sqrt(hidden_size), then tiled to one
        # copy per batch element.
        query_var = tf.get_variable("query", [hp.hidden_size])
        query = query_var * hp.hidden_size**0.5
        query = tf.expand_dims(tf.expand_dims(query, axis=0), axis=0)
        batch_size = common_layers.shape_list(encoder_input)[0]
        query = tf.tile(query, [batch_size, 1, 1])
        query = tf.nn.dropout(query, keep_prob=keep_prob)

        decoder_output = decoder(query, encoder_output, None,
                                 encoder_decoder_attention_bias, hp)
        utils.collect_named_outputs("norms", "decoder_output",
                                    tf.norm(decoder_output, axis=-1))

        norm_tensors = utils.convert_collection_to_dict("norms")
        vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

        # Insert a singleton axis at position 1.
        return tf.expand_dims(decoder_output, axis=1)
Esempio n. 14
0
def build_graph(hub_module_url, target_image_path):
    """Build a graph scoring input images by cosine similarity to a target.

    Args:
      hub_module_url: location of the TF-Hub module used to extract image
        feature vectors.
      target_image_path: path of the reference image; it is read once while
        the graph is built.

    Returns:
      A pair `(input_byte, similarity)`: a string placeholder taking a batch
      of encoded images, and a 1-D tensor with the cosine similarity of each
      input image to the target image.
    """
    # Step 1) Prepare pre-trained model for extracting image features.
    module = hub.Module(hub_module_url)
    height, width = hub.get_expected_image_size(module)

    # Copied a method of https://github.com/GoogleCloudPlatform/cloudml-samples/blob/bf0680726/flowers/trainer/model.py#L181
    # and fixed for all type images (not only jpeg)
    def decode_and_resize(image_str_tensor):
        """Decodes an encoded image string, resizes it, returns a uint8 tensor."""
        image = tf.image.decode_image(image_str_tensor, channels=CHANNELS)
        # Note resize expects a batch_size, but tf_map suppresses that index,
        # thus we have to expand then squeeze.  Resize returns float32 in the
        # range [0, uint8_max]
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(image, [height, width],
                                         align_corners=False)
        image = tf.squeeze(image, squeeze_dims=[0])
        image = tf.cast(image, dtype=tf.uint8)
        return image

    def to_img_feature(images):
        """Extract the feature of image vectors"""
        outputs = module(dict(images=images),
                         signature="image_feature_vector",
                         as_dict=True)
        return outputs['default']

    # Step 2) Extract image features of the target image.
    # Read inside a `with` block so the file handle is closed right away.
    with tf.gfile.GFile(target_image_path, 'rb') as target_file:
        target_image_bytes = target_file.read()
    target_image = tf.constant(target_image_bytes, dtype=tf.string)
    target_image = decode_and_resize(target_image)
    target_image = tf.image.convert_image_dtype(target_image, dtype=tf.float32)
    target_image = tf.expand_dims(target_image, 0)
    target_image = to_img_feature(target_image)

    # Step 3) Extract image features of input images.
    input_byte = tf.placeholder(tf.string, shape=[None])
    input_image = tf.map_fn(decode_and_resize,
                            input_byte,
                            back_prop=False,
                            dtype=tf.uint8)
    input_image = tf.image.convert_image_dtype(input_image, dtype=tf.float32)
    input_image = to_img_feature(input_image)

    # Step 4) Compare cosine_similarities of the target image and the input images.
    dot = tf.tensordot(target_image, tf.transpose(input_image), 1)
    similarity = dot / (tf.norm(target_image, axis=1) *
                        tf.norm(input_image, axis=1))
    similarity = tf.reshape(similarity, [-1])

    return input_byte, similarity
    def gradient_panalty(self, real, fake, scope="discriminator_A"):
        """Gradient penalties for the discriminator's logit and CAM-logit heads.

        The discriminator is evaluated on random interpolations between
        `real` and `fake`. When `self.gan_type` contains 'dragan', `fake` is
        first replaced by `real` plus uniform noise scaled by half the
        standard deviation of `real`.

        Args:
          real: batch of real samples.
          fake: batch of generated samples.
          scope: variable scope of the (reused) discriminator.

        Returns:
          A pair `(GP, cam_GP)`: the summed penalties of the two logit
          outputs and of the two CAM-logit outputs. Each is the integer 0
          when `self.gan_type` selects no penalty.
        """
        if 'dragan' in self.gan_type:
            eps = tf.random_uniform(shape=tf.shape(real), minval=0., maxval=1.)
            _, real_var = tf.nn.moments(real, axes=[0, 1, 2, 3])
            # The noise magnitude decides the size of the local region.
            real_std = tf.sqrt(real_var)
            fake = real + 0.5 * real_std * eps

        alpha = tf.random_uniform(shape=[self.batch_size, 1, 1, 1],
                                  minval=0.,
                                  maxval=1.)
        interpolated = real + alpha * (fake - real)

        logit, cam_logit, _, _ = self.discriminator(interpolated,
                                                    reuse=True,
                                                    scope=scope)

        def _penalty_terms(outputs):
            terms = []
            for idx in range(2):
                # Gradient of D(interpolated) and its per-sample L2 norm.
                grad = tf.gradients(outputs[idx], interpolated)[0]
                grad_norm = tf.norm(flatten(grad), axis=1)

                if self.gan_type == 'wgan-lp':
                    # WGAN-LP: only norms above 1 are penalized.
                    one_sided = tf.maximum(0.0, grad_norm - 1.)
                    terms.append(
                        self.ld * tf.reduce_mean(tf.square(one_sided)))
                elif self.gan_type in ('wgan-gp', 'dragan'):
                    terms.append(
                        self.ld * tf.reduce_mean(tf.square(grad_norm - 1.)))
            return terms

        GP = _penalty_terms(logit)
        cam_GP = _penalty_terms(cam_logit)
        return sum(GP), sum(cam_GP)
 def create(self):
     """Build a basis from the 'unorthogonal_rotation' variable.

     The rows of a trainable (dim, dim) variable are processed in order:
     from each row the projections onto the rows accumulated so far are
     subtracted, and the remainder is normalized to unit length.

     Returns:
       A (dim, dim) tensor whose rows are the resulting basis vectors.
     """
     raw_rows = tf.get_variable('unorthogonal_rotation',
                                [self.dim, self.dim],
                                dtype=tf.float32)
     # Keep a leading axis on every row so it can be used with matmul.
     first_row = raw_rows[0, :]
     basis = tf.expand_dims(first_row / tf.norm(first_row), 0)
     num_rows = raw_rows.get_shape()[0].value
     for row_idx in range(1, num_rows):
         row = tf.expand_dims(raw_rows[row_idx, :], 0)
         coefficients = tf.matmul(row, basis, transpose_b=True)
         remainder = row - tf.matmul(coefficients, basis)
         # The projection treats the accumulated rows as orthonormal; the
         # original author assumes the matrix is close to orthogonal.
         basis = tf.concat([basis, remainder / tf.norm(remainder)], axis=0)
     return basis
    def _resource_apply_dense(self, grad, var):
        """Apply a dense gradient to `var` with a layer-adaptive Adam-style step.

        Updates the "m" and "v" slots in place, forms a bias-corrected update
        `m_hat / (sqrt(v_hat) + epsilon)`, optionally adds weight decay, and
        scales the step by the ratio of weight norm to update norm for
        variables selected by `_do_layer_adaptation`.

        Args:
          grad: gradient tensor for `var`.
          var: the variable to update.

        Returns:
          The op of the assignment that writes the new value into `var`.
        """
        beta1_power, beta2_power = self._get_beta_accumulators()
        # Cast all hyperparameters to the variable's dtype.
        beta1_power = tf.cast(beta1_power, var.dtype.base_dtype)
        beta2_power = tf.cast(beta2_power, var.dtype.base_dtype)
        lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
        weight_decay_rate_t = tf.cast(self._weight_decay_rate_t,
                                      var.dtype.base_dtype)
        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        m_t = m * beta1_t + m_scaled_g_values
        m_t = tf.assign(m, m_t, use_locking=self._use_locking)
        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        v_t = v * beta2_t + v_scaled_g_values
        v_t = tf.assign(v, v_t, use_locking=self._use_locking)

        # ==== The following is with m_t_hat and v_t_hat
        # Bias-corrected moments: divide by (1 - beta^t).
        m_t_hat = m_t / (1. - beta1_power)
        v_t_hat = v_t / (1. - beta2_power)

        v_sqrt = tf.sqrt(v_t_hat)
        update = m_t_hat / (v_sqrt + epsilon_t)

        # ==== The following is the original LAMBOptimizer implementation
        # v_sqrt = tf.sqrt(v_t_hat)
        # update = m_t / (v_sqrt + epsilon_t)

        var_name = self._get_variable_name(var.name)
        if self._do_use_weight_decay(var_name):
            update += weight_decay_rate_t * var

        # Trust ratio ||w|| / ||update||; stays 1.0 when adaptation is off or
        # either norm is zero.
        ratio = 1.0
        if self._do_layer_adaptation(var_name):
            # NOTE(review): variables whose leading dimension is 24 get one
            # norm per leading index (reduced over all other axes, keepdims)
            # instead of a single global norm. The meaning of 24 is not
            # visible here — presumably a layer or head count; confirm.
            if var.shape.ndims > 1 and var.shape[0] == 24:
                w_norm = tf.norm(var, 2, range(1, var.shape.ndims), True)
                g_norm = tf.norm(update, 2, range(1, var.shape.ndims), True)
            else:
                w_norm = tf.norm(var, ord=2)
                g_norm = tf.norm(update, ord=2)
            ratio = tf.where(
                tf.greater(w_norm, 0),
                tf.where(tf.greater(g_norm, 0), (w_norm / g_norm), 1.0), 1.0)

        var_update = var - ratio * lr_t * update
        return tf.assign(var, var_update, use_locking=self._use_locking).op
    def _apply_sparse_shared(self, grad, var, indices, scatter_add):
        """Apply a sparse gradient to `var` with a layer-adaptive Adam-style step.

        The "m" and "v" slots are first decayed everywhere and then receive
        the scaled gradient contributions only at `indices`. The step itself
        is built from the bias-corrected moments, optional weight decay and
        an optional trust ratio.

        Args:
          grad: gradient values for the rows selected by `indices`.
          var: the variable to update.
          indices: indices into `var` that `grad` applies to.
          scatter_add: callable `(slot, indices, values)` that adds `values`
            into `slot` at `indices` and returns the result.

        Returns:
          A grouped op covering the variable update and both slot updates.
        """
        beta1_power, beta2_power = self._get_beta_accumulators()
        # Cast all hyperparameters to the variable's dtype.
        beta1_power = tf.cast(beta1_power, var.dtype.base_dtype)
        beta2_power = tf.cast(beta2_power, var.dtype.base_dtype)
        lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = tf.cast(self._epsilon_t, var.dtype.base_dtype)
        weight_decay_rate_t = tf.cast(self._weight_decay_rate_t,
                                      var.dtype.base_dtype)
        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        # Decay the whole slot first; the scatter_add must run after it.
        m_t = tf.assign(m, m * beta1_t, use_locking=self._use_locking)
        with tf.control_dependencies([m_t]):
            m_t = scatter_add(m, indices, m_scaled_g_values)
        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        # Same two-step pattern as for m: decay, then add at `indices`.
        v_t = tf.assign(v, v * beta2_t, use_locking=self._use_locking)
        with tf.control_dependencies([v_t]):
            v_t = scatter_add(v, indices, v_scaled_g_values)

        # ==== The following is with m_t_hat and v_t_hat
        # Bias-corrected moments: divide by (1 - beta^t).
        m_t_hat = m_t / (1. - beta1_power)
        v_t_hat = v_t / (1. - beta2_power)

        v_sqrt = tf.sqrt(v_t_hat)
        update = m_t_hat / (v_sqrt + epsilon_t)

        # ==== The following is the original LAMBOptimizer implementation
        # v_sqrt = tf.sqrt(v_t_hat)
        # update = m_t / (v_sqrt + epsilon_t)

        var_name = self._get_variable_name(var.name)
        if self._do_use_weight_decay(var_name):
            update += weight_decay_rate_t * var

        # Trust ratio ||w|| / ||update||; stays 1.0 when adaptation is off or
        # either norm is zero.
        ratio = 1.0
        if self._do_layer_adaptation(var_name):
            w_norm = tf.norm(var, ord=2)
            g_norm = tf.norm(update, ord=2)
            ratio = tf.where(
                tf.greater(w_norm, 0),
                tf.where(tf.greater(g_norm, 0), (w_norm / g_norm), 1.0), 1.0)
        var_update = tf.assign_sub(var,
                                   ratio * lr_t * update,
                                   use_locking=self._use_locking)
        return tf.group(*[var_update, m_t, v_t])
Esempio n. 19
0
def pgd(model_fn,
        inputs,
        optimizer=None,
        layer_name='word_embeddings',
        epsilon=0.05,
        n_loop=2):
    """Build the graph for PGD-style adversarial training on one variable.

    A clean forward/backward pass is built first. Then, `n_loop` times, the
    variable selected by `utils.find_grad_and_var(..., layer_name)` is moved
    along its normalised loss gradient (each step scaled by `1 / n_loop`),
    with the accumulated perturbation rescaled so that its norm never exceeds
    `epsilon`. Finally gradients are computed once more at the perturbed
    point, the accumulated perturbation is subtracted from the variable
    again, and the clean and adversarial gradients are combined with
    `utils.average_grads_and_vars`.

    Args:
      model_fn: Callable invoked as `model_fn(inputs, True)`; its result is
        indexed with `'loss'`.
      inputs: Passed through unchanged to `model_fn`.
      optimizer: Forwarded to `utils.compute_gradients`.
      layer_name: Identifier handed to `utils.find_grad_and_var` to pick the
        variable that is attacked.
      epsilon: Upper bound on the norm of the accumulated perturbation.
      n_loop: Number of attack steps.

    Returns:
      `AdversarialOutput(model_outputs, grads_and_vars)` where `model_outputs`
      comes from the clean pass and `grads_and_vars` is the averaged result.
    """
    # Clean pass.
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        model_outputs = model_fn(inputs, True)
        grads_and_vars = utils.compute_gradients(model_outputs['loss'],
                                                 optimizer)

    total_perturbation = 0.0
    pending_op = tf.no_op()
    for _ in range(n_loop):
        with tf.variable_scope(tf.get_variable_scope(),
                               reuse=tf.AUTO_REUSE):
            # Each step is built behind the previous assignment so that it
            # sees the already-perturbed variable.
            with tf.control_dependencies([pending_op]):
                adv_outputs = model_fn(inputs, True)
                attack_grad_and_vars = utils.compute_gradients(
                    adv_outputs['loss'], optimizer)
                embedding_gradients, embeddings = utils.find_grad_and_var(
                    attack_grad_and_vars, layer_name)

                unit_gradient = embedding_gradients / (
                    tf.norm(embedding_gradients) + 1e-9)
                step = tf.multiply(1 / n_loop, unit_gradient)

                # Project the accumulated perturbation back onto the
                # epsilon-ball when it would leave it.
                candidate = total_perturbation + step
                candidate_norm = tf.norm(candidate)
                projected = tf.cond(
                    candidate_norm > epsilon,
                    lambda: candidate * tf.divide(epsilon, candidate_norm),
                    lambda: candidate)

                # Only the increment relative to what is already applied is
                # added to the variable.
                increment = projected - total_perturbation
                pending_op = embeddings.assign(embeddings + increment)
                total_perturbation = projected

    # Gradients at the perturbed point, then undo the perturbation.
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        with tf.control_dependencies([pending_op]):
            attack_outputs = model_fn(inputs, True)
            attack_grad_and_vars = utils.compute_gradients(
                attack_outputs['loss'], optimizer)
            embedding_gradients, embeddings = utils.find_grad_and_var(
                attack_grad_and_vars, layer_name)
            restore_op = embeddings.assign(embeddings - total_perturbation)

    # Combine clean and adversarial gradients behind the restore.
    with tf.control_dependencies([restore_op]):
        grads_and_vars = utils.average_grads_and_vars(
            [grads_and_vars, attack_grad_and_vars])
    return AdversarialOutput(model_outputs, grads_and_vars)
Esempio n. 20
0
def learning(ACS, target_input, Rx, Ry, sess, ACS_dim_X, ACS_dim_Y, ACS_dim_Z,
             target_dim_X, target_dim_Y, target_dim_Z, target,
             kernel_x_1, kernel_x_2, kernel_y_1, kernel_y_2,
             layer1_channels, layer2_channels, kernel_last_x, kernel_last_y,
             LearningRate, MaxIteration):
    """Fit a three-layer dilated-convolution network mapping ACS to target.

    The network is conv (no activation) -> conv + ReLU -> conv, each applied
    through `conv2d_dilate(..., Rx, Ry)`. The loss is the mean of the L2 and
    L1 norms of the residual plus a weighted `tf.nn.l2_loss` penalty on the
    three kernels. Training uses Adam with an exponentially decayed learning
    rate; the decay step is supplied by feeding the loop index into
    `global_step` rather than by letting the optimizer increment it.

    Args:
      ACS: Value fed to the input placeholder of shape
        `[1, ACS_dim_X, ACS_dim_Y, ACS_dim_Z]`.
      target_input: Not used by this function.
      Rx, Ry: Forwarded to `conv2d_dilate`.
      sess: Session used to initialise variables and run training.
      ACS_dim_X, ACS_dim_Y, ACS_dim_Z: Input placeholder dimensions.
      target_dim_X, target_dim_Y: Spatial dimensions of the target
        placeholder.
      target_dim_Z: Not used; the channel count is read from `target`.
      target: 4-D array fed to the target placeholder; its last dimension
        sets the number of output channels.
      kernel_x_1, kernel_y_1: Kernel size of the first layer.
      kernel_x_2, kernel_y_2: Kernel size of the second layer.
      layer1_channels, layer2_channels: Output channels of layers 1 and 2.
      kernel_last_x, kernel_last_y: Kernel size of the last layer.
      LearningRate: Initial learning rate for the exponential decay.
      MaxIteration: Training runs for `MaxIteration + 1` steps.

    Returns:
      List with the evaluated kernels `[W_conv1, W_conv2, W_conv3]`.
    """
    # Unpacking also enforces that `target` is 4-D.
    _, _, _, target_dim3 = np.shape(target)

    input_ACS = tf.placeholder(
        tf.float32, [1, ACS_dim_X, ACS_dim_Y, ACS_dim_Z])
    input_Target = tf.placeholder(
        tf.float32, [1, target_dim_X, target_dim_Y, target_dim3])

    Input = tf.reshape(input_ACS, [1, ACS_dim_X, ACS_dim_Y, ACS_dim_Z])

    # Layer 1: linear dilated convolution.
    W_conv1 = weight_variable(
        [kernel_x_1, kernel_y_1, ACS_dim_Z, layer1_channels], 'W1')
    h_conv1 = conv2d_dilate(Input, W_conv1, Rx, Ry)

    # Layer 2: dilated convolution followed by ReLU.
    W_conv2 = weight_variable(
        [kernel_x_2, kernel_y_2, layer1_channels, layer2_channels], 'W2')
    h_conv2 = tf.nn.relu(conv2d_dilate(h_conv1, W_conv2, Rx, Ry))

    # Layer 3: linear dilated convolution onto the target channels.
    W_conv3 = weight_variable(
        [kernel_last_x, kernel_last_y, layer2_channels, target_dim3], 'W3')
    h_conv3 = conv2d_dilate(h_conv2, W_conv3, Rx, Ry)

    # Data term: average of L2 and L1 residual norms; plus kernel penalty.
    residual = input_Target - h_conv3
    data_term = (tf.norm(residual, ord=2) + tf.norm(residual, ord=1)) * 0.5
    weight_penalty = 0.2 * (tf.nn.l2_loss(W_conv1)
                            + 0.9 * tf.nn.l2_loss(W_conv2)
                            + 0.8 * tf.nn.l2_loss(W_conv3))
    error_norm = data_term + weight_penalty

    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(LearningRate,
                                    global_step=global_step,
                                    decay_steps=50,
                                    decay_rate=0.95)
    train_step = tf.train.AdamOptimizer(lr).minimize(error_norm)

    # Very old TensorFlow releases only offer initialize_all_variables.
    version_parts = (tf.__version__).split('.')
    if int(version_parts[1]) < 12 and int(version_parts[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)

    fit_feed = {input_ACS: ACS, input_Target: target}
    for i in range(MaxIteration + 1):
        # The loop index drives the learning-rate schedule.
        sess.run(train_step,
                 feed_dict={input_ACS: ACS,
                            input_Target: target,
                            global_step: i})
        if i % 100 == 0:
            error_now = sess.run(error_norm, feed_dict=fit_feed)
            print('The',i,'th iteration gives an error',error_now)

    return sess.run([W_conv1, W_conv2, W_conv3])
Esempio n. 21
0
    def compute_x(self, param_name, param, m, prev_w_norm, prev_eta,
                  prev_beta):
        """Reconstruct the previous `x` value from the current state.

        `x` is recomputed on demand instead of being kept in its own slot,
        which avoids storing a second tensor per parameter.

        Args:
          param_name: Name of the parameter; passed to
            `_do_layer_adaptation` to decide whether the layer-wise ratio is
            applied.
          param: The parameter `Tensor`.
          m: Accumulated momentum `Tensor` with the same shape as `param`.
          prev_w_norm: Scalar norm of the parameter at the previous
            iteration.
          prev_eta: Scalar learning rate applied at the previous iteration.
          prev_beta: Scalar momentum applied at the previous iteration.

        Returns:
          `param - prev_beta * (ratio * prev_eta * m) / (prev_beta - 1)`,
          where `ratio` is `gamma * prev_w_norm / ||m||` for adapted layers
          (falling back to `gamma` when either norm is not positive) and
          `1.0` otherwise.
        """
        if self._do_layer_adaptation(param_name):
            prev_g_norm = tf.norm(m, ord=2)
            # Use the norm ratio only when both norms are strictly positive.
            norm_ratio = tf.where(
                tf.math.greater(prev_w_norm, 0),
                tf.where(tf.math.greater(prev_g_norm, 0),
                         (prev_w_norm / prev_g_norm), 1.0), 1.0)
            prev_ratio = self.gamma * norm_ratio
        else:
            prev_ratio = 1.0

        prev_normalized_m_with_lr = prev_ratio * prev_eta * m
        momentum_term = tf.divide(
            tf.multiply(prev_beta, prev_normalized_m_with_lr),
            prev_beta - 1.0)
        return param - momentum_term
Esempio n. 22
0
    def _apply_dense(self, grad, var):
        """Record per-variable statistics for `var`; apply nothing yet.

        The actual parameter update happens in `_finish`, which combines the
        quantities stored here into global statistics (e.g. the L1 norm of
        the full gradient).

        Args:
          grad: Gradient tensor for `var`.
          var: The variable the gradient belongs to.

        Returns:
          A `tf.no_op()`.
        """
        self.grads[var] = grad

        outer_fraction = self.get_slot(var, OUTER_BETTING_FRACTION)
        dot_product_delta = tf.reduce_sum(outer_fraction * grad)
        self.betting_fraction_dot_product_deltas[var] = dot_product_delta

        # Wealth increases by -g \cdot w where w is the parameter value.
        # Since w = Wealth * v with betting fraction v, the wealth increment
        # can be written as -(g \cdot v) Wealth.
        # TODO(cutkosky): at one point there was a bug in which epsilon
        # was not added here. It seemed performance may have degraded
        # somewhat after fixing this. Find out why this would be.
        self.wealth_deltas[var] = (
            -dot_product_delta * self._get_non_slot(OUTER_WEALTH))

        # L1 norm of this variable's gradient.
        self.grad_norms[var] = tf.norm(grad, ord=1)

        return tf.no_op()
def dense_weightnorm(name,
                     x,
                     n_out,
                     x_mask,
                     init_scale,
                     init,
                     dtype=tf.float32):
    """Dense layer with weight normalization.

    The weight matrix is `g * v / ||v||`, with the norm taken per output
    column. `g` and the bias `b` are created through `get_variable_ddi`,
    which receives initial values derived from the statistics of the
    normalised projection of `x` (as returned by `moments_over_bl`).

    Args:
      name: Variable scope name (opened with `reuse=tf.AUTO_REUSE`).
      x: Input tensor; its dimension 2 is used as the input feature size.
        Presumably shaped [B, L, n_in] -- TODO confirm against callers.
      n_out: Number of output features.
      x_mask: Mask forwarded to `moments_over_bl`.
      init_scale: Numerator of the initial value handed over for `g`.
      init: Forwarded to `get_variable_ddi`.
      dtype: Variable dtype.

    Returns:
      `tf.matmul(x, g * v_normalised) + b`.
    """
    n_in = common_layers.shape_list(x)[2]
    eps = tf.keras.backend.epsilon()
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        raw_direction = tf.get_variable(
            "v",
            shape=[n_in, n_out],
            dtype=dtype,
            initializer=tf.random_normal_initializer(0, 0.05),
            trainable=True)
        # Normalise every output column to unit length.
        direction = raw_direction / tf.norm(
            raw_direction, axis=0, keepdims=True)

        projected = tf.matmul(x, direction)  # [B, L, n_out]
        mean, variance = moments_over_bl(projected, x_mask)
        g_init = init_scale / (tf.sqrt(variance) + eps)

        # Options shared by the gain and the bias variable.
        ddi_options = dict(initializer=tf.zeros_initializer,
                           dtype=dtype,
                           trainable=True)
        g = get_variable_ddi("g", [n_out], g_init, init, **ddi_options)
        b = get_variable_ddi("b", [n_out], -mean * g_init, init,
                             **ddi_options)

        y = tf.matmul(x, g * direction) + b
        tf.summary.histogram("_g", g)
        return y
Esempio n. 24
0
def sobel_edges(images):
    """Computes edge intensity of image using sobel operator.

    The images are converted to grayscale, filtered with
    `tf.image.sobel_edges`, and the two directional responses are combined
    into their Euclidean norm.

    Unlike a reshape to the static `(batch, h, w, 2)` shape, this also works
    when the batch size or the spatial dimensions are not statically known.

    Args:
      images: RGB image tensor accepted by `tf.image.rgb_to_grayscale`;
        `tf.image.sobel_edges` requires the batched 4-D form
        `[batch, h, w, 3]`.

    Returns:
      Tensor of shape `[batch, h, w]` holding the per-pixel edge magnitude.
    """
    gray = tf.image.rgb_to_grayscale(images)
    # sobel_edges returns [batch, h, w, channels, 2]; grayscale has exactly
    # one channel, so dropping that axis leaves the (dy, dx) pair last.
    edges = tf.squeeze(tf.image.sobel_edges(gray), axis=[3])
    return tf.norm(edges, ord="euclidean", axis=-1)
 def compute_lr(self, grad, var):
     """Return the layer-wise scaled learning rate and adjusted gradient.

     Variables whose name contains any entry of `self._skip_list` are left
     alone: the base learning rate and the unmodified gradient are
     returned. For all other variables the learning rate is multiplied by a
     trust ratio

         eeta * ||w|| / (||g|| + weight_decay * ||w|| + epsilon)

     (1.0 when either norm is not positive), and the weight-decay term
     `weight_decay * var` is added to the gradient.

     Args:
       grad: Gradient tensor for `var`.
       var: The variable being updated.

     Returns:
       Tuple `(scaled_lr, grad)`.
     """
     skip_list = self._skip_list
     if skip_list is not None and any(v in var.name for v in skip_list):
         return self._learning_rate, grad

     w_norm = tf.norm(var, ord=2)
     g_norm = tf.norm(grad, ord=2)
     adaptive_ratio = (self._eeta * w_norm /
                       (g_norm + self._weight_decay * w_norm +
                        self._epsilon))
     # Fall back to a ratio of 1.0 unless both norms are strictly positive.
     trust_ratio = tf.where(
         tf.math.greater(w_norm, 0),
         tf.where(tf.math.greater(g_norm, 0), adaptive_ratio, 1.0), 1.0)
     scaled_lr = self._learning_rate * trust_ratio
     # Add the weight regularization gradient
     regularized_grad = grad + self._weight_decay * var
     return scaled_lr, regularized_grad
Esempio n. 26
0
    def __call__(self, codes):
        """Uses codebook to find nearest neighbor for each code.

        Args:
          codes: A `float`-like `Tensor` containing the latent vectors to be
            compared to the codebook. These are rank-3 with shape
            `[batch_size, latent_size, code_size]`.

        Returns:
          nearest_codebook_entries: The 1-nearest neighbor in Euclidean
            distance for each code in the batch.
          one_hot_assignments: The one-hot vectors corresponding to the
            matched codebook entry for each code in the batch.
        """
        # Codebook laid out so that it broadcasts against every code.
        broadcast_codebook = tf.reshape(
            self.codebook, [1, 1, self.num_codes, self.code_size])

        differences = tf.expand_dims(codes, 2) - broadcast_codebook
        distances = tf.norm(tensor=differences, axis=3)
        assignments = tf.argmin(input=distances, axis=2)
        one_hot_assignments = tf.one_hot(assignments, depth=self.num_codes)

        # Select the matched entries by masking with the one-hot vectors and
        # summing over the codebook axis.
        selected = tf.expand_dims(one_hot_assignments, -1) * broadcast_codebook
        nearest_codebook_entries = tf.reduce_sum(input_tensor=selected, axis=2)
        return nearest_codebook_entries, one_hot_assignments
    def test_use_resolution(self, is_training, use_resolution):
        """Checks that only the enabled resolutions influence the output.

        Builds a `GlimpseNetwork` with 8x8 glimpses at 3 resolutions, runs it
        on random images at location zero, and differentiates the first
        output unit with respect to every entry of
        `endpoints["model_input_list"]`. The gradient norm must be positive
        for entries whose `use_resolution` flag is true and exactly zero for
        the others.

        Args:
          is_training: Forwarded to the model call.
          use_resolution: Sequence of flags, one per model input, forwarded
            to the model call and used to pick the expected outcome.
        """
        config = dram_config.get_config()
        image_shape = (28, 28, 1)
        batch_size = 5
        output_dims = 10
        config.glimpse_model_config.output_dims = output_dims
        config.glimpse_model_config.glimpse_shape = (8, 8)
        config.glimpse_model_config.num_resolutions = 3

        images = tf.random_uniform(minval=-1,
                                   maxval=1,
                                   shape=(batch_size, ) + image_shape,
                                   dtype=tf.float32)
        locations = tf.zeros(shape=(batch_size, 2), dtype=tf.float32)
        model = glimpse_model.GlimpseNetwork(config.glimpse_model_config)
        g, endpoints = model(images,
                             locations,
                             is_training=is_training,
                             use_resolution=use_resolution)
        gnorms = [
            tf.norm(grad)
            for grad in tf.gradients(g[:, 0], endpoints["model_input_list"])
        ]
        self.evaluate(tf.global_variables_initializer())
        gnorms = self.evaluate(gnorms)

        for use, gnorm in zip(use_resolution, gnorms):
            if use:
                self.assertGreater(gnorm, 0.)
            else:
                self.assertEqual(gnorm, 0.)
Esempio n. 28
0
def rodrigues(r):
    """Convert a batch of axis-angle vectors to rotation matrices.

    Implements Rodrigues' rotation formula
    `R = cos(t) * I + (1 - cos(t)) * k k^T + sin(t) * [k]_x`, where `t` is
    the rotation angle and `k` the unit rotation axis.

    Gaussian noise with standard deviation 1e-8 is added to `r` before the
    angle is computed so that the division by the angle does not hit zero;
    as a consequence the result is not bit-wise deterministic.

    Parameter:
    ----------
    r: Axis-angle rotation tensor of shape [batch_size, 1, 3]. The shape
       must be statically known, and the float64 constants used here imply
       a float64 input.

    Return:
    -------
    Rotation matrix of shape [batch_size, 3, 3].
    """
    # avoid divide by zero
    jitter = tf.random_normal(r.shape, 0, 1e-8, dtype=tf.float64)
    theta = tf.norm(r + jitter, axis=(1, 2), keepdims=True)
    r_hat = r / theta

    batch_size = theta.get_shape().as_list()[0]
    cos = tf.cos(theta)

    # Components of the unit axis, one value per batch element.
    axis_x = r_hat[:, 0, 0]
    axis_y = r_hat[:, 0, 1]
    axis_z = r_hat[:, 0, 2]
    zeros = tf.zeros(batch_size, dtype=tf.float64)

    # Skew-symmetric cross-product matrix, assembled row by row.
    skew_flat = tf.stack(
        (zeros, -axis_z, axis_y,
         axis_z, zeros, -axis_x,
         -axis_y, axis_x, zeros),
        axis=1)
    skew = tf.reshape(skew_flat, (-1, 3, 3))

    # Identity matrix replicated for every batch element.
    identity = tf.expand_dims(tf.eye(3, dtype=tf.float64), axis=0) + tf.zeros(
        (batch_size, 3, 3), dtype=tf.float64)

    # Outer product k k^T.
    outer = tf.matmul(tf.transpose(r_hat, (0, 2, 1)), r_hat)

    return cos * identity + (1 - cos) * outer + tf.sin(theta) * skew
Esempio n. 29
0
    def obs_cost_fn(obs):
        """Weighted cost of a batch of observations.

        Observation layout (inclusive column ranges):
          0:2   relative_angle
          3:5   angular velocity
          6:8   relative_position
          9:11  velocity
          12:14 acceleration

        Three terms are combined, each squashed with tanh:
          * altitude: |obs[:, 8]| scaled by 0.05 (weight 0.0, so it currently
            contributes nothing to the sum),
          * distance: Euclidean norm of columns 6..8 scaled by 0.05
            (weight 0.8),
          * angle: mean absolute value of columns 0..2 (weight 0.1).

        Returns:
          One cost value per row of `obs`.
        """
        w_alt, w_dist, w_ang = 0.0, 0.8, 0.1

        # Altitude term from the last relative-position column.
        alt_cost = tf.math.tanh(0.05 * tf.abs(obs[:, 8]))

        # Distance term from the full relative-position vector.
        distance = tf.norm(obs[:, 6:9], ord='euclidean', axis=1)
        dist_cost = tf.math.tanh(0.05 * distance)

        # Angle term from the relative angles.
        mean_abs_angle = tf.math.reduce_mean(tf.abs(obs[:, 0:3]), axis=1)
        ang_cost = tf.math.tanh(mean_abs_angle)

        return w_alt * alt_cost + w_dist * dist_cost + w_ang * ang_cost
Esempio n. 30
0
def loss_fn(flo_preds, flo_gt):
    """Multi-scale flow loss plus L2 weight regularisation.

    For every predicted flow, the ground truth is bilinearly resized to the
    prediction's spatial size and divided by the height ratio between ground
    truth and prediction. The per-pixel L2 distance is summed over the
    spatial axes, averaged over the batch and weighted by the matching entry
    of `FLAGS.losses_weight`. (Multi-scale loss as described in Sec. 3 of
    the original paper.)

    Args:
      flo_preds: Iterable of predicted flow tensors, one per scale; each is
        4-D with the flow components on axis 3.
      flo_gt: 4-D ground-truth flow tensor.

    Returns:
      Scalar: weighted flow losses plus `FLAGS.gamma * tf.nn.l2_loss(v)`
      summed over all trainable variables.
    """
    flo_losses = 0.
    for flo_pred, weight in zip(flo_preds, FLAGS.losses_weight):
        _, gt_height, _, _ = tf.unstack(tf.shape(flo_gt))
        _, pred_height, _, _ = tf.unstack(tf.shape(flo_pred))

        resized_gt = tf.image.resize(flo_gt,
                                     tf.shape(flo_pred)[1:3],
                                     method=tf.image.ResizeMethod.BILINEAR)
        # Flow magnitudes shrink by the same factor as the resolution.
        downscale = tf.cast(gt_height / pred_height, dtype=tf.float32)
        scaled_flow_gt = resized_gt / downscale

        per_pixel_error = tf.norm(flo_pred - scaled_flow_gt, ord=2, axis=3)
        per_image_error = tf.reduce_sum(per_pixel_error, axis=(1, 2))
        flo_losses += tf.reduce_mean(per_image_error) * weight

    # Calculate the L2 norm to regularize.
    l2_losses = tf.reduce_sum(
        [FLAGS.gamma * tf.nn.l2_loss(v) for v in tf.trainable_variables()])

    return flo_losses + l2_losses