def initialize_mod_binary_MERA(phys_dim,
                               chi,
                               dtype=tf.float64):
    """
    Create a randomly initialized modified binary MERA.

    A single layer of uniformly random tensors is built from `phys_dim` and
    handed, together with `chi`, to `increase_bond_dimension_by_adding_layers`.
    The two reduced density matrices are identity matrices reshaped to rank 4,
    sized by the last dimension of the top `wC` tensor.

    Parameters:
    -------------------
    phys_dim:         int
                      Hilbert space dimension of the bottom layer
    chi:              int
                      maximum bond dimension
    dtype:            tensorflow dtype
                      dtype of the MERA tensors
    Returns:
    -------------------
    (wC, vC, uC, rhoAB, rhoBA)
    wC, vC, uC:      list of tf.Tensor
    rhoAB, rhoBA:    tf.Tensor
    """
    rank3_shape = [phys_dim, phys_dim, phys_dim]
    rank4_shape = [phys_dim, phys_dim, phys_dim, phys_dim]

    # Seed tensors for the first layer, created in the order w, v, u.
    seed_w = tf.random_uniform(shape=rank3_shape, dtype=dtype)
    seed_v = tf.random_uniform(shape=rank3_shape, dtype=dtype)
    seed_u = tf.random_uniform(shape=rank4_shape, dtype=dtype)

    wC, vC, uC = increase_bond_dimension_by_adding_layers(
        chi_new=chi, wC=[seed_w], vC=[seed_v], uC=[seed_u])

    chi_top = wC[-1].shape[2]
    rho_shape = (chi_top, chi_top, chi_top, chi_top)
    rhoAB = tf.reshape(tf.eye(chi_top * chi_top, dtype=dtype), rho_shape)
    rhoBA = tf.reshape(tf.eye(chi_top * chi_top, dtype=dtype), rho_shape)

    return wC, vC, uC, rhoAB, rhoBA
 def test_with_tensors(self):
   """Contract 2*I and 3*I (2x2) over both shared edges and expect 12."""
   network = tensornetwork.TensorNetwork()
   node_t = network.add_node(tf.eye(2) * 2, name="T")
   node_a = network.add_node(tf.eye(2) * 3, name="A")
   first_edge = network.connect(node_t[0], node_a[0], "edge")
   second_edge = network.connect(node_t[1], node_a[1], "edge2")
   # The network is validated before and after each contraction.
   network.check_correct()
   network.contract(first_edge)
   network.check_correct()
   result = network.contract(second_edge)
   network.check_correct()
   self.assertAlmostEqual(result.get_tensor().numpy(), 12.0)
Beispiel #3
0
 def _build_predict(self, Xnew, full_cov=False):
     """
     Predict the latent function at the new points Xnew.

     Returns a (mean, variance) pair. The mean includes the mean function
     evaluated at Xnew. With full_cov=True the variance is a full covariance
     matrix, otherwise only its diagonal; in both cases it is tiled once per
     column of Y. For a derivation of the terms, see the associated SGPR
     notebook.
     """
     noise_std = tf.sqrt(self.likelihood.variance)
     n_inducing = len(self.feature)
     n_outputs = tf.shape(self.Y)[1]
     residual = self.Y - self.mean_function(self.X)

     Kuf = self.feature.Kuf(self.kern, self.X)
     Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     Kus = self.feature.Kuf(self.kern, Xnew)

     chol_uu = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(chol_uu, Kuf, lower=True) / noise_std
     B = tf.matmul(A, A, transpose_b=True) + tf.eye(n_inducing, dtype=settings.float_type)
     chol_B = tf.cholesky(B)
     c = tf.matrix_triangular_solve(chol_B, tf.matmul(A, residual), lower=True) / noise_std

     # Kus solved against chol(Kuu), then additionally against chol(B).
     solved_uu = tf.matrix_triangular_solve(chol_uu, Kus, lower=True)
     solved_B = tf.matrix_triangular_solve(chol_B, solved_uu, lower=True)
     mean = tf.matmul(solved_B, c, transpose_a=True)

     if full_cov:
         cov = self.kern.K(Xnew) + tf.matmul(solved_B, solved_B, transpose_a=True) \
               - tf.matmul(solved_uu, solved_uu, transpose_a=True)
         var = tf.tile(tf.expand_dims(cov, 2), tf.stack([1, 1, n_outputs]))
     else:
         diag = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(solved_B), 0) \
               - tf.reduce_sum(tf.square(solved_uu), 0)
         var = tf.tile(tf.expand_dims(diag, 1), tf.stack([1, n_outputs]))
     return mean + self.mean_function(Xnew), var
Beispiel #4
0
    def _build_likelihood(self):
        r"""
        Compute the variational lower bound on the likelihood.

        q_alpha, q_lambda are variational parameters, size N x R.
        The bound is:
            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]
        with
            q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .

        Returns the summed variational expectations minus the KL term.
        """
        K = self.kern.K(self.X)
        K_alpha = tf.matmul(K, self.q_alpha)
        f_mean = K_alpha + self.mean_function(self.X)

        # The transposed q_lambda is needed three times below; build it once.
        lambda_t = tf.transpose(self.q_lambda)

        # compute the variance for each of the outputs
        I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
                    [self.num_latent, 1, 1])
        A = I + tf.expand_dims(lambda_t, 1) * \
            tf.expand_dims(lambda_t, 2) * K
        L = tf.cholesky(A)
        Li = tf.matrix_triangular_solve(L, I)
        tmp = Li / tf.expand_dims(lambda_t, 1)
        f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

        # some statistics about A are used in the KL
        A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
        trAi = tf.reduce_sum(tf.square(Li))

        KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                    tf.reduce_sum(K_alpha * self.q_alpha))

        v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
        return tf.reduce_sum(v_exp) - KL
  def body(self, features):
    """Embed the inputs and, when targets are present, build a pairwise loss.

    Without a 'targets' entry in `features` only the string embedding is
    returned. Otherwise the return value is a tuple of the string embedding
    and a dict holding the loss under the key 'training'.
    """
    with tf.variable_scope('string_embedding'):
      string_embedding = self.encode(features, 'inputs')

    if 'targets' not in features:
      return string_embedding

    with tf.variable_scope('code_embedding'):
      code_embedding = self.encode(features, 'targets')

    unit_strings = tf.nn.l2_normalize(string_embedding, axis=1)
    unit_codes = tf.nn.l2_normalize(code_embedding, axis=1)

    # All-vs-all cosine distance, flattened row-major into a single column.
    similarity = tf.matmul(unit_strings, unit_codes, transpose_b=True)
    cosine_dist = 1.0 - similarity
    dist_column = tf.reshape(cosine_dist, [-1, 1])

    # Matching pairs sit on the diagonal; flatten in the same row-major order.
    num_rows = tf.shape(cosine_dist)[0]
    pair_labels = tf.reshape(tf.eye(num_rows, dtype=tf.int32), [-1])

    logits = tf.concat([1.0 - dist_column, dist_column], axis=1)
    labels = tf.one_hot(pair_labels, 2)

    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
    return string_embedding, {'training': loss}
Beispiel #6
0
 def testDimensionGuardDynamicShape(self):
   """A 4x4 input with unknown static shape must trip the dimension check."""
   lkj = tfd.LKJ(dimension=3, concentration=[1., 4.], validate_args=True)
   with self.assertRaisesOpError('dimension mismatch'):
     # shape=None hides the static shape, so the check happens at run time.
     wrong_size = tf.placeholder_with_default(tf.eye(4), shape=None)
     self.evaluate(lkj.log_prob(wrong_size))
Beispiel #7
0
def maximum_mean_discrepancy(k_xx, k_yy, k_xy):
    """Compute the maximum mean discrepancy from precomputed kernel matrices.

    The diagonals of `k_xx` and `k_yy` are removed before averaging, so each
    within-sample term is the mean over the n * (n - 1) off-diagonal entries.
    The cross term is the plain mean of `k_xy`. The squared estimate is
    clamped at zero before the square root is taken.

    The identity masks and the sample counts follow the dtype of the kernel
    matrices, so inputs other than float32 (e.g. float64) are supported.

    Args:
        k_xx: square kernel matrix between the first sample and itself.
        k_yy: square kernel matrix between the second sample and itself.
        k_xy: kernel matrix between the two samples.

    Returns:
        A scalar tensor holding the MMD.
    """
    k_xx = tf.convert_to_tensor(k_xx)
    k_yy = tf.convert_to_tensor(k_yy)

    n_x = tf.shape(k_xx)[0]
    n_y = tf.shape(k_yy)[0]
    samples_x = tf.cast(n_x, dtype=k_xx.dtype)
    samples_y = tf.cast(n_y, dtype=k_yy.dtype)

    # Zero out the diagonals by subtracting the masked matrices.
    k_xx = k_xx - tf.multiply(k_xx, tf.eye(n_x, dtype=k_xx.dtype))
    k_yy = k_yy - tf.multiply(k_yy, tf.eye(n_y, dtype=k_yy.dtype))

    E_xx = tf.reduce_sum(k_xx) / (samples_x * (samples_x - 1))
    E_yy = tf.reduce_sum(k_yy) / (samples_y * (samples_y - 1))
    E_xy = tf.reduce_mean(k_xy)
    mmd_2 = E_xx + E_yy - 2 * E_xy
    # The estimate can be slightly negative; clamp before the square root.
    return tf.sqrt(tf.maximum(mmd_2, 0))
def test_sample_mvn(session_tf, cov_structure, num_samples):
    """
    Sample from N = 10,000 two-dimensional normal distributions with unit
    mean and identity covariance (given either as full matrices or as
    diagonals), then check that the empirical mean and covariance of the
    draws are close to the true values.
    """

    N, D = 10000, 2
    means = tf.ones((N, D), dtype=float_type)
    if cov_structure == "full":
        covs = tf.eye(D, batch_shape=[N], dtype=float_type)
    elif cov_structure == "diag":
        covs = tf.ones((N, D), dtype=float_type)

    sample_op = _sample_mvn(means, covs, cov_structure, num_samples=num_samples)
    drawn = session_tf.run(sample_op)

    if num_samples is not None:
        assert drawn.shape == (num_samples, N, D)
        # Pool all draws into one (num_samples * N, D) table.
        drawn = drawn.reshape(-1, D)
    else:
        assert drawn.shape == (N, D)

    empirical_mean = np.mean(drawn, axis=0)
    empirical_cov = np.cov(drawn, rowvar=False)
    np.testing.assert_array_almost_equal(empirical_mean, [1., 1.], decimal=1)
    np.testing.assert_array_almost_equal(empirical_cov, [[1., 0.], [0., 1.]], decimal=1)
Beispiel #9
0
    def _build_predict(self, Xnew, full_cov=False):
        """
        Compute p(F* | Y), where F* are points on the GP at the data matrix
        Xnew and Y are noisy observations at X.

        Returns the predictive mean and variance. The variance is a full
        covariance when full_cov is True and a diagonal otherwise; in both
        cases it is tiled once per column of Y.
        """
        n_train = tf.shape(self.X)[0]
        n_outputs = tf.shape(self.Y)[1]

        Kx = self.kern.K(self.X, Xnew)
        noise = tf.eye(n_train, dtype=settings.float_type) * self.likelihood.variance
        K = self.kern.K(self.X) + noise
        L = tf.cholesky(K)

        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        residual = self.Y - self.mean_function(self.X)
        V = tf.matrix_triangular_solve(L, residual)
        fmean = tf.matmul(A, V, transpose_a=True) + self.mean_function(Xnew)

        if full_cov:
            cov = self.kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
            fvar = tf.tile(tf.expand_dims(cov, 2), tf.stack([1, 1, n_outputs]))
        else:
            diag = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            fvar = tf.tile(tf.reshape(diag, (-1, 1)), [1, n_outputs])
        return fmean, fvar
Beispiel #10
0
  def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """Radial symmetry function.

    Evaluates Gaussians of the distances `d` on a grid of `radial_length`
    centers between 0 and `radial_cutoff`, weights them by `d_cutoff` and
    sums over axis 2. When `atomic_number_differentiated` is set, the sum is
    taken separately for every entry of `atom_cases` and the results are
    concatenated along axis 2.
    """
    one_hot_table = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(one_hot_table, atom_numbers)

    centers = np.linspace(0., self.radial_cutoff, self.radial_length)
    widths = np.ones_like(centers) * 3 / (centers[1] - centers[0])**2
    # Put the grid on a trailing axis so it broadcasts against the distances.
    Rs = tf.cast(np.reshape(centers, (1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(widths, (1, 1, 1, -1)), tf.float32)
    length = ita.get_shape().as_list()[-1]

    cutoff_stacked = tf.stack([d_cutoff] * length, axis=3)
    d_stacked = tf.stack([d] * length, axis=3)
    out = tf.exp(-ita * tf.square(d_stacked - Rs)) * cutoff_stacked

    if not self.atomic_number_differentiated:
      return tf.reduce_sum(out, axis=2)

    per_type = []
    for atom_type in self.atom_cases:
      type_column = atom_numbers_embedded[:, :, atom_type]
      selected_atoms = tf.expand_dims(tf.expand_dims(type_column, axis=1), axis=3)
      per_type.append(tf.reduce_sum(out * selected_atoms, axis=2))
    return tf.concat(per_type, axis=2)
 def _update_ortho(self,v,i):
   s = self.gan.ops.shape(v)
   if len(s) == 4 and s[0] == s[1]:
     w=v
     newv = []
     #s = self.ops.shape(v_transpose)
     #identity = tf.reshape(identity, [s[0],s[1],1,1])
     #identity = tf.tile(identity, [1,1,s[2],s[3]])
     decay = self.config.decay or 0.01
     w = tf.transpose(w, perm=[2,3,0,1])
     for i in range(self.config.iterations or 3):
         wt = tf.transpose(w, perm=[1,0,2,3])
         w2 = tf.reshape(w,[-1, s[0],s[1]])
         wt2 = tf.reshape(wt,[-1, s[0],s[1]])
         wtw = tf.matmul(wt2,w2)
         eye = tf.eye(s[0],s[1])
         eye = tf.tile(eye, [1,s[2]*s[3]])
         eye = tf.reshape(eye, self.gan.ops.shape(w))
         wtw = tf.reshape(wtw, self.gan.ops.shape(w))
         qk = eye - wtw
         w = w * (eye + 0.5*qk)
     w = tf.transpose(w, perm=[2,3,0,1])
     newv = w
     newv=(1.0+decay)*v - decay*(newv)
     newv = tf.reshape(newv,self.ops.shape(v))
     return tf.assign(v, newv)
   else:
     return None
  def _get_fldj_numerical(self, bijector, x, event_ndims,
                          eps=1.e-6,
                          input_to_vector=tfb.Identity,
                          output_to_vector=tfb.Identity):
    """Numerically approximate the forward log det Jacobian of a bijector.

    Args:
      bijector: the bijector whose Jacobian we wish to approximate
      x: the value for which we want to approximate the Jacobian
      event_ndims: number of dimensions in an event
      eps: epsilon to add when forming (f(x+eps)-f(x)) / eps
      input_to_vector: a bijector that maps the input value to a vector.
        Either an instance or a bijector class; a class is instantiated
        with no arguments.
      output_to_vector: a bijector that maps the output value to a vector.
        Either an instance or a bijector class; a class is instantiated
        with no arguments.

    Returns:
      A numerical approximation to the log det Jacobian of bijector.forward
      evaluated at x.
    """
    # The defaults are the `tfb.Identity` class itself, not an instance.
    # Calling `.forward` on the class would bind `x` as `self`, so classes
    # are instantiated here before use.
    if isinstance(input_to_vector, type):
      input_to_vector = input_to_vector()
    if isinstance(output_to_vector, type):
      output_to_vector = output_to_vector()

    x_vector = input_to_vector.forward(x)
    n = tf.shape(x_vector)[-1]
    # Row k of the perturbed batch has eps added to coordinate k only.
    x_plus_eps_vector = x_vector + eps * tf.eye(n, dtype=x_vector.dtype)
    x_plus_eps = input_to_vector.inverse(x_plus_eps_vector)

    f_x = bijector.forward(x)
    f_x_vector = output_to_vector.forward(f_x)
    f_x_plus_eps = bijector.forward(x_plus_eps)
    f_x_plus_eps_vector = output_to_vector.forward(f_x_plus_eps)

    # Forward finite differences.
    jacobian_numerical = (f_x_plus_eps_vector - f_x_vector) / eps
    return (
        tf.log(tf.abs(tf.matrix_determinant(jacobian_numerical))) +
        input_to_vector.forward_log_det_jacobian(x, event_ndims=event_ndims) -
        output_to_vector.forward_log_det_jacobian(f_x, event_ndims=event_ndims))
Beispiel #13
0
  def initial_state(self, batch_size, trainable=False):
    """Creates the initial memory.

    Each row of the memory should be unique, so the memory starts as a
    batch of identity matrices with `self._mem_slots` rows. The last axis is
    then zero-padded or truncated to `self._mem_size` columns.

    Args:
      batch_size: The size of the batch.
      trainable: Accepted for interface purposes; this method does not read it.

    Returns:
      init_state: A truncated or padded matrix of size
        (batch_size, self._mem_slots, self._mem_size).
    """
    slots = self._mem_slots
    width = self._mem_size
    init_state = tf.eye(slots, batch_shape=[batch_size])

    extra_columns = width - slots
    if extra_columns > 0:
      # More columns than slots: append zeros on the last axis.
      zero_pad = tf.zeros((batch_size, slots, extra_columns))
      init_state = tf.concat([init_state, zero_pad], -1)
    elif extra_columns < 0:
      # Fewer columns than slots: keep only the first `width` components.
      init_state = init_state[:, :, :width]
    return init_state
Beispiel #14
0
  def testMultivariateNormalNd(self, event_size, num_samples):
    """Plot a sampler chain and independent draws for a standard normal.

    Runs `num_samples` kernel steps from the origin, scatter-plots the first
    two coordinates of the chain, then does the same for draws taken
    directly from the target distribution. Both figures are saved to PNG.
    """
    def target_log_prob_fn(event):
      target = tfd.MultivariateNormalFullCovariance(
          loc=tf.zeros(event_size),
          covariance_matrix=tf.eye(event_size))
      return target.log_prob(event)

    state = tf.zeros(event_size)
    chain = []
    for seed in range(num_samples):
      [state], _, _ = no_u_turn_sampler.kernel(
          target_log_prob_fn=target_log_prob_fn,
          current_state=[state],
          step_size=[0.3],
          seed=seed)
      current = state.numpy()
      # Only the first two coordinates are plotted.
      chain.append([current[0], current[1]])

    chain = np.array(chain)
    plt.scatter(chain[:, 0], chain[:, 1])
    chain_file = "projection_chain_{}d_normal_{}_steps.png".format(
        event_size, num_samples)
    savefig(chain_file)
    plt.close()

    reference = tfd.MultivariateNormalFullCovariance(
        loc=tf.zeros(event_size),
        covariance_matrix=tf.eye(event_size))
    target_samples = reference.sample(num_samples, seed=4).numpy()
    plt.scatter(target_samples[:, 0], target_samples[:, 1])
    reference_file = "projection_independent_{}d_normal_{}_samples.png".format(
        event_size, num_samples)
    savefig(reference_file)
    plt.close()
Beispiel #15
0
 def test_non_batch_2x2(self):
   """tf.eye(2) as float32 must match np.eye(2) in static shape and value."""
   size = 2
   float_dtype = np.float32
   expected = np.eye(size).astype(float_dtype)
   with self.test_session():
     actual = tf.eye(size, dtype=float_dtype)
     self.assertAllEqual((size, size), actual.get_shape())
     self.assertAllEqual(expected, actual.eval())
Beispiel #16
0
 def distance_cutoff(self, d, cutoff, flags):
   """Apply a cosine cutoff to the distance matrix `d`.

   Entries are kept only where `flags * sign(cutoff - d)` is positive, and
   the diagonal is always masked out. Kept entries take the value
   0.5 * (cos(pi * d / cutoff) + 1).
   """
   # Positive only where the flag is set and d lies below the cutoff Rc.
   within = tf.nn.relu(flags * tf.sign(cutoff - d))
   off_diagonal = tf.expand_dims((1 - tf.eye(self.max_atoms)), 0)
   mask = within * off_diagonal
   smooth = 0.5 * (tf.cos(np.pi * d / cutoff) + 1)
   return smooth * mask
Beispiel #17
0
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Sample some ROIs from all proposals for training.
    #fg is guaranteed to be > 0, because ground truth boxes are added as RoIs.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, int32

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.
    """
    proposal_iou = pairwise_iou(boxes, gt_boxes)     # nxm
    proposal_metrics(proposal_iou)

    # Ground-truth boxes join the proposal pool; each one gets an identity
    # row in the IoU table.
    boxes = tf.concat([boxes, gt_boxes], axis=0)    # (n+m) x 4
    num_gt = tf.shape(gt_boxes)[0]
    iou = tf.concat([proposal_iou, tf.eye(num_gt)], axis=0)   # (n+m) x m
    # #proposal=n+m from now on

    def sample_fg_bg(iou):
        is_fg = tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH

        fg_candidates = tf.reshape(tf.where(is_fg), [-1])
        fg_budget = int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO)
        num_fg = tf.minimum(fg_budget, tf.size(fg_candidates), name='num_fg')
        fg_chosen = tf.random_shuffle(fg_candidates)[:num_fg]

        bg_candidates = tf.reshape(tf.where(tf.logical_not(is_fg)), [-1])
        # Background fills whatever is left of the per-image batch.
        num_bg = tf.minimum(
            cfg.FRCNN.BATCH_PER_IM - num_fg,
            tf.size(bg_candidates), name='num_bg')
        bg_chosen = tf.random_shuffle(bg_candidates)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return fg_chosen, bg_chosen

    # fg,bg indices w.r.t proposals
    fg_inds, bg_inds = sample_fg_bg(iou)

    best_gt_per_proposal = tf.argmax(iou, axis=1)   # #proposal, each in 0~m-1
    fg_inds_wrt_gt = tf.gather(best_gt_per_proposal, fg_inds)   # num_fg

    # indices w.r.t all n+m proposal boxes
    sampled_inds = tf.concat([fg_inds, bg_inds], axis=0)
    ret_boxes = tf.gather(boxes, sampled_inds)

    fg_labels = tf.gather(gt_labels, fg_inds_wrt_gt)
    bg_labels = tf.zeros_like(bg_inds, dtype=tf.int64)
    ret_labels = tf.concat([fg_labels, bg_labels], axis=0)

    # stop the gradient -- they are meant to be training targets
    sampled_boxes = tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes')
    sampled_labels = tf.stop_gradient(ret_labels, name='sampled_labels')
    return sampled_boxes, sampled_labels, tf.stop_gradient(fg_inds_wrt_gt)
 def _forward(self, x):
   """Return cholesky(y^H y), where y solves the triangular system x y = I."""
   with tf.control_dependencies(self._assertions(x)):
     dynamic_shape = tf.shape(x)
     eye = tf.eye(
         dynamic_shape[-1],
         batch_shape=dynamic_shape[:-2],
         dtype=x.dtype.base_dtype)
     # Note `matrix_triangular_solve` implicitly zeros upper triangular of `x`.
     solved = tf.matrix_triangular_solve(x, eye)
     gram = tf.matmul(solved, solved, adjoint_a=True)
     return tf.cholesky(gram)
Beispiel #19
0
def get_self_correlated_mat(num_out_A, scope=None, reuse=None):
    """Return W W^T + I for a square variable W of size `num_out_A`.

    The variable is named 'pa_corr' and lives in `scope` (default
    'Self_Correlated_mat'). It uses a variance-scaling initializer and an L2
    regularizer.
    """
    with tf.variable_scope(scope or 'Self_Correlated_mat', reuse=reuse):
        weight_init = tf.contrib.layers.variance_scaling_initializer(
            factor=0.1, mode='FAN_AVG', uniform=True, dtype=tf.float32)
        weight_reg = tf.contrib.layers.l2_regularizer(scale=3e-4)
        cooc1 = get_var('pa_corr',
                        shape=[num_out_A, num_out_A],
                        initializer_fn=weight_init,
                        regularizer_fn=weight_reg)
        gram = tf.matmul(cooc1, cooc1, transpose_b=True)
        return gram + tf.eye(num_out_A)
Beispiel #20
0
 def build(self):
   """Set up the Gaussian parameters and the atom-number embedding.

   `Rs_init` and `ita_init` are expanded into a full grid and stored
   flattened as float32 constants; `length` is the number of grid points.
   """
   self.length = len(self.Rs_init) * len(self.ita_init)
   Rs_grid, ita_grid = np.meshgrid(self.Rs_init, self.ita_init)
   self.Rs = tf.constant(Rs_grid.flatten(), dtype=tf.float32)
   self.ita = tf.constant(ita_grid.flatten(), dtype=tf.float32)
   # Identity table with one row per atom number up to the largest case.
   num_embeddings = max(self.atom_number_cases) + 1
   self.atom_number_embedding = tf.eye(num_embeddings)
Beispiel #21
0
 def _identity(self):
   """Return identity matrices batched to the shape of `concentration`."""
   dim = self.dimension
   concentration = self.concentration
   eye = tf.eye(
       num_rows=dim,
       batch_shape=tf.shape(concentration),
       dtype=concentration.dtype)
   # set_shape only necessary because tf.eye doesn't do it itself: b/111413915
   static_shape = eye.shape[:-2].concatenate([dim, dim])
   eye.set_shape(static_shape)
   return eye
Beispiel #22
0
 def test_non_batch_0x2(self):
   """tf.eye with 0 rows and 2 columns must match np.eye(0, 2) as int64."""
   rows, cols = 0, 2
   int_dtype = np.int64
   expected = np.eye(rows, cols).astype(int_dtype)
   with self.test_session():
     actual = tf.eye(rows, num_columns=cols, dtype=int_dtype)
     self.assertAllEqual((rows, cols), actual.get_shape())
     self.assertAllEqual(expected, actual.eval())
def ising_hamiltonian(N, dtype):
    """Return the list of N - 1 two-site terms of an Ising Hamiltonian.

    Every term is -X(x)X - Z(x)I; the last term additionally carries -I(x)Z.
    Each term is a rank-4 tensor with its two middle axes swapped.
    """
    pauli_x = tf.convert_to_tensor([[0.0, 1.0], [1.0, 0.0]], dtype=dtype)
    pauli_z = tf.convert_to_tensor([[1.0, 0.0], [0.0, -1.0]], dtype=dtype)
    identity = tf.eye(2, dtype=dtype)

    bulk = -tf.tensordot(pauli_x, pauli_x, axes=0) - tf.tensordot(pauli_z, identity, axes=0)
    boundary = bulk - tf.tensordot(identity, pauli_z, axes=0)

    axis_order = (0, 2, 1, 3)
    bulk = tf.transpose(bulk, axis_order)
    boundary = tf.transpose(boundary, axis_order)
    return [bulk] * (N - 2) + [boundary]
def entanglement_specs_1site(isos_012):
    """Return one eigenvalue spectrum per layer, in layer order.

    Starting from an identity at the top, the state is descended through the
    layers from last to first with `descend_state_1site`; after each step its
    eigenvalues are computed and cast to the matching real dtype.
    """
    top_dim = isos_012[-1].shape[0]
    state = tf.eye(top_dim, dtype=isos_012[0][0].dtype)

    top_down = []
    for iso_012 in reversed(isos_012):
        iso_021 = tf.transpose(iso_012, (0, 2, 1))
        state = descend_state_1site(state, iso_012, iso_021)
        spectrum = tf.linalg.eigvalsh(state)
        top_down.append(tf.cast(spectrum, spectrum.dtype.real_dtype))

    # Collected from the top layer down; return them bottom layer first.
    return top_down[::-1]
Beispiel #25
0
def Kuu(feat, kern, *, jitter=0.0):
    """Return the covariance between the inducing features of `feat`.

    Each feature carries its own scale, which is added to the kernel
    lengthscales before the distances are computed. `jitter` times the
    identity is added to the result.
    """
    with params_as_tensors_for(feat, kern):
        Zmu, Zlen = kern._slice(feat.Z, feat.scales)
        combined_sq = tf.square(kern.lengthscales + Zlen)
        pairwise_sq = tf.expand_dims(combined_sq, 0) + tf.expand_dims(combined_sq, 1)
        sc = tf.sqrt(pairwise_sq - tf.square(kern.lengthscales))
        d = feat._cust_square_dist(Zmu, Zmu, sc)
        Kzz = kern.variance * tf.exp(-d / 2) * tf.reduce_prod(kern.lengthscales / sc, 2)
        jitter_matrix = jitter * tf.eye(len(feat), dtype=settings.float_type)
        Kzz = Kzz + jitter_matrix
    return Kzz
Beispiel #26
0
  def angular_symmetry(self, d_cutoff, d, atom_numbers, coordinates):
    """Angular symmetry function.

    For every atom i and every pair of neighbours (j, k), combines an angular
    term in the angle between the vectors i->j and i->k with a Gaussian radial
    term in the mean of the two distances, weighted by both cutoff values.
    The result is summed over j and k. When `atomic_number_differentiated`
    is set, the sum is taken separately for every unordered pair of entries
    of `atom_cases` and the results are concatenated along axis 2.

    NOTE(review): the stacking below suggests `d` and `d_cutoff` are
    (batch, max_atoms, max_atoms) and `coordinates` is (batch, max_atoms, 3)
    -- confirm against the caller.
    """

    max_atoms = self.max_atoms
    # Identity table used as a one-hot lookup for atom numbers.
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    # Parameter grid: radial centers Rs and angle offsets thetas, with a
    # single width (ita) and a single exponent (zeta).
    Rs = np.linspace(0., self.angular_cutoff, self.angular_length)
    ita = 3 / (Rs[1] - Rs[0])**2
    thetas = np.linspace(0., np.pi, self.angular_length)
    zeta = float(self.angular_length**2)

    # meshgrid expands the four parameters to a common grid; each is then
    # flattened onto a trailing axis so it broadcasts over the pair tensors.
    ita, zeta, Rs, thetas = np.meshgrid(ita, zeta, Rs, thetas)
    zeta = tf.cast(np.reshape(zeta, (1, 1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(ita, (1, 1, 1, 1, -1)), tf.float32)
    Rs = tf.cast(np.reshape(Rs, (1, 1, 1, 1, -1)), tf.float32)
    thetas = tf.cast(np.reshape(thetas, (1, 1, 1, 1, -1)), tf.float32)
    length = zeta.get_shape().as_list()[-1]

    # Pairwise coordinate differences between atoms.
    vector_distances = tf.stack([coordinates] * max_atoms, 1) - tf.stack(
        [coordinates] * max_atoms, 2)
    # Replicate the pair quantities along a new axis so that the (i, j) and
    # (i, k) values line up in one (i, j, k) tensor.
    R_ij = tf.stack([d] * max_atoms, axis=3)
    R_ik = tf.stack([d] * max_atoms, axis=2)
    f_R_ij = tf.stack([d_cutoff] * max_atoms, axis=3)
    f_R_ik = tf.stack([d_cutoff] * max_atoms, axis=2)

    # Define angle theta = arccos(R_ij(Vector) dot R_ik(Vector)/R_ij(distance)/R_ik(distance))
    vector_mul = tf.reduce_sum(tf.stack([vector_distances] * max_atoms, axis=3) * \
                               tf.stack([vector_distances] * max_atoms, axis=2), axis=4)
    # Zero the dot product wherever either cutoff value is zero.
    vector_mul = vector_mul * tf.sign(f_R_ij) * tf.sign(f_R_ik)
    # The 1e-5 keeps the division finite when a distance is zero.
    theta = tf.acos(tf.math.divide(vector_mul, R_ij * R_ik + 1e-5))

    # Replicate along the parameter-grid axis.
    R_ij = tf.stack([R_ij] * length, axis=4)
    R_ik = tf.stack([R_ik] * length, axis=4)
    f_R_ij = tf.stack([f_R_ij] * length, axis=4)
    f_R_ik = tf.stack([f_R_ik] * length, axis=4)
    theta = tf.stack([theta] * length, axis=4)

    # Angular term times radial Gaussian times both cutoff weights.
    out_tensor = tf.pow((1. + tf.cos(theta - thetas)) / 2., zeta) * \
                 tf.exp(-ita * tf.square((R_ij + R_ik) / 2. - Rs)) * f_R_ij * f_R_ik * 2

    if self.atomic_number_differentiated:
      out_tensors = []
      # Visit each unordered pair of atom types once (k starts at j).
      for id_j, atom_type_j in enumerate(self.atom_cases):
        for atom_type_k in self.atom_cases[id_j:]:
          # Mask that is 1 where atom j has type_j and atom k has type_k.
          selected_atoms = tf.stack([atom_numbers_embedded[:, :, atom_type_j]] * max_atoms, axis=2) * \
                           tf.stack([atom_numbers_embedded[:, :, atom_type_k]] * max_atoms, axis=1)
          selected_atoms = tf.expand_dims(
              tf.expand_dims(selected_atoms, axis=1), axis=4)
          out_tensors.append(
              tf.reduce_sum(out_tensor * selected_atoms, axis=(2, 3)))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out_tensor, axis=(2, 3))
Beispiel #27
0
 def test_1x3_batch_4x4(self):
   """Every matrix in a [1, 3] batch of tf.eye(4) must equal np.eye(4)."""
   size = 4
   batch_shape = [1, 3]
   float_dtype = np.float32
   expected = np.eye(size).astype(float_dtype)
   with self.test_session():
     batched = tf.eye(size, batch_shape=batch_shape, dtype=float_dtype)
     self.assertAllEqual(batch_shape + [size, size], batched.get_shape())
     values = batched.eval()
     # Walk every batch position in row-major order.
     for i, j in np.ndindex(*batch_shape):
       self.assertAllEqual(expected, values[i, j, :, :])
def random_tree_tn_uniform(Ds, dtype, top_rank=1):
    """Build one isometry per layer of a tree tensor network.

    Layer j maps two legs of dimension Ds[j] to one leg of dimension Ds[j+1];
    the leg above the last layer has dimension `top_rank`. When the output
    dimension equals Ds[j]**2 the isometry is an identity, otherwise it comes
    from `random_isometry`.

    Args:
        Ds: sequence of bond dimensions, one per layer. Any sequence is
            accepted (list, tuple, array); it is copied, not modified.
        dtype: dtype of the tensors.
        top_rank: dimension of the leg above the top layer.

    Returns:
        A list with one tensor of shape (Ds[j+1], Ds[j], Ds[j]) per layer;
        empty when `Ds` is empty.
    """
    # Copy into a list first: `Ds + [top_rank]` raises for tuples and
    # broadcast-adds for numpy arrays.
    dims = list(Ds) + [top_rank]
    isos = []
    for d_in, d_out in zip(dims[:-1], dims[1:]):
        if d_out == d_in**2:
            iso = tf.eye(d_out, dtype=dtype)
        else:
            iso = random_isometry(d_out, d_in**2, dtype)
        isos.append(tf.reshape(iso, (d_out, d_in, d_in)))
    return isos
def _dense_ham_term(H):
    """Assemble a dense two-site term from its one- and two-site parts.

    `H` is `(h1, (h2L, h2R))`. The result is h1 (x) identity plus the sum of
    hl (x) hr over the paired entries of h2L and h2R.
    """
    one_site, two_site = H
    left_ops, right_ops = two_site

    identity = tf.eye(one_site.shape[0], dtype=one_site.dtype)
    # Index pattern that places the two operators on neighbouring sites.
    legs = [(-1, -3), (-2, -4)]

    dense = tensornetwork.ncon([one_site, identity], legs)
    for left, right in zip(left_ops, right_ops):
        dense = dense + tensornetwork.ncon([left, right], [(-1, -3), (-2, -4)])

    return dense
Beispiel #30
0
    def _build_likelihood(self):
        r"""
        Build the tensorflow expression for the log marginal likelihood

            \log p(Y | theta).

        """
        n_train = tf.shape(self.X)[0]
        noise = tf.eye(n_train, dtype=settings.float_type) * self.likelihood.variance
        chol = tf.cholesky(self.kern.K(self.X) + noise)
        mean = self.mean_function(self.X)
        # (R,) log-likelihoods for each independent dimension of Y
        per_output = multivariate_normal(self.Y, mean, chol)

        return tf.reduce_sum(per_output)
    def setup_image_loss(self):
        """Build the image-based loss and a regularizer on segment length.

        The predicted configuration is resampled to 64 points, giving 63
        segments. For every pixel of a 224x224 grid over [-6, 6]^2 a soft
        membership `pix_prob` is derived from the pixel's distance to the
        nearest segment. Foreground and background colors are modelled as two
        Gaussians fitted to `self.rgb` using those memberships, and the image
        loss is the negative log-likelihood of the image under that mixture.

        Reads `self.pred` and `self.rgb`. Sets `self.image_pred`,
        `self.norm_rgb`, `self.dist`, `self.sigma`, `self.pix_prob`,
        `self.render`, `self.prob_positive`, `self.prob_negative`,
        `self.image_loss_e`, `self.image_loss`, `self.reg_loss_e` and
        `self.reg_loss`.

        NOTE(review): the transposes and reductions below suggest `self.rgb`
        is (batch, 224, 224, 3) and `self.pred` yields (batch, points, 2)
        -- confirm against the caller.
        """
        # Pixel-center coordinates of the 224x224 grid.
        pix_x = tf.lin_space(-6.0,6.0,224)
        pix_y = tf.lin_space(-6.0,6.0,224)
        pix_x, pix_y = tf.meshgrid(pix_x, pix_y)
        pix = tf.stack([pix_x,pix_y], axis=-1)

        # batch of [1,63] gaussian distributions in R2, one for each edge
        self.image_pred = - sample_equdistance(self.pred, 64)  # annotated configuration need to be negated to match render
        self.norm_rgb = self.rgb/255

        # Midpoint, half-vector and half-length of each segment between
        # consecutive points.
        center = (self.image_pred[:,1:,:]+self.image_pred[:,:-1,:]) / 2.0
        diff = (self.image_pred[:,1:,:]-self.image_pred[:,:-1,:]) / 2.0
        dist = tf.norm(diff, axis=2)
        self.dist = dist

        pix = tf.reshape(pix, [224,224,1,1,2]) # to broadcast
        pix_to_center = pix - center
        # perpendicular distance to segments
        # cross(pix-center, diff) / dist
        pix_to_segment_p = (pix_to_center[:,:,:,:,0]*diff[:,:,1]-pix_to_center[:,:,:,:,1]*diff[:,:,0]) / dist
        # longitudinal distance to segments
        # max{ abs[ dot(pix-center, diff) / dist ] - dist, 0 }
        pix_to_segment_l = (pix_to_center[:,:,:,:,0]*diff[:,:,0]+pix_to_center[:,:,:,:,1]*diff[:,:,1]) / dist
        pix_to_segment_l = tf.maximum(tf.abs(pix_to_segment_l)-dist, 0)

        #sigma = tf.constant(0.1, dtype=tf.float32)
        # sigma is a variable, but the value used below never drops under 0.1.
        self.sigma = tf.get_variable('sigma',dtype=tf.float32, shape=(), initializer=tf.constant_initializer(0.1))
        sigma = tf.maximum(self.sigma, 0.1)

        # Gaussian falloff in the distance from each pixel to each segment.
        pix_prob = tf.exp((-tf.square(pix_to_segment_p)-tf.square(pix_to_segment_l))/(2*sigma*sigma))
        # NOTE(review): sum_pix_prob is only referenced by the commented-out
        # regularizer below.
        sum_pix_prob = tf.reduce_sum(pix_prob, axis=3)
#        self.reg_loss_e = tf.reduce_mean(sum_pix_prob*sum_pix_prob, axis=[0,1]) - \
#                          tf.reduce_mean(pix_prob*pix_prob, axis=[0,1,3])*tf.cast(tf.shape(pix_prob)[3], tf.float32)
#        self.reg_loss_e = self.reg_loss_e * 10
        # Keep, per pixel, the strongest response over all segments.
        pix_prob = tf.reduce_max(pix_prob, axis=3) # shape should be [224,224,batch]
        pix_prob = tf.transpose(pix_prob, perm=[2,0,1]) # [batch, 224,224]
        pix_prob = tf.clip_by_value(pix_prob, 0, 1)
        pix_prob = tf.expand_dims(pix_prob, axis=3)
        self.pix_prob=pix_prob # debug
        # simple case, assume foreground and background color is known
        #pix_positive = tf.constant([1,0,0], dtype=tf.float32) # red pixel
        #pix_negative = tf.constant([1,1,1], dtype=tf.float32) # white pixel
        # harder case, foreground / background color by averaging prediction
        mean_positive = tf.reduce_mean(pix_prob, axis=[1,2], keep_dims=True) # B x 1 x 1 x 1
        mean_negative = tf.reduce_mean(1-pix_prob, axis=[1,2], keep_dims=True)
        # Membership-weighted mean colors of foreground and background.
        pix_positive = tf.reduce_mean(pix_prob*self.norm_rgb, axis=[1,2], keep_dims=True) / mean_positive
        pix_negative = tf.reduce_mean((1-pix_prob)*self.norm_rgb, axis=[1,2], keep_dims=True) / mean_negative
        # render is for visualization, not in loss
        self.render = pix_prob * pix_positive + (1-pix_prob) * pix_negative
        self.render = tf.clip_by_value(self.render, 0, 1)

        mean_positive = tf.squeeze(mean_positive, axis=[1,2,3])
        mean_negative = tf.squeeze(mean_negative, axis=[1,2,3])
        pix_prob = tf.squeeze(pix_prob, axis=3)
        pix_prob = tf.expand_dims(tf.expand_dims(pix_prob, -1), -1) # annoying reshaping to broadcast correctly
        mean_positive = tf.expand_dims(tf.expand_dims(mean_positive, -1), -1)
        mean_negative = tf.expand_dims(tf.expand_dims(mean_negative, -1), -1)
        # Membership-weighted color covariances: mean of the outer products
        # of the centered colors.
        cov_positive = tf.reduce_mean(pix_prob*tf.matmul(
                                               tf.expand_dims(self.norm_rgb-pix_positive, -1),
                                               tf.expand_dims(self.norm_rgb-pix_positive, -1),
                                               transpose_b=True), axis=[1,2]) / mean_positive
        cov_negative = tf.reduce_mean((1-pix_prob)*tf.matmul(
                                               tf.expand_dims(self.norm_rgb-pix_negative, -1),
                                               tf.expand_dims(self.norm_rgb-pix_negative, -1),
                                               transpose_b=True), axis=[1,2]) / mean_negative

        # A small multiple of the identity is added to each covariance.
        positive_gaussian = tfp.distributions.MultivariateNormalFullCovariance(
                                       loc=tf.squeeze(pix_positive, axis=[1,2]),
                                       covariance_matrix=cov_positive+tf.eye(3)*0.0001)
        negative_gaussian = tfp.distributions.MultivariateNormalFullCovariance(
                                       loc=tf.squeeze(pix_negative, axis=[1,2]),
                                       covariance_matrix=cov_negative+tf.eye(3)*0.0001)

        mean_positive = tf.reshape(mean_positive, [1,1,-1])
        mean_negative = tf.reshape(mean_negative, [1,1,-1])
        pix_prob = tf.transpose(tf.squeeze(pix_prob, axis=[3,4]), perm=[1,2,0])
        image_reshape = tf.transpose(self.norm_rgb, perm=[1,2,0,3]) # H x W x B x 3
        prob_positive = mean_positive * positive_gaussian.prob(image_reshape) # H x W x B
        prob_negative = mean_negative * negative_gaussian.prob(image_reshape)
        # Floor the probabilities so the log below stays finite.
        prob_positive = tf.maximum(prob_positive, 1e-30)
        prob_negative = tf.maximum(prob_negative, 1e-30)
        # for debugging
        self.pix_prob=pix_prob
        self.prob_positive=prob_positive
        self.prob_negative=prob_negative
        self.image_loss_e = - tf.reduce_mean(tf.log(pix_prob*prob_positive + (1-pix_prob)*prob_negative), axis=[0,1]) # image loss per instance
        self.image_loss = tf.reduce_mean(self.image_loss_e)
        # Regularizer: four times the mean squared half-length of the segments.
        self.reg_loss_e = tf.reduce_mean(dist*dist, axis=1)*4.0     # - tf.reduce_mean(tf.reduce_mean(dist,axis=[1])*tf.reduce_mean(dist,axis=[1]), axis=0)*0.95
        self.reg_loss = tf.reduce_mean(self.reg_loss_e)
Beispiel #32
0
def contextual_attention(f, b, mask=None, ksize=3, stride=1, rate=1,
                         fuse_k=3, softmax_scale=10., training=True, fuse=True):
    """ Contextual attention layer implementation.

    Contextual attention is first introduced in publication:
        Generative Image Inpainting with Contextual Attention, Yu et al.

    Args:
        f: Input feature to match (foreground).
        b: Input feature for match (background).
        mask: Input mask for b, indicating patches not available. When None,
            an all-zero mask is used.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from b.
        rate: Dilation for matching. f, b and mask are resized by 1/rate
            before matching.
        fuse_k: Size of the identity kernel used to fuse the matching scores.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference
            (not referenced anywhere in this function body).
        fuse: Whether the two score-fusion convolutions are applied.

    Returns:
        tuple: (y, flow). y is the reconstructed feature, with its static
            shape set to the static shape of the original f; flow is the image
            produced by flow_to_image_tf from the attention offsets (resized
            by `rate` when rate != 1).

    """
    # get shapes (dynamic and static) before f and b are downscaled below
    raw_fs = tf.shape(f)
    raw_int_fs = f.get_shape().as_list()
    raw_int_bs = b.get_shape().as_list()
    # extract patches from the full-resolution background with stride and rate;
    # these are the patches pasted back by the transposed convolution
    kernel = 2*rate
    raw_w = tf.extract_image_patches(
        b, [1,kernel,kernel,1], [1,rate*stride,rate*stride,1], [1,1,1,1], padding='SAME')
    raw_w = tf.reshape(raw_w, [raw_int_bs[0], -1, kernel, kernel, raw_int_bs[3]])
    raw_w = tf.transpose(raw_w, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    # downscaling foreground option: downscaling both foreground and
    # background for matching and use original background for reconstruction.
    f = resize(f, scale=1./rate, func=tf.image.resize_nearest_neighbor)
    b = resize(b, to_shape=[int(raw_int_bs[1]/rate), int(raw_int_bs[2]/rate)], func=tf.image.resize_nearest_neighbor)  # https://github.com/tensorflow/tensorflow/issues/11651
    if mask is not None:
        mask = resize(mask, scale=1./rate, func=tf.image.resize_nearest_neighbor)
    fs = tf.shape(f)
    int_fs = f.get_shape().as_list()
    # one foreground tensor per batch element
    f_groups = tf.split(f, int_fs[0], axis=0)
    # from b(H*W*C) to w(b*k*k*c*h*w)
    bs = tf.shape(b)
    int_bs = b.get_shape().as_list()
    w = tf.extract_image_patches(
        b, [1,ksize,ksize,1], [1,stride,stride,1], [1,1,1,1], padding='SAME')
    # NOTE(review): the background patches are reshaped with the foreground's
    # static batch/channel sizes (int_fs) -- presumably f and b always share
    # them; confirm.
    w = tf.reshape(w, [int_fs[0], -1, ksize, ksize, int_fs[3]])
    w = tf.transpose(w, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    # process mask
    if mask is None:
        mask = tf.zeros([1, bs[1], bs[2], 1])
    m = tf.extract_image_patches(
        mask, [1,ksize,ksize,1], [1,stride,stride,1], [1,1,1,1], padding='SAME')
    m = tf.reshape(m, [1, -1, ksize, ksize, 1])
    m = tf.transpose(m, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    m = m[0]
    # mm is 1. for patches whose mask mean is exactly 0 and 0. otherwise
    mm = tf.cast(tf.equal(tf.reduce_mean(m, axis=[0,1,2], keep_dims=True), 0.), tf.float32)
    w_groups = tf.split(w, int_bs[0], axis=0)
    raw_w_groups = tf.split(raw_w, int_bs[0], axis=0)
    y = []
    offsets = []
    k = fuse_k
    scale = softmax_scale
    # identity kernel used by the fusion convolutions
    fuse_weight = tf.reshape(tf.eye(k), [k, k, 1, 1])
    for xi, wi, raw_wi in zip(f_groups, w_groups, raw_w_groups):
        # conv for compare: correlate the foreground with L2-normalised
        # background patches (norm clamped to at least 1e-4)
        wi = wi[0]
        wi_normed = wi / tf.maximum(tf.sqrt(tf.reduce_sum(tf.square(wi), axis=[0,1,2])), 1e-4)
        yi = tf.nn.conv2d(xi, wi_normed, strides=[1,1,1,1], padding="SAME")

        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = tf.reshape(yi, [1, fs[1]*fs[2], bs[1]*bs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1,1,1,1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1], bs[2]])
            # swap the two spatial axes of both f and b, then fuse again
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
            yi = tf.reshape(yi, [1, fs[1]*fs[2], bs[1]*bs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1,1,1,1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[2], fs[1], bs[2], bs[1]])
            # undo the axis swap
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
        yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1]*bs[2]])

        # softmax to match; masked patches are zeroed before and after
        yi *=  mm  # mask
        yi = tf.nn.softmax(yi*scale, 3)
        yi *=  mm  # mask

        # index of the best-matching background patch for every location
        offset = tf.argmax(yi, axis=3, output_type=tf.int32)
        # NOTE(review): the argmax runs over bs[1]*bs[2] background patches but
        # is split into (row, col) with the foreground width fs[2]; the two
        # agree only when f and b have the same width -- confirm.
        offset = tf.stack([offset // fs[2], offset % fs[2]], axis=-1)
        # deconv for patch pasting
        # 3.1 paste center
        wi_center = raw_wi[0]
        yi = tf.nn.conv2d_transpose(yi, wi_center, tf.concat([[1], raw_fs[1:]], axis=0), strides=[1,rate,rate,1]) / 4.
        y.append(yi)
        offsets.append(offset)
    y = tf.concat(y, axis=0)
    y.set_shape(raw_int_fs)
    offsets = tf.concat(offsets, axis=0)
    offsets.set_shape(int_bs[:3] + [2])
    # case1: visualize optical flow: minus current position
    h_add = tf.tile(tf.reshape(tf.range(bs[1]), [1, bs[1], 1, 1]), [bs[0], 1, bs[2], 1])
    w_add = tf.tile(tf.reshape(tf.range(bs[2]), [1, 1, bs[2], 1]), [bs[0], bs[1], 1, 1])
    offsets = offsets - tf.concat([h_add, w_add], axis=3)
    # to flow image
    flow = flow_to_image_tf(offsets)
    # # case2: visualize which pixels are attended
    # flow = highlight_flow_tf(offsets * tf.cast(mask, tf.int32))
    if rate != 1:
        flow = resize(flow, scale=rate, func=tf.image.resize_bilinear)
    return y, flow
 def _a(self):
     """Return one `I + 4*I`-style matrix per action.

     Builds a single float32 matrix equal to 1.0 plus 4.0 times the
     `_encoding_dim` identity and returns a list that repeats that same
     tensor `_num_actions` times.
     """
     identity = tf.eye(self._encoding_dim, dtype=tf.float32)
     per_arm_matrix = 1.0 + 4.0 * identity
     return [per_arm_matrix] * self._num_actions
Beispiel #34
0
        def step_fn(inputs):
            """Per-replica evaluation step.

            Runs the model `FLAGS.ensemble_size` times on one batch, records
            per-member NLL/accuracy, then updates the aggregate metrics
            (`metrics` for the 'clean' dataset, `corrupt_metrics` otherwise)
            from the averaged member probabilities.

            Args:
                inputs: an `(images, labels)` pair.
            """
            images, labels = inputs
            num_members = FLAGS.ensemble_size

            member_logits = []
            member_stddevs = []

            for member in range(num_members):
                outputs = model(images, training=False)
                if isinstance(outputs, tuple):
                    # The model produced (logits, covmat): unpack both.
                    logits, covmat = outputs
                else:
                    # No covariance returned: fall back to an identity matrix.
                    logits = outputs
                    covmat = tf.eye(FLAGS.per_core_batch_size)
                if FLAGS.use_bfloat16:
                    logits = tf.cast(logits, tf.float32)
                logits = mean_field_logits(
                    logits,
                    covmat,
                    mean_field_factor=FLAGS.gp_mean_field_factor)

                member_stddevs.append(tf.sqrt(tf.linalg.diag_part(covmat)))
                member_logits.append(logits)

                probs_i = tf.nn.softmax(logits)
                nll_i = tf.keras.losses.sparse_categorical_crossentropy(
                    labels, probs_i)
                metrics['test/nll_member_{}'.format(member)].update_state(
                    nll_i)
                metrics['test/accuracy_member_{}'.format(member)].update_state(
                    labels, probs_i)

            # Stacked logits have shape (num_samples, batch_size, num_classes).
            stacked_logits = tf.stack(member_logits, axis=0)
            stacked_stddevs = tf.stack(member_stddevs, axis=0)

            stddev = tf.reduce_mean(stacked_stddevs, axis=0)
            probs = tf.reduce_mean(tf.nn.softmax(stacked_logits), axis=0)

            tiled_labels = tf.broadcast_to(
                labels, [num_members, labels.shape[0]])
            log_likelihoods = -tf.keras.losses.sparse_categorical_crossentropy(
                tiled_labels, stacked_logits, from_logits=True)
            negative_log_likelihood = tf.reduce_mean(
                -tf.reduce_logsumexp(log_likelihoods, axis=[0]) +
                tf.math.log(float(num_members)))

            # Pick the metric container and the key names for this dataset.
            if dataset_name == 'clean':
                sink = metrics
                nll_key = 'test/negative_log_likelihood'
                acc_key = 'test/accuracy'
                ece_key = 'test/ece'
                std_key = 'test/stddev'
            else:
                sink = corrupt_metrics
                nll_key = 'test/nll_{}'.format(dataset_name)
                acc_key = 'test/accuracy_{}'.format(dataset_name)
                ece_key = 'test/ece_{}'.format(dataset_name)
                std_key = 'test/stddev_{}'.format(dataset_name)

            sink[nll_key].update_state(negative_log_likelihood)
            sink[acc_key].update_state(labels, probs)
            sink[ece_key].update_state(labels, probs)
            sink[std_key].update_state(stddev)
Beispiel #35
0
import tensorflow as tf
import tensorflow_probability as tfp
from time import time

# Micro-benchmark: time `n` draws of Gaussian step proposals using
# (1) a tfp MultivariateNormalFullCovariance distribution and
# (2) tf.random.normal, and print the elapsed wall-clock seconds of each.
n_samples = 4096
n_electrons = 4
n = 100  # number of timed repetitions per method
mu = tf.zeros(3)
sig = 0.02
sigma = tf.eye(3) * sig  # covariance matrix: 0.02 on the diagonal
# `sig` is rebound to sqrt(0.02), i.e. the standard deviation matching the
# diagonal of `sigma`; it is passed as `stddev` to tf.random.normal below.
sig = tf.sqrt(sig)

# Only the shape of `prev_sample` is used in the loops below.
prev_sample = tf.random.uniform((n_samples, n_electrons, 3))
step_gaussian = tfp.distributions.MultivariateNormalFullCovariance(mu, sigma)

# Method 1: sample a (n_samples, n_electrons) batch of 3-vectors from tfp.
t0 = time()
for _ in range(n):
    x = step_gaussian.sample(prev_sample.shape[:-1], dtype=tf.float32)
print(time() - t0)

# Method 2: sample the full (n_samples, n_electrons, 3) tensor directly.
t0 = time()
for _ in range(n):
    x = tf.random.normal(prev_sample.shape, stddev=sig)
print(time() - t0)
            def grad(dL):
                """Custom backward pass for the enclosing op.

                All linear algebra is done in float64 and cast back to float32
                for the returned gradients.

                Args:
                    dL: upstream gradient; it is expanded on trailing axes so
                        it broadcasts against each returned gradient.

                Returns:
                    A two-element list:
                    ``-dL[..., None] * grad_pT`` and
                    ``dL[..., None, None] * d(recon_loss)/d(x_out)``.
                """
                bones = tf.fill(tf.shape(pt_out), np.float64(1.))

                Mnew = tf.cast(tf.transpose(ground_distance, perm=[0, 2, 1]), tf.float64)

                T = tf.cast(tf.transpose(match, perm=[0, 2, 1]), tf.float64)
                Ttilde = T[:, :, :-1]  # T without its last column

                L = T * Mnew
                Ltilde = L[:, :, :-1]  # L without its last column

                D1 = tf.linalg.diag(tf.reduce_sum(T, axis=-1))
                # Add epsilon to ensure invertibility
                D2 = tf.linalg.diag(1 / (tf.reduce_sum(Ttilde, axis=-2) + np.float64(1e-100)))

                # Ttilde @ D2 appears in both H and f; build it once.
                Ttilde_D2 = tf.matmul(Ttilde, D2)

                # Add small diagonal piece to make sure H is invertible in edge cases.
                H = (D1 - tf.matmul(Ttilde_D2, Ttilde, transpose_b=True)
                     + epsilon * tf.eye(num_rows=tf.shape(bones)[-1],
                                        batch_shape=[tf.shape(bones)[0]],
                                        dtype=tf.float64))

                f = (-tf.reduce_sum(L, axis=-1)
                     + tf.squeeze(tf.matmul(Ttilde_D2,
                                            tf.expand_dims(tf.reduce_sum(Ltilde, axis=-2), -1)),
                                  axis=-1))
                # Solve H g = f directly instead of forming inv(H) explicitly:
                # same solution, cheaper and numerically better conditioned.
                g = tf.squeeze(tf.linalg.solve(H, tf.expand_dims(f, -1)), axis=-1)

                # Remove the mean of g along the last axis.
                grad_pT = g - bones * tf.expand_dims(tf.reduce_sum(g, axis=-1), -1) / tf.cast(tf.shape(bones)[1], tf.float64)

                grad_x_out = tf.gradients(recon_loss, x_out)[0]

                return [-tf.expand_dims(dL, -1) * tf.cast(grad_pT, tf.float32),
                        tf.expand_dims(tf.expand_dims(dL, -1), -1) * tf.cast(grad_x_out, tf.float32)]
Beispiel #37
0
    def call(self, inputs):
        """Pool a graph with a learned soft cluster assignment.

        Args:
            inputs: `[X, A]` or `[X, A, I]` (node features, adjacency and,
                optionally, a segment-index tensor; a rank-2 `I` is reduced
                to its first column).

        Returns:
            A list `[X_pooled, A_pooled]`, followed by `I_pooled` when `I`
            was given and by the assignment matrix `S` when
            `self.return_mask` is set. A link-prediction loss and an entropy
            loss are registered through `self.add_loss`.
        """
        if len(inputs) == 3:
            feats, adj, seg_ids = inputs
            if K.ndim(seg_ids) == 2:
                seg_ids = seg_ids[:, 0]
        else:
            feats, adj = inputs
            seg_ids = None

        n_nodes = K.shape(adj)[-1]
        # Losses are averaged to a scalar only in mixed or batch mode.
        detected_mode = ops.autodetect_mode(adj, feats)
        self.reduce_loss = detected_mode in (ops.modes['M'], ops.modes['B'])

        adj_is_sparse = K.is_sparse(adj)

        # Normalized adjacency with self-loops added.
        if adj_is_sparse:
            adj_loops = tf.sparse.add(adj,
                                      tf.sparse.eye(n_nodes, dtype=adj.dtype))
        else:
            adj_loops = adj + tf.eye(n_nodes, dtype=adj.dtype)
        fltr = ops.normalize_A(adj_loops)

        # Node embeddings.
        embeddings = ops.filter_dot(fltr, K.dot(feats, self.kernel_emb))
        if self.activation is not None:
            embeddings = self.activation(embeddings)

        # Cluster assignment matrix (softmax applied row-wise).
        assign = ops.filter_dot(fltr, K.dot(feats, self.kernel_pool))
        assign = activations.softmax(assign, axis=-1)

        # Link prediction loss: norm of A - S S^T over the last two axes.
        assign_gram = ops.matmul_A_BT(assign, assign)
        if adj_is_sparse:
            residual = tf.sparse.add(adj, -assign_gram)
        else:
            residual = adj - assign_gram
        link_loss = tf.norm(residual, axis=(-1, -2))
        if self.reduce_loss:
            link_loss = K.mean(link_loss)
        self.add_loss(link_loss)

        # Entropy loss of the assignment rows.
        row_entropy = tf.negative(
            tf.reduce_sum(tf.multiply(assign, K.log(assign + K.epsilon())),
                          axis=-1))
        entropy_loss = K.mean(row_entropy, axis=-1)
        if self.reduce_loss:
            entropy_loss = K.mean(entropy_loss)
        self.add_loss(entropy_loss)

        # Pooling.
        output = [ops.matmul_AT_B(assign, embeddings),
                  ops.matmul_AT_B_A(assign, adj)]

        if seg_ids is not None:
            seg_mean = tf.math.segment_mean(seg_ids, seg_ids)
            output.append(ops.repeat(seg_mean,
                                     tf.ones_like(seg_mean) * self.k))

        if self.return_mask:
            output.append(assign)

        return output
Beispiel #38
0
    def _build(self, x, y):
        """Build the unrolled multi-layer RNN graph and its losses.

        Creates the per-layer weights, unrolls `self.scan_fn` over the
        time axis (axis 1) of `x`, and stores predictions, losses, states
        and logits on `self`.

        Args:
            x: input tensor; unstacked along axis 1 into one input per step,
                and its last dimension is used as the first layer's input size.
            y: label tensor; unstacked along axis 1, and its last dimension is
                used as the output size.
        """
        print('build start')
        opts = self.opts
        time_loss_start = opts.time_loss_start
        time_loss_end = opts.time_loss_end
        batch_size = opts.batch_size
        self.decay = opts.dt / opts.tau
        assert opts.activation_fn in [
            'relu', 'tanh', 'relu6', 'retanh', 'sigmoid'
        ], "Invalid nonlinearity"

        # Map the activation name to a callable; the only name left for the
        # final `else` after the assert above is 'retanh' = relu(tanh(.)).
        fn = opts.activation_fn
        if fn == 'sigmoid':
            self.fn = tf.nn.sigmoid
        elif fn == 'tanh':
            self.fn = tf.tanh
        elif fn == 'relu':
            self.fn = tf.nn.relu
        elif fn == 'relu6':
            self.fn = tf.nn.relu6
        else:
            self.fn = lambda L: tf.nn.relu(tf.nn.tanh(L))

        # One tensor per time step.
        inputs_series = tf.unstack(x, axis=1)
        labels_series = tf.unstack(y, axis=1)

        # NOTE(review): `layer_size` is the same list object as
        # `opts.layer_size`, so this insert also modifies `opts.layer_size`
        # in place (a second call to _build would prepend again) -- confirm
        # whether scan_fn relies on the modified list.
        layer_size = opts.layer_size
        layer_size.insert(0, x.shape[-1])

        # EI_in = opts.EI_in  # either a percentage excitatory/inhibitory for each layer or None for random init
        # EI_h = opts.EI_h
        # EI_out = opts.EI_out

        # recurrent_mask, forward_mask and init_state start as empty lists and
        # are not filled anywhere in this method.
        self.Wxh, self.Whh, self.Wh_bias, self.Whh_mask, self.recurrent_mask, self.forward_mask, init_state = \
            [], [], [], [], [], [], []
        # Per layer: input weights [prev, cur], recurrent weights [cur, cur],
        # a zero-initialised bias [1, cur], and a mask that is 0 on the
        # diagonal and 1 elsewhere.
        for i in range(1, len(layer_size)):
            prev = layer_size[i - 1]
            cur = layer_size[i]
            self.Wxh.append(
                tf.get_variable(f"input_weights_{i-1}", [prev, cur]))
            self.Whh.append(
                tf.get_variable(f"hidden_weights_{i-1}", [cur, cur]))
            self.Wh_bias.append(
                tf.Variable(tf.zeros([1, cur]), name=f"hidden_bias_{i-1}"))
            self.Whh_mask.append(1 - tf.eye(cur))

        self.Wout = tf.get_variable("output_weights",
                                    [layer_size[-1], y.shape[-1]])
        self.Wout_bias = tf.Variable(tf.zeros([1, y.shape[-1]]),
                                     name="output_bias")

        # layer_size.pop(0)
        # Initial hidden state: zeros of shape [batch_size, L] for each
        # non-input layer.
        next_state = [
            tf.zeros(shape=[batch_size, L], dtype=tf.float32)
            for L in layer_size[1:]
        ]
        state_series = []
        logit_series = []

        # Unroll the recurrence over time, threading the state through.
        for i, current_input in enumerate(inputs_series):
            next_state, next_logit = self.scan_fn(next_state, current_input,
                                                  opts)
            state_series.append(next_state)
            logit_series.append(next_logit)

        self.predictions = [tf.nn.softmax(log) for log in logit_series]
        xe = [
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=lab, logits=log)
            for lab, log in zip(labels_series, logit_series)
        ]

        # Only the time steps in [time_loss_start, time_loss_end) contribute.
        self.error_loss = tf.reduce_mean(xe[time_loss_start:time_loss_end])

        # Layers are stacked on a new axis 2 and time steps on a new axis 1.
        # NOTE(review): tf.stack needs equally shaped inputs, so this
        # presumably assumes all hidden layers have the same size -- confirm.
        rnn_activity = tf.stack(
            [tf.stack([s for s in state], axis=2) for state in state_series],
            axis=1)
        self.activity_loss = opts.activity_alpha * tf.reduce_mean(
            tf.square(rnn_activity))  # zero activity
        self.weight_loss = opts.weight_alpha * (
            tf.reduce_mean([tf.reduce_mean(tf.square(W)) for W in self.Whh]) +
            tf.reduce_mean([tf.reduce_mean(tf.square(W)) for W in self.Wxh]))
        self.total_loss = self.error_loss + self.weight_loss + self.activity_loss

        # NOTE(review): `layer_ix` (cumulative sizes) is only used for its
        # length; the slice bounds below come from `layer_size` itself, which
        # looks unintended -- confirm before relying on self.states.
        layer_ix = np.cumsum(layer_size)
        self.states = [
            rnn_activity[:, :, layer_size[i]:layer_size[i + 1]]
            for i in range(len(layer_ix) - 1)
        ]
        self.logits = tf.stack(logit_series, axis=1)
Beispiel #39
0
def levmarq(settings,
            x_train,
            y_train,
            mu_init=3.0,
            min_error=1e-10,
            max_steps=100,
            mu_multiply=10,
            mu_divide=10,
            m_into_epoch=10,
            verbose=False):
    """Fit a dense network with a Levenberg-Marquardt style damped Newton loop.

    All weights and biases live in one flat float64 variable. Each outer step
    stores the Hessian and negative gradient of the mean-squared loss, then
    tries up to `m_into_epoch` damped updates
    ``inv(H + mu * I) @ (-grad)``: a successful try (loss decreased) divides
    `mu` by `mu_divide`; a failed try multiplies `mu` by `mu_multiply` and
    restores the parameters.

    Args:
        settings: mapping read for the keys "outs", "input_len", "inputs",
            "architecture" (list of hidden layer sizes) and "activation"
            ("relu", "tanh"; anything else selects sigmoid).
        x_train: training inputs fed to the `x` placeholder.
        y_train: training targets fed to the `y` placeholder.
        mu_init: initial damping factor.
        min_error: training stops once the loss is not above this value.
        max_steps: maximum number of outer steps.
        mu_multiply: factor applied to `mu` after a failed try.
        mu_divide: divisor applied to `mu` after a successful try.
        m_into_epoch: maximum number of tries per outer step.
        verbose: print progress roughly five times over `max_steps`.

    Returns:
        Tuple ``(history, params)``: the loss after each outer step as a
        numpy array, and the final value of the flat parameter vector.
    """
    outs = settings["outs"]
    m = settings["input_len"]
    print(5 * "=" + ">Training info<" + 5 * "=", "\n")
    print("Settings: ")
    for key in settings.keys():
        print(f"         {key}:{settings[key]}")
    print("\ntf version: ", tf.__version__, "\n")
    print(
        f"shape X:\t{x_train.shape}\nshape y:\t{y_train.shape}\n      m:\t{m}\n      p:\t{outs}"
    )
    print("\n")

    x = tf.compat.v1.placeholder(tf.float64, shape=[m, settings["inputs"]])
    y = tf.compat.v1.placeholder(tf.float64, shape=[m, settings["outs"]])

    # hidden layers
    nn = settings["architecture"]

    # Layer widths from input to output.
    st = [x_train.shape[-1]] + nn + [y_train.shape[-1]]

    # Alternating (weight, bias) shapes, one pair per layer.
    shapes = []
    for i in range(len(nn) + 1):
        shapes.append((st[i], st[i + 1]))
        shapes.append((1, st[i + 1]))
    sizes = [h * w for h, w in shapes]
    neurons_cnt = sum(sizes)

    print(
        f"Complex:\n        [parameters]x[data lenth]\n        {neurons_cnt}x{m}\n"
    )

    # `elif` is required here: with two independent `if` statements the
    # trailing `else` replaced a "relu" choice with sigmoid.
    if settings["activation"] == "relu":
        activation = tf.nn.relu
    elif settings["activation"] == "tanh":
        activation = tf.nn.tanh
    else:
        activation = tf.nn.sigmoid

    # feed forward
    initializer = tf.contrib.layers.xavier_initializer()
    p = tf.Variable(initializer([neurons_cnt], dtype=tf.float64))
    parms = [
        tf.reshape(chunk, shape)
        for chunk, shape in zip(tf.split(p, sizes, 0), shapes)
    ]
    Ws = parms[0::2]
    bs = parms[1::2]

    y_hat = x
    for i in range(len(nn)):
        y_hat = activation(tf.matmul(y_hat, Ws[i]) + bs[i])
    y_hat = tf.matmul(y_hat, Ws[-1]) + bs[-1]

    r = y - y_hat
    loss = tf.reduce_mean(tf.square(r))

    # feed dicts for map placeholders to actual values
    train_dict = {x: x_train, y: y_train}

    # With learning_rate=1 the optimizer simply subtracts the "gradient" it is
    # given, so apply_gradients([(-dx, p)]) performs p += dx.
    opt = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=1)

    mu = tf.compat.v1.placeholder(tf.float64, shape=[1])

    # Snapshot of the parameters, used to roll back a rejected update.
    p_store = tf.Variable(tf.zeros([neurons_cnt], dtype=tf.float64))

    save_parms = tf.compat.v1.assign(p_store, p)
    restore_parms = tf.compat.v1.assign(p, p_store)

    I = tf.eye(neurons_cnt, dtype=tf.float64)

    # The exact Hessian and negative gradient of the loss play the roles of
    # the "J^T J" and "J^T r" terms of the update.
    jTj = tf.hessians(loss, p)[0]
    jTr = -tf.gradients(loss, p)[0]
    jTr = tf.reshape(jTr, shape=(neurons_cnt, 1))

    # Stored copies so that retries within one outer step reuse the same
    # curvature information while only `mu` changes.
    jTj_store = tf.Variable(
        tf.zeros((neurons_cnt, neurons_cnt), dtype=tf.float64))
    jTr_store = tf.Variable(tf.zeros((neurons_cnt, 1), dtype=tf.float64))
    save_jTj_jTr = [
        tf.compat.v1.assign(jTj_store, jTj),
        tf.compat.v1.assign(jTr_store, jTr)
    ]

    dx = tf.matmul(tf.linalg.inv(jTj_store + tf.multiply(mu, I)), jTr_store)
    dx = tf.squeeze(dx)

    lm = opt.apply_gradients([(-dx, p)])

    # Train
    session = tf.compat.v1.Session()
    try:
        # Force a float array: the in-place `/=` below fails on an int array.
        train_dict[mu] = np.array([mu_init], dtype=np.float64)
        history = []
        step = 0
        # Guard against a zero modulus when max_steps < 5.
        report_every = max(1, int(max_steps / 5))
        session.run(tf.compat.v1.global_variables_initializer())
        current_loss = session.run(loss, train_dict)
        while current_loss > min_error and step < max_steps:
            step += 1
            if verbose and step % report_every == 0:
                print(
                    f'LM step: {step}, mu: {train_dict[mu][0]:.2e}, current loss: {current_loss:.2e}'
                )
            session.run(save_parms)
            session.run(save_jTj_jTr, train_dict)
            success = False
            for _ in range(m_into_epoch):
                session.run(lm, train_dict)
                new_loss = session.run(loss, train_dict)
                if new_loss < current_loss:
                    # Accepted: relax the damping and keep the new parameters.
                    train_dict[mu] /= mu_divide
                    current_loss = new_loss
                    success = True
                    break
                # Rejected: increase the damping and roll the parameters back.
                train_dict[mu] *= mu_multiply
                session.run(restore_parms)
            history.append(current_loss)
            if not success:
                print(
                    f'LM failed to improve, on step {step:}, loss: {current_loss:.2e}\n'
                )
                tp = session.run(p)
                return np.asarray(history), tp

        print(f'LevMarq ended on: {step:},\tfinal loss: {current_loss:.2e}\n')
        tp = session.run(p)
        return np.asarray(history), tp
    finally:
        # Release the session and graph on every exit path, including errors.
        session.close()
        tf.compat.v1.reset_default_graph()
Beispiel #40
0
 def __init__(self, num_features, l2reg=0.001):
     """Store the configuration and pre-build an identity matrix.

     Args:
         num_features: stored as-is and used as the size of `self.eye`.
         l2reg: stored as-is on the instance.
     """
     self.num_features, self.l2reg = num_features, l2reg
     # Identity matrix of size num_features, built once at construction.
     identity = tf.eye(num_features)
     self.eye = identity
Beispiel #41
0
def build_graph(Tlims,
                num_inducing_points=11,
                dim=1,
                alphas_init_val=1,
                gamma_init_val=1.,
                ag_poser='exp',
                m_init_val=0.1,
                lvech_init_val=None,
                stabilizer_value=0.01,
                kzz_stabilizer_value=1e-8,
                optimize_inducing_points=False,
                assert_correct_covariances=False):
    """Build the TensorFlow graph for the variational lower bound.

    The bound is assembled as
    ``-integral_over_T + exp_term - kl_term_op`` from the helper functions
    `psi_term`, `T_Integral`, `mu_tilde_square`, `exp_at_datapoints` and
    `kl_term`, and scalar/histogram summaries are registered along the way.

    Args:
        Tlims: per-dimension limits; converted to a DTYPE constant that must
            have shape (dim, 2). Min and max are taken along axis 1.
        num_inducing_points: number of inducing points.
        dim: number of input dimensions.
        alphas_init_val: initial value of every alpha (the inverse of the
            positivity transform is applied to initialise the base variable).
        gamma_init_val: initial value of gamma (same treatment as alphas).
        ag_poser: transform applied to the base alpha/gamma variables:
            'abs', 'square', None (identity); any other value selects exp.
        m_init_val: initial value of every entry of the variational mean.
        lvech_init_val: only referenced inside the disabled string block
            below; has no effect on the graph.
        stabilizer_value: forwarded to `kl_term`.
        kzz_stabilizer_value: multiplier of the identity added to K_zz.
        optimize_inducing_points: if True, the inducing point locations are
            computed from a trainable `omegas` variable; otherwise they are
            fed through a placeholder.
        assert_correct_covariances: if True, build `tf.Assert` ops checking
            that S is symmetric and has non-negative eigenvalues.

    Returns:
        Tuple ``(lower_bound, merged, Z_ph, u_ph, X_ph, m, S, L_vech,
        interesting_gradient, K_zz_inv, alphas_base, gamma_base, K_zz, omegas,
        covariance_asserts, sig_t_sqr, sigsqr_lmr, logdet_dbg)``; `omegas` is
        None and `covariance_asserts` is an empty list when the corresponding
        options are disabled.
    """

    ## ######### ##
    # PLACEHOLDER #
    ## ######### ##
    if not optimize_inducing_points:  # TODO change back to None
        Z_ph = tf.placeholder(DTYPE, [None, None],
                              name='inducing_point_locations')

    u_ph = tf.placeholder(DTYPE, [], name='inducing_point_mean')
    X_ph = tf.placeholder(DTYPE, [None, None], name='input_data')
    #a_ph = tf.placeholder(DTYPE, [None] ,name='alphas')

    # TODO: set constants as variables and create two optimizers with var_lists to optimize with/without hyperparams
    #a_const = 1 * tf.ones([1]) # dimension = tf.shape(Z_ph)[1]
    #g_const = tf.ones([1]) # later we have to define gamma as variable
    # NOTE(review): this value matches the Euler-Mascheroni constant; it is
    # only passed on to exp_at_datapoints.
    C = tf.constant(0.57721566490153286, dtype=DTYPE)

    #
    Tlims = tf.constant(Tlims, dtype=DTYPE)
    assert (Tlims.shape == (dim, 2))

    #Tlims
    Tmins = tf.reduce_min(Tlims, axis=1)
    Tmaxs = tf.reduce_max(Tlims, axis=1)

    assert (Tmins.dtype == DTYPE)
    assert (len(Tmins.shape) == 1)

    ## ####### ##
    # VARIABLES #
    ## ####### ##

    if optimize_inducing_points:
        # optimize inducing point location
        with tf.name_scope('inducing_point_optimization'):
            # omegas are initialised uniformly in [-1, 1)
            omegas_init = (
                tf.random_uniform([num_inducing_points, dim], dtype=DTYPE) -
                0.5) * tf.constant(2, dtype=DTYPE)
            omegas = tf.Variable(omegas_init,
                                 dtype=DTYPE,
                                 name='ind_point_omegas')

            with tf.name_scope('omegas'):
                if dim == 1:
                    for z in range(num_inducing_points):
                        tf.summary.scalar('omegas_{}'.format(z),
                                          tf.squeeze(omegas[z]))

                elif dim == 2:
                    print('omega 2d treatment not yet implemented')
                    # TODO: add fancy 2d movement as images
                    # for z in range(num_inducing_points):
                    #    tf.summary.('omegas_{}'.format(z), omegas[z])

                else:
                    print(
                        'omega summaries not available for dimensions higher than 2'
                    )

            # dim_shifter is (Tmins - Tmaxs) / 2, i.e. minus half the range, so
            # mean - shifter * tanh(omegas) stays within [Tmins, Tmaxs].
            dim_mean = tf.reduce_mean(Tlims, axis=1)
            dim_shifter = tf.subtract(Tmins, Tmaxs,
                                      name='ind_point_ranges') / 2

            dim_mean = tf.expand_dims(dim_mean, 0)
            dim_shifter = tf.expand_dims(dim_shifter, 0)

            assert (dim_mean.shape == (1, dim))
            assert (dim_shifter.shape == (1, dim))

            Z_ph = tf.subtract(dim_mean,
                               dim_shifter * tf.tanh(omegas),
                               name='inducing_point_locations')
    else:
        omegas = None

    Tlims = tf.cast(Tlims, dtype=DTYPE)

    with tf.name_scope('kernel_hyperparameters'):

        # poser fun makes sure the values for alphas and gamas are non-negative;
        # its inverse is applied to the requested initial values so that the
        # transformed variables start at those values
        if ag_poser == 'abs':
            tf_poser_fun = lambda x: tf.abs(x)
            tf_poser_fun_inv = lambda x: tf.abs(x)
        elif ag_poser == 'square':
            tf_poser_fun = lambda x: tf.square(x)
            tf_poser_fun_inv = lambda x: tf.sqrt(x)
        elif ag_poser == None:
            tf_poser_fun = lambda x: x
            tf_poser_fun_inv = lambda x: x
        else:
            # default: exp
            tf_poser_fun = lambda x: tf.exp(x)
            tf_poser_fun_inv = lambda x: tf.log(x)

        #alphas
        alphas_init_val = tf.constant(alphas_init_val, dtype=DTYPE)
        alphas_init = tf.ones([dim],
                              dtype=DTYPE) * tf_poser_fun_inv(alphas_init_val)
        alphas_base = tf.Variable(alphas_init,
                                  name='variational_alphas',
                                  dtype=DTYPE)

        alphas = tf_poser_fun(alphas_base)

        with tf.name_scope('alphas'):
            for a in range(dim):
                tf.summary.scalar('alphas_{}'.format(a), alphas[a])

        #gamma
        gamma_init_val = tf.constant(gamma_init_val, dtype=DTYPE)
        gamma_init_val = tf_poser_fun_inv(gamma_init_val)
        gamma_base = tf.Variable(gamma_init_val,
                                 name='variational_gamma',
                                 dtype=DTYPE)

        gamma = tf_poser_fun(gamma_base)

        tf.summary.scalar('gamma_base', gamma_base)
        tf.summary.scalar('gamma', gamma)
        tf.summary.tensor_summary('alphas_base', alphas_base)
        tf.summary.tensor_summary('alphas', alphas)

    # kernel call; a scaled identity is added before the inversion
    K_zz = ard_kernel(Z_ph, Z_ph, gamma=gamma, alphas=alphas) + tf.eye(
        num_inducing_points, dtype=DTYPE) * kzz_stabilizer_value
    K_zz_inv = tf.matrix_inverse(K_zz)

    with tf.name_scope('variational_distribution_parameters'):

        # mean
        m_init = tf.ones([num_inducing_points], dtype=DTYPE) * m_init_val
        m = tf.Variable(m_init, name='variational_mean', dtype=DTYPE)

        ## #### ##
        # S INIT #
        ## #### ##

        # vectorized version of covariance matrix S (ensure valid covariance matrix)
        vech_size = tf.cast(
            (num_inducing_points * (num_inducing_points + 1)) / 2, DTYPE_INT)
        vech_indices = tf.transpose(tf_tril_indices(num_inducing_points))

        # L_vech_init = tf.ones([vech_size])
        '''
        if lvech_init_val is None:
            lvechinitializer = np.zeros([(num_inducing_points * (num_inducing_points+1)) // 2])
            lvechinitializer[(np.cumsum(np.arange(num_inducing_points+1)) - 1)[1:]] = 1.
            L_vech_init = tf.constant(lvechinitializer, dtype=DTYPE)

        else:
            L_vech_init = tf.constant(lvech_init_val, dtype=DTYPE)
        '''

        # The factor entries are drawn from a normal with a small stddev.
        lvech_init_stddev = 0.01
        L_vech_init = tf.random_normal([vech_size],
                                       stddev=lvech_init_stddev,
                                       dtype=DTYPE)

        L_vech = tf.Variable(L_vech_init, dtype=DTYPE)
        L_shape = tf.constant([num_inducing_points, num_inducing_points])

        # Scatter the vector into a square matrix at `vech_indices`.
        L_st = tf.SparseTensor(tf.to_int64(vech_indices), L_vech,
                               tf.to_int64(L_shape))

        L = tf.sparse_add(tf.zeros(L_shape, dtype=DTYPE), L_st)
        # L = tf.sparse_add(tf.eye(L_shape[0], num_columns=L_shape[1]), L_st)
        # S = L L^T
        S = tf.matmul(L, tf.transpose(L), name='variational_covariance')

        with tf.name_scope('variational_dist_parameters'):
            tf.summary.histogram('mean_at_inducing_points', m)
            tf.summary.histogram('cov_at_inducing_points', S)

    # Eigenvalues are computed only to feed the histogram summaries.
    with tf.name_scope('positive_definiteness_check'):
        kzz_eigvals, kzz_eigvecs = tf.linalg.eigh(K_zz)
        S_eigvals, S_eigvecs = tf.linalg.eigh(S)

        tf.summary.histogram('kzz', K_zz)
        tf.summary.histogram('kzz_eigenvalues', kzz_eigvals)
        tf.summary.histogram('S_eigenvalues', S_eigvals)

    with tf.name_scope('integration-over-region-T'):

        with tf.name_scope('psi_matrix'):
            psi_matrix = psi_term(Z_ph, Z_ph, alphas, gamma, Tmins, Tmaxs)

        with tf.name_scope('T_integral'):
            integral_over_T = T_Integral(m, S, K_zz_inv, psi_matrix, gamma,
                                         Tmins, Tmaxs)

    with tf.name_scope('expectation_at_datapoints'):
        with tf.name_scope('mu_and_sig_calculation'):
            mu_t, sig_t_sqr, sigsqr_lmr = mu_tilde_square(
                X_ph, Z_ph, S, m, K_zz_inv, alphas, gamma)

        with tf.name_scope('squaring_that_mu'):
            mu_t_square = tf.square(mu_t)

        exp_term = exp_at_datapoints(mu_t_square, sig_t_sqr, C)

    with tf.name_scope('KL-divergence'):
        kl_term_op, logdet_dbg = kl_term(m, S, K_zz, K_zz_inv, u_ph, L,
                                         stabilizer_value)

    with tf.name_scope('calculate_bound'):
        lower_bound = -integral_over_T + exp_term - kl_term_op

    tf.summary.scalar('variational_lower_bound', tf.squeeze(lower_bound))
    tf.summary.scalar('integral_over_T', tf.squeeze(integral_over_T))
    tf.summary.scalar('exp_term', tf.squeeze(exp_term))
    tf.summary.scalar('kl_div', kl_term_op)

    # m_grad = tf.gradients(kl_term_op, [m])[0]
    # L_vech_grad = tf.gradients(kl_term_op, [L_vech])[0]

    if assert_correct_covariances:
        # assert positive semidefinite covariance matrices
        S_symm_assert = tf.Assert(tf.reduce_all(tf.equal(S, tf.transpose(S))),
                                  [S])
        S_possemidef_assert = tf.Assert(
            tf.reduce_all(tf.greater_equal(tf.linalg.eigh(S)[0], 0)), [S])
        covariance_asserts = [S_symm_assert, S_possemidef_assert]
    else:
        covariance_asserts = []

    # Gradient of the bound with respect to the exp_term node (debug output).
    interesting_gradient = tf.gradients(lower_bound, [exp_term])[0]

    merged = tf.summary.merge_all()

    return lower_bound, merged, Z_ph, u_ph, X_ph, m, S, L_vech, interesting_gradient, K_zz_inv, alphas_base, gamma_base, K_zz, omegas, covariance_asserts, sig_t_sqr, sigsqr_lmr, logdet_dbg
Beispiel #42
0
    def inner_cca_objective(y_true, y_pred):
        """
        It is the loss function of CCA as introduced in the original paper. There can be other formulations.
        It is implemented on Tensorflow based on github@VahidooX's cca loss on Theano.
        y_true is just ignored

        Args:
            y_true: unused.
            y_pred: 2-D tensor whose columns hold the outputs of both views
                side by side; the first half of the columns is view 1 and the
                second half is view 2.

        Returns:
            The negated correlation: either the square root of trace(T^T T)
            (when `use_all_singular_values` is set) or the sum of the square
            roots of the top `outdim_size` eigenvalues of T^T T.
        """

        r1 = 1e-4
        r2 = 1e-4
        eps = 1e-12
        o1 = o2 = int(y_pred.shape[1] // 2)

        # unpack (separate) the output of networks for view 1 and view 2;
        # after the transpose each column is one sample
        H1 = tf.transpose(y_pred[:, 0:o1])
        H2 = tf.transpose(y_pred[:, o1:o1 + o2])

        m = tf.shape(H1)[1]
        inv_m = tf.cast(tf.divide(1, m), tf.float32)
        inv_m_minus_1 = tf.cast(tf.divide(1, m - 1), tf.float32)

        # subtract the per-feature mean over the samples
        H1bar = H1 - inv_m * tf.matmul(H1, tf.ones([m, m]))
        H2bar = H2 - inv_m * tf.matmul(H2, tf.ones([m, m]))

        SigmaHat12 = inv_m_minus_1 * tf.matmul(
            H1bar, H2bar, transpose_b=True)  # [dim, dim]
        # r1 / r2 add a small ridge to the within-view covariances
        SigmaHat11 = inv_m_minus_1 * tf.matmul(
            H1bar, H1bar, transpose_b=True) + r1 * tf.eye(o1)
        SigmaHat22 = inv_m_minus_1 * tf.matmul(
            H2bar, H2bar, transpose_b=True) + r2 * tf.eye(o2)

        # Calculating the root inverse of covariance matrices by using eigen decomposition
        [D1, V1] = tf.self_adjoint_eig(SigmaHat11)
        [D2,
         V2] = tf.self_adjoint_eig(SigmaHat22)  # Added to increase stability

        # Keep only eigenvalues larger than eps and their eigenvectors.
        # tf.where yields indices of shape [n, 1]; squeeze only axis 1 so that
        # a single surviving eigenvalue still gives a 1-D index (an unqualified
        # squeeze would collapse it to a scalar and V would lose a dimension).
        posInd1 = tf.where(tf.greater(D1, eps))
        D1 = tf.gather_nd(D1, posInd1)
        V1 = tf.transpose(
            tf.nn.embedding_lookup(tf.transpose(V1),
                                   tf.squeeze(posInd1, axis=1)))

        posInd2 = tf.where(tf.greater(D2, eps))
        D2 = tf.gather_nd(D2, posInd2)
        V2 = tf.transpose(
            tf.nn.embedding_lookup(tf.transpose(V2),
                                   tf.squeeze(posInd2, axis=1)))

        # Sigma^(-1/2) = V diag(D^-0.5) V^T
        SigmaHat11RootInv = tf.matmul(tf.matmul(V1, tf.diag(D1**-0.5)),
                                      V1,
                                      transpose_b=True)  # [dim, dim]
        SigmaHat22RootInv = tf.matmul(tf.matmul(V2, tf.diag(D2**-0.5)),
                                      V2,
                                      transpose_b=True)

        Tval = tf.matmul(tf.matmul(SigmaHat11RootInv, SigmaHat12),
                         SigmaHat22RootInv)

        if use_all_singular_values:
            corr = tf.sqrt(tf.trace(tf.matmul(Tval, Tval, transpose_a=True)))
        else:
            # Use only the largest `outdim_size` eigenvalues of T^T T.
            [U,
             V] = tf.self_adjoint_eig(tf.matmul(Tval, Tval, transpose_a=True))
            U = tf.gather_nd(U, tf.where(tf.greater(U, eps)))
            kk = tf.reshape(tf.cast(tf.shape(U), tf.int32), [])
            K = tf.minimum(kk, outdim_size)
            w, _ = tf.nn.top_k(U, k=K)
            corr = tf.reduce_sum(tf.sqrt(w))

        return -corr
Beispiel #43
0
 def eye_diff(x):
     """Return `x` minus `mul` times an identity-like matrix of x's first two dims."""
     dims = K.shape(x)
     scaled_identity = mul * tf.eye(dims[0], dims[1])
     return x - scaled_identity
def motion_field_consistency_loss(frame1transformed_pixelxy, mask, rotation1,
                                  translation1, rotation2, translation2):
    """Computes a cycle consistency loss between two motion maps.

    Given two rotation and translation maps (of two frames), and a mapping
    from one frame to the other, this function assists in imposing that the
    fields at frame 1 represent the opposite motion of the ones in frame 2.

    In other words: At any given pixel on frame 1, if we apply the translation
    and rotation designated at that pixel, we land on some pixel in frame 2,
    and if we apply the translation and rotation designated there, we land
    back at the original pixel at frame 1.

    Args:
      frame1transformed_pixelxy: A tf.Tensor of shape [B, H, W, 2]
        representing the motion-transformed location of each pixel in frame 1.
        It is assumed (but not verified) that frame1transformed_pixelxy was
        obtained by properly applying rotation1 and translation1 on the depth
        map of frame 1. No gradient flows through it (it is wrapped in
        tf.stop_gradient before resampling).
      mask: A tf.Tensor expressing the weight of each pixel in the calculation
        of the consistency loss. NOTE(review): historically documented as
        [B, H, W, 2], but it is multiplied with a per-pixel squared norm here,
        so it presumably has to broadcast against [B, H, W] -- confirm.
      rotation1: A tf.Tensor of shape [B, 3] representing rotation angles.
      translation1: A tf.Tensor of shape [B, H, W, 3] representing translation
        vectors.
      rotation2: A tf.Tensor of shape [B, 3] representing rotation angles.
      translation2: A tf.Tensor of shape [B, H, W, 3] representing translation
        vectors.

    Returns:
      A dictionary from string to tf.Tensor, with the following entries:
        rotation_error: A tf scalar, the rotation consistency error.
        translation_error: A tf scalar, the translation consistency error.
    """

    # Sample frame 2's translation field at the locations the pixels of
    # frame 1 land on after being transformed.
    translation2resampled = tf.contrib.resampler.resampler(
        translation2, tf.stop_gradient(frame1transformed_pixelxy))

    # Broadcast the per-image rotation angles to a per-pixel field so they can
    # be combined with the per-pixel translations.
    rotation1field = tf.broadcast_to(_expand_dims_twice(rotation1, -2),
                                     tf.shape(translation1))
    rotation2field = tf.broadcast_to(_expand_dims_twice(rotation2, -2),
                                     tf.shape(translation2))
    rotation1matrix = transform_utils.matrix_from_angles(rotation1field)
    rotation2matrix = transform_utils.matrix_from_angles(rotation2field)

    # Composing the two motions should yield the identity transform: a unit
    # rotation and a zero translation.
    rot_unit, trans_zero = transform_utils.combine(rotation2matrix,
                                                   translation2resampled,
                                                   rotation1matrix,
                                                   translation1)
    eye = tf.eye(3, batch_shape=tf.shape(rot_unit)[:-2])

    # We normalize the product of rotations by the product of their norms, to
    # make the loss agnostic of their magnitudes, only wanting them to be
    # opposite in directions. Otherwise the loss has a tendency to drive the
    # rotations to zero.
    rot_error = tf.reduce_mean(tf.square(rot_unit - eye), axis=(3, 4))
    rot1_scale = tf.reduce_mean(tf.square(rotation1matrix - eye), axis=(3, 4))
    rot2_scale = tf.reduce_mean(tf.square(rotation2matrix - eye), axis=(3, 4))
    rot_error /= (1e-24 + rot1_scale + rot2_scale)
    rotation_error = tf.reduce_mean(rot_error)

    def norm(x):
        # Squared L2 norm over the last axis.
        return tf.reduce_sum(tf.square(x), axis=-1)

    # Here again, we normalize by the magnitudes, for the same reason.
    translation_error = tf.reduce_mean(
        mask * norm(trans_zero) /
        (1e-24 + norm(translation1) + norm(translation2)))

    return {
        'rotation_error': rotation_error,
        'translation_error': translation_error
    }
Beispiel #45
0
    def construct_model(self):
        """Build the full TensorFlow graph for the model inside ``self.sess``.

        Creates, in order: the prior variables (``K``, ``L_asym``/``L``) and
        noise covariance ``SigEps``; the placeholders for context and query
        data; the feature encodings ``context_phi``/``phi``; the flow
        bijector; the per-task posterior (``K_N``, ``Linv_N``) and the
        posterior predictive ``base`` distribution; the loss and its
        summaries; a gradient-clipped Adam training op; the summary writer
        and saver. Finally all global variables are initialized in
        ``self.sess``.

        Everything is stored as attributes on ``self``; nothing is returned.
        """
        with self.sess.graph.as_default():
            # Width of the last feature layer; sizes the prior parameters below.
            last_layer = self.config['nn_layers'][-1]
                
            # build priors
            # Isotropic noise covariance, reshaped to (1, 1, y_dim, y_dim) so it
            # broadcasts against the per-point spread factor further down.
            self.SigEps = self.sigma_eps*tf.eye(self.y_dim)
            self.SigEps = tf.reshape(self.SigEps, (1,1,self.y_dim,self.y_dim))
            
            self.K = tf.get_variable('K_init',shape=[last_layer,self.y_dim]) #\bar{K}_0

            self.L_asym = tf.get_variable('L_asym',initializer=tf.eye(last_layer)) # cholesky decomp of \Lambda_0
            self.L = self.L_asym @ tf.matrix_transpose(self.L_asym) # \Lambda_0
    
            
            # context_x,y: x,y points available for context (M, N_context, x_dim/y_dim)
            self.context_x = tf.placeholder(tf.float32, shape=[self.M,None,self.x_dim], name="cx")
            self.context_y = tf.placeholder(tf.float32, shape=[self.M,None,self.y_dim], name="cy")
            
            # x, y: query points (M, N_test, x_dim/y_dim)
            self.x = tf.placeholder(tf.float32, shape=[self.M,None,self.x_dim], name="x")
            self.y = tf.placeholder(tf.float32, shape=[self.M,None,self.y_dim], name="y")
            
            # encode x to phi(x)
            self.context_phi = tf.map_fn( lambda x: self.basis(x),
                                          elems=self.context_x,
                                          dtype=tf.float32)
            self.phi = tf.map_fn( lambda x: self.basis(x),
                                  elems=self.x,
                                  dtype=tf.float32)
            
            # build invertible flow network
            self.flow_bijector = self.build_flow()
        
            # num_context: number of context points from context_x,y to use when computing posterior. size (M,)
            # Defaults to using every provided context point for every model.
            self.num_models = tf.shape(self.context_y)[0]
            self.max_num_context = tf.shape(self.context_y)[1]*tf.ones((self.num_models,), dtype=tf.int32)
            self.num_context = tf.placeholder_with_default(self.max_num_context, shape=(None,))

            # in the case of conditional density est, map x to feature space
            
            # map context y to latent space
            # self.context_z is (M, N_context, y_dim)
            self.context_z = tf.map_fn( lambda xy: self.flow_bijector.inverse(xy[1], x=xy[0]),
                                        elems=(self.context_x,self.context_y),
                                        dtype=tf.float32)
            
            # compute posteriors
            # NOTE(review): self.context_z is computed above but batch_blr is fed
            # self.context_y here, and context_z is not used again in this
            # method -- confirm which of the two is intended.
            self.K_N, self.Linv_N = tf.map_fn(lambda x: self.batch_blr(*x),
                                                            elems=(self.context_phi, self.context_y, self.num_context),
                                                            dtype=(tf.float32, tf.float32) 
                                                            )
            
            # compute posterior predictive in latent space
            self.mu_N = batch_matmul(tf.matrix_transpose(self.K_N), self.phi)
            spread_fac = 1 + batch_quadform(self.Linv_N, self.phi)
            self.Sig_N = tf.expand_dims(spread_fac, axis=-1)*self.SigEps

            
            # Debugging aid, kept disabled:
#             print_op = tf.print(tf.reduce_mean(self.Sig_N, axis=(0,1)), tf.linalg.det(self.Linv_N), tf.linalg.det(tf.linalg.inv(self.L)))
#             with tf.control_dependencies([print_op]):
            self.base = tfd.MultivariateNormalFullCovariance(loc=self.mu_N, covariance_matrix=self.Sig_N)
            
            # map test data to latent space to evaluate log likelihood
            self.z = tf.map_fn( lambda xy: self.flow_bijector.inverse(xy[1], x=xy[0]),
                                        elems=(self.x,self.y),
                                        dtype=tf.float32)
            
            # Diagnostic only: distance between latent targets and predictive mean.
            rmse_z = tf.reduce_mean( tf.sqrt( tf.reduce_sum( (self.z - tf.expand_dims(self.mu_N, axis=1))**2, axis=-1) ) )
            tf.summary.scalar("rmse_z", rmse_z)
            
            logdetinvJ = tf.map_fn( lambda xy: self.flow_bijector.inverse_log_det_jacobian(xy[1], event_ndims=1, x=xy[0]),
                                      elems=(self.x, self.y),
                                      dtype=tf.float32)
            
            # Negative log-likelihood of y: base log-density of z plus the
            # change-of-variables term of the inverse flow.
            self.loss = -self.base.log_prob(self.z) -logdetinvJ
            
            # map to observation space
            #self.transformed_dist = tfd.ConditionalTransformedDistribution(distribution=self.base,bijector=self.flow_bijector)
            
            
            #self.loss = -(self.transformed_dist.log_prob(self.y, x=self.x)) 
            self.total_loss = tf.reduce_mean(self.loss)
            tf.summary.scalar("loss", self.total_loss)
            
            # Adam with gradients clipped to a global norm of 5.
            optimizer = tf.train.AdamOptimizer(self.config['learning_rate'])
            gs, vs = zip(*optimizer.compute_gradients(self.total_loss))
            gs, _ = tf.clip_by_global_norm(gs, 5.)
            self.train_op = optimizer.apply_gradients(zip(gs, vs)) #minimize(self.total_loss)#
                        
            # Summaries go to a fresh, timestamp-named directory on every call.
            self.train_writer = tf.summary.FileWriter('summaries/'+str(time.time()), self.sess.graph, flush_secs=10)
            self.merged = tf.summary.merge_all()

            self.saver = tf.train.Saver()
                        
            self.sess.run(tf.global_variables_initializer())
Beispiel #46
0
def wct(content, style, alpha=1, eps=1e-8):
    '''TensorFlow version of the Whiten-Color Transform (WCT).

    See p.4 of the Universal Style Transfer paper for the corresponding
    equations: https://arxiv.org/pdf/1705.08086.pdf

    Args:
      content: content encoding of shape 1xHxWxC.
      style: style encoding of shape 1xHxWxC (H and W may differ from those
        of `content`).
      alpha: blending weight; 1 returns the fully whiten-colored feature,
        0 returns the original content feature.
      eps: value added to the diagonal of both covariance matrices for
        numerical stability.

    Returns:
      The blended feature map with shape 1xHxWxC (H, W taken from `content`).
    '''
    # Remove ONLY the batch dim and reorder to CxHxW. Squeezing without an
    # explicit axis would also drop H or W when they happen to be 1, which
    # breaks the 3-D transpose below.
    content_t = tf.transpose(tf.squeeze(content, axis=[0]), (2, 0, 1))
    style_t = tf.transpose(tf.squeeze(style, axis=[0]), (2, 0, 1))

    Cc, Hc, Wc = tf.unstack(tf.shape(content_t))
    Cs, Hs, Ws = tf.unstack(tf.shape(style_t))

    # CxHxW -> CxH*W
    content_flat = tf.reshape(content_t, (Cc, Hc * Wc))
    style_flat = tf.reshape(style_t, (Cs, Hs * Ws))

    # Content covariance
    # (`keepdims` is the current spelling of the former `keep_dims` argument.)
    mc = tf.reduce_mean(content_flat, axis=1, keepdims=True)
    fc = content_flat - mc
    fcfc = tf.matmul(fc, fc, transpose_b=True) / (
        tf.cast(Hc * Wc, tf.float32) - 1.) + tf.eye(Cc) * eps

    # Style covariance
    ms = tf.reduce_mean(style_flat, axis=1, keepdims=True)
    fs = style_flat - ms
    fsfs = tf.matmul(fs, fs, transpose_b=True) / (
        tf.cast(Hs * Ws, tf.float32) - 1.) + tf.eye(Cs) * eps

    # tf.svd is slower on GPU, see https://github.com/tensorflow/tensorflow/issues/13603
    with tf.device('/cpu:0'):
        Sc, Uc, _ = tf.linalg.svd(fcfc)
        Ss, Us, _ = tf.linalg.svd(fsfs)

    ## Uncomment to perform SVD for content/style with np in one call
    ## This is slower than CPU tf.svd but won't segfault for ill-conditioned matrices
    # @jit
    # def np_svd(content, style):
    #     '''tf.py_func helper to run SVD with NumPy for content/style cov tensors'''
    #     Uc, Sc, _ = np.linalg.svd(content)
    #     Us, Ss, _ = np.linalg.svd(style)
    #     return Uc, Sc, Us, Ss
    # Uc, Sc, Us, Ss = tf.py_func(np_svd, [fcfc, fsfs], [tf.float32, tf.float32, tf.float32, tf.float32])

    # Filter small singular values: count how many exceed the threshold and
    # keep that many leading entries of the SVD output.
    k_c = tf.reduce_sum(tf.cast(tf.greater(Sc, 1e-5), tf.int32))
    k_s = tf.reduce_sum(tf.cast(tf.greater(Ss, 1e-5), tf.int32))

    # Whiten content feature
    Dc = tf.linalg.diag(tf.pow(Sc[:k_c], -0.5))
    fc_hat = tf.matmul(
        tf.matmul(tf.matmul(Uc[:, :k_c], Dc), Uc[:, :k_c], transpose_b=True),
        fc)

    # Color content with style
    Ds = tf.linalg.diag(tf.pow(Ss[:k_s], 0.5))
    fcs_hat = tf.matmul(
        tf.matmul(tf.matmul(Us[:, :k_s], Ds), Us[:, :k_s], transpose_b=True),
        fc_hat)

    # Re-center with mean of style
    fcs_hat = fcs_hat + ms

    # Blend whiten-colored feature with original content feature
    blended = alpha * fcs_hat + (1 - alpha) * (fc + mc)

    # CxH*W -> CxHxW
    blended = tf.reshape(blended, (Cc, Hc, Wc))
    # CxHxW -> 1xHxWxC
    blended = tf.expand_dims(tf.transpose(blended, (1, 2, 0)), 0)

    return blended
Beispiel #47
0
    def __call__(self, beta, theta, get_skin=False, name=None):
        """
        Obtain SMPL with shape (beta) & pose (theta) inputs.
        Theta includes the global rotation.
        Args:
          beta: N x 10
          theta: N x 72 (with 3-D axis-angle rep)
          get_skin: if True, also return the posed vertices and the
                 per-joint rotation matrices.
          name: optional name for the enclosing tf.name_scope.

        Updates:
        self.J_transformed: N x 24 x 3 joint location after shaping
                 & posing with beta and theta
        Returns:
          If get_skin is False:
          - joints: N x 19 or 14 x 3 joint locations depending on joint_type
          If get_skin is True, a tuple (verts, joints, Rs):
          - verts: N x 6890 x 3
          - joints: as above
          - Rs: N x 24 x 3 x 3 rotation matrices derived from theta
        """

        with tf.name_scope(name, "smpl_main", [beta, theta]):
            # The batch size must be statically known; it is used to tile the
            # skinning weights and to build the homogeneous coordinate below.
            num_batch = beta.shape[0].value

            # 1. Add shape blend shapes
            # (N x 10) x (10 x 6890*3) = N x 6890 x 3
            v_shaped = tf.reshape(
                tf.matmul(beta, self.shapedirs, name='shape_bs'),
                [-1, self.size[0], self.size[1]]) + self.v_template

            # 2. Infer shape-dependent joint locations.
            Jx = tf.matmul(v_shaped[:, :, 0], self.J_regressor)
            Jy = tf.matmul(v_shaped[:, :, 1], self.J_regressor)
            Jz = tf.matmul(v_shaped[:, :, 2], self.J_regressor)
            J = tf.stack([Jx, Jy, Jz], axis=2)

            # 3. Add pose blend shapes
            # N x 24 x 3 x 3
            Rs = tf.reshape(batch_rodrigues(tf.reshape(theta, [-1, 3])),
                            [-1, 24, 3, 3])
            with tf.name_scope("lrotmin"):
                # Ignore global rotation.
                # 23 joints x 9 rotation entries = 207 pose features.
                pose_feature = tf.reshape(Rs[:, 1:, :, :] - tf.eye(3),
                                          [-1, 207])

            # (N x 207) x (207, 20670) -> N x 6890 x 3
            v_posed = tf.reshape(tf.matmul(pose_feature, self.posedirs),
                                 [-1, self.size[0], self.size[1]]) + v_shaped

            # 4. Get the global joint location
            self.J_transformed, A = batch_global_rigid_transformation(
                Rs, J, self.parents)

            # 5. Do skinning:
            # W is N x 6890 x 24
            W = tf.reshape(tf.tile(self.weights, [num_batch, 1]),
                           [num_batch, -1, 24])
            # (N x 6890 x 24) x (N x 24 x 16)
            T = tf.reshape(tf.matmul(W, tf.reshape(A, [num_batch, 24, 16])),
                           [num_batch, -1, 4, 4])
            # Append a homogeneous 1 so the 4x4 transforms can be applied.
            v_posed_homo = tf.concat(
                [v_posed,
                 tf.ones([num_batch, v_posed.shape[1].value, 1])], 2)
            v_homo = tf.matmul(T, tf.expand_dims(v_posed_homo, -1))

            verts = v_homo[:, :, :3, 0]

            # Get cocoplus or lsp joints:
            joint_x = tf.matmul(verts[:, :, 0], self.joint_regressor)
            joint_y = tf.matmul(verts[:, :, 1], self.joint_regressor)
            joint_z = tf.matmul(verts[:, :, 2], self.joint_regressor)
            joints = tf.stack([joint_x, joint_y, joint_z], axis=2)

            if get_skin:
                return verts, joints, Rs
            else:
                return joints
Beispiel #48
0
    def call(self, x):
        """Compute the negative canonical correlation between two views.

        `x` is a rank-2 tensor whose columns are split in half: the first
        half is view 1 and the second half is view 2 (rows are samples).
        The correlation is stored in `self.corr` and its negation is
        returned so that it can be minimized as a loss.

        Uses `self.use_all_singular_values` to choose between using all
        singular values and only the top `self.cca_space_dim` ones.
        """
        # Regularization added to the diagonal of the auto-covariances, and
        # the threshold below which eigenvalues are discarded.
        r1 = tf.constant([1e-4])
        r2 = tf.constant([1e-4])
        eps = tf.constant([1e-12])
        o1 = o2 = tf.shape(x)[1] // 2

        # Views as [features, samples].
        H1 = T.transpose(x[:, 0:o1])
        H2 = T.transpose(x[:, o1:o1 + o2])

        one = tf.constant([1.0])
        m = tf.shape(H1)[1]
        m_float = tf.cast(m, 'float')

        # minus the mean value
        partition = tf.divide(one, m_float)
        H1bar = H1 - partition * tf.matmul(H1, tf.ones([m, m]))
        H2bar = H2 - partition * tf.matmul(H2, tf.ones([m, m]))

        # calculate the auto-covariance and cross-covariance
        partition2 = tf.divide(one, (m_float - 1))
        SigmaHat12 = partition2 * tf.matmul(H1bar, tf.transpose(H2bar))
        SigmaHat11 = partition2 * \
            tf.matmul(H1bar, tf.transpose(H1bar)) + r1 * tf.eye(o1)
        SigmaHat22 = partition2 * \
            tf.matmul(H2bar, tf.transpose(H2bar)) + r2 * tf.eye(o2)

        # calculate the root inverse of covariance matrices by using eigen decomposition
        # NOTE(review): assumes my_eigen returns (eigenvalues, eigenvectors)
        # with eigenvectors stored as COLUMNS (numpy.linalg.eigh convention)
        # -- confirm against its definition.
        [D1, V1] = tf.py_func(my_eigen, [SigmaHat11], [tf.float32, tf.float32])
        [D2, V2] = tf.py_func(my_eigen, [SigmaHat22], [tf.float32, tf.float32])

        # for stability: keep only eigenpairs whose eigenvalue exceeds eps.
        # tf.where yields shape [k, 1]; squeeze only that trailing axis so a
        # single surviving index stays a length-1 vector instead of a scalar.
        # The eigenvectors are gathered along axis 1 (columns) so that
        # V @ diag(D) @ V^T remains shape-consistent when entries are dropped.
        D1_indices = tf.squeeze(tf.where(D1 > eps), axis=1)
        V1 = tf.gather(V1, D1_indices, axis=1)
        D1 = tf.gather(D1, D1_indices)

        D2_indices = tf.squeeze(tf.where(D2 > eps), axis=1)
        V2 = tf.gather(V2, D2_indices, axis=1)
        D2 = tf.gather(D2, D2_indices)

        pow_value = tf.constant([-0.5])
        SigmaHat11RootInv = tf.matmul(
            tf.matmul(V1, tf.diag(tf.pow(D1, pow_value))), tf.transpose(V1))
        SigmaHat22RootInv = tf.matmul(
            tf.matmul(V2, tf.diag(tf.pow(D2, pow_value))), tf.transpose(V2))

        Tval = tf.matmul(tf.matmul(SigmaHat11RootInv,
                                   SigmaHat12), SigmaHat22RootInv)

        if self.use_all_singular_values:
            # all singular values are used to calculate the correlation
            # NOTE(review): the square root is applied element-wise BEFORE the
            # trace here; other CCA losses take sqrt(trace(T'T)) instead.
            # Left unchanged -- confirm which form is intended.
            self.corr = tf.trace(T.sqrt(tf.matmul(tf.transpose(Tval), Tval)))
        else:
            # just the top outdim_size singular values are used
            TT = tf.matmul(tf.transpose(Tval), Tval)
            U, _ = tf.self_adjoint_eig(TT)
            U_sort, _ = tf.nn.top_k(U, self.cca_space_dim)
            self.corr = T.sum(T.sqrt(U_sort))

        return -self.corr
 def test_box_classification_loss_relative(self):
     """Logits favouring the ground-truth class must yield the lower loss.

     Both predictions share identical (ground-truth) box geometry and only
     differ in their semantic logits: `logits1` peaks at class 1 (the
     ground-truth class), `logits2` peaks at class 4.
     """
     in_fields = standard_fields.InputDataFields
     out_fields = standard_fields.DetectionResultFields

     def column(dtype):
         # A [2, 1] tensor filled with ones of the requested dtype.
         return tf.reshape(tf.constant([1, 1], dtype=dtype), [2, 1])

     def twice(tensor):
         # Duplicate a per-example tensor into a batch of two.
         return tf.stack([tensor, tensor], axis=0)

     def logits_from(row):
         return tf.reshape(tf.constant([row, row], dtype=tf.float32), [2, 5])

     gt_classes = column(tf.int32)
     gt_length = column(tf.float32)
     gt_height = column(tf.float32)
     gt_width = column(tf.float32)
     gt_center = tf.reshape(
         tf.constant([1, 1, 1, 1, 1, 1], dtype=tf.float32), [2, 3])
     gt_rotation_matrix = tf.tile(tf.expand_dims(tf.eye(3), axis=0),
                                  [2, 1, 1])
     logits1 = logits_from([-2.0, 2.0, -3.0, -2.0, 0.0])
     logits2 = logits_from([-2.0, 0.0, -3.0, -2.0, 2.0])
     gt_instance_ids = column(tf.int32)

     def build_outputs(logits):
         # Predictions with ground-truth geometry and the given logits.
         return {
             out_fields.object_semantic_voxels: twice(logits),
             out_fields.object_length_voxels: twice(gt_length),
             out_fields.object_height_voxels: twice(gt_height),
             out_fields.object_width_voxels: twice(gt_width),
             out_fields.object_center_voxels: twice(gt_center),
             out_fields.object_rotation_matrix_voxels:
             twice(gt_rotation_matrix),
         }

     inputs = {
         in_fields.num_valid_voxels: tf.constant([2, 2], dtype=tf.int32),
         in_fields.object_class_voxels: twice(gt_classes),
         in_fields.object_length_voxels: twice(gt_length),
         in_fields.object_height_voxels: twice(gt_height),
         in_fields.object_width_voxels: twice(gt_width),
         in_fields.object_center_voxels: twice(gt_center),
         in_fields.object_rotation_matrix_voxels: twice(gt_rotation_matrix),
         in_fields.object_instance_id_voxels: twice(gt_instance_ids),
     }
     outputs1 = build_outputs(logits1)
     outputs2 = build_outputs(logits2)

     loss1 = classification_losses.box_classification_loss(inputs=inputs,
                                                           outputs=outputs1)
     loss2 = classification_losses.box_classification_loss(inputs=inputs,
                                                           outputs=outputs2)
     self.assertGreater(loss2.numpy(), loss1.numpy())
Beispiel #50
0
def gauss_kl(q_mu, q_sqrt, K=None, *, K_cholesky=None):
    """
    Compute the KL divergence KL[q || p] between

          q(x) = N(q_mu, q_sqrt^2)
    and
          p(x) = N(0, K)    if K is not None
          p(x) = N(0, I)    if K is None

    We assume L multiple independent distributions, given by the columns of
    q_mu and the first or last dimension of q_sqrt. Returns the *sum* of the
    divergences.

    q_mu is a matrix ([M, L]), each column contains a mean.

    q_sqrt can be a 3D tensor ([L, M, M]), each matrix within is a lower
        triangular square-root matrix of the covariance of q.
    q_sqrt can be a matrix ([M, L]), each column represents the diagonal of a
        square-root matrix of the covariance of q.

    K is the covariance of p (positive-definite matrix).  The K matrix can be
    passed either directly as `K`, or as its Cholesky factor, `K_cholesky`.  In
    either case, it can be a single matrix [M, M], in which case the sum of the
    L KL divergences is computed by broadcasting, or L different covariances
    [L, M, M].

    Note: if no K matrix is given (both `K` and `K_cholesky` are None),
    `gauss_kl` computes the KL divergence from p(x) = N(0, I) instead.

    Returns a scalar tensor (the sum of the L divergences).

    Raises ValueError if both `K` and `K_cholesky` are given.
    """

    if (K is not None) and (K_cholesky is not None):
        raise ValueError(
            "Ambiguous arguments: gauss_kl() must only be passed one of `K` or `K_cholesky`."
        )

    # "White" means the prior is the standard normal N(0, I).
    is_white = (K is None) and (K_cholesky is None)
    # A rank-2 q_sqrt holds only the diagonals of the square-root covariances.
    is_diag = len(q_sqrt.shape) == 2

    # Validate that all arguments agree on M and L before doing any maths.
    shape_constraints = [
        (q_mu, ["M", "L"]),
        (q_sqrt, (["M", "L"] if is_diag else ["L", "M", "M"])),
    ]
    if not is_white:
        if K is not None:
            shape_constraints.append(
                (K, (["L", "M", "M"] if len(K.shape) == 3 else ["M", "M"])))
        else:
            shape_constraints.append((K_cholesky, (["L", "M", "M"] if len(
                K_cholesky.shape) == 3 else ["M", "M"])))
    tf.debugging.assert_shapes(shape_constraints,
                               message="gauss_kl() arguments")

    M, L = tf.shape(q_mu)[0], tf.shape(q_mu)[1]

    if is_white:
        alpha = q_mu  # [M, L]
    else:
        if K is not None:
            Lp = tf.linalg.cholesky(K)  # [L, M, M] or [M, M]
        elif K_cholesky is not None:
            Lp = K_cholesky  # [L, M, M] or [M, M]

        # Only defined (and only read) on the non-white code paths.
        is_batched = len(Lp.shape) == 3

        q_mu = tf.transpose(
            q_mu)[:, :, None] if is_batched else q_mu  # [L, M, 1] or [M, L]
        # alpha = Lp⁻¹ q_mu, so that sum(alpha²) is the Mahalanobis term.
        alpha = tf.linalg.triangular_solve(Lp, q_mu,
                                           lower=True)  # [L, M, 1] or [M, L]

    if is_diag:
        Lq = Lq_diag = q_sqrt  # [M, L]
        Lq_full = tf.linalg.diag(tf.transpose(q_sqrt))  # [L, M, M]
    else:
        Lq = Lq_full = tf.linalg.band_part(
            q_sqrt, -1, 0)  # force lower triangle # [L, M, M]
        Lq_diag = tf.linalg.diag_part(Lq)  # [L, M]

    # Mahalanobis term: μqᵀ Σp⁻¹ μq
    mahalanobis = tf.reduce_sum(tf.square(alpha))

    # Constant term: - L * M
    # (q_mu may have been reshaped above, but its number of elements is unchanged.)
    constant = -to_default_float(tf.size(q_mu, out_type=tf.int64))

    # Log-determinant of the covariance of q(x):
    logdet_qcov = tf.reduce_sum(tf.math.log(tf.square(Lq_diag)))

    # Trace term: tr(Σp⁻¹ Σq)
    if is_white:
        trace = tf.reduce_sum(tf.square(Lq))
    else:
        if is_diag and not is_batched:
            # K is [M, M] and q_sqrt is [M, L]: fast specialisation
            # Only the diagonal of K⁻¹ is needed because Σq is diagonal.
            LpT = tf.transpose(Lp)  # [M, M]
            Lp_inv = tf.linalg.triangular_solve(Lp,
                                                tf.eye(M,
                                                       dtype=default_float()),
                                                lower=True)  # [M, M]
            K_inv = tf.linalg.diag_part(
                tf.linalg.triangular_solve(
                    LpT, Lp_inv, lower=False))[:, None]  # [M, M] -> [M, 1]
            trace = tf.reduce_sum(K_inv * tf.square(q_sqrt))
        else:
            # TODO: broadcast instead of tile when tf allows -- tf2.1 segfaults
            # (https://github.com/tensorflow/tensorflow/issues/37584).
            # See # https://github.com/GPflow/GPflow/issues/1321
            Lp_full = Lp if is_batched else tf.tile(tf.expand_dims(Lp, 0),
                                                    [L, 1, 1])
            LpiLq = tf.linalg.triangular_solve(Lp_full, Lq_full, lower=True)
            trace = tf.reduce_sum(tf.square(LpiLq))

    twoKL = mahalanobis + constant - logdet_qcov + trace

    # Log-determinant of the covariance of p(x):
    if not is_white:
        log_sqdiag_Lp = tf.math.log(tf.square(tf.linalg.diag_part(Lp)))
        sum_log_sqdiag_Lp = tf.reduce_sum(log_sqdiag_Lp)
        # If K is [L, M, M], num_latent_gps is no longer implicit, no need to multiply the single kernel logdet
        scale = 1.0 if is_batched else to_default_float(L)
        twoKL += scale * sum_log_sqdiag_Lp

    tf.debugging.assert_shapes(
        [(twoKL, ())], message="gauss_kl() return value")  # returns scalar
    return 0.5 * twoKL
Beispiel #51
0
    def compute_reward(self, m, s):
        '''
        Exponential reward: mean and variance of the reward under a Gaussian
        state distribution, relative to the target state `self.t` and the
        weight matrix `self.W`.
        Input m : [1, k]  state mean
        Input s : [k, k]  state covariance
        Output M : [1, 1] reward mean
        Output S  : [1, 1] reward variance
        '''
        # for robot arm: only the first nine state dimensions are used
        mean = m[:, :9]
        cov = s[:9, :9]

        identity = tf.eye(self.state_dim, dtype=float_type)
        weight_t = tf.transpose(self.W)
        cov_w = cov @ self.W
        delta = mean - self.t
        delta_t = tf.transpose(delta)

        def solve_transposed(system):
            # transpose(solve(system^T, W^T))
            return tf.transpose(
                tf.matrix_solve(system, weight_t, adjoint=True))

        # First moment of the reward.
        system_1 = identity + cov_w
        iSpW = solve_transposed(system_1)
        muR = tf.exp(-delta @ iSpW @ delta_t / 2) / \
            tf.sqrt(tf.linalg.det(system_1))

        # Second moment of the reward (same form with the covariance doubled).
        system_2 = identity + 2 * cov_w
        i2SpW = solve_transposed(system_2)
        r2 = tf.exp(-delta @ i2SpW @ delta_t) / \
            tf.sqrt(tf.linalg.det(system_2))

        # Variance = E[r^2] - E[r]^2.
        sR = r2 - muR @ muR
        muR.set_shape([1, 1])
        sR.set_shape([1, 1])
        return muR, sR
# import abc
# import tensorflow as tf
# from gpflow import Parameterized, Param, params_as_tensors, settings
# import numpy as np
#
# float_type = settings.dtypes.float_type
#
#
# class Reward(Parameterized):
#     def __init__(self):
#         Parameterized.__init__(self)
#
#     @abc.abstractmethod
#     def compute_reward(self, m, s):
#         raise NotImplementedError
#
#
# class ExponentialReward(Reward):
#     def __init__(self, state_dim, W=None, t=None):
#         Reward.__init__(self)
#         self.state_dim = state_dim
#         if W is not None:
#             self.W = Param(np.reshape(W, (state_dim, state_dim)), trainable=False)
#         else:
#             self.W = Param(np.eye(state_dim), trainable=False)
#         self.t=t
#         # if t is not None:
#         #     self.t = Param(np.reshape(t, (1, state_dim)), trainable=False)
#         # else:
#         #     self.t = Param(np.zeros((1, state_dim)), trainable=False)
#
#     def update_target(self,t):
#         # self.t.assign(np.reshape(t, (1, self.state_dim)))
#         self.t=t
#
#     @params_as_tensors
#     def compute_reward(self, m, s):
#         '''
#         Reward function, calculating mean and variance of rewards, given
#         mean and variance of state distribution, along with the target State
#         and a weight matrix.
#         Input m : [1, k]
#         Input s : [k, k]
#
#         Output M : [1, 1]
#         Output S  : [1, 1]
#         '''
#         # for robot arm
#         m=m[:,:3]
#         s=s[:3,:3]
#
#         SW = s @ self.W
#
#         iSpW = tf.transpose(
#                 tf.matrix_solve( (tf.eye(self.state_dim, dtype=float_type) + SW),
#                 tf.transpose(self.W), adjoint=True))
#
#         muR = tf.exp(-(m-self.t) @  iSpW @ tf.transpose(m-self.t)/2) / \
#                 tf.sqrt( tf.linalg.det(tf.eye(self.state_dim, dtype=float_type) + SW) )
#
#         i2SpW = tf.transpose(
#                 tf.matrix_solve( (tf.eye(self.state_dim, dtype=float_type) + 2*SW),
#                 tf.transpose(self.W), adjoint=True))
#
#         r2 =  tf.exp(-(m-self.t) @ i2SpW @ tf.transpose(m-self.t)) / \
#                 tf.sqrt( tf.linalg.det(tf.eye(self.state_dim, dtype=float_type) + 2*SW) )
#
#         sR = r2 - muR @ muR
#         muR.set_shape([1, 1])
#         sR.set_shape([1, 1])
#         return muR, sR
    def testActionBatchWithVariablesAndPolicyUpdate(self, batch_size,
                                                    actions_from_reward_layer):
        """After `new_policy.update(policy)` both policies pick equal actions.

        Two policies are built from per-arm variables that differ by
        `_POLICY_VARIABLES_OFFSET`; once the new policy is updated from the
        old one, their actions on the same batch must match.
        """
        make_variable = tf.compat.v2.Variable

        # Per-arm variable lists, ordered as (cov matrix, data vector, count).
        current = ([], [], [])
        shifted = ([], [], [])
        for k in range(1, self._num_actions + 1):
            initial_values = (
                k + 1 + 2 * k * tf.eye(self._encoding_dim, dtype=tf.float32),
                tf.constant(k * np.ones(self._encoding_dim),
                            dtype=tf.float32),
                tf.constant([1], dtype=tf.float32),
            )
            for bucket, initial in zip(current, initial_values):
                bucket.append(make_variable(initial))

            # Variables for the new policy (they differ by an offset).
            for bucket, initial in zip(shifted, initial_values):
                bucket.append(
                    make_variable(initial + _POLICY_VARIABLES_OFFSET))

        def build_policy(cov_matrices, data_vectors, sample_counts):
            return neural_linucb_policy.NeuralLinUCBPolicy(
                encoding_network=DummyNet(self._obs_spec),
                encoding_dim=self._encoding_dim,
                reward_layer=get_reward_layer(),
                actions_from_reward_layer=tf.constant(
                    actions_from_reward_layer, dtype=tf.bool),
                cov_matrix=cov_matrices,
                data_vector=data_vectors,
                num_samples=sample_counts,
                epsilon_greedy=0.0,
                time_step_spec=self._time_step_spec)

        policy = build_policy(*current)
        new_policy = build_policy(*shifted)

        def fresh_batch():
            return self._time_step_batch(batch_size=batch_size)

        # Before the update only the shape and dtype of the actions must agree.
        action_step = policy.action(fresh_batch())
        new_action_step = new_policy.action(fresh_batch())
        self.assertEqual(action_step.action.shape,
                         new_action_step.action.shape)
        self.assertEqual(action_step.action.dtype,
                         new_action_step.action.dtype)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(new_policy.update(policy))

        # After the update the chosen actions themselves must be identical.
        action_fn = common.function_in_tf1()(policy.action)
        action_step = action_fn(fresh_batch())
        new_action_fn = common.function_in_tf1()(new_policy.action)
        new_action_step = new_action_fn(fresh_batch())

        actions_, new_actions_ = self.evaluate(
            [action_step.action, new_action_step.action])
        self.assertAllEqual(actions_, new_actions_)
Beispiel #53
0
    def test_inv_update_thunks(self):
        """Ensures inverse update ops run once per global_step.

        Builds a round-robin Fisher estimator, wires its inverse-update thunks
        into a single `tf.case` keyed on the global step, and checks that each
        step changes exactly one more inverse matrix than the step before.
        """
        with self._graph.as_default(), self.test_session() as sess:
            fisher_estimator = estimator.FisherEstimatorRoundRobin(
                variables=[self.weights],
                layer_collection=self.layer_collection,
                damping=0.2,
                cov_ema_decay=0.0)

            # Construct op that updates one inverse per global step.
            global_step = tf.train.get_or_create_global_step()
            (cov_variable_thunks, _, inv_variable_thunks, inv_update_op_thunks
             ) = fisher_estimator.create_ops_and_vars_thunks()
            # The variable thunks must be called so the covariance and inverse
            # variables exist before they are looked up below.
            for thunk in cov_variable_thunks:
                thunk()
            for thunk in inv_variable_thunks:
                thunk()
            inv_matrices = [
                matrix
                for fisher_factor in self.layer_collection.get_factors() for
                matrix in fisher_factor._matpower_by_exp_and_damping.values()
            ]
            # Only the thunk whose index equals the current global step runs.
            inv_update_op = tf.case([
                (tf.equal(global_step, i), thunk)
                for i, thunk in enumerate(inv_update_op_thunks)
            ])
            increment_global_step = global_step.assign_add(1)

            sess.run(tf.global_variables_initializer())
            initial_inv_values = sess.run(inv_matrices)

            # Ensure there's one update per inverse matrix. This is true as long as
            # there's no fan-in/fan-out or parameter re-use.
            self.assertEqual(len(inv_matrices), len(inv_update_op_thunks))

            # The test is a no-op if there is only one inverse matrix. Use a
            # unittest assertion so the guard survives `python -O`.
            self.assertGreater(len(inv_matrices), 1)

            # Assign each covariance matrix a value other than the identity. This
            # ensures that the inverse matrices are updated to something different as
            # well.
            cov_matrices = [
                fisher_factor.get_cov()
                for fisher_factor in self.layer_collection.get_factors()
            ]
            sess.run([
                cov_matrix.assign(2 * tf.eye(int(cov_matrix.shape[0])))
                for cov_matrix in cov_matrices
            ])

            for i in range(len(inv_matrices)):
                # Compare new and old inverse values
                new_inv_values = sess.run(inv_matrices)
                is_inv_equal = [
                    np.allclose(initial_inv_value, new_inv_value)
                    for (initial_inv_value, new_inv_value
                         ) in zip(initial_inv_values, new_inv_values)
                ]
                num_inv_equal = sum(is_inv_equal)

                # Ensure exactly one inverse matrix changes per step: after i
                # steps, i matrices differ from their initial values.
                self.assertEqual(num_inv_equal, len(inv_matrices) - i)

                # Run the single inverse update selected by the current global
                # step, then advance the step.
                sess.run(inv_update_op)
                sess.run(increment_global_step)
Beispiel #54
0
    def construct_model(self):
        """Build the TensorFlow graph: priors, flow, posterior, loss and training op.

        Everything is created inside `self.sess.graph`. On return the instance
        holds the placeholders (`context_y`, `y`, `num_context`), the posterior
        quantities (`mu_N`, `V_N`), the transformed distribution, `total_loss`,
        `train_op`, the summary writer/merged summaries and a saver, and all
        global variables have been initialized.

        Raises:
            ValueError: if `self.sig_0` is a list; per-dimension initial values
                are not implemented.
        """
        with self.sess.graph.as_default():

            # Build priors.
            # `isinstance` is required here: `self.sig_0 is list` compared the
            # value with the type object itself and could never be true for an
            # actual list.
            if isinstance(self.sig_0, list):
                raise ValueError('need to define inits for this case')

            # V0_inv is built as A @ A^T from the trainable factor V0_asym.
            self.V0_asym = tf.get_variable('V0_asym',initializer=1/self.sig_0*tf.eye(self.y_dim))
            self.V0_inv = self.V0_asym @ tf.transpose(self.V0_asym)
            self.V0 = tf.linalg.inv(self.V0_inv)

            # S0 is kept fixed (not trainable): sig_0 * I and its inverse.
            self.S0_inv = self.sig_0*tf.eye(self.y_dim)
            self.S0 = tf.linalg.inv(self.S0_inv)

            self.mu_0 = tf.constant(self.mu_0, dtype=tf.float32)

            # context_y: points available for context, (M, N_context, y_dim)
            self.context_y = tf.placeholder(tf.float32, shape=[None,None,self.y_dim], name="cy")

            # y: query points, (M, N_test, y_dim)
            self.y = tf.placeholder(tf.float32, shape=[None,None,self.y_dim], name="y")

            # build network
            self.flow_bijector = self.build_flow()

            # num_context: number of context points from context_y to use when
            # computing the posterior, size (M,). Defaults to all of them.
            self.num_models = tf.shape(self.context_y)[0]
            self.max_num_context = tf.shape(self.context_y)[1]*tf.ones((self.num_models,), dtype=tf.int32)
            self.num_context = tf.placeholder_with_default(self.max_num_context, shape=(None,))

            # Map context data to latent space through the inverse flow,
            # one model (leading-axis entry) at a time.
            self.context_z = tf.map_fn( lambda y: self.flow_bijector.inverse(y),
                                        elems=self.context_y,
                                        dtype=tf.float32)

            # Compute posteriors per model from the latent context points.
            self.mu_N, self.V_N = tf.map_fn(lambda x: self.batch_gaussian_update(*x),
                                                            elems=(self.context_z, self.num_context),
                                                            dtype=(tf.float32, tf.float32)
                                                            )

            # Posterior base distribution; covariance is V_N + S0.
            self.base = tfd.MultivariateNormalFullCovariance(loc=self.mu_N,                     covariance_matrix=self.V_N + self.S0)

            self.transformed_dist = tfd.TransformedDistribution(distribution=self.base,bijector=self.flow_bijector)

            # Swap the first two axes of y before evaluating log_prob.
            # NOTE(review): presumably so the model axis lines up with the
            # distribution's batch axis — confirm.
            y_transposed = tf.transpose(self.y, perm=[1,0,2])
            self.loss = -(self.transformed_dist.log_prob(y_transposed))
            self.total_loss = tf.reduce_mean(self.loss)

            # Gradients are computed and applied separately so that clipping
            # can be inserted between the two steps if needed.
            optimizer = tf.train.AdamOptimizer(self.config['learning_rate'])
            gs, vs = zip(*optimizer.compute_gradients(self.total_loss))
            self.train_op = optimizer.apply_gradients(zip(gs, vs))

            # Scalar summaries of the Frobenius norms of the prior precisions.
            norm_S0_inv = tf.reduce_mean( tf.norm(self.S0_inv, ord='fro', axis=(-2,-1)) )
            tf.summary.scalar("norm_S0_inv", norm_S0_inv)

            norm_V0_inv = tf.reduce_mean( tf.norm(self.V0_inv, ord='fro', axis=(-2,-1)) )
            tf.summary.scalar("norm_V0_inv", norm_V0_inv)

            # Summaries go to a fresh timestamped directory on every call.
            self.train_writer = tf.summary.FileWriter('summaries/'+str(time.time()), self.sess.graph, flush_secs=10)
            self.merged = tf.summary.merge_all()

            self.saver = tf.train.Saver()

            self.sess.run(tf.global_variables_initializer())
Beispiel #55
0
def kl_term(m, S, K_zz, K_zz_inv, u_ovln, L, stabilizer_value):
    """Build the Gaussian KL-divergence term of the variational objective.

    The value computed is

        0.5 * (tr(K_zz_inv S) + [logdet(K_zz + eps I) - logdet(S + eps I)]
               - M + d^T K_zz_inv d)

    with d = u_ovln * 1 - m, M = tf.shape(m)[0] and eps = stabilizer_value,
    i.e. the closed-form KL between N(m, S) and N(u_ovln * 1, K_zz) with a
    diagonal stabilizer added before each log-determinant.

    Args:
        m: variational mean. Assumed to be a length-M vector — TODO confirm.
        S: variational covariance, (M, M).
        K_zz: prior covariance matrix, (M, M).
        K_zz_inv: inverse of `K_zz`.
        u_ovln: scalar prior mean, broadcast to length M.
        L: matrix whose diagonal is returned for diagnostics.
            NOTE(review): presumably the Cholesky factor of S — confirm.
        stabilizer_value: scalar added to the diagonals of `K_zz` and `S`
            before the log-determinants to keep them positive definite.

    Returns:
        A pair `(kl, [S_logdet, diag_part(L)])`: the scalar KL tensor and two
        diagnostic tensors.

    Raises:
        ValueError: if the module-level `DTYPE` is neither tf.float32 nor
            tf.float64.
    """
    mean_diff = tf.expand_dims(
        u_ovln * tf.ones([tf.shape(m)[0]], dtype=DTYPE) - m, 1)
    first = tf.trace(tf.matmul(K_zz_inv, S), name='kl_first')

    # The log-determinant ratio is computed as a difference of logdets rather
    # than log(det(K_zz) / det(S)): the raw determinants overflow to NaN for
    # large matrices, and tf.linalg.slogdet did not provide a gradient.
    # logdet relies on a Cholesky decomposition, hence the diagonal stabilizer.
    with tf.name_scope('log_of_determinant_ratio'):
        posdef_stabilizer = tf.eye(tf.shape(K_zz)[0],
                                   dtype=DTYPE) * stabilizer_value

        with tf.name_scope('K_zz_logdet'):
            K_zz_logdet = tf.linalg.logdet(K_zz + posdef_stabilizer)

        with tf.name_scope('S_logdet'):
            S_logdet = tf.linalg.logdet(S + posdef_stabilizer)

            # Only the diagonal of L is returned (for inspection); the
            # alternative 2 * sum(log(diag(L))) is not evaluated here.
            alt_logdet_via_L = tf.diag_part(L)

        second = tf.subtract(K_zz_logdet, S_logdet, name='kl_second')

    # Fail loudly on an unsupported dtype instead of printing a message and
    # crashing later on an unbound `third`.
    if DTYPE == tf.float32 or DTYPE == tf.float64:
        third = tf.cast(tf.shape(m)[0], DTYPE, name='kl_third')
    else:
        raise ValueError(
            'DTYPE must be set to either tf.float32 or tf.float64')

    fourth = tf.squeeze(tf.matmul(tf.matmul(tf.transpose(mean_diff), K_zz_inv),
                                  mean_diff),
                        name='kl_fourth')

    return 0.5 * (first + second - third + fourth), [
        S_logdet, alt_logdet_via_L
    ]
Beispiel #56
0
def transformer_xl(inp_k, n_token, n_layer, d_model, n_head,
                d_head, d_inner, dropout, dropatt, attn_type,
                bi_data, initializer, is_training, mem_len=None,
                inp_q=None, mems=None,
                same_length=False, clamp_len=-1, untie_r=False,
                use_tpu=True, input_mask=None,
                perm_mask=None, seg_id=None, reuse_len=None,
                ff_activation='relu', target_mapping=None,
                use_bfloat16=False, scope='transformer', **kwargs):
  """
    Defines a Transformer-XL computation graph with additional
    support for XLNet.

    Args:

    inp_k: int32 Tensor in shape [len, bsz], the input token IDs.
    seg_id: int32 Tensor in shape [len, bsz], the input segment IDs.
    input_mask: float32 Tensor in shape [len, bsz], the input mask.
      0 for real tokens and 1 for padding.
    mems: a list of float32 Tensors in shape [mem_len, bsz, d_model], memory
      from previous batches. The length of the list equals n_layer.
      If None, no memory is used.
    perm_mask: float32 Tensor in shape [len, len, bsz].
      If perm_mask[i, j, k] = 0, i attend to j in batch k;
      if perm_mask[i, j, k] = 1, i does not attend to j in batch k.
      If None, each position attends to all the others.
    target_mapping: float32 Tensor in shape [num_predict, len, bsz].
      If target_mapping[i, j, k] = 1, the i-th predict in batch k is
      on the j-th token.
      Only used during pretraining for partial prediction.
      Set to None during finetuning.
    inp_q: float32 Tensor in shape [len, bsz].
      1 for tokens with losses and 0 for tokens without losses.
      Only used during pretraining for two-stream attention.
      Set to None during finetuning.

    n_layer: int, the number of layers.
    d_model: int, the hidden size.
    n_head: int, the number of attention heads.
    d_head: int, the dimension size of each attention head.
    d_inner: int, the hidden size in feed-forward layers.
    ff_activation: str, "relu" or "gelu".
    untie_r: bool, whether to untie the biases in attention.
    n_token: int, the vocab size.
    attn_type: str, "uni" (causal mask is built) or "bi" (no causal mask).
      Any other value raises ValueError.

    is_training: bool, whether in training mode.
    use_tpu: bool, whether TPUs are used.
    use_bfloat16: bool, use bfloat16 instead of float32.
    dropout: float, dropout rate.
    dropatt: float, dropout rate on attention probabilities.
    mem_len: int, the number of tokens to cache.
    reuse_len: int, the number of tokens in the current batch to be cached
      and reused in the future.
    bi_data: bool, whether to use bidirectional input pipeline.
      Usually set to True during pretraining and False during finetuning.
    clamp_len: int, clamp all relative distances larger than clamp_len.
      -1 means no clamping.
    same_length: bool, whether to use the same attention length for each token.
    initializer: A tf initializer.
    scope: scope name for the computation graph.
    **kwargs: accepted for call-site compatibility and ignored.

    Returns:

    A 5-tuple (output, new_mems, lookup_table, hidden_states, special_out).
    output: the last layer's output after dropout; the query stream when
      inp_q is given, otherwise the content stream.
    new_mems: list with one `_cache_mem` result per layer.
    lookup_table: the embedding table returned by `embedding_lookup`.
    hidden_states: list of n_layer + 1 entries holding the input of every
      layer followed by the final output, each transposed with perm
      [1, 0, 2]. Entries are (content, query) tuples when inp_q is given.
    special_out: the second value returned by `rel_multihead_attn` for
      layer 0. It is None when inp_q is given (two-stream attention does not
      produce it) or when n_layer is 0.

    Raises:

    ValueError: if attn_type is neither "uni" nor "bi".
  """
  tf.logging.info('memory input {}'.format(mems))
  tf_float = tf.bfloat16 if use_bfloat16 else tf.float32
  tf.logging.info('Use float type {}'.format(tf_float))

  new_mems = []
  with tf.variable_scope(scope):
    if untie_r:
      r_w_bias = tf.get_variable('r_w_bias', [n_layer, n_head, d_head],
                                 dtype=tf_float, initializer=initializer)
      r_r_bias = tf.get_variable('r_r_bias', [n_layer, n_head, d_head],
                                 dtype=tf_float, initializer=initializer)
    else:
      r_w_bias = tf.get_variable('r_w_bias', [n_head, d_head],
                                 dtype=tf_float, initializer=initializer)
      r_r_bias = tf.get_variable('r_r_bias', [n_head, d_head],
                                 dtype=tf_float, initializer=initializer)

    bsz = tf.shape(inp_k)[1]
    qlen = tf.shape(inp_k)[0]
    mlen = tf.shape(mems[0])[0] if mems is not None else 0
    klen = mlen + qlen

    ##### Attention mask
    # causal attention mask
    if attn_type == 'uni':
      attn_mask = _create_mask(qlen, mlen, tf_float, same_length)
      attn_mask = attn_mask[:, :, None, None]
    elif attn_type == 'bi':
      attn_mask = None
    else:
      raise ValueError('Unsupported attention type: {}'.format(attn_type))

    # data mask: input mask & perm mask
    if input_mask is not None and perm_mask is not None:
      data_mask = input_mask[None] + perm_mask
    elif input_mask is not None and perm_mask is None:
      data_mask = input_mask[None]
    elif input_mask is None and perm_mask is not None:
      data_mask = perm_mask
    else:
      data_mask = None

    if data_mask is not None:
      # all mems can be attended to
      mems_mask = tf.zeros([tf.shape(data_mask)[0], mlen, bsz],
                           dtype=tf_float)
      data_mask = tf.concat([mems_mask, data_mask], 1)
      if attn_mask is None:
        attn_mask = data_mask[:, :, :, None]
      else:
        attn_mask += data_mask[:, :, :, None]

    if attn_mask is not None:
      # Masks were summed above; collapse any positive value back to 1.
      attn_mask = tf.cast(attn_mask > 0, dtype=tf_float)

    if attn_mask is not None:
      # Subtracting the identity on the query block clears the mask on the
      # diagonal, so in `non_tgt_mask` every position may attend to itself.
      non_tgt_mask = -tf.eye(qlen, dtype=tf_float)
      non_tgt_mask = tf.concat([tf.zeros([qlen, mlen], dtype=tf_float),
                                non_tgt_mask], axis=-1)
      non_tgt_mask = tf.cast((attn_mask + non_tgt_mask[:, :, None, None]) > 0,
                             dtype=tf_float)
    else:
      non_tgt_mask = None

    ##### Word embedding
    word_emb_k, lookup_table = embedding_lookup(
        x=inp_k,
        n_token=n_token,
        d_embed=d_model,
        initializer=initializer,
        use_tpu=use_tpu,
        dtype=tf_float,
        scope='word_embedding')

    if inp_q is not None:
      with tf.variable_scope('mask_emb'):
        mask_emb = tf.get_variable('mask_emb', [1, 1, d_model], dtype=tf_float)
        if target_mapping is not None:
          word_emb_q = tf.tile(mask_emb, [tf.shape(target_mapping)[0], bsz, 1])
        else:
          inp_q_ext = inp_q[:, :, None]
          word_emb_q = inp_q_ext * mask_emb + (1 - inp_q_ext) * word_emb_k
    output_h = tf.layers.dropout(word_emb_k, dropout, training=is_training)
    if inp_q is not None:
      output_g = tf.layers.dropout(word_emb_q, dropout, training=is_training)
    else:
      output_g = None
    ##### Segment embedding
    if seg_id is not None:
      if untie_r:
        r_s_bias = tf.get_variable('r_s_bias', [n_layer, n_head, d_head],
                                   dtype=tf_float, initializer=initializer)
      else:
        # default case (tie)
        r_s_bias = tf.get_variable('r_s_bias', [n_head, d_head],
                                   dtype=tf_float, initializer=initializer)

      seg_embed = tf.get_variable('seg_embed', [n_layer, 2, n_head, d_head],
                                  dtype=tf_float, initializer=initializer)

      # Convert `seg_id` to one-hot `seg_mat`
      mem_pad = tf.zeros([mlen, bsz], dtype=tf.int32)
      cat_ids = tf.concat([mem_pad, seg_id], 0)

      # `1` indicates not in the same segment [qlen x klen x bsz]
      seg_mat = tf.cast(
          tf.logical_not(tf.equal(seg_id[:, None], cat_ids[None, :])),
          tf.int32)
      seg_mat = tf.one_hot(seg_mat, 2, dtype=tf_float)
    else:
      seg_mat = None

    ##### Positional encoding
    pos_emb = relative_positional_encoding(
        qlen, klen, d_model, clamp_len, attn_type, bi_data,
        bsz=bsz, dtype=tf_float)
    pos_emb = tf.layers.dropout(pos_emb, dropout, training=is_training)

    ##### Attention layers
    if mems is None:
      mems = [None] * n_layer

    hidden_states = []
    # Only the single-stream attention returns the extra `special` value.
    # Start from None so the return below never hits an unbound name (the
    # two-stream path and n_layer == 0 previously raised UnboundLocalError).
    special_out = None

    for i in range(n_layer):
      # cache new mems
      new_mems.append(_cache_mem(output_h, mems[i], mem_len, reuse_len))

      # segment bias
      if seg_id is None:
        r_s_bias_i = None
        seg_embed_i = None
      else:
        r_s_bias_i = r_s_bias if not untie_r else r_s_bias[i]
        seg_embed_i = seg_embed[i]

      with tf.variable_scope('layer_{}'.format(i)):
        if inp_q is not None:
          o = tf.transpose(output_h, [1, 0, 2])
          q = tf.transpose(output_g, [1, 0, 2])
          hidden_states.append((o, q))
          output_h, output_g = two_stream_rel_attn(
              h=output_h,
              g=output_g,
              r=pos_emb,
              r_w_bias=r_w_bias if not untie_r else r_w_bias[i],
              r_r_bias=r_r_bias if not untie_r else r_r_bias[i],
              seg_mat=seg_mat,
              r_s_bias=r_s_bias_i,
              seg_embed=seg_embed_i,
              attn_mask_h=non_tgt_mask,
              attn_mask_g=attn_mask,
              mems=mems[i],
              target_mapping=target_mapping,
              d_model=d_model,
              n_head=n_head,
              d_head=d_head,
              dropout=dropout,
              dropatt=dropatt,
              is_training=is_training,
              kernel_initializer=initializer)
          # The query-stream FFN below creates the variables; the
          # content-stream FFN then reuses them.
          reuse = True
        else:
          o = tf.transpose(output_h, [1, 0, 2])
          hidden_states.append(o)
          reuse = False

          output_h, special = rel_multihead_attn(
              h=output_h,
              r=pos_emb,
              r_w_bias=r_w_bias if not untie_r else r_w_bias[i],
              r_r_bias=r_r_bias if not untie_r else r_r_bias[i],
              seg_mat=seg_mat,
              r_s_bias=r_s_bias_i,
              seg_embed=seg_embed_i,
              attn_mask=non_tgt_mask,
              mems=mems[i],
              d_model=d_model,
              n_head=n_head,
              d_head=d_head,
              dropout=dropout,
              dropatt=dropatt,
              is_training=is_training,
              kernel_initializer=initializer,
              reuse=reuse)

          # Keep the first layer's extra attention output.
          if i == 0:
            special_out = special

        if inp_q is not None:
          output_g = positionwise_ffn(
              inp=output_g,
              d_model=d_model,
              d_inner=d_inner,
              dropout=dropout,
              kernel_initializer=initializer,
              activation_type=ff_activation,
              is_training=is_training)

        output_h = positionwise_ffn(
            inp=output_h,
            d_model=d_model,
            d_inner=d_inner,
            dropout=dropout,
            kernel_initializer=initializer,
            activation_type=ff_activation,
            is_training=is_training,
            reuse=reuse)

    if inp_q is not None:
      o = tf.transpose(output_h, [1, 0, 2])
      q = tf.transpose(output_g, [1, 0, 2])
      hidden_states.append((o, q))
      output = tf.layers.dropout(output_g, dropout, training=is_training)
    else:
      o = tf.transpose(output_h, [1, 0, 2])
      hidden_states.append(o)
      output = tf.layers.dropout(output_h, dropout, training=is_training)

    return output, new_mems, lookup_table, hidden_states, special_out
Beispiel #57
0
    def _apply_dense(self, grad, var):
        """Build the update op for one dense variable.

        The branch is chosen from the variable name:

        * name contains 'orthogonal_stiefel' (and not 'bias'): real-valued
          Cayley-transform step ``W_new = (I + lr/2 A)^-1 (I - lr/2 A) W`` with
          ``A = G W^T - W G^T``.
        * name contains 'unitary_stiefel' (and not 'bias'): the same step on
          complex matrices, where the last axis of `var`/`grad` holds the real
          and imaginary parts.
        * anything else: TensorFlow's built-in RMSProp update.

        In both Stiefel branches the RMS slot is updated, and the gradient is
        divided by ``sqrt(new_rms) + eps`` when `_nat_grad_normalization` is
        set. If `_qr_steps` is not None, `W` is passed through
        `self.re_unitarize` whenever the global step is a multiple of it.

        Args:
            grad: gradient tensor for `var`.
            var: the variable to update.

        Returns:
            An op that applies the update (and, in the Stiefel branches, the
            RMS slot assignment).
        """
        rms = self.get_slot(var, "rms")
        mom = self.get_slot(var, "momentum")
        eps = self.get_slot(var, 'eps')
        tf.summary.scalar('grad_norm', tf.norm(grad))
        if 'orthogonal_stiefel' in var.name and 'bias' not in var.name:
            with tf.variable_scope("orthogonal_update"):
                print('Appling an orthogonality preserving step to', var.name)
                # apply the rms update rule.
                new_rms = self._decay_tensor * rms + (1. - self._decay_tensor) \
                    * tf.square(grad)
                rms_assign_op = tf.assign(rms, new_rms)
                # Scale the gradient with the freshly computed moving average,
                # as the unitary branch does. Reading the `rms` slot here would
                # race with the assignment above.
                if self._nat_grad_normalization:
                    grad = grad / (tf.sqrt(new_rms) + eps)
                # the update should preserve orthogonality.
                grad_shape = grad.get_shape().as_list()
                eye = tf.eye(grad_shape[0], dtype=tf.float32)
                G = grad
                W = var
                # Reunitarize after n steps.
                if self._qr_steps is not None:
                    W = tf.cond(tf.equal(tf.mod(self._global_step_tensor,
                                         self._qr_steps), 0),
                                lambda: self.re_unitarize(W), lambda: W)
                # A is skew-symmetric by construction.
                A = tf.matmul(G, tf.transpose(W)) - tf.matmul(W, tf.transpose(G))
                cayleyDenom = eye + (self._learning_rate_tensor/2.0) * A
                cayleyNumer = eye - (self._learning_rate_tensor/2.0) * A
                C = tf.matmul(tf.matrix_inverse(cayleyDenom), cayleyNumer)
                W_new = tf.matmul(C, W)
                if self._debug:
                    self._summary_C(C)
                    self._summary_W(W)
                var_update_op = tf.assign(var, W_new)
                return tf.group(var_update_op, rms_assign_op)
        elif 'unitary_stiefel' in var.name and 'bias' not in var.name:
            with tf.variable_scope("unitary_update"):
                print('Appling an unitarity preserving step to', var.name)
                # apply the rms update rule.
                new_rms = self._decay_tensor * rms + (1. - self._decay_tensor) \
                    * tf.square(grad)
                rms_assign_op = tf.assign(rms, new_rms)
                # scale the gradient.
                if self._nat_grad_normalization:
                    grad = grad / (tf.sqrt(new_rms) + eps)
                # do an update step, which preserves unitary structure.
                # checking shapes.
                grad_shape = grad.get_shape().as_list()
                assert grad_shape[0] == grad_shape[1]
                eye = tf.eye(grad_shape[0], dtype=tf.complex64)
                # The last axis stores (real, imaginary) parts.
                G = tf.complex(grad[:, :, 0], grad[:, :, 1])
                W = tf.complex(var[:, :, 0], var[:, :, 1])

                # Reunitarize after n steps.
                if self._qr_steps is not None:
                    W = tf.cond(tf.equal(tf.mod(self._global_step_tensor,
                                         self._qr_steps), 0),
                                lambda: self.re_unitarize(W), lambda: W)

                # A is skew-Hermitian by construction.
                A = tf.matmul(G, tf.conj(tf.transpose(W))) \
                    - tf.matmul(W, tf.conj(tf.transpose(G)))
                learning_rate_scale = tf.complex(
                    self._learning_rate_tensor/2.0,
                    tf.zeros_like(self._learning_rate_tensor))
                cayleyDenom = eye + learning_rate_scale * A
                cayleyNumer = eye - learning_rate_scale * A
                C = tf.matmul(tf.matrix_inverse(cayleyDenom), cayleyNumer)
                W_new = tf.matmul(C, W)
                if self._debug:
                    self._summary_C(C)
                    self._summary_W(W)
                # Split the complex result back into the stacked real layout.
                W_new_re = tf.real(W_new)
                W_new_img = tf.imag(W_new)
                W_array = tf.stack([W_new_re, W_new_img], -1)
                var_update_op = tf.assign(var, W_array)
                return tf.group(var_update_op, rms_assign_op)
        else:
            # Do the usual RMSprop update with the TensorFlow kernel.
            print('Appling standard rmsprop to', var.name)
            return training_ops.apply_rms_prop(
                var, rms, mom,
                tf.cast(self._learning_rate_tensor, var.dtype.base_dtype),
                tf.cast(self._decay_tensor, var.dtype.base_dtype),
                tf.cast(self._momentum_tensor, var.dtype.base_dtype),
                tf.cast(self._epsilon_tensor, var.dtype.base_dtype),
                grad, use_locking=False).op
Beispiel #58
0
 def _summary_C(self, C):
     """Record how far `C` is from being unitary/orthogonal.

     Emits the scalar summary 'I-C.HC' holding the real part of
     ``tf.norm(I - C^H C)``, which is zero for a unitary/orthogonal `C`.
     """
     dims = tf.Tensor.get_shape(C).as_list()
     identity_mat = tf.eye(*dims, dtype=C.dtype)
     # C^H C equals the identity exactly when C is unitary/orthogonal.
     gram = tf.matmul(tf.transpose(tf.conj(C)), C)
     deviation_norm = tf.norm(identity_mat - gram)
     tf.summary.scalar('I-C.HC', tf.real(deviation_norm))
 def identity(length, dtype=tf.float64):
     """Return a `length` x `length` identity matrix of the given dtype."""
     eye_matrix = tf.eye(length, dtype=dtype)
     return eye_matrix
 def __init__(self, inputSize, outputSize):
     """Create a zero weight matrix and an identity-derived tensor.

     Args:
         inputSize: first dimension of the weight matrix.
         outputSize: second dimension of the weight matrix.
     """
     # Weights start at zero with shape (inputSize, outputSize), float32.
     self.weight = tf.zeros((inputSize, outputSize), dtype=tf.float32)
     # Total number of weight entries, as returned by tf.size.
     self.nParams = tf.size(self.weight)
     # nParams x nParams identity, reshaped so that each of its nParams rows
     # takes the shape of `weight`.
     # NOTE(review): the meaning of the name `inft` is not evident here — confirm.
     self.inft = tf.reshape(tf.eye(self.nParams, dtype=tf.float32),
                            (self.nParams, *self.weight.shape))