def imputation(self):
     """Attach zero-probability quantities to the model when ``self.zi`` is set.

     Does nothing unless ``self.zi`` is truthy.  Otherwise it stores, with
     ``theta = exp(px_r)``:

     * ``self.zero_prob`` = ``softplus(-px_dropout + theta * px_r
       - theta * log(theta + px_rate + 1e-8)) - softplus(-px_dropout)``
     * ``self.dropout_prob`` = ``-softplus(-px_dropout)``

     NOTE(review): these read like log-probabilities of a zero-inflated
     negative binomial (px_dropout as a logit, px_r as a log-dispersion) --
     confirm against the model definition.
     """
     if not self.zi:
         return

     theta = tf.exp(self.px_r)
     # The small constant guards the logarithm against a zero argument.
     log_theta_plus_rate = tf.log(theta + self.px_rate + 1e-8)
     zero_logit = - self.px_dropout + theta * self.px_r - theta * log_theta_plus_rate

     self.zero_prob = tf.nn.softplus(zero_logit) - tf.nn.softplus(- self.px_dropout)
     self.dropout_prob = - tf.nn.softplus(- self.px_dropout)
  def dot_product_attention(self,x_sen,y_sen,x_len,y_len):
    '''
    Soft-align two batches of sequences with dot-product attention.

    The score between position i of x_sen and position j of y_sen is their
    dot product. Scores are exponentiated (after subtracting the maximum along
    the normalised axis), multiplied by self.y_mask / self.x_mask, and
    normalised so that each row sums to (almost) one.

    :param x_sen: 3D tensor (b,x_len,h)
    :param y_sen: 3D tensor (b,y_len,h)
    :param x_len: not used by this method
    :param y_len: not used by this method
    :return: (1) weighted_y: attention-weighted sum of y_sen per x position, (b,x_len,h)
             (2) weighted_x: attention-weighted sum of x_sen per y position, (b,y_len,h)
    '''
    # (b,x_len,h) x (b,h,y_len) -> (b,x_len,y_len)
    scores = tf.matmul(x_sen, tf.transpose(y_sen, perm=[0, 2, 1]))

    # Shift by the maximum along the axis that will be normalised before exp.
    exp_over_y = tf.exp(scores - tf.reduce_max(scores, axis=2, keep_dims=True))  # (b,x_len,y_len)
    shifted_over_x = scores - tf.reduce_max(scores, axis=1, keep_dims=True)
    exp_over_x = tf.exp(tf.transpose(shifted_over_x, perm=[0, 2, 1]))  # (b,y_len,x_len)

    # Masks are broadcast over the middle axis: (b,1,y_len) and (b,1,x_len).
    exp_over_y = exp_over_y * self.y_mask[:, None, :]
    exp_over_x = exp_over_x * self.x_mask[:, None, :]

    # The 1e-8 keeps the division defined when a whole row is masked out.
    alpha = exp_over_y / (tf.reduce_sum(exp_over_y, 2, keep_dims=True) + 1e-8)  # (b,x_len,y_len)
    beta = exp_over_x / (tf.reduce_sum(exp_over_x, 2, keep_dims=True) + 1e-8)  # (b,y_len,x_len)

    # (b,1,y_len,h) * (b,x_len,y_len,1) => (b,x_len,y_len,h) => (b,x_len,h)
    y_per_x = tf.expand_dims(y_sen, 1) * tf.expand_dims(alpha, -1)
    weighted_y = tf.reduce_sum(y_per_x, 2)

    # (b,1,x_len,h) * (b,y_len,x_len,1) => (b,y_len,x_len,h) => (b,y_len,h)
    x_per_y = tf.expand_dims(x_sen, 1) * tf.expand_dims(beta, -1)
    weighted_x = tf.reduce_sum(x_per_y, 2)

    return weighted_y, weighted_x
Exemple #3
0
    def entropy(self, n, p):
        """Sum pmf * log(pmf) over every outcome of a multinomial.

        Given n and p, where p is a probability vector of length k, the
        computation enumerates all k-vectors of non-negative counts that sum
        to n. This is expensive: (n + 1) ** k candidates are generated and
        filtered.
        http://stackoverflow.com/questions/36435754/generating-a-numpy-array-with-all-combinations-of-numbers-that-sum-to-less-than

        Args:
            n: number of trials; evaluated eagerly in a temporary session.
               Either a single value or one value per row of p.
            p: probabilities; after squeezing, either a vector of length k
               or a matrix with one row per entry of n.

        Returns:
            A scalar tensor for a single n, otherwise one value per entry of n
            packed into a tensor.

        NOTE(review): each value is sum(pmf * log pmf), i.e. the negative of
        the Shannon entropy. The sign is kept as it was -- confirm what
        callers expect before changing it.
        """
        # The enumeration below needs concrete integers, so n is evaluated now.
        sess = tf.Session()
        try:
            n = sess.run(tf.cast(tf.squeeze(n), dtype=tf.int32))
        finally:
            # Release the session even if evaluating n fails.
            sess.close()
        p = tf.cast(tf.squeeze(p), dtype=tf.float32)

        # Categories live on the last axis of p. For a vector this is the same
        # as axis 0; for a batch (rows = cases) axis 0 would be the batch size.
        k = get_dims(p)[-1]

        def _sum_p_log_p(n_trials, probs):
            # Enumerate every count vector of length k that sums to n_trials.
            x = np.array([counts
                          for counts in product(range(n_trials + 1), repeat=k)
                          if sum(counts) == n_trials])
            logpmf = self.logpmf(x, n_trials, probs)
            return tf.reduce_sum(tf.mul(tf.exp(logpmf), logpmf))

        if isinstance(n, np.int32):
            return _sum_p_log_p(n, p)

        return tf.pack([_sum_p_log_p(n[j], p[j, :])
                        for j in range(n.shape[0])])
Exemple #4
0
  def _testSampleLogProbExact(
      self, concentrations, det_bounds, dim, means,
      num_samples=int(1e5), dtype=np.float32, target_discrepancy=0.1, seed=42):
    """Check LKJ sampling against log_prob where the expected mean is given.

    For test methodology see the comment in
    _testSampleConsistentLogProbInterval; this variant covers the parameter
    settings where the true volume is known analytically and supplied through
    `means`.
    """
    concentration = np.array(concentrations, dtype=dtype)
    det_bounds = np.array(det_bounds, dtype=dtype)
    means = np.array(means, dtype=dtype)

    lkj = tfd.LKJ(
        dimension=dim, concentration=concentration, validate_args=True)
    samples = lkj.sample(num_samples, seed=seed)
    weights = tf.exp(-lkj.log_prob(samples)) * _det_ok_mask(samples, det_bounds)
    weight_ceiling = (1. / det_bounds) ** (concentration - 1) * tf.exp(
        lkj._log_normalization())

    # Add a tolerance to guard against some of the weights exceeding the
    # theoretical maximum (weight_ceiling) due to numerical inaccuracies
    # while lower bounding the determinant. See corresponding comment in
    # _testSampleConsistentLogProbInterval.
    high_tolerance = 1e-6
    upper = weight_ceiling + high_tolerance

    mean_check = st.assert_true_mean_equal_by_dkwm(
        weights, low=0., high=upper, expected=means, false_fail_rate=1e-6)
    detectable = st.min_discrepancy_of_true_means_detectable_by_dkwm(
        num_samples, low=0., high=upper,
        false_fail_rate=1e-6, false_pass_rate=1e-6)
    power_check = tf.assert_less(detectable, dtype(target_discrepancy))
    self.evaluate([mean_check, power_check])
def transform_box(bbox, height, width):
    """ Convert centre/log-size boxes into corner boxes in image units.

        Args:
            bbox: [N X 4] input N bbox
                  format = [cx, cy, log(w/W), log(h/H)]; centres are mapped
                  with (c + 1) * size / 2
            height: height of original image
            width: width of original image

        Return:
            bbox: [N X 4] output N bbox (no rounding or clipping is applied)
                  format = [left top right bottom]
    """
    cx, cy, log_w, log_h = tf.split(1, 4, bbox)

    # Undo the log-ratio encoding of the box size.
    box_h = tf.exp(log_h) * height
    box_w = tf.exp(log_w) * width

    # Map the centre coordinates onto the image.
    centre_x = (cx + 1) * width / 2
    centre_y = (cy + 1) * height / 2

    half_w = box_w / 2
    half_h = box_h / 2

    left = centre_x - half_w
    top = centre_y - half_h
    right = centre_x + half_w
    bottom = centre_y + half_h

    return tf.concat(1, [left, top, right, bottom])
Exemple #6
0
 def _kl_entropy(self):
     """
     Add to Graph:
         1. self.kl: KL divergence between the old and new diagonal Gaussians,
            averaged with reduce_mean
         2. self.entropy: entropy of the present diagonal Gaussian
     """
     # For a diagonal covariance the log-determinant is the sum of log-variances.
     log_det_old = tf.reduce_sum(self.old_log_vars_ph)
     log_det_new = tf.reduce_sum(self.log_vars)
     # tr(Sigma_new^-1 Sigma_old) for diagonal covariances.
     trace_term = tf.reduce_sum(tf.exp(self.old_log_vars_ph - self.log_vars))
     # (mu_new - mu_old)^T Sigma_new^-1 (mu_new - mu_old), one value per row.
     mean_shift = tf.reduce_sum(
         tf.square(self.means - self.old_means_ph) / tf.exp(self.log_vars),
         axis=1)

     # KL divergence for multivariate normal distributions:
     # https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Multivariate_normal_distributions
     # The dimensionality k in that formula is act_dim.
     kl = 0.5 * tf.reduce_mean(
         log_det_new - log_det_old + trace_term + mean_shift - self.act_dim)
     self.kl = tf.identity(kl, name="kl")

     # Entropy of a multivariate normal distribution:
     # https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Entropy
     entropy = 0.5 * (self.act_dim * (np.log(2 * np.pi) + 1) +
                      tf.reduce_sum(self.log_vars))
     self.entropy = tf.identity(entropy, name="entropy")
Exemple #7
0
def build_psi_stats_rbf(Z, kern, mu, S):
    """Build the psi0, psi1 and psi2 statistics for an RBF kernel.

    Shapes below follow the inline notes: N rows in mu/S, M rows in Z and
    Q active input dimensions.

    :param Z: M x Q points (presumably inducing inputs -- confirm with caller)
    :param kern: kernel exposing `_slice`, `variance` and `lengthscales`
    :param mu: N x Q tensor (presumably the input means -- confirm)
    :param S: N x Q tensor, combined with the squared lengthscales
        elementwise (presumably diagonal input variances -- confirm)
    :return: (psi0, psi1, psi2) where psi0 = N * variance, psi1 is N x M and
        psi2 is M x M (summed over the N axis)
    """

    # use only active dimensions
    mu, S = kern._slice(mu, S)  # only use the active dimensions.
    Z, _ = kern._slice(Z, None)

    # psi0
    N = tf.shape(mu)[0]
    # N is an integer tensor; cast it so it can multiply the float64 variance.
    psi0 = tf.cast(N, tf.float64) * kern.variance

    # psi1
    lengthscale2 = tf.square(kern.lengthscales)
    psi1_logdenom = tf.expand_dims(tf.reduce_sum(tf.log(S / lengthscale2 + 1.), 1), 1)  # N x 1
    d = tf.square(tf.expand_dims(mu, 1)-tf.expand_dims(Z, 0))  # N x M x Q
    psi1_log = - 0.5 * (psi1_logdenom + tf.reduce_sum(d/tf.expand_dims(S+lengthscale2, 1), 2))
    psi1 = kern.variance * tf.exp(psi1_log)

    # psi2
    psi2_logdenom = -0.5 * tf.expand_dims(tf.reduce_sum(tf.log(2.*S/lengthscale2 + 1.), 1), 1)  # N x 1
    psi2_logdenom = tf.expand_dims(psi2_logdenom, 1)  # N x 1 x 1
    psi2_exp1 = 0.25 * tf.reduce_sum(tf.square(tf.expand_dims(Z, 1)-tf.expand_dims(Z, 0))/lengthscale2, 2)  # M x M
    psi2_exp1 = tf.expand_dims(psi2_exp1, 0)  # 1 x M x M

    # Z_hat holds the midpoint of every pair of rows of Z.
    Z_hat = 0.5 * (tf.expand_dims(Z, 1) + tf.expand_dims(Z, 0))  # MxMxQ
    denom = 1./(2.*S+lengthscale2)
    # a + b + c expands sum_q denom * (mu - Z_hat)^2 into its three terms.
    a = tf.expand_dims(tf.expand_dims(tf.reduce_sum(tf.square(mu)*denom, 1), 1), 1)  # N x 1 x 1
    b = tf.reduce_sum(tf.expand_dims(tf.expand_dims(denom, 1), 1) * tf.square(Z_hat), 3)  # N M M
    c = -2*tf.reduce_sum(tf.expand_dims(tf.expand_dims(mu*denom, 1), 1) * Z_hat, 3)  # N M M
    psi2_exp2 = a + b + c

    # Sum the per-point N x M x M contributions over the N axis.
    psi2 = tf.square(kern.variance) * tf.reduce_sum(tf.exp(psi2_logdenom - psi2_exp1 - psi2_exp2), 0)
    return psi0, psi1, psi2
def w(input_data, cu, kappas_t_1, config):
	"""Compute a mixture-of-Gaussians window over the positions of `cu`.

	`input_data` is split along axis 1 into three equal parts whose
	exponentials give the mixture weights (alpha), the widths (beta) and the
	increment added to the previous centres `kappas_t_1`. For every position
	u = 1..L, with L = tf.shape(cu)[1], the window value is
	sum_k alpha_k * exp(-beta_k * (kappa_k - u)^2).

	Returns:
		output: phi_t batch-multiplied with cu, with axis 1 squeezed away
		kappas_t: the updated centres
		phi_t: the window, with a singleton axis 1
	"""
	batch_size = config.batch_size
	mixture_size = config.mixture_size
	vocab_length = config.vocab_length  # not used below

	# split along dim of mixture size * 3
	raw_alphas, raw_betas, raw_kappas = tf.split(1, 3, input_data)

	alphas_t = tf.exp(raw_alphas)
	betas_t = tf.exp(raw_betas)
	# Centres only ever move forward: the exponential increment is positive.
	kappas_t = tf.add(kappas_t_1, tf.exp(raw_kappas))

	speech_length = tf.shape(cu)[1]

	# Positions 1..L replicated for every batch entry and mixture component.
	positions = tf.linspace(1.0, tf.cast(speech_length, tf.float32), speech_length)
	positions = tf.expand_dims(tf.expand_dims(positions, 0), 0)
	positions = tf.tile(positions, [batch_size, mixture_size, 1])

	def over_positions(param):
		# Repeat a per-component parameter along a new trailing position axis.
		return tf.tile(tf.expand_dims(param, -1), [1, 1, speech_length])

	alphas_wide = over_positions(alphas_t)
	betas_wide = over_positions(betas_t)
	kappas_wide = over_positions(kappas_t)

	sq_distance = tf.square(tf.sub(kappas_wide, positions))
	bumps = tf.exp(tf.mul(sq_distance, tf.neg(betas_wide)))
	weighted_bumps = tf.mul(bumps, alphas_wide)

	# Sum over mixture components, keeping a singleton axis for the matmul.
	phi_t = tf.expand_dims(tf.reduce_sum(weighted_bumps, 1), 1)

	output = tf.squeeze(tf.batch_matmul(phi_t, cu), [1])

	return output, kappas_t, phi_t
    def get_mixture_coef(output):
      """Split a network output into mixture-density parameters.

      Implements eq 18 -> 23 of http://arxiv.org/abs/1308.0850. Column 0 of
      `output` is the end-of-stroke logit; the remaining columns are split
      into six equal groups: pi, mu1, mu2, sigma1, sigma2, corr.

      Returns the list [pi, mu1, mu2, sigma1, sigma2, corr, eos] with pi
      softmax-normalised along axis 1, the sigmas exponentiated, corr passed
      through tanh and eos passed through a sigmoid.
      """
      eos_logit = output[:, 0:1]
      pi_logit, mu1, mu2, log_sigma1, log_sigma2, corr_raw = tf.split(1, 6, output[:, 1:])

      # end of stroke signal
      eos = tf.sigmoid(eos_logit) # should be negated, but doesn't matter.

      # softmax over the mixture weights, shifted by the row maximum first
      shifted = tf.sub(pi_logit, tf.reduce_max(pi_logit, 1, keep_dims=True))
      unnormalised = tf.exp(shifted)
      inv_total = tf.inv(tf.reduce_sum(unnormalised, 1, keep_dims=True))
      pi = tf.mul(inv_total, unnormalised)

      # positive standard deviations and a correlation between -1 and 1
      sigma1 = tf.exp(log_sigma1)
      sigma2 = tf.exp(log_sigma2)
      corr = tf.tanh(corr_raw)

      return [pi, mu1, mu2, sigma1, sigma2, corr, eos]
Exemple #10
0
def encode(x, h, generator, **kwargs):
    """Build prior and posterior heads from (x, h) and sample a latent z.

    Args:
        x: input tensor; its static leading dimension is used as batch size.
        h: second input fed to the prior and (with the transformed x) to the
            posterior head -- presumably a recurrent state; confirm with caller.
        generator: boolean tensor for tf.cond; when true z is sampled from the
            prior parameters, otherwise from the posterior parameters.
        **kwargs: required keys "x_transformed", "z_dim", "phi_interm",
            "prior_interm" (layer widths passed to `fun` as nout); optional
            "weight_factor" (default 1.0) and "layers_num" (default 1).

    Returns:
        (z, z_mu, z_sigma, prior_mu, prior_sigma, x_t)
    """
    x_transformed = kwargs["x_transformed"]
    z_dim = kwargs["z_dim"]
    phi_interm = kwargs["phi_interm"]
    prior_interm = kwargs["prior_interm"]
    weight_factor = kwargs.get("weight_factor", 1.0)
    layers_num = kwargs.get("layers_num", 1)
    # Requires a statically known batch dimension (used for the noise shape).
    batch_size = x.get_shape().as_list()[0]

    x_t = fun(x, nout = x_transformed, act = tf.nn.relu, name = "x_transformed", weight_factor = weight_factor, layers_num = layers_num)

    # Prior branch: depends on h only.
    prior = fun(h, nout = prior_interm, act = tf.nn.relu, name = "prior", weight_factor = weight_factor, layers_num = layers_num)
    prior_mu = fun(prior, nout = z_dim, act = tf.identity, name = "prior_mu", weight_factor = weight_factor)
    prior_sigma = fun(prior, nout = z_dim, act = tf.nn.softplus, name = "prior_sigma", weight_factor = weight_factor)

    # Posterior branch: depends on the transformed input and h.
    phi = fun(x_t, h, nout = phi_interm, act = tf.nn.relu, name = "phi", weight_factor = weight_factor, layers_num = layers_num)
    z_mu = fun(phi, nout = z_dim, act = tf.identity, name = "z_mu", weight_factor = weight_factor)
    z_sigma = fun(phi, nout = z_dim, act = tf.nn.softplus, name = "z_sigma", weight_factor = weight_factor)

    epsilon = tf.random_normal((batch_size, z_dim), name='epsilon')

    # NOTE(review): the sigma heads already pass through softplus and are
    # exponentiated again here, so the sampling scale is exp(softplus(.)) >= 1.
    # Confirm this is intended rather than a log-sigma/sigma mix-up.
    z = tf.cond(
        generator, 
        lambda: prior_mu + tf.exp(prior_sigma) * epsilon, 
        lambda: z_mu + tf.exp(z_sigma) * epsilon
    )

    return z, z_mu, z_sigma, prior_mu, prior_sigma, x_t
Exemple #11
0
                def copy_net_logit_function(state):
                    """Combine generation and copy scores for one decoder state.

                    Returns a pair: the log of the summed (clipped)
                    exponentials of the generation and copy scores per
                    vocabulary entry, and the raw copy scores per source
                    position.
                    """
                    dropped_state = tf.nn.dropout(
                        state, self.dropout_placeholder)

                    # the logits for generating the next word are computed in
                    # the standard way
                    generate_logits = (
                        tf.matmul(dropped_state, decoding_w) + decoding_b)

                    # Equation 8 in the paper ... in shape of source sentence
                    # (batch x time)
                    copy_logits_in_time = tf.reduce_sum(
                        projected_inputs * tf.expand_dims(dropped_state, 1),
                        [2])

                    # logits are capped at 80 before exponentiation; padding
                    # is then masked out in the exponential domain
                    copy_exp_in_time = tf.exp(
                        tf.minimum([[80.0]], copy_logits_in_time)) * copy_mask

                    #  ... in shape of vocabulary (batch x time x vocabulary)
                    copy_exp_in_vocabulary = tf.expand_dims(
                        copy_exp_in_time, 2) * vocabulary_shaped_indices

                    # Equation 6 without normalization
                    copy_exp = tf.reduce_sum(copy_exp_in_vocabulary, [1])

                    generate_exp = tf.exp(
                        tf.minimum([[80.0]], generate_logits))
                    total_exp = copy_exp + generate_exp

                    # floor at 1e-40 so the logarithm stays finite
                    combined_logits = tf.log(
                        tf.maximum([[1e-40]], total_exp))
                    return (combined_logits, copy_logits_in_time)
 def _forward(self, x):
   """Apply the forward transform: exp(x) if power is 0, else (1 + x * power) ** (1 / power)."""
   checked = self._maybe_assert_valid_x(x)
   if self.power == 0.:
     result = tf.exp(checked)
   else:
     # Computed as exp(log1p(x * power) / power).
     # If large x accuracy is an issue, consider using:
     # (1. + x * self.power)**(1. / self.power) when x >> 1.
     result = tf.exp(tf.log1p(checked * self.power) / self.power)
   return result
def filterbank_matrices(g_x, g_y, delta, sigma, N, A, B):
    ''' Compute filter bank matrices. All inputs are in batches.

    Each of the N filters is exp(-((pixel - centre) / sigma) ** 2); the
    result is returned without normalisation.

    Args:
        g_x, g_y: grid centers, relative to the center of the image
        delta: strides
        sigma: width parameter dividing the pixel-to-centre distance
        N: grid dimension
        A, B: input image dimensions, width and height
    Returns:
        F_x, F_y: filter banks matrices [batch, N, A] and [batch, N, B]

    NOTE(review): grid indices run 0..N-1 here, so with the
    "- N / 2 - 0.5" offset the grid is not symmetric about (g_x, g_y);
    confirm against the referenced equations 19/20.
    '''
    def pixel_coordinates(size):
        # 0..size-1 as floats, shaped [1, 1, size] for broadcasting.
        return tf.reshape(tf.cast(tf.range(size), tf.float32), [1, 1, -1])

    grid_index = tf.reshape(tf.cast(tf.range(N), tf.float32), [1, -1])

    # eq 19 and eq 20 share the same per-filter offset from the grid centre
    offsets = (grid_index - N / 2 - 0.5) * delta
    centres_x = g_x + offsets
    centres_y = g_y + offsets

    # reshape for broadcasting against the pixel coordinates
    centres_x = tf.reshape(centres_x, [-1, N, 1])
    centres_y = tf.reshape(centres_y, [-1, N, 1])
    width = tf.reshape(sigma, [-1, 1, 1])

    F_x = tf.exp(-tf.square((pixel_coordinates(A) - centres_x) / width))
    F_y = tf.exp(-tf.square((pixel_coordinates(B) - centres_y) / width))

    return F_x, F_y
def tf_ssd_bboxes_decode_layer(feat_localizations,
                               anchors_layer,
                               prior_scaling=(0.1, 0.1, 0.2, 0.2)):
    """Compute the relative bounding boxes from the layer features and
    reference anchor bounding boxes.

    Arguments:
      feat_localizations: Tensor containing localization features; indexed
        with four leading axes and a last axis of size 4 holding
        (dx, dy, dw, dh).
      anchors_layer: Sequence (yref, xref, href, wref) of anchor centres and
        sizes for this layer.
      prior_scaling: Four scaling factors applied to (dx, dy, dw, dh) before
        decoding. An immutable tuple is used as default so the default can
        never be modified between calls.

    Return:
      Tensor with the decoded boxes stacked on a new last axis as
      (ymin, xmin, ymax, xmax).
    """
    yref, xref, href, wref = anchors_layer

    # Compute center, height and width
    cx = feat_localizations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, :, :, 1] * href * prior_scaling[1] + yref
    w = wref * tf.exp(feat_localizations[:, :, :, :, 2] * prior_scaling[2])
    h = href * tf.exp(feat_localizations[:, :, :, :, 3] * prior_scaling[3])
    # Boxes coordinates.
    ymin = cy - h / 2.
    xmin = cx - w / 2.
    ymax = cy + h / 2.
    xmax = cx + w / 2.
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)
    return bboxes
Exemple #15
0
def softmax(x):
  """
  Compute the row-wise softmax of a 2-D tensor in tensorflow.

  The row maximum is subtracted before exponentiating, which leaves the
  result unchanged but keeps tf.exp from overflowing.

  Args:
    x:   tf.Tensor with shape (n_samples, n_features). Feature vectors are
         represented by row-vectors; 1-d input is not handled.
  Returns:
    out: tf.Tensor with shape (n_sample, n_features) whose rows sum to one.
  """

  ### YOUR CODE HERE
  shifted = x - tf.reduce_max(x, reduction_indices=1, keep_dims=True)
  exponentials = tf.exp(shifted)
  row_totals = tf.reduce_sum(exponentials, reduction_indices=1, keep_dims=True)
  out = exponentials / row_totals
  ### END YOUR CODE

  return out
Exemple #16
0
def variational_bayes(h, n_code):
    """Gaussian latent layer with reparameterised sampling and its KL cost.

    Parameters
    ----------
    h : tf.Tensor
        Input activations; the leading dimension is used as the batch size.
    n_code : int
        Number of latent units.

    Returns
    -------
    z : tf.Tensor
        Sample ``z_mu + epsilon * exp(z_log_sigma)``, named ``'z'``.
    z_mu : tf.Tensor
        Latent mean, squashed to (-1, 1) by tanh.
    z_log_sigma : tf.Tensor
        Latent log standard deviation, squashed to (-0.5, 0.5).
    loss_z : tf.Tensor
        Per-example ``d_kl(q(z|x) || N(0, 1))``, summed over latent units.
    """
    mu_activation = linear(h, n_code, name='mu')[0]
    z_mu = tf.nn.tanh(mu_activation)
    log_sigma_activation = linear(h, n_code, name='log_sigma')[0]
    z_log_sigma = 0.5 * tf.nn.tanh(log_sigma_activation)

    # Noise from N(0, 1), one row per example in the batch.
    batch = tf.shape(h)[0]
    epsilon = tf.random_normal(tf.stack([batch, n_code]))

    # Reparameterised sample from the posterior.
    scaled_noise = tf.multiply(epsilon, tf.exp(z_log_sigma))
    z = tf.add(z_mu, scaled_noise, name='z')

    # Closed-form KL between the diagonal Gaussian posterior and N(0, 1).
    kl_terms = (1.0 + 2.0 * z_log_sigma - tf.square(z_mu)
                - tf.exp(2.0 * z_log_sigma))
    loss_z = -0.5 * tf.reduce_sum(kl_terms, 1)
    return z, z_mu, z_log_sigma, loss_z
    def _decode(self, rel_codes, anchors):
        """Turn anchor-relative codes back into corner-format boxes.

        Args:
          rel_codes: a tensor representing N anchor-encoded boxes, unpacked
            as (ty, tx, th, tw) after transposition.
          anchors: BoxList of anchors.

        Returns:
          boxes: BoxList holding N bounding boxes as (ymin, xmin, ymax, xmax).
        """
        ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

        ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
        scale_factors = self._scale_factors
        if scale_factors:
            # Undo the scaling of the four code components.
            ty = ty / scale_factors[0]
            tx = tx / scale_factors[1]
            th = th / scale_factors[2]
            tw = tw / scale_factors[3]

        box_w = tf.exp(tw) * wa
        box_h = tf.exp(th) * ha
        ycenter = ty * ha + ycenter_a
        xcenter = tx * wa + xcenter_a

        half_h = box_h / 2.
        half_w = box_w / 2.
        corners = tf.stack([ycenter - half_h,
                            xcenter - half_w,
                            ycenter + half_h,
                            xcenter + half_w])
        return box_list.BoxList(tf.transpose(corners))
    def _bulid_model(self):
        """Assemble the graph: encoder, sampling, decoder, loss and optimizer."""
        # Input placeholder with a fixed batch size
        self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.input_dim])

        # Encoder: mean and log-variance of the Gaussian over z
        self.z_mean, self.z_log_sigma_sq = self._encoder(self.x)

        # Reparameterised sample: z = mean + sigma * noise
        noise = tf.random_normal([self.batch_size, self.z_dim], mean=0.0, stddev=1.0)
        sigma = tf.sqrt(tf.exp(self.z_log_sigma_sq))
        self.z = tf.add(self.z_mean, tf.mul(sigma, noise))

        # Decoder: mean of the Bernoulli distribution over the reconstruction
        self.x_reconstr_mean = self._decoder(self.z)

        with tf.name_scope("loss"):
            # Reconstruction loss: Bernoulli cross entropy. The 1e-10 keeps
            # the logarithms finite when the decoder saturates at 0 or 1.
            log_on = tf.log(1e-10 + self.x_reconstr_mean)
            log_off = tf.log(1e-10 + 1.0 - self.x_reconstr_mean)
            log_likelihood = self.x * log_on + (1.0 - self.x) * log_off
            reconstr_loss = -tf.reduce_sum(log_likelihood, axis=1)

            # Latent loss: KL divergence to a standard normal
            kl_terms = (1.0 + self.z_log_sigma_sq - tf.square(self.z_mean)
                        - tf.exp(self.z_log_sigma_sq))
            latent_loss = -0.5 * tf.reduce_sum(kl_terms, axis=1)

            # Average over the batch
            self.cost = tf.reduce_mean(reconstr_loss + latent_loss)

        # Learning rate is a non-trainable variable so it can be reassigned.
        self.lr = tf.Variable(0.001, trainable=False)
        trainable = tf.trainable_variables()
        adam = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.train_op = adam.minimize(self.cost, var_list=trainable)
    def __init__(self, n_input, n_hidden, optimizer=None):
        """Build the variational autoencoder graph and start a session.

        Args:
            n_input: width of the input placeholder.
            n_hidden: width of the latent code.
            optimizer: object whose ``minimize(cost)`` returns the training
                op. Defaults to a new ``tf.train.AdamOptimizer()`` created
                for this instance.
        """
        # A default written in the signature is evaluated once, at definition
        # time, and would then be shared by every instance of the class.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer()

        self.n_input = n_input
        self.n_hidden = n_hidden

        network_weights = self._initialize_weights()
        self.weights = network_weights

        # model: linear heads for the latent mean and log-variance
        self.x = tf.placeholder(tf.float32, [None, self.n_input])
        self.z_mean = tf.add(tf.matmul(self.x, self.weights['w1']), self.weights['b1'])
        self.z_log_sigma_sq = tf.add(tf.matmul(self.x, self.weights['log_sigma_w1']), self.weights['log_sigma_b1'])

        # sample from gaussian distribution: z = mean + sigma * eps
        eps = tf.random_normal(tf.stack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32)
        self.z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))

        self.reconstruction = tf.add(tf.matmul(self.z, self.weights['w2']), self.weights['b2'])

        # cost
        # NOTE(review): reconstr_loss is a single scalar summed over the whole
        # batch, while latent_loss has one entry per example; the sum below
        # broadcasts the scalar onto every example. Confirm this weighting is
        # intended before changing it.
        reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq
                                           - tf.square(self.z_mean)
                                           - tf.exp(self.z_log_sigma_sq), 1)
        self.cost = tf.reduce_mean(reconstr_loss + latent_loss)
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)
    def log_prob(self, xs, zs):
        """Return log p(xs, zs) = log-likelihood + log-prior.

        Both terms are reduced with tf.reduce_sum, so the result is a scalar.

        Args:
            xs: mapping whose 'x' entry holds the observed matrix.
            zs: latent variables; the first n_rows*K columns are the row
                factors and the following n_cols*K columns the column factors.

        Raises:
            NotImplementedError: for an unknown prior, interaction or
                likelihood, or a rejected Poisson configuration.
        """
        if self.prior == 'Lognormal':
            zs = tf.exp(zs)
        elif self.prior != 'Gaussian':
            raise NotImplementedError("prior not available.")

        log_prior = -self.prior_variance * tf.reduce_sum(zs*zs)

        # The column factors begin right after the n_rows*K row-factor
        # entries. (Starting the second slice at n_cols*K is only correct
        # when n_rows == n_cols.)
        n_row_entries = self.n_rows * self.K
        s = tf.reshape(zs[:, :n_row_entries], [self.n_rows, self.K])
        t = tf.reshape(zs[:, n_row_entries:], [self.n_cols, self.K])

        xp = tf.matmul(s, t, transpose_b=True)
        if self.interaction == 'multiplicative':
            xp = tf.exp(xp)
        elif self.interaction != 'additive':
            raise NotImplementedError("interaction type unknown.")

        if self.like == 'Gaussian':
            log_lik = tf.reduce_sum(norm.logpdf(xs['x'], xp))
        elif self.like == 'Poisson':
            # NOTE(review): a non-negative rate is guaranteed by the
            # 'multiplicative' interaction (exp) or the 'Lognormal' prior,
            # yet this check accepts 'additive'. Left unchanged because it
            # alters which configurations raise -- confirm the intent.
            if not (self.interaction == "additive" or self.prior == "Lognormal"):
                raise NotImplementedError("Rate of Poisson has to be nonnegatve.")

            log_lik = tf.reduce_sum(poisson.logpmf(xs['x'], xp))
        else:
            raise NotImplementedError("likelihood not available.")

        return log_lik + log_prior
Exemple #21
0
def bbox_transform_inv_tf(boxes, deltas):
    """
    Apply regression deltas to boxes (TF implementation of
    bbox_transform_inv). Boxes and deltas are assumed to always be of
    shape (n, 4); boxes are (x1, y1, x2, y2) and deltas (dx, dy, dw, dh).
    Returns the shifted and rescaled boxes as an (n, 4) tensor of corners.
    """
    boxes = tf.cast(boxes, deltas.dtype) # TODO maybe remove?
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]

    # Widths and heights count both end pixels, hence the + 1.
    widths = x2 - x1 + 1.0
    heights = y2 - y1 + 1.0
    ctr_x = x1 + 0.5 * widths
    ctr_y = y1 + 0.5 * heights

    # Centres move proportionally to the box size; sizes scale by exp(delta).
    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = tf.exp(deltas[:, 2]) * widths
    pred_h = tf.exp(deltas[:, 3]) * heights

    half_w = 0.5 * pred_w
    half_h = 0.5 * pred_h
    corners = [
        pred_ctr_x - half_w,  # x1
        pred_ctr_y - half_h,  # y1
        pred_ctr_x + half_w,  # x2
        pred_ctr_y + half_h,  # y2
    ]

    # stack gives (4, n); transpose back to (n, 4)
    return tf.transpose(tf.stack(corners))
Exemple #22
0
def rect_gaussian_kld(mean, log_var, mean0=0., log_var0=0., reduce_mean=True):
    """Closed-form KL-style divergence built from Gaussian pdf/cdf terms.

    NOTE(review): judging by the name and the `pzero` mass-at-zero term this
    is the KL between two rectified Gaussians, (mean, log_var) against
    (mean0, log_var0) -- confirm against the derivation this was taken from.

    Args:
        mean, log_var: mean and log-variance of the first distribution.
        mean0, log_var0: mean and log-variance of the second distribution.
        reduce_mean: when true, average the per-row result over all rows.

    Returns:
        The divergence summed over axis 1, then averaged if `reduce_mean`.
    """
    def phi(x):
        # Standard normal probability density.
        return tf.exp(-0.5*tf.square(x))/np.sqrt(2*np.pi)
    def Phi(x):
        # Standard normal cumulative distribution, via the error function.
        return 0.5 + 0.5*tf.erf(x/np.sqrt(2))

    smean = tf.square(mean)
    var = tf.exp(log_var)
    log_std = 0.5*log_var
    std = tf.exp(log_std)

    smean0 = tf.square(mean0)
    var0 = tf.exp(log_var0)
    log_std0 = 0.5*log_var0
    std0 = tf.exp(log_std0)

    # Small constant keeping the logarithms finite when a cdf value is 0.
    tol = 1.0e-10
    # Probability that a N(mean, var) draw is at or below zero.
    pzero = Phi(-mean/std)
    kld = pzero*(tf.log(pzero+tol) - tf.log(Phi(-mean0/std0)+tol))
    kld += (1-pzero)*(log_std0 - log_std + 0.5*(smean0/var0 - smean/var))
    kld += (0.5/var0 - 0.5/var)*((smean + var)*(1-pzero) + mean*std*phi(-mean/std))
    kld -= (mean0/var0 - mean/var)*(mean*(1-pzero) + std*phi(-mean/std))
    kld = tf.reduce_sum(kld, 1)
    if reduce_mean:
        kld = tf.reduce_mean(kld)
    return kld
Exemple #23
0
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    `none1`, `none2` and `nghp` are accepted but not used here.

    :return: NxM
    """
    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)
        D = tf.shape(Xmu)[1]

        lengthscales = kern.lengthscales
        if not kern.ARD:
            # Expand the shared lengthscale to one entry per input dimension.
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + lengthscales

        chol = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD

        # Squared Mahalanobis distance through a triangular solve.
        diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)  # NxDxM
        whitened = tf.matrix_triangular_solve(chol, diffs, lower=True)  # NxDxM
        sq_mahalanobis = tf.reduce_sum(tf.square(whitened), 1)  # NxM
        gaussian_factor = tf.exp(-0.5 * sq_mahalanobis)  # NxM

        # Ratio of square-root determinants; the denominator is the product
        # of the Cholesky diagonal, accumulated in log space.
        sqrt_det_L = tf.reduce_prod(lengthscales)
        log_chol_diag = tf.log(tf.matrix_diag_part(chol))
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(log_chol_diag, axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * gaussian_factor)
Exemple #24
0
    def __init__(self, encoder, decoder):
        """Wire an encoder and a decoder into a variational autoencoder graph.

        Args:
            encoder: callable model with `output_shape` (batch, 2 * latent)
                and `trainable_weights`; its output is split into `mu` and
                `log_sigma`.
            decoder: callable model with `output_shape` and
                `trainable_weights`; its output is split into a mean and a
                log-variance per reconstructed value.

        Sets `self.latent_loss`, `self.reconstruction_loss` and their sum
        `self.loss`, all of which are to be minimised.
        """
        self.x = tf.placeholder(tf.float32, name='input')
        self.latent_shape = (encoder.output_shape[0], encoder.output_shape[1] // 2)
        self.encoder = encoder
        self.decoder = decoder
        self.batch_size = self.latent_shape[0]

        assert None not in self.latent_shape, "All dimensions must be known"
        encoded = tf.reshape(encoder(self.x), (self.batch_size, 2, self.latent_shape[1]))
        self.mu, self.log_sigma = encoded[:, 0, :], encoded[:, 1, :]
        self.mu = tf.reshape(self.mu, self.latent_shape)
        self.log_sigma = tf.reshape(self.log_sigma, self.latent_shape)

        self.eps = tf.random_normal(self.latent_shape,
                                    mean=0.0, stddev=1.0, name="eps")
        # NOTE(review): log_sigma is used as a log standard deviation here
        # but as a log variance in latent_loss below -- confirm which one is
        # intended and align the two.
        self.z = self.mu + tf.exp(self.log_sigma) * self.eps

        decoded = decoder(self.z)
        decoder_shape = decoder.output_shape
        if len(decoder_shape) == 2:
            decoded = tf.reshape(decoded, (self.batch_size, decoder_shape[1] // 2, 1, 2))
        else:
            assert decoder_shape[-1] == 2

        self.x_hat_mu, self.x_hat_log_sigma = decoded[:, :, :, 0], decoded[:, :, :, 1]
        self.x_hat_mu = tf.reshape(self.x_hat_mu, (self.batch_size, decoder_shape[1] // 2))
        self.x_hat_log_sigma = tf.reshape(self.x_hat_log_sigma, (self.batch_size, decoder_shape[1] // 2))

        self.params = encoder.trainable_weights + decoder.trainable_weights

        self.latent_loss = -0.5 * tf.reduce_mean(1 + self.log_sigma - self.mu**2 - tf.exp(self.log_sigma))
        # Gaussian negative log-likelihood (additive constant dropped), with
        # x_hat_log_sigma read as a log variance as the division implies.
        # The term must be positive for minimisation to reduce the error, and
        # the 0.5 * log-variance term stops the variance from growing freely.
        self.reconstruction_loss = tf.reduce_mean(
            ((self.x_hat_mu - self.x)**2) / (2 * tf.exp(self.x_hat_log_sigma))
            + 0.5 * self.x_hat_log_sigma)

        self.loss = self.latent_loss + self.reconstruction_loss
Exemple #25
0
def contrastive_loss_andre(left_feature, right_feature, label, margin):
  """
  Compute three label-weighted distance losses, after
  https://gitlab.idiap.ch/biometric/xfacereclib.cnn/blob/master/xfacereclib/cnn/scripts/experiment.py#L156
  With Y = [-1 +1] --> [POSITIVE_PAIR NEGATIVE_PAIR]

  Each returned value is mean(log(exp(weight * d))) where d is the distance
  between the two features and weight is Y, (Y + 1) / 2 or (Y - 1) / 2.

  NOTE(review): the referenced formula is L = log( m + exp( Y * d^2)) / N,
  but `margin` is never used below and d is not squared here -- confirm
  whether that is intended.

  **Parameters**
   left_feature: First element of the pair
   right_feature: Second element of the pair
   label: Label of the pair; converted to float
   margin: Contrastive margin (currently unused)
  **Returns**
   (loss, between_class, within_class)
  """

  with tf.name_scope("contrastive_loss_andre"):
    label = tf.to_float(label)
    d = compute_euclidean_distance(left_feature, right_feature)

    def mean_log_exp(weight):
      # mean over the batch of log(exp(weight * d))
      return tf.reduce_mean(tf.log(tf.exp(tf.mul(weight, d))))

    loss = mean_log_exp(label)

    # Within class part: weight (Y - 1) / 2 is non-zero only for Y = -1
    within_class = mean_log_exp(tf.mul(label - 1, 0.5))

    # Between class part: weight (Y + 1) / 2 is non-zero only for Y = +1
    between_class = mean_log_exp(tf.mul(label + 1, 0.5))

    return loss, between_class, within_class
  def __call__(self, inputs, state, scope=None):
    """Run one step of the cell.

    Args:
      inputs: input tensor for this step.
      state: tuple (h, n, d, a_max) holding the previous output, the running
        numerator, the running denominator and the running maximum of `a`.
      scope: optional variable scope name; defaults to "rwa_cell".

    Returns:
      (h_new, new_state) where new_state is an RWACellTuple
      (h_new, n, d, a_newmax).
    """
    with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
      h, n, d, a_max = state

      # u depends on the input only; g and a also see the previous output h.
      with vs.variable_scope("u"):
        u = _linear(inputs, self._num_units, True)

      with vs.variable_scope("g"):
        g = _linear([inputs, h], self._num_units, True)

      with vs.variable_scope("a"):
        a = _linear([inputs, h], self._num_units, False) # The bias term when factored out of the numerator and denominator cancels and is unnecessary

      z = tf.multiply(u, tanh(g))

      # Rescale the running sums by the largest `a` seen so far so that the
      # exponentials below never receive a positive argument.
      a_newmax = tf.maximum(a_max, a)
      exp_diff = tf.exp(a_max - a_newmax)
      exp_scaled = tf.exp(a - a_newmax)

      n = tf.multiply(n, exp_diff) + tf.multiply(z, exp_scaled)  # Numerically stable update of numerator
      d = tf.multiply(d, exp_diff) + exp_scaled  # Numerically stable update of denominator
      # The new output is the activation of the weighted average n / d.
      h_new = self._activation(tf.div(n, d))

      new_state = RWACellTuple(h_new, n, d, a_newmax)

    return h_new, new_state
Exemple #27
0
    def log_normal(self, position, mean, log_var, type_=1):
        '''
        Log density of a diagonal normal distribution.

        type 1:
        position is [P, D]
        mean is [D]
        log_var is [D]
        output is [P]

        type 2:
        position is [P, D]
        mean is [P,D]
        log_var is [P,D]
        output is [P]

        Raises ValueError when type_ is neither 1 nor 2.
        '''
        # The two layouts differ only in the axis that holds D in log_var.
        # Validate first so an unsupported type_ fails with a clear message
        # before any graph ops are created.
        if type_ == 1:
            log_var_axis = 0
        elif type_ == 2:
            log_var_axis = 1
        else:
            raise ValueError("type_ must be 1 or 2, got %r" % (type_,))

        n_D = tf.to_float(tf.shape(position)[1])
        term1 = n_D * tf.log(2*math.pi)  # D * log(2 pi)

        term2 = tf.reduce_sum(log_var, log_var_axis)  # log-determinant, summed over D
        dif_cov = tf.square(position - mean) / tf.exp(log_var)
        term3 = tf.reduce_sum(dif_cov, 1)  # squared Mahalanobis distance, [P]

        return -.5 * (term1 + term2 + term3)
Exemple #28
0
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Expectation of the input multiplied by the RBF cross-covariance:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,.} :: RBF kernel

    :return: NxDxM
    """
    Xmu, Xcov = p.mu, p.cov

    # Refuse inputs whose dimensionality differs from the kernel's input_dim.
    dim_check = tf.assert_equal(
        tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
        message="Currently cannot handle slicing in exKxz.")
    with tf.control_dependencies([dim_check]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        D = tf.shape(Xmu)[1]
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            # Expand the single lengthscale to one entry per input dimension.
            lengthscales = tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        # Cholesky factor of (Lambda + Sigma_n), with Lambda = diag(lengthscales^2).
        chol = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD
        diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2)  # NxDxM

        # sqrt|Lambda| / sqrt|Lambda + Sigma_n|; the denominator comes from the
        # product of the Cholesky diagonal, accumulated in log space.
        sqrt_det_L = tf.reduce_prod(lengthscales)
        chol_diag = tf.matrix_diag_part(chol)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(chol_diag), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        solved = tf.cholesky_solve(chol, diffs)  # NxDxM
        linear_term = tf.matmul(Xcov, solved, transpose_a=True)
        linear_term = tf.expand_dims(Xmu, 2) + linear_term  # NxDxM

        mahalanobis = tf.reduce_sum(diffs * solved, 1)  # NxM
        gaussian_factor = tf.exp(-0.5 * mahalanobis)  # NxM

        scaled = (determinants[:, None] * gaussian_factor)[:, None, :]
        return kern.variance * scaled * linear_term
Exemple #29
0
 def kl_divergence(self, other):
     """Return KL(self || other) between two Gaussians, averaged over axis 0.

     Uses the closed form
         log(s2 / s1) + (s1^2 + (m1 - m2)^2) / (2 * s2^2) - 1/2
     where (m1, s1) belong to self and (m2, s2) to other.

     The previous version used s1 instead of s1^2 in the numerator and had
     the log-ratio sign flipped, so the KL of a distribution with itself was
     non-zero and the result could be negative.
     """
     assert isinstance(other, Gaussian)
     l2_dist = tf.square(self.mean - other.mean)
     sqr_std_dev1 = tf.square(x=tf.exp(x=self.log_std_dev))
     sqr_std_dev2 = tf.square(x=tf.exp(x=other.log_std_dev))
     # util.epsilon keeps the division finite when other's variance underflows.
     kl_div = tf.reduce_mean(
         other.log_std_dev - self.log_std_dev
         + (sqr_std_dev1 + l2_dist) / (2 * sqr_std_dev2 + util.epsilon)
         - 0.5,
         axis=0)
     return kl_div
  def build_encoder(self):
    """Inference network q(h|X): two ReLU layers, then mean / log-sigma heads."""

    def xavier_variable(name, shape):
      # Every encoder weight matrix is Xavier-initialised.
      return tf.get_variable(
          name,
          shape=shape,
          initializer=tf.contrib.layers.xavier_initializer())

    with tf.variable_scope("encoder"):
      self.l1_w = xavier_variable(
          "l1_w", [self.reader.vocab_size, self.embed_dim])
      self.l2_w = xavier_variable("l2_w", [self.embed_dim, self.embed_dim])

      self.mean_w = xavier_variable("mean_w", [self.embed_dim, self.h_dim])
      self.sigma_w = xavier_variable("sigma_w", [self.embed_dim, self.h_dim])

      # self.x gets a leading axis so it can be matrix-multiplied as one row.
      x_row = tf.expand_dims(self.x, 0)
      self.l1 = tf.nn.relu(tf.matmul(x_row, self.l1_w))
      self.l2 = tf.nn.relu(tf.matmul(self.l1, self.l2_w))

      self.mean = tf.matmul(self.l2, self.mean_w)
      self.log_sigma = tf.matmul(self.l2, self.sigma_w)
      self.sigma = tf.exp(self.log_sigma)

      # KL(q(h|X) || N(0, I)) for a diagonal Gaussian posterior.
      kl_terms = (1 + 2*self.log_sigma - tf.square(self.mean)
                  - tf.exp(2*self.log_sigma))
      self.kl = -0.5 * tf.reduce_sum(kl_terms)
Exemple #31
0
def exp2(x):
    """Return 2**x elementwise, evaluated as exp(x * ln 2) in the 'Exp2' scope."""
    ln_two = np.float32(np.log(2.0))
    with tf.name_scope('Exp2'):
        scaled = x * ln_two
        return tf.exp(scaled)
def build_graph(mode, hparams_string, input_size, num_classes,
                sequence_example_file=None):
  """Builds the TensorFlow graph.

  The graph is a stack of dropout-wrapped LSTM cells run with
  tf.nn.dynamic_rnn, followed by a linear layer producing num_classes logits
  per timestep. Ops are exposed to callers through graph collections
  ('loss', 'perplexity', 'accuracy', 'global_step', 'learning_rate',
  'train_op', 'summary_op', 'inputs', 'initial_state', 'final_state',
  'softmax'), depending on the mode.

  Args:
    mode: 'train', 'eval', or 'generate'. Only mode related ops are added to
        the graph.
    hparams_string: A string literal of a Python dictionary, where keys are
        hyperparameter names and values replace default values. For example:
        '{"batch_size":64,"rnn_layer_sizes":[128,128]}'
    input_size: The size of the input vectors in the inputs batch. Each
        inputs batch should have a shape [batch_size, num_steps, input_size].
    num_classes: The number of classes the labels can be.
    sequence_example_file: A string path to a TFRecord file containing
        tf.train.SequenceExamples. Only needed for training and evaluation.

  Returns:
    A tf.Graph instance which contains the TF ops.

  Raises:
    ValueError: If mode is not 'train', 'eval', or 'generate', or if
        sequence_example_file does not match a file when mode is 'train' or
        'eval'.
  """
  if mode not in ('train', 'eval', 'generate'):
    raise ValueError('The mode parameter must be \'train\', \'eval\', '
                     'or \'generate\'. The mode parameter was: %s' % mode)

  with tf.Graph().as_default() as graph:
    # Start from the defaults and overlay the overrides in hparams_string.
    hparams = default_hparams()
    hparams = hparams.parse(hparams_string)
    tf.logging.info('hparams = %s', hparams.values())

    inputs, labels, lengths, = None, None, None
    state_is_tuple = True

    if mode == 'train' or mode == 'eval':
      # Inputs, labels and sequence lengths are read from the TFRecord file.
      inputs, labels, lengths = sequence_example_lib.get_padded_batch(
          [sequence_example_file], hparams.batch_size, input_size)

    elif mode == 'generate':
      # Inputs are fed by the caller; labels and lengths stay None.
      inputs = tf.placeholder(tf.float32, [hparams.batch_size, None,
                                           input_size])
      # If state_is_tuple is True, the output RNN cell state will be a tuple
      # instead of a tensor. During training and evaluation this improves
      # performance. However, during generation, the RNN cell state is fed
      # back into the graph with a feed dict. Feed dicts require passed in
      # values to be tensors and not tuples, so state_is_tuple is set to False.
      state_is_tuple = False

    # One dropout-wrapped BasicLSTMCell per entry in hparams.rnn_layer_sizes.
    # NOTE(review): the same dropout_keep_prob is applied in every mode,
    # including 'eval' and 'generate' -- confirm this is intended.
    cells = []
    for num_units in hparams.rnn_layer_sizes:
      cell = tf.nn.rnn_cell.BasicLSTMCell(
          num_units, state_is_tuple=state_is_tuple)
      cell = tf.nn.rnn_cell.DropoutWrapper(
          cell, output_keep_prob=hparams.dropout_keep_prob)
      cells.append(cell)

    cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=state_is_tuple)

    initial_state = cell.zero_state(hparams.batch_size, tf.float32)

    outputs, final_state = tf.nn.dynamic_rnn(
        cell, inputs, lengths, initial_state, parallel_iterations=1,
        swap_memory=True)

    # Collapse batch and time into one axis so a single linear layer maps
    # every timestep's output to num_classes logits.
    outputs_flat = tf.reshape(outputs, [-1, hparams.rnn_layer_sizes[-1]])
    logits_flat = tf.contrib.layers.linear(outputs_flat, num_classes)

    if mode == 'train' or mode == 'eval':
      if hparams.skip_first_n_losses:
        # Drop the first skip_first_n_losses timesteps from both logits and
        # labels so they contribute to neither the loss nor the accuracy.
        logits = tf.reshape(logits_flat, [hparams.batch_size, -1, num_classes])
        logits = logits[:, hparams.skip_first_n_losses:, :]
        logits_flat = tf.reshape(logits, [-1, num_classes])
        labels = labels[:, hparams.skip_first_n_losses:]

      labels_flat = tf.reshape(labels, [-1])
      loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits_flat, labels_flat))
      perplexity = tf.exp(loss)

      # Top-1 accuracy, expressed as a percentage.
      correct_predictions = tf.nn.in_top_k(logits_flat, labels_flat, 1)
      accuracy = tf.reduce_mean(tf.to_float(correct_predictions)) * 100

      global_step = tf.Variable(0, trainable=False, name='global_step')

      tf.add_to_collection('loss', loss)
      tf.add_to_collection('perplexity', perplexity)
      tf.add_to_collection('accuracy', accuracy)
      tf.add_to_collection('global_step', global_step)

      if mode == 'train':
        learning_rate = tf.train.exponential_decay(
            hparams.initial_learning_rate, global_step, hparams.decay_steps,
            hparams.decay_rate, staircase=True, name='learning_rate')

        # Gradients are clipped by global norm before Adam applies them;
        # apply_gradients also increments global_step.
        opt = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        gradients = tf.gradients(loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients,
                                                      hparams.clip_norm)
        train_op = opt.apply_gradients(zip(clipped_gradients, params),
                                       global_step)
        tf.add_to_collection('learning_rate', learning_rate)
        tf.add_to_collection('train_op', train_op)

        # Training summaries are created here but not merged into a
        # 'summary_op' collection entry (unlike the 'eval' branch below).
        tf.scalar_summary('loss', loss)
        tf.scalar_summary('perplexity', perplexity)
        tf.scalar_summary('accuracy', accuracy)
        tf.scalar_summary('learning_rate', learning_rate)

      if mode == 'eval':
        summary_op = tf.merge_summary([
            tf.scalar_summary('loss', loss),
            tf.scalar_summary('perplexity', perplexity),
            tf.scalar_summary('accuracy', accuracy)])

        tf.add_to_collection('summary_op', summary_op)

    elif mode == 'generate':
      # Expose per-timestep class probabilities plus the state tensors the
      # caller needs to step the RNN.
      softmax_flat = tf.nn.softmax(logits_flat)
      softmax = tf.reshape(softmax_flat, [hparams.batch_size, -1, num_classes])

      tf.add_to_collection('inputs', inputs)
      tf.add_to_collection('initial_state', initial_state)
      tf.add_to_collection('final_state', final_state)
      tf.add_to_collection('softmax', softmax)

  return graph
Exemple #33
0
    def _init_graph(self):
        """Build the prediction graph inside self.graph.

        Combines a collaborative-filtering (CF) embedding and a content-based
        (CB) feature tower for both user and item, mixes the two per side with
        a learned two-way attention, and sets self.prediction to the bias plus
        the dot product of the mixed user and item vectors.

        Reads self.train_features (column 0: user id, column 1: item id,
        columns 2 onward: feature ids), self.weights, self.train_phase,
        self.cs_ratio, self.dropout_keep and the layer-size attributes.
        Sets, among others: self.bias, self.cf_prediction, self.cb_prediction,
        self.a_cf_u / self.a_cb_u / self.a_cf_i / self.a_cb_i, self.u_vector,
        self.i_vector, self.lrp_layers_u / self.lrp_layers_i, self.prediction.
        """
        with self.graph.as_default():
            # Model.
            u_ids = self.train_features[:, 0]
            i_ids = self.train_features[:, 1]

            # cold sampling: draw one 0/1 flag per row, where class 1 has
            # probability cs_ratio. A flag of 1 marks the user (or item) as
            # "cold" for this row.
            drop_u_pos = tf.cast(tf.multinomial(
                tf.log([[1 - self.cs_ratio, self.cs_ratio]]),
                tf.shape(u_ids)[0]),
                                 dtype=self.d_type)
            drop_i_pos = tf.cast(tf.multinomial(
                tf.log([[1 - self.cs_ratio, self.cs_ratio]]),
                tf.shape(i_ids)[0]),
                                 dtype=self.d_type)
            drop_u_pos = tf.reshape(drop_u_pos, shape=[-1])
            drop_i_pos = tf.reshape(drop_i_pos, shape=[-1])
            drop_u_pos_zero = tf.zeros(shape=tf.shape(drop_u_pos),
                                       dtype=self.d_type)
            drop_i_pos_zero = tf.zeros(shape=tf.shape(drop_i_pos),
                                       dtype=self.d_type)

            # Outside the training phase the flags are all zero, i.e. nothing
            # is dropped.
            drop_u_pos = tf.cond(self.train_phase, lambda: drop_u_pos,
                                 lambda: drop_u_pos_zero)
            drop_i_pos = tf.cond(self.train_phase, lambda: drop_i_pos,
                                 lambda: drop_i_pos_zero)
            # Column-vector views used to broadcast against embedding rows.
            drop_u_pos_v = tf.reshape(drop_u_pos, shape=[-1, 1])
            drop_i_pos_v = tf.reshape(drop_i_pos, shape=[-1, 1])

            # bias: user/item biases are zeroed where the flag is set.
            self.u_bias = tf.nn.embedding_lookup(self.weights['user_bias'],
                                                 u_ids) * (1 - drop_u_pos)
            self.i_bias = tf.nn.embedding_lookup(self.weights['item_bias'],
                                                 i_ids) * (1 - drop_i_pos)
            self.cross_bias = tf.reduce_sum(tf.reduce_sum(
                tf.nn.embedding_lookup(self.weights['cross_bias'],
                                       self.train_features[:, 2:]),
                axis=1),
                                            axis=1)
            self.bias = self.cross_bias + self.weights['global_bias']
            self.bias += self.u_bias + self.i_bias

            # cf part
            cf_u_vectors = tf.nn.embedding_lookup(
                self.weights['uid_embeddings'], u_ids)
            cf_i_vectors = tf.nn.embedding_lookup(
                self.weights['iid_embeddings'], i_ids)

            # Gaussian noise (mean 0, stddev 0.01) that stands in for the CF
            # embedding of rows flagged as cold.
            random_u_vectors = tf.random_normal(tf.shape(cf_u_vectors),
                                                0,
                                                0.01,
                                                dtype=self.d_type)
            random_i_vectors = tf.random_normal(tf.shape(cf_i_vectors),
                                                0,
                                                0.01,
                                                dtype=self.d_type)

            cf_u_vectors = random_u_vectors * drop_u_pos_v + cf_u_vectors * (
                1 - drop_u_pos_v)
            cf_i_vectors = random_i_vectors * drop_i_pos_v + cf_i_vectors * (
                1 - drop_i_pos_v)

            cf_u_vectors = tf.nn.dropout(cf_u_vectors, self.dropout_keep)
            cf_i_vectors = tf.nn.dropout(cf_i_vectors, self.dropout_keep)

            self.cf_prediction = tf.reduce_sum(tf.multiply(
                cf_u_vectors, cf_i_vectors),
                                               axis=1)

            # cb part: the first user_feature_num feature columns belong to
            # the user, the remaining ones to the item.
            u_fs = self.train_features[:, 2:2 + self.user_feature_num]
            i_fs = self.train_features[:, 2 + self.user_feature_num:]
            uf_vectors = tf.nn.embedding_lookup(
                self.weights['feature_embeddings'], u_fs)
            if_vectors = tf.nn.embedding_lookup(
                self.weights['feature_embeddings'], i_fs)

            # Pairwise-interaction term computed as
            # 0.5 * ((sum of embeddings)^2 - sum of squared embeddings).
            summed_u_features_emb = tf.reduce_sum(uf_vectors, axis=1)
            summed_u_features_emb_square = tf.square(summed_u_features_emb)
            squared_u_features_emb = tf.square(uf_vectors)
            squared_sum_u_features_emb = tf.reduce_sum(squared_u_features_emb,
                                                       axis=1)
            u_fm = 0.5 * tf.subtract(summed_u_features_emb_square,
                                     squared_sum_u_features_emb)

            summed_i_features_emb = tf.reduce_sum(if_vectors, axis=1)
            summed_i_features_emb_square = tf.square(summed_i_features_emb)
            squared_i_features_emb = tf.square(if_vectors)
            squared_sum_i_features_emb = tf.reduce_sum(squared_i_features_emb,
                                                       axis=1)
            i_fm = 0.5 * tf.subtract(summed_i_features_emb_square,
                                     squared_sum_i_features_emb)

            # Tower input: flattened feature embeddings concatenated with the
            # interaction term, then batch norm and dropout.
            uf_layer = tf.reshape(
                uf_vectors, (-1, self.f_vector_size * self.user_feature_num))
            uf_layer = tf.concat([uf_layer, u_fm], axis=1)
            uf_layer = tf.layers.batch_normalization(uf_layer,
                                                     training=self.train_phase,
                                                     name='u_bn_fs')
            uf_layer = tf.nn.dropout(uf_layer, self.dropout_keep)
            if_layer = tf.reshape(
                if_vectors, (-1, self.f_vector_size * self.item_feature_num))
            if_layer = tf.concat([if_layer, i_fm], axis=1)
            if_layer = tf.layers.batch_normalization(if_layer,
                                                     training=self.train_phase,
                                                     name='i_bn_fs')
            if_layer = tf.nn.dropout(if_layer, self.dropout_keep)

            # The loop runs len(cb_hidden_layers) + 1 times: every hidden
            # layer gets ReLU + dropout and is recorded in lrp_layers_*, while
            # the final (output) layer is affine + batch norm only.
            self.lrp_layers_u, self.lrp_layers_i = [uf_layer], [if_layer]
            for i in range(0, len(self.cb_hidden_layers) + 1):
                uf_layer = tf.add(
                    tf.matmul(uf_layer, self.weights['cb_user_layer_%d' % i]),
                    self.weights['cb_user_bias_%d' % i])
                if_layer = tf.add(
                    tf.matmul(if_layer, self.weights['cb_item_layer_%d' % i]),
                    self.weights['cb_item_bias_%d' % i])
                uf_layer = tf.layers.batch_normalization(
                    uf_layer, training=self.train_phase, name='u_bn_%d' % i)
                if_layer = tf.layers.batch_normalization(
                    if_layer, training=self.train_phase, name='i_bn_%d' % i)
                if i < len(self.cb_hidden_layers):
                    uf_layer = tf.nn.relu(uf_layer)
                    uf_layer = tf.nn.dropout(uf_layer, self.dropout_keep)
                    if_layer = tf.nn.relu(if_layer)
                    if_layer = tf.nn.dropout(if_layer, self.dropout_keep)
                    self.lrp_layers_u.append(uf_layer)
                    self.lrp_layers_i.append(if_layer)
            cb_u_vectors, cb_i_vectors = uf_layer, if_layer
            self.cb_prediction = tf.reduce_sum(tf.multiply(
                cb_u_vectors, cb_i_vectors),
                                               axis=1)

            # attention: the same attention weights score the CF and the CB
            # vector; exp(score) normalised by the sum of the two gives the
            # mixing coefficients (a two-way softmax), first for the user side.
            ah_cf_u = tf.add(
                tf.matmul(cf_u_vectors, self.weights['attention_weights']),
                self.weights['attention_bias'])
            ah_cf_u = tf.tanh(ah_cf_u)
            # ah_cf_u = tf.nn.relu(ah_cf_u)
            ah_cf_u = tf.nn.dropout(ah_cf_u, self.dropout_keep)

            a_cf_u = tf.reduce_sum(tf.multiply(ah_cf_u,
                                               self.weights['attention_pre']),
                                   axis=1)
            a_cf_u = tf.exp(a_cf_u)
            ah_cb_u = tf.add(
                tf.matmul(cb_u_vectors, self.weights['attention_weights']),
                self.weights['attention_bias'])
            ah_cb_u = tf.tanh(ah_cb_u)
            # ah_cb_u = tf.nn.relu(ah_cb_u)
            ah_cb_u = tf.nn.dropout(ah_cb_u, self.dropout_keep)

            a_cb_u = tf.reduce_sum(tf.multiply(ah_cb_u,
                                               self.weights['attention_pre']),
                                   axis=1)
            a_cb_u = tf.exp(a_cb_u)
            a_sum = a_cf_u + a_cb_u

            self.a_cf_u = tf.reshape(a_cf_u / a_sum, shape=[-1, 1])
            self.a_cb_u = tf.reshape(a_cb_u / a_sum, shape=[-1, 1])

            # Same attention computation for the item side.
            ah_cf_i = tf.add(
                tf.matmul(cf_i_vectors, self.weights['attention_weights']),
                self.weights['attention_bias'])
            ah_cf_i = tf.tanh(ah_cf_i)
            # ah_cf_i = tf.nn.relu(ah_cf_i)
            ah_cf_i = tf.nn.dropout(ah_cf_i, self.dropout_keep)

            a_cf_i = tf.reduce_sum(tf.multiply(ah_cf_i,
                                               self.weights['attention_pre']),
                                   axis=1)
            a_cf_i = tf.exp(a_cf_i)
            ah_cb_i = tf.add(
                tf.matmul(cb_i_vectors, self.weights['attention_weights']),
                self.weights['attention_bias'])
            ah_cb_i = tf.tanh(ah_cb_i)
            # ah_cb_i = tf.nn.relu(ah_cb_i)
            ah_cb_i = tf.nn.dropout(ah_cb_i, self.dropout_keep)

            a_cb_i = tf.reduce_sum(tf.multiply(ah_cb_i,
                                               self.weights['attention_pre']),
                                   axis=1)
            a_cb_i = tf.exp(a_cb_i)
            a_sum = a_cf_i + a_cb_i

            self.a_cf_i = tf.reshape(a_cf_i / a_sum, shape=[-1, 1])
            self.a_cb_i = tf.reshape(a_cb_i / a_sum, shape=[-1, 1])

            # prediction: attention-weighted mix of CF and CB vectors per
            # side, then bias + dot product of the mixed vectors.
            self.u_vector = self.a_cf_u * cf_u_vectors + self.a_cb_u * cb_u_vectors
            self.i_vector = self.a_cf_i * cf_i_vectors + self.a_cb_i * cb_i_vectors
            self.prediction = self.bias + tf.reduce_sum(
                tf.multiply(self.u_vector, self.i_vector), axis=1)
Exemple #34
0
def std_clip_transform(stddevs):
    """Clip every tensor in `stddevs` to [-20, 2], then exponentiate the result."""

    def _clip(tensor):
        return tf.clip_by_value(tensor, -20, 2)

    clipped = tf.nest.map_structure(_clip, stddevs)
    return tf.exp(clipped)
Exemple #35
0
    def critic_loss(self,
                    time_steps,
                    actions,
                    next_time_steps,
                    td_errors_loss_fn,
                    gamma=1.0,
                    reward_scale_factor=1.0,
                    weights=None):
        """Build the SAC critic loss for a batch of transitions.

        Args:
          time_steps: A batch of timesteps.
          actions: A batch of actions.
          next_time_steps: A batch of next timesteps.
          td_errors_loss_fn: A function(td_targets, predictions) returning an
            elementwise (per-batch-entry) loss.
          gamma: Discount for future rewards.
          reward_scale_factor: Multiplicative factor to scale rewards.
          weights: Optional scalar or elementwise (per-batch-entry) importance
            weights.

        Returns:
          critic_loss: A scalar critic loss.
        """
        with tf.name_scope('critic_loss'):
            tf.nest.assert_same_structure(actions, self.action_spec)
            tf.nest.assert_same_structure(time_steps, self.time_step_spec)
            tf.nest.assert_same_structure(next_time_steps, self.time_step_spec)

            next_actions, next_log_pis = self._actions_and_log_probs(
                next_time_steps)

            # Both target critics see the same (observation, action) input.
            next_inputs = (next_time_steps.observation, next_actions)
            next_step_type = next_time_steps.step_type
            target_q1, _ = self._target_critic_network1(
                next_inputs, next_step_type)
            target_q2, _ = self._target_critic_network2(
                next_inputs, next_step_type)

            # Minimum of the two target critics, minus the entropy term
            # exp(log_alpha) * log pi.
            min_target_q = tf.minimum(target_q1, target_q2)
            soft_target_q = (
                min_target_q - tf.exp(self._log_alpha) * next_log_pis)

            # Targets are constants with respect to the critic parameters.
            td_targets = tf.stop_gradient(
                reward_scale_factor * next_time_steps.reward +
                gamma * next_time_steps.discount * soft_target_q)

            current_inputs = (time_steps.observation, actions)
            current_step_type = time_steps.step_type
            pred_td_targets1, _ = self._critic_network1(
                current_inputs, current_step_type)
            pred_td_targets2, _ = self._critic_network2(
                current_inputs, current_step_type)

            critic_loss = (td_errors_loss_fn(td_targets, pred_td_targets1) +
                           td_errors_loss_fn(td_targets, pred_td_targets2))

            if weights is not None:
                critic_loss *= weights

            # Take the mean across the batch.
            critic_loss = tf.reduce_mean(input_tensor=critic_loss)

            if self._debug_summaries:
                step = self.train_step_counter
                td_errors = tf.concat(
                    [td_targets - pred_td_targets1,
                     td_targets - pred_td_targets2],
                    axis=0)
                common.generate_tensor_summaries('td_errors', td_errors, step)
                common.generate_tensor_summaries('td_targets', td_targets,
                                                 step)
                common.generate_tensor_summaries('pred_td_targets1',
                                                 pred_td_targets1, step)
                common.generate_tensor_summaries('pred_td_targets2',
                                                 pred_td_targets2, step)

            return critic_loss
Exemple #36
0
def _MixN(fractions, Xs, name=None):
    """Build a mixture of (possibly multi-dimensional) component distributions.

    Each entry of `Xs` is either a single tensor (1D component) or an iterable
    of tensors (one per dimension); `fractions` gives one scale per component.
    The components are looked up in, and removed from, the current model's
    `_description`, and `_silently_replace` is updated so they resolve to the
    new mixture placeholders. `Distribution.logp`, `Distribution.integral` and
    `Distribution.depends` are set as side effects.

    Returns a tuple of placeholders, one per dimension.

    Raises DistributionError if the components differ in dimensionality.
    """
    # Convert Xs to be iterable in case we are dealing with the 1D case
    Xs = [(x, ) if isinstance(x, tf.Tensor) else tuple(x) for x in Xs]

    # Ensure all subdistributions have the same dimensionality
    if len(set(len(x) for x in Xs)) != 1:
        raise DistributionError("All components passed to 'MixN' must have "
                                "the same dimensionality.")

    n_dims = len(Xs[0])

    # One placeholder per dimension; all share the same `name` argument.
    nd_X = tuple(
        tf.placeholder(config.dtype, name=name) for i in range(n_dims))
    nd_mix_bounds = Distribution.bounds(n_dims)

    current_model = Model._current_model

    full_pdf = []
    all_integrals = []
    for dists, f_scale in zip(Xs, fractions):
        nd_logps = []
        nd_bounds = []
        nd_integrals = []
        nd_normalisation_1 = []
        for dist, mix_bounds, X in zip(dists, nd_mix_bounds, nd_X):
            logp, integral, bounds, frac, _ = current_model._description[dist]
            # Restrict the component to the bounds it shares with the mixture
            # and normalise its log-probability over that region.
            bounds = find_common_bounds(mix_bounds, bounds)
            normalisation_1 = _integrate_component(bounds, integral)

            nd_logps.append(logp - tf.log(normalisation_1))
            nd_bounds.append(bounds)
            nd_integrals.append(integral)
            nd_normalisation_1.append(normalisation_1)

            # Modify the current model to recognize that 'deps' has been removed
            if dist in current_model._silently_replace.values():
                # We need to copy the items to a list as we're adding items
                for key, value in list(
                        current_model._silently_replace.items()):
                    if value != dist:
                        continue
                    current_model._silently_replace[value] = X
                    current_model._silently_replace[key] = X
                    # Guarded because several keys may map to the same dist,
                    # in which case an earlier iteration already deleted it.
                    if dist in current_model._description:
                        del current_model._description[dist]

            else:
                current_model._silently_replace[dist] = X
                del current_model._description[dist]

            _recurse_deps(dist, f_scale, bounds)

        # Summing the per-dimension log-probabilities multiplies the
        # per-dimension densities; the result is scaled by the fraction.
        full_pdf.append(f_scale * tf.exp(tf.add_n(nd_logps)))

        all_integrals.append(
            (f_scale, nd_bounds, nd_integrals, nd_normalisation_1))

    # Set properties on Distribution
    Distribution.logp = tf.log(tf.add_n(full_pdf))

    def _integral(lower, upper):
        # Integral of the mixture over [lower, upper], summed over components.
        # NOTE(review): the per-dimension normalisations are combined with
        # tf.add_n (a sum), whereas the pdf above combines dimensions
        # multiplicatively -- confirm this is intended for n_dims > 1.
        result = []
        for f_scale, nd_bounds, nd_integral, normalisation_1 in all_integrals:
            nd_normalisation_2 = []
            for bounds, integral in zip(nd_bounds, nd_integral):
                integral_bounds = find_common_bounds([Region(lower, upper)],
                                                     bounds)
                nd_normalisation_2.append(
                    _integrate_component(integral_bounds, integral))
            result.append(f_scale / tf.add_n(normalisation_1) *
                          tf.add_n(nd_normalisation_2))
        return tf.add_n(result)

    Distribution.integral = _integral

    # In the 1D case expose the bare tensors rather than 1-tuples.
    if n_dims == 1:
        Distribution.depends = [x[0] for x in Xs]
    else:
        Distribution.depends = Xs

    return nd_X
    reshaped_semb = tf.reshape( t_sup_emb, [-1, 1, embedding_dim] )
    reshaped_uemb = tf.reshape( t_unsup_emb, [-1, 1, embedding_dim] )
    
    stacked_semb = tf.stack(unsup_batch_size*[t_sup_emb], 1)
    stacked_uemb = tf.stack(sup_per_batch*NUM_LABELS*[t_unsup_emb], 1)
    
    uemb_T = tf.transpose(stacked_uemb, perm=[1,0,2])
     
    sigma = 1
    pairwise_dist = (stacked_semb - uemb_T)#, axis=2)
    pairwise_norm = tf.norm( pairwise_dist, axis=2)
    pairwise_sq   = tf.square(pairwise_norm)
    
    division = - tf.divide( pairwise_sq, tf.constant(2*sigma**2, dtype=tf.float32))
    
    match_ab   = tf.exp(division, name='match_ab')
    
    
    
    
    p_ab = tf.nn.softmax(match_ab, name='p_ab')
    p_ba = tf.nn.softmax(tf.transpose(match_ab), name='p_ba')
    p_aba = tf.matmul(p_ab, p_ba, name='p_aba')

    model.create_walk_statistics(p_aba, equality_matrix)
    
    loss_aba = tf.losses.softmax_cross_entropy(
        p_target,
        tf.log(1e-8 + p_aba),
        weights=walker_weight,
        scope='loss_aba')
Exemple #38
0
 def init_network(self):
     """
     Build the surrogate loss, KL, entropy and Fisher-vector-product ops.

     [input]
     self.obs
     self.action_n
     self.advant
     self.old_dist_means_n
     self.old_dist_logstds_n
     [output]
     self.action_dist_means_n
     self.action_dist_logstds_n
     var_list

     Also sets self.action_dist_stds_n, self.losses ([surr, kl, ent]),
     self.pg (flat policy gradient), self.flat_tangent, self.gvp and
     self.fvp, then initialises all variables in self.session.
     """
     # Clamp the log-std into [log(min_std), log(max_std)]. Each bound is
     # optional; the clamps are chained so that setting both keeps both
     # (the max clamp used to restart from the raw log-stds, discarding the
     # min clamp, and leaving both unset left log_std_var undefined).
     log_std_var = self.net.action_dist_logstds_n
     if self.pms.min_std is not None:
         log_std_var = tf.maximum(log_std_var, np.log(self.pms.min_std))
     if self.pms.max_std is not None:
         log_std_var = tf.minimum(log_std_var, np.log(self.pms.max_std))
     self.action_dist_stds_n = tf.exp(log_std_var)
     self.old_dist_info_vars = dict(mean=self.net.old_dist_means_n,
                                    log_std=self.net.old_dist_logstds_n)
     self.new_dist_info_vars = dict(mean=self.net.action_dist_means_n,
                                    log_std=self.net.action_dist_logstds_n)
     self.likehood_action_dist = self.distribution.log_likelihood_sym(
         self.net.action_n, self.new_dist_info_vars)
     self.ratio_n = self.distribution.likelihood_ratio_sym(
         self.net.action_n, self.new_dist_info_vars,
         self.old_dist_info_vars)
     surr = -tf.reduce_mean(
         self.ratio_n * self.net.advant)  # Surrogate loss
     batch_size = tf.shape(self.net.obs)[0]
     batch_size_float = tf.cast(batch_size, tf.float32)
     kl = tf.reduce_mean(
         self.distribution.kl_sym(self.old_dist_info_vars,
                                  self.new_dist_info_vars))
     ent = self.distribution.entropy(self.old_dist_info_vars)
     # ent = tf.reduce_sum(-p_n * tf.log(p_n + eps)) / Nf
     self.losses = [surr, kl, ent]
     var_list = self.net.var_list
     self.gf = GetFlat(var_list)  # get theta from var_list
     self.gf.session = self.session
     self.sff = SetFromFlat(var_list)  # set theta from var_List
     self.sff.session = self.session
     # get g
     self.pg = flatgrad(surr, var_list)
     # get A
     # KL divergence where first arg is fixed
     # replace old->tf.stop_gradient from previous kl
     kl_firstfixed = self.distribution.kl_sym_firstfixed(
         self.new_dist_info_vars) / batch_size_float
     grads = tf.gradients(kl_firstfixed, var_list)
     self.flat_tangent = tf.placeholder(dtype, shape=[None])
     # Slice the flat tangent vector into pieces shaped like each variable.
     start = 0
     tangents = []
     for shape in (var_shape(v) for v in var_list):
         size = np.prod(shape)
         param = tf.reshape(self.flat_tangent[start:(start + size)], shape)
         tangents.append(param)
         start += size
     # Gradient-vector product; differentiating it once more yields the
     # Fisher-vector product (Hessian of the KL times the tangent).
     self.gvp = [tf.reduce_sum(g * t) for (g, t) in zip(grads, tangents)]
     self.fvp = flatgrad(tf.reduce_sum(self.gvp), var_list)  # get kl''*p
     self.session.run(tf.global_variables_initializer())
     self.net.asyc_parameters(session=self.session)
Exemple #39
0
    def build_roi_network(self, model):
        ########################################
        #Region of Interest
        ########################################
        with tf.variable_scope('RoI'):
            if self.is_train:
                nms_num = cfg.max_nms_num
            else:
                nms_num = cfg.test_max_nms_num

            nms_thresh = cfg.nms_thresh
            # idx 0 : object가 없다
            # idx 1 : object가 있다
            cls = tf.nn.softmax(self.cls)
            scores = cls[:, 1]

            # anchor x1,y1,x2,y2 => x,y,w,h
            anchor_x = tf.add(self.anchors[:, 2], self.anchors[:, 0]) * 0.5
            anchor_y = tf.add(self.anchors[:, 3], self.anchors[:, 1]) * 0.5
            acnhor_w = tf.subtract(self.anchors[:, 2], self.anchors[:, 0]) + 1.0
            acnhor_h = tf.subtract(self.anchors[:, 3], self.anchors[:, 1]) + 1.0

            # 기존 앵커 값들은 다 정해져있으니, model이 내뱉는 값을에 acnhor 값을 곱해줌
            # 모델이 각 anchor마다 예측하는 4개의 좌표가 나옴
            # cood 값은 gt bbox 처럼 이미지 전체에서 좌표 값들임 (open cv2가 rectangle 그리듯이)
            # model이 예측한 bbox의 좌표(x, y, w, h)

            prdict_x = self.bbox[:, 0] * acnhor_w + anchor_x
            prdict_y = self.bbox[:, 1] * acnhor_h + anchor_y
            prdict_w = tf.exp(self.bbox[:, 2]) * acnhor_w
            prdict_h = tf.exp(self.bbox[:, 3]) * acnhor_h

            # model이 예측한 bbox의 좌표(x1, y1, x2, y2)
            # nms need x1,y1,x2,y2 instead of x,y,w,h
            predcit_x1 = prdict_x - prdict_w * 0.5
            predcit_y1 = prdict_y - prdict_h * 0.5
            predcit_x2 = prdict_x + prdict_w * 0.5
            predcit_y2 = prdict_y + prdict_h * 0.5
            predict_coord = tf.stack([predcit_x1, predcit_y1, predcit_x2, predcit_y2], axis=1)
            # predcit result는 model이 예측한 값을 anchor에 맞게 값을 변환 한 값임
            # 원본 이미지에서 각각의 앵커에 대해서 예측한 좌표값들

            # 좌표에서 min max 보정
            predcit_x1_ = tf.maximum(tf.minimum(predict_coord[:, 0], tf.cast((self.image_width - 1), tf.float32)), 0.0)
            predcit_y1_ = tf.maximum(tf.minimum(predict_coord[:, 1], tf.cast((self.image_height - 1), tf.float32)), 0.0)
            predcit_x2_ = tf.maximum(tf.minimum(predict_coord[:, 2], tf.cast((self.image_width - 1), tf.float32)), 0.0)
            predcit_y2_ = tf.maximum(tf.minimum(predict_coord[:, 3], tf.cast((self.image_height - 1), tf.float32)), 0.0)
            predict_coord = tf.stack([predcit_x1_, predcit_y1_, predcit_x2_, predcit_y2_], axis=1)

            nms_idx = tf.image.non_max_suppression(predict_coord, scores, max_output_size=nms_num, iou_threshold=nms_thresh)
            rois = tf.gather(predict_coord, nms_idx)
            rois_score = tf.gather(scores, nms_idx)
            # self.nms_idx = nms_idx
            # self.nms_idx = tf.reshape(nms_idx, [tf.shape(rois)[0]])
            # self.nms_idx = tf.reshape(self.nms_idx, [-1])
            # 모델이 예측한 좌표값에 대해서 NMS 한 결과
            # rois = tf.concat([batch_idxs, nms_predict_coord], 1)
            print('rois_score : ', rois_score)
            print('batch_inds : ', nms_idx)
            print('rois : ', rois)

            # 학습 할 때는 target layer에 대해서 proposal
            # RoI 중에서 256 batch 에 대해서 positive와 negative sample을 만듦
            if self.is_train:
                rois, rois_score, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = tf.py_func(
                        proposal_target,
                        [rois, rois_score, self.gt_boxes, cfg.num_classes, self.gt_cls],
                        [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
                        name="proposal_target")
                # rois.set_shape([cfg.dect_train_batch, 5])
                rois.set_shape([cfg.dect_train_batch, 4])
                rois_score.set_shape([cfg.dect_train_batch])
                labels.set_shape([cfg.dect_train_batch, 1])
                bbox_targets.set_shape([cfg.dect_train_batch, cfg.num_classes * 4])
                bbox_inside_weights.set_shape([cfg.dect_train_batch, cfg.num_classes * 4])
                bbox_outside_weights.set_shape([cfg.dect_train_batch, cfg.num_classes * 4])

                self.labels = tf.to_int32(labels)
                self.bbox_targets = bbox_targets
                self.bbox_inside_weights = bbox_inside_weights
                self.bbox_outside_weights = bbox_outside_weights

        ########################################
        # RoI Poolling
        ########################################
        # train 에서는 256개 대해서
        # infernce에서는 NMS roi 대해서
        with tf.variable_scope('RoI_pooing'):
            # batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1]), [1])
            # bottom_shape = tf.shape(model)
            # height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(cfg.feat_stride)
            # width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(cfg.feat_stride)
            # RoI는 원본이미지에서 모델이 예측한 좌표 값들임
            # x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
            # y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
            # x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
            # y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height

            x1, y1, x2, y2 = tf.split(value=rois, num_or_size_splits=4, axis=1)
            x1 = x1 / self.image_width
            y1 = y1 / self.image_height
            x2 = x2 / self.image_width
            y2 = y2 / self.image_height
            rois = tf.concat([x1, y1, x2, y2], 1)

            # Won't be back-propagated to rois anyway, but to save time
            # bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
            pre_pool_size = cfg.POOLING_SIZE * 2  # 7*2

            print('rois : ', rois)
            print('model : ', model)
            box_ind = tf.zeros((tf.shape(rois)[0]), dtype=tf.float32)

            # http://incredible.ai/deep-learning/2018/03/17/Faster-R-CNN/
            # Fixed-size Resize instead of ROI Pooling
            crops = tf.image.crop_and_resize(model, rois, tf.to_int32(box_ind), [pre_pool_size, pre_pool_size], method="bilinear",
                                             name="crops")
            crops = tf.layers.max_pooling2d(crops, pool_size=(2, 2), strides=(2, 2), padding='VALID')

            crops = tf.layers.flatten(crops)

            model = tf.layers.dense(crops, 4096,
                                    kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01),
                                    activation=tf.nn.relu)
            model = tf.layers.dense(model, 4096,
                                    kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01),
                                    activation=tf.nn.relu)
            cls_score = tf.layers.dense(model, cfg.num_classes,
                                        kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01),
                                        # kernel_regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay),
                                        activation=None, name='cls_score')

            # cls_prob = tf.nn.softmax(cls_score, name="cls_prob")
            # cls_pred = tf.argmax(cls_score, axis=1, name="cls_pred")

            bbox_score = tf.layers.dense(model, cfg.num_classes * 4,
                                        kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01),
                                        # kernel_regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay),
                                        activation=None, name='bbox_pred')

        # return cls_score, cls_pred, cls_prob, bbox_pred
        return cls_score, bbox_score, nms_idx
Exemple #40
0
    fl.write("########\n")
    for k,v in config.items():
        fl.write('{0}: {1}\n'.format(k, v))

"""
Encoder
"""
X_dim = train_x.shape[1] # Input dimension 
# Placeholders for input and latent space
X, z = inputs(X_dim, z_dim)
nn = NeuralNetwork(X_dim, h_dim, z_dim, transfer_fct = tf.nn.softplus)

if normalizing_flow:
    # z_mu, z_log_var, z0, flow_params = nn.encoder(X, z, X_dim, h_dim, z_dim, nFlows)
    z_mu, z_log_var, flow_params = nn.encoder_nf(X, z, X_dim, h_dim, z_dim, nFlows)
    z_var= tf.exp(z_log_var) # Get variance
else:
    z_mu, z_log_var= nn.enc_vanilla_vae(X) 
    z_var= tf.exp(z_log_var) # Get variance
     

# Sample the latent variables from the posterior using z_mu and z_logvar. 
# Reparametrization trick is implicit in this step. Reference: Section 3 Kingma et al (2013).
z0 = nn.sample_z(z_mu, z_var)

"""
Flow
"""
if normalizing_flow:
    if flow_type == "Planar":
        currentClass = NormalizingPlanarFlow(z0, z_dim)
Exemple #41
0
def recognition_rate_at_k(probe_x,
                          probe_y,
                          gallery_x,
                          gallery_y,
                          k,
                          measure=pdist):
    """Build the graph computing the recognition rate at rank `k`.

    For every probe, the gallery is ranked by the similarity
    ``exp(-measure(probe_x, gallery_x))`` (most similar first) and the `k`
    best gallery entries are kept. The value produced for a probe is::

        hits_in_top_k / min(k, num_relevant)

    where `hits_in_top_k` is the number of the `k` best-ranked gallery
    entries carrying the probe's label and `num_relevant` is the number of
    gallery entries carrying the probe's label overall.

    Parameters
    ----------
    probe_x: tf.Tensor
        A tensor of probe images.
    probe_y: tf.Tensor
        A tensor of probe labels.
    gallery_x: tf.Tensor
        A tensor of gallery images.
    gallery_y: tf.Tensor
        A tensor of gallery labels.
    k: int
        Number of top-ranked gallery entries to inspect per probe.
    measure: Callable[tf.Tensor, tf.Tensor] -> tf.Tensor
        A callable that computes for two matrices of row-vectors a matrix of
        element-wise distances. See `pdist` for an example.

    Returns
    -------
    tf.Tensor
        One recognition rate per probe (both numerator and denominator are
        reduced over the gallery axis only). A probe without any gallery
        entry of the same label yields 0 / 0.

    """
    # Pairwise identity indicator of shape (num_probes, num_gallery_images):
    # 1.0 where probe i and gallery entry j share a label, else 0.0.
    same_identity = tf.cast(
        tf.equal(tf.reshape(probe_y, (-1, 1)), tf.reshape(gallery_y, (1, -1))),
        tf.float32)

    # Denominator: relevant gallery entries per probe, capped at k. In a CMC
    # protocol with one gallery image per identity this is expected to be 1.
    relevant_cap = tf.cast(k, tf.float32)
    relevant_per_probe = tf.reduce_sum(same_identity, reduction_indices=[1])
    denominator = tf.minimum(relevant_cap, relevant_per_probe)

    # Turn distances into similarities so that top_k returns the closest
    # gallery entries first, then look up the labels of those entries.
    similarity = tf.exp(-measure(probe_x, gallery_x))
    ranked_indices = tf.nn.top_k(similarity, k=k)[1]
    ranked_labels = tf.gather(gallery_y, ranked_indices)

    # Indicator of shape (num_probes, k): 1.0 where the j-th ranked gallery
    # entry has the same label as probe i.
    hits = tf.cast(tf.equal(ranked_labels, tf.reshape(probe_y, (-1, 1))),
                   tf.float32)

    # Count the hits among the k best-ranked entries and normalise.
    hits_in_top_k = tf.reduce_sum(hits, reduction_indices=[1])
    return hits_in_top_k / denominator
Exemple #42
0
    def setup_model(self):
        """Build the TensorFlow graph, session, losses and train ops.

        Creates a fresh graph and session, optionally a `TabularAdversary`
        reward giver (only when `self.using_mdal` is truthy), the replay
        buffer, the policy and target policy with their placeholders, the
        actor and critic networks, the (fixed or learned) entropy coefficient,
        the target value network, all losses, the Adam optimizers, the Polyak
        target-update ops and the tensorboard summaries. Finally it
        initializes all variables and copies the value-function weights into
        the target network.

        Results are stored on `self` (e.g. `self.graph`, `self.sess`,
        `self.step_ops`, `self.infos_names`, `self.target_update_op`,
        `self.params`, `self.target_params`, `self.summary`); nothing is
        returned.
        """

        # prevent import loops
        # NOTE(review): TransitionClassifier is imported but not referenced
        # anywhere in this method.
        from stable_baselines.gail.adversary import TransitionClassifier

        from stable_baselines.mdal.adversary import TabularAdversary

        with SetVerbosity(self.verbose):
            self.graph = tf.Graph()
            with self.graph.as_default():
                self.set_random_seed(self.seed)
                self.sess = tf_util.make_session(num_cpu=self.n_cpu_tf_sess,
                                                 graph=self.graph)

                # NOTE(review): self.reward_giver is only assigned here when
                # using_mdal is truthy.
                if self.using_mdal:
                    self.reward_giver = TabularAdversary(
                        self.observation_space,
                        self.action_space,
                        self.hidden_size_adversary,
                        entcoeff=self.adversary_entcoeff,
                        expert_features=self.expert_dataset.successor_features,
                        exploration_bonus=self.exploration_bonus,
                        bonus_coef=self.bonus_coef)

                self.replay_buffer = ReplayBuffer(self.buffer_size)

                with tf.variable_scope("input", reuse=False):
                    # Create policy and target TF objects
                    self.policy_tf = self.policy(self.sess,
                                                 self.observation_space,
                                                 self.action_space,
                                                 **self.policy_kwargs)
                    self.target_policy = self.policy(self.sess,
                                                     self.observation_space,
                                                     self.action_space,
                                                     **self.policy_kwargs)

                    # Initialize Placeholders
                    self.observations_ph = self.policy_tf.obs_ph
                    # Normalized observation for pixels
                    self.processed_obs_ph = self.policy_tf.processed_obs
                    # Next observations are fed through the target policy's
                    # own placeholders.
                    self.next_observations_ph = self.target_policy.obs_ph
                    self.processed_next_obs_ph = self.target_policy.processed_obs
                    self.action_target = self.target_policy.action_ph
                    self.terminals_ph = tf.placeholder(tf.float32,
                                                       shape=(None, 1),
                                                       name='terminals')
                    self.rewards_ph = tf.placeholder(tf.float32,
                                                     shape=(None, 1),
                                                     name='rewards')
                    self.actions_ph = tf.placeholder(tf.float32,
                                                     shape=(None, ) +
                                                     self.action_space.shape,
                                                     name='actions')
                    self.learning_rate_ph = tf.placeholder(
                        tf.float32, [], name="learning_rate_ph")

                with tf.variable_scope("model", reuse=False):
                    # Create the policy
                    # first return value corresponds to deterministic actions
                    # policy_out corresponds to stochastic actions, used for training
                    # logp_pi is the log probability of actions taken by the policy
                    self.deterministic_action, policy_out, logp_pi = self.policy_tf.make_actor(
                        self.processed_obs_ph)
                    # Monitor the entropy of the policy,
                    # this is not used for training
                    self.entropy = tf.reduce_mean(self.policy_tf.entropy)
                    #  Use two Q-functions to improve performance by reducing overestimation bias.
                    qf1, qf2, value_fn = self.policy_tf.make_critics(
                        self.processed_obs_ph,
                        self.actions_ph,
                        create_qf=True,
                        create_vf=True)
                    # Same critics (reuse=True) evaluated on the actions
                    # sampled from the current policy instead of the
                    # placeholder actions.
                    qf1_pi, qf2_pi, _ = self.policy_tf.make_critics(
                        self.processed_obs_ph,
                        policy_out,
                        create_qf=True,
                        create_vf=False,
                        reuse=True)

                    # Target entropy is used when learning the entropy coefficient
                    if self.target_entropy == 'auto':
                        # automatically set target entropy if needed
                        self.target_entropy = -np.prod(
                            self.action_space.shape).astype(np.float32)
                    else:
                        # Force conversion
                        # this will also throw an error for unexpected string
                        self.target_entropy = float(self.target_entropy)

                    # The entropy coefficient or entropy can be learned automatically
                    # see Automating Entropy Adjustment for Maximum Entropy RL section
                    # of https://arxiv.org/abs/1812.05905
                    if isinstance(self.ent_coef,
                                  str) and self.ent_coef.startswith('auto'):
                        # Default initial value of ent_coef when learned
                        init_value = 1.0
                        # An 'auto_<value>' string carries the initial value
                        # after the underscore.
                        if '_' in self.ent_coef:
                            init_value = float(self.ent_coef.split('_')[1])
                            assert init_value > 0., "The initial value of ent_coef must be greater than 0"

                        # The coefficient is parameterized through its log, so
                        # exp() keeps it positive.
                        self.log_ent_coef = tf.get_variable(
                            'log_ent_coef',
                            dtype=tf.float32,
                            initializer=np.log(init_value).astype(np.float32))
                        self.ent_coef = tf.exp(self.log_ent_coef)
                    else:
                        # Force conversion to float
                        # this will throw an error if a malformed string (different from 'auto')
                        # is passed
                        self.ent_coef = float(self.ent_coef)

                with tf.variable_scope("target", reuse=False):
                    # Create the value network
                    _, _, value_target = self.target_policy.make_critics(
                        self.processed_next_obs_ph,
                        create_qf=False,
                        create_vf=True)
                    self.value_target = value_target

                with tf.variable_scope("loss", reuse=False):
                    # Take the min of the two Q-Values (Double-Q Learning)
                    min_qf_pi = tf.minimum(qf1_pi, qf2_pi)

                    # Target for Q value regression
                    q_backup = tf.stop_gradient(self.rewards_ph +
                                                (1 - self.terminals_ph) *
                                                self.gamma * self.value_target)

                    # Compute Q-Function loss
                    # TODO: test with huber loss (it would avoid too high values)
                    qf1_loss = 0.5 * tf.reduce_mean((q_backup - qf1)**2)
                    qf2_loss = 0.5 * tf.reduce_mean((q_backup - qf2)**2)

                    # Compute the entropy temperature loss
                    # it is used when the entropy coefficient is learned
                    ent_coef_loss, entropy_optimizer = None, None
                    # ent_coef is a plain float when fixed and a tensor when
                    # learned (see the "model" scope above).
                    if not isinstance(self.ent_coef, float):
                        ent_coef_loss = -tf.reduce_mean(
                            self.log_ent_coef *
                            tf.stop_gradient(logp_pi + self.target_entropy))
                        entropy_optimizer = tf.train.AdamOptimizer(
                            learning_rate=self.learning_rate_ph)

                    # Compute the policy loss
                    # Alternative: policy_kl_loss = tf.reduce_mean(logp_pi - min_qf_pi)
                    # NOTE(review): the policy loss uses qf1_pi only; min_qf_pi
                    # is used for the value target below.
                    policy_kl_loss = tf.reduce_mean(self.ent_coef * logp_pi -
                                                    qf1_pi)

                    # NOTE: in the original implementation, they have an additional
                    # regularization loss for the Gaussian parameters
                    # this is not used for now
                    # policy_loss = (policy_kl_loss + policy_regularization_loss)
                    policy_loss = policy_kl_loss

                    # Target for value fn regression
                    # We update the vf towards the min of two Q-functions in order to
                    # reduce overestimation bias from function approximation error.
                    v_backup = tf.stop_gradient(min_qf_pi -
                                                self.ent_coef * logp_pi)
                    value_loss = 0.5 * tf.reduce_mean((value_fn - v_backup)**2)

                    values_losses = qf1_loss + qf2_loss + value_loss

                    # Policy train op
                    # (has to be separate from value train op, because min_qf_pi appears in policy_loss)
                    policy_optimizer = tf.train.AdamOptimizer(
                        learning_rate=self.learning_rate_ph)
                    policy_train_op = policy_optimizer.minimize(
                        policy_loss,
                        var_list=tf_util.get_trainable_vars('model/pi'))

                    # Value train op
                    value_optimizer = tf.train.AdamOptimizer(
                        learning_rate=self.learning_rate_ph)
                    values_params = tf_util.get_trainable_vars(
                        'model/values_fn')

                    # NOTE(review): source_params queries the same scope as
                    # values_params above.
                    source_params = tf_util.get_trainable_vars(
                        "model/values_fn")
                    target_params = tf_util.get_trainable_vars(
                        "target/values_fn")

                    # Polyak averaging for target variables
                    self.target_update_op = [
                        tf.assign(target,
                                  (1 - self.tau) * target + self.tau * source)
                        for target, source in zip(target_params, source_params)
                    ]
                    # Initializing target to match source variables
                    target_init_op = [
                        tf.assign(target, source)
                        for target, source in zip(target_params, source_params)
                    ]

                    # Control flow is used because sess.run otherwise evaluates in nondeterministic order
                    # and we first need to compute the policy action before computing q values losses
                    with tf.control_dependencies([policy_train_op]):
                        train_values_op = value_optimizer.minimize(
                            values_losses, var_list=values_params)

                        # Names of the scalars reported for each training step.
                        self.infos_names = [
                            'policy_loss', 'qf1_loss', 'qf2_loss',
                            'value_loss', 'entropy'
                        ]
                        # All ops to call during one training step
                        self.step_ops = [
                            policy_loss, qf1_loss, qf2_loss, value_loss, qf1,
                            qf2, value_fn, logp_pi, self.entropy,
                            policy_train_op, train_values_op
                        ]

                        # Add entropy coefficient optimization operation if needed
                        if ent_coef_loss is not None:
                            # The entropy-coefficient update is ordered after
                            # the value update.
                            with tf.control_dependencies([train_values_op]):
                                ent_coef_op = entropy_optimizer.minimize(
                                    ent_coef_loss, var_list=self.log_ent_coef)
                                self.infos_names += [
                                    'ent_coef_loss', 'ent_coef'
                                ]
                                self.step_ops += [
                                    ent_coef_op, ent_coef_loss, self.ent_coef
                                ]

                    # Monitor losses and entropy in tensorboard
                    tf.summary.scalar('policy_loss', policy_loss)
                    tf.summary.scalar('qf1_loss', qf1_loss)
                    tf.summary.scalar('qf2_loss', qf2_loss)
                    tf.summary.scalar('value_loss', value_loss)
                    tf.summary.scalar('entropy', self.entropy)
                    if ent_coef_loss is not None:
                        tf.summary.scalar('ent_coef_loss', ent_coef_loss)
                        tf.summary.scalar('ent_coef', self.ent_coef)

                    tf.summary.scalar('learning_rate',
                                      tf.reduce_mean(self.learning_rate_ph))

                # Retrieve parameters that must be saved
                self.params = tf_util.get_trainable_vars("model")
                self.target_params = tf_util.get_trainable_vars(
                    "target/values_fn")

                # Initialize Variables and target network
                with self.sess.as_default():
                    self.sess.run(tf.global_variables_initializer())
                    self.sess.run(target_init_op)

                self.summary = tf.summary.merge_all()
 def softmax_with_mask(self, h, mask):
     """Normalise `exp(h)` over the last axis while honouring `mask`.

     The exponentials are multiplied by `mask`, summed over the last axis and
     divided by that sum; `(1 - mask)` is then added to the result, so for a
     0/1 mask every masked-out position ends up with the value 1.

     NOTE(review): the transpose / divide / transpose sequence appears to
     assume 2-D inputs (rows are divided by their own sum) -- confirm with the
     callers.
     """
     masked_logits = h * mask
     numerator = tf.exp(masked_logits) * mask
     denominator = tf.reduce_sum(numerator, axis=-1)
     # Transposing puts the reduced axis last-to-first so the per-row sums
     # broadcast against it; the second transpose restores the layout.
     normalised = tf.transpose(tf.transpose(numerator) / denominator)
     return normalised + (1 - mask)
 def call(self, x: types.TensorType,
          y: types.TensorType) -> types.TensorType:
     """Evaluates the Laplacian kernel `exp(-||x - y|| / kernel_length)`.

     The norm is taken along axis 2 of `x - y` using `tf.norm`'s default
     order, and the length scale is read from `self.kernel_length`.
     """
     distance = tf.norm(x - y, axis=2)
     return tf.exp(-distance / self.kernel_length)
 def call(self, inputs):
     """Draw a latent sample as `mean + exp(0.5 * log_var) * noise`.

     `inputs` is unpacked into `(z_mean, z_log_var)`. The noise comes from
     `tf.keras.backend.random_normal` with a shape built from the first two
     dynamic dimensions of `z_mean`.
     """
     mean, log_variance = inputs
     batch_size = tf.shape(mean)[0]
     latent_dim = tf.shape(mean)[1]
     noise = tf.keras.backend.random_normal(shape=(batch_size, latent_dim))
     std_dev = tf.exp(0.5 * log_variance)
     return mean + std_dev * noise
Exemple #46
0
def streaming_mean_averge_precision(probe_x,
                                    probe_y,
                                    gallery_x,
                                    gallery_y,
                                    good_mask,
                                    measure=pdist):
    """Compute mean average precision (mAP) over a stream of data.

    For every probe the gallery is ranked by the similarity
    ``exp(-measure(probe_x, gallery_x))``. Pairs disabled through `good_mask`
    are removed from the computation: they count neither as relevant items
    nor as retrieved items.

    Parameters
    ----------
    probe_x: tf.Tensor
        A tensor of N probe images.
    probe_y: tf.Tensor
        A tensor of N probe labels.
    gallery_x: tf.Tensor
        A tensor of M gallery images.
    gallery_y: tf.Tensor
        A tensor of M gallery labels.
    good_mask: Optional[tf.Tensor]
        A matrix of shape NxM where element (i, j) evaluates to 0.0 if the pair
        of i-th probe and j-th gallery image should be excluded from metric
        computation. All other elements should evaluate to 1.0. Pass `None`
        to include every pair.
    measure: Callable[tf.Tensor, tf.Tensor] -> tf.Tensor
        A callable that computes for two matrices of row-vectors a matrix of
        element-wise distances. See `pdist` for an example.

    Returns
    -------
    Tuple[tf.Tensor, tf.Tensor]
        The first element in the tuple is the current result. The second element
        is an operation that updates the computed metric based on new data.

    Notes
    -----
    A probe without any relevant (non-excluded, same-label) gallery item
    contributes an average precision of 0 instead of 0 / 0 = NaN, so a single
    such probe can no longer turn the running mean into NaN.

    """
    # See Wikipedia:
    # https://en.wikipedia.org/wiki/Information_retrieval#Average_precision
    similarity = tf.exp(-measure(probe_x, gallery_x))

    if good_mask is None:
        # The parameter is documented as optional: no mask keeps all pairs.
        good_mask = tf.ones_like(similarity)
    else:
        # Cast and broadcast to the full (N, M) shape so that the mask can be
        # reordered element by element below.
        good_mask = tf.cast(good_mask, tf.float32) * tf.ones_like(similarity)

    # Excluded pairs get similarity 0, which ranks them behind every included
    # pair (exp(.) is positive unless it underflows).
    predictions = good_mask * similarity

    # Rank the whole gallery for every probe.
    num_gallery = tf.shape(gallery_y)[0]
    _, prediction_indices = tf.nn.top_k(predictions, k=num_gallery)
    predicted_label_mat = tf.gather(gallery_y, prediction_indices)

    # `good_mask` is indexed by gallery position whereas everything below is
    # indexed by rank. Bring the mask into rank order by gathering from the
    # flattened mask with per-row offsets; multiplying the two orderings
    # directly would pair unrelated entries.
    row_offsets = tf.expand_dims(
        tf.range(tf.shape(predictions)[0]) * num_gallery, 1)
    ranked_mask = tf.gather(
        tf.reshape(good_mask, [-1]), prediction_indices + row_offsets)

    # 1.0 where the j-th ranked gallery item has the probe's label and the
    # pair is not excluded.
    label_eq_mat = ranked_mask * tf.cast(
        tf.equal(predicted_label_mat, tf.reshape(probe_y, (-1, 1))),
        tf.float32)

    # Compute statistics.
    num_relevant = tf.reduce_sum(label_eq_mat, axis=1)
    true_positives_at_k = tf.cumsum(label_eq_mat, axis=1)
    # Only non-excluded items count as retrieved. The lower bound of 1 avoids
    # 0 / 0 at positions where nothing has been retrieved yet (the numerator
    # is 0 there as well).
    retrieved_at_k = tf.maximum(tf.cumsum(ranked_mask, axis=1), 1.0)
    precision_at_k = true_positives_at_k / retrieved_at_k
    relevant_at_k = label_eq_mat
    # The numerator is 0 whenever num_relevant is 0, so clamping the
    # denominator to 1 yields an average precision of 0 for such probes.
    average_precision = (
        tf.reduce_sum(precision_at_k * relevant_at_k, axis=1) /
        tf.maximum(num_relevant, 1.0))

    return slim.metrics.streaming_mean(average_precision)
Exemple #47
0
def log_normal_pdf(sample, mean, logvar, raxis=1):
    """Return the diagonal-Gaussian log-density of `sample`, summed over `raxis`.

    Each element contributes
    ``-0.5 * ((sample - mean)**2 * exp(-logvar) + logvar + log(2 * pi))``.
    """
    log_two_pi = tf.math.log(2. * np.pi)
    squared_error = (sample - mean)**2.
    inverse_variance = tf.exp(-logvar)
    per_element = -.5 * (
        squared_error * inverse_variance + logvar + log_two_pi)
    return tf.reduce_sum(per_element, axis=raxis)
Exemple #48
0
def exp2(x: TfExpressionEx) -> TfExpression:
    """Compute 2**x as exp(x * ln 2) inside the "Exp2" name scope."""
    ln_two = np.float32(np.log(2.0))
    with tf.name_scope("Exp2"):
        scaled = x * ln_two
        return tf.exp(scaled)
 def prob(self, given, group_ndims=0, name=None):
     """Return `exp(self.log_prob(given, group_ndims, name))`.

     The ops are created inside a name scope that uses `name`, falling back
     to 'prob' when `name` is None.
     """
     with tf.name_scope(name=name, default_name='prob'):
         return tf.exp(self.log_prob(given, group_ndims, name))
# Encoder model: maps `original_inputs` to the latent tensor `z` (both are
# defined earlier in the script).
encoder = tf.keras.Model(inputs=original_inputs, outputs=z, name='encoder')

# Define decoder model.
latent_inputs = tf.keras.Input(shape=(latent_dim,), name='z_sampling')
# Repeat the latent vector `original_dim` times so the recurrent layer
# receives a sequence of that length.
x = layers.RepeatVector(original_dim)(latent_inputs)
x = layers.CuDNNLSTM(intermediate_dim, return_sequences=True)(x)
# Apply the same Dense(1) layer to every step of the returned sequence.
outputs = layers.TimeDistributed(layers.Dense(1))(x)
decoder = tf.keras.Model(inputs=latent_inputs, outputs=outputs, name='decoder')

# Define VAE model.
# The decoder is applied directly to the encoder's latent tensor `z`;
# `input_err` is a second model input and is also passed to the loss below.
outputs = decoder(z)
vae = tf.keras.Model(inputs=[original_inputs, input_err], outputs=outputs, name='vae')

# Add KL divergence regularization loss.
# The mean is taken over all elements of the expression.
kl_loss = - 0.5 * tf.reduce_mean(
    z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
vae.add_loss(kl_loss)

optimizer = tf.keras.optimizers.Adam(clipvalue=0.5) # SGD(lr=3e-4, clipvalue=0.5)

# NOTE(review): `chi2` is defined elsewhere; it is called with `input_err`
# and presumably returns the loss callable -- confirm.
vae.compile(optimizer, loss=chi2(input_err))


# Report the KL term next to the compiled loss.
# NOTE(review): this appends to the model's `metrics_tensors` /
# `metrics_names` lists directly -- confirm the installed Keras version
# exposes these attributes.
vae.metrics_tensors.append(kl_loss)
vae.metrics_names.append("kl_loss")

# vae.metrics_tensors.append(chi2_nonfunc)
# vae.metrics_names.append("chi2_loss")

# vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')
# vae.add_metric(chi2(input_err), name='mse_loss', aggregation='mean')
import tensorflow as tf
import tensorflow_probability as tfp

# Single-layer model whose output is a Normal distribution with fixed
# parameters: loc 0 and scale exp(0) = 1. The layer input `t` is ignored by
# the distribution factory. When the distribution has to be converted to a
# tensor, `sample(5)` is called on it.
model = tf.keras.Sequential([
    tfp.layers.DistributionLambda(
        make_distribution_fn=lambda t: tfp.distributions.Normal(
            loc=0., scale=tf.exp(0.)),
        convert_to_tensor_fn=lambda s: s.sample(5))
])
Exemple #52
0
def loss(self, args):
    """Build the note and voicing losses and return their sum.

    Inside the "losses" name scope this reads the first channel of
    `self.annotations`, shifts it by `args.min_note`, and treats values
    greater than 0 as voiced. Depending on which logits are present it adds:

    * a note loss -- weighted sigmoid cross-entropy against Gaussian-smoothed
      targets when `args.annotation_smoothing > 0`, otherwise a sparse
      softmax cross-entropy weighted by the voicing reference;
    * a voicing loss -- sigmoid cross-entropy on `self.voicing_logits`.

    The losses returned by `_common_losses` are appended, all of them are
    handed to `_common_loss_metrics`, and `tf.math.add_n` over the collected
    losses is returned. `self.note_probabilities` is set as a side effect
    whenever `self.note_logits` is not None.
    """
    with tf.name_scope("losses"):
        # Melody input, not compatible with multif0 input
        annotations = self.annotations[:, :, 0] - args.min_note
        voicing_ref = tf.cast(tf.greater(annotations, 0), tf.float32)

        def tile_annotations_over_bins():
            # Annotations reshaped to (-1, annotations_per_window, 1) and
            # repeated bin_count times along the last axis.
            per_window = tf.reshape(
                annotations, [-1, self.annotations_per_window, 1])
            return tf.tile(per_window, [1, 1, self.bin_count])

        loss_names = []
        losses = []

        if self.note_logits is not None:
            if args.annotation_smoothing > 0:
                self.note_probabilities = tf.nn.sigmoid(self.note_logits)

                # Gaussian bump around the annotated note over the bins.
                note_ref = tile_annotations_over_bins()
                squared_distance = (note_ref - self.note_bins)**2
                ref_probabilities = tf.exp(
                    -squared_distance / (2 * args.annotation_smoothing**2))

                # Voiced frames weigh 1, unvoiced frames weigh
                # args.unvoiced_loss_weight.
                unvoiced_weights = (
                    1 - voicing_ref) * args.unvoiced_loss_weight
                frame_weights = tf.expand_dims(
                    voicing_ref + unvoiced_weights, -1)
                voicing_weights = tf.tile(
                    frame_weights, [1, 1, self.bin_count])

                # miss weights
                # Bins closer than 0.5 to the annotation weigh 1, all other
                # bins weigh args.miss_weight.
                bin_distance = tf.abs(
                    tile_annotations_over_bins() - self.note_bins)
                peak_ref = tf.cast(bin_distance < 0.5, tf.float32)
                base_weights = tf.ones_like(
                    voicing_weights) * args.miss_weight
                miss_weights = base_weights + peak_ref * (
                    1 - args.miss_weight)

                note_loss = tf.losses.sigmoid_cross_entropy(
                    ref_probabilities,
                    self.note_logits,
                    weights=voicing_weights * miss_weights)
            else:
                self.note_probabilities = tf.nn.softmax(self.note_logits)
                # Annotation converted to the index of the nearest bin.
                scaled_annotations = annotations * self.bins_per_semitone
                ref_bins = tf.cast(tf.round(scaled_annotations), tf.int32)
                note_loss = tf.losses.sparse_softmax_cross_entropy(
                    ref_bins, self.note_logits, weights=voicing_ref)

            loss_names.append("note_loss")
            losses.append(note_loss)

        if self.voicing_logits is not None:
            loss_names.append("voicing_loss")
            losses.append(
                tf.losses.sigmoid_cross_entropy(
                    voicing_ref, self.voicing_logits))

        add_loss_names, add_losses = _common_losses(self, args)
        loss_names += add_loss_names
        losses += add_losses

    _common_loss_metrics(self, loss_names, losses)

    return tf.math.add_n(losses)
def z_sample(mean, logvar):
    """Return `mean + exp(0.5 * logvar) * noise` with standard-normal noise.

    The float32 noise tensor has the dynamic shape of `mean`.
    """
    noise = tf.random_normal(
        tf.shape(mean), mean=0.0, stddev=1.0, dtype=tf.float32)
    std_dev = tf.exp(logvar * 0.5)
    return mean + std_dev * noise
Exemple #54
0
def echo_sample(
    inputs,
    clip=None, d_max=100, batch=100, multiplicative=False, echo_mc = False,
    replace=False, fx_clip=None, plus_sx=True, calc_log=True,
    return_noise=False, **kwargs
    ):
    """Build an Echo-noise sample ``f(x) + S(x) * noise`` for a batch.

    For every batch entry the noise is the sum, over ``d_max`` other entries
    picked from the same batch, of ``S(x_0) S(x_1) ... S(x_l) * f(x_(l+1))``.

    Args:
        inputs: list or tuple; the first element is used as f(x) and the last
            element as s(x). With ``calc_log=True`` s(x) is treated as a
            log-space quantity.
        clip: factor applied to s(x) (added as ``log(clip)`` in log space).
            When None it is derived from ``d_max`` and ``fx_clip``.
        d_max: number of echo terms summed per example.
        batch: batch size handed to ``permute_neighbor_indices`` when
            ``replace`` is False; overwritten by the dynamic batch size when
            ``replace`` is True.
        multiplicative: if True return ``exp(f(x) + S(x) * noise)`` instead
            of ``f(x) + S(x) * noise``.
        echo_mc: see the NOTE(review) next to its use below.
        replace: choose the index-sampling path (``random_indices`` when
            True, ``permute_neighbor_indices`` otherwise).
        fx_clip: if given, f(x) is clipped to ``[-fx_clip, fx_clip]``.
        plus_sx: sign applied to s(x) in log space (True adds s(x), False
            subtracts it).
        calc_log: do the cumulative product of S(x) as a cumulative sum in
            log space.
        return_noise: return the noise tensor instead of the noisy output.
        **kwargs: accepted and ignored.

    Returns:
        The noisy output tensor, or the noise tensor if ``return_noise``.

    Raises:
        ValueError: if ``inputs`` is not a non-empty list/tuple.
    """
    # kwargs unused

    # Both f(x) and s(x) are required. Previously a bare tensor fell through
    # and failed later with an unbound ``sx``.
    if not isinstance(inputs, (list, tuple)) or not inputs:
        raise ValueError(
            "echo_sample expects inputs as a non-empty list [f(x), s(x)]")
    fx = inputs[0]
    sx = inputs[-1]

    # TO DO : CALC_LOG currently determines both whether to do log space calculations AND whether sx is a log

    fx_shape = fx.get_shape()
    sx_shape = sx.get_shape()

    # clip is multiplied times s(x) to ensure that sum of truncated terms < machine precision
    # clip should be calculated numerically according to App C in paper
    # M (r ^ dmax / 1-r ) < precision, SOLVE for r (clipping factor), with M = max magnitude of f(x)

    # calculation below is an approximation (ensuring only term d_max + 1 < precision)
    if clip is None:
        max_fx = fx_clip if fx_clip is not None else 1.0
        clip = (2**(-23)/max_fx)**(1.0/d_max)

    # fx_clip can be used to restrict magnitude of f(x), not used in paper
    # defaults to no clipping and M = 1 (e.g. with tanh activation for f(x))
    if fx_clip is not None:
        fx = K.clip(fx, -fx_clip, fx_clip)

    if not calc_log:
        sx = tf.multiply(clip,sx)
        # keep |sx| away from zero while preserving its sign
        sx = tf.where(tf.abs(sx) < K.epsilon(), K.epsilon()*tf.sign(sx), sx)
    else:
        # plus_sx based on activation for sx = s(x):
        #   True for log_sigmoid
        #   False for softplus
        sx = tf.log(clip) + (-1*sx if not plus_sx else sx)

    # NOTE(review): echo_mc defaults to False, so this ``is not None`` test is
    # true unless a caller passes None explicitly. Left unchanged to preserve
    # existing results; confirm whether a truthiness test was intended.
    if echo_mc is not None:
        # use mean centered fx for noise
        fx = fx - K.mean(fx, axis = 0, keepdims = True)

    z_dim = K.int_shape(fx)[-1]

    if replace: # replace doesn't set batch size (using permute_neighbor_indices does)
        batch = K.shape(fx)[0]
        sx = K.batch_flatten(sx) if len(sx_shape) > 2 else sx
        fx = K.batch_flatten(fx) if len(fx_shape) > 2 else fx
        inds = K.reshape(random_indices(batch, d_max), (-1, 1))
        select_sx = gather_nd_reshape(sx, inds, (-1, d_max, z_dim))
        select_fx = gather_nd_reshape(fx, inds, (-1, d_max, z_dim))

        if len(sx_shape)>2:
            select_sx = K.expand_dims(K.expand_dims(select_sx, 2), 2)
            sx = K.expand_dims(K.expand_dims(sx, 1),1)
        if len(fx_shape)>2:
            select_fx = K.expand_dims(K.expand_dims(select_fx, 2), 2)
            fx = K.expand_dims(K.expand_dims(fx, 1),1)

    else:
        # batch x batch x z_dim
        # for all i, stack_sx[i, :, :] = sx
        repeat = tf.multiply(tf.ones_like(tf.expand_dims(fx, 0)), tf.ones_like(tf.expand_dims(fx, 1)))
        stack_fx = tf.multiply(fx, repeat)
        stack_sx = tf.multiply(sx, repeat)

        # select a set of dmax examples from original fx / sx for each batch entry
        inds = permute_neighbor_indices(batch, d_max, replace = replace)

        # note that permute_neighbor_indices sets the batch_size dimension != None
        # this necessitates the use of fit_generator, e.g. in training to avoid 'remainder' batches if data_size % batch > 0

        select_sx = tf.gather_nd(stack_sx, inds)
        select_fx = tf.gather_nd(stack_fx, inds)

    # exclusive cumulative sum (log space) / product (linear space) along the
    # d_max axis, so the l-th term carries the S factors of the terms before it
    if calc_log:
        sx_echoes = tf.cumsum(select_sx, axis = 1, exclusive = True)
    else:
        sx_echoes = tf.cumprod(select_sx, axis = 1, exclusive = True)

    # calculates S(x0)S(x1)...S(x_l)*f(x_(l+1))
    sx_echoes = tf.exp(sx_echoes) if calc_log else sx_echoes
    fx_sx_echoes = tf.multiply(select_fx, sx_echoes)

    # performs the sum over dmax terms to calculate noise
    noise = tf.reduce_sum(fx_sx_echoes, axis = 1)

    # Bring s(x) back to linear space exactly once. The earlier code
    # exponentiated it twice when calc_log was set, so the returned output
    # was scaled by exp(S(x)) rather than S(x).
    sx = sx if not calc_log else tf.exp(sx)

    if multiplicative: # log z according to echo
        # unused in paper, not extensively tested
        output = tf.exp(fx + tf.multiply(sx, noise))
    else:
        output = fx + tf.multiply(sx, noise)

    return output if not return_noise else noise
Exemple #55
0
def sample_z(mu, log_var):
    """Return ``mu + exp(log_var / 2) * eps`` with ``eps`` standard normal.

    ``eps`` is drawn with the dynamic shape of ``mu``.
    """
    noise = tf.random_normal(shape=tf.shape(mu))
    sigma = tf.exp(log_var / 2)
    return mu + sigma * noise
def kl_loss(mean, logvar):
    """Batch-averaged KL term ``0.5 * sum(mean^2 + exp(logvar) - 1 - logvar)``.

    This is the closed-form KL divergence between a diagonal Gaussian with
    the given mean / log-variance and a standard normal. It is summed over
    the last axis and then averaged over everything that remains.
    """
    # shape : [batch_size, channel]
    per_element = tf.square(mean) + tf.exp(logvar) - 1 - logvar
    per_sample = 0.5 * tf.reduce_sum(per_element, axis=-1)
    return tf.reduce_mean(per_sample)
def gaussian_likelihood(x, mu, log_std):
    """Log-density of ``x`` under a diagonal Gaussian, summed over axis 1.

    ``EPS`` (a module-level constant) is added to the standard deviation
    before dividing.
    """
    scale = tf.exp(log_std) + EPS
    standardised = (x - mu) / scale
    per_dim = -0.5 * (standardised**2 + 2*log_std + np.log(2*np.pi))
    return tf.reduce_sum(per_dim, axis=1)
Exemple #58
0
    def __init__(self,
                 num_emb,
                 emb_dim,
                 hidden_dim,
                 sequence_length,
                 start_token,
                 learning_rate=0.01,
                 reward_gamma=0.9):
        """Build the full generator / discriminator training graph.

        Creates the embedding and recurrent parameters for both networks, the
        input placeholders, three unrolled ``while_loop`` passes (free-running
        generation, discrimination of generated and real sequences, and
        teacher-forced generation), the losses, and the update ops.

        Args:
            num_emb: number of rows in each embedding matrix; also the depth
                of the one-hot targets in the pretraining loss.
            emb_dim: number of columns in each embedding matrix.
            hidden_dim: length of the ``h0`` placeholder and of ``d_h0``.
            sequence_length: length of the ``x`` / ``samples`` placeholders
                and the number of steps of every loop.
            start_token: integer index whose generator embedding is fed as
                the first input of the generator loops.
            learning_rate: initial value of the non-trainable learning-rate
                variable handed to every optimizer.
            reward_gamma: base of the per-step reward decay
                (``reward_gamma ** t``).

        The helper methods used here (``init_matrix``, ``init_vector``,
        ``create_recurrent_unit``, ``create_output_unit``,
        ``create_classifier_unit``, ``d_optimizer``, ``g_optimizer``) and the
        functions ``_cumsum`` / ``_backwards_cumsum`` are defined elsewhere.
        """
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.sequence_length = sequence_length
        self.start_token = tf.constant(start_token, dtype=tf.int32)
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.reward_gamma = reward_gamma
        # Parameter lists; the create_* helpers receive these lists
        # (presumably appending the variables they create — confirm there).
        self.g_params = []
        self.d_params = []

        # Per-step baseline subtracted from the normalized rewards below and
        # fitted by minimising reward_loss.
        self.expected_reward = tf.Variable(tf.zeros([self.sequence_length]))

        with tf.variable_scope('generator'):
            self.g_embeddings = tf.Variable(
                self.init_matrix([self.num_emb, self.emb_dim]))
            self.g_params.append(self.g_embeddings)
            self.g_recurrent_unit = self.create_recurrent_unit(
                self.g_params)  # maps h_tm1 to h_t for generator
            self.g_output_unit = self.create_output_unit(
                self.g_params,
                self.g_embeddings)  # maps h_t to o_t (output token logits)

        with tf.variable_scope('discriminator'):
            self.d_embeddings = tf.Variable(
                self.init_matrix([self.num_emb, self.emb_dim]))
            self.d_params.append(self.d_embeddings)
            self.d_recurrent_unit = self.create_recurrent_unit(
                self.d_params)  # maps h_tm1 to h_t for discriminator
            self.d_classifier_unit = self.create_classifier_unit(
                self.d_params)  # maps h_t to class prediction logits
            # Learned initial hidden state of the discriminator.
            self.d_h0 = tf.Variable(self.init_vector([self.hidden_dim]))
            self.d_params.append(self.d_h0)

        self.h0 = tf.placeholder(
            tf.float32,
            shape=[self.hidden_dim])  # initial random vector for generator
        self.x = tf.placeholder(tf.int32, shape=[
            self.sequence_length
        ])  # sequence of indices of true data, not including start token
        self.samples = tf.placeholder(tf.float32,
                                      shape=[self.sequence_length
                                             ])  # random samples from [0, 1]

        # generator on initial randomness
        gen_o = tensor_array_ops.TensorArray(dtype=tf.float32,
                                             size=self.sequence_length,
                                             dynamic_size=False,
                                             infer_shape=True)
        gen_x = tensor_array_ops.TensorArray(dtype=tf.int32,
                                             size=self.sequence_length,
                                             dynamic_size=False,
                                             infer_shape=True)
        samples = tensor_array_ops.TensorArray(dtype=tf.float32,
                                               size=self.sequence_length)
        samples = samples.unstack(self.samples)

        # One free-running generation step: pick the next token as the
        # smallest index whose cumulative output value exceeds the random
        # sample fed in for this step, then feed its embedding back in.
        def _g_recurrence(i, x_t, h_tm1, gen_o, gen_x):
            h_t = self.g_recurrent_unit(x_t, h_tm1)
            o_t = self.g_output_unit(h_t)
            sample = samples.read(i)
            o_cumsum = _cumsum(o_t, self.num_emb)  # prepare for sampling
            next_token = tf.to_int32(tf.reduce_min(
                tf.where(sample < o_cumsum)))  # sample
            x_tp1 = tf.gather(self.g_embeddings, next_token)
            gen_o = gen_o.write(i, tf.gather(
                o_t,
                next_token))  # we only need the sampled token's probability
            gen_x = gen_x.write(i, next_token)  # indices, not embeddings
            return i + 1, x_tp1, h_t, gen_o, gen_x

        _, _, _, self.gen_o, self.gen_x = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3, _4: i < self.sequence_length,
            body=_g_recurrence,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       tf.gather(self.g_embeddings,
                                 self.start_token), self.h0, gen_o, gen_x))

        # discriminator on generated and real data
        d_gen_predictions = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=self.sequence_length,
            dynamic_size=False,
            infer_shape=True)
        d_real_predictions = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=self.sequence_length,
            dynamic_size=False,
            infer_shape=True)

        # From here on self.gen_x is the stacked tensor of sampled token
        # indices rather than a TensorArray.
        self.gen_x = self.gen_x.stack()
        emb_gen_x = tf.gather(self.d_embeddings, self.gen_x)
        ta_emb_gen_x = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                    size=self.sequence_length)
        ta_emb_gen_x = ta_emb_gen_x.unstack(emb_gen_x)

        emb_real_x = tf.gather(self.d_embeddings, self.x)
        ta_emb_real_x = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                     size=self.sequence_length)
        ta_emb_real_x = ta_emb_real_x.unstack(emb_real_x)

        # One discriminator step; shared by the generated-data and real-data
        # loops below, which differ only in the input TensorArray.
        def _d_recurrence(i, inputs, h_tm1, pred):
            x_t = inputs.read(i)
            h_t = self.d_recurrent_unit(x_t, h_tm1)
            y_t = self.d_classifier_unit(h_t)
            pred = pred.write(i, y_t)
            return i + 1, inputs, h_t, pred

        _, _, _, self.d_gen_predictions = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3: i < self.sequence_length,
            body=_d_recurrence,
            loop_vars=(tf.constant(0, dtype=tf.int32), ta_emb_gen_x, self.d_h0,
                       d_gen_predictions))
        self.d_gen_predictions = tf.reshape(self.d_gen_predictions.stack(),
                                            [self.sequence_length])

        _, _, _, self.d_real_predictions = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3: i < self.sequence_length,
            body=_d_recurrence,
            loop_vars=(tf.constant(0, dtype=tf.int32), ta_emb_real_x,
                       self.d_h0, d_real_predictions))
        self.d_real_predictions = tf.reshape(self.d_real_predictions.stack(),
                                             [self.sequence_length])

        # supervised pretraining for generator
        g_predictions = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                     size=self.sequence_length,
                                                     dynamic_size=False,
                                                     infer_shape=True)

        emb_x = tf.gather(self.g_embeddings, self.x)
        ta_emb_x = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                size=self.sequence_length)
        ta_emb_x = ta_emb_x.unstack(emb_x)

        # Teacher-forced step: record the output for step i, then feed the
        # embedding of the true token x[i] as the next input.
        def _pretrain_recurrence(i, x_t, h_tm1, g_predictions):
            h_t = self.g_recurrent_unit(x_t, h_tm1)
            o_t = self.g_output_unit(h_t)
            g_predictions = g_predictions.write(i, o_t)
            x_tp1 = ta_emb_x.read(i)
            return i + 1, x_tp1, h_t, g_predictions

        _, _, _, self.g_predictions = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3: i < self.sequence_length,
            body=_pretrain_recurrence,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       tf.gather(self.g_embeddings,
                                 self.start_token), self.h0, g_predictions))

        self.g_predictions = tf.reshape(self.g_predictions.stack(),
                                        [self.sequence_length, self.num_emb])

        # calculate discriminator loss
        # (label 0 for generated steps, label 1 for real steps)
        self.d_gen_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.d_gen_predictions,
                labels=tf.zeros([self.sequence_length])))
        self.d_real_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.d_real_predictions,
                labels=tf.ones([self.sequence_length])))

        # calculate generator rewards and loss
        # decays[t] = exp(t * log(gamma)) = reward_gamma ** t
        decays = tf.exp(
            tf.log(self.reward_gamma) *
            tf.to_float(tf.range(self.sequence_length)))
        # NOTE(review): _backwards_cumsum is defined elsewhere; presumably a
        # reversed cumulative sum over the sequence — confirm.
        rewards = _backwards_cumsum(
            decays * tf.sigmoid(self.d_gen_predictions), self.sequence_length)
        normalized_rewards = \
            rewards / _backwards_cumsum(decays, self.sequence_length) - self.expected_reward

        # Squared residual used to fit expected_reward (see reward_updates).
        self.reward_loss = tf.reduce_mean(normalized_rewards**2)
        # Log of each sampled token's recorded output value, weighted by its
        # normalized reward.
        self.g_loss = \
            -tf.reduce_mean(tf.log(self.gen_o.stack()) * normalized_rewards)

        # pretraining loss
        # (cross-entropy of g_predictions against one-hot x, per step)
        self.pretrain_loss = \
            (-tf.reduce_sum(
                tf.one_hot(tf.to_int64(self.x),
                           self.num_emb, 1.0, 0.0) * tf.log(self.g_predictions))
             / self.sequence_length)

        # training updates
        d_opt = self.d_optimizer(self.learning_rate)
        g_opt = self.g_optimizer(self.learning_rate)
        pretrain_opt = self.g_optimizer(self.learning_rate)
        reward_opt = tf.train.GradientDescentOptimizer(self.learning_rate)

        # The generated-data and real-data discriminator losses get separate
        # update ops that share one optimizer instance.
        self.d_gen_grad = tf.gradients(self.d_gen_loss, self.d_params)
        self.d_real_grad = tf.gradients(self.d_real_loss, self.d_params)
        self.d_gen_updates = d_opt.apply_gradients(
            zip(self.d_gen_grad, self.d_params))
        self.d_real_updates = d_opt.apply_gradients(
            zip(self.d_real_grad, self.d_params))

        self.reward_grad = tf.gradients(self.reward_loss,
                                        [self.expected_reward])
        self.reward_updates = reward_opt.apply_gradients(
            zip(self.reward_grad, [self.expected_reward]))

        self.g_grad = tf.gradients(self.g_loss, self.g_params)
        self.g_updates = g_opt.apply_gradients(zip(self.g_grad, self.g_params))

        self.pretrain_grad = tf.gradients(self.pretrain_loss, self.g_params)
        # Debug output emitted once, at graph-construction time.
        print("PRETRAIN_GRAD: ", self.pretrain_grad)
        self.pretrain_updates = pretrain_opt.apply_gradients(
            zip(self.pretrain_grad, self.g_params))
def pointnet_sa_module_layer1(xyz, points, npoint, radius, nsample, mlp, mlp2, group_all, is_training, bn_decay, scope,
                       bn=True, pooling='max', knn=True, use_xyz=True, use_nchw=False):
    ''' PointNet Set Abstraction (SA) Module: group, embed, pool, post-process.
        Input:
            xyz: (batch_size, ndataset, 3) TF tensor
            points: (batch_size, ndataset, channel) TF tensor
            npoint: int32 -- #points sampled in farthest point sampling
            radius: float32 -- search radius in local region
            nsample: int32 -- how many points in each local region
            mlp: list of int32 -- output size for MLP on each point
            mlp2: list of int32 or None -- output size for MLP on each region;
                None skips this stage
            group_all: bool -- group all points into one PC if set true, OVERRIDE
                npoint, radius and nsample settings
            is_training, bn_decay, bn: forwarded unchanged to tf_util.conv2d
            scope: name of the variable scope wrapping the whole module
            pooling: one of 'max', 'avg', 'weighted_avg', 'max_and_avg'; any
                other value leaves the grouped features unpooled
            knn: forwarded to sample_and_group_layer1
            use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
            use_nchw: bool, if True, use NCHW data format for conv2d, which is usually faster than NHWC format
        Return:
            new_xyz: (batch_size, npoint, 3) TF tensor
            new_points: (batch_size, npoint, mlp[-1] or mlp2[-1]) TF tensor
            idx: (batch_size, npoint, nsample) int32 -- indices for local regions
    '''
    data_format = 'NCHW' if use_nchw else 'NHWC'

    def _conv_stack(features, widths, scope_template):
        # Run a chain of 1x1 convolutions, switching to and from NCHW
        # around the chain when that layout was requested.
        if use_nchw:
            features = tf.transpose(features, [0, 3, 1, 2])
        for layer_no, width in enumerate(widths):
            features = tf_util.conv2d(
                features, width, [1, 1],
                padding='VALID', stride=[1, 1],
                bn=bn, is_training=is_training,
                scope=scope_template % (layer_no), bn_decay=bn_decay,
                data_format=data_format)
        if use_nchw:
            features = tf.transpose(features, [0, 2, 3, 1])
        return features

    with tf.variable_scope(scope):
        # Sample and Grouping
        if not group_all:
            new_xyz, new_points, idx, grouped_xyz = sample_and_group_layer1(
                npoint, radius, nsample, xyz, points, knn, use_xyz)
        else:
            nsample = xyz.get_shape()[1].value
            new_xyz, new_points, idx, grouped_xyz = sample_and_group_all(
                xyz, points, use_xyz)

        # Point Feature Embedding
        new_points = _conv_stack(new_points, mlp, 'conv%d')

        # Pooling in Local Regions
        if pooling == 'max':
            new_points = tf.reduce_max(
                new_points, axis=[2], keep_dims=True, name='maxpool')
        elif pooling == 'avg':
            new_points = tf.reduce_mean(
                new_points, axis=[2], keep_dims=True, name='avgpool')
        elif pooling == 'weighted_avg':
            with tf.variable_scope('weighted_avg'):
                # Weights decay exponentially with each grouped point's norm
                # and are normalised over the neighbourhood axis.
                dists = tf.norm(grouped_xyz, axis=-1, ord=2, keep_dims=True)
                exp_dists = tf.exp(-dists * 5)
                normaliser = tf.reduce_sum(exp_dists, axis=2, keep_dims=True)
                weights = exp_dists / normaliser  # (batch_size, npoint, nsample, 1)
                new_points = new_points * weights  # (batch_size, npoint, nsample, mlp[-1])
                new_points = tf.reduce_sum(new_points, axis=2, keep_dims=True)
        elif pooling == 'max_and_avg':
            max_points = tf.reduce_max(
                new_points, axis=[2], keep_dims=True, name='maxpool')
            avg_points = tf.reduce_mean(
                new_points, axis=[2], keep_dims=True, name='avgpool')
            new_points = tf.concat([avg_points, max_points], axis=-1)

        # [Optional] Further Processing
        if mlp2 is not None:
            new_points = _conv_stack(new_points, mlp2, 'conv_post_%d')

        new_points = tf.squeeze(new_points, [2])  # (batch_size, npoints, mlp2[-1])
        return new_xyz, new_points, idx
Exemple #60
0
    )  # tanh(x)dx = 1 - tanh(x)**2
    psi_u.append(tf.matmul(tf.transpose(psi[i]), u_hat[i]))
    logdet_jacobian += tf.log(tf.abs(1 + psi_u[i]))

##################################################################################

# Decode the last element of f_z; only the second value returned by P is
# kept, and it is used as the logits of the reconstruction loss below.
_, logits = P(f_z[-1])  # add flows thing in P

# Decode z (defined elsewhere); only the first value returned by P is kept.
X_samples, _ = P(z)

# Reconstruction term: sigmoid cross-entropy between logits and X, i.e. the
# negative Bernoulli log-likelihood -log P(X|z_k), summed over axis 1.
recon_loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1)
# Batch mean of the reconstruction term; not part of vae_loss below.
reconstruction_loss = tf.reduce_mean(recon_loss)
# Closed-form KL divergence between the diagonal Gaussian
# N(z_mu, exp(z_logvar)) and a standard normal, summed over axis 1.
kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, 1)
# VAE loss: reconstruction + KL, minus the accumulated log-determinant term
# of the flow, averaged over the batch.
vae_loss = tf.reduce_mean(recon_loss + kl_loss - logdet_jacobian)

# Adam with its default hyper-parameters.
solver = tf.train.AdamOptimizer().minimize(vae_loss)

# sess = tf.Session(config=config)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Make sure the relative output directory exists.
if not os.path.exists('out/'):
    os.makedirs('out/')

# NOTE(review): p and distribution are initialised here but not used in the
# visible lines; presumably a counter and an accumulator for later code.
p = 0

distribution = {}