Ejemplo n.º 1
1
def mean(mean, variance, std=False):
    '''Mean of ReLU(x) = max(x, 0) when x is Gaussian.

    All arguments broadcast against each other, so a single variance can be
    paired with many means, a single mean with many variances, or both can
    be given per element.

    Args:
        mean: Input mean of size (Batch, Size), or None as a shortcut for an
            all-zero mean.
        variance: Input variance vector (Batch, Size)
            or scalar v such that variance = v * ones(Size).
        std: If true, `variance` already holds the standard deviation and
            no square root is taken.

    Returns:
        Output mean of ReLU for general Gaussian input (Batch, Size).
    '''
    if std:
        sigma = variance
    else:
        sigma = tf.sqrt(variance)

    # Value of the output mean for a zero-mean input: sigma / sqrt(2*pi).
    base = sigma / tf.sqrt(2.0 * math.pi)
    if mean is None:
        return base  # nothing else to compute for a zero mean

    scaled = mean / (math.sqrt(2.0) * sigma)
    gaussian_part = base * tf.exp(-scaled ** 2.0)
    linear_part = 0.5 * mean * (1.0 + tf.erf(scaled))
    return gaussian_part + linear_part
Ejemplo n.º 2
0
def soft_triplet_loss(anchor, positive, negative, extra=True, scope="soft_triplet_loss"):
    r"""Loss for triplet networks as described in the paper:
    `Deep Metric Learning using Triplet Network
    <https://arxiv.org/abs/1412.6622>`_ by Hoffer et al.

    It is a softmax loss using the Euclidean distances
    :math:`\|anchor-positive\|` and :math:`\|anchor-negative\|` as logits,
    with the negative distance treated as the correct class (label 1).

    Args:
        anchor (tf.Tensor): anchor feature vectors of shape [Batch, N].
        positive (tf.Tensor): features of positive match of the same shape.
        negative (tf.Tensor): features of negative match of the same shape.
        extra (bool): also return distances for pos and neg.

    Returns:
        tf.Tensor: triplet-loss as scalar (and optionally average_pos_dist, average_neg_dist)
    """

    # Keeps the square root (and its gradient) finite for identical vectors.
    eps = 1e-10
    with tf.name_scope(scope):
        d_pos = tf.sqrt(tf.reduce_sum(tf.square(anchor - positive), 1) + eps)
        d_neg = tf.sqrt(tf.reduce_sum(tf.square(anchor - negative), 1) + eps)

        logits = tf.stack([d_pos, d_neg], axis=1)
        # d_pos is already rank 1 ([Batch]). Squeezing it would turn a batch
        # of one into a scalar, and the labels would then no longer have
        # rank(logits) - 1 as the cross-entropy op requires.
        labels = tf.ones_like(d_pos, dtype="int32")

        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))

        if extra:
            pos_dist = tf.reduce_mean(d_pos, name='pos-dist')
            neg_dist = tf.reduce_mean(d_neg, name='neg-dist')
            return loss, pos_dist, neg_dist
        else:
            return loss
Ejemplo n.º 3
0
def adam(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    """Build an Adam-style update op combined with Polyak averaging.

    Args:
        params: list of variables to update.
        cost_or_grads: either a list of gradients aligned with `params`, or
            a cost tensor that is differentiated with respect to `params`.
        alpha: base learning rate.
        hps: hyper-parameter object. `train_its`, `polyak_epochs`, `beta1`
            and `weight_decay` are read from it, so despite the `None`
            default a real object has to be supplied.
        epsilon: constant added to the root of the second moment.

    Returns:
        Tuple `(train_op, polyak_swap_op, ema)`; the last two are whatever
        `polyak(params, beta2)` returns.
    """
    updates = []
    if isinstance(cost_or_grads, list):
        gs = cost_or_grads
    else:
        gs = tf.gradients(cost_or_grads, params)

    beta2 = 1-1./(hps.train_its*hps.polyak_epochs)

    # all-reduce (presumably averages each gradient across workers; see Z)
    grads = [Z.allreduce_mean(g) for g in gs]

    # Optimizer state must not be trainable.  The second positional argument
    # of tf.Variable is `trainable`, so the name strings that used to be
    # passed there only had the effect of marking these variables trainable.
    # Names are left auto-generated so existing checkpoints keep matching.
    t = tf.Variable(1., trainable=False)
    # Bias-corrected step size for the current iteration.
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates.append(t.assign_add(1))

    for w, g in zip(params, grads):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), trainable=False)
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), trainable=False)
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g
            updates.append(mom1.assign(mom1_new))
        else:
            # No first-moment smoothing: use the raw gradient.
            mom1_new = g
        m2_new = beta2 * mom2 + (1. - beta2) * tf.square(g)
        delta_t = mom1_new / (tf.sqrt(m2_new) + epsilon)
        # weight_decay is a multiplicative factor on the weights.
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema
    def update_phis(self, ii, dirname):
        """Take one normalised gradient step on the basis `phis`.

        Assumes `self.a_matr` already holds the coefficients for the current
        batch (the caller is responsible for running that step first).  The
        updated, column-normalised basis is evaluated with `self.sess` and
        stored in `self.phis_so_far`; every 100th iteration it is also
        pickled to `<dirname>/phis.pkl`.

        Args:
            ii: iteration counter, used only to decide when to pickle.
            dirname: directory that receives `phis.pkl`.
        """
        # NOTE(review): new graph ops are created on every call, so the
        # graph grows with the number of iterations.
        residual = self.data - tf.matmul(self.phis, self.a_matr)

        # 1.0 forces true division even when batch_size is a Python 2 int,
        # where 1/batch_size would silently evaluate to 0.
        dbasis = (1.0 / self.batch_size) * tf.matmul(residual, tf.transpose(self.a_matr))
        norm_grad_basis = tf.sqrt(tf.reduce_sum(dbasis ** 2, reduction_indices = 0))
        dbasis = dbasis / norm_grad_basis

        phis = self.phis + self.LR * dbasis
        phi_norm = tf.sqrt(tf.reduce_sum(phis ** 2.0, reduction_indices = 0))
        self.phis_so_far = self.sess.run(phis/phi_norm, feed_dict = {self.phis:self.phis_so_far, self.data: self.loaded_data})

        if ii % 100 == 0:
            name_of_pickle_file = dirname + "/" + "phis.pkl"
            print("Now pickling phis for sparse coding")
            # The context manager closes the file even if pickling fails.
            with open(name_of_pickle_file, 'wb') as output:
                pickle.dump(self.phis_so_far,output)
            print("Done pickling")


        print("The value sum of active coefficients after we do learning", np.sum(np.abs(self.sess.run(self.a_matr))))
Ejemplo n.º 5
0
def batchnormalize(X, eps=1e-8, g=None, b=None):
    """Normalise X with statistics taken over all axes but the last.

    Rank-4 inputs are reduced over axes 0, 1 and 2; rank-2 inputs over
    axis 0.  Any other rank raises NotImplementedError.  The optional
    scale `g` and shift `b` are applied only when both are given.
    """
    rank = X.get_shape().ndims
    if rank == 4:
        reduce_axes, param_shape = [0, 1, 2], [1, 1, 1, -1]
    elif rank == 2:
        reduce_axes, param_shape = 0, [1, -1]
    else:
        raise NotImplementedError

    centre = tf.reduce_mean(X, reduce_axes)
    variance = tf.reduce_mean(tf.square(X - centre), reduce_axes)
    X = (X - centre) / tf.sqrt(variance + eps)

    if g is not None and b is not None:
        # Reshape so the parameters broadcast along the last axis.
        g = tf.reshape(g, param_shape)
        b = tf.reshape(b, param_shape)
        X = X * g + b

    return X
  def _build_iid_normal_model(self, num_timesteps, latent_size,
                              observation_size, transition_variance,
                              observation_variance):
    """Build a model whose outputs are IID normal by construction.

    The transition matrix is all zeros, and the observation matrix consists
    of the first `observation_size` rows of a random orthogonal matrix, so a
    (potentially high-dimensional) IID normal latent space is projected to a
    low-dimensional observation that is still IID normal.
    """
    transition_variance = self._build_placeholder(transition_variance)
    observation_variance = self._build_placeholder(observation_variance)

    orthogonal_basis, _ = np.linalg.qr(
        np.random.randn(latent_size, latent_size))
    observation_matrix = self._build_placeholder(
        orthogonal_basis[:observation_size, :])
    transition_matrix = self._build_placeholder(
        np.zeros([latent_size, latent_size]))

    def isotropic_noise(variance, size):
      # Diagonal normal with the same standard deviation in every dimension.
      return tfd.MultivariateNormalDiag(
          scale_diag=tf.sqrt(variance) * tf.ones([size], dtype=self.dtype))

    return tfd.LinearGaussianStateSpaceModel(
        num_timesteps=num_timesteps,
        transition_matrix=transition_matrix,
        transition_noise=isotropic_noise(transition_variance, latent_size),
        observation_matrix=observation_matrix,
        observation_noise=isotropic_noise(observation_variance,
                                          observation_size),
        initial_state_prior=isotropic_noise(transition_variance, latent_size),
        validate_args=True)
Ejemplo n.º 7
0
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.
        """
        noise_var = self.likelihood.variance
        inv_noise_std = tf.sqrt(1./noise_var)

        num_inducing = tf.shape(self.Z)[0]
        num_data = tf.shape(self.Y)[0]
        output_dim = tf.shape(self.Y)[1]
        n_times_d = tf.cast(num_data*output_dim, tf.float64)

        err = self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.kern.K(self.Z, self.X)
        # Small jitter on the diagonal before the Cholesky factorisation.
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        L = tf.cholesky(Kuu)

        # Intermediate matrices
        A = tf.matrix_triangular_solve(L, Kuf, lower=True)*inv_noise_std
        AAT = tf.matmul(A, tf.transpose(A))
        LB = tf.cholesky(AAT + eye(num_inducing))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, err), lower=True) * inv_noise_std

        # Individual terms of the bound, summed in the order they are listed.
        const_term = -0.5*n_times_d*np.log(2*np.pi)
        chol_term = -tf.cast(output_dim, tf.float64)*tf.reduce_sum(tf.log(tf.user_ops.get_diag(LB)))
        noise_term = -0.5*n_times_d*tf.log(noise_var)
        residual_term = -0.5*tf.reduce_sum(tf.square(err))/noise_var
        quadratic_term = 0.5*tf.reduce_sum(tf.square(c))
        trace_term = -0.5*(tf.reduce_sum(Kdiag)/noise_var - tf.reduce_sum(tf.user_ops.get_diag(AAT)))

        return const_term + chol_term + noise_term + residual_term + quadratic_term + trace_term
Ejemplo n.º 8
0
 def build_predict(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.

     With full_cov the variance is a full covariance matrix tiled once per
     output column of Y; otherwise only its diagonal is computed and tiled.
     """
     inv_noise_std = tf.sqrt(1./self.likelihood.variance)
     num_inducing = tf.shape(self.Z)[0]
     num_outputs = tf.shape(self.Y)[1]

     err = self.Y - self.mean_function(self.X)
     Kuf = self.kern.K(self.Z, self.X)
     # Small jitter on the diagonal before the Cholesky factorisation.
     Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
     Kus = self.kern.K(self.Z, Xnew)

     L = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(L, Kuf, lower=True)*inv_noise_std
     LB = tf.cholesky(tf.matmul(A, tf.transpose(A)) + eye(num_inducing))
     c = tf.matrix_triangular_solve(LB, tf.matmul(A, err), lower=True) * inv_noise_std

     tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
     tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
     mean = tf.matmul(tf.transpose(tmp2), c)

     if full_cov:
         var = self.kern.K(Xnew) + tf.matmul(tf.transpose(tmp2), tmp2) - tf.matmul(tf.transpose(tmp1), tmp1)
         expand_axis, multiples = 2, tf.pack([1, 1, num_outputs])
     else:
         var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) - tf.reduce_sum(tf.square(tmp1), 0)
         expand_axis, multiples = 1, tf.pack([1, num_outputs])
     # The same variance is repeated for every output column.
     var = tf.tile(tf.expand_dims(var, expand_axis), multiples)

     return mean + self.mean_function(Xnew), var
Ejemplo n.º 9
0
  def _dist_to_opt(self):
    """Build the ops that track the running distance-to-optimum estimate.

    Reads `self._grad_norm_squared`, `self._grad_norm_squared_avg`,
    `self._moving_averager`, `self._sparsity_debias` and, when debiasing is
    enabled, `self._sparsity_avg`.  Sets `self._grad_norm`,
    `self._grad_norm_avg`, `self._d_t` and `self._dist_to_opt_avg`.

    Returns:
      D_t ops: list holding the two moving-average update ops created here.
    """
    dist_to_opt_ops = []
    # Running average of the norm of gradient
    self._grad_norm = tf.sqrt(self._grad_norm_squared)
    avg_op = self._moving_averager.apply([self._grad_norm,])
    dist_to_opt_ops.append(avg_op)
    # Read the average only after the update op above has run.
    with tf.control_dependencies([avg_op]):
      self._grad_norm_avg = self._moving_averager.average(self._grad_norm)
      # Single iteration distance estimation, note here
      # self._grad_norm_avg is per variable
      self._d_t = self._grad_norm_avg / self._grad_norm_squared_avg
    # Running average of distance
    avg_op = self._moving_averager.apply([self._d_t])
    dist_to_opt_ops.append(avg_op)
    with tf.control_dependencies([avg_op]):
      self._dist_to_opt_avg = tf.identity(
          self._moving_averager.average(self._d_t))
      # Optional correction: divide by the root of the sparsity average.
      if self._sparsity_debias:
        self._dist_to_opt_avg /= tf.sqrt(self._sparsity_avg)
    return dist_to_opt_ops  # D_t
Ejemplo n.º 10
0
def pearsoncorrelation(ypred, y):
    """Pearson correlation between `ypred` and `y` over all elements.

    A small constant (1e-10) is added to the denominator so that a constant
    input yields 0 instead of a division by zero.
    """
    pred_centered = ypred - tf.reduce_mean(ypred)
    true_centered = y - tf.reduce_mean(y)

    covariance = tf.reduce_sum(tf.multiply(pred_centered, true_centered))
    pred_norm = tf.sqrt(tf.reduce_sum(tf.square(pred_centered)))
    true_norm = tf.sqrt(tf.reduce_sum(tf.square(true_centered)))

    return covariance / (tf.multiply(pred_norm, true_norm) + 1e-10)
Ejemplo n.º 11
0
 def p_zt(self, prev_state, t):
   """Computes the model p(z_t| z_{t-1}).

   For t > 0 this is a Normal centred at `prev_state + self.bs[t - 1]`.
   Otherwise it is the prior p(z_0): a two-component mixture of Normals
   centred at +/- `self.prior_mode_mean`, weighted by `self.mixing_coeff`
   and `1 - self.mixing_coeff`.
   """
   distributions = tf.contrib.distributions
   batch_size = tf.shape(prev_state)[0]

   def normal_at(loc):
     # Scale is sqrt(self.variance), expanded to the shape of `loc`.
     return distributions.Normal(
         loc=loc, scale=tf.sqrt(tf.ones_like(loc) * self.variance))

   if t > 0:
     return normal_at(prev_state + self.bs[t - 1])

   # p(z_0) is mixture of two Normals
   mode_shape = [batch_size, self.state_size]
   mu_pos = tf.ones(mode_shape, dtype=self.dtype) * self.prior_mode_mean
   mu_neg = tf.ones(mode_shape, dtype=self.dtype) * -self.prior_mode_mean
   components = [normal_at(mu_pos), normal_at(mu_neg)]

   mode_probs = tf.convert_to_tensor([self.mixing_coeff, 1-self.mixing_coeff], dtype=tf.float64)
   # One copy of the mixing weights per batch element.
   mode_probs = tf.tile(mode_probs[tf.newaxis, tf.newaxis, :], [batch_size, 1, 1])

   return distributions.Mixture(
       cat=distributions.Categorical(probs=mode_probs),
       components=components,
       validate_args=False)
Ejemplo n.º 12
0
    def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
        """Gauss-Hermite estimate that the latent function picked by Y is largest.

        `mu` and `var` hold one column per class; `gh_x` and `gh_w` are the
        quadrature nodes and weights.  The result has one row per data point.
        """
        labels = tf.reshape(Y, (-1,))
        # oh_on is 1 for the labelled class; oh_off is its complement.
        oh_on = tf.cast(tf.one_hot(labels, self.num_classes, 1.0, 0.0), float_type)
        oh_off = tf.cast(tf.one_hot(labels, self.num_classes, 0.0, 1.0), float_type)

        # mean and variance of the selected latent function
        mu_selected = tf.reduce_sum(oh_on * mu, 1)
        var_selected = tf.reduce_sum(oh_on * var, 1)

        # Gauss-Hermite grid around the selected mean; variances are clipped
        # away from zero before the square root.
        grid_scale = tf.sqrt(tf.clip_by_value(2.0 * var_selected, 1e-10, np.inf))
        X = tf.reshape(mu_selected, (-1, 1)) + gh_x * tf.reshape(grid_scale, (-1, 1))

        # Gaussian CDF of every latent function (selected one included)
        # evaluated at every grid point.
        latent_std = tf.sqrt(tf.clip_by_value(var, 1e-10, np.inf))
        dist = (tf.expand_dims(X, 1) - tf.expand_dims(mu, 2)) / tf.expand_dims(latent_std, 2)
        cdfs = 0.5 * (1.0 + tf.erf(dist / np.sqrt(2.0)))

        # Squash into [1e-4, 1 - 1e-4] so the product never hits exactly 0 or 1.
        cdfs = cdfs * (1 - 2e-4) + 1e-4

        # Replace the selected function's own CDF by 1 so it drops out of
        # the product.
        cdfs = cdfs * tf.expand_dims(oh_off, 2) + tf.expand_dims(oh_on, 2)

        # Product over latent functions, weighted sum over the grid.
        joint = tf.reduce_prod(cdfs, reduction_indices=[1])
        weights = tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1))
        return tf.matmul(joint, weights)
def cosine_distance(v1, v2):
    """
    Calculate the pairwise cosine between the word representations of
    two sentences.

    Despite the name, the value returned is the cosine *similarity*
    (dot product divided by both norms), not one minus it.

    Parameters
    ----------
    v1: Tensor
        Tensor of shape (batch_size, 1, num_sentence_words, context_rnn_hidden_size)
        representing the first sentence to take the cosine similarity with.

    v2: Tensor
        Tensor of shape (batch_size, num_sentence_words, 1, context_rnn_hidden_size)
        representing the second sentence to take the cosine similarity with.
    """
    def _clipped_norm(vectors):
        # Norm along the last axis; the squared length is floored at EPSILON
        # so the later division never sees zero.
        squared_length = tf.reduce_sum(tf.square(vectors), axis=-1)
        return tf.sqrt(tf.maximum(squared_length, EPSILON))

    # Broadcasting v1 against v2 and summing over the hidden axis gives
    # shape (batch_size, num_sentence_words, num_sentence_words).
    dot_products = tf.reduce_sum(tf.multiply(v1, v2), axis=-1)

    # v1 norms: (batch_size, 1, num_sentence_words)
    # v2 norms: (batch_size, num_sentence_words, 1)
    return dot_products / _clipped_norm(v1) / _clipped_norm(v2)
Ejemplo n.º 14
0
def dense(x, num_units, nonlinearity=None, init_scale=1., counters={},init=False, ema=None, train_scale=True, init_w=tf.random_normal_initializer(0, 0.05),**kwargs):
    ''' fully connected layer

    With init=True the variables V, g and b are created (g as ones, b as
    zeros) and the output is standardised with the statistics of the current
    batch.  Otherwise existing variables are fetched (optionally their
    averaged versions via `ema`) and weight normalization is applied.
    '''
    name = get_name('dense', counters)
    with tf.variable_scope(name):
        row_shape = [1, num_units]

        if not init:
            V, g, b = get_vars_maybe_avg(['V', 'g', 'b'], ema)

            # use weight normalization (Salimans & Kingma, 2016)
            projected = tf.matmul(x, V)
            scaler = g/tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
            out = tf.reshape(scaler, row_shape)*projected + tf.reshape(b, row_shape)
        else:
            # data based initialization of parameters
            V = tf.get_variable('V', [int(x.get_shape()[1]), num_units], tf.float32, init_w, trainable=True)
            V_norm = tf.nn.l2_normalize(V.initialized_value(), [0])
            projected = tf.matmul(x, V_norm)
            m_init, v_init = tf.nn.moments(projected, [0])
            scale_init = init_scale/tf.sqrt(v_init + 1e-10)
            # g and b are created here so they exist for later non-init
            # calls; the init output below does not read them.
            g = tf.get_variable('g', dtype=tf.float32, initializer=tf.constant(np.ones(num_units),tf.float32), trainable=train_scale)
            b = tf.get_variable('b', dtype=tf.float32, initializer=tf.constant(np.zeros(num_units),tf.float32), trainable=True)
            out = tf.reshape(scale_init, row_shape)*(projected-tf.reshape(m_init, row_shape))

        # apply nonlinearity
        if nonlinearity is not None:
            out = nonlinearity(out)
        return out
Ejemplo n.º 15
0
def xavier_init( n_inputs, n_outputs, uniform=True ):
    """Return a Xavier/Glorot-style initializer for the given fan-in/fan-out.

    uniform=True gives a uniform initializer on [-r, r] with
    r = sqrt(6 / (n_inputs + n_outputs)); otherwise a truncated normal with
    stddev = sqrt(3 / (n_inputs + n_outputs)).
    """
    fan_total = n_inputs + n_outputs
    if not uniform:
        spread = tf.sqrt( 3.0 / fan_total )
        return tf.truncated_normal_initializer( stddev=spread )
    limit = tf.sqrt( 6.0 / fan_total )
    return tf.random_uniform_initializer( -limit, limit )
Ejemplo n.º 16
0
def summary_gradient_updates(grads, opt, lr):
    """get summary ops for the magnitude of gradient updates

    For each trainable variable that has a gradient, a scalar summary of
    ||update|| / ||variable|| is emitted, where the update is lr * grad,
    divided by the root of the optimizer's 'accumulator' slot when the
    optimizer provides one.
    """
    # variable name -> [variable, grad, accumulator slot]
    table = {var.name: [var, None, None] for var in tf.trainable_variables()}
    for grad, var in grads:
        entry = table[var.name]
        entry[1] = grad
        entry[2] = opt.get_slot(var, 'accumulator')

    summaries = []
    for vname, (var, grad, accum) in table.items():
        if grad is None:
            continue

        if isinstance(grad, tf.IndexedSlices):
            # a sparse gradient - only take norm of params that are updated
            step = lr * grad.values
            divisor_source = None if accum is None else tf.gather(accum, grad.indices)
        else:
            step = lr * grad
            divisor_source = accum
        if divisor_source is not None:
            step = step / tf.sqrt(divisor_source)

        # The small constant guards against all-zero variables.
        var_norm = tf.sqrt(tf.reduce_sum(var * var)) + 1.0e-7
        step_norm = tf.sqrt(tf.reduce_sum(step * step))
        tag = 'UPDATE/' + vname.replace(":", "_")
        summaries.append(tf.summary.scalar(tag, step_norm / var_norm))

    return summaries
    def __init__(
        self, sequence_length, vocab_size, embedding_size, hidden_units, l2_reg_lambda, batch_size, trainableEmbeddings):
        """Build the two-sided graph: embeddings, encoders, distance, loss, accuracy.

        Both inputs share one embedding matrix and are each passed through
        `self.stackedRNN`.  The distance between the two encodings is their
        Euclidean distance divided by the sum of their norms.

        NOTE(review): `l2_reg_lambda` and the local `l2_loss` are not used
        anywhere in this method.
        """

        # Placeholders for input, output and dropout
        self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1")
        self.input_x2 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x2")
        self.input_y = tf.placeholder(tf.float32, [None], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0, name="l2_loss")

        # Embedding layer
        with tf.name_scope("embedding"):
            # Starts as all zeros; presumably real embeddings are assigned
            # by the caller -- TODO confirm.
            self.W = tf.Variable(
                tf.constant(0.0, shape=[vocab_size, embedding_size]),
                trainable=trainableEmbeddings,name="W")
            self.embedded_words1 = tf.nn.embedding_lookup(self.W, self.input_x1)
            self.embedded_words2 = tf.nn.embedding_lookup(self.W, self.input_x2)
        print self.embedded_words1
        # Encode each side with the stacked RNN and compare the encodings
        with tf.name_scope("output"):
            self.out1=self.stackedRNN(self.embedded_words1, self.dropout_keep_prob, "side1", embedding_size, sequence_length, hidden_units)
            self.out2=self.stackedRNN(self.embedded_words2, self.dropout_keep_prob, "side2", embedding_size, sequence_length, hidden_units)
            # Euclidean distance between the two encodings ...
            self.distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(self.out1,self.out2)),1,keep_dims=True))
            # ... divided by the sum of their norms (no guard against a zero sum).
            self.distance = tf.div(self.distance, tf.add(tf.sqrt(tf.reduce_sum(tf.square(self.out1),1,keep_dims=True)),tf.sqrt(tf.reduce_sum(tf.square(self.out2),1,keep_dims=True))))
            self.distance = tf.reshape(self.distance, [-1], name="distance")
        with tf.name_scope("loss"):
            self.loss = self.contrastive_loss(self.input_y,self.distance, batch_size)
        #### Accuracy computation is outside of this class.
        with tf.name_scope("accuracy"):
            # Predicted similarity is 1 - round(distance), compared to input_y.
            self.temp_sim = tf.subtract(tf.ones_like(self.distance),tf.rint(self.distance), name="temp_sim") #auto threshold 0.5
            correct_predictions = tf.equal(self.temp_sim, self.input_y)
            self.accuracy=tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
def xavier_init(input_size, output_size, uniform=True):
    """Return a Xavier/Glorot-style initializer for the given layer sizes.

    Args:
        input_size: number of input units (fan-in).
        output_size: number of output units (fan-out).
        uniform: if true, sample uniformly from [-r, r] with
            r = sqrt(6 / (input_size + output_size)); otherwise use a
            truncated normal with stddev = sqrt(3 / (input_size + output_size)).

    Returns:
        A TensorFlow initializer.
    """
    if uniform:
        init_range = tf.sqrt(6.0/(input_size+output_size))
        # random_uniform_initializer takes (minval, maxval); it has no
        # standard-deviation keyword.
        return tf.random_uniform_initializer(-init_range, init_range)
    stddev = tf.sqrt(3.0/(input_size+output_size))
    return tf.truncated_normal_initializer(stddev=stddev)
Ejemplo n.º 19
0
    def sample_weights(self, weights):
        """Draw one sample of every decoder weight matrix.

        Each matrix is sampled as mean + sqrt(exp(logvar)) * eps with
        eps ~ N(0, 1), and has one extra input row (fan-in + 1).

        Args:
            weights: dict holding 'l<i>mean' / 'l<i>logvar' for every decoder
                layer plus 'out_mean_mean' / 'out_mean_logvar'.

        Returns:
            (sampled_weights, log_p, log_q): the list of sampled matrices in
            layer order (output layer last), and the sums of
            `self.log_p_theta` and `self.log_q_theta` over all of them.
        """
        decoder_sizes = self.network_architecture['decoder_net']

        # (fan_in, fan_out, mean key, logvar key) for every weight matrix.
        layer_specs = []
        fan_in = self.n_z
        for layer_i, fan_out in enumerate(decoder_sizes):
            prefix = 'l' + str(layer_i)
            layer_specs.append((fan_in, fan_out, prefix + 'mean', prefix + 'logvar'))
            fan_in = fan_out
        layer_specs.append((decoder_sizes[-1], self.n_input, 'out_mean_mean', 'out_mean_logvar'))

        log_p = 0
        log_q = 0
        sampled_weights = []
        for fan_in, fan_out, mean_key, logvar_key in layer_specs:
            eps = tf.random_normal((fan_in + 1, fan_out), 0, 1, dtype=tf.float32)
            std = tf.sqrt(tf.exp(weights[logvar_key]))
            weights_ = tf.add(weights[mean_key], tf.multiply(std, eps))
            # Parenthesised so the count matches the sampled shape; without
            # the parentheses `fan_in + 1 * fan_out` binds as
            # fan_in + fan_out.
            n_decoder_weights = (fan_in + 1) * fan_out
            log_p += self.log_p_theta(weights_, n_decoder_weights)
            log_q += self.log_q_theta(weights_, weights[mean_key], weights[logvar_key], n_decoder_weights)
            sampled_weights.append(weights_)

        return sampled_weights, log_p, log_q
Ejemplo n.º 20
0
    def recognition_network(self,weights,biases,batch_norm):
        """Encoder: map `self.x` to the latent mean and log-variance.

        The input and both hidden layers are normalised with batch
        statistics (over axis 0) and then scaled and shifted with entries of
        `batch_norm`.

        Args:
            weights: dict with 'h1', 'h2', 'out_mean', 'out_log_sigma'.
            biases: dict with 'b1', 'b2', 'out_mean', 'out_log_sigma'.
            batch_norm: dict with 'gn_x', 'bn_x', 'gn_g1', 'gn_g2'.

        Returns:
            (z_mean, z_log_sigma_sq)
        """
        epsilon = 1e-16

        # batch norm on the raw input
        xm1,xv1 = tf.nn.moments(self.x,[0])
        bn_x = ((self.x-xm1)/tf.sqrt(xv1+epsilon))*batch_norm['gn_x']+batch_norm['bn_x']

        layer_1 = self.transfert_fct(tf.add(tf.matmul(bn_x,weights['h1']),biases['b1']))
        bm1,bv1 = tf.nn.moments(layer_1,[0])
        # NOTE(review): 'gn_g1' is used as both scale and shift here, unlike
        # the input normalisation above ('gn_x' / 'bn_x') -- confirm intent.
        bn_1 = ((layer_1-bm1)/tf.sqrt(bv1+epsilon))*batch_norm['gn_g1']+batch_norm['gn_g1']

        layer_2 = self.transfert_fct(tf.add(tf.matmul(bn_1,weights['h2']),biases['b2']))
        bm2,bv2 = tf.nn.moments(layer_2,[0])
        # NOTE(review): same pattern as above, with 'gn_g2' for scale and shift.
        bn_2 = ((layer_2-bm2)/tf.sqrt(bv2+epsilon))*batch_norm['gn_g2']+batch_norm['gn_g2']

        z_mean = tf.add(tf.matmul(bn_2,weights['out_mean']),biases['out_mean'])
        # The log-variance head reads the un-normalised layer_2 and is left
        # as a plain linear output (an earlier variant applied softplus;
        # the original author questioned why).
        z_log_sigma_sq = (tf.add(tf.matmul(layer_2,weights['out_log_sigma']),biases['out_log_sigma']))

        return z_mean, z_log_sigma_sq
Ejemplo n.º 21
0
def dense(x, num_units, nonlinearity=None, init_scale=1., counters={}, init=False, ema=None, **kwargs):
    ''' fully connected layer

    Weight-normalized dense layer.  With init=True, g and b are additionally
    re-scaled / shifted from the statistics of the current batch before the
    output is produced.
    '''
    name = get_name('dense', counters)
    with tf.variable_scope(name):
        in_dim = int(x.get_shape()[1])
        row_shape = [1, num_units]

        # (name, shape, initializer), created in this order.
        var_specs = (
            ('V', [in_dim, num_units], tf.random_normal_initializer(0, 0.05)),
            ('g', [num_units], tf.constant_initializer(1.)),
            ('b', [num_units], tf.constant_initializer(0.)),
        )
        V, g, b = [
            get_var_maybe_avg(var_name, ema, shape=var_shape, dtype=tf.float32,
                              initializer=var_init, trainable=True)
            for var_name, var_shape, var_init in var_specs
        ]

        # use weight normalization (Salimans & Kingma, 2016)
        x = tf.matmul(x, V)
        column_scale = g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
        x = tf.reshape(column_scale, row_shape) * x + tf.reshape(b, row_shape)

        if init: # normalize x
            m_init, v_init = tf.nn.moments(x, [0])
            scale_init = init_scale/tf.sqrt(v_init + 1e-10)
            data_dependent_init = [g.assign(g*scale_init), b.assign_add(-m_init*scale_init)]
            # The assignments run before the output below is evaluated.
            with tf.control_dependencies(data_dependent_init):
                x = tf.nn.l2_normalize(x, axis=0)

        # apply nonlinearity
        return x if nonlinearity is None else nonlinearity(x)
Ejemplo n.º 22
0
    def _encode(self, boxes, anchors):
        """Encodes a box collection with respect to an anchor collection.

        Each box is described by its centre offset from the anchor, divided
        by the anchor's side length sqrt(h * w), and by the log-ratio of the
        box and anchor side lengths.

        Args:
          boxes: BoxList holding N boxes to be encoded.
          anchors: BoxList of anchors.

        Returns:
          a tensor representing N anchor-encoded boxes of the format
          [ty, tx, tl].
        """
        ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
        ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()

        # EPSILON keeps the divisions and the log below free of NaN.
        anchor_side = tf.sqrt(ha * wa) + EPSILON
        box_side = tf.sqrt(h * w) + EPSILON

        targets = [
            (ycenter - ycenter_a) / anchor_side,   # ty
            (xcenter - xcenter_a) / anchor_side,   # tx
            tf.log(box_side / anchor_side),        # tl
        ]
        # Scales location targets for joint training.
        if self._scale_factors:
            targets = [target * self._scale_factors[idx]
                       for idx, target in enumerate(targets)]
        return tf.transpose(tf.stack(targets))
Ejemplo n.º 23
0
def evalFunction( classVec, attributeVec, groundTruthLabels ):
    """Cosine similarity between every row of classVec and of attributeVec.

    Both inputs are L2-normalised along axis 1 and then multiplied as
    classVec x attributeVec^T.  `groundTruthLabels` is accepted but not used.
    """
    def _unit_rows(matrix):
        # Divide each row by its Euclidean length (no zero guard).
        return matrix / tf.sqrt(tf.reduce_sum(tf.square(matrix), 1, keep_dims=True))

    class_unit = _unit_rows(classVec)
    attribute_unit = _unit_rows(attributeVec)
    return tf.matmul(class_unit, attribute_unit, transpose_b=True)
 def f1():
     """TensorFlow branch taken when no jump occurs.

     Returns (t, counter, norm, propa, vector), with the state vector and
     propagator divided by sqrt(new_norm), the counter reset to 0 and the
     random number `self.r` carried over unchanged.
     """
     # The state was already evolved by Heff, so it only needs normalising;
     # the same random number is kept for the next step.
     normalised_state = inter_vec_temp / tf.sqrt(new_norm)
     normalised_propagator = prob / tf.sqrt(new_norm)
     jump_counter = tf.constant(0)
     return self.r, jump_counter, norm, normalised_propagator, normalised_state
Ejemplo n.º 25
0
def encoder(inputs, training=True, scope="encoder", reuse=None):
    '''
    Embed the inputs, run them through a stack of dilated residual
    convolution blocks and project back to the embedding size.

    Args:
      inputs: A 2d tensor with shape of [N, Tx], with dtype of int32. Encoder inputs.
      training: Whether or not the layer is in training mode.
      scope: Optional scope for `variable_scope`
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.

    Returns:
      A pair (keys, vals), each with the shape of (N, Tx, e). `vals` is the
      scaled sum of `keys` and the input embedding.
    '''
    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("text_embedding"):
            embedding = embed(inputs, hp.vocab_size, hp.embed_size)  # (N, Tx, e)

        with tf.variable_scope("encoder_prenet"):
            hidden = fc_block(embedding, hp.enc_channels, training=training) # (N, Tx, c)

        with tf.variable_scope("encoder_conv"):
            for layer_idx in range(hp.enc_layers):
                # The dilation rate doubles with every layer.
                conv_out = conv_block(hidden,
                                      size=hp.enc_filter_size,
                                      rate=2**layer_idx,
                                      training=training,
                                      scope="encoder_conv_{}".format(layer_idx)) # (N, Tx, c)
                # Residual connection, scaled by sqrt(0.5).
                hidden = (conv_out + hidden) * tf.sqrt(0.5)

        with tf.variable_scope("encoder_postnet"):
            keys = fc_block(hidden, hp.embed_size, training=training) # (N, Tx, e)
            vals = tf.sqrt(0.5) * (keys + embedding) # (N, Tx, e)

    return keys, vals
Ejemplo n.º 26
0
def disjunction_of_literals(literals, label="no_label"):
    """Combine literal tensors with an OR, then aggregate the result.

    The literals' tensors are concatenated along axis 1, combined per row
    according to the module-level `default_tnorm`, and finally reduced
    according to the module-level `default_aggregator`.

    Args:
        literals: Iterable of objects exposing a `.tensor` attribute.
        label: Name given to the resulting op for the branches that set one.

    Returns:
        The aggregated tensor. If `default_aggregator` matches none of the
        handled names the function falls through and returns None; if
        `default_tnorm` matches none of them, `result` is never bound and
        the aggregation step fails.
    """
    list_of_literal_tensors = [lit.tensor for lit in literals]
    # Axis-first argument order, matching the TensorFlow API version this
    # block is written against (see also tf.mul / tf.inv below).
    literals_tensor = tf.concat(1, list_of_literal_tensors)

    # Per-row disjunction. The names are mutually exclusive, so at most one
    # branch runs.
    if default_tnorm == "product":
        result = 1.0-tf.reduce_prod(1.0-literals_tensor, 1, keep_dims=True)
    elif default_tnorm == "yager2":
        result = tf.minimum(1.0, tf.sqrt(tf.reduce_sum(tf.square(literals_tensor), 1, keep_dims=True)))
    elif default_tnorm == "luk":
        # print() with one argument behaves the same on Python 2 and 3,
        # unlike the print statement, which is a syntax error on Python 3.
        print("data aggregator is lukas")
        result = tf.minimum(1.0, tf.reduce_sum(literals_tensor, 1, keep_dims=True))
        PR(result)
    elif default_tnorm == "goedel":
        result = tf.reduce_max(literals_tensor, 1, keep_dims=True, name=label)

    # Aggregation over all entries of the disjunction result.
    if default_aggregator == "product":
        return tf.reduce_prod(result, keep_dims=True)
    if default_aggregator == "mean":
        print("data aggregator is mean")
        return tf.reduce_mean(result, keep_dims=True, name=label)
    if default_aggregator == "gmean":
        return tf.exp(tf.mul(tf.reduce_sum(tf.log(result), keep_dims=True),
                             tf.inv(tf.to_float(tf.size(result)))), name=label)
    if default_aggregator == "hmean":
        print("data aggregator is hmean")
        return tf.div(tf.to_float(tf.size(result)), tf.reduce_sum(tf.inv(result), keep_dims=True))
    if default_aggregator == "min":
        print("data aggregator is min")
        return tf.reduce_min(result, keep_dims=True, name=label)
    if default_aggregator == "qmean":
        print("data aggregator is qmean")
        return tf.sqrt(tf.reduce_mean(tf.square(result), keep_dims=True), name=label)
    if default_aggregator == "cmean":
        print("data aggregator is cmean")
        return tf.pow(tf.reduce_mean(tf.pow(result, 3), keep_dims=True), tf.inv(tf.to_float(3)), name=label)
Ejemplo n.º 27
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply the gradients and also return the symbolic update rules.

        Args:
            grads_and_vars: Iterable of (gradient, variable) pairs.
            global_step: Forwarded to the base class.
            name: Forwarded to the base class and used for the name scope
                (default scope name 'Adam_Dynamics').

        Returns:
            Tuple `(ts, dynamics)`: the result of the base class
            `apply_gradients`, and a list of `(state, next_state)` pairs
            covering every variable, both of its slots, and the two beta
            power accumulators.
        """
        base_update = super().apply_gradients(grads_and_vars, global_step, name)

        m_slot, v_slot = self.get_slot_names()
        dynamics = []

        with tf.name_scope(name, 'Adam_Dynamics'):
            beta1_power, beta2_power = self._beta1_power, self._beta2_power
            step_size = self._lr_t * tf.sqrt(1. - beta2_power) / (1. - beta1_power)

            for grad, var in grads_and_vars:
                m = self.get_slot(var, m_slot)
                v = self.get_slot(var, v_slot)
                next_m = tf.add(self._beta1_t * m,
                                (1. - self._beta1_t) * grad,
                                name=m.op.name)
                next_v = tf.add(self._beta2_t * v,
                                (1. - self._beta2_t) * grad * grad,
                                name=v.op.name)

                # IMPORTANT NOTE: in the original implementation epsilon sits
                # outside the sqrt, but that makes the hypergradient
                # numerically unstable, so it is squared and moved inside.
                scaled_m = step_size * next_m
                denominator = tf.sqrt(next_v + self._epsilon_t**2)
                next_var = tf.subtract(var, scaled_m / denominator,
                                       name=var.op.name)

                dynamics += [(var, next_var), (m, next_m), (v, next_v)]

            next_beta1_power = beta1_power * self._beta1_t
            next_beta2_power = beta2_power * self._beta2_t
            dynamics += [(beta1_power, next_beta1_power),
                         (beta2_power, next_beta2_power)]

        return base_update, dynamics
Ejemplo n.º 28
0
  def _encode(self, boxes, anchors):
    """Encodes boxes as distances from the anchor centers to the box edges.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [top, bottom, left, right], where each entry is the absolute distance
      from the anchor center to the corresponding edge of the box. The
      vertical pair is multiplied by `self._scale_factors[0]` and the
      horizontal pair by `self._scale_factors[1]` when scale factors are set.
    """
    # Only the anchor centers are needed; anchor sizes do not enter the
    # encoding.
    ycenter_a, xcenter_a, _, _ = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()

    # ycenter - 0.5 * h is the top edge, ycenter + 0.5 * h the bottom edge;
    # likewise for left/right with xcenter and w.
    top = tf.abs(ycenter_a - ycenter + 0.5*h)
    bottom = tf.abs(ycenter_a - ycenter - 0.5*h)
    left = tf.abs(xcenter_a - xcenter + 0.5*w)
    right = tf.abs(xcenter_a - xcenter - 0.5*w)
    # Scales location targets for joint training.
    if self._scale_factors:
      top *= self._scale_factors[0]
      bottom *= self._scale_factors[0]
      left *= self._scale_factors[1]
      right *= self._scale_factors[1]
    # Stack puts the four components on the first axis; transpose so each
    # row holds one box.
    return tf.transpose(tf.stack([top, bottom, left, right]))
Ejemplo n.º 29
0
    def bhattacharyya(self):
        """Approximate Bhattacharyya distance between cover and non-cover distances.

        Comparable to the Mahalanobis distance but allows the two
        distributions to have different variances. Both are treated as
        normal, which is why the result is only approximate.

        Returns:
            tf.Tensor: Bhattacharyya distance between the distributions of
                the cover and non-cover pairs' distances.
            tf.Tensor: mean cover pair distance
            tf.Tensor: mean non-cover pair distance
        """
        emb_a = self.subnet_A[-1]
        emb_b = self.subnet_B[-1]
        sq_dists = tf.reduce_sum(tf.square(emb_a - emb_b), reduction_indices=1)

        # Indices of the rows flagged as cover (1) and non-cover (0).
        cover_idx = tf.where(
            tf.equal(self.is_cover, tf.ones_like(self.is_cover)))
        non_cover_idx = tf.where(
            tf.equal(self.is_cover, tf.zeros_like(self.is_cover)))

        cover_dists = tf.sqrt(
            tf.gather(sq_dists, tf.reshape(cover_idx, [-1])))
        non_cover_dists = tf.sqrt(
            tf.gather(sq_dists, tf.reshape(non_cover_idx, [-1])))

        mu_cover, var_cover = tf.nn.moments(
            cover_dists, axes=[0], name='d_pairs')
        mu_non_cover, var_non_cover = tf.nn.moments(
            non_cover_dists, axes=[0], name='d_non_pairs')

        # First term penalises differing variances, second term the gap
        # between the means.
        variance_term = 0.25 * tf.log(
            0.25 * (var_cover/var_non_cover + var_non_cover/var_cover + 2))
        mean_term = (0.25 * (mu_cover - mu_non_cover)**2
                     / (var_cover + var_non_cover))
        distance = tf.add(variance_term, mean_term, name='bhatt')
        return distance, mu_cover, mu_non_cover
Ejemplo n.º 30
0
def get_weight_stats(x, axis):
  """ Compute weight statistics over the given axis.

  Args:
    x: tf.Tensor or None
      the values to summarise.
    axis: int
      axis to perform statistics over.
  Returns:
    list of tf.Tensor
      four tensors, each reshaped to [-1, 1, 1]: mean absolute value,
      root mean square (with 1e-6 added under the root), mean, and
      standard deviation (with 1e-8 added under the root). An empty list
      when `x` is None.
  """
  if x is None:
    return []

  mean_abs = tf.reduce_mean(tf.abs(x), axis=axis)
  root_mean_sq = tf.sqrt(tf.reduce_mean(x**2, axis=axis) + 1e-6)

  mean, var = tf.nn.moments(x, [axis])
  std = tf.sqrt(var + 1e-8)

  return [tf.reshape(stat, [-1, 1, 1])
          for stat in (mean_abs, root_mean_sq, mean, std)]
Ejemplo n.º 31
0
Archivo: qaseq.py Proyecto: poyuwu/QA
def norm(tensor):
    """Divide `tensor` by its L2 norm taken over the last axis.

    1e-12 is added to the norm so an all-zero slice does not divide by zero.
    """
    squared_sum = tf.reduce_sum(tf.square(tensor), -1, keep_dims=True)
    length = tf.sqrt(squared_sum) + 1e-12
    return tensor / length
Ejemplo n.º 32
0
  def run_test_sample_consistent_mean_covariance(
      self,
      sess_run_fn,
      dist,
      num_samples=int(1e5),
      seed=24,
      rtol=1e-2,
      atol=0.1,
      cov_rtol=None,
      cov_atol=None):
    """Checks that sampled statistics agree with the analytical ones.

    Draws `num_samples` samples from `dist`, estimates mean, covariance,
    variance and standard deviation from them, and asserts that each is
    close to the value returned by the matching method of `dist`.

    Args:
      sess_run_fn: Python `callable` taking a `list`-like of `Tensor`s and
        returning a list of results after running one "step" of TensorFlow
        computation, typically set to `sess.run`.
      dist: Distribution instance or object which implements `sample`,
        `mean`, `covariance`, `variance` and `stddev`.
      num_samples: Python `int` scalar, the number of Monte-Carlo samples to
        draw from `dist`.
      seed: Python `int`, the seed used when sampling from `dist`. Using
        `None` in a test is discouraged because it raises the chance of
        spurious failures.
      rtol: Python `float`, admissible relative error between analytical and
        sample statistics.
      atol: Python `float`, admissible absolute error between analytical and
        sample statistics.
      cov_rtol: Python `float`, admissible relative error between analytical
        and sample covariance. Default: rtol.
      cov_atol: Python `float`, admissible absolute error between analytical
        and sample covariance. Default: atol.
    """
    samples = dist.sample(num_samples, seed=seed)
    sample_mean = tf.reduce_mean(input_tensor=samples, axis=0)
    centered = samples - sample_mean
    sample_covariance = tf.reduce_mean(
        input_tensor=_vec_outer_square(centered), axis=0)
    sample_variance = tf.linalg.diag_part(sample_covariance)
    sample_stddev = tf.sqrt(sample_variance)

    # Empirical statistics first, analytical ones second; the unpacking
    # below relies on this order.
    fetches = [
        sample_mean,
        sample_covariance,
        sample_variance,
        sample_stddev,
        dist.mean(),
        dist.covariance(),
        dist.variance(),
        dist.stddev(),
    ]
    (sample_mean_, sample_covariance_, sample_variance_, sample_stddev_,
     mean_, covariance_, variance_, stddev_) = sess_run_fn(fetches)

    # The covariance check may use its own tolerances; a falsy value falls
    # back to the general ones.
    covariance_rtol = cov_rtol or rtol
    covariance_atol = cov_atol or atol

    self.assertAllClose(mean_, sample_mean_, rtol=rtol, atol=atol)
    self.assertAllClose(covariance_, sample_covariance_,
                        rtol=covariance_rtol,
                        atol=covariance_atol)
    self.assertAllClose(variance_, sample_variance_, rtol=rtol, atol=atol)
    self.assertAllClose(stddev_, sample_stddev_, rtol=rtol, atol=atol)
Ejemplo n.º 33
0
 def amp(x):
     """Return 1 plus the L2 norm of `x` over its last axis (kept as size 1).

     1.e-8 is added under the square root.
     """
     squared_norm = tf.reduce_sum(x**2, axis=-1, keepdims=True)
     return 1 + tf.sqrt(1.e-8 + squared_norm)
Ejemplo n.º 34
0
def configure(inputs, batch_size, target_outputs, is_training, learning_rate,
              beta1, is_depthwise_sep, decay, gen_scale):
    """Operations to calculate network losses and run training operations.

    The graph that is built depends on the module-level flags `adversarial`,
    `use_gradient_penalty`, `standard_wass` and `use_l2_loss`.

    Args:
        inputs: Generator input tensor. In adversarial mode it is also
            concatenated (last axis) onto what the discriminator sees.
        batch_size: Examples per batch; used to slice the discriminator
            predictions back into their fake / real / interpolated parts.
        target_outputs: Ground-truth tensor for the generator output.
        is_training: Forwarded to `generator`.
        learning_rate: Unused; the optimizer learning rates are hard-coded.
        beta1: Unused; `beta1=0.5` is hard-coded for both optimizers.
        is_depthwise_sep: Forwarded to `generator`.
        decay: Unused.
        gen_scale: Unused.

    Returns:
        Tuple `(train_op, output_loss, output0)`:
        `train_op` is `[gen_train_op, discr_train_op]` in adversarial mode
        and a single op otherwise; `output_loss` is a dict with key "Loss"
        (mean absolute error between the second generator output and the
        original targets) plus, in adversarial mode only, "pred_real" and
        "pred_fake"; `output0` is the first generator output.
    """
    # Keep the un-concatenated targets for the reported loss.
    target_outputs0 = target_outputs

    with tf.variable_scope("gen"):
        output0, phase_components = generator(
            inputs=inputs,
            num_outputs=target_outputs.get_shape().as_list()[-1],
            is_training=is_training,
            is_depthwise_sep=is_depthwise_sep)
        output = output0

    if adversarial:
        # The discriminator is shown the generator input next to each
        # candidate output.
        output = tf.concat([inputs, phase_components], axis=-1)
        target_outputs = tf.concat([inputs, target_outputs], axis=-1)

        if use_gradient_penalty:
            # Random points between generated and real examples, used for
            # the gradient penalty below.
            x_hat = output + tf.random_uniform(
                output.get_shape().as_list()) * (target_outputs - output)
            discr_batch = tf.concat([output, target_outputs, x_hat], axis=0)
        else:
            discr_batch = tf.concat([output, target_outputs], axis=0)

        with tf.variable_scope("main/discr"):
            preds = large_discriminator(discr_batch)

        # Undo the batch-axis concatenation.
        fake_pred = preds[:batch_size]
        real_pred = preds[batch_size:2 * batch_size]

        if use_gradient_penalty:
            x_hat_pred = preds[2 * batch_size:3 * batch_size]
            grad = tf.gradients(x_hat_pred, [x_hat])[0]
            grad_norm2 = tf.sqrt(
                1.e-6 + tf.reduce_sum(tf.square(grad), axis=[1, 2, 3]))
            # Penalise gradient norms that differ from 1.
            gradient_penalty = tf.reduce_mean((grad_norm2 - 1.)**2)

        if use_gradient_penalty or standard_wass:
            discr_loss = tf.reduce_mean(fake_pred - real_pred)
            gen_loss = -tf.reduce_mean(fake_pred)
        else:
            # Squared-error objective: real -> 1, fake -> 0.
            discr_loss = tf.reduce_mean((real_pred - 1)**2 + (fake_pred)**2)
            gen_loss = tf.reduce_mean((fake_pred - 1)**2)

        if standard_wass:
            # Weight-clipping ops are only collected here; running them is
            # left to whoever reads the "clip_weights" collection.
            for v in tf.trainable_variables("main/discr"):
                tf.add_to_collection(
                    "clip_weights", v.assign(tf.clip_by_value(v, -0.01, 0.01)))

        if use_gradient_penalty:
            discr_loss += 10 * gradient_penalty

        if use_l2_loss:
            gen_l2_loss = tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables("gen")])
            discr_l2_loss = tf.add_n([
                tf.nn.l2_loss(v) for v in tf.trainable_variables("main/discr")
            ])

            discr_loss += 5.e-5 * discr_l2_loss
            gen_loss += 5.e-5 * gen_l2_loss

        discr_optimizer = tf.train.AdamOptimizer(learning_rate=0.00005,
                                                 beta1=0.5)

    else:
        # Per-example mean squared error, scaled by 10.
        mse = 10 * tf.reduce_mean(tf.square(output - target_outputs),
                                  axis=[1, 2, 3])
        mean_mse = tf.reduce_mean(mse)

        # L2 regularization over every trainable variable.
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

        gen_loss = mean_mse + 5.e-5 * l2_loss

    gen_optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5)

    # Update ops for batch normalisation must run with every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        if adversarial:
            gen_train_op = gen_optimizer.minimize(
                gen_loss, var_list=tf.trainable_variables("gen"))
            discr_train_op = discr_optimizer.minimize(
                discr_loss, var_list=tf.trainable_variables("main/discr"))
            train_op = [gen_train_op, discr_train_op]
        else:
            train_op = gen_optimizer.minimize(gen_loss)

    output_loss = {
        "Loss": tf.reduce_mean(tf.abs(phase_components - target_outputs0)),
    }
    # The discriminator predictions only exist in adversarial mode; reading
    # them unconditionally raised NameError on the non-adversarial path.
    if adversarial:
        output_loss["pred_real"] = tf.reduce_mean(real_pred)
        output_loss["pred_fake"] = tf.reduce_mean(fake_pred)

    return train_op, output_loss, output0
Ejemplo n.º 35
0
def generator(inputs, num_outputs, is_training, is_depthwise_sep=False):
    """Convolutional encoder / residual / decoder network.

    Args:
    inputs: Images tensor with shape [batch_size, height, width, channels].
    num_outputs: Number of channels in network output.
    is_training: Bool indicating whether to use training operations
    is_depthwise_sep: Forwarded to the strided and penultimate `conv` calls.

    Returns:
    A pair `(x, x0)`: `x0` is the projected feature map normalised to unit
    L2 norm over the last axis, and `x` is `x0` multiplied by `inputs`.
    """

    base_size = 32

    features = tf.contrib.layers.batch_norm(inputs, is_training=is_training)
    features = conv(features, num_outputs=32, is_training=is_training)

    # Encoder: three stride-2 convolutions with 64, 128, 256 channels.
    for level in (1, 2, 3):
        features = conv(features,
                        num_outputs=base_size * 2**level,
                        stride=2,
                        is_depthwise_sep=is_depthwise_sep,
                        is_training=is_training,
                        actv_fn=std_actv)

        if level == 2:
            # Captured for a skip connection into the decoder that is
            # currently disabled; not read anywhere below.
            low_level = features

    # Six residual blocks.
    for _ in range(6):
        features = residual_block(features, skip=3, is_training=is_training)

    # Decoder: three stride-2 transposed convolutions with 128, 64, 32
    # channels.
    for level in (2, 1, 0):
        features = conv(features,
                        num_outputs=base_size * 2**level,
                        stride=2,
                        is_depthwise_sep=is_depthwise_sep,
                        is_training=is_training,
                        transpose=True,
                        actv_fn=std_actv)

    features = conv(
        features,
        num_outputs=32,
        is_depthwise_sep=is_depthwise_sep,
        is_training=is_training,
    )

    # Project features onto the output channels.
    projected = conv(features,
                     num_outputs=num_outputs,
                     biases_initializer=None,
                     actv_fn=None,
                     is_batch_norm=True,
                     is_training=is_training)

    # Unit-normalise over the channel axis, then modulate by the input.
    unit = projected / tf.sqrt(
        1.e-8 + tf.reduce_sum(projected**2, axis=-1, keepdims=True))
    scaled = unit * inputs

    return scaled, unit
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """See base class."""
        update_ops = []
        for grad, param in grads_and_vars:
            if grad is None or param is None:
                continue

            param_name = self._get_variable_name(param.name)

            # First and second moment accumulators, created in that order.
            m, v = [
                tf.get_variable(name=param_name + suffix,
                                shape=param.shape.as_list(),
                                dtype=tf.float32,
                                trainable=False,
                                initializer=tf.zeros_initializer())
                for suffix in ("/lamb_m", "/lamb_v")
            ]

            # Standard Adam update.
            next_m = (tf.multiply(self.beta_1, m) +
                      tf.multiply(1.0 - self.beta_1, grad))
            next_v = (tf.multiply(self.beta_2, v) +
                      tf.multiply(1.0 - self.beta_2, tf.square(grad)))

            update = next_m / (tf.sqrt(next_v) + self.epsilon)

            # Adding the squared weights to the loss is *not* the right way
            # to do L2 regularization / weight decay with Adam, because the
            # penalty would then flow through the m and v statistics.
            # Decaying the weights directly keeps it independent of m/v,
            # which matches adding the squared weights to the loss under
            # plain (non-momentum) SGD.
            if self._do_use_weight_decay(param_name):
                update += self.weight_decay_rate * param

            # ---------------- LAMB-specific part ----------------
            # The scaling function \phi(z) could be either
            #   minmax:   \phi(z) = min(max(z, \gamma_l), \gamma_u)
            #   identity: \phi(z) = z
            # The paper does not give \gamma_l / \gamma_u; the ratio below
            # follows the code the authors provided on request: use
            # w_norm / g_norm when both norms are positive, otherwise 1.0.
            param_norm = tf.sqrt(tf.reduce_sum(tf.square(param)))
            update_norm = tf.sqrt(tf.reduce_sum(tf.square(update)))

            trust_ratio = tf.where(
                tf.greater(param_norm, 0.0),
                tf.where(tf.greater(update_norm, 0.0),
                         param_norm / update_norm, 1.0),
                1.0)

            next_param = param - self.learning_rate * trust_ratio * update

            update_ops.extend([
                param.assign(next_param),
                m.assign(next_m),
                v.assign(next_v),
            ])
        return tf.group(*update_ops, name=name)
Ejemplo n.º 37
0
def build_roi_align_graph(rois, feature_maps, image_meta, crop_size, config):
    """
    Apply ROI align, routing every ROI to one pyramid level by its area.
    (Supports different sample ratios via `config.sample_ratio`.)
    Args:
        rois: tensor with shape [batch_size, num_rois, 4]
        feature_maps: list of feature_map, each one is a tensor with shape [batch_size, H, W, 256]
        image_meta: tensor with shape [batch_size, 12]
        crop_size: output size after roi align.
        config: object providing `sample_ratio`.

    Returns:
        pooled features with the leading [batch_size, num_rois] dimensions
        of `rois` restored in front of the per-box output dimensions.
    """

    # Pick a pyramid level for each ROI from its area.
    y1, x1, y2, x2 = tf.split(rois, 4, axis=2)
    roi_h = y2 - y1
    roi_w = x2 - x1
    # Only the first image's shape is used; all images in a batch must have
    # the same size.
    image_shape = tensor_utils.parse_image_meta(image_meta)['image_shape'][0]
    # Equation 1 in the Feature Pyramid Networks paper, adjusted because the
    # coordinates here are normalized. E.g. a 224x224 ROI (in pixels) maps
    # to P4.
    image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32)
    relative_scale = tf.sqrt(roi_h * roi_w) / (224.0 / tf.sqrt(image_area))
    roi_level = tensor_utils.log2(relative_scale)
    # Clamp to the available levels P2..P5.
    roi_level = tf.minimum(
        5, tf.maximum(2, 4 + tf.cast(tf.round(roi_level), tf.int32)))
    roi_level = tf.squeeze(roi_level, 2)

    # Pool the boxes of each level P2..P5 from the matching feature map.
    pooled_per_level = []
    index_per_level = []
    for level in range(2, 6):
        level_ix = tf.where(tf.equal(roi_level, level))
        level_boxes = tf.gather_nd(rois, level_ix)

        # Batch index of every box, as required by crop_and_resize-style ops.
        box_indices = tf.cast(level_ix[:, 0], tf.int32)

        # Remember which box went to which level.
        index_per_level.append(level_ix)

        # No gradients flow back into the ROI proposals.
        level_boxes = tf.stop_gradient(level_boxes)
        box_indices = tf.stop_gradient(box_indices)

        pooled_per_level.append(
            roi_align(feature_maps[level - 2],
                      level_boxes,
                      box_indices=box_indices,
                      output_size=crop_size,
                      sample_ratio=config.sample_ratio))  # use 2

    # One tensor holding the pooled features of all levels.
    pooled = tf.concat(pooled_per_level, axis=0)
    # One array holding the box-to-level mapping, extended with a column
    # that records each box's position in `pooled`.
    box_to_level = tf.concat(index_per_level, axis=0)
    num_boxes = tf.shape(box_to_level)[0]
    pooled_position = tf.expand_dims(tf.range(num_boxes), 1)
    box_to_level = tf.concat(
        [tf.cast(box_to_level, tf.int32), pooled_position], axis=1)

    # Put the pooled features back in the order of the original boxes:
    # sort by batch index, then box index. TF cannot sort by two columns,
    # so they are merged into a single key first.
    sort_key = box_to_level[:, 0] * 100000 + box_to_level[:, 1]
    order = tf.nn.top_k(sort_key, k=tf.shape(box_to_level)[0]).indices[::-1]
    order = tf.gather(box_to_level[:, 2], order)
    pooled = tf.gather(pooled, order)

    # Restore the batch dimension.
    final_shape = tf.concat([tf.shape(rois)[:2], tf.shape(pooled)[1:]], axis=0)
    return tf.reshape(pooled, final_shape)
# Data values for x and y: each row is an [x, y] pair.
data = [[2, 81], [4, 93], [6, 91], [8, 97]]
x_data = [x_row[0] for x_row in data]
y_data = [y_row[1] for y_row in data]

# Pick random starting values for the slope a and the y-intercept b.
# The slope is drawn from the range 0 ~ 10 and the y-intercept from 0 ~ 100.
a = tf.Variable(tf.random_uniform([1], 0, 10, dtype=tf.float64, seed=0))
b = tf.Variable(tf.random_uniform([1], 0, 100, dtype=tf.float64, seed=0))

# Set up the linear equation y = ax + b.
y = a * x_data + b

# RMSE expressed with TensorFlow ops.
rmse = tf.sqrt(tf.reduce_mean(tf.square(y - y_data)))

# Learning rate.
learning_rate = 0.1

# Op that searches for the values minimising the RMSE.
gradient_decent = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    rmse)

# Training with TensorFlow.
with tf.Session() as sess:
    # Initialise the variables.
    sess.run(tf.global_variables_initializer())
    # Run 2001 steps (because step 0 is included).
    for step in range(2001):
        sess.run(gradient_decent)
    # time we evaluate the loss.
    # Explanation of the meaning of NCE loss:
    #   http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/
    loss = tf.reduce_mean(
        tf.nn.nce_loss(weights=nce_weights,
                       biases=nce_biases,
                       labels=train_labels,
                       inputs=embed,
                       num_sampled=num_sampled,
                       num_classes=vocabulary_size))

    # Construct the SGD optimizer using a learning rate of 1.0.
    optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

    # Compute the cosine similarity between minibatch examples and all embeddings.
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
    normalized_embeddings = embeddings / norm
    valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                              valid_dataset)
    similarity = tf.matmul(valid_embeddings,
                           normalized_embeddings,
                           transpose_b=True)

    # Add variable initializer.
    init = tf.global_variables_initializer()

# Step 5: Begin training.
num_steps = 500000

with tf.Session(graph=graph) as session:
    # We must initialize all variables before we use them.
Ejemplo n.º 40
0
def eval_once(saver, summary_writer, cifar_top_k_op, mnist_top_k_op, summary_op,itercount):
  """Run Eval once, after adding noise to the shared weights.

  Restores the latest checkpoint from FLAGS.checkpoint_dir, re-saves it under
  global_step + 1, perturbs every trainable variable whose name contains
  'shared_' with zero-mean Gaussian noise, then measures precision with both
  top-k ops and writes the results to the summary writer and `resultsFile`.

  Args:
    saver: Saver.
    summary_writer: Summary writer.
    cifar_top_k_op: Top K op evaluated for the CIFAR precision.
    mnist_top_k_op: Top K op evaluated for the MNIST precision.
    summary_op: Summary op.
    itercount: Noise scale multiplier; the noise stddev for a variable is
      sqrt(variance of that variable) * 0.01 * itercount.

  Returns:
    Tuple (global_step, NoiseTest), where global_step is the step string
    parsed from the checkpoint path. Returns None when no checkpoint is
    found.
  """
  # Bound up front so the final return cannot hit an unbound local if an
  # exception is swallowed early in the try block below.
  NoiseTest = True

  with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
      # Strip only the trailing "-<step>"; splitting on the first '-' would
      # truncate any path that contains a hyphen (such as the one above).
      ckpt_path_and_name = ckpt.model_checkpoint_path.rsplit('-', 1)[0]
    else:
      print('No checkpoint file found')
      return

    # Start the queue runners.
    coord = tf.train.Coordinator()
    threads = []
    try:
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))

      if NoiseTest:
        # saver is put here to always modify the same net, with different amount of noise!
        # counter is increased to be able to plot the graph!
        saver.save(sess,ckpt_path_and_name,global_step=int(global_step)+1)
        ## adding noise
        print('Adding noise to the loaded net..')
        shared_vars = [var for var in tf.trainable_variables()
                       if 'shared_' in var.name]

        for v in shared_vars:
          v1 = sess.graph.get_tensor_by_name(v.name)
          # Reduce over every axis to get a single variance per variable.
          rank = len(tf.shape(v1).eval())
          _, variance = tf.nn.moments(v1,list(range(rank)))
          noise = tf.random_normal(shape=tf.shape(v1), mean=0.0, stddev=tf.sqrt(variance)*0.01*itercount, dtype=tf.float32)
          sess.run(tf.assign(v1,v1+noise))

      num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
      cifar_true_count = 0  # Counts the number of correct predictions.
      cifar_total_sample_count = num_iter * FLAGS.batch_size
      step = 0
      while step < num_iter and not coord.should_stop():
        cifar_predictions = sess.run([cifar_top_k_op])
        cifar_true_count += np.sum(cifar_predictions)
        step += 1

      # Compute precision @ 1.
      cifar_precision = cifar_true_count / cifar_total_sample_count
      print('%s: CIFAR precision @ %d = %.3f' % (datetime.now(),int(global_step), cifar_precision))

      mnist_true_count = 0  # Counts the number of correct predictions.
      mnist_total_sample_count = num_iter * FLAGS.batch_size
      step = 0
      while step < num_iter and not coord.should_stop():
        mnist_predictions = sess.run([mnist_top_k_op])
        mnist_true_count += np.sum(mnist_predictions)
        step += 1

      # Compute precision @ 1.
      mnist_precision = mnist_true_count / mnist_total_sample_count
      print('%s: MNIST precision @ %d = %.3f' % (datetime.now(),int(global_step), mnist_precision))

      summary = tf.Summary()
      summary.ParseFromString(sess.run(summary_op))
      summary.value.add(tag='CIFAR Precision @ 1', simple_value=cifar_precision)
      summary.value.add(tag='MNIST Precision @ 1', simple_value=mnist_precision)
      summary_writer.add_summary(summary, global_step)
      resultsFile.write(str(global_step)+";"+str(cifar_precision)+";"+str(mnist_precision)+"\n")
    except Exception as e:  # pylint: disable=broad-except
      # Hand the error to the coordinator so the queue-runner threads stop.
      coord.request_stop(e)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=10)
  return global_step, NoiseTest
Ejemplo n.º 41
0
    def body1(self, num, object_num, loss, predict, labels, nilboy):
        """Add the loss terms of ground-truth object ``num`` to the running totals.

        The signature (loop state in, updated loop state out) suggests this is
        used as the body of a ``tf.while_loop`` over the objects of one image
        -- TODO confirm against the caller.

        Args:
          num: index of the row of ``labels`` handled by this call.
          object_num: not read here; returned unchanged (presumably the loop
            bound -- verify against the caller).
          loss: sequence of four running totals, in the order
            ``[class_loss, object_loss, noobject_loss, coord_loss]``.
          predict: 3-D tensor
            ``[cell_size, cell_size, num_classes + 5 * boxes_per_cell]``.
            The last axis is sliced as ``num_classes`` class scores, then
            ``boxes_per_cell`` confidences, then 4 coordinates per box.
          labels: ``[max_objects, 5]`` rows of
            ``(x_center, y_center, w, h, class)``; coordinates are divided by
            ``image_size / cell_size`` to obtain cell indices, so they are
            presumably in the same units as ``self.image_size``.
          nilboy: value is ignored on input; it is replaced by the
            responsible-box mask ``I`` before being returned.

        Returns:
          Tuple ``(num + 1, object_num, updated_loss_list, predict, labels, I)``.
        """
        label = labels[num:num + 1, :]
        label = tf.reshape(label, [-1])

        # Build `objects`: a [cell_size, cell_size] mask that is 1 on every
        # cell touched by the ground-truth box and 0 elsewhere.
        # First convert the box edges from image coordinates to cell indices.
        min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
        max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)

        min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
        max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)

        min_x = tf.floor(min_x)
        min_y = tf.floor(min_y)

        max_x = tf.ceil(max_x)
        max_y = tf.ceil(max_y)

        # A block of ones of size (rows, cols) covered by the box ...
        # NOTE(review): tf.pack is the pre-1.0 name of tf.stack.
        temp = tf.cast(tf.pack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
        objects = tf.ones(temp, tf.float32)

        # ... zero-padded (top, bottom, left, right) up to the full grid.
        temp = tf.cast(
            tf.pack(
                [min_y, self.cell_size - max_y, min_x,
                 self.cell_size - max_x]), tf.int32)
        temp = tf.reshape(temp, (2, 2))
        objects = tf.pad(objects, temp, "CONSTANT")

        # Build `response`: a [cell_size, cell_size] mask that is 1 only on
        # the single cell containing the box centre.
        center_x = label[0] / (self.image_size / self.cell_size)
        center_x = tf.floor(center_x)

        center_y = label[1] / (self.image_size / self.cell_size)
        center_y = tf.floor(center_y)

        response = tf.ones([1, 1], tf.float32)

        # Pad the 1x1 block so that it lands on (center_y, center_x).
        temp = tf.cast(
            tf.pack([
                center_y, self.cell_size - center_y - 1, center_x,
                self.cell_size - center_x - 1
            ]), tf.int32)
        temp = tf.reshape(temp, (2, 2))
        response = tf.pad(response, temp, "CONSTANT")
        #objects = response

        # Compute iou_predict_truth [cell_size, cell_size, boxes_per_cell].
        # The box coordinates are everything after the class scores and the
        # per-box confidences on the last axis of `predict`.
        predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]

        predict_boxes = tf.reshape(
            predict_boxes,
            [self.cell_size, self.cell_size, self.boxes_per_cell, 4])

        # Scale the raw predictions: x/y by the cell size, w/h by the image
        # size.
        predict_boxes = predict_boxes * [
            self.image_size / self.cell_size, self.image_size / self.cell_size,
            self.image_size, self.image_size
        ]

        # Per-cell (x, y) origin of each grid cell; the w/h offsets are 0.
        base_boxes = np.zeros([self.cell_size, self.cell_size, 4])

        for y in range(self.cell_size):
            for x in range(self.cell_size):
                #nilboy
                base_boxes[y, x, :] = [
                    self.image_size / self.cell_size * x,
                    self.image_size / self.cell_size * y, 0, 0
                ]
        # Repeat the cell origins for every box predicted in the cell.
        base_boxes = np.tile(
            np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]),
            [1, 1, self.boxes_per_cell, 1])

        # Shift the x/y predictions by the origin of their cell.
        predict_boxes = base_boxes + predict_boxes

        iou_predict_truth = self.iou(predict_boxes, label[0:4])
        # C [cell_size, cell_size, boxes_per_cell]: confidence target, the
        # IoU with the ground truth, kept only in the responsible cell.
        C = iou_predict_truth * tf.reshape(response,
                                           [self.cell_size, self.cell_size, 1])

        # I [cell_size, cell_size, boxes_per_cell]: starts as the same
        # masked IoU as C and is turned into a 0/1 mask below.
        I = iou_predict_truth * tf.reshape(response,
                                           (self.cell_size, self.cell_size, 1))

        max_I = tf.reduce_max(I, 2, keep_dims=True)

        # 1 for the box(es) reaching the maximum IoU in the responsible cell.
        # Outside that cell both I and max_I are 0, so the comparison is true
        # there as well; multiplying by `response` zeroes those cells again.
        I = tf.cast((I >= max_I), tf.float32) * tf.reshape(
            response, (self.cell_size, self.cell_size, 1))

        # no_I [cell_size, cell_size, boxes_per_cell]: complement of I.
        no_I = tf.ones_like(I, dtype=tf.float32) - I

        # Predicted per-box confidences.
        p_C = predict[:, :,
                      self.num_classes:self.num_classes + self.boxes_per_cell]

        # Ground-truth x, y, sqrt(w), sqrt(h) (scalars).
        x = label[0]
        y = label[1]

        sqrt_w = tf.sqrt(tf.abs(label[2]))
        sqrt_h = tf.sqrt(tf.abs(label[3]))
        #sqrt_w = tf.abs(label[2])
        #sqrt_h = tf.abs(label[3])

        # Predicted p_x, p_y, p_sqrt_w, p_sqrt_h, each
        # [cell_size, cell_size, boxes_per_cell].
        p_x = predict_boxes[:, :, :, 0]
        p_y = predict_boxes[:, :, :, 1]

        # Earlier variants of the width/height transform, kept for reference:
        #p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
        #p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
        #p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
        #p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
        #p_sqrt_w = predict_boxes[:, :, :, 2]
        #p_sqrt_h = predict_boxes[:, :, :, 3]
        # Clamp the predicted width/height to [0, image_size] before the
        # square root so that the sqrt argument is never negative.
        p_sqrt_w = tf.sqrt(
            tf.minimum(self.image_size * 1.0,
                       tf.maximum(0.0, predict_boxes[:, :, :, 2])))
        p_sqrt_h = tf.sqrt(
            tf.minimum(self.image_size * 1.0,
                       tf.maximum(0.0, predict_boxes[:, :, :, 3])))
        # Ground-truth class as a one-hot vector [num_classes].
        P = tf.one_hot(tf.cast(label[4], tf.int32),
                       self.num_classes,
                       dtype=tf.float32)

        # Predicted class scores [cell_size, cell_size, num_classes].
        p_P = predict[:, :, 0:self.num_classes]

        # class_loss: applied on every cell covered by the box (`objects`),
        # not only on the responsible cell.
        class_loss = tf.nn.l2_loss(
            tf.reshape(objects, (self.cell_size, self.cell_size, 1)) *
            (p_P - P)) * self.class_scale
        #class_loss = tf.nn.l2_loss(tf.reshape(response, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale

        # object_loss: confidence error of the responsible box(es) against
        # their IoU.
        object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale
        #object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * self.object_scale

        # noobject_loss: pushes the confidence of all other boxes towards 0.
        #noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * self.noobject_scale
        noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

        # coord_loss: x/y errors are expressed in cell units; the sqrt(w) and
        # sqrt(h) terms are divided by image_size after the l2 loss.
        coord_loss = (tf.nn.l2_loss(I * (p_x - x) /
                                    (self.image_size / self.cell_size)) +
                      tf.nn.l2_loss(I * (p_y - y) /
                                    (self.image_size / self.cell_size)) +
                      tf.nn.l2_loss(I *
                                    (p_sqrt_w - sqrt_w)) / self.image_size +
                      tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) /
                      self.image_size) * self.coord_scale

        # Hand the responsible-box mask back through the `nilboy` slot.
        nilboy = I

        return num + 1, object_num, [
            loss[0] + class_loss, loss[1] + object_loss,
            loss[2] + noobject_loss, loss[3] + coord_loss
        ], predict, labels, nilboy
Ejemplo n.º 42
0
 def _f(values):
     """Return the signed square root of ``values``: sign(v) * sqrt(|v|)."""
     magnitude = tf.abs(values)
     return tf.sign(values) * tf.sqrt(magnitude)
Ejemplo n.º 43
0
def train_model(graph, var_dict, train_data, max_epoch, hyper_param,
                output_dir, test_data=None, ex_printer=None, session=None):
    """Train a model with the provided data.

    Builds an Adagrad training op on the ``total_loss`` tensor of ``graph``
    (optionally with global-norm gradient clipping and additive Gaussian
    gradient noise) and runs up to ``max_epoch`` passes over ``train_data``.
    Training stops early once the mean training loss of an epoch drops
    below 0.05.

    Args:
        graph: ``tf.Graph`` that already contains the model.
        var_dict: maps logical names ("train_inputs", "train_outputs",
            "seq2seq_feed_previous", "train_input_mask", "train_output_mask",
            "type_masks", "token_accuracy", "sentence_accuracy",
            "total_loss") to tensor names inside ``graph``.
        train_data: object exposing ``Xs``, ``Ys``, ``XMasks``, ``YMasks``
            (sliced along the first axis) and ``type_masks`` (sliced along
            the second axis).
        max_epoch: maximum number of passes over the training data.
        hyper_param: dict with "learning_rate" and "batch_size"; the optional
            keys "gradient_clip_norm", "gradient_noise" and
            "gradient_noise_gamma" enable gradient processing.
        output_dir: directory receiving ``train.log`` and the checkpoints.
            ``None`` disables both; an empty string still writes the log but
            disables checkpointing.
        test_data: optional data evaluated with ``test_model`` after every
            epoch; when given, checkpoints are selected on its sentence
            accuracy instead of the training one.
        ex_printer: forwarded to ``test_model`` on every 30th epoch and on
            the last epoch.
        session: existing ``tf.Session`` to train in; a new one is created
            when ``None``.

    Returns:
        The session the model was trained in.
    """
    learning_rate = hyper_param["learning_rate"]
    batch_size = hyper_param["batch_size"]
    log_file = os.path.join(output_dir, "train.log") if output_dir is not None else None

    with graph.as_default():
        # The saver is created before the optimizer on purpose of keeping the
        # original checkpoint contents (presumably only the variables that
        # exist at this point are saved -- verify before moving this line).
        saver = tf.train.Saver()
        last_best_accuracy = 0.

        # place holders for the model
        train_inputs = graph.get_tensor_by_name(var_dict["train_inputs"])
        train_outputs = graph.get_tensor_by_name(var_dict["train_outputs"])

        seq2seq_feed_previous = graph.get_tensor_by_name(var_dict["seq2seq_feed_previous"])
        input_mask = graph.get_tensor_by_name(var_dict["train_input_mask"])
        output_mask = graph.get_tensor_by_name(var_dict["train_output_mask"])
        type_masks = graph.get_tensor_by_name(var_dict["type_masks"])

        # operators needed for testing
        token_accuracy = graph.get_tensor_by_name(var_dict["token_accuracy"])
        sentence_accuracy = graph.get_tensor_by_name(var_dict["sentence_accuracy"])
        total_loss = graph.get_tensor_by_name(var_dict["total_loss"])

        # Fed with the epoch index; only used to anneal the gradient noise.
        global_step = tf.placeholder(tf.int32, name="global_step")

        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)

        # gradient processing
        grad_clip_norm = hyper_param.get("gradient_clip_norm")
        grad_noise = hyper_param.get("gradient_noise")
        grad_noise_gamma = hyper_param.get("gradient_noise_gamma")

        grads, variables = zip(*optimizer.compute_gradients(total_loss))
        grads = list(grads)

        if grad_clip_norm:
            print("clipping norm: {}".format(grad_clip_norm))
            # Keep the clipped gradients in `grads` so that the noise step
            # below builds on them instead of on the raw gradients.
            grads, _ = tf.clip_by_global_norm(grads, grad_clip_norm)

        if grad_noise:
            if grad_noise_gamma:
                # Anneal the noise variance as 1 / (1 + step) ** gamma.
                grad_noise /= tf.pow(1.0 + tf.to_float(global_step), grad_noise_gamma)
            noise_std = tf.sqrt(grad_noise)
            # Variables without a gradient (None) are passed through as-is.
            grads = [
                g if g is None
                else g + noise_std * tf.random_normal(tf.shape(g))
                for g in grads
            ]
            print("noise added")

        train_step = optimizer.apply_gradients(list(zip(grads, variables)))

        session = tf.Session() if session is None else session

        output_log = None
        with session.as_default():
            try:
                if log_file is not None:
                    # initialize the logfile
                    output_log = open(log_file, "w")
                    log_header = "tr_tok_acc, test_tok_acc, tr_sen_acc, test_sen_acc, tr_loss, test_loss"
                    if test_data is None:
                        log_header = "tr_tok_acc, tr_sen_acc, tr_loss"
                    output_log.write(log_header)
                    output_log.write("\n")

                session.run(tf.global_variables_initializer())

                num_examples = len(train_data.Xs)
                nbatches = int(np.ceil(num_examples / float(batch_size)))

                for n in range(max_epoch):
                    print("================ epoch %d ==================" % n)
                    print("PROGRESS: 00.00%")

                    tr_token_accuracies = []
                    tr_sentence_accuracies = []
                    tr_losses = []

                    for i in range(nbatches):

                        left = i * batch_size
                        right = min((i + 1) * batch_size, num_examples)

                        Xt = train_data.Xs[left : right]
                        Yt = train_data.Ys[left : right]
                        XMasks = train_data.XMasks[left : right]
                        YMasks = train_data.YMasks[left : right]
                        ty_masks = train_data.type_masks[:,left : right,:]

                        #### HERE feed previous set to true to make it work #####
                        training_result = session.run([ token_accuracy,
                                                        sentence_accuracy,
                                                        total_loss,
                                                        train_step ],
                                                      feed_dict={ train_inputs : Xt, train_outputs : Yt,
                                                                  input_mask: XMasks, output_mask : YMasks,
                                                                  type_masks: ty_masks,
                                                                  seq2seq_feed_previous : False,
                                                                  global_step: n})

                        tr_token_accuracies.append(training_result[0])
                        tr_sentence_accuracies.append(training_result[1])
                        tr_losses.append(training_result[2])

                    # Per-epoch averages, computed once and reused below.
                    mean_loss = np.mean(tr_losses)
                    mean_token_accuracy = np.mean(tr_token_accuracies)
                    mean_sentence_accuracy = np.mean(tr_sentence_accuracies)

                    print("training_loss = {:.5f}".format(mean_loss))
                    print("train_token_accuracy = {:.5f}".format(mean_token_accuracy))
                    print("train_sentence_accuracy = {:.5f}".format(mean_sentence_accuracy))

                    if test_data is not None:
                        test_result = test_model(graph, var_dict, session, test_data, batch_size,
                                                 ex_printer=ex_printer if (n % 30 == 0 or n == max_epoch - 1) else None)

                        test_token_accuracy = test_result[0]
                        test_sentence_accuracy = test_result[1]
                        test_loss = test_result[2]

                        log_str = "{}, {}, {}, {}, {}, {}".format(
                                    mean_token_accuracy, test_token_accuracy,
                                    mean_sentence_accuracy, test_sentence_accuracy,
                                    mean_loss, test_loss)
                    else:
                        log_str = "{}, {}, {}".format(mean_token_accuracy,
                                                      mean_sentence_accuracy,
                                                      mean_loss)

                    if output_dir:
                        if test_data is not None:
                            current_seq_acc = test_sentence_accuracy
                        else:
                            current_seq_acc = mean_sentence_accuracy

                        if current_seq_acc > last_best_accuracy:
                            last_best_accuracy = current_seq_acc
                            # add global step so that we can keep multiple models around
                            saver.save(session, os.path.join(output_dir, "table_nl_prog"), global_step=n)

                    if output_log is not None:
                        # write corresponding data to log
                        output_log.write(log_str)
                        output_log.write("\n")

                    if mean_loss < 0.05:
                        break
            finally:
                # Release the log file even when training raises.
                if output_log is not None:
                    output_log.close()

    return session
Ejemplo n.º 44
0
    def call(self, inputs, training=None, mask=None):
        """Apply multi-head scaled dot-product attention.

        Heads are laid out head-major on the leading axis: after splitting,
        rows ``[h * batch, (h + 1) * batch)`` hold head ``h`` of every
        sample.  The key mask and the final merge use the same layout, so
        each sample is attended with its own mask and its heads are put back
        together without mixing samples.

        Args:
          inputs: unpacked by ``self._unpack`` into ``(query, key, value)``,
            each of shape (Batch size, steps, features).
          training: unused.
          mask: unpacked by ``self._unpack`` into
            ``(query_mask, key_mask, _)``; a mask, when not None, has shape
            (Batch size, steps).

        Returns:
          Tensor of shape (Batch size, query steps, features) after the
          output projection, optional bias, optional activation and optional
          query masking.
        """

        query, key, value = self._unpack(inputs)

        query_mask, key_mask, _ = self._unpack(mask)

        batch_size = tf.shape(query)[0]
        # Static feature size of the un-projected query, used for scaling.
        dimension_query = query.get_shape().as_list()[-1]
        seq_len = tf.shape(query)[-2]
        key_len = tf.shape(key)[-2]

        # Linear projections; the 2-D kernels are tiled over the batch so
        # that a batched matmul can be used.
        query = tf.matmul(
            query,
            tf.tile(tf.expand_dims(self.kernel_query, 0), [batch_size, 1, 1]))
        key = tf.matmul(
            key, tf.tile(tf.expand_dims(self.kernel_key, 0),
                         [batch_size, 1, 1]))
        value = tf.matmul(
            value,
            tf.tile(tf.expand_dims(self.kernel_value, 0), [batch_size, 1, 1]))
        if self.use_bias:
            query += self.b_query
            key += self.b_key
            value += self.b_value

        def _reshape_multihead(origin_input):
            """
      split the feature axis into heads stacked on the batch axis
        Input shape: (Batch size, steps, features)
        Output shape: (head num * Batch size, steps, features // head num)
      """
            return tf.concat(tf.split(origin_input, self.head_num, axis=2),
                             axis=0)

        def _merge_multihead(head_outputs):
            """
      inverse of _reshape_multihead
        Input shape: (head num * Batch size, steps, features // head num)
        Output shape: (Batch size, steps, features)
      """
            return tf.concat(tf.split(head_outputs, self.head_num, axis=0),
                             axis=2)

        def _reshape_mask(mask):
            """
      repeat mask for multi head, in the same head-major order
        Input shape: (Batch size, steps)
        Output shape: (head num * Batch size, steps)
      """
            if mask is None:
                return None
            return tf.tile(mask, [self.head_num, 1])

        query_ = _reshape_multihead(query)
        key_ = _reshape_multihead(key)
        value_ = _reshape_multihead(value)

        key_mask = _reshape_mask(key_mask)

        # (head num * Batch size, query steps, key steps)
        similaritys = tf.matmul(query_, tf.transpose(key_, [0, 2, 1]))
        # scale
        similaritys /= tf.sqrt(tf.cast(dimension_query, tf.float32))
        if self.sequence_mask:
            # Causal mask: push everything above the diagonal to -1e9.
            ones = tf.ones((seq_len, key_len))
            similaritys -= (ones - tf.matrix_band_part(ones, -1, 0)) * 1e9
        if key_mask is not None:
            # Masked-out keys get -1e9 so that softmax gives them ~0 weight.
            similaritys -= (1.0 - tf.cast(tf.expand_dims(key_mask, axis=-2),
                                          tf.float32)) * 1e9

        attention_weights = tf.keras.activations.softmax(similaritys)
        attention_outputs = tf.matmul(attention_weights, value_)
        # Put the heads of each sample back side by side on the feature axis.
        attention_outputs = _merge_multihead(attention_outputs)

        attention_outputs = tf.matmul(
            attention_outputs,
            tf.tile(tf.expand_dims(self.kernel_project, 0),
                    [batch_size, 1, 1]))
        if self.use_bias:
            attention_outputs += self.b_project
        if self.activation is not None:
            attention_outputs = self.activation(attention_outputs)

        if query_mask is not None:
            # Zero the outputs of masked query positions.
            attention_outputs *= tf.cast(tf.expand_dims(query_mask, axis=-1),
                                         tf.float32)

        return attention_outputs
Ejemplo n.º 45
0
    def __init__(self, env_action_space, env_observation_space,
                 planning_horizon=50, max_iterations=5, population_size=500,
                 num_elite=50, num_agents=5, alpha_cov=tf.constant(2.0, dtype=tf.float32),
                 h_sigma=tf.constant(1.0, dtype=tf.float32)):
        """
        This class defines a Covariance Matrix Adaptation Evolutionary-Strategy.
        (https://arxiv.org/pdf/1604.00772.pdf) Note: this optimizer is not optimized for more than one agent

        Parameters
        ---------
        env_action_space: gym.ActionSpace
            Defines the action space of the gym environment.
        env_observation_space: gym.ObservationSpace
            Defines the observation space of the gym environment.
        planning_horizon: Int
            Defines the planning horizon for the optimizer (how many steps to lookahead and optimize for).
        max_iterations: tf.int32
            Defines the maximum iterations for the CMAES optimizer to refine its guess for the optimal solution.
        population_size: tf.int32
            Defines the population size of the particles evaluated at each iteration.
        num_elite: tf.int32
            Defines the number of elites kept for the next iteration from the population.
        num_agents: tf.int32
            Defines the number of runner running in parallel
        alpha_cov: tf.float32
            Defines the alpha covariance to be used.
        h_sigma: tf.float32
            Defines the h sigma to be used.
        """
        super(CMAESOptimizer, self).__init__(name=None,
                                             planning_horizon=planning_horizon,
                                             max_iterations=max_iterations,
                                             num_agents=num_agents,
                                             env_action_space=env_action_space,
                                             env_observation_space=
                                             env_observation_space)
        self._solution_dim = [self._num_agents,
                              self._planning_horizon,
                              self._dim_U]
        self._population_size = population_size
        self._num_elite = num_elite

        # Initial mean: midpoint of the action bounds, repeated for every
        # (agent, time step) and flattened into one solution vector.
        tile_reps = [self._planning_horizon * self._num_agents, 1]
        action_midpoint = (self._action_lower_bound +
                           self._action_upper_bound) / 2
        previous_solution_values = tf.reshape(
            tf.constant(np.tile(action_midpoint, tile_reps), dtype=tf.float32),
            [-1])
        # Initial variance: (range / 4) ** 2, laid out like the mean.
        action_variance = np.square(self._action_lower_bound -
                                    self._action_upper_bound) / 16
        solution_variance_values = tf.reshape(
            tf.constant(np.tile(action_variance, tile_reps), dtype=tf.float32),
            [-1])

        # Recombination weights
        num_elite_f = tf.cast(self._num_elite, dtype=tf.float32)
        self._weights = tf.concat([
            tf.math.log(num_elite_f + 0.5) -
            tf.math.log(tf.range(1, num_elite_f + 1)),
            tf.zeros(shape=(self._population_size - self._num_elite,), dtype=tf.float32),
        ], axis=0)
        # Normalize weights such as they sum to one and reshape into a column matrix
        self._weights = (self._weights / tf.reduce_sum(self._weights))[:, tf.newaxis]
        self._mu_eff = tf.reduce_sum(self._weights) ** 2 / \
                       tf.reduce_sum(self._weights ** 2)
        self._solution_size = tf.reduce_prod(self._solution_dim)
        # The problem dimension is needed both as a float (in the strategy
        # parameter formulas) and as an integer (for tensor shapes, which
        # must not be given as floats).
        n = tf.cast(self._solution_size, dtype=tf.float32)
        n_int = tf.cast(self._solution_size, dtype=tf.int32)

        #step_size_control
        self._c_sigma = (self._mu_eff + 2) / (n + self._mu_eff + 5)
        self._d_sigma = 1 + 2 * tf.maximum(
            0.0, tf.sqrt((self._mu_eff - 1) / (n + 1)) - 1) + self._c_sigma
        #Covariance Matrix Adaptation
        self._cc = (4 + self._mu_eff / n) / (n + 4 + 2 * self._mu_eff / n)
        self._alpha_cov = alpha_cov
        self._h_sigma = h_sigma
        self._c1 = self._alpha_cov / ((n + 1.3) ** 2 + self._mu_eff)
        c_mu_option_two = self._alpha_cov * (self._mu_eff - 2 + 1 / self._mu_eff) / \
                          ((n + 2) ** 2 + self._alpha_cov * self._mu_eff / 2)
        self._c_mu = tf.minimum(1 - self._c1, c_mu_option_two)
        #define trainable parameters
        # Mean
        self._m = tf.Variable(previous_solution_values)
        # Step-size
        self._sigma = tf.Variable(tf.math.sqrt(solution_variance_values))
        # Covariance matrix
        self._C = tf.Variable(tf.eye(num_rows=n_int, dtype=tf.float32))
        # Evolution path for σ
        self._p_sigma = tf.Variable(tf.zeros((n_int,), dtype=tf.float32))
        # Evolution path for C
        self._p_C = tf.Variable(tf.zeros((n_int,), dtype=tf.float32))
        # Coordinate system (normalized eigenvectors)
        self._B = tf.Variable(tf.eye(num_rows=n_int, dtype=tf.float32))
        # Scaling (square root of eigenvalues)
        self._D = tf.Variable(tf.eye(num_rows=n_int, dtype=tf.float32))
        # E||N(0, I)|| ~= sqrt(n) * (1 - 1/(4n) + 1/(21 n^2)); only sqrt(n)
        # belongs under the square root, the correction factor multiplies it.
        self._expectation_of_normal = tf.sqrt(n) * (
            1 - 1 / (4 * n) + 1 / (21 * n ** 2))
Ejemplo n.º 46
0
def xavier_init(size):
    """Return a random-normal tensor of shape ``size`` for weight initialisation.

    The standard deviation is ``1 / sqrt(size[0] / 2)``, i.e. it depends only
    on the first entry of ``size``.
    """
    return tf.random_normal(shape=size, stddev=1. / tf.sqrt(size[0] / 2.))
Ejemplo n.º 47
0
def _renorm_correction_and_moments(
    renorm_params,
    mean,
    variance,
    training,
):
    """Returns the correction and update values for renorm.

    Args:
      renorm_params: object providing `epsilon`, `renorm_mean`,
        `renorm_mean_weight`, `renorm_stddev`, `renorm_stddev_weight`,
        `renorm_momentum` and `renorm_clipping` (a mapping queried for the
        optional keys 'rmin', 'rmax' and 'dmax'), plus `moving_mean` and
        `moving_variance`, which are only used here to colocate the update
        ops.
      mean: mean of the current batch.
      variance: variance of the current batch.
      training: predicate handed to `utils.smart_cond`; when false, `r` is
        all ones, `d` is all zeros and the moving statistics are read
        without being updated.

    Returns:
      Tuple `(r, d, new_mean, new_variance)`: the (optionally clipped)
      scale and shift corrections, followed by the renorm mean and the
      variance derived from the renorm standard deviation.
    """
    stddev = tf.sqrt(variance + renorm_params.epsilon)
    # Compute the average mean and standard deviation, as if they were
    # initialized with this batch's moments.
    mixed_renorm_mean = (renorm_params.renorm_mean +
                         (1. - renorm_params.renorm_mean_weight) * mean)
    mixed_renorm_stddev = (renorm_params.renorm_stddev +
                           (1. - renorm_params.renorm_stddev_weight) * stddev)
    # Compute the corrections for batch renorm.
    r = stddev / mixed_renorm_stddev
    d = (mean - mixed_renorm_mean) / mixed_renorm_stddev
    # Ensure the corrections use pre-update moving averages: the values fed
    # to the updates below are only available once r and d are computed.
    with ops.control_dependencies([r, d]):
        mean = array_ops.identity(mean)
        stddev = array_ops.identity(stddev)
    # Clipping bounds are optional; a missing key yields None and skips the
    # corresponding clamp.
    rmin, rmax, dmax = [
        renorm_params.renorm_clipping.get(key)
        for key in ['rmin', 'rmax', 'dmax']
    ]
    if rmin is not None:
        r = tf.maximum(r, rmin)
    if rmax is not None:
        r = tf.minimum(r, rmax)
    if dmax is not None:
        # Clamp d symmetrically to [-dmax, dmax].
        d = tf.maximum(d, -dmax)
        d = tf.minimum(d, dmax)
    # When not training, use r=1, d=0.
    r = utils.smart_cond(training, lambda: r, lambda: array_ops.ones_like(r))
    d = utils.smart_cond(training, lambda: d, lambda: array_ops.zeros_like(d))

    def _update_renorm_variable(var, weight, value):
        """Updates a moving average and weight, returns the unbiased value."""
        value = array_ops.identity(value)

        def _do_update():
            # Update the variables without zero debiasing. The debiasing will be
            # accomplished by dividing the exponential moving average by the weight.
            # For example, after a single update, the moving average would be
            # (1-decay) * value. and the weight will be 1-decay, with their ratio
            # giving the value.
            # Make sure the weight is not updated until before r and d computation.
            with ops.control_dependencies([value]):
                weight_value = array_ops.constant(1., dtype=weight.dtype)
            new_var = moving_averages.assign_moving_average(
                var, value, renorm_params.renorm_momentum, zero_debias=False)
            new_weight = moving_averages.assign_moving_average(
                weight,
                weight_value,
                renorm_params.renorm_momentum,
                zero_debias=False)
            return new_var / new_weight

        def _fake_update():
            # Not training: read the stored value, leave the variables alone.
            return array_ops.identity(var)

        return utils.smart_cond(training, _do_update, _fake_update)

    with ops.colocate_with(renorm_params.moving_mean):
        new_mean = _update_renorm_variable(renorm_params.renorm_mean,
                                           renorm_params.renorm_mean_weight,
                                           mean)
    with ops.colocate_with(renorm_params.moving_variance):
        new_stddev = _update_renorm_variable(
            renorm_params.renorm_stddev, renorm_params.renorm_stddev_weight,
            stddev)
        # Make sqrt(moving_variance + epsilon) = new_stddev.
        new_variance = tf.square(new_stddev) - renorm_params.epsilon

    return (r, d, new_mean, new_variance)
Ejemplo n.º 48
0
# Fit the estimator that was built earlier in the script.
myEstimator.fit(nIter=nIter)

# Plot the estimate with a fixed colour range and save it.
xi = myEstimator.x.numpy()
plt.figure()
img = plt.imshow(xi)
img.set_clim(0.0, 1.0)
plt.savefig('results/LLS.png')

# Second run: same data, but with the forward model wrapped by the Fourier
# operator M built below (presumably a preconditioner, which would make
# "PLLS" a preconditioned least-squares run -- TODO confirm).
# A is built from FP and BP (presumably the forward and back projection
# callables -- verify where they are defined).
A = aiaiTools.layers.linear.NumpyLinearOperator(FP, BP, x_shape, y_shape)
ATA = aiaiTools.layers.meta.LinearSequence(A, A.transpose())
# Fourier-domain approximation of ATA; its response is read from `.H` below.
ATA_fourier = aiaiTools.layers.linear.FourierApproximation2D(ATA, x)
# Operator built from 1 / H. NOTE(review): ATA_inv is not used in the rest
# of this fragment.
ATA_inv = aiaiTools.layers.linear.FourierOperator2D(
    ATA_fourier.IFT(1 / ATA_fourier.H))
# Gaussian blur whose response `.H` weights the 1 / sqrt(H) filter in M.
appodization = aiaiTools.layers.linear.GaussianBlur2D(0.5, x_shape)
M = aiaiTools.layers.linear.FourierOperator2D(
    ATA_fourier.IFT(1 / tf.sqrt(ATA_fourier.H) * appodization.H))
# Variant of M without the Gaussian weighting, kept for reference:
# M = aiaiTools.layers.linear.FourierOperator2D(ATA_fourier.IFT(1/tf.sqrt(ATA_fourier.H)))
myForwardModel = aiaiTools.layers.meta.LinearSequence(M, A)
myEstimator = aiaiTools.models.optimization.LeastSquares(myForwardModel,
                                                         y,
                                                         x_shape,
                                                         dynamic=True)
myEstimator.compile(learning_rate=learning_rate)
myEstimator.fit(nIter=nIter)

# The fitted variable is passed through M before being displayed and saved.
xi = M(myEstimator.x.numpy())
plt.figure()
img = plt.imshow(xi)
img.set_clim(0.0, 1.0)
plt.savefig('results/PLLS.png')
Ejemplo n.º 49
0
    def build_graph(self, x1, y_pred_, learning_rate, units, hold_prob):
        if not self.rated is None:
            norm_val = tf.constant(1, tf.float32, name='rated')
        else:
            norm_val = y_pred_
        with tf.name_scope("build_lstm") as scope:
            if self.trial == 0:
                lstm_1 = tf.keras.layers.LSTM(units[0],
                                              name='lstm1',
                                              return_sequences=True,
                                              activation=tf.nn.elu)
                full_out_dropout = tf.nn.dropout(lstm_1(x1),
                                                 rate=1 - hold_prob)
                shape = full_out_dropout.get_shape().as_list()
                full_out_dropout = tf.reshape(full_out_dropout,
                                              [-1, shape[1] * shape[2]])

            elif self.trial == 1:
                lstm_1 = tf.keras.layers.LSTM(units[0],
                                              name='lstm1',
                                              return_sequences=True,
                                              activation=tf.nn.elu)
                full_one_dropout = tf.nn.dropout(lstm_1(x1),
                                                 rate=1 - hold_prob)
                shape = full_one_dropout.get_shape().as_list()
                lstm_1_flat = tf.reshape(full_one_dropout,
                                         [-1, shape[1] * shape[2]])
                full_layer_one = tf.keras.layers.Dense(units=shape[1] *
                                                       shape[2],
                                                       activation=tf.nn.elu,
                                                       name='dense1')
                full_out_dropout = tf.nn.dropout(full_layer_one(lstm_1_flat),
                                                 rate=1 - hold_prob)

            elif self.trial == 2:
                lstm_1 = tf.keras.layers.LSTM(units[0],
                                              name='lstm1',
                                              return_sequences=True,
                                              activation=tf.nn.elu)
                full_one_dropout = tf.nn.dropout(lstm_1(x1),
                                                 rate=1 - hold_prob)

                shape = full_one_dropout.get_shape().as_list()
                lstm_2_flat = tf.reshape(full_one_dropout,
                                         [-1, shape[1] * shape[2]])
                full_layer_two = tf.keras.layers.Dense(units=shape[1] *
                                                       shape[2],
                                                       activation=tf.nn.elu,
                                                       name='dense1')
                full_two_dropout = tf.nn.dropout(full_layer_two(lstm_2_flat),
                                                 rate=1 - hold_prob)
                full_two_dropout = tf.reshape(full_two_dropout,
                                              [-1, shape[1], shape[2]])

                lstm_2 = tf.keras.layers.LSTM(units[2],
                                              name='lstm2',
                                              return_sequences=True,
                                              activation=tf.nn.elu)
                full_out_dropout = tf.nn.dropout(lstm_2(full_two_dropout),
                                                 rate=1 - hold_prob)
                shape = full_out_dropout.get_shape().as_list()
                full_out_dropout = tf.reshape(full_out_dropout,
                                              [-1, shape[1] * shape[2]])
            elif self.trial == 3:
                lstm_1 = tf.keras.layers.LSTM(units[0],
                                              name='lstm1',
                                              return_sequences=True,
                                              activation=tf.nn.elu)
                full_one_dropout = tf.nn.dropout(lstm_1(x1),
                                                 rate=1 - hold_prob)
                shape = full_one_dropout.get_shape().as_list()
                lstm_2_flat = tf.reshape(full_one_dropout,
                                         [-1, shape[1] * shape[2]])

                full_layer_two = tf.keras.layers.Dense(units=shape[1] *
                                                       shape[2],
                                                       activation=tf.nn.elu,
                                                       name='dense1')
                full_two_dropout = tf.nn.dropout(full_layer_two(lstm_2_flat),
                                                 rate=1 - hold_prob)
                full_two_dropout = tf.reshape(full_two_dropout,
                                              [-1, shape[1], shape[2]])
                lstm_2 = tf.keras.layers.LSTM(units[2],
                                              name='lstm2',
                                              return_sequences=True,
                                              activation=tf.nn.elu)
                full_three_dropout = tf.nn.dropout(lstm_2(full_two_dropout),
                                                   rate=1 - hold_prob)
                shape = full_three_dropout.get_shape().as_list()
                lstm_2_flat = tf.reshape(full_three_dropout,
                                         [-1, shape[1] * shape[2]])
                full_layer_three = tf.keras.layers.Dense(units=shape[1] *
                                                         shape[2],
                                                         activation=tf.nn.elu,
                                                         name='dense2')
                full_three_dropout = tf.nn.dropout(
                    full_layer_three(lstm_2_flat), rate=1 - hold_prob)
                full_three_dropout = tf.reshape(full_three_dropout,
                                                [-1, shape[1], shape[2]])
                lstm_3 = tf.keras.layers.LSTM(units[2],
                                              name='lstm3',
                                              return_sequences=True,
                                              activation=tf.nn.elu)
                full_out_dropout = tf.nn.dropout(lstm_3(full_three_dropout),
                                                 rate=1 - hold_prob)
                shape = full_out_dropout.get_shape().as_list()
                full_out_dropout = tf.reshape(full_out_dropout,
                                              [-1, shape[1] * shape[2]])
            if self.probabilistic:
                prob_layer = tf.keras.layers.Dense(y_pred_.shape[1],
                                                   activation=tf.nn.softmax,
                                                   name='dense_prob')
                y_pred = prob_layer(full_out_dropout)
            else:
                y_pred, W, b = self.normal_full_layer(full_out_dropout, 1)

            if self.trial == 0:
                weights = lstm_1.trainable_weights
            elif self.trial == 1:
                weights = lstm_1.trainable_weights + full_layer_one.trainable_weights
            elif self.trial == 2:
                weights = lstm_1.trainable_weights + full_layer_two.trainable_weights + lstm_2.trainable_weights
            elif self.trial == 3:
                weights = lstm_1.trainable_weights + full_layer_two.trainable_weights + lstm_2.trainable_weights + full_layer_three.trainable_weights + lstm_3.trainable_weights

            if self.probabilistic:
                weights += prob_layer.trainable_weights
            else:
                weights += [W, b]

        with tf.name_scope("train_lstm") as scope:
            if self.probabilistic:
                cost_lstm = tf.losses.softmax_cross_entropy(y_pred_, y_pred)
                optimizer_lstm = tf.compat.v1.train.AdamOptimizer(
                    learning_rate=learning_rate)
                train_lstm = optimizer_lstm.minimize(cost_lstm)
                accuracy_lstm = 1 / tf.metrics.accuracy(y_pred - y_pred_)
                sse_lstm = 1 / tf.metrics.recall(y_pred - y_pred_)
                rse_lstm = 1 / tf.metrics.precision(y_pred - y_pred_)
            else:
                err = tf.divide(tf.abs(y_pred - y_pred_), norm_val)
                cost_lstm = tf.reduce_mean(tf.square(err))
                optimizer_lstm = tf.compat.v1.train.AdamOptimizer(
                    learning_rate=learning_rate)
                train_lstm = optimizer_lstm.minimize(cost_lstm)
                accuracy_lstm = tf.reduce_mean(err)
                sse_lstm = tf.reduce_sum(tf.square(err))
                rse_lstm = tf.sqrt(tf.reduce_mean(tf.square(err)))

        return train_lstm, cost_lstm, accuracy_lstm, sse_lstm, rse_lstm, weights
Ejemplo n.º 50
0
    def init_weights(self, shape, Fin, Fout):
        """Sample an initial weight tensor from a symmetric uniform range.

        The sampling bound is ``sqrt(2 / (Fin + Fout))``; values are drawn
        between minus that bound and plus that bound.

        Args:
            shape: Shape of the tensor to sample.
            Fin: Fan-in term used in the bound.
            Fout: Fan-out term used in the bound.

        Returns:
            The tensor produced by ``tf.random_uniform`` for ``shape``.
        """
        bound = tf.sqrt(2.0 / (Fin + Fout))
        return tf.random_uniform(shape, minval=-bound, maxval=bound)
Ejemplo n.º 51
0
    def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
        """Create the state of a running mean/std observation normalizer.

        The object accumulates sums, squared sums and counts so that
        observations can be rescaled to roughly zero mean and unit variance.

        Args:
            size (int): the size of the observation to be normalized
            eps (float): floor for the standard deviation estimate; the
                variance is clamped to at least ``eps ** 2`` before the
                square root is taken
            default_clip_range (float): stored as the default range
                [-default_clip_range, default_clip_range] for clipping
                normalized observations
            sess (object): the TensorFlow session to be used; when None the
                current default session is looked up
        """
        self.size = size
        self.eps = eps
        self.default_clip_range = default_clip_range
        if sess is None:
            sess = tf.get_default_session()
        self.sess = sess

        # Host-side accumulators, kept in NumPy.
        self.local_sum = np.zeros(self.size, np.float32)
        self.local_sumsq = np.zeros(self.size, np.float32)
        self.local_count = np.zeros(1, np.float32)

        def frozen_variable(name, shape, initializer):
            # Non-trainable float32 graph variable holding a statistic.
            return tf.get_variable(initializer=initializer,
                                   shape=shape,
                                   name=name,
                                   trainable=False,
                                   dtype=tf.float32)

        def float_placeholder(name, shape):
            # float32 feed point for an increment to one of the statistics.
            return tf.placeholder(name=name, shape=shape, dtype=tf.float32)

        # Graph-side running totals; the count starts at one, the sums at zero.
        self.sum_tf = frozen_variable('sum', self.local_sum.shape,
                                      tf.zeros_initializer())
        self.sumsq_tf = frozen_variable('sumsq', self.local_sumsq.shape,
                                        tf.zeros_initializer())
        self.count_tf = frozen_variable('count', self.local_count.shape,
                                        tf.ones_initializer())

        # Derived statistics, refreshed by `recompute_op`.
        self.mean = frozen_variable('mean', (self.size, ),
                                    tf.zeros_initializer())
        self.std = frozen_variable('std', (self.size, ),
                                   tf.ones_initializer())

        self.count_pl = float_placeholder('count_pl', (1, ))
        self.sum_pl = float_placeholder('sum_pl', (self.size, ))
        self.sumsq_pl = float_placeholder('sumsq_pl', (self.size, ))

        # Adds the fed increments to the running totals.
        add_count = self.count_tf.assign_add(self.count_pl)
        add_sum = self.sum_tf.assign_add(self.sum_pl)
        add_sumsq = self.sumsq_tf.assign_add(self.sumsq_pl)
        self.update_op = tf.group(add_count, add_sum, add_sumsq)

        # mean = sum / count; std = sqrt(max(eps^2, E[x^2] - E[x]^2)).
        assign_mean = tf.assign(self.mean, self.sum_tf / self.count_tf)
        eps_squared = tf.square(self.eps)
        second_moment = self.sumsq_tf / self.count_tf
        squared_first_moment = tf.square(self.sum_tf / self.count_tf)
        clamped_variance = tf.maximum(eps_squared,
                                      second_moment - squared_first_moment)
        assign_std = tf.assign(self.std, tf.sqrt(clamped_variance))
        self.recompute_op = tf.group(assign_mean, assign_std)

        self.lock = threading.Lock()
Ejemplo n.º 52
0
def Conv2D(name,
           input_dim,
           output_dim,
           filter_size,
           inputs,
           he_init=True,
           mask_type=None,
           stride=1,
           weightnorm=None,
           biases=True,
           gain=1.):
    """Build a 2-D convolution in NCHW layout with 'SAME' padding.

    Filter values are drawn uniformly, optionally weight-normalized and
    optionally masked, then applied with ``tf.nn.conv2d``.

    Args:
        name: name scope and prefix for the parameters registered through
            ``lib.param`` (``.Filters``, ``.g``, ``.Biases``).
        input_dim: number of input channels.
        output_dim: number of output channels.
        filter_size: side length of the square filter.
        inputs: tensor of shape (batch size, num channels, height, width).
        he_init: if True the filter stdev is ``sqrt(4 / (fan_in + fan_out))``,
            otherwise ``sqrt(2 / (fan_in + fan_out))``. Ignored when the
            module-level ``_weights_stdev`` is not None.
        mask_type: None for no mask, or a pair ``(kind, n_channels)`` where
            ``kind`` is 'a' or 'b'. The mask zeroes filter rows below the
            center, the part of the center row right of the center, and, at
            the center position, the channel pairs (i, j) taken modulo
            ``n_channels`` with ``i >= j`` for 'a' or ``i > j`` for 'b'.
        stride: stride applied to both spatial dimensions.
        weightnorm: whether to apply weight normalization; None falls back to
            the module-level ``_default_weightnorm``.
        biases: whether to add a zero-initialized per-channel bias.
        gain: factor multiplied into the initial filter values.

    Returns:
        The convolution result in NCHW layout, with the bias added when
        ``biases`` is true.
    """
    with tf.name_scope(name):

        if mask_type is not None:
            mask_type, mask_n_channels = mask_type

            mask = np.ones((filter_size, filter_size, input_dim, output_dim),
                           dtype='float32')
            center = filter_size // 2

            # Mask out future locations
            # filter shape is (height, width, input channels, output channels)
            mask[center + 1:, :, :, :] = 0.
            mask[center, center + 1:, :, :] = 0.

            # Mask out future channels
            # `range` instead of the Python-2-only `xrange`, so this branch
            # also runs on Python 3.
            for i in range(mask_n_channels):
                for j in range(mask_n_channels):
                    if (mask_type == 'a' and i >= j) or (mask_type == 'b'
                                                         and i > j):
                        mask[center, center, i::mask_n_channels,
                             j::mask_n_channels] = 0.

        def uniform(stdev, size):
            # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has standard
            # deviation `stdev`.
            return np.random.uniform(low=-stdev * np.sqrt(3),
                                     high=stdev * np.sqrt(3),
                                     size=size).astype('float32')

        fan_in = input_dim * filter_size**2
        fan_out = output_dim * filter_size**2 / (stride**2)

        if mask_type is not None:  # only approximately correct
            fan_in /= 2.
            fan_out /= 2.

        if he_init:
            filters_stdev = np.sqrt(4. / (fan_in + fan_out))
        else:  # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2. / (fan_in + fan_out))

        # A module-level `_weights_stdev` overrides the computed stdev.
        if _weights_stdev is not None:
            filter_values = uniform(
                _weights_stdev,
                (filter_size, filter_size, input_dim, output_dim))
        else:
            filter_values = uniform(
                filters_stdev,
                (filter_size, filter_size, input_dim, output_dim))

        filter_values *= gain

        filters = lib.param(name + '.Filters', filter_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # One norm per output channel, initialized from the sampled
            # filters and registered as the parameter `<name>.g`.
            norm_values = np.sqrt(
                np.sum(np.square(filter_values), axis=(0, 1, 2)))
            target_norms = lib.param(name + '.g', norm_values)
            with tf.name_scope('weightnorm'):
                norms = tf.sqrt(
                    tf.reduce_sum(tf.square(filters),
                                  reduction_indices=[0, 1, 2]))
                filters = filters * (target_norms / norms)

        if mask_type is not None:
            with tf.name_scope('filter_mask'):
                filters = filters * mask

        result = tf.nn.conv2d(input=inputs,
                              filter=filters,
                              strides=[1, 1, stride, stride],
                              padding='SAME',
                              data_format='NCHW')

        if biases:
            _biases = lib.param(name + '.Biases',
                                np.zeros(output_dim, dtype='float32'))

            result = tf.nn.bias_add(result, _biases, data_format='NCHW')

        return result
Ejemplo n.º 53
0
def lstm(params):
    """Train a three-layer LSTM over word embeddings and sample from it.

    Loads word2vec data through ``word2vec.get_word2vec``, holds out the
    first 1000 tokens for validation, builds an unrolled TensorFlow graph
    whose output is projected back into embedding space, and trains it with
    gradient descent on a cosine-distance loss plus L2 regularization.
    The running loss is printed every ``summary_frequency`` steps; every
    ``10 * summary_frequency`` steps sample sentences are generated, the
    model is saved to ``params['savefile']`` and the validation loss is
    printed.

    Args:
        params: Mapping read with the keys 'num_unrollings', 'batch_size',
            'num_nodes', 'max_k', 'beta_regularization_value',
            'start_learning_rate', 'epochs', 'summary_frequency', 'dropout',
            'savefile' and 'resume'.

    Returns:
        None. Results are printed and written to the save file.
    """
    data, count, dictionary, embeddings, normalized_embeddings, weights, biases = word2vec.get_word2vec(
        2, False)
    words_size = embeddings.shape[0]
    embedding_size = embeddings.shape[1]
    print('Most common words (+UNK)', count[:5])
    print('embedding size:%s data:%s' %
          (embedding_size, [dictionary[word] for word in data[:100]]))

    # Create a small validation set.
    valid_size = 1000
    valid_text = data[:valid_size]
    train_text = data[valid_size:]
    train_size = len(train_text)

    p_num_unrollings = params['num_unrollings']
    p_batch_size = params['batch_size']

    class BatchGenerator(object):
        """Yields consecutive embedding batches from `batch_size` parallel cursors."""

        def __init__(self, text, batch_size, num_unrollings):
            assert batch_size >= 1
            assert num_unrollings >= 1
            self._text = text
            self._text_size = len(text)
            self._batch_size = batch_size
            self._num_unrollings = num_unrollings
            # Each batch row starts reading at its own evenly spaced offset.
            segment = self._text_size // batch_size
            self._cursor_boundary = [
                offset * segment for offset in range(batch_size)
            ]
            self._cursor = self._cursor_boundary[:]
            self._last_batch = self._next_batch()

        def _next_batch(self):
            """Generate a single batch from the current cursor position in the data."""
            batch = np.zeros(shape=(self._batch_size, embedding_size),
                             dtype=np.float)
            for b in range(self._batch_size):
                batch[b] = normalized_embeddings[self._text[self._cursor[b]]]
                self._cursor[b] = (self._cursor[b] + 1)
            # Once the last row's cursor reaches the end of the text, all
            # cursors are rewound to their starting offsets.
            if self._cursor[self._batch_size - 1] == self._text_size:
                self._cursor = self._cursor_boundary[:]
            return batch

        def next(self):
            """Generate the next array of batches from the data. The array consists of
            the last batch of the previous array, followed by p_num_unrollings new ones.
            """
            batches = [self._last_batch]
            for _ in range(self._num_unrollings):
                batches.append(self._next_batch())
            self._last_batch = batches[-1]
            return batches

    def batches2string(batches):
        """Convert a sequence of batches back into their (most likely) string
        representation."""
        s = [''] * batches[0].shape[0]
        for b in batches:
            # Nearest word = largest dot product with the normalized embeddings.
            words = [
                dictionary[w]
                for w in np.argmax(np.matmul(b, normalized_embeddings.T), 1)
            ]
            s = [' '.join(x) for x in zip(s, words)]
        return s

    train_batches = BatchGenerator(train_text, p_batch_size, p_num_unrollings)
    valid_batches = BatchGenerator(valid_text, 1, 1)

    print(batches2string(train_batches.next()))
    print(batches2string(train_batches.next()))
    print(batches2string(train_batches.next()))
    print(batches2string(valid_batches.next()))
    print(batches2string(valid_batches.next()))
    print(batches2string(valid_batches.next()))

    # NOTE(review): logprob is defined but not called anywhere in this function.
    def logprob(predictions, labels):
        """Log-probability of the true labels in a predicted batch."""
        # Clamps small values in place, i.e. the caller's array is modified.
        predictions[predictions < 1e-10] = 1e-10
        return np.sum(
            -np.log([predictions[i, label]
                     for i, label in enumerate(labels)])) / labels.shape[0]

    graph = tf.Graph()
    with graph.as_default():
        p_num_nodes = params['num_nodes']
        p_max_k = params['max_k']

        def create_trainable_variables():
            '''
            Create the weights of the three LSTM layers (L1-L3) and of the
            output projection (L4), returned as a dict keyed by name.

            Parameters:
                num_nodes*0:num_nodes*1 : Input gate
                num_nodes*1:num_nodes*2 : Forget gate
                num_nodes*2:num_nodes*3 : Output gate
                num_nodes*3:num_nodes*4 : New memory cell
            '''
            W = {
                'L1_W':
                tf.Variable(
                    tf.truncated_normal([embedding_size, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L1_W")),
                'L1_U':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L1_U")),
                'L1_b':
                tf.Variable(tf.zeros([1, p_num_nodes * 4]), name="L1_b"),
                'L2_W':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L2_W")),
                'L2_U':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L2_U")),
                'L2_b':
                tf.Variable(tf.zeros([1, p_num_nodes * 4]), name="L2_b"),
                'L3_W':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L3_W")),
                'L3_U':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, p_num_nodes * 4],
                                        mean=0,
                                        stddev=0.1,
                                        name="L3_U")),
                'L3_b':
                tf.Variable(tf.zeros([1, p_num_nodes * 4]), name="L3_b"),
                'L4_W':
                tf.Variable(
                    tf.truncated_normal([p_num_nodes, embedding_size],
                                        mean=0,
                                        stddev=0.1,
                                        name="L4_W")),
                'L4_b':
                tf.Variable(tf.zeros([embedding_size]), name="L4_b"),
            }

            return W

        def create_variables(batch_size, num_unrollings):
            """Create the input placeholders and the non-trainable state
            variables (h/c per layer) for one unrolled model instance."""
            # Input data.
            train_data = list()
            for _ in range(num_unrollings + 1):
                train_data.append(
                    tf.placeholder(tf.float32,
                                   shape=[batch_size, embedding_size]))

            inputs = {
                'inputs': train_data[:num_unrollings],
                'labels':
                train_data[1:],  # labels are inputs shifted by one time step.
                'data': train_data,
                'dropout': tf.placeholder(tf.float32, name="dropout"),
            }

            # Variables saving state across unrollings.
            last_state = {
                'h1':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="h1"),
                'c1':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="c1"),
                'h2':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="h2"),
                'c2':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="c2"),
                'h3':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="h3"),
                'c3':
                tf.Variable(tf.zeros([batch_size, p_num_nodes]),
                            trainable=False,
                            name="c3"),
            }

            return inputs, last_state

        # Definition of the cell computation.
        def lstm_cell(x, h, c, W, U, b):
            """Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
            Note that in this formulation, we omit the various connections between the
            previous c (i.e. state) and the gates."""
            # All four gate pre-activations are computed in one matmul and
            # then sliced apart by column range.
            raw_data = tf.matmul(x, W) + tf.matmul(h, U) + b
            gates = tf.sigmoid(raw_data[:, :p_num_nodes * 3])
            input_gate = gates[:, :p_num_nodes]  # p_batch_size x p_num_nodes
            forget_gate = gates[:, p_num_nodes:p_num_nodes *
                                2]  # p_batch_size x p_num_nodes
            output_gate = gates[:, p_num_nodes * 2:p_num_nodes *
                                3]  # p_batch_size x p_num_nodes
            new_memory_cell = raw_data[:, p_num_nodes *
                                       3:]  # p_batch_size x p_num_nodes
            c_next = forget_gate * c + input_gate * tf.tanh(
                new_memory_cell)  # p_batch_size x p_num_nodes
            h_next = output_gate * tf.tanh(c_next)
            return h_next, c_next

        def create_model(W, inputs, last_state):
            """Unroll the stacked LSTM over `inputs['inputs']` and return a
            dict with the 'loss' and the un-normalized 'Y_pred' tensors."""
            ys = list()
            h1 = last_state['h1']
            c1 = last_state['c1']
            h2 = last_state['h2']
            c2 = last_state['c2']
            h3 = last_state['h3']
            c3 = last_state['c3']
            # Three stacked LSTM layers, with dropout applied between layers.
            for x in inputs['inputs']:
                h1, c1 = lstm_cell(x, h1, c1, W['L1_W'], W['L1_U'], W['L1_b'])
                x2 = tf.nn.dropout(h1, inputs['dropout'], name="dropout")
                h2, c2 = lstm_cell(x2, h2, c2, W['L2_W'], W['L2_U'], W['L2_b'])
                x3 = tf.nn.dropout(h2, inputs['dropout'], name="dropout")
                h3, c3 = lstm_cell(x3, h3, c3, W['L3_W'], W['L3_U'], W['L3_b'])
                ys.append(h3)

            # State saving across unrollings.
            with tf.control_dependencies([
                    last_state['h1'].assign(h1), last_state['c1'].assign(c1),
                    last_state['h2'].assign(h2), last_state['c2'].assign(c2),
                    last_state['h3'].assign(h3), last_state['c3'].assign(c3)
            ]):
                # Classifier.
                # NOTE(review): tf.concat is called with the axis as first
                # argument here - confirm this matches the installed
                # TensorFlow version.
                Y_pred = tf.nn.xw_plus_b(tf.concat(0, ys), W['L4_W'],
                                         W['L4_b'])
                norm = tf.sqrt(
                    tf.reduce_sum(tf.square(Y_pred), 1, keep_dims=True))
                normalized_Y_pred = Y_pred / norm
                Y = tf.concat(0, inputs['labels'])
                # L2 penalty on the input/projection matrices only (no U, no biases).
                l2_loss = params['beta_regularization_value'] * (
                    tf.nn.l2_loss(W['L1_W']) + tf.nn.l2_loss(W['L2_W']) +
                    tf.nn.l2_loss(W['L3_W']) + tf.nn.l2_loss(W['L4_W']))
                loss = tf.contrib.losses.cosine_distance(
                    normalized_Y_pred, Y, dim=1) + l2_loss

            model = {
                'loss': loss,
                'Y_pred': Y_pred,
            }
            return model

        # Convert vec to word
        norm_embeddings = tf.constant(normalized_embeddings.T)

        W = create_trainable_variables()
        inputs, last_state = create_variables(p_batch_size, p_num_unrollings)

        # Unrolled LSTM loop.
        model = create_model(W, inputs, last_state)

        # Optimizer.
        # Learning rate is multiplied by 0.1 every 5000 steps (staircase);
        # gradients are clipped to a global norm of 1.25 before being applied.
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(
            params['start_learning_rate'],
            global_step,
            5000,
            0.1,
            staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        gradients, v = zip(*optimizer.compute_gradients(model['loss']))
        gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
        # `optimizer` is rebound from the optimizer object to the train op.
        optimizer = optimizer.apply_gradients(zip(gradients, v),
                                              global_step=global_step)

        # RMS of each gradient / variable, skipping the last two pairs
        # (presumably the L4 output layer - verify against variable order).
        # NOTE(review): the tf.Print ops below are built but not run in this function.
        grad_sum = [
            tf.sqrt(tf.reduce_mean(tf.square(gradient)))
            for gradient in gradients[:len(gradients) - 2]
        ]
        v_sum = [
            tf.sqrt(tf.reduce_mean(tf.square(variable)))
            for variable in v[:len(gradients) - 2]
        ]
        grad_v_sum = [grad / v for grad, v in zip(grad_sum, v_sum)]
        grad_sum_string = tf.Print(grad_sum, [grad_sum], message="grad_sum: ")
        v_sum_string = tf.Print(v_sum, [v_sum], message="v_sum: ")
        grad_v_sum_string = tf.Print(grad_v_sum, [grad_v_sum],
                                     message="grad_v_sum: ")

        # Sampling and validation eval: batch 1, no unrolling.
        # The sample model shares the trainable weights `W` with the training
        # model but has its own placeholders and state variables.
        sample_batch_size = 1
        sample_num_unrollings = 1
        sample_inputs, sample_last_state = create_variables(
            sample_batch_size, sample_num_unrollings)
        sample_model = create_model(W, sample_inputs, sample_last_state)
        reset_sample_state = tf.group(
            sample_last_state['h1'].assign(
                tf.zeros([sample_batch_size,
                          p_num_nodes])), sample_last_state['c1'].assign(
                              tf.zeros([sample_batch_size, p_num_nodes])),
            sample_last_state['h2'].assign(
                tf.zeros([sample_batch_size,
                          p_num_nodes])), sample_last_state['c2'].assign(
                              tf.zeros([sample_batch_size, p_num_nodes])),
            sample_last_state['h3'].assign(
                tf.zeros([sample_batch_size,
                          p_num_nodes])), sample_last_state['c3'].assign(
                              tf.zeros([sample_batch_size, p_num_nodes])))

        # Indices of the p_max_k words whose embeddings score highest
        # against the predicted vector.
        similarity = tf.matmul(sample_model['Y_pred'], norm_embeddings)
        sample_next = tf.nn.top_k(similarity, p_max_k)[1]

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

    p_epochs = params['epochs']
    p_summary_frequency = params['summary_frequency']
    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print('Initialized')
        if os.path.exists(params['savefile']) and params['resume']:
            # Restore variables from disk.
            saver.restore(session, params['savefile'])
            print("Model restored.")

        start_time = time.time()
        n_batch = len(data) // p_batch_size
        for epoch in range(int(math.ceil(p_epochs))):
            # p_epochs can be 0.001 to test overfit
            # A fractional remainder of an epoch scales the number of steps.
            fraction = p_epochs - epoch
            if (fraction) < 1:
                n_batch = n_batch * fraction
            total_step = int(math.ceil(n_batch))
            mean_loss = 0
            print("Epoch %s start / total p_epochs %s, total steps %s" %
                  (epoch, p_epochs, total_step))
            for step in range(total_step):
                batches = train_batches.next()
                inputs_dict = dict()
                for i in range(p_num_unrollings + 1):
                    inputs_dict[inputs['data'][i]] = batches[i]
                inputs_dict[inputs['dropout']] = params['dropout']
                _, loss_e, learning_rate_e = session.run(
                    [optimizer, model['loss'], learning_rate],
                    feed_dict=inputs_dict)
                mean_loss += loss_e
                if step % p_summary_frequency == 0:
                    # NOTE(review): at step 0 only one loss has been
                    # accumulated, yet it is still divided by
                    # p_summary_frequency.
                    mean_loss = mean_loss / p_summary_frequency
                    # The mean loss is an estimate of the loss over the last few batches.
                    # PP = exp(CE) = exp(-log(prediction)) = 1/prediction. max PP = 1 / (1/50000) = 50000
                    print(
                        'Average loss at step(%d):%f learning rate:%.2f time:%s'
                        % (step, mean_loss, learning_rate_e,
                           timedelta(seconds=(time.time() - start_time))))
                    mean_loss = 0

                    # Picks one of the top-k candidate indices, using a
                    # folded normal draw reduced modulo p_max_k as position;
                    # redraws while the pick is index 0 (UNK).
                    def sample(candiate_indices):
                        # check https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py#L62
                        k = int(abs(random.normalvariate(
                            0, p_max_k / 2))) % p_max_k
                        index = candiate_indices[k]
                        # Skip UNK
                        if len(candiate_indices) > 1:
                            while index == 0:
                                k = int(
                                    abs(random.normalvariate(
                                        0, p_max_k / 2))) % p_max_k
                                index = candiate_indices[k]
                        return index

                    if step % (p_summary_frequency * 10) == 0:
                        # Generate some samples.
                        # Each sentence starts from a random word and is
                        # extended by 79 sampled words.
                        print('=' * 80)
                        for _ in range(5):
                            word = int(
                                random.uniform(0, 1) * words_size) % words_size
                            feed = np.array([embeddings[word]])
                            sentence = dictionary[word]
                            reset_sample_state.run()
                            for _ in range(79):
                                prediction = sample_next.eval({
                                    sample_inputs['inputs'][0]:
                                    feed,
                                    sample_inputs['dropout']:
                                    1,
                                })
                                index = sample(prediction[0, :])
                                feed = np.array([embeddings[index]])
                                sentence += ' ' + dictionary[index]
                            print(sentence)
                        print('=' * 80)

                        # Save the variables to disk.
                        save_path = saver.save(session, params['savefile'])

                        # Measure validation set perplexity.
                        # (The value accumulated here is the model's loss
                        # averaged over valid_size single-step batches.)
                        valid_mean_loss = 0
                        reset_sample_state.run()
                        for _ in range(valid_size):
                            validation_batches = valid_batches.next()
                            sample_feeds = {
                                sample_inputs['inputs'][0]:
                                validation_batches[0],
                                sample_inputs['labels'][0]:
                                validation_batches[1],
                                sample_inputs['dropout']: 1,
                            }
                            valid_loss = session.run([sample_model['loss']],
                                                     feed_dict=sample_feeds)
                            valid_mean_loss += valid_loss[0]
                        print('Validation set loss: %.2f. saved:%s' %
                              (valid_mean_loss / valid_size, save_path))
Ejemplo n.º 54
0
    def __init__(self, args):
        """Assemble the sequence auto-encoder graph with a Gaussian latent code.

        A GRU encoder summarises the embedded input tokens; its final state is
        projected to ``mu`` and ``log_sigma_sq``; a latent ``z`` seeds a GRU
        decoder that is trained with a sampled-softmax reconstruction loss plus
        the Gaussian KL term.

        Args:
            args: Configuration object. The attributes read here are
                ``batch_size``, ``map_size`` (its first two entries are
                multiplied to obtain the vocabulary size), ``x_latent_size``,
                ``rnn_size``, ``neg_size``, ``eval`` and ``learning_rate``.

        Attributes set:
            input_form: ``[inputs, mask, seq_length]`` placeholders.
            batch_post_embedded: The latent ``z`` fed to the decoder.
            rec_loss, latent_loss, loss: Scalar losses.
            train_op: Adam minimisation step for ``loss``.
            batch_likelihood: Masked mean log-sigmoid score of the targets.
            save, restore: Bound methods of a ``tf.train.Saver``.
        """
        batch = args.batch_size
        hidden = args.rnn_size

        # ---- Inputs -------------------------------------------------------
        inputs = tf.placeholder(dtype=tf.int32,
                                shape=(batch, None),
                                name='inputs')
        mask = tf.placeholder(dtype=tf.float32,
                              shape=(batch, None),
                              name='inputs_mask')
        seq_length = tf.placeholder(dtype=tf.float32,
                                    shape=batch,
                                    name='seq_length')
        self.input_form = [inputs, mask, seq_length]

        # Decoder sees the input shifted right by one zero token and is asked
        # to predict the input followed by one zero token; the extra step is
        # masked out.
        start_column = tf.zeros(shape=(batch, 1), dtype=tf.int32)
        decoder_inputs = tf.concat([start_column, inputs], axis=1)
        end_column = tf.zeros(shape=(batch, 1), dtype=tf.int32)
        decoder_targets = tf.concat([inputs, end_column], axis=1)
        mask_column = tf.zeros(shape=(batch, 1), dtype=tf.float32)
        decoder_mask = tf.concat([mask, mask_column], axis=1)

        # ---- Embeddings (shared by encoder and decoder) ----------------------
        x_size = out_size = args.map_size[0] * args.map_size[1]
        initial_embeddings = tf.random_uniform(
            [x_size, args.x_latent_size], -1.0, 1.0)
        embeddings = tf.Variable(initial_embeddings, dtype=tf.float32)
        encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, inputs)
        decoder_inputs_embedded = tf.nn.embedding_lookup(
            embeddings, decoder_inputs)

        # ---- Encoder --------------------------------------------------------
        with tf.variable_scope("encoder"):
            encoder_cell = tf.nn.rnn_cell.GRUCell(hidden)
            encoder_result = tf.nn.dynamic_rnn(
                encoder_cell,
                encoder_inputs_embedded,
                sequence_length=seq_length,
                dtype=tf.float32,
            )
            encoder_final_state = encoder_result[1]

        # ---- Latent projection ------------------------------------------------
        mu_w = tf.get_variable(
            "mu_w", [hidden, hidden], tf.float32,
            tf.random_normal_initializer(stddev=0.02))
        mu_b = tf.get_variable(
            "mu_b", [hidden], tf.float32,
            initializer=tf.constant_initializer(0.0))
        sigma_w = tf.get_variable(
            "sigma_w", [hidden, hidden], tf.float32,
            tf.random_normal_initializer(stddev=0.02))
        sigma_b = tf.get_variable(
            "sigma_b", [hidden], tf.float32,
            initializer=tf.constant_initializer(0.0))

        mu = tf.matmul(encoder_final_state, mu_w) + mu_b
        log_sigma_sq = tf.matmul(encoder_final_state, sigma_w) + sigma_b
        # The noise op is created in both modes, even though eval ignores it.
        eps = tf.random_normal(shape=tf.shape(log_sigma_sq),
                               mean=0,
                               stddev=1,
                               dtype=tf.float32)

        if not args.eval:
            # Reparameterised sample: mu + sigma * eps.
            z = mu + tf.sqrt(tf.exp(log_sigma_sq)) * eps
        else:
            # Evaluation decodes from an all-zero latent code.
            z = tf.zeros(shape=(batch, hidden), dtype=tf.float32)

        self.batch_post_embedded = z

        # ---- Decoder --------------------------------------------------------
        with tf.variable_scope("decoder"):
            decoder_cell = tf.nn.rnn_cell.GRUCell(hidden)
            decoder_result = tf.nn.dynamic_rnn(
                decoder_cell,
                decoder_inputs_embedded,
                initial_state=z,
                sequence_length=seq_length,
                dtype=tf.float32,
            )
            decoder_outputs = decoder_result[0]

        # ---- Losses ---------------------------------------------------------
        out_w = tf.get_variable(
            "out_w", [out_size, hidden], tf.float32,
            tf.random_normal_initializer(stddev=0.02))
        out_b = tf.get_variable(
            "out_b", [out_size], tf.float32,
            initializer=tf.constant_initializer(0.0))

        flat_targets = tf.reshape(decoder_targets, [-1, 1])
        flat_outputs = tf.reshape(decoder_outputs, [-1, hidden])
        token_loss = tf.nn.sampled_softmax_loss(
            weights=out_w,
            biases=out_b,
            labels=flat_targets,
            inputs=flat_outputs,
            num_sampled=args.neg_size,
            num_classes=out_size)
        token_loss = tf.reshape(token_loss, [batch, -1])
        # Mean over every time step (masked steps contribute zero).
        batch_rec_loss = tf.reduce_mean(decoder_mask * token_loss, axis=-1)

        # KL(N(mu, sigma^2) || N(0, 1)) summed over latent dimensions.
        kl_terms = 1 + log_sigma_sq - tf.square(mu) - tf.exp(log_sigma_sq)
        batch_latent_loss = -0.5 * tf.reduce_sum(kl_terms, axis=1)

        rec_loss = tf.reduce_mean(batch_rec_loss)
        latent_loss = tf.reduce_mean(batch_latent_loss)
        self.rec_loss = rec_loss
        self.latent_loss = latent_loss

        loss = tf.reduce_mean([rec_loss, latent_loss])
        self.loss = loss
        optimizer = tf.train.AdamOptimizer(args.learning_rate)
        self.train_op = optimizer.minimize(loss)

        # ---- Per-sequence score of the true targets ---------------------------
        target_out_w = tf.nn.embedding_lookup(out_w, decoder_targets)
        target_out_b = tf.nn.embedding_lookup(out_b, decoder_targets)
        target_logits = (
            tf.reduce_sum(decoder_outputs * target_out_w, -1) + target_out_b)
        self.batch_likelihood = tf.reduce_mean(
            decoder_mask * tf.log_sigmoid(target_logits),
            axis=-1,
            name="batch_likelihood")

        # ---- Checkpointing (trainable variables only) -------------------------
        trainable_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES)
        saver = tf.train.Saver(trainable_variables, max_to_keep=10)
        self.save = saver.save
        self.restore = saver.restore
Ejemplo n.º 55
0
 def xavier_init(size):
     """Sample a normal tensor of shape ``size`` scaled by its first dimension.

     The standard deviation is ``1 / sqrt(size[0] / 2)``, i.e.
     ``sqrt(2 / size[0])``; only the first entry of ``size`` influences it.

     Args:
         size: Shape of the tensor to draw; ``size[0]`` is the input dimension.

     Returns:
         A ``tf.random_normal`` tensor of shape ``size``.
     """
     fan_in = size[0]
     stddev = 1. / tf.sqrt(fan_in / 2.)
     return tf.random_normal(shape=size, stddev=stddev)
Ejemplo n.º 56
0
    def __init__(self, learning_rate=0.001, n_particles=1):
        """Build a two-component Gaussian proposal and its weighted objective.

        Each component is a 1-D Gaussian (mean / log-variance variables) that
        is sampled via ``z = mean + sqrt(exp(log_var)) * eps``.  Samples are
        scored with ``self.p_z`` and ``self.log_q_z`` (both defined elsewhere
        in the class).  The pooled samples are re-weighted against the mixture
        ``pi1 * q1 + pi2 * q2`` and ``self.elbo`` is the log of the mean
        weight; Adam minimises its negative.

        Args:
            learning_rate: Step size handed to the Adam optimizer.
            n_particles: Number of samples drawn from each component.
        """
        self.learning_rate = learning_rate
        self.n_particles = n_particles
        self.D = 1

        # Recognition model: parameters of the two Gaussian components.
        self.mean1 = tf.Variable([-1.])
        self.log_var1 = tf.Variable([1.])

        self.mean2 = tf.Variable([-1.])
        self.log_var2 = tf.Variable([1.])

        # Standard-normal noise, one row per particle.
        noise_shape = (self.n_particles, self.D)
        self.eps1 = tf.random_normal(noise_shape, 0, 1, dtype=tf.float32)
        self.eps2 = tf.random_normal(noise_shape, 0, 1, dtype=tf.float32)

        # NOTE(review): tf.mul and tf.concat(axis, values) are the pre-1.0
        # TensorFlow spellings; later releases expect tf.multiply and
        # tf.concat(values, axis) -- confirm the TensorFlow version targeted.
        std1 = tf.sqrt(tf.exp(self.log_var1))
        self.z1 = tf.add(self.mean1, tf.mul(std1, self.eps1))
        std2 = tf.sqrt(tf.exp(self.log_var2))
        self.z2 = tf.add(self.mean2, tf.mul(std2, self.eps2))

        # Log-density of each sample under the component that produced it.
        self.log_q1 = self.log_q_z(self.z1, self.mean1, self.log_var1)
        self.log_q2 = self.log_q_z(self.z2, self.mean2, self.log_var2)

        # Target density at the samples.
        self.p_z1 = self.p_z(self.z1)
        self.p_z2 = self.p_z(self.z2)

        # Per-component importance weights p / q.
        self.w1 = self.p_z1 / tf.exp(self.log_q1)
        self.w2 = self.p_z2 / tf.exp(self.log_q2)

        # Mixture coefficients: each component's share of the total weight.
        self.w_total = tf.reduce_sum(self.w1) + tf.reduce_sum(self.w2)
        self.pi1 = tf.reduce_sum(self.w1) / self.w_total
        self.pi2 = tf.reduce_sum(self.w2) / self.w_total

        # Re-score the pooled samples under the mixture pi1 * q1 + pi2 * q2.
        self.z_all = tf.concat(0, [self.z1, self.z2])
        mixture_part1 = self.pi1 * tf.exp(
            self.log_q_z(self.z_all, self.mean1, self.log_var1))
        mixture_part2 = self.pi2 * tf.exp(
            self.log_q_z(self.z_all, self.mean2, self.log_var2))
        self.q_all = mixture_part1 + mixture_part2
        self.p_all = tf.concat(0, [self.p_z1, self.p_z2])

        self.w_all = self.p_all / self.q_all

        # Objective: log of the average pooled weight.
        self.elbo = tf.log(tf.reduce_mean(self.w_all))

        # Objectives that earlier revisions kept as commented-out code here:
        # the plain mean of (log p - log q), a self-normalised weighted sum of
        # log-weights, and log-mean-exp forms shifted by the max or min
        # log-weight (optionally with log p / log q clipped to [-8, 8]).

        # Use ADAM optimizer
        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                      epsilon=1e-04)
        self.optimizer = adam.minimize(-self.elbo)
Ejemplo n.º 57
0
        def iterate(t, mean):
            """Run one generation of the CMA-ES-style search and update its state.

            Samples a population around ``self._m``, clips it to the action
            bounds (penalising the clipped distance), ranks the samples by
            reward, and then updates the mean, step size, evolution paths and
            covariance factors stored on ``self``.

            ``current_state`` and ``time_step`` are taken from the enclosing
            scope.  The incoming ``mean`` argument is not read in the body; it
            only mirrors the second return value.

            Args:
                t: Iteration counter.
                mean: Mean carried from the previous iteration (unused here).

            Returns:
                ``(t + 1, m)`` where ``m`` is the newly computed mean.
            """
            # -----------------------------------------------------
            # (1) Sample a new population of solutions ∼ N(m, σ²C)
            # -----------------------------------------------------
            z = tf.random.normal([self._population_size, self._solution_size], dtype=tf.float32)
            y = tf.matmul(z, tf.matmul(self._B, self._D))
            samples = self._m + self._sigma * y
            samples = tf.reshape(samples, [self._population_size, *self._solution_dim])
            # -------------------------------------------------
            # (1b) Feasibility: clip samples to the action bounds
            # -------------------------------------------------
            # The squared distance removed by clipping becomes a penalty with
            # one value per (sample, agent).
            samples_feasible = tf.clip_by_value(samples, self._action_lower_bound_horizon,
                                                self._action_upper_bound_horizon)
            penalty = tf.norm(tf.reshape(samples - samples_feasible,
                                         [self._population_size, self._num_agents, -1]),
                              axis=2) ** 2
            samples = samples_feasible
            # -------------------------------------------------
            # (2) Selection and Recombination: Moving the Mean
            # -------------------------------------------------
            # Evaluate and sort solutions
            rewards = self._trajectory_evaluator(current_state, samples, time_step) - penalty
            rewards = tf.reduce_sum(rewards, axis=1) #TODO: double check this, very flaky
            # Samples ordered from highest to lowest total reward.
            self._x_sorted = tf.gather(samples, tf.argsort(rewards, direction='DESCENDING'))
            # The new mean is a weighted average of the top-μ solutions
            # (assumes self._weights broadcasts against [population, solution]
            # and zeroes out the non-selected samples -- TODO confirm)
            x_diff = (tf.reshape(self._x_sorted, [self._population_size, self._solution_size]) - self._m)
            x_mean = tf.reduce_sum(tf.multiply(x_diff, self._weights), axis=0)
            m = self._m + x_mean
            # ----------------------
            # (3) Step-size control
            # ----------------------
            y_mean = x_mean / self._sigma
            # C^(-1/2) = B * D^(-1) * B^T, with D holding the diagonal scales.
            D_inv = tf.linalg.tensor_diag(tf.math.reciprocal(tf.linalg.diag_part(self._D)))
            C_inv_half = tf.matmul(tf.matmul(self._B, D_inv), tf.transpose(self._B))
            p_sigma = ((1 - self._c_sigma) * self._p_sigma) + (tf.math.sqrt(self._c_sigma * (2 - self._c_sigma) * self._mu_eff) *
                                                               tf.squeeze(tf.matmul(C_inv_half, y_mean[:, tf.newaxis])))
            # Grow or shrink sigma depending on how the path length compares
            # with self._expectation_of_normal.
            sigma = self._sigma * tf.exp((self._c_sigma / self._d_sigma) * ((tf.norm(p_sigma) /
                                                                             self._expectation_of_normal) - 1))
            # -----------------------------------
            # (4) Adapting the Covariance Matrix
            # -----------------------------------
            p_C = ((1 - self._cc) * self._p_C + (self._h_sigma * tf.sqrt(self._cc * (2 - self._cc) * self._mu_eff) * y_mean))

            # Column form of p_C so that p_C_matrix * p_C_matrix^T broadcasts
            # to the rank-one outer product.
            p_C_matrix = p_C[:, tf.newaxis]
            y_mean_unweighted = x_diff / self._sigma
            # Per-sample outer product: each element e is a single row, so
            # e * e^T broadcasts to a square matrix.
            y_mean_unweighted_squared = tf.map_fn(fn=lambda e: e * tf.transpose(e), elems=y_mean_unweighted[:, tf.newaxis])
            y_s = tf.reduce_sum(tf.multiply(y_mean_unweighted_squared, self._weights[:, tf.newaxis]), axis=0)
            C = ((1 - self._c1 - self._c_mu) * self._C + self._c1 * p_C_matrix * tf.transpose(p_C_matrix) +
                 self._c_mu * y_s)
            # -----------------------------------
            # (5) Ensure the symmetry of the covariance matrix here
            # -----------------------------------
            # Keep the upper triangle and mirror it below the diagonal.
            C_upper = tf.linalg.band_part(C, 0, -1)
            C_upper_no_diag = C_upper - tf.linalg.tensor_diag(tf.linalg.diag_part(C_upper))
            C = C_upper + tf.transpose(C_upper_no_diag)

            # -----------------------------------
            # (6)Update the values
            # -----------------------------------
            # tf.linalg.svd returns (singular values, left vectors, right
            # vectors): `u` holds the singular values of C and `B` the left
            # singular vectors.
            u, B, _ = tf.linalg.svd(C)

            diag_D = tf.sqrt(u)
            D = tf.linalg.tensor_diag(diag_D)
            # Assign values
            self._p_C.assign(p_C)
            self._p_sigma.assign(p_sigma)
            self._C.assign(C)
            self._sigma.assign(sigma)
            self._B.assign(B)
            self._D.assign(D)
            self._m.assign(m)
            return t + tf.constant(1, dtype=tf.int32), m
    def timeflow(self, t=0.0):
        """Advance the body-and-legs model by one ``dtime`` step.

        Calls ``self.setalpha(t)``, then for every leg and subleg updates the
        joint state (``omega``, ``theta``, ``Q``), accumulates the force and
        torque terms, solves for the body's angular and linear acceleration,
        integrates ``self.body.wb``, ``Q``, ``vs`` and ``rs``, and finally
        re-normalises ``self.body.Q`` through a quaternion round trip.

        Reads the module-level names ``Mtot``, ``g``, ``numLeg``,
        ``numsubleg``, ``dtime``, ``Fadded``, ``Fsubed``, ``Offset`` and
        ``Fricscale``, which are defined outside this method.

        Args:
            t: Value forwarded to ``self.setalpha``.

        Returns:
            A pair ``(Momentum, positions)``: ``Momentum`` is the accumulated
            momentum tensor computed from the state before the body update,
            and ``positions`` is the list of every subleg offset collected in
            ``tot_lbtomots`` shifted by the updated ``self.body.rs``.
        """
        self.setalpha(t)
        # Body contribution to the momentum: wb * Ib * Q + m * (rs x vs).
        Momentum = tf.matmul(tf.matmul(
            self.body.wb, self.body.Ib), self.body.Q) + tf.scalar_mul(
                self.body.m, tf.cross(self.body.rs, self.body.vs))
        # Accumulators for the force / torque equations.
        Feqc = tf.scalar_mul(Mtot, g)
        Feqa = tf.diag([Mtot, Mtot, Mtot])  # not read again in this method
        Crossvec = tf.zeros((1, 3), dtype=tf.float64)  # accumulated below, never read
        Teqalpha = tf.zeros((3, 3), dtype=tf.float64)
        Teqc = tf.zeros((1, 3), dtype=tf.float64)
        mlsum = tf.zeros((1, 3), dtype=tf.float64)
        sumDs = tf.zeros((3, 3), dtype=tf.float64)  # accumulated below, never read
        wbs = tf.matmul(self.body.wb, self.body.Q)  #[1,3] matrix
        tot_lbtomots = []
        for p in range(numLeg):
            for i in range(numsubleg):
                self.leg[p].sub[i].omega += self.leg[p].sub[
                    i].alpha * dtime  # update omega over time
                self.leg[p].sub[i].theta += self.leg[p].sub[
                    i].omega * dtime  # update theta over time
                # Rebuild the joint rotation from theta and the joint axis:
                # cos(theta) * I + (1 - cos(theta)) * axis^T axis
                # + sin(theta) * (axis x I).
                self.leg[p].sub[i].Q = tf.scalar_mul(tf.cos(self.leg[p].sub[i].theta), tf.eye(3, dtype=tf.float64)) + \
                tf.scalar_mul(1.-tf.cos(self.leg[p].sub[i].theta), tf.matmul(self.leg[p].sub[i].axis, self.leg[p].sub[i].axis, transpose_a = True)) + \
                tf.scalar_mul(tf.sin(self.leg[p].sub[i].theta), tf.cross(tf.tile(self.leg[p].sub[i].axis,[3,1]), tf.eye(3, dtype=tf.float64)))
            Qs = [tf.matmul(self.leg[p].sub[0].Q,
                            self.body.Q)]  # Qs is the coordinate transform from the i-th subleg to space
            #List of rotation matrices of each sublegs in space frame
            #Type : list of [3,3] Tensor
            for i in range(1, numsubleg):
                Qs.append(tf.matmul(self.leg[p].sub[i].Q, Qs[i - 1]))

            # Each subleg's Ib transformed with its Qs: Qs^T * Ib * Qs.
            Is = [
                tf.matmul(
                    tf.matmul(Qs[i], self.leg[p].sub[i].Ib, transpose_a=True),
                    Qs[i]) for i in range(numsubleg)
            ]

            e = [
                tf.matmul(self.leg[p].sub[i].axis, Qs[i])
                for i in range(numsubleg)
            ]
            #List of axes of each sublegs in space frame
            #Type : list of [None,3] Tensor

            # Joint angular accelerations along the space-frame axes.
            Qalpha = [
                tf.scalar_mul(self.leg[p].sub[i].alpha, e[i])
                for i in range(numsubleg)
            ]

            # Running sum of the joint accelerations along the chain.
            Qalphasum = [Qalpha[0]]
            for i in range(1, numsubleg):
                Qalphasum.append(Qalphasum[i - 1] + Qalpha[i])

            # Joint angular velocities along the space-frame axes.
            Qw = [
                tf.scalar_mul(self.leg[p].sub[i].omega, e[i])
                for i in range(numsubleg)
            ]

            # Angular velocity of each subleg: body rate plus all joint rates
            # up to and including that subleg.
            ws = [wbs + Qw[0]]
            for i in range(1, numsubleg):
                ws.append(ws[i - 1] + Qw[i])

            # The same angular velocities multiplied by Qs^T.
            w = [
                tf.matmul(ws[i], Qs[i], transpose_b=True)
                for i in range(numsubleg)
            ]

            # Both l vectors of every subleg multiplied by its Qs.
            ls = [[
                tf.matmul(self.leg[p].sub[i].l[0], Qs[i]),
                tf.matmul(self.leg[p].sub[i].l[1], Qs[i])
            ] for i in range(numsubleg)]  #ls = 2Dtensor

            lbtomotbs = tf.matmul(self.body.lbtomot[p],
                                  self.body.Q)  # lbtomotbs = 2Dtensor

            lbtomots = [lbtomotbs + ls[0][0]]  # lbtomots = 2Dtensor

            # Chain the offsets: previous offset + previous l[1] + current l[0].
            for i in range(1, numsubleg):
                lbtomots.append(lbtomots[i - 1] + ls[i - 1][1] + ls[i][0])
            for i in range(numsubleg):
                mlsum += tf.scalar_mul(self.leg[p].sub[i].m, lbtomots[i])
            # For angular-momentum debugging
            vmotbs = [tf.cross(wbs, lbtomotbs) + tf.cross(ws[0], ls[0][0])]
            for i in range(1, numsubleg):
                vmotbs.append(vmotbs[i - 1] +
                              tf.cross(ws[i - 1], ls[i - 1][1]) +
                              tf.cross(ws[i], ls[i][0]))

            #Calculating External Forces
            vs = self.body.vs
            for i in range(numsubleg):
                # 1.0 in every component where (offset + l[1] + body.rs) is
                # below zero.
                Collisiontemp = tf.cast(
                    tf.less(lbtomots[i] + ls[i][1] + self.body.rs,
                            tf.zeros((1, 3), dtype=tf.float64)), tf.float64)
                # Keep only the z component of the indicator.
                Collisionz = tf.multiply(Collisiontemp,
                                         tf.constant([[0, 0, 1]], tf.float64))
                # Copy the z indicator into the x and y slots (z slot zeroed).
                Collisionxy = tf.matmul(
                    Collisionz,
                    tf.constant([[0, 0, 0], [0, 0, 0], [1, 1, 0]],
                                tf.float64))  ## the amount of computation could probably be reduced further; find a way
                vs += tf.cross(ws[i], ls[i][0] + ls[i][1])
                # 1.0 in every component where the accumulated velocity is
                # negative.
                vCollision = tf.cast(
                    tf.less(vs, tf.zeros((1, 3), dtype=tf.float64)),
                    tf.float64)
                Ftemp = tf.multiply(
                    Collisionz, Fadded + tf.multiply(
                        (vCollision - Offset), Fsubed))
                Feqc += Ftemp
                Teqc += tf.cross(lbtomots[i] + ls[i][1], Ftemp)
                FrictionTemp = -tf.multiply(tf.scalar_mul(
                    Fricscale, vs), Collisionxy)  ## author note (translated): "sigh.. the forces are all wrong"
                Feqc += FrictionTemp
                Teqc += tf.cross(lbtomots[i] + ls[i][1], FrictionTemp)

            # Acceleration terms built from alpha x l and w x (w x l) products
            # along the chain.
            A = [
                tf.cross(wbs, tf.cross(wbs, lbtomotbs)) +
                tf.cross(Qalphasum[0], ls[0][0]) +
                tf.cross(ws[0], tf.cross(ws[0], ls[0][0]))
            ]

            for i in range(1, numsubleg):
                A.append(
                    tf.cross(Qalphasum[i - 1], ls[i - 1][1]) +
                    tf.cross(Qalphasum[i], ls[i][0]) +
                    tf.cross(ws[i - 1], tf.cross(ws[i - 1], ls[i - 1][1])) +
                    tf.cross(ws[i], tf.cross(ws[i], ls[i][0])))

            # Sum over sublegs of m * |offset|^2 (scalar) ...
            mlsquare = tf.zeros((1), dtype=tf.float64)
            for i in range(numsubleg):
                mlsquare += tf.scalar_mul(
                    self.leg[p].sub[i].m,
                    tf.matmul(lbtomots[i], lbtomots[i], transpose_b=True))
            mlsquare = tf.reshape(mlsquare, [-1])
            # ... and of m * offset^T offset (3x3 outer product).
            Dya = tf.zeros([3, 3], dtype=tf.float64)
            for i in range(numsubleg):
                Dya += tf.scalar_mul(
                    self.leg[p].sub[i].m,
                    tf.matmul(lbtomots[i], lbtomots[i], transpose_a=True))
            ###############
            # Ds = (sum m |r|^2) * I - sum m r^T r.
            Ds = tf.diag(tf.concat([mlsquare, mlsquare, mlsquare],
                                   axis=0)) - Dya
            Teqalpha += Ds
            sumDs += Ds
            #Qb * Ib * Qb.transpose()

            for i in range(numsubleg):
                Feqc -= tf.scalar_mul(self.leg[p].sub[i].m, A[i])
                Crossvec += tf.scalar_mul(self.leg[p].sub[i].m, lbtomots[i])
                Teqc += tf.matmul(
                    tf.cross(tf.matmul(w[i], self.leg[p].sub[i].Ib), w[i]),
                    Qs[i])
                Teqc -= tf.matmul(Qalphasum[i], Is[i])
                Teqalpha += Is[i]
                #Qs_i * I_i * Qs_i^T
            for i in range(numsubleg):
                Momentum += tf.matmul(tf.matmul(w[i], self.leg[p].sub[i].Ib),
                                      Qs[i])
                Momentum += tf.scalar_mul(
                    self.leg[p].sub[i].m,
                    tf.cross(lbtomots[i] + self.body.rs,
                             vmotbs[i] + self.body.vs))
            #leg update
            #float32 -> float64 conversion : 171013 Fine
            #update 'Q's of leg - 20171012 fine
            tot_lbtomots += lbtomots
        # Add the body's own inertia and w x (I w) terms.
        Teqalpha += tf.matmul(
            tf.matmul(self.body.Q, self.body.Ib, transpose_a=True),
            self.body.Q)
        Teqc += tf.matmul(
            tf.cross(tf.matmul(self.body.wb, self.body.Ib), self.body.wb),
            self.body.Q)
        Teqc += tf.cross(mlsum, g)
        Teqanorm = tf.reduce_sum(tf.square(mlsum))
        # Solve the torque equation for alphabs by explicit matrix inversion.
        alphabs = tf.matmul(
            Teqc - tf.scalar_mul(1. / Mtot, tf.cross(mlsum, Feqc)),
            tf.matrix_inverse(Teqalpha + tf.scalar_mul(
                1. / Mtot,
                Teqanorm * tf.eye(3, dtype=tf.float64) -
                tf.matmul(mlsum, mlsum, transpose_a=True))  # author note (translated): this part is very confusing
                              ))
        asb = tf.scalar_mul(1. / Mtot, Feqc - tf.cross(mlsum, alphabs))
        alphab = tf.matmul(alphabs, self.body.Q, transpose_b=True)
        # Integrate the body state over one dtime step.  Note that rs is
        # advanced with the already-updated vs.
        self.body.wb += tf.scalar_mul(dtime, alphab)
        self.body.Q += tf.scalar_mul(
            dtime, tf.cross(tf.concat([wbs, wbs, wbs], axis=0), self.body.Q))
        self.body.vs += tf.scalar_mul(dtime, asb)
        self.body.rs += tf.scalar_mul(dtime, self.body.vs)

        # Q to quaternion

        qw = tf.scalar_mul(
            0.5, tf.sqrt(tf.reduce_sum(tf.diag_part(self.body.Q)) + 1.))
        qv = -tf.reduce_sum(tf.cross(self.body.Q, tf.eye(3, dtype=tf.float64)),
                            axis=0) / tf.scalar_mul(4., qw)

        # quaternion normalization

        qvsquare = tf.reduce_sum(tf.square(qv))
        qnorm = tf.sqrt(tf.square(qw) + qvsquare)
        qw /= qnorm
        qv /= qnorm
        # quaternion to Q
        # NOTE(review): qvsquare was computed before qv was normalised and is
        # reused here unchanged -- confirm that this is intended.

        self.body.Q = tf.scalar_mul(qw*qw-qvsquare,tf.eye(3, dtype = tf.float64))\
            + 2 * tf.matmul(tf.reshape(qv, [3, 1]), tf.reshape(qv, [1, 3]))\
            + 2 * qw * tf.cross(tf.tile(tf.reshape(qv, [1,3]), [3,1]), tf.eye(3, dtype = tf.float64))

        return Momentum, [x + self.body.rs for x in tot_lbtomots]
Ejemplo n.º 59
0
# Regression losses evaluated against a fixed target of zero.  `x_vals`,
# `sess` and `plt` are defined earlier in the script.
target = tf.constant(0.)

# L2 loss: the squared difference between target and prediction.
l2_y_vals = tf.square(target - x_vals)
l2_y_out = sess.run(l2_y_vals)

# L1 loss: the absolute difference between target and prediction.
l1_y_vals = tf.abs(target - x_vals)
l1_y_out = sess.run(l1_y_vals)

# Pseudo-Huber loss is a continuous and smooth approximation to the Huber
# loss:  delta^2 * (sqrt(1 + (residual / delta)^2) - 1).
# The residual must be divided by delta inside the square root; without that
# division delta only rescales a single curve instead of controlling where
# the loss switches from quadratic to linear growth.
delta1 = 0.25
delta2 = 0.5

phuber1_y_vals = tf.multiply(
    tf.square(delta1),
    tf.sqrt(1. + tf.square((target - x_vals) / delta1)) - 1.)
phuber1_y_out = sess.run(phuber1_y_vals)

phuber2_y_vals = tf.multiply(
    tf.square(delta2),
    tf.sqrt(1. + tf.square((target - x_vals) / delta2)) - 1.)
phuber2_y_out = sess.run(phuber2_y_vals)

# Plot all four losses over the same x range.
x_array = sess.run(x_vals)
plt.plot(x_array, l2_y_out, 'b--', label='L2 Loss')
plt.plot(x_array, l1_y_out, 'r--', label='L1 Loss')
plt.plot(x_array, phuber1_y_out, 'k--', label='Huber Loss D=0.25')
plt.plot(x_array, phuber2_y_out, 'g--', label='Huber Loss D=0.5')
plt.legend(loc='lower right', prop={'size': 11})
plt.show()

# For classification function
Ejemplo n.º 60
0
def _pairwise_distance_computation(input_tensor, margin):
    """Compute all pairwise Euclidean distances between the rows of a matrix.

    Args:
        input_tensor: Rank-2 tensor; each row is one vector.  (The rank is
            fixed by the three-axis transpose below.)
        margin: Value the distances are subtracted from before
            exponentiation.

    Returns:
        A pair ``(tf.exp(margin - d), d)`` where ``d[i, j]`` is the distance
        between rows ``i`` and ``j``.
    """
    # [N, D] -> [N, 1, D]; subtracting its [1, N, D] transpose broadcasts to
    # every pairwise difference, shape [N, N, D].
    rows = tf.expand_dims(input_tensor, 1)
    cols = tf.transpose(rows, (1, 0, 2))
    # The reduced axis is dropped by default.  The deprecated `keep_dims`
    # keyword is not passed because newer TensorFlow releases reject it.
    d_sq = tf.reduce_sum(tf.square(rows - cols), 2)
    # The small constant keeps the square root away from exact zero on the
    # diagonal.
    d = tf.sqrt(d_sq + 1e-8)
    return tf.exp(margin - d), d