Esempio n. 1
0
 def build_predict(self, Xnew, full_cov=False):
     """
     Build the predictive mean and variance of the latent function at the
     new inputs Xnew. The derivation of the terms is given in the
     associated SGPR notebook.

     :param Xnew: new inputs, passed to the kernel and the mean function.
     :param full_cov: if True the variance is built from ``kern.K(Xnew)``,
         otherwise from ``kern.Kdiag(Xnew)`` only.
     :return: tuple ``(mean, var)``; ``var`` is tiled once per column of
         ``self.Y``.
     """
     jitter = 1e-6
     n_inducing = tf.shape(self.Z)[0]
     residual = self.Y - self.mean_function(self.X)
     K_zx = self.kern.K(self.Z, self.X)
     K_zz = self.kern.K(self.Z) + eye(n_inducing) * jitter
     K_zs = self.kern.K(self.Z, Xnew)
     noise_std = tf.sqrt(self.likelihood.variance)
     chol_zz = tf.cholesky(K_zz)
     scaled = tf.matrix_triangular_solve(chol_zz, K_zx, lower=True) / noise_std
     chol_B = tf.cholesky(
         tf.matmul(scaled, tf.transpose(scaled)) + eye(n_inducing))
     scaled_residual = tf.matmul(scaled, residual)
     coeffs = tf.matrix_triangular_solve(
         chol_B, scaled_residual, lower=True) / noise_std
     proj_prior = tf.matrix_triangular_solve(chol_zz, K_zs, lower=True)
     proj_post = tf.matrix_triangular_solve(chol_B, proj_prior, lower=True)
     mean = tf.matmul(tf.transpose(proj_post), coeffs)
     if full_cov:
         cov = (self.kern.K(Xnew)
                + tf.matmul(tf.transpose(proj_post), proj_post)
                - tf.matmul(tf.transpose(proj_prior), proj_prior))
         # Replicate the covariance along a trailing axis, one copy per
         # column of Y.
         tiling = tf.pack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(cov, 2), tiling)
     else:
         marginal = (self.kern.Kdiag(Xnew)
                     + tf.reduce_sum(tf.square(proj_post), 0)
                     - tf.reduce_sum(tf.square(proj_prior), 0))
         tiling = tf.pack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(marginal, 1), tiling)
     return mean + self.mean_function(Xnew), var
Esempio n. 2
0
def sq_dist(boxlist1, boxlist2, scope=None):
  """Pairwise squared Euclidean distances between two sets of boxes.

  Every box is treated as a point whose coordinates are the row returned by
  ``BoxList.get()``. With X the rows of boxlist1 and Y the rows of boxlist2
  the result is

    Z(i, j) = ||X(i,:) - Y(j,:)||^2
            = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:)

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes
    scope: name scope.

  Returns:
    a tensor with shape [N, M] representing pairwise squared distances
  """
  with tf.name_scope(scope, 'SqDist'):
    # Column vector of squared row norms for the first list ...
    norms_first = tf.reduce_sum(
        tf.square(boxlist1.get()), 1, keep_dims=True)
    # ... and for the second list (transposed into a row below).
    norms_second = tf.reduce_sum(
        tf.square(boxlist2.get()), 1, keep_dims=True)
    cross_terms = tf.matmul(boxlist1.get(), boxlist2.get(), transpose_b=True)
    pairwise = norms_first + tf.transpose(norms_second) - 2.0 * cross_terms
    return pairwise
Esempio n. 3
0
def soft_triplet_loss(anchor, positive, negative, extra=True, scope="soft_triplet_loss"):
    r"""Loss for triplet networks as described in the paper:
    `Deep Metric Learning using Triplet Network
    <https://arxiv.org/abs/1412.6622>`_ by Hoffer et al.

    It is a softmax loss whose two logits are the Euclidean distances
    :math:`\|anchor-positive\|_2` and :math:`\|anchor-negative\|_2`; the
    target class is always the second one (the negative distance).

    Args:
        anchor (tf.Tensor): anchor feature vectors of shape [Batch, N].
        positive (tf.Tensor): features of positive match of the same shape.
        negative (tf.Tensor): features of negative match of the same shape.
        extra (bool): also return distances for pos and neg.
        scope (str): name scope under which the ops are created.

    Returns:
        tf.Tensor: triplet-loss as scalar (and optionally average_pos_dist, average_neg_dist)
    """

    # Keeps the sqrt (and its gradient) finite when two embeddings coincide.
    eps = 1e-10
    with tf.name_scope(scope):
        d_pos = tf.sqrt(tf.reduce_sum(tf.square(anchor - positive), 1) + eps)
        d_neg = tf.sqrt(tf.reduce_sum(tf.square(anchor - negative), 1) + eps)

        logits = tf.stack([d_pos, d_neg], axis=1)
        # d_pos is already rank 1 ([Batch]). Squeezing it would turn a batch
        # of one into a scalar, and the labels would no longer have the
        # rank (logits rank - 1) that the sparse cross-entropy op requires.
        ones = tf.ones_like(d_pos, dtype="int32")

        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ones))

        if extra:
            pos_dist = tf.reduce_mean(d_pos, name='pos-dist')
            neg_dist = tf.reduce_mean(d_neg, name='neg-dist')
            return loss, pos_dist, neg_dist
        else:
            return loss
    def loss(self):
        """Build the margin, reconstruction and total losses.

        The results are stored on ``self.margin_loss``,
        ``self.reconstruction_err`` and ``self.total_loss``; nothing is
        returned.
        """
        # --- 1. Margin loss -------------------------------------------
        # present_term = max(0, m_plus - ||v_c||)^2
        present_term = tf.square(tf.maximum(0., cfg.m_plus - self.v_length))
        # absent_term = max(0, ||v_c|| - m_minus)^2
        absent_term = tf.square(tf.maximum(0., self.v_length - cfg.m_minus))
        assert present_term.get_shape() == [cfg.batch_size, 10, 1, 1]

        # Flatten [batch_size, 10, 1, 1] to [batch_size, 10].
        flat_shape = (cfg.batch_size, -1)
        present_term = tf.reshape(present_term, shape=flat_shape)
        absent_term = tf.reshape(absent_term, shape=flat_shape)

        # The labels self.Y are used directly as the indicator T_c.
        targets = self.Y
        per_capsule = (targets * present_term
                       + cfg.lambda_val * (1 - targets) * absent_term)
        self.margin_loss = tf.reduce_mean(tf.reduce_sum(per_capsule, axis=1))

        # --- 2. Reconstruction loss -----------------------------------
        flat_input = tf.reshape(self.X, shape=(cfg.batch_size, -1))
        self.reconstruction_err = tf.reduce_mean(
            tf.square(self.decoded - flat_input))

        # --- 3. Total loss --------------------------------------------
        # The paper uses a sum of squared errors for the reconstruction
        # term, whereas a mean is taken above. To stay in line with the
        # paper the regularization scale should be 0.0005 * 784 = 0.392.
        self.total_loss = (self.margin_loss
                           + cfg.regularization_scale * self.reconstruction_err)
Esempio n. 5
0
def standard_reg():
    """Return the weight penalty built from the module-level ``net_params``.

    The penalty is the mean squared value of ``net_params['sDW1']`` scaled
    by ``standard_w_weight_reg`` plus the mean squared value of
    ``net_params['sRW']`` scaled by ``regressor_w_weight_reg``.
    """
    base = tf.constant(0.0, dtype=tf.float32)
    sdw1_penalty = standard_w_weight_reg * tf.reduce_mean(
        tf.square(net_params['sDW1']))
    # The analogous term for 'sDW2' is currently disabled:
    # standard_w_weight_reg * tf.reduce_mean(tf.square(net_params['sDW2']))
    srw_penalty = regressor_w_weight_reg * tf.reduce_mean(
        tf.square(net_params['sRW']))
    return base + sdw1_penalty + srw_penalty
Esempio n. 6
0
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """KL divergence between two multivariate normals with diagonal
    covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        A 0-D tensor, 1-D tensor of length n, or 2-D tensor of shape M
        x n where each row is the mean of an n-dimensional Gaussian.
    scale_one : tf.Tensor
        Same shape as ``loc_one``; the standard deviation.
    loc_two : tf.Tensor, optional
        Same shape as ``loc_one``; the mean of the second Gaussian.
    scale_two : tf.Tensor, optional
        Same shape as ``loc_one``; the standard deviation of the second
        Gaussian.

    Returns
    -------
    tf.Tensor
        For 0-D or 1-D tensor inputs, the 0-D tensor
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        For 2-D tensor inputs, the 1-D tensor
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    # Runtime checks that every tensor used below is made to depend on.
    checks = [
        tf.verify_tensor_all_finite(loc_one, msg=''),
        tf.verify_tensor_all_finite(loc_two, msg=''),
        tf.assert_positive(scale_one),
        tf.assert_positive(scale_two),
    ]
    mu_q = control_flow_ops.with_dependencies(checks, loc_one)
    sigma_q = control_flow_ops.with_dependencies(checks, scale_one)
    mu_q = tf.cast(mu_q, tf.float32)
    sigma_q = tf.cast(sigma_q, tf.float32)

    if loc_two == 0.0 and scale_two == 1.0:
        # Default second Gaussian: several terms vanish, so skip them.
        terms = (tf.square(sigma_q) + tf.square(mu_q)
                 - 1.0 - 2.0 * tf.log(sigma_q))
    else:
        mu_p = control_flow_ops.with_dependencies(checks, loc_two)
        sigma_p = control_flow_ops.with_dependencies(checks, scale_two)
        mu_p = tf.cast(mu_p, tf.float32)
        sigma_p = tf.cast(sigma_p, tf.float32)
        terms = (tf.square(sigma_q / sigma_p)
                 + tf.square((mu_p - mu_q) / sigma_p)
                 - 1.0 + 2.0 * tf.log(sigma_p) - 2.0 * tf.log(sigma_q))

    # Scalars and vectors collapse to a single number.
    if len(terms.get_shape()) <= 1:
        return 0.5 * tf.reduce_sum(terms)
    # Matrices are reduced row by row.
    return 0.5 * tf.reduce_sum(terms, 1)
Esempio n. 7
0
def multilinear_square_product(emb, tuples, l2=0):
    """
    Compute the square-product of real vectors at selected embeddings.

    For every tuple the selected embedding vectors are summed, and the
    squared entries of that sum are added up over the rank dimension:
    S[i] = sum_j ( sum_k  E[I[i,k],j] )^2

    :param emb: embedding matrix of size [n_emb, rank] containing float numbers
    :param tuples: tuple matrix of size [n_t, arity] containing integers
    :param l2: optional l2 regularization strength. If it is different from 0, the function returns a pair
     (pred, reg) where pred is the prediction and reg is l2 times the sum of squares of the selected embeddings
    :return: the multilinear square product between selected embeddings (or the pair described above)

    >>> emb = [[12., 0, 0], [0, 1, 0], [-1, 1, 1]]
    >>> idx = tf.Variable([[1,0,0],[1,1,0]])
    >>> g = multilinear_square_product(emb, idx)
    >>> print(tf_eval(g))
    [ 577.  148.]
    """
    selected = tf.gather(emb, tuples)
    # Sum the embeddings of each tuple, then square and sum over the rank.
    tuple_sums = tf.reduce_sum(selected, 1)
    pred = tf.reduce_sum(tf.square(tuple_sums), 1)

    if l2 == 0:
        # No regularization requested: only the predictions are returned.
        return pred

    penalty = l2 * tf.reduce_sum(tf.square(selected))
    return pred, penalty
Esempio n. 8
0
def cord_cls_loss(detectors_mask,
                  matching_true_boxes,
                  num_classes,
                  pred_class_prob,
                  pred_boxes,
                  loc_scale):
    """
    Summed squared-error classification and coordinate loss over the
    detectors selected by ``detectors_mask``.

    Shapes below are taken from the original annotations of this function.

    :param detectors_mask: [batch, 13, 13, 3, 1]
    :param matching_true_boxes: [batch, 13, 13, 3, 5]   [σ(tx), σ(ty), tw, th, cls]
    :param num_classes: number of classes (e.g. 20), used as the one-hot depth
    :param pred_class_prob: [batch, 13, 13, 3, 20]
    :param pred_boxes: [batch, 13, 13, 3, 4]
    :param loc_scale: [batch, 13, 13, 3, 1]
    :return: scalar tensor, the sum of the classification loss and the
        coordinate loss (no averaging is applied here)
    """
    # Classification part: squared error against the one-hot class, not
    # categorical cross-entropy.
    class_ids = tf.cast(matching_true_boxes[..., 4], tf.int32)
    class_targets = tf.one_hot(class_ids, num_classes)
    class_sq_err = detectors_mask * tf.square(class_targets - pred_class_prob)

    # Coordinate part: squared error on the first four entries
    # [σ(tx), σ(ty), tw, th], weighted by loc_scale.
    box_targets = matching_true_boxes[..., 0:4]
    box_sq_err = detectors_mask * loc_scale * tf.square(box_targets - pred_boxes)

    class_total = tf.reduce_sum(class_sq_err)
    box_total = tf.reduce_sum(box_sq_err)
    return class_total + box_total
Esempio n. 9
0
    def e_step(o_mean, o_stdv, o_activations, votes):
      """E-step of EM routing: compute the assignment weights ``rr``.

      Shapes are taken from the original annotations of this function.

      :param o_mean: (24, 6, 6, 1, 32, 16)
      :param o_stdv: (24, 6, 6, 1, 32, 16)
      :param o_activations: (24, 6, 6, 1, 32, 1)
      :param votes: (24, 6, 6, 288, 32, 16)

      :return: rr, a softmax over the second-to-last axis of
        ``log(o_activations) + log-density`` (annotated as
        (24, 6, 6, 288, 32, 1))
      """
      # Quadratic part of the Gaussian log-density, summed over the last axis.
      scaled_sq_dev = tf.square(votes - o_mean) / (2 * tf.square(o_stdv))
      neg_quadratic = - tf.reduce_sum(scaled_sq_dev, axis=-1, keep_dims=True)

      # Normalisation part: minus the summed log standard deviations.
      neg_log_stdv = - tf.reduce_sum(
        tf.log(o_stdv + epsilon), axis=-1, keep_dims=True
      )

      # Log-density of each vote (constant terms are omitted).
      log_density = neg_quadratic + neg_log_stdv

      logits = tf.log(o_activations + epsilon) + log_density
      softmax_axis = len(logits.get_shape().as_list()) - 2
      return tf.nn.softmax(logits, dim=softmax_axis)
Esempio n. 10
0
 def _build_predict(self, Xnew, full_cov=False):
     """
     Build the predictive mean and variance of the latent function at the
     new inputs Xnew. The derivation of the terms is given in the
     associated SGPR notebook.

     :param Xnew: new inputs, passed to the feature, kernel and mean function.
     :param full_cov: if True the variance is built from ``kern.K(Xnew)``,
         otherwise from ``kern.Kdiag(Xnew)`` only.
     :return: tuple ``(mean, var)``; ``var`` is tiled once per column of
         ``self.Y``.
     """
     n_inducing = len(self.feature)
     residual = self.Y - self.mean_function(self.X)
     K_uf = self.feature.Kuf(self.kern, self.X)
     K_uu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     K_us = self.feature.Kuf(self.kern, Xnew)
     noise_std = tf.sqrt(self.likelihood.variance)
     chol_uu = tf.cholesky(K_uu)
     scaled = tf.matrix_triangular_solve(chol_uu, K_uf, lower=True) / noise_std
     chol_B = tf.cholesky(
         tf.matmul(scaled, scaled, transpose_b=True)
         + tf.eye(n_inducing, dtype=settings.float_type))
     scaled_residual = tf.matmul(scaled, residual)
     coeffs = tf.matrix_triangular_solve(
         chol_B, scaled_residual, lower=True) / noise_std
     proj_prior = tf.matrix_triangular_solve(chol_uu, K_us, lower=True)
     proj_post = tf.matrix_triangular_solve(chol_B, proj_prior, lower=True)
     mean = tf.matmul(proj_post, coeffs, transpose_a=True)
     if full_cov:
         cov = (self.kern.K(Xnew)
                + tf.matmul(proj_post, proj_post, transpose_a=True)
                - tf.matmul(proj_prior, proj_prior, transpose_a=True))
         # Replicate the covariance along a trailing axis, one copy per
         # column of Y.
         tiling = tf.stack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(cov, 2), tiling)
     else:
         marginal = (self.kern.Kdiag(Xnew)
                     + tf.reduce_sum(tf.square(proj_post), 0)
                     - tf.reduce_sum(tf.square(proj_prior), 0))
         tiling = tf.stack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(marginal, 1), tiling)
     return mean + self.mean_function(Xnew), var
def triplet_loss(y_true, y_pred, alpha = 0.2):
    """
    Implementation of the triplet loss:

        loss = sum_i max(||a_i - p_i||^2 - ||a_i - n_i||^2 + alpha, 0)

    where the squared distances are computed per example over the encoding
    axis and the sum runs over the training examples.

    Arguments:
    y_true -- true labels, required when you define a loss in Keras, you don't need it in this function.
    y_pred -- python list containing three objects:
            anchor -- the encodings for the anchor images, of shape (None, 128)
            positive -- the encodings for the positive images, of shape (None, 128)
            negative -- the encodings for the negative images, of shape (None, 128)
    alpha -- margin added to the distance difference before the hinge.

    Returns:
    loss -- real number, value of the loss
    """

    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    ### START CODE HERE ### (≈ 4 lines)
    # Step 1: Squared distance between the anchor and the positive, summed over
    # the encoding axis only (axis=-1) so that each example keeps its own value.
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)
    # Step 2: Squared distance between the anchor and the negative, same axis.
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), axis=-1)
    # Step 3: subtract the two previous distances and add alpha.
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
    # Step 4: Take the maximum of basic_loss and 0.0 per example, then sum over
    # the training examples.
    loss = tf.reduce_sum(tf.maximum(basic_loss, 0.))
    ### END CODE HERE ###

    return loss
Esempio n. 12
0
    def build_graph(self, image_pos):
        """Build generator, discriminator and the WGAN losses with a gradient penalty.

        Sets ``self.d_loss`` (including 10x the gradient penalty) and
        ``self.g_loss``, registers summaries, and finally calls
        ``self.collect_variables()``.

        Args:
            image_pos: batch of real images; rescaled below via ``x / 128.0 - 1``
                (presumably from a 0-255 range to roughly [-1, 1) -- confirm
                against the input pipeline).
        """
        image_pos = image_pos / 128.0 - 1

        # Latent code: sampled from a standard normal by default, but exposed
        # as placeholder 'z' so a caller can feed its own values.
        z = tf.random_normal([self.batch, self.zdim], name='z_train')
        z = tf.placeholder_with_default(z, [None, self.zdim], name='z')

        with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                      kernel_initializer=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                image_gen = self.generator(z)
            tf.summary.image('generated-samples', image_gen, max_outputs=30)

            # Random point on the segment between each real and generated
            # image; the gradient penalty is evaluated there.
            alpha = tf.random_uniform(shape=[self.batch, 1, 1, 1],
                                      minval=0., maxval=1., name='alpha')
            interp = image_pos + alpha * (image_gen - image_pos)

            with tf.variable_scope('discrim'):
                vecpos = self.discriminator(image_pos)
                vecneg = self.discriminator(image_gen)
                vec_interp = self.discriminator(interp)

        # the Wasserstein-GAN losses
        self.d_loss = tf.reduce_mean(vecneg - vecpos, name='d_loss')
        self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')

        # the gradient penalty loss
        # Gradient of the discriminator output w.r.t. the interpolated input,
        # reduced to one L2 norm per sample (axes 1, 2, 3).
        gradients = tf.gradients(vec_interp, [interp])[0]
        gradients = tf.sqrt(tf.reduce_sum(tf.square(gradients), [1, 2, 3]))
        gradients_rms = symbolic_functions.rms(gradients, 'gradient_rms')
        # Penalise the squared deviation of each norm from 1.
        gradient_penalty = tf.reduce_mean(tf.square(gradients - 1), name='gradient_penalty')
        add_moving_summary(self.d_loss, self.g_loss, gradient_penalty, gradients_rms)

        # The summaries above use the un-penalised d_loss; the penalty is
        # added afterwards with a fixed weight of 10.
        self.d_loss = tf.add(self.d_loss, 10 * gradient_penalty)

        self.collect_variables()
Esempio n. 13
0
def build_psi_stats_rbf_plus_linear(Z, kern, mu, S):
    """Build the psi statistics for a kernel that is the sum of an RBF and a linear part.

    psi0 and psi1 are the sums of the statistics returned by
    ``build_psi_stats_linear`` (for ``kern.linear``) and
    ``build_psi_stats_rbf`` (for ``kern.rbf``). psi2 additionally receives
    a cross term between the two parts, added together with its transpose.

    NOTE(review): per the inline shape comments, Z is presumably M x Q and
    mu, S are N x Q -- confirm against the callers.

    :param Z: inputs sliced to the kernel's active dimensions (see shape note).
    :param kern: kernel exposing ``_slice``, ``linear`` and ``rbf``.
    :param mu: means, sliced to the active dimensions.
    :param S: variances matching ``mu``; used as a divisor below.
    :return: tuple ``(psi0, psi1, psi2)``.
    """
    # TODO: make sure the active dimensions are overlapping completely

    # use only active dimensions
    mu, S = kern._slice(mu, S)  # only use the active dimensions.
    Z, _ = kern._slice(Z, None)

    # Statistics of each part on its own, then their element-wise sums.
    psi0_lin, psi1_lin, psi2_lin = build_psi_stats_linear(Z, kern.linear, mu, S)
    psi0_rbf, psi1_rbf, psi2_rbf = build_psi_stats_rbf(Z, kern.rbf, mu, S)
    psi0, psi1, psi2 = psi0_lin + psi0_rbf, psi1_lin + psi1_rbf, psi2_lin + psi2_rbf

    # extra terms for the 'interaction' of linear and rbf
    l2 = tf.square(kern.rbf.lengthscales)
    A = tf.expand_dims(1./S + 1./l2, 1)  # N x 1 x Q
    m = (tf.expand_dims(mu/S, 1) + tf.expand_dims(Z/l2, 0)) / A  # N x M x Q
    mTAZ = tf.reduce_sum(tf.expand_dims(m * kern.linear.variance, 1) *
                         tf.expand_dims(tf.expand_dims(Z, 0), 0), 3)  # N x M x M
    Z2 = tf.reduce_sum(tf.square(Z) / l2, 1)  # M,
    mu2 = tf.reduce_sum(tf.square(mu) / S, 1)  # N
    mAm = tf.reduce_sum(tf.square(m) * A, 2)  # N x M
    exp_term = tf.exp(-(tf.reshape(Z2, (1, -1)) + tf.reshape(mu2, (-1, 1))-mAm) / 2.)  # N x M
    # Sum the per-datapoint cross terms over the leading axis.
    psi2_extra = tf.reduce_sum(kern.rbf.variance *
                               tf.expand_dims(exp_term, 2) *
                               tf.expand_dims(tf.expand_dims(tf.reduce_prod(S, 1), 1), 2) *
                               tf.expand_dims(tf.reduce_prod(A, 2), 1) *
                               mTAZ, 0)

    # The cross term is added in both orientations.
    psi2 = psi2 + psi2_extra + tf.transpose(psi2_extra)
    return psi0, psi1, psi2
Esempio n. 14
0
File: model.py Progetto: cning/ehc
    def drawGraph(self, n_row, n_latent, n_col):
        """Build the weighted matrix-factorisation graph under scope 'matDecomp'.

        Creates the placeholders ``_p``, ``_c``, ``_lambda``, ``_index`` and
        ``_mask``, the factor variables ``_A`` (n_row x n_latent) and ``_B``
        (n_latent x n_col), the prediction ``_h``, the losses
        ``_weighted_loss`` and ``_loss``, the masked score ``_vali_err``, a
        saver for the scope's variables and two scalar summaries.
        """
        with tf.name_scope('matDecomp'):
            # Inputs.
            self._p = tf.placeholder(tf.float32, shape=[None, n_col])
            self._c = tf.placeholder(tf.float32, shape=[None, n_col])
            self._lambda = tf.placeholder(tf.float32)
            self._index = tf.placeholder(tf.float32, shape=[None, n_row])

            # Factors and the prediction for the rows selected by _index.
            self._A = tf.Variable(tf.truncated_normal([n_row, n_latent]))
            self._B = tf.Variable(tf.truncated_normal([n_latent, n_col]))
            selected_rows = tf.matmul(self._index, self._A)
            self._h = tf.matmul(selected_rows, self._B)

            # Squared error weighted element-wise by _c, averaged.
            sq_err = tf.squared_difference(self._p, self._h)
            self._weighted_loss = tf.reduce_mean(tf.mul(self._c, sq_err))

            # Penalty: mean squared entry over both factors, scaled by _lambda.
            sq_sum_A = tf.reduce_sum(tf.square(self._A))
            sq_sum_B = tf.reduce_sum(tf.square(self._B))
            n_weights = tf.constant(n_row * n_latent + n_latent * n_col, tf.float32)
            mean_sq_weight = tf.truediv(tf.add(sq_sum_A, sq_sum_B), n_weights)
            penalty = tf.mul(self._lambda, mean_sq_weight)
            self._loss = tf.add(self._weighted_loss, penalty)

            # Masked, weighted count of entries where the thresholded full
            # product A*B equals _p.
            self._mask = tf.placeholder(tf.float32, shape=[n_row, n_col])
            threshold = tf.constant(1, tf.float32)
            thresholded = tf.cast(
                tf.greater_equal(tf.matmul(self._A, self._B), threshold), tf.float32)
            matches = tf.cast(tf.equal(thresholded, self._p), tf.float32)
            weighted_matches = tf.mul(matches, self._c)
            self._vali_err = tf.reduce_sum(tf.mul(weighted_matches, self._mask))

            scope_variables = [v for v in tf.all_variables() if 'matDecomp' in v.name]
            self._saver = tf.train.Saver(scope_variables)
            tf.scalar_summary('training_weighted_loss_l2', self._loss)
            tf.scalar_summary('validation_weighted_loss', self._weighted_loss)
            tf.merge_all_summaries()
Esempio n. 15
0
        def _ssim_helper(var_x, var_y, max_val, kernel, compensation=1.0):
            """
            Compute the two factors of SSIM for a pair of image tensors.

            Local statistics are weighted sums obtained by a depthwise
            convolution with `kernel`. With weights w_i the estimators are
            mu_x = sum_i w_i x_i,
            mu_y = sum_i w_i y_i,
            cov_{xy} = sum_i w_i (x_i - mu_x) (y_i - mu_y)
            assuming sum_i w_i = 1. This covariance estimator is biased, since
            E[cov_{xy}] = (1 - sum_i w_i ^ 2) Cov(X, Y).
            For unbiased covariance estimators pass (1 - sum_i w_i ^ 2) as
            `compensation`.
            Arguments:
            var_x: First set of images.
            var_y: Second set of images.
            max_val: The dynamic range (i.e., the difference between the maximum
              possible allowed value and the minimum allowed value).
            kernel: Filter handed to tf.nn.depthwise_conv2d to form the local
              weighted sums.
            compensation: Compensation factor applied to c_2. See above.
            Returns:
            A pair containing the luminance measure, and the contrast-structure measure.
            """

            def local_weighted_sum(images):
                # Fold all leading axes into one batch axis, convolve, then
                # restore the leading axes around the convolved dimensions.
                dims = tf.shape(images)
                folded = tf.reshape(images, shape=tf.concat([[-1], dims[-3:]], 0))
                convolved = tf.nn.depthwise_conv2d(
                    folded, kernel, strides=[1, 1, 1, 1], padding='VALID')
                return tf.reshape(
                    convolved, tf.concat([dims[:-3], tf.shape(convolved)[1:]], 0))

            k1, k2 = 0.01, 0.03
            c_1 = (k1 * max_val) ** 2
            c_2 = (k2 * max_val) ** 2

            # Luminance: (2 * mu_x * mu_y + c_1) / (mu_x ** 2 + mu_y ** 2 + c_1).
            mu_x = local_weighted_sum(var_x)
            mu_y = local_weighted_sum(var_y)
            two_mu_xy = mu_x * mu_y * 2.0
            mu_sq_sum = tf.square(mu_x) + tf.square(mu_y)
            luminance = (two_mu_xy + c_1) / (mu_sq_sum + c_1)

            # Contrast-structure: (2 * cov_{xy} + c_2) / (cov_{xx} + cov_{yy} + c_2),
            # using cov_{xy} = sum_i w_i x_i y_i - mu_x * mu_y.
            two_e_xy = local_weighted_sum(var_x * var_y) * 2.0
            e_sq_sum = local_weighted_sum(tf.square(var_x) + tf.square(var_y))
            c_2 *= compensation
            contrast_structure = (
                (two_e_xy - two_mu_xy + c_2) / (e_sq_sum - mu_sq_sum + c_2))

            # The SSIM score is the product of the two returned measures.
            return luminance, contrast_structure
Esempio n. 16
0
def cosine_distances(test, support):
  """Computes pairwise cosine similarities between provided tensors

  Despite the function name, the returned matrix holds the inner products
  of the L2-normalized rows (a Gram matrix), i.e. cosine similarities.

  Parameters
  ----------
  test: tf.Tensor
    Of shape (n_test, n_feat)
  support: tf.Tensor
    Of shape (n_support, n_feat)

  Returns
  -------
  tf.Tensor:
    Of shape (n_test, n_support)
  """
  # The epsilon has to sit inside rsqrt: added afterwards, an all-zero row
  # still yields rsqrt(0) = inf, and inf * 0 turns the row into NaN.
  eps = 1e-7
  rnorm_test = tf.rsqrt(
      tf.reduce_sum(tf.square(test), 1, keep_dims=True) + eps)
  rnorm_support = tf.rsqrt(
      tf.reduce_sum(tf.square(support), 1, keep_dims=True) + eps)
  test_normalized = test * rnorm_test
  support_normalized = support * rnorm_support

  # Transpose for mul
  support_normalized_t = tf.transpose(support_normalized, perm=[1, 0])
  g = tf.matmul(test_normalized, support_normalized_t)  # Gram matrix
  return g
Esempio n. 17
0
def batchnormalize(X, eps=1e-8, g=None, b=None):
    """Normalize X with its own batch statistics and optionally scale/shift.

    Rank-4 inputs are normalized over axes [0, 1, 2], rank-2 inputs over
    axis 0. When both ``g`` and ``b`` are given they are reshaped to
    broadcast along the last axis and applied as ``X * g + b``.

    Raises NotImplementedError for any other rank.
    """
    rank = X.get_shape().ndims
    if rank == 4:
        reduce_axes, param_shape = [0, 1, 2], [1, 1, 1, -1]
    elif rank == 2:
        reduce_axes, param_shape = 0, [1, -1]
    else:
        raise NotImplementedError

    mean = tf.reduce_mean(X, reduce_axes)
    variance = tf.reduce_mean(tf.square(X - mean), reduce_axes)
    X = (X - mean) / tf.sqrt(variance + eps)

    if g is not None and b is not None:
        g = tf.reshape(g, param_shape)
        b = tf.reshape(b, param_shape)
        X = X * g + b

    return X
Esempio n. 18
0
def calc_bound_loss(x_tf, bound_min, bound_max):
    """Quadratic penalty for entries of ``x_tf`` outside [bound_min, bound_max].

    The squared shortfall below ``bound_min`` and the squared excess above
    ``bound_max`` are summed over the last axis; the result is half the
    mean of those sums.
    """
    shortfall = tf.minimum(x_tf - bound_min, 0)
    excess = tf.maximum(x_tf - bound_max, 0)
    shortfall_sq = tf.reduce_sum(tf.square(shortfall), axis=-1)
    excess_sq = tf.reduce_sum(tf.square(excess), axis=-1)
    return 0.5 * tf.reduce_mean(shortfall_sq + excess_sq)
Esempio n. 19
0
 def build_predict(self, Xnew, task_ind):
     """
     Accumulate the weighted predictive mean and variance of all latent
     functions at Xnew for the task(s) given by task_ind.

     We need to assume the task_ind starts from 0.

     Each latent contributes ``(fmean + mean_function(Xnew)) * w`` to the
     mean and ``fvar * w**2`` to the variance. For the shared latents ``w``
     is read from ``self.W``; when ``self.tsk`` is set, one additional
     latent per task is weighted by ``self.Kappa`` and switched on only
     where ``task_ind`` equals that task.
     """
     Fmean, Fvar = 0, 0

     # Latent functions organised by rank i, with num_latent_list[i] each.
     for i in np.arange(self.rank):
         for j in np.arange(self.num_latent_list[i]):
             lat_id = np.sum(self.num_latent_list[:i], dtype=np.int64) + j
             # Whitened latents need the whitened conditional.
             predict = (conditionals.gaussian_gp_predict_whitened
                        if self.whiten_list[lat_id]
                        else conditionals.gaussian_gp_predict)
             lat_mean, lat_var = predict(
                 Xnew, self.Z[lat_id], self.kern_list[i],
                 self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], 1)
             weight = tf.gather(self.W, task_ind)[lat_id]
             Fmean += (lat_mean + self.mean_function_list[lat_id](Xnew)) * weight
             Fvar += lat_var * tf.square(weight)

     # Task-specific latent functions, indexed after all shared ones.
     if self.tsk:
         for i in np.arange(self.num_tasks):
             lat_id = np.sum(self.num_latent_list, dtype=np.int64) + i
             predict = (conditionals.gaussian_gp_predict_whitened
                        if self.whiten_list[lat_id]
                        else conditionals.gaussian_gp_predict)
             lat_mean, lat_var = predict(
                 Xnew, self.Z[lat_id], self.tskern_list[i],
                 self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], 1)
             # 1.0 where the requested task is task i, 0.0 elsewhere.
             is_this_task = tf.cast(tf.equal(tf.to_int64(i), task_ind), tf.float64)
             weight = tf.gather(self.Kappa, i)[0] * is_this_task
             Fmean += (lat_mean + self.mean_function_list[lat_id](Xnew)) * weight
             Fvar += lat_var * tf.square(weight)

     return Fmean, Fvar
Esempio n. 20
0
def gauss_kl_white(q_mu, q_sqrt, num_latent):
    """
    KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, I)

    for num_latent independent distributions, stored in the columns of
    q_mu and along the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular
        square-root matrix of the covariance.

    num_latent is an integer: the number of independent distributions
        (equal to the columns of q_mu and the last dim of q_sqrt).
    """
    # Mahalanobis term.
    kl = 0.5 * tf.reduce_sum(tf.square(q_mu))
    # Constant term: minus half the total number of dimensions.
    kl += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for latent in range(num_latent):
        # Keep only the lower triangle of this latent's square root.
        chol_q = tf.batch_matrix_band_part(q_sqrt[:, :, latent], -1, 0)
        # Log determinant of the q covariance.
        log_det_q = tf.reduce_sum(tf.log(tf.square(tf.diag_part(chol_q))))
        kl -= 0.5 * log_det_q
        # Trace term.
        kl += 0.5 * tf.reduce_sum(tf.square(chol_q))
    return kl
Esempio n. 21
0
def gauss_kl_diag(q_mu, q_sqrt, K,  num_latent):
    """
    KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    for num_latent independent distributions, stored in the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a matrix, each column represents the diagonal of a
        square-root matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions
        (equal to the columns of q_mu and q_sqrt).
    """
    chol_K = tf.cholesky(K)

    # Mahalanobis term.
    whitened_mu = tf.matrix_triangular_solve(chol_K, q_mu, lower=True)
    kl = 0.5 * tf.reduce_sum(tf.square(whitened_mu))

    # Prior log-determinant term, counted once per latent.
    log_det_K = tf.reduce_sum(tf.log(tf.square(tf.diag_part(chol_K))))
    kl += num_latent * 0.5 * log_det_K

    # Constant term: minus half the total number of dimensions.
    kl += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)

    # Log-determinant of the q covariance.
    kl += -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))

    # Trace term: K^{-1} is formed from two triangular solves and only its
    # diagonal is used.
    chol_K_inv = tf.matrix_triangular_solve(
        chol_K, eye(tf.shape(chol_K)[0]), lower=True)
    K_inv = tf.matrix_triangular_solve(
        tf.transpose(chol_K), chol_K_inv, lower=False)
    kl += 0.5 * tf.reduce_sum(
        tf.expand_dims(tf.diag_part(K_inv), 1) * tf.square(q_sqrt))
    return kl
Esempio n. 22
0
    def cross_entropy(u, label_u, alpha=0.5, normed=False):
        """Balanced pairwise cross-entropy between codes ``u`` and labels ``label_u``.

        The pairwise target ``s`` is the label inner-product matrix clipped
        to [0, 1]. The pairwise logit is ``alpha * ip`` where ``ip`` is the
        inner product of the rows of ``u``: divided by the product of the
        row norms when ``normed`` is True, otherwise clipped to [-15, 15].
        Each pair's loss ``log(1 + exp(alpha * ip)) - s * alpha * ip`` is
        multiplied by a balance weight and the mean over all pairs is
        returned.
        """
        label_gram = tf.cast(
            tf.matmul(label_u, tf.transpose(label_u)), tf.float32)
        s = tf.clip_by_value(label_gram, 0.0, 1.0)

        # Balance weights. signed_s = 2 * (s - 0.5), i.e. -1 / +1 when s is
        # exactly 0 / 1.
        signed_s = tf.multiply(tf.add(s, tf.constant(-0.5)), tf.constant(2.0))
        s_total = tf.reduce_sum(s)
        abs_total = tf.reduce_sum(tf.abs(signed_s))
        # |s - 1| weights the pairs with s = 0; pairs with s = 1 are weighted
        # by the ratio abs_total / s_total.
        zero_pair_weight = tf.abs(tf.add(s, tf.constant(-1.0)))
        one_pair_weight = tf.multiply(tf.div(abs_total, s_total), s)
        balance_param = tf.add(zero_pair_weight, one_pair_weight)

        if normed:
            raw_ip = tf.matmul(u, tf.transpose(u))

            def row_sums_as_column(t):
                return tf.reshape(tf.reduce_sum(t, 1), [tf.shape(t)[0], 1])

            # Outer product of squared row norms; its sqrt is the product
            # of the norms for every pair.
            norm_products = tf.sqrt(tf.matmul(row_sums_as_column(tf.square(u)),
                                              row_sums_as_column(tf.square(u)),
                                              transpose_b=True))
            ip = tf.div(raw_ip, norm_products)
        else:
            ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1)

        n_rows = tf.shape(u)[0]
        ones = tf.ones([n_rows, tf.shape(u)[0]])
        per_pair = tf.log(ones + tf.exp(alpha * ip)) - s * alpha * ip
        return tf.reduce_mean(tf.multiply(per_pair, balance_param))
Esempio n. 23
0
def gaussian(y, mu_k, sigma_k):
	"""Return exp(-||y - mu_k||^2 / (2 * sigma_k^2)) / sigma_k for every component.

	``y`` is reshaped to [batchDim, 1, L_out] (module-level sizes) so that it
	broadcasts against ``mu_k``; the squared distance is summed over axis 2.
	"""
	y_broadcast = tf.reshape(y, [batchDim, 1, L_out])
	# Sum over the output dimensions; per the original note this gives shape (N, K).
	sq_dist = tf.reduce_sum(tf.square(y_broadcast - mu_k), axis=2)
	exponent = -tf.div(sq_dist, 2*tf.square(sigma_k))
	unscaled = tf.exp(exponent)
	return tf.divide(unscaled, sigma_k)
Esempio n. 24
0
	def build_rmsprop_optimizer(self, learning_rate, rmsprop_decay, rmsprop_constant, gradient_clip, version):
		"""Build the training op for ``self.loss`` under name scope 'rmsprop'.

		Args:
			learning_rate: step size given to the underlying GradientDescentOptimizer.
			rmsprop_decay: decay of the running averages ('graves_rmsprop' only).
			rmsprop_constant: constant added under the square root ('graves_rmsprop' only).
			gradient_clip: if > 0, gradients are clipped to this global norm.
			version: 'rmsprop' applies the (optionally clipped) gradients
				directly through the gradient-descent optimizer;
				'graves_rmsprop' divides each gradient by
				sqrt(avg_square_grad - avg_grad^2 + rmsprop_constant) first.

		Returns:
			The training op, or None when ``version`` is neither of the two
			values above.
		"""
		with tf.name_scope('rmsprop'):
			optimizer = tf.train.GradientDescentOptimizer(learning_rate)

			grads_and_vars = optimizer.compute_gradients(self.loss)
			grads = [gv[0] for gv in grads_and_vars]
			params = [gv[1] for gv in grads_and_vars]

			if gradient_clip > 0:
				# clip_by_global_norm returns (clipped_list, global_norm);
				# only the list may be paired with the parameters below.
				grads, _ = tf.clip_by_global_norm(grads, gradient_clip)

			if version == 'rmsprop':
				return optimizer.apply_gradients(zip(grads, params))
			elif version == 'graves_rmsprop':
				# Running averages of the gradient and of its square, one
				# variable per parameter, initialised to ones.
				avg_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]
				avg_square_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]

				update_avg_grads = [
					avg.assign((rmsprop_decay * avg) + ((1 - rmsprop_decay) * grad))
					for avg, grad in zip(avg_grads, grads)]
				update_avg_square_grads = [
					avg_sq.assign((rmsprop_decay * avg_sq) + ((1 - rmsprop_decay) * tf.square(grad)))
					for avg_sq, grad in zip(avg_square_grads, grads)]
				avg_grad_updates = update_avg_grads + update_avg_square_grads

				rms = [
					tf.sqrt(avg_sq - tf.square(avg) + rmsprop_constant)
					for avg, avg_sq in zip(avg_grads, avg_square_grads)]

				rms_updates = [grad / scale for grad, scale in zip(grads, rms)]
				train = optimizer.apply_gradients(zip(rms_updates, params))

				# Run the parameter update and the average updates together.
				return tf.group(train, tf.group(*avg_grad_updates))
Esempio n. 25
0
def gauss_kl(q_mu, q_sqrt, K, num_latent):
    """
    KL divergence from q(x) = N(q_mu, q_sqrt^2) to p(x) = N(0, K).

    The result is summed over num_latent independent distributions, which are
    stored in the columns of q_mu and along the last dimension of q_sqrt.

    q_mu: matrix, one mean per column.

    q_sqrt: 3D tensor; each slice q_sqrt[:, :, d] is a square-root matrix of
        the covariance of q. Only its lower triangle is used.

    K: positive definite matrix, the covariance of p.

    num_latent: integer, number of independent distributions (columns of q_mu
        and size of the last dimension of q_sqrt).
    """
    chol_K = tf.cholesky(K)

    # Mahalanobis term: || chol_K^{-1} q_mu ||^2 / 2
    whitened_mu = tf.matrix_triangular_solve(chol_K, q_mu, lower=True)
    kl = 0.5 * tf.reduce_sum(tf.square(whitened_mu))

    # Log-determinant of the prior covariance, counted once per latent.
    prior_logdet = tf.reduce_sum(tf.log(tf.square(tf.diag_part(chol_K))))
    kl += num_latent * 0.5 * prior_logdet

    # Constant term: -0.5 * (rows of q_sqrt) * num_latent.
    kl += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)

    for latent in range(num_latent):
        # Zero out everything above the diagonal of this latent's square root.
        chol_q = tf.batch_matrix_band_part(q_sqrt[:, :, latent], -1, 0)
        # Log-determinant of the q covariance.
        q_logdet = tf.reduce_sum(tf.log(tf.square(tf.diag_part(chol_q))))
        kl += -0.5*q_logdet
        # Trace term: || chol_K^{-1} chol_q ||_F^2 / 2
        whitened_q = tf.matrix_triangular_solve(chol_K, chol_q, lower=True)
        kl += 0.5 * tf.reduce_sum(tf.square(whitened_q))
    return kl
Esempio n. 26
0
        def __graph__():
            """Build the inference/training graph of a linear classifier with a squared hinge loss.

            The function takes no arguments; it reads num_features, num_classes,
            batch_size and svm_c from the enclosing `self` and stores every
            tensor/op it creates back on `self` (see the assignments at the end).
            """

            with tf.name_scope('input'):
                # [BATCH_SIZE, NUM_FEATURES]
                x_input = tf.placeholder(dtype=tf.float32, shape=[None, self.num_features], name='x_input')

                # [BATCH_SIZE]
                y_input = tf.placeholder(dtype=tf.uint8, shape=[None], name='y_input')

                # [BATCH_SIZE, NUM_CLASSES]
                # Labels are encoded as +1 for the true class and -1 elsewhere.
                y_onehot = tf.one_hot(indices=y_input, depth=self.num_classes, on_value=1, off_value=-1,
                                      name='y_onehot')

            # Fed at run time so the learning rate can change between steps.
            learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')

            with tf.name_scope('training_ops'):
                with tf.name_scope('weights'):
                    # [NUM_FEATURES, NUM_CLASSES], drawn from N(0, 0.01^2)
                    weight = tf.get_variable(name='weights',
                                             initializer=tf.random_normal([self.num_features, self.num_classes],
                                                                          stddev=0.01))
                    self.variable_summaries(weight)
                with tf.name_scope('biases'):
                    # [NUM_CLASSES], every entry initialised to 0.1
                    bias = tf.get_variable(name='biases', initializer=tf.constant([0.1], shape=[self.num_classes]))
                    self.variable_summaries(bias)
                with tf.name_scope('Wx_plus_b'):
                    # Linear scores, one per class.
                    output = tf.matmul(x_input, weight) + bias
                    tf.summary.histogram('pre-activations', output)

            with tf.name_scope('svm'):
                # Mean of the squared weights.
                regularization = tf.reduce_mean(tf.square(weight))
                # Squared hinge: mean(max(0, 1 - y * score)^2).
                # NOTE(review): the zeros tensor has self.batch_size rows although the
                # placeholders allow any batch size -- confirm fed batches match.
                hinge_loss = tf.reduce_mean(tf.square(tf.maximum(tf.zeros([self.batch_size, self.num_classes]),
                                                                 1 - tf.cast(y_onehot, tf.float32) * output)))
                with tf.name_scope('loss'):
                    loss = regularization + self.svm_c * hinge_loss
            tf.summary.scalar('loss', loss)

            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

            with tf.name_scope('accuracy'):
                # Sign of every class score; exported under the name 'prediction'.
                predicted_class = tf.sign(output)
                predicted_class = tf.identity(predicted_class, name='prediction')
                with tf.name_scope('correct_prediction'):
                    # argmax over the signed scores is compared with argmax over the +1/-1 labels.
                    correct = tf.equal(tf.argmax(predicted_class, 1), tf.argmax(y_onehot, 1))
                with tf.name_scope('accuracy'):
                    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            tf.summary.scalar('accuracy', accuracy)

            merged = tf.summary.merge_all()

            # Expose the graph handles on the enclosing object.
            self.x_input = x_input
            self.y_input = y_input
            self.y_onehot = y_onehot
            self.learning_rate = learning_rate
            self.loss = loss
            self.optimizer = optimizer
            self.output = output
            self.predicted_class = predicted_class
            self.accuracy = accuracy
            self.merged = merged
Esempio n. 27
0
	def build_loss(self, error_clip, num_actions, double_dqn):
		''' Build the Q-learning loss graph.

		Args:
			error_clip: threshold at which the per-sample error switches from
				quadratic to linear. Values <= 0 disable clipping and give a
				plain 0.5 * error^2 loss.
			num_actions: number of actions, i.e. the width of the Q layers
				(used to flatten indices for double Q-learning).
			double_dqn: when true, the action is chosen by self.gpu_q_layer and
				its value is read from self.target_q_layer; otherwise the
				maximum of self.target_q_layer is used.

		Returns:
			Scalar tensor: the sum of the per-sample errors.

		NOTE(review): self.actions is presumably a one-hot action mask and
		self.terminals a 0/1 "not terminal" mask -- confirm against the caller.
		'''
		with tf.name_scope("loss"):

			# Q-value of the action actually taken.
			predictions = tf.reduce_sum(tf.mul(self.gpu_q_layer, self.actions), 1)

			if double_dqn: # Double Q-Learning:
				max_actions = tf.to_int32(tf.argmax(self.gpu_q_layer, 1))
				# tf.gather doesn't support multidimensional indexing yet, so we flatten output activations for indexing
				indices = tf.range(0, tf.size(max_actions) * num_actions, num_actions) + max_actions
				max_action_values = tf.gather(tf.reshape(self.target_q_layer, shape=[-1]), indices)
			else:
				max_action_values = tf.reduce_max(self.target_q_layer, 1)

			# Targets are treated as constants: no gradient flows through them.
			targets = tf.stop_gradient(self.rewards + (self.discount_factor * max_action_values * self.terminals))

			difference = tf.abs(predictions - targets)

			# Strictly positive threshold only: with error_clip == 0 both the
			# quadratic part and the linear coefficient are zero, so the loss
			# would be identically zero and nothing would be learned.
			if error_clip > 0:
				quadratic_part = tf.clip_by_value(difference, 0.0, error_clip)
				linear_part = difference - quadratic_part
				errors = (0.5 * tf.square(quadratic_part)) + (error_clip * linear_part)
			else:
				errors = (0.5 * tf.square(difference))

			return tf.reduce_sum(errors)
Esempio n. 28
0
def gauss_kl(q_mu, q_sqrt, K):
    """
    KL divergence from q(x) = N(q_mu, q_sqrt^2) to p(x) = N(0, K).

    The result is summed over several independent distributions, stored in the
    columns of q_mu and along the last dimension of q_sqrt.

    q_mu: matrix, one mean per column.

    q_sqrt: 3D tensor; each slice along the last dimension is a square-root
        matrix of the covariance of q. Only its lower triangle is used.

    K: positive definite matrix, the covariance of p.
    """
    chol_K = tf.cholesky(K)

    # Mahalanobis term.
    whitened_mu = tf.matrix_triangular_solve(chol_K, q_mu, lower=True)
    kl = 0.5 * tf.reduce_sum(tf.square(whitened_mu))

    # Prior log-determinant, counted once per latent distribution.
    n_latent = tf.cast(tf.shape(q_sqrt)[2], float_type)
    prior_logdet = tf.reduce_sum(tf.log(tf.square(tf.diag_part(chol_K))))
    kl += n_latent * 0.5 * prior_logdet

    # Constant term: product of the last two dimensions of q_sqrt.
    n_entries = tf.reduce_prod(tf.shape(q_sqrt)[1:])
    kl += -0.5 * tf.cast(n_entries, float_type)

    # Move the latent axis to the front and keep only the lower triangles.
    latent_first = tf.transpose(q_sqrt, (2, 0, 1))
    chol_q = tf.matrix_band_part(latent_first, -1, 0)

    # Log-determinant of the q covariances.
    q_logdet = tf.reduce_sum(tf.log(tf.square(tf.matrix_diag_part(chol_q))))
    kl += -0.5*q_logdet

    # Trace term: solve against one copy of chol_K per latent.
    tile_shape = tf.pack([tf.shape(chol_q)[0], 1, 1])
    chol_K_tiled = tf.tile(tf.expand_dims(chol_K, 0), tile_shape)
    whitened_q = tf.matrix_triangular_solve(chol_K_tiled, chol_q, lower=True)
    kl += 0.5 * tf.reduce_sum(tf.square(whitened_q))
    return kl
    def _build_loss(self):
        """Build the Q-learning loss and a moving average of it.

        Reads the placeholders pl_rewards, pl_terminals, pl_qtargets and
        pl_actions together with qvalues, discount, num_actions, clip_delta
        and batch_accumulator from self. Every intermediate tensor is stored
        back on self; the final scalar is self.loss.
        """
        with tf.variable_scope("loss"):

            # Target: y_j = r_j + (1 - terminal_j) * discount * best_qvalue_j
            self.tf_discount = tf.constant(self.discount)
            not_terminal = 1.0-self.pl_terminals
            discounted_next = tf.mul(self.tf_discount, self.pl_qtargets)
            self.qtarget = tf.add(self.pl_rewards, tf.mul(not_terminal, discounted_next))

            # Pick the Q-value of the action that was taken.
            self.actions_one_hot = tf.one_hot(self.pl_actions, self.num_actions, 1.0, 0.0)
            masked_qvalues = tf.mul(self.qvalues, self.actions_one_hot)
            self.qvalue_pred = tf.reduce_sum(masked_qvalues, reduction_indices=1)

            # Temporal-difference error: target minus prediction.
            self.delta = tf.sub(self.qtarget, self.qvalue_pred)

            if self.clip_delta > 0:
                # Quadratic up to clip_delta, linear beyond it.
                clip_threshold = tf.constant(self.clip_delta)
                self.quadratic_part = tf.minimum(tf.abs(self.delta), clip_threshold)
                self.linear_part = tf.sub(tf.abs(self.delta), self.quadratic_part)
                half_square = tf.mul(tf.constant(0.5), tf.square(self.quadratic_part))
                self.delta_square = half_square + (self.clip_delta*self.linear_part)
            else:
                # Clipping disabled: plain squared error (no 0.5 factor).
                self.delta_square = tf.square(self.delta)

        # Reduce over the batch with a sum or a mean.
        reduce_batch = tf.reduce_sum if self.batch_accumulator == "sum" else tf.reduce_mean
        self.loss = reduce_batch(self.delta_square)

        # Exponential moving average of the loss for TensorBoard.
        self.loss_moving_avg = tf.train.ExponentialMovingAverage(decay=0.999)
        self.loss_moving_avg_op = self.loss_moving_avg.apply([self.loss])
Esempio n. 30
0
def gauss_kl_white_diag(q_mu, q_sqrt, num_latent):
    """
    KL divergence from q(x) = N(q_mu, q_sqrt^2) to p(x) = N(0, I), where the
    covariance of q is diagonal.

    The result is summed over num_latent independent distributions, stored in
    the columns of q_mu and q_sqrt.

    q_mu: matrix, one mean per column.

    q_sqrt: matrix, each column holds the diagonal of a square-root matrix of
        the covariance.

    num_latent: integer, number of independent distributions (columns of q_mu
        and q_sqrt).
    """
    mahalanobis = 0.5 * tf.reduce_sum(tf.square(q_mu))
    # Constant term: -0.5 * (rows of q_sqrt) * num_latent.
    constant = -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    # Negative half log-determinant of the q covariance.
    neg_half_logdet = -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))
    # Trace of the q covariance, halved.
    half_trace = 0.5 * tf.reduce_sum(tf.square(q_sqrt))
    return mahalanobis + constant + neg_half_logdet + half_trace
Esempio n. 31
0
# Second component of every sample in vectors_set is the regression target.
y_data = [v[1] for v in vectors_set]


#plt.figure(figsize=(30,40),dpi=20)
# plt.scatter(x_data,y_data,color='r')
# plt.show()

# Slope: a one-element variable initialised uniformly in [-1, 1).
W = tf.Variable(tf.random_uniform([1],-1.0,1.0),name='W')
# Intercept: a one-element variable initialised to 0.
b = tf.Variable(tf.zeros([1]),name='b')
# Predicted values of the linear model.
y = W * x_data + b

# Loss: mean squared error between the prediction y and the targets y_data.
loss = tf.reduce_mean(tf.square(y-y_data),name='loss')
# Plain gradient descent with a learning rate of 0.5.
optimizer = tf.train.GradientDescentOptimizer(0.5)
# One training step minimises the loss.
train = optimizer.minimize(loss,name='train')

sess = tf.Session()

sess.run(tf.global_variables_initializer())

# Report the initial values of W, b and the loss.
print('W=',sess.run(W),'b=',sess.run(b),'loss=',sess.run(loss))
# Run 20 training steps, printing the parameters and the loss after each one.
for step in range(20):
    sess.run(train)
    print('W=', sess.run(W), 'b=', sess.run(b), 'loss=', sess.run(loss))
# A regression problem has only one output node.
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

# Forward pass of a single-layer network: just a weighted sum of the inputs.
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w1)

# Costs of predicting too little (loss_less) and too much (loss_more).
loss_less = 1
loss_more = 10
# Asymmetric loss: over-prediction (y > y_) is weighted by loss_more,
# under-prediction by loss_less.
loss = tf.reduce_sum(
    tf.select(tf.greater(y, y_), loss_more*(y-y_), loss_less*(y_-y)))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

# Alternative loss: mean squared error.
loss_2 = tf.reduce_mean(tf.square(y_ - y))
train_step_2 = tf.train.AdamOptimizer(0.001).minimize(loss_2)

# Generate a simulated dataset from random numbers.
rdm = RandomState(1)
dataset_size = 128
X = rdm.rand(dataset_size, 2)
# The regression target is the sum of the two inputs plus a random term
# in [-0.05, 0.05).
Y = [[x1 + x2 + rdm.rand()/10.0-0.05] for (x1, x2) in X]

# Train the neural network
with tf.Session() as sess:
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    STEPS = 5000
    for i in range(STEPS):
Esempio n. 33
0
    def _create_one_cell():
        """Build one LSTM layer, wrapped in output dropout when enabled.

        Returns:
            An LSTMCell with config.lstm_size units. When config.keep_prob is
            below 1.0 the cell is wrapped in a DropoutWrapper that keeps
            outputs with that probability; otherwise the bare cell is
            returned (previously this path fell through and returned None).
        """
        lstm_cell = tf.contrib.rnn.LSTMCell(config.lstm_size,
                                            state_is_tuple=True)
        if config.keep_prob < 1.0:
            lstm_cell = tf.contrib.rnn.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        return lstm_cell

    # Stack config.num_layers cells when more than one layer is configured;
    # otherwise use a single cell directly.
    cell = tf.contrib.rnn.MultiRNNCell(
        [_create_one_cell() for _ in range(config.num_layers)],
        state_is_tuple=True) if config.num_layers > 1 else _create_one_cell()
    # Unroll the RNN over `inputs`; the final state is discarded.
    val, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    # Output projection from lstm_size to input_size.
    weight = tf.Variable(
        tf.truncated_normal([config.lstm_size, config.input_size]))
    bias = tf.Variable(tf.constant(0.1, shape=[config.input_size]))
    # NOTE(review): `last` is not defined in the visible code -- presumably the
    # last time step of `val`; confirm against the full source.
    prediction = tf.matmul(last, weight) + bias
    # Mean squared error between prediction and targets, minimised with RMSProp.
    loss = tf.reduce_mean(tf.square(prediction - targets))
    optimizer = tf.train.RMSPropOptimizer(learning_rate)
    minimize = optimizer.minimize(loss)

# Initialise all variables inside a session bound to lstm_graph.
with tf.Session(graph=lstm_graph) as sess:
    tf.global_variables_initializer().run()
# Per-epoch learning rates: the exponent is clamped at 0, so the initial rate
# is kept for the first init_epoch epochs and is then multiplied by
# learning_rate_decay once for every further epoch.
learning_rates_to_use = [
    config.init_learning_rate *
    (config.learning_rate_decay**max(float(i + 1 - config.init_epoch), 0.0))
    for i in range(config.max_epoch)
]

for epoch_step in range(config.max_epoch):
    # Learning rate scheduled for this epoch.
    current_lr = learning_rates_to_use[epoch_step]

    # Check https://github.com/lilianweng/stock-rnn/blob/master/data_wrapper.py
Esempio n. 34
0
	def loss(self, y, label):
		''' Return the mean squared error between y and label.

		The op is created inside the 'losses' variable scope and is named 'mse'.
		'''
		with tf.variable_scope('losses'):
			squared_error = tf.square(y-label)
			mse = tf.reduce_mean(squared_error, name = 'mse')
		return mse
Esempio n. 35
0
import numpy as np
import tensorflow as tf

# Model parameters: a one-element weight and bias, deliberately started at
# values (0.3, -0.3) that do not fit the training data below.
W = tf.Variable([.3], dtype=tf.float32, name='weights')
b = tf.Variable([-.3], dtype=tf.float32, name='biases')
# Record the distribution of both parameters for TensorBoard.
tf.summary.histogram('W', W)
tf.summary.histogram('b', b)

# Model input and output
x = tf.placeholder(tf.float32, name='x')
linear_model = W * x + b
y = tf.placeholder(tf.float32, name='y')

# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
tf.summary.scalar('loss', loss)

# optimizer: plain gradient descent with a learning rate of 0.01
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]

# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # (re)set W and b to their initial, not-yet-fitted values
Esempio n. 36
0
# Input placeholders: X is [batch, seq_length, data_dim], Y is [batch, 1].
X = tf.placeholder(tf.float32, [None, seq_length, data_dim])
Y = tf.placeholder(tf.float32, [None, 1])

# Build an LSTM network with hidden_dim units and tanh activation.
cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_dim,
                                    state_is_tuple=True,
                                    activation=tf.tanh)
outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# Linear (no activation) layer on top of the last time step's output.
Y_pred = tf.contrib.layers.fully_connected(
    outputs[:, -1], output_dim,
    activation_fn=None)  # We use the last cell's output

# cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y))  # sum of the squares
# optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

# RMSE, computed from separately fed targets and predictions so that it is
# independent of the training graph above.
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # Training
    for i in range(iterations):
Esempio n. 37
0
print("{},{}".format(m, n))
# Prepend a column of ones (bias feature) to the raw data.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
print(housing_data_plus_bias.shape)
# Standardise the features, then prepend the bias column to the scaled data.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
print(scaled_housing_data_plus_bias.shape)

learning_rate = 0.01

# Linear regression graph: X has n features plus the bias column.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
# Parameters initialised uniformly in [-1, 1) with a fixed seed.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name='theta')
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# Run options/metadata for collecting a full execution trace.
options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()

n_epochs = 10
batch_size = 100
# Number of mini-batches per epoch, rounding up so that every sample is covered.
n_batches = int(np.ceil(m / batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    """Draw batch_size random row indices for the given epoch and batch.

    NOTE(review): the visible body stops after the indices are sampled and
    returns nothing -- the rest of the function appears to be missing.
    """
    # Seeding from (epoch, batch_index) makes the sampled indices reproducible.
    # np.random.seed returns None, so `know` carries no information.
    know = np.random.seed(epoch * n_batches +  batch_index)
    # batch_size indices drawn with replacement from [0, m).
    indices = np.random.randint(m, size=batch_size)
Esempio n. 38
0
 def loss(self):
     """Return the sum of squared residuals of A x - b."""
     residual = tf.linalg.matvec(self.A, self.x) - self.b
     return tf.reduce_sum(tf.square(residual))
Esempio n. 39
0
    def build_model(self):
        """Build the generator/discriminator graph, losses, summaries and train ops.

        Reads self.videos (set elsewhere), self.wvars, the size attributes
        (batch_size, crop_size, frame_size, channels) and the optimizer
        settings (learning_rate, beta1). Stores the input placeholder, the
        generated frames, the costs, the RMSE metrics, three Adam train ops,
        self.sample and self.summary_op on self.
        """
        print("Setting up model...")

        # input_images = First frame of video
        self.input_images = tf.placeholder(tf.float32, [self.batch_size, self.crop_size, self.crop_size, self.channels])
        self.videos_fake, self.gen_reg, self.generator_variables = self.generator(self.input_images)

        # Value range of the generated frames.
        self.fake_min = tf.reduce_min(self.videos_fake)
        self.fake_max = tf.reduce_max(self.videos_fake)

        print('Shapes of videos:')
        print('Original:')
        print(self.videos.shape)
        print('Generated:')
        print(self.videos_fake.shape)

        # First discriminator call creates its variables (reuse=False).
        self.d_real, self.discriminator_variables = self.discriminator(self.videos, reuse=False)

        # merging initial frame and generated to create full forecast "video"
        self.videos_fake = tf.stack([self.input_images, self.videos_fake], axis=1)

        self.d_fake, _ = self.discriminator(self.videos_fake, reuse=True)

        # Generator cost: negated mean discriminator score on generated videos.
        self.g_cost_pure = -tf.reduce_mean(self.d_fake)

        # self.g_cost = self.g_cost_pure + 1000 * self.gen_reg

        # Discriminator cost: mean score on fake minus mean score on real.
        self.d_cost = tf.reduce_mean(self.d_fake) - tf.reduce_mean(self.d_real)

        self.videos = tf.reshape(self.videos, [self.batch_size, self.frame_size, self.crop_size, self.crop_size, self.channels])
        self.videos_fake = tf.reshape(self.videos_fake, [self.batch_size, self.frame_size, self.crop_size, self.crop_size, self.channels])

        # Per-variable RMSE. self.wvars is iterated character by character
        # (at most 5 entries); each '1' consumes the next channel index `par`,
        # every other character yields a constant 0.
        help_v = [0,0,0,0,0]
        par = 0
        for c,k in zip(self.wvars, range(5)):
            if c == '1':
                help_v[k] = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.videos[:,:,:,:,par], self.videos_fake[:,:,:,:,par]))))
                par += 1
            else:
                help_v[k] = tf.constant(0.0)

        self.rmse_temp = help_v[0]
        self.rmse_cc = help_v[1]
        self.rmse_sh = help_v[2]
        self.rmse_sp = help_v[3]
        self.rmse_geo = help_v[4]

        tf.summary.scalar('rmse_temp', self.rmse_temp)
        tf.summary.scalar('rmse_cc', self.rmse_cc)
        tf.summary.scalar('rmse_sh', self.rmse_sh)
        tf.summary.scalar('rmse_sp', self.rmse_sp)
        tf.summary.scalar('rmse_geo', self.rmse_geo)

        # RMSE over all channels and frames.
        self.rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.videos, self.videos_fake))))

        # self.mae = tf.metrics.mean_absolute_error(self.videos_fake, self.videos)

        # error of discriminator failing to evaluate generated sample as fake - good job generator
        tf.summary.scalar("g_cost_pure", self.g_cost_pure)
        # diff between original image and created image/sequence in generator
        tf.summary.scalar("g_cost_regularizer", self.gen_reg)
        # error of - saying fake is fake and original is original (when fake == orig and orig == fake)
        tf.summary.scalar("d_cost", self.d_cost)

        tf.summary.scalar("RMSE_overal", self.rmse)
        # tf.summary.tensor_summary("MAE", self.mae)

        # Gradient penalty: score random interpolations between real and fake
        # videos (one mixing factor per sample) and penalise the squared
        # deviation of the gradient norm from 1.
        alpha = tf.random_uniform(
            shape=[self.batch_size, 1],
            minval=0.,
            maxval=1.
        )

        dim = self.frame_size * self.crop_size * self.crop_size * self.channels

        vid = tf.reshape(self.videos, [self.batch_size, dim])
        fake = tf.reshape(self.videos_fake, [self.batch_size, dim])
        differences = fake - vid
        interpolates = vid + (alpha * differences)
        d_hat, _ = self.discriminator(tf.reshape(interpolates, [self.batch_size, self.frame_size, self.crop_size,
                                                                self.crop_size, self.channels]), reuse=True)
        gradients = tf.gradients(d_hat, [interpolates])[0]
        # Per-sample L2 norm of the gradient.
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)

        # Penalty weight is fixed at 10.
        self.d_penalty = 10 * gradient_penalty

        tf.summary.scalar('d_penalty', self.d_penalty)

        self.d_cost_final = self.d_cost + self.d_penalty

        tf.summary.scalar("d_cost_penalized", self.d_cost_final)

        # NOTE(review): self.g_adam is initialised to None and never assigned
        # below; the generator ops are g_adam_gan and g_adam_first.
        self.d_adam, self.g_adam = None, None
        # Run the ops collected in UPDATE_OPS before every optimizer step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self.d_adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999) \
                .minimize(self.d_cost_final, var_list=self.discriminator_variables)
            # Generator trained on the adversarial cost.
            self.g_adam_gan = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999) \
                .minimize(self.g_cost_pure, var_list=self.generator_variables)
            # Generator trained on the reconstruction regulariser only.
            self.g_adam_first = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999) \
                .minimize(self.gen_reg, var_list=self.generator_variables)

        self.sample = self.videos_fake
        self.summary_op = tf.summary.merge_all()
Esempio n. 40
0
 def _pixel_norm(self, epsilon=1e-08):
     """
     Pixelwise normalization

     Returns a Lambda layer that multiplies its input by the reciprocal square
     root of the mean of its squared values along the last axis.

     epsilon is added to that mean before the reciprocal square root is taken,
     which keeps the result finite when the mean is zero.
     """
     return layers.Lambda(lambda x: x * tf.math.rsqrt(
         tf.reduce_mean(tf.square(x), axis=-1, keepdims=True) + epsilon))
Esempio n. 41
0
# Combine the cells in stackedRNNs into one multi-layer cell.
multi_cells = tf.contrib.rnn.MultiRNNCell(stackedRNNs, state_is_tuple=True)

# Connect the RNN cells (LSTM cells here)
outputs, _states = tf.nn.dynamic_rnn(multi_cells, X, dtype=tf.float32)
print("outputs: ", outputs)

# Look closely at [:, -1]: only the last (hidden) output of the LSTM RNN is used.
# Stock prices from several past trading days are used to predict a single
# next-day price, so this is a MANY-TO-ONE setup.
Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1],
                                           1,
                                           activation_fn=None)

#%%
# Loss function definition

# Sum of squared errors between prediction and target.
loss_p = tf.reduce_sum(tf.square(Y_pred - Y))
#loss_dpl = tf.reduce_sum(tf.abs(tf.sign(Y_pred[1:,:] - Y[:-1,:]) - tf.sign(Y[1:,:] - Y[:-1,:])  ) )
#loss_dpl = tf.reduce_sum(Y_pred - Y)

loss = loss_p

# Optimizer
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# RMSE(Root Mean Square Error)
rmse = tf.sqrt(tf.reduce_mean(tf.squared_difference(targets, predictions)))

#%%

# Initialize the session
with tf.Session() as sess:
Esempio n. 42
0
    def generator(self, img_batch):
        """Build the encoder/decoder generator under the 'g_' variable scope.

        img_batch is passed through four conv2d + batch-norm + ReLU blocks,
        reshaped to [batch_size, 2, 2, 1024], and then passed through four
        nearest-neighbour resize + conv2d blocks, the last one followed by tanh.

        Returns a tuple (output, gen_reg, variables): the tanh output
        (self.fg_fg), the mean squared difference between img_batch and that
        output, and the variables collected from the 'g_' scope.

        NOTE(review): conv2d and add_activation_summary are project helpers;
        d_h/d_w are presumably strides -- confirm against their definitions.
        """
        with tf.variable_scope('g_') as vs:
            """ -----------------------------------------------------------------------------------
            ENCODER 
            ----------------------------------------------------------------------------------- """
            print('ENCODER')

            # channels -> 128 feature maps
            self.en_h0 = conv2d(img_batch, self.channels, 128, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv1")
            self.en_h0 = tf.nn.relu(tf.contrib.layers.batch_norm(self.en_h0))
            add_activation_summary(self.en_h0)
            print(self.en_h0.get_shape().as_list())

            # 128 -> 256 feature maps
            self.en_h1 = conv2d(self.en_h0, 128, 256, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv2")
            self.en_h1 = tf.contrib.layers.batch_norm(self.en_h1, scope="enc_bn2")
            self.en_h1 = tf.nn.relu(self.en_h1)
            add_activation_summary(self.en_h1)
            print(self.en_h1.get_shape().as_list())

            # 256 -> 512 feature maps
            self.en_h2 = conv2d(self.en_h1, 256, 512, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv3")
            self.en_h2 = tf.contrib.layers.batch_norm(self.en_h2, scope="enc_bn3")
            self.en_h2 = tf.nn.relu(self.en_h2)
            add_activation_summary(self.en_h2)
            print(self.en_h2.get_shape().as_list())

            # 512 -> 1024 feature maps
            self.en_h3 = conv2d(self.en_h2, 512, 1024, k_h=4, k_w=4, d_w=2, d_h=2, name="enc_conv4")
            self.en_h3 = tf.contrib.layers.batch_norm(self.en_h3, scope="enc_bn4")
            self.en_h3 = tf.nn.relu(self.en_h3)
            add_activation_summary(self.en_h3)
            print(self.en_h3.get_shape().as_list())

            """ -----------------------------------------------------------------------------------
            GENERATOR 
            ----------------------------------------------------------------------------------- """
            print('GENERATOR')

            # The encoder output is treated as a 2x2x1024 code per sample.
            self.z_ = tf.reshape(self.en_h3, [self.batch_size, 2, 2, 1024])
            print(self.z_.get_shape().as_list())

            # Upsample to 4x4, then 1024 -> 512 feature maps
            self.fg_h1 = tf.image.resize_images(self.z_, [4,4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h1 = conv2d(self.fg_h1, 1024, 512, d_h=1, d_w=1, name="gen_conv1")
            self.fg_h1 = tf.nn.relu(tf.contrib.layers.batch_norm(self.fg_h1, scope='g_f_bn1'), name='g_f_relu1')
            add_activation_summary(self.fg_h1)
            print(self.fg_h1.get_shape().as_list())

            # Upsample to 8x8, then 512 -> 256 feature maps
            self.fg_h2 = tf.image.resize_images(self.fg_h1, [8,8], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h2 = conv2d(self.fg_h2, 512, 256, d_h=1, d_w=1, name="gen_conv2")
            self.fg_h2 = tf.nn.relu(tf.contrib.layers.batch_norm(self.fg_h2, scope='g_f_bn2'), name='g_f_relu2')
            add_activation_summary(self.fg_h2)
            print(self.fg_h2.get_shape().as_list())

            # Upsample to 16x16, then 256 -> 128 feature maps
            self.fg_h3 = tf.image.resize_images(self.fg_h2, [16,16], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h3 = conv2d(self.fg_h3, 256, 128, d_h=1, d_w=1, name="gen_conv3")
            self.fg_h3 = tf.nn.relu(tf.contrib.layers.batch_norm(self.fg_h3, scope='g_f_bn3'), name='g_f_relu3')
            add_activation_summary(self.fg_h3)
            print(self.fg_h3.get_shape().as_list())

            # Upsample to crop_size x crop_size, map back to the image channels
            # and squash with tanh (no batch norm on the output layer).
            self.fg_h4 = tf.image.resize_images(self.fg_h3, [self.crop_size,self.crop_size], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.fg_h4 = conv2d(self.fg_h4, 128, self.channels, d_h=1, d_w=1, name="gen_conv4")
            self.fg_fg = tf.nn.tanh(self.fg_h4, name='g_f_actication')
            print(self.fg_fg.get_shape().as_list())

            # Regulariser: mean squared difference between input and output frame.
            gen_reg = tf.reduce_mean(tf.square(img_batch - self.fg_fg))

        variables = tf.contrib.framework.get_variables(vs)
        return self.fg_fg, gen_reg, variables
def _wgan_gp_penalty(scope_name, cond_inputs, real_images, fake_images, a,
                     image_size, output_dim):
    """Build the WGAN-GP gradient-penalty term for one discriminator.

    Random points are sampled on the segments between `real_images` and
    `fake_images`; the discriminator stored under variable scope
    `scope_name` is re-applied (with `reuse=True`) to those points and the
    mean squared deviation of its gradient norm from 1 is returned.

    :param scope_name: variable scope holding the discriminator weights.
    :param cond_inputs: first argument passed to `create_discriminator`.
    :param real_images: real second argument of the discriminator.
    :param fake_images: generated second argument of the discriminator.
    :param a: options object; only `a.batch_size` is read here (it is also
              forwarded to `create_discriminator`).
    :param image_size: side length used to reshape the interpolates back to
                       [-1, image_size, image_size, 3].
    :param output_dim: flattened size of one image.
    :return: scalar tensor with the (unweighted) gradient penalty.
    """
    # One interpolation coefficient per example, broadcast over all pixels.
    alpha = tf.random_uniform(shape=[a.batch_size,1], minval=0., maxval=1.)
    differences = tf.reshape(fake_images,[-1,output_dim])-tf.reshape(real_images,[-1,output_dim])
    interpolates = tf.reshape(real_images, [-1,output_dim]) + (alpha*differences)
    with tf.variable_scope(scope_name, reuse=True):
        gradients = tf.gradients(
            create_discriminator(cond_inputs,
                                 tf.reshape(interpolates,[-1,image_size,image_size,3]),
                                 a),
            [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients),
                                   reduction_indices=[1]))
    return tf.reduce_mean((slopes-1.)**2)


def _adam_train_op(loss, var_prefixes, a):
    """Return an Adam update op for `loss` over variables matching prefixes.

    :param loss: scalar tensor to minimise.
    :param var_prefixes: tuple of strings; a trainable variable is updated
                         when its name starts with any of them.
    :param a: options object providing `a.lr` and `a.beta1`.
    :return: the op produced by `apply_gradients`.
    """
    tvars = [var for var in tf.trainable_variables()
             if var.name.startswith(var_prefixes)]
    optim = tf.train.AdamOptimizer(a.lr, a.beta1)
    grads_and_vars = optim.compute_gradients(loss, var_list=tvars)
    return optim.apply_gradients(grads_and_vars)


def create_model(inputsX, inputsY, a):
    """Build the full two-direction translation graph, losses and train op.

    Each domain gets an encoder producing a shared and an exclusive
    representation, a decoder, a WGAN-GP discriminator, an "exclusive"
    decoder/discriminator pair, and a domain classifier on the shared
    representation. Code-reconstruction and cycle-consistency losses are
    added on top.

    :param inputsX: batch of domain-X images. The gradient penalty reshapes
                    images to [-1, 256, 256, 3], so 256x256x3 is assumed.
    :param inputsY: batch of domain-Y images (same assumption).
    :param a: options object. Attributes read here: `batch_size`,
              `gan_weight`, `gan_exclusive_weight`,
              `classifier_shared_weight`, `l1_weight`, `cyc_weight`, `lr`,
              `beta1`. It is also forwarded to every `create_*` helper.
    :return: a `Model` holding predictions, representations, outputs,
             exponential moving averages of the losses, and a grouped
             `train` op.
    """

    # Modify values if images are reduced
    IMAGE_SIZE = 256

    OUTPUT_DIM = IMAGE_SIZE*IMAGE_SIZE*3 # 256x256x3

    # Target for inputsX is inputsY and vice versa
    targetsX = inputsY
    targetsY = inputsX

    ######### IMAGE_TRANSLATORS
    with tf.variable_scope("generatorX2Y_encoder"):
        sR_X2Y, eR_X2Y = create_generator_encoder(inputsX, a)

    with tf.variable_scope("generatorY2X_encoder"):
        sR_Y2X, eR_Y2X = create_generator_encoder(inputsY, a)

    # Generate random noise to substitute exclusive rep
    z = tf.random_normal(eR_X2Y.shape)
    z2 = tf.random_normal(eR_X2Y.shape)

    # One copy of the decoder for the noise input, the second copy for the
    # cross-domain autoencoder
    with tf.name_scope("generatorX2Y_decoder_noise"):
        with tf.variable_scope("generatorX2Y_decoder"):
            out_channels = int(targetsX.get_shape()[-1])
            outputsX2Y = create_generator_decoder(sR_X2Y, z, out_channels, a)

        with tf.variable_scope("generatorX2Y_decoder", reuse=True):
            outputsX2Yp = create_generator_decoder(sR_X2Y, z2, out_channels, a)

    # The X->Y output is re-encoded with the Y encoder (reused weights).
    with tf.name_scope("generatorX2Y_reconstructor"):
        with tf.variable_scope("generatorY2X_encoder", reuse=True):
            sR_X2Y_recon, eR_X2Y_recon = create_generator_encoder(outputsX2Y, a)

    #CYCLE-CONSISTENCY
    with tf.name_scope("generatorX2Y_cyc"):
        with tf.variable_scope("generatorX2Y_decoder", reuse=True):
            outputX2Y_cyc = create_generator_decoder(sR_X2Y_recon, eR_X2Y, out_channels, a)

    with tf.name_scope("generatorY2X_decoder_noise"):
        with tf.variable_scope("generatorY2X_decoder"):
            out_channels = int(targetsY.get_shape()[-1])
            outputsY2X = create_generator_decoder(sR_Y2X, z, out_channels, a)

        with tf.variable_scope("generatorY2X_decoder", reuse=True):
            outputsY2Xp = create_generator_decoder(sR_Y2X, z2, out_channels, a)

    # The Y->X output is re-encoded with the X encoder (reused weights).
    with tf.name_scope("generatorY2X_reconstructor"):
        with tf.variable_scope("generatorX2Y_encoder", reuse=True):
            sR_Y2X_recon, eR_Y2X_recon = create_generator_encoder(outputsY2X, a)

    # CYCLE-CONSISTENCY
    with tf.name_scope("generatorY2X_cyc"):
        with tf.variable_scope("generatorY2X_decoder", reuse=True):
            outputY2X_cyc = create_generator_decoder(sR_Y2X_recon, eR_Y2X, out_channels, a)

    # create two copies of discriminator, one for real pairs and one for fake pairs
    # they share the same underlying variables

    # We will now have 2 different discriminators, one per direction, and two
    # copies of each for real/fake pairs

    with tf.name_scope("real_discriminatorX2Y"):
        with tf.variable_scope("discriminatorX2Y"):
            predict_realX2Y = create_discriminator(inputsX, targetsX, a)

    with tf.name_scope("real_discriminatorY2X"):
        with tf.variable_scope("discriminatorY2X"):
            predict_realY2X = create_discriminator(inputsY, targetsY, a)

    with tf.name_scope("fake_discriminatorX2Y"):
        with tf.variable_scope("discriminatorX2Y", reuse=True):
            predict_fakeX2Y = create_discriminator(inputsX, outputsX2Y, a)

    with tf.name_scope("fake_discriminatorY2X"):
        with tf.variable_scope("discriminatorY2X", reuse=True):
            predict_fakeY2X = create_discriminator(inputsY, outputsY2X, a)

    ######### VISUAL ANALOGIES
    # This is only for visualization (visual analogies), not used in training loss
    with tf.name_scope("image_swapper_X"):
        im_swapped_X,sel_auto_X = create_visual_analogy(sR_X2Y, eR_X2Y,
                                                 outputX2Y_cyc,inputsX,'Y2X', a)
    with tf.name_scope("image_swapper_Y"):
        im_swapped_Y,sel_auto_Y = create_visual_analogy(sR_Y2X, eR_Y2X,
                                                  outputY2X_cyc,inputsY,'X2Y', a)

    ######### EXCLUSIVE REPRESENTATION
    # Create generators/discriminators for exclusive representation
    # NOTE: `out_channels` here still holds the value computed from targetsY.
    with tf.variable_scope("generator_exclusiveX2Y_decoder"):
        outputs_exclusiveX2Y = create_generator_decoder_exclusive(eR_X2Y, out_channels, a)

    with tf.name_scope("real_discriminator_exclusiveX2Y"):
        with tf.variable_scope("discriminator_exclusiveX2Y"):
            predict_real_exclusiveX2Y = create_discriminator(inputsX, targetsX, a)

    with tf.name_scope("fake_discriminator_exclusiveX2Y"):
        with tf.variable_scope("discriminator_exclusiveX2Y", reuse=True):
            predict_fake_exclusiveX2Y = create_discriminator(inputsX, outputs_exclusiveX2Y, a)


    with tf.variable_scope("generator_exclusiveY2X_decoder"):
        outputs_exclusiveY2X = create_generator_decoder_exclusive(eR_Y2X, out_channels, a)

    with tf.name_scope("real_discriminator_exclusiveY2X"):
        with tf.variable_scope("discriminator_exclusiveY2X"):
            predict_real_exclusiveY2X = create_discriminator(inputsY, targetsY, a)

    # The "Y2Y" name scope (sic) is kept so existing op names do not change.
    with tf.name_scope("fake_discriminator_exclusiveY2Y"):
        with tf.variable_scope("discriminator_exclusiveY2X", reuse=True):
            predict_fake_exclusiveY2X = create_discriminator(inputsY, outputs_exclusiveY2X, a)


    ######### SHARED REPRESENTATION
    # Create domain classifiers operating on the shared representation

    with tf.name_scope("discriminator_sharedX2Y"):
        with tf.variable_scope("discriminator_sharedX2Y"):
            predict_fake_sharedX2Y = create_domain_classifier(sR_X2Y, a)

    with tf.name_scope("discriminator_sharedY2X"):
        with tf.variable_scope("discriminator_sharedY2X"):
            predict_fake_sharedY2X = create_domain_classifier(sR_Y2X, a)


    ######### LOSSES

    with tf.name_scope("generatorX2Y_loss"):
        genX2Y_loss_GAN = -tf.reduce_mean(predict_fakeX2Y)
        genX2Y_loss = genX2Y_loss_GAN * a.gan_weight

    with tf.name_scope("discriminatorX2Y_loss"):
        discrimX2Y_loss = tf.reduce_mean(predict_fakeX2Y) - tf.reduce_mean(predict_realX2Y)
        gradient_penalty = _wgan_gp_penalty("discriminatorX2Y", inputsX,
                                            targetsX, outputsX2Y, a,
                                            IMAGE_SIZE, OUTPUT_DIM)

        tf.summary.histogram("X2Y/fake_score", predict_fakeX2Y)
        tf.summary.histogram("X2Y/real_score", predict_realX2Y)
        tf.summary.histogram("X2Y/disc_loss", discrimX2Y_loss )
        tf.summary.histogram("X2Y/gradient_penalty", gradient_penalty)
        discrimX2Y_loss += LAMBDA*gradient_penalty

    with tf.name_scope("generatorY2X_loss"):
        genY2X_loss_GAN = -tf.reduce_mean(predict_fakeY2X)
        genY2X_loss = genY2X_loss_GAN * a.gan_weight

    with tf.name_scope("discriminatorY2X_loss"):
        discrimY2X_loss = tf.reduce_mean(predict_fakeY2X) - tf.reduce_mean(predict_realY2X)
        gradient_penalty = _wgan_gp_penalty("discriminatorY2X", inputsY,
                                            targetsY, outputsY2X, a,
                                            IMAGE_SIZE, OUTPUT_DIM)
        discrimY2X_loss += LAMBDA*gradient_penalty

    with tf.name_scope("generator_exclusiveX2Y_loss"):
        gen_exclusiveX2Y_loss_GAN = -tf.reduce_mean(predict_fake_exclusiveX2Y)
        gen_exclusiveX2Y_loss = gen_exclusiveX2Y_loss_GAN * a.gan_exclusive_weight

    with tf.name_scope("discriminator_exclusiveX2Y_loss"):
        discrim_exclusiveX2Y_loss = tf.reduce_mean(predict_fake_exclusiveX2Y) - tf.reduce_mean(predict_real_exclusiveX2Y)
        gradient_penalty = _wgan_gp_penalty("discriminator_exclusiveX2Y",
                                            inputsX, targetsX,
                                            outputs_exclusiveX2Y, a,
                                            IMAGE_SIZE, OUTPUT_DIM)
        discrim_exclusiveX2Y_loss += LAMBDA*gradient_penalty

    with tf.name_scope("generator_exclusiveY2X_loss"):
        gen_exclusiveY2X_loss_GAN = -tf.reduce_mean(predict_fake_exclusiveY2X)
        gen_exclusiveY2X_loss = gen_exclusiveY2X_loss_GAN * a.gan_exclusive_weight


    with tf.name_scope("discriminator_exclusiveY2X_loss"):
        discrim_exclusiveY2X_loss = tf.reduce_mean(predict_fake_exclusiveY2X) - tf.reduce_mean(predict_real_exclusiveY2X)
        # This discriminator scores (inputsY, targetsY) as real and
        # (inputsY, outputs_exclusiveY2X) as fake, so the penalty must be
        # taken between those same pairs (previously it used the X-side
        # tensors, a copy-paste slip from the X2Y block).
        gradient_penalty = _wgan_gp_penalty("discriminator_exclusiveY2X",
                                            inputsY, targetsY,
                                            outputs_exclusiveY2X, a,
                                            IMAGE_SIZE, OUTPUT_DIM)
        discrim_exclusiveY2X_loss += LAMBDA*gradient_penalty


    #SHARED GRL LOSS

    with tf.name_scope("discriminator_sharedX2Y_loss"):
        # Domain label 0 for representations coming from X
        labels_X2Y = tf.zeros([a.batch_size, 1], dtype=tf.float32)
        cross_entropyX2Y = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_X2Y, logits=predict_fake_sharedX2Y)
        discrim_sharedX2Y_loss = tf.reduce_mean(cross_entropyX2Y)
        discrim_sharedX2Y_loss = discrim_sharedX2Y_loss * a.classifier_shared_weight



    with tf.name_scope("discriminator_sharedY2X_loss"):
        # Domain label 1 for representations coming from Y
        labels_Y2X = tf.ones([a.batch_size, 1], dtype=tf.float32)
        cross_entropyY2X = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_Y2X, logits=predict_fake_sharedY2X)
        discrim_sharedY2X_loss = tf.reduce_mean(cross_entropyY2X)
        discrim_sharedY2X_loss = discrim_sharedY2X_loss * a.classifier_shared_weight

    with tf.name_scope("code_recon_loss"):
        code_sR_X2Y_recon_loss = tf.reduce_mean(tf.abs(sR_X2Y_recon-sR_X2Y))
        code_sR_Y2X_recon_loss = tf.reduce_mean(tf.abs(sR_Y2X_recon-sR_Y2X))
        code_eR_X2Y_recon_loss = tf.reduce_mean(tf.abs(eR_X2Y_recon-z))
        code_eR_Y2X_recon_loss = tf.reduce_mean(tf.abs(eR_Y2X_recon-z))
        code_recon_loss = a.l1_weight*(code_sR_X2Y_recon_loss + code_sR_Y2X_recon_loss
                                    +code_eR_X2Y_recon_loss + code_eR_Y2X_recon_loss)

    #CYCLE-CONSISTENCY LOSS
    with tf.name_scope("cycX_loss"):
        cycX_loss = a.cyc_weight*tf.reduce_mean(tf.abs(outputX2Y_cyc-inputsX))

    with tf.name_scope("cycY_loss"):
        cycY_loss = a.cyc_weight*tf.reduce_mean(tf.abs(outputY2X_cyc-inputsY))

    ######### OPTIMIZERS
    # Each loss gets its own Adam optimizer restricted to the variables whose
    # names start with the listed scope prefixes. Generator updates are
    # created under control dependencies on the matching discriminator
    # update, so running a generator train op also runs that discriminator's.

    with tf.name_scope("discriminatorX2Y_train"):
        discrimX2Y_train = _adam_train_op(discrimX2Y_loss,
                                          ("discriminatorX2Y",), a)

    with tf.name_scope("discriminatorY2X_train"):
        discrimY2X_train = _adam_train_op(discrimY2X_loss,
                                          ("discriminatorY2X",), a)

    with tf.name_scope("generatorX2Y_train"):
        with tf.control_dependencies([discrimX2Y_train]):
            genX2Y_train = _adam_train_op(genX2Y_loss, ("generatorX2Y",), a)

    with tf.name_scope("generatorY2X_train"):
        with tf.control_dependencies([discrimY2X_train]):
            genY2X_train = _adam_train_op(genY2X_loss, ("generatorY2X",), a)

    with tf.name_scope("discriminator_exclusiveX2Y_train"):
        discrim_exclusiveX2Y_train = _adam_train_op(
            discrim_exclusiveX2Y_loss, ("discriminator_exclusiveX2Y",), a)

    with tf.name_scope("generator_exclusiveX2Y_train"):
        with tf.control_dependencies([discrim_exclusiveX2Y_train]):
            gen_exclusiveX2Y_train = _adam_train_op(
                gen_exclusiveX2Y_loss,
                ("generator_exclusiveX2Y", "generatorX2Y_encoder"), a)

    with tf.name_scope("discriminator_exclusiveY2X_train"):
        discrim_exclusiveY2X_train = _adam_train_op(
            discrim_exclusiveY2X_loss, ("discriminator_exclusiveY2X",), a)

    with tf.name_scope("generator_exclusiveY2X_train"):
        with tf.control_dependencies([discrim_exclusiveY2X_train]):
            gen_exclusiveY2X_train = _adam_train_op(
                gen_exclusiveY2X_loss,
                ("generator_exclusiveY2X", "generatorY2X_encoder"), a)


    #SHARED GRL OPTIMIZATION
    with tf.name_scope("discriminator_sharedX2Y_train"):
        discrim_sharedX2Y_train = _adam_train_op(
            discrim_sharedX2Y_loss, ("discriminator_sharedX2Y",), a)

    with tf.name_scope("discriminator_sharedY2X_train"):
        discrim_sharedY2X_train = _adam_train_op(
            discrim_sharedY2X_loss, ("discriminator_sharedY2X",), a)

    with tf.name_scope("code_recon_train"):
        code_recon_train = _adam_train_op(
            code_recon_loss, ("generatorX2Y", "generatorY2X"), a)

    #CYCLE-CONSISTENCY OPTIMIZATION
    cyc_prefixes = ("generatorX2Y_encoder", "generatorX2Y_decoder",
                    "generatorY2X_encoder", "generatorY2X_decoder")

    with tf.name_scope("generatorX2Y_cyc_train"):
        cycX_train = _adam_train_op(cycX_loss, cyc_prefixes, a)

    with tf.name_scope("generatorY2X_cyc_train"):
        cycY_train = _adam_train_op(cycY_loss, cyc_prefixes, a)




    # Losses are reported as exponential moving averages.
    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    update_losses = ema.apply([discrimX2Y_loss, discrimY2X_loss,
                               genX2Y_loss, genY2X_loss,
                               code_recon_loss,
                               code_sR_X2Y_recon_loss, code_sR_Y2X_recon_loss,
                               code_eR_X2Y_recon_loss, code_eR_Y2X_recon_loss,
                               discrim_exclusiveX2Y_loss, discrim_exclusiveY2X_loss,
                               gen_exclusiveX2Y_loss, gen_exclusiveY2X_loss,
                               discrim_sharedX2Y_loss, discrim_sharedY2X_loss,
                               cycX_loss, cycY_loss])

    global_step = tf.train.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step+1)
    return Model(
        predict_realX2Y=predict_realX2Y,
        predict_realY2X=predict_realY2X,
        predict_fakeX2Y=predict_fakeX2Y,
        predict_fakeY2X=predict_fakeY2X,
        im_swapped_X=im_swapped_X,
        im_swapped_Y=im_swapped_Y,
        sel_auto_X=sel_auto_X,
        sel_auto_Y=sel_auto_Y,
        sR_X2Y=sR_X2Y,
        sR_Y2X=sR_Y2X,
        eR_X2Y=eR_X2Y,
        eR_Y2X=eR_Y2X,
        discrimX2Y_loss=ema.average(discrimX2Y_loss),
        discrimY2X_loss=ema.average(discrimY2X_loss),
        genX2Y_loss=ema.average(genX2Y_loss),
        genY2X_loss=ema.average(genY2X_loss),
        discrim_exclusiveX2Y_loss=ema.average(discrim_exclusiveX2Y_loss),
        discrim_exclusiveY2X_loss=ema.average(discrim_exclusiveY2X_loss),
        gen_exclusiveX2Y_loss=ema.average(gen_exclusiveX2Y_loss),
        gen_exclusiveY2X_loss=ema.average(gen_exclusiveY2X_loss),
        discrim_sharedX2Y_loss=ema.average(discrim_sharedX2Y_loss),
        discrim_sharedY2X_loss=ema.average(discrim_sharedY2X_loss),
        outputsX2Y=outputsX2Y,
        outputsY2X=outputsY2X,
        outputsX2Yp=outputsX2Yp,
        outputsY2Xp=outputsY2Xp,
        outputs_exclusiveX2Y=outputs_exclusiveX2Y,
        outputs_exclusiveY2X=outputs_exclusiveY2X,
        cycX_output=outputX2Y_cyc,
        cycX_loss=ema.average(cycX_loss),
        cycY_output=outputY2X_cyc,
        cycY_loss=ema.average(cycY_loss),
        code_recon_loss=ema.average(code_recon_loss),
        code_sR_X2Y_recon_loss=ema.average(code_sR_X2Y_recon_loss),
        code_sR_Y2X_recon_loss=ema.average(code_sR_Y2X_recon_loss),
        code_eR_X2Y_recon_loss=ema.average(code_eR_X2Y_recon_loss),
        code_eR_Y2X_recon_loss=ema.average(code_eR_Y2X_recon_loss),
        # Discriminator updates are pulled in through the control
        # dependencies of the generator train ops listed here.
        train=tf.group(update_losses, incr_global_step, genX2Y_train,
                       genY2X_train, code_recon_train,
                       gen_exclusiveX2Y_train,gen_exclusiveY2X_train,
                       discrim_sharedX2Y_train,discrim_sharedY2X_train,
                       cycX_train, cycY_train),
    )
# Linear model: x_data @ A + b
_xa = tf.matmul(x_data, A)
model_output = tf.add(_xa, b)

###
# Loss Functions
###

# The loss is chosen by `regression_type`; any value other than 'LASSO' or
# 'Ridge' only reports an error on stderr and leaves `loss` unset.

if regression_type == 'LASSO':
    # Lasso-style loss = mean squared error + a smooth step penalty on A.
    # The penalty is a steep logistic curve centred on `lasso_param`:
    # close to 0 when A is below it and close to 99 when A is above it.
    lasso_param = tf.constant(0.9)
    _shifted = tf.subtract(A, lasso_param)
    _steep = tf.multiply(-50., _shifted)
    _denominator = tf.add(1., tf.exp(_steep))
    heavyside_step = tf.truediv(1., _denominator)
    regularization_param = tf.multiply(heavyside_step, 99.)
    _mse = tf.reduce_mean(input_tensor=tf.square(y_target - model_output))
    loss = tf.add(_mse, regularization_param)

elif regression_type == 'Ridge':
    # Ridge loss = mean squared error + ridge_param * mean(A ** 2),
    # expanded to rank 1 so it has a leading dimension.
    ridge_param = tf.constant(1.)
    ridge_loss = tf.reduce_mean(input_tensor=tf.square(A))
    _mse = tf.reduce_mean(input_tensor=tf.square(y_target - model_output))
    _penalty = tf.multiply(ridge_param, ridge_loss)
    loss = tf.expand_dims(tf.add(_mse, _penalty), 0)

else:
    print('Invalid regression_type parameter value', file=sys.stderr)


###
# Optimizer
###
    def generate(self, x, **kwargs):
        """
        Generate symbolic graph for adversarial examples and return.

        The attack applies `nb_iter` fast-gradient steps of size `eps_iter`
        and, after every step, constrains the accumulated perturbation to
        the `ord`-norm ball of radius `eps` around `x`.

        :param x: The model's symbolic inputs.
        :param eps: (required float) maximum distortion of adversarial example
                    compared to original input
        :param eps_iter: (required float) step size for each attack iteration
        :param nb_iter: (required int) Number of attack iterations.
        :param y: (optional) A tensor with the model labels.
        :param y_target: (optional) A tensor with the labels to target. Leave
                         y_target=None if y is also set. Labels should be
                         one-hot-encoded.
        :param ord: (optional) Order of the norm (mimics Numpy).
                    Possible values: np.inf, 1 or 2.
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
        :return: a tensor for the adversarial example.
        """
        import tensorflow as tf

        # Parse and save attack-specific parameters
        assert self.parse_params(**kwargs)

        # Initialize loop variables
        eta = 0

        # Fix labels to the first model predictions for loss computation
        model_preds = self.model.get_probs(x)
        preds_max = tf.reduce_max(model_preds, 1, keep_dims=True)
        if self.y_target is not None:
            y = self.y_target
            targeted = True
        elif self.y is not None:
            y = self.y
            targeted = False
        else:
            # No labels given: use the model's own arg-max prediction and
            # block gradients through it.
            y = tf.to_float(tf.equal(model_preds, preds_max))
            y = tf.stop_gradient(y)
            targeted = False

        y_kwarg = 'y_target' if targeted else 'y'
        fgm_params = {'eps': self.eps_iter, y_kwarg: y, 'ord': self.ord,
                      'clip_min': self.clip_min, 'clip_max': self.clip_max}

        # Lower bound on the norm so an all-zero perturbation cannot cause a
        # division by zero below.
        avoid_zero_div = 1e-12

        for i in range(self.nb_iter):
            FGM = FastGradientMethod(self.model, back=self.back,
                                     sess=self.sess)
            # Compute this step's perturbation
            eta = FGM.generate(x + eta, **fgm_params) - x

            # Clipping perturbation eta to self.ord norm ball
            if self.ord == np.inf:
                eta = tf.clip_by_value(eta, -self.eps, self.eps)
            elif self.ord in [1, 2]:
                # Norm is taken per example, over every non-batch axis.
                reduc_ind = list(range(1, len(eta.get_shape())))
                if self.ord == 1:
                    norm = tf.reduce_sum(tf.abs(eta),
                                         reduction_indices=reduc_ind,
                                         keep_dims=True)
                elif self.ord == 2:
                    norm = tf.sqrt(tf.reduce_sum(tf.square(eta),
                                                 reduction_indices=reduc_ind,
                                                 keep_dims=True))
                norm = tf.maximum(avoid_zero_div, norm)
                # Only shrink perturbations that left the ball; ones already
                # inside keep their size (factor == 1). Unconditionally
                # multiplying by eps / norm would instead push every
                # perturbation onto the sphere of radius eps.
                factor = tf.minimum(1., self.eps / norm)
                eta = eta * factor

        # Define adversarial example (and clip if necessary)
        adv_x = x + eta
        if self.clip_min is not None and self.clip_max is not None:
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        return adv_x
Esempio n. 46
0
def main():
  """Train a small two-hidden-layer network on 'trainset.csv' and plot results.

  Reads the training data from 'trainset.csv' (first row skipped, first
  column used as input, second as target), fits a 1-5-3-1 sigmoid network
  with Adam on normalized data, then plots the fit, prints a prediction for
  a single input, and plots predictions for 'testset_x.csv' against
  'testset_y.csv'.
  """
  traincsvdata = np.loadtxt('trainset.csv', unpack=True, delimiter=',', skiprows=1)
  num_points = len(traincsvdata[0])
  print("training points : ", num_points)

  x_data = traincsvdata[0]  # [230. 280. 241....
  y_data = traincsvdata[1]  # [1349.9 1809.  1590.8 1571.8 1768.3

  # Plot the training set as green (g) circles (o)
  plt.suptitle('Training Data Set', fontsize=16)
  plt.plot(x_data, y_data, 'go')
  plt.xlabel('weight')
  plt.ylabel('distance')
  plt.show()

  # Normalize the data (the original note says values are mapped into 0~1)
  x_data = minmax_normalize(x_data)
  y_data = minmax_normalize(y_data)

  # Train in mini-batches; BATCH_NUM is the number of full batches
  BATCH_SIZE = 5
  BATCH_NUM = int(len(x_data)/BATCH_SIZE)

  # Reshape the data into column vectors (one sample per row)
  x_data = np.reshape(x_data, [len(x_data),1]) # [[230.] [280.] ...
  y_data = np.reshape(y_data, [len(y_data),1])

  # Placeholders: any number of rows, one value per row
  input_data = tf.placeholder(tf.float32, shape=[None,1])
  output_data = tf.placeholder(tf.float32, shape=[None,1])

  # Weights between layers, randomly initialized
  W1 = tf.Variable(tf.random_uniform([1,5], 0.0, 300.0))
  W2 = tf.Variable(tf.random_uniform([5,3], 0.0, 1.0))
  W_out = tf.Variable(tf.random_uniform([3,1], 0.0, 300.0))

  # Each layer multiplies the previous activations by its weights; the
  # hidden layers apply a sigmoid non-linearity, the output layer is linear.
  hidden1 = tf.nn.sigmoid(tf.matmul(input_data,W1))
  hidden2 = tf.nn.sigmoid(tf.matmul(hidden1,W2))
  output = tf.matmul(hidden2, W_out)

  # Loss (mean squared error), optimizer and train op
  loss = tf.reduce_mean(tf.square(output-output_data))
  optimizer = tf.train.AdamOptimizer(0.1)
  train = optimizer.minimize(loss)

  # Op that initializes the variables
  init = tf.global_variables_initializer()

  # Open the session as a context manager so it is closed when done
  with tf.Session() as sess:
    sess.run(init)

    # Training loop
    for step in range(4000):
      # Reshuffle the dataset on every pass
      x_data, y_data = shuffle_data(x_data, y_data)

      # Fallback feed for the progress print when the dataset is smaller
      # than one batch and the loop below does not run.
      feed_dict = {input_data: x_data, output_data: y_data}

      # Train on every full batch (the previous bound of BATCH_NUM-1
      # skipped the last one on each pass).
      for batch_iter in range(BATCH_NUM):
        index = batch_iter * BATCH_SIZE
        feed_dict = {input_data: x_data[index:index+BATCH_SIZE], output_data: y_data[index:index+BATCH_SIZE]}
        sess.run(train, feed_dict = feed_dict)

      # Print progress every 200 steps; the loss shown is evaluated on the
      # last batch that was fed.
      if (step%200==0):
        print("Step=%5d, Loss Value=%f" %(step, sess.run(loss, feed_dict = feed_dict)))


    # After training, inspect the result on a plot
    print("## 학습결과 그래프 ##")

    # Training set as green (g) circles (o)
    plt.plot(x_data, y_data, 'go')

    # Model predictions as black (k) stars (*)
    feed_dict = {input_data: x_data}
    plt.plot(x_data, sess.run(output, feed_dict=feed_dict), 'k*')

    # Draw the figure
    plt.suptitle('Training Result', fontsize=16)
    plt.xlabel('weight')
    plt.ylabel('distance')
    plt.show()

    # Predict for a single input, normalizing and de-normalizing against
    # the raw training columns.
    print("# 학습결과 이륙거리계산 #")
    ask_x = 270
    ask_norm_x = [[minmax_get_norm(ask_x, traincsvdata[0])]]
    answer_norm_y = sess.run(output, feed_dict={input_data: ask_norm_x})
    answer_y = minmax_get_denorm(answer_norm_y, traincsvdata[1])
    print("> 무게(X):", ask_x, "ton => 이륙거리Y:", answer_y[0][0], "m \n\n\n")

    # Check the result on the test set
    print("## Test Set 검증결과 그래프 ##")

    # Read the test set files
    test_csv_x_data = np.loadtxt('testset_x.csv', unpack=True, delimiter=',', skiprows=1)
    test_csv_y_data = np.loadtxt('testset_y.csv', unpack=True, delimiter=',', skiprows=1)

    # Normalize the test inputs.
    # NOTE(review): this normalizes with the test set's own statistics,
    # while the single-value prediction above normalizes against the
    # training column; confirm whether minmax_get_norm with
    # traincsvdata[0] should be used here instead.
    test_norm_x_data = minmax_normalize(test_csv_x_data)

    # CSV data (test set, ground truth) as magenta (m) circles (o)
    plt.plot(test_csv_x_data, test_csv_y_data, 'mo')

    # Predictions as black (k) stars (*)
    feed_dict = {input_data: np.reshape(test_norm_x_data, (len(test_norm_x_data), 1)) } #[[0.41573034] [0.20224719] ...
    test_pred_y_data = minmax_get_denorm(sess.run(output, feed_dict=feed_dict), traincsvdata[1])
    plt.plot(test_csv_x_data, test_pred_y_data, 'k*')

  # Draw the figure
  plt.suptitle('Test Result', fontsize=16)
  plt.xlabel('weight')
  plt.ylabel('distance')
  plt.show()
Esempio n. 47
0
# Import the required libraries
import tensorflow as tf
import numpy as np

# Variable definitions: a linear model y = x . w with no separate bias term.
dim = 5
# Each input row has dim + 1 columns.
x = tf.placeholder(tf.float32, [None, dim + 1])
# One weight per input column, initialised to zero.
w = tf.Variable(tf.zeros([dim + 1, 1]))
y = tf.matmul(x, w)
# Target values, one column per sample.
t = tf.placeholder(tf.float32, [None, 1])
sess = tf.Session()

# Define the loss function and the training method:
# sum of squared errors, minimised with Adam at its default settings.
loss = tf.reduce_sum(tf.square(y - t))
train_step = tf.train.AdamOptimizer().minimize(loss)

# Define the values tracked in TensorBoard
with tf.name_scope('summary'):
    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()
    # Event files (including the graph) are written under ./logs.
    writer = tf.summary.FileWriter('./logs', sess.graph)

# Initialise the session and prepare the input data
sess.run(tf.global_variables_initializer())

# Twelve target values reshaped into a (12, 1) column.
# NOTE(review): presumably one value per month — confirm against the data
# source.
train_t = np.array(
    [5.2, 5.7, 8.6, 14.9, 18.2, 20.4, 25.5, 26.4, 22.8, 17.5, 11.1, 6.6])
train_t = train_t.reshape([12, 1])
# Input matrix with one row per target value; starts as all zeros.
train_x = np.zeros([12, dim + 1])
for row, month in enumerate(range(1, 13)):
    for col, n in enumerate(range(0, dim + 1)):
Esempio n. 48
0
    def __init__(self,
                 mixing_weights,
                 component_params,
                 component_dist,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="ParamMixture"):
        """Initialize a batch of mixture random variables.

        Args:
          mixing_weights: tf.Tensor.
            (Normalized) weights whose inner (right-most) dimension matches
            the number of components.
          component_params: dict.
            Parameters of the per-component distributions.
          component_dist: RandomVariable.
            Distribution of each component. The outer (left-most) dimension
            of its batch shape when instantiated determines the number of
            components.
          validate_args: bool.
            If True, check the types of `component_params` and
            `component_dist` and the static shape compatibility between
            the mixing weights and the components. Also forwarded to the
            internal Categorical, the components and the parent class.
          allow_nan_stats: bool.
            Forwarded unchanged to the internal Categorical, the
            components and the parent class.
          name: str.
            Name scope for the ops created here; also passed to the
            parent class.

        Raises:
          TypeError: if `validate_args` is True and an argument has the
            wrong type, or the static shapes are incompatible.
          NotImplementedError: if the number of components cannot be read
            from the static shape of the mixing probabilities.
        """
        # Must stay the first statement so that only the constructor
        # arguments are captured.
        parameters = locals()
        parameters.pop("self")

        # Type-check before touching the arguments: `six.itervalues` below
        # needs a dict, so validating afterwards would let a non-dict fail
        # with an unrelated AttributeError instead of this TypeError.
        if validate_args:
            if not isinstance(component_params, dict):
                raise TypeError("component_params must be a dict.")
            elif not issubclass(component_dist, RandomVariable):
                raise TypeError(
                    "component_dist must be a ed.RandomVariable object.")

        values = [mixing_weights] + list(six.itervalues(component_params))
        with tf.name_scope(name, values=values):
            # get sample_shape from inherited RandomVariable specifically
            if hasattr(self, '_kwargs'):
                sample_shape = self._kwargs.get('sample_shape', ())
            else:
                sample_shape = ()

            self._mixing_weights = tf.identity(mixing_weights,
                                               name="mixing_weights")
            self._cat = Categorical(probs=self._mixing_weights,
                                    validate_args=validate_args,
                                    allow_nan_stats=allow_nan_stats,
                                    sample_shape=sample_shape)
            self._component_params = component_params
            self._components = component_dist(validate_args=validate_args,
                                              allow_nan_stats=allow_nan_stats,
                                              sample_shape=sample_shape,
                                              collections=[],
                                              **component_params)

            if validate_args:
                if not self._mixing_weights.shape[-1].is_compatible_with(
                        self._components.batch_shape[0]):
                    raise TypeError(
                        "Last dimension of mixing_weights must match with "
                        "the first dimension of components.")
                elif not self._mixing_weights.shape[:-1].is_compatible_with(
                        self._components.batch_shape[1:]):
                    raise TypeError(
                        "Dimensions of mixing_weights are not compatible "
                        "with the dimensions of components.")

            try:
                self._num_components = self._cat.probs.shape.as_list()[-1]
            except Exception:  # e.g. probs has an unknown static shape
                # `Exception` rather than a bare `except` so that
                # KeyboardInterrupt / SystemExit are not converted.
                raise NotImplementedError(
                    "Number of components must be statically "
                    "determined.")

            # Closed-form moments are only precomputed when the mixing
            # probabilities have rank <= 1; otherwise they stay None.
            self._mean_val = None
            self._variance_val = None
            self._stddev_val = None
            if self._cat.probs.shape.ndims <= 1:
                with tf.name_scope('means'):
                    try:
                        comp_means = self._components.mean()
                        comp_vars = self._components.variance()
                        # E[x^2] of each component.
                        comp_mean_sq = tf.square(comp_means) + comp_vars

                        # weights has shape batch_shape + [num_components]; change
                        # to broadcast with [num_components] + batch_shape + event_shape.
                        # The below reshaping only works for empty batch_shape.
                        weights = self._cat.probs
                        event_rank = self._components.event_shape.ndims
                        for _ in range(event_rank):
                            weights = tf.expand_dims(weights, -1)

                        self._mean_val = tf.reduce_sum(comp_means * weights,
                                                       0,
                                                       name='mean')
                        mean_sq_val = tf.reduce_sum(comp_mean_sq * weights,
                                                    0,
                                                    name='mean_squared')
                        # Var[x] = E[x^2] - E[x]^2
                        self._variance_val = tf.subtract(mean_sq_val,
                                                         tf.square(
                                                             self._mean_val),
                                                         name='variance')
                        self._stddev_val = tf.sqrt(self._variance_val,
                                                   name='stddev')
                    except Exception:
                        # Deliberate best effort: this fails if
                        # _components.{mean,variance}() fails, in which
                        # case the moments are simply left unset.
                        pass

        super(distributions_ParamMixture, self).__init__(
            dtype=self._components.dtype,
            reparameterization_type=self._components.reparameterization_type,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            parameters=parameters,
            graph_parents=[self._cat.value(),
                           self._components.value()],
            name=name)
Esempio n. 49
0
    def _init_graph(self):
        '''
        Init a tensorflow Graph containing: input data, variables, model, loss, optimizer.

        Builds everything inside a fresh ``tf.Graph`` stored on ``self.graph``,
        then creates a saver and a session, runs the variable initializer and
        (when ``self.verbose > 0``) prints the number of parameters.

        Reads: random_seed, batch_norm, layers, activation_function,
        loss_type, lamda_bilinear, optimizer_type, learning_rate, verbose.
        Sets: graph, the input placeholders, weights, the intermediate
        model tensors, out, loss, optimizer, saver and sess.
        '''
        self.graph = tf.Graph()
        with self.graph.as_default():  # , tf.device('/cpu:0'):
            # Set graph level random seed
            tf.set_random_seed(self.random_seed)
            # Input data.
            self.train_features = tf.placeholder(tf.int32, shape=[None, None])  # None * features_M
            self.train_labels = tf.placeholder(tf.float32, shape=[None, 1])  # None * 1
            # One keep-probability per deep layer; the last entry is used
            # for the bilinear interaction layer below.
            self.dropout_keep = tf.placeholder(tf.float32, shape=[None])
            self.train_phase = tf.placeholder(tf.bool)

            # Variables.
            self.weights = self._initialize_weights()

            # Model.
            # _________ sum_square part _____________
            # get the summed up embeddings of features.
            nonzero_embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'], self.train_features)
            self.summed_features_emb = tf.reduce_sum(nonzero_embeddings, 1)  # None * K
            # get the element-multiplication
            self.summed_features_emb_square = tf.square(self.summed_features_emb)  # None * K

            # _________ square_sum part _____________
            self.squared_features_emb = tf.square(nonzero_embeddings)
            self.squared_sum_features_emb = tf.reduce_sum(self.squared_features_emb, 1)  # None * K

            # ________ FM __________
            # 0.5 * ((sum of embeddings)^2 - sum of (embeddings^2))
            self.FM = 0.5 * tf.subtract(self.summed_features_emb_square, self.squared_sum_features_emb)  # None * K
            if self.batch_norm:
                self.FM = self.batch_norm_layer(self.FM, train_phase=self.train_phase, scope_bn='bn_fm')
            self.FM = tf.nn.dropout(self.FM, self.dropout_keep[-1])  # dropout at the bilinear interaction layer

            # ________ Deep Layers __________
            for i in range(0, len(self.layers)):
                self.FM = tf.add(tf.matmul(self.FM, self.weights['layer_%d' %i]), self.weights['bias_%d'%i])  # None * layer[i] * 1
                if self.batch_norm:
                    self.FM = self.batch_norm_layer(self.FM, train_phase=self.train_phase, scope_bn='bn_%d' %i)  # None * layer[i] * 1
                self.FM = self.activation_function(self.FM)
                self.FM = tf.nn.dropout(self.FM, self.dropout_keep[i])  # dropout at each Deep layer
            self.FM = tf.matmul(self.FM, self.weights['prediction'])     # None * 1

            # _________out _________
            Bilinear = tf.reduce_sum(self.FM, 1, keep_dims=True)  # None * 1
            self.Feature_bias = tf.reduce_sum(tf.nn.embedding_lookup(self.weights['feature_bias'], self.train_features) , 1)  # None * 1
            Bias = self.weights['bias'] * tf.ones_like(self.train_labels)  # None * 1
            self.out = tf.add_n([Bilinear, self.Feature_bias, Bias])  # None * 1

            # Compute the loss. When lamda_bilinear > 0 an L2 penalty on the
            # feature embeddings is added to the data term.
            if self.loss_type == 'square_loss':
                if self.lamda_bilinear > 0:
                    self.loss = tf.nn.l2_loss(tf.subtract(self.train_labels, self.out)) + tf.contrib.layers.l2_regularizer(self.lamda_bilinear)(self.weights['feature_embeddings'])  # regularizer
                else:
                    self.loss = tf.nn.l2_loss(tf.subtract(self.train_labels, self.out))
            elif self.loss_type == 'log_loss':
                self.out = tf.sigmoid(self.out)
                # Uses `lamda_bilinear` (the spelling used everywhere else in
                # this method); the previous `lambda_bilinear` lookup made
                # this branch fail with AttributeError.
                if self.lamda_bilinear > 0:
                    self.loss = tf.contrib.losses.log_loss(self.out, self.train_labels, weight=1.0, epsilon=1e-07, scope=None) + tf.contrib.layers.l2_regularizer(self.lamda_bilinear)(self.weights['feature_embeddings'])  # regularizer
                else:
                    self.loss = tf.contrib.losses.log_loss(self.out, self.train_labels, weight=1.0, epsilon=1e-07, scope=None)

            # Optimizer.
            if self.optimizer_type == 'AdamOptimizer':
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8).minimize(self.loss)
            elif self.optimizer_type == 'AdagradOptimizer':
                self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate, initial_accumulator_value=1e-8).minimize(self.loss)
            elif self.optimizer_type == 'GradientDescentOptimizer':
                self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
            elif self.optimizer_type == 'MomentumOptimizer':
                self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize(self.loss)

            # init
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.sess.run(init)

            # number of params: product of the static dimensions of every
            # entry in self.weights, summed over all entries.
            total_parameters = 0
            for variable in self.weights.values():
                shape = variable.get_shape()  # shape is an array of tf.Dimension
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            if self.verbose > 0:
                print("#params: %d" %total_parameters)
    def build_model(self, is_train):
        """Build the encoder/generator/classifier/discriminator graph.

        Loads training and test image batches, encodes two training
        batches, mixes their feature chunks according to a random binary
        mask, decodes the mixtures, and defines the classifier,
        discriminator, generator and reconstruction losses. Also builds
        chunk-swapped test outputs, collects the trainable variables by
        name and creates the saver.

        Args:
            is_train: not read anywhere in this method body.
        """

        self.abstract_size = self.sample_size // 2 ** 4

        # Only the third value returned by the pipeline helper is used. A
        # batch of 3 * batch_size images is requested and split below into
        # three consecutive batch_size slices.
        _,_,images= get_pipeline_training_from_dump('data_example.tfrecords',
                                                                 self.batch_size*3,
                                                                 1000, image_size=60,resize_size=60,
                                                                 img_channels=self.c_dim)

        # Same file, 2 * batch_size images, used for the test-time outputs.
        _,_,test_images1 = get_pipeline_training_from_dump('data_example.tfrecords',
                                                                 self.batch_size*2,
                                                                 10000000, image_size=60,resize_size=60,
                                                                 img_channels=self.c_dim)

        self.images = images[0:self.batch_size,:,:,:]
        self.imagesR = images[self.batch_size:self.batch_size*2,:,:,:]

        # Stored on self but not used again in this method.
        self.third_image = images[self.batch_size*2:self.batch_size*3,:,:,:]

        self.test_images1 = test_images1[0:self.batch_size,:,:,:]
        self.test_images2 = test_images1[self.batch_size:self.batch_size*2,:,:,:]


        # The representation is treated as chunk_num consecutive chunks of
        # chunk_size columns each.
        self.chunk_num = 8
        self.chunk_size = 64
        self.feature_size = self.chunk_size*self.chunk_num


        with tf.variable_scope('generator') as scope:

            # Encode and decode the first batch.
            self.rep = self.encoder(self.images)
            self.D_I = self.generator(self.rep)

            # Result discarded. NOTE(review): presumably called once here so
            # the classifier variables exist before reuse is switched on —
            # confirm.
            _ = self.classifier(self.D_I,self.D_I,self.D_I)

            scope.reuse_variables()
            # Encode and decode the second batch.
            self.repR = self.encoder(self.imagesR)
            self.D_IR = self.generator(self.repR)

            # Random mask with one integer in {0, 1} per chunk.
            k = tf.random_uniform(shape=[self.chunk_num],minval=0,maxval=2,dtype=tf.int32)
            a_chunk = tf.ones((self.batch_size,self.chunk_size),dtype=tf.int32)
            # a_fea is not referenced again in this method.
            a_fea = tf.ones_like(self.rep,dtype=tf.int32)

            # Chunk 0 is handled outside the loop to seed the concatenation.
            # fea_mix takes the chunk from rep where k == 0, else from repR;
            # fea_mixR takes the chunk from rep where k == 1, else from repR.
            i=0
            t1 = self.rep[:,i*self.chunk_size:(i+1)*self.chunk_size]
            e1 = self.repR[:,i*self.chunk_size:(i+1)*self.chunk_size]
            self.fea = tf.where(tf.equal(k[0]*a_chunk,0),t1,e1)
            self.fea_mix = self.fea

            self.feaR = tf.where(tf.equal(k[0]*a_chunk,1),t1,e1)
            self.fea_mixR = self.feaR

            # Mix the remaining chunks the same way and append them along
            # the feature axis.
            for i in xrange(1,self.chunk_num):
                t1 = self.rep[:,i*self.chunk_size:(i+1)*self.chunk_size]
                e1 = self.repR[:,i*self.chunk_size:(i+1)*self.chunk_size]
                self.fea = tf.where(tf.equal(k[i]*a_chunk,0),t1,e1)
                self.fea_mix = tf.concat(axis=1,values=[self.fea_mix,self.fea])

                self.feaR = tf.where(tf.equal(k[i]*a_chunk,1),t1,e1)
                self.fea_mixR = tf.concat(axis=1,values=[self.fea_mixR,self.feaR])


            self.k = k
            self.k0 = k[0]

            # Decode the mixed representation.
            self.D_mix = self.generator(self.fea_mix)

            # Classifier sees both source batches and the mixed image.
            self.cf = self.classifier(self.images,self.imagesR,self.D_mix)

            # Mask broadcast to (batch_size, chunk_num) as float32; used
            # with self.cf in the classifier loss below.
            self.kfc = tf.cast(tf.ones((self.batch_size,self.chunk_num),dtype=tf.int32)*k,tf.float32)

            self.D_mixR = self.generator(self.fea_mixR)

            # Re-encode the mixed image.
            self.rep_mix = self.encoder(self.D_mix)

            # Rebuild per-source representations from the re-encoded mix:
            # rep_re uses rep_mix chunks where k == 0 and rep elsewhere;
            # repR_re uses rep_mix chunks where k == 1 and repR elsewhere.
            i = 0
            tt = self.rep_mix[:,i*self.chunk_size:(i+1)*self.chunk_size]
            ee = self.rep[:,i*self.chunk_size:(i+1)*self.chunk_size]
            eeR = self.repR[:,i*self.chunk_size:(i+1)*self.chunk_size]
            self.rep_re = tf.where(tf.equal(k[i]*a_chunk,0),tt,ee)
            self.repR_re = tf.where(tf.equal(k[i]*a_chunk,1),tt,eeR)

            for i in xrange(1,self.chunk_num):
                tt = self.rep_mix[:,i*self.chunk_size:(i+1)*self.chunk_size]
                ee = self.rep[:,i*self.chunk_size:(i+1)*self.chunk_size]
                eeR = self.repR[:,i*self.chunk_size:(i+1)*self.chunk_size]
                self.rep_re = tf.concat(axis=1,values=[self.rep_re,tf.where(tf.equal(k[i]*a_chunk,0),tt,ee)])
                self.repR_re = tf.concat(axis=1,values=[self.repR_re,tf.where(tf.equal(k[i]*a_chunk,1),tt,eeR)])

            self.D_regenerate1 = self.generator(self.rep_re)

            self.D_regenerate2 = self.generator(self.repR_re)

            # ---- test-time chunk swapping ----
            scope.reuse_variables()
            self.rep_test1 = self.encoder(self.test_images1)
            self.rep_test2 = self.encoder(self.test_images2)

            # First output: chunk 0 from rep_test2, every other chunk from
            # rep_test1. (This `i` is overwritten by the loops below.)
            i = 0
            self.rep_test = self.rep_test2[:,0*self.chunk_size:1*self.chunk_size]
            for j in xrange(1,self.chunk_num):
                tmp = self.rep_test1[:,j*self.chunk_size:(j+1)*self.chunk_size]
                self.rep_test = tf.concat(axis=1,values=[self.rep_test,tmp])
            self.D_mix_allchunk = self.generator(self.rep_test,reuse=True)
            self.D_mix_allchunk_sup = self.D_mix_allchunk


            # D_mix_allchunk: for each i >= 1, swap only chunk i in from
            # rep_test2 (all other chunks from rep_test1) and stack the
            # generated images along the batch axis.
            for i in xrange(1,self.chunk_num):
                self.rep_test = self.rep_test1[:,0*self.chunk_size:1*self.chunk_size]
                for j in xrange(1,self.chunk_num):
                    if j==i:
                        tmp = self.rep_test2[:,j*self.chunk_size:(j+1)*self.chunk_size]
                        self.rep_test = tf.concat(axis=1,values=[self.rep_test,tmp])
                    else:
                        tmp = self.rep_test1[:,j*self.chunk_size:(j+1)*self.chunk_size]
                        self.rep_test = tf.concat(axis=1,values=[self.rep_test,tmp])
                tmp_mix = self.generator(self.rep_test)
                self.D_mix_allchunk = tf.concat(axis=0,values=[self.D_mix_allchunk,tmp_mix])

            # D_mix_allchunk_sup: cumulative version — for each i >= 1,
            # chunks 0..i come from rep_test2 and the rest from rep_test1.
            for i in xrange(1,self.chunk_num):
                self.rep_test = self.rep_test2[:,0*self.chunk_size:1*self.chunk_size]
                for j in xrange(1,self.chunk_num):
                    if j<=i:
                        tmp = self.rep_test2[:,j*self.chunk_size:(j+1)*self.chunk_size]
                        self.rep_test = tf.concat(axis=1,values=[self.rep_test,tmp])
                    else:
                        tmp = self.rep_test1[:,j*self.chunk_size:(j+1)*self.chunk_size]
                        self.rep_test = tf.concat(axis=1,values=[self.rep_test,tmp])
                tmp_mix = self.generator(self.rep_test)
                self.D_mix_allchunk_sup = tf.concat(axis=0,values=[self.D_mix_allchunk_sup,tmp_mix])

        with tf.variable_scope('classifier_loss') as scope:

            # Classifier output compared against the broadcast mask.
            self.cf_loss = binary_cross_entropy_with_logits(self.kfc,self.cf)

        with tf.variable_scope('discriminator') as scope:

            # Discriminator on the first real batch ...
            self.D = self.discriminator(self.images)

            # ... and, with reuse=True, on the mixed generated images.
            self.D_ = self.discriminator(self.D_mix, reuse=True)

        with tf.variable_scope('discriminator_loss') as scope:
            # All-ones paired with the real output, all-zeros with the
            # generated output.
            self.d_loss_real = binary_cross_entropy_with_logits(tf.ones_like(self.D), self.D)
            self.d_loss_fake = binary_cross_entropy_with_logits(tf.zeros_like(self.D_), self.D_)

            self.d_loss = self.d_loss_real + self.d_loss_fake


        with tf.variable_scope('generator_loss') as scope:

            # All-ones paired with the discriminator output on generated
            # images.
            self.g_loss = binary_cross_entropy_with_logits(tf.ones_like(self.D_), self.D_)


        with tf.variable_scope('L2') as scope:

            # Mean squared reconstruction errors: direct autoencoding of
            # each batch, then the regenerated images from the mixed path.
            self.rec_loss = tf.reduce_mean(tf.square(self.D_I - self.images))

            self.recR_loss = tf.reduce_mean(tf.square(self.D_IR - self.imagesR))

            self.rec_mix_loss = tf.reduce_mean(tf.square(self.D_regenerate1 - self.images))

            self.recR_mix_loss = tf.reduce_mean(tf.square(self.D_regenerate2 - self.imagesR))


        self.bn_assigners = tf.group(*batch_norm.assigners)

        t_vars = tf.trainable_variables()

        # Variables are selected by substring match on their names, so the
        # 'g_s' and 'g_en' lists are subsets of the 'g_' list.
        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]
        self.g_s_vars = [var for var in t_vars if 'g_s' in var.name]
        self.g_e_vars = [var for var in t_vars if 'g_en' in var.name]
        self.c_vars = [var for var in t_vars if 'c_' in var.name]

        # Saver over the discriminator, generator and classifier variables
        # plus the batch-norm shadow variables.
        self.saver = tf.train.Saver(self.d_vars + self.g_vars + self.c_vars+
                                    batch_norm.shadow_variables,
                                    max_to_keep=0)
Esempio n. 51
0
def square(x, name=None):
    """Return ``tf.square(x, name)`` passed through ``_auto_upcast``."""
    squared = tf.square(x, name)
    return _auto_upcast(squared)
Esempio n. 52
0
# Build graph
# Placeholders for a batch of real samples and of generator input noise.
real_data = tf.placeholder(tf.float32, shape=[None, OUTPUT_DIM])
input_noise = tf.placeholder(tf.float32, shape=[None, NOISE_DIM])
fake_data = Generator(BATCH_SIZE, input_noise)

# Discriminator returns a pair; the second element is compared against the
# input noise below. NOTE(review): presumably the first element is the
# critic score and the second the noise recovered by an inverter — confirm
# against the Discriminator definition.
dis_real, real_noise = Discriminator(real_data)
dis_fake, invert_noise = Discriminator(fake_data)

# Parameter groups looked up by name prefix.
gen_params = lib.params_with_name('Generator')
dis_params = lib.params_with_name('Discriminator')
inv_params = lib.params_with_name('Invertor')

# Optimize cost function
if MODE == 'wgan-gp':
  # Inverter cost: mean squared error between the noise fed to the
  # generator and the noise returned for the generated samples.
  inv_cost = tf.reduce_mean(tf.square(input_noise - invert_noise))
  gen_cost = -tf.reduce_mean(dis_fake)
  dis_cost = tf.reduce_mean(dis_fake) - tf.reduce_mean(dis_real)

  # Gradient penalty: evaluate the discriminator on random points between
  # real and fake samples and penalise gradient norms that differ from 1.
  # alpha has BATCH_SIZE rows, so the fed batches are assumed to have
  # BATCH_SIZE rows as well — TODO confirm.
  alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
  differences = fake_data - real_data
  interpolates = real_data + alpha * differences
  gradients = tf.gradients(Discriminator(interpolates)[0], [interpolates])[0]
  slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
  gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
  dis_cost_gp = dis_cost + LAMBDA * gradient_penalty

  # The inverter step only updates the 'Invertor' parameters.
  inv_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5,
                                        beta2=0.9).minimize(inv_cost,
                                                            var_list=inv_params)
  gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5,
Esempio n. 53
0
def build_mnist_model(num_hidden, decay, activation):
    """Build the feed-forward regression graph and its training targets.

    Creates the input placeholders, the network (via ``feed_forward`` under
    the ``'network'`` variable scope), the loss, and three update variants:
    plain SGD on all weights, SGD on all weights with Gaussian noise added
    to each step, and SGD restricted to the last dense layer.

    Args:
        num_hidden: passed to ``feed_forward``; its length also selects the
            last dense layer scope (``network/dense_{len - 1}``).
        decay: passed to ``feed_forward``.
        activation: passed to ``feed_forward``.

    Returns:
        A ``(ph, targets)`` pair: ``ph`` maps names to placeholders and
        ``targets`` maps names to the tensors/ops to run.
    """
    x = tf.placeholder(dtype=tf.float32, shape=[None, args.x_dim])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    is_training = tf.placeholder(dtype=tf.bool, shape=[])
    with tf.variable_scope('network'):
        out, reg, layers = feed_forward(
            x, num_hidden, decay, activation, is_training)

    # Per-sample sum of squared errors, averaged over the batch.
    squared_error = tf.square(y - out)
    rmse_loss = tf.reduce_mean(tf.reduce_sum(squared_error, 1))
    loss = rmse_loss + reg

    variables_key = tf.GraphKeys.GLOBAL_VARIABLES
    all_weights = tf.get_collection(variables_key, scope='network')
    show_variables(all_weights)
    last_scope = 'network/dense_{}'.format(len(num_hidden) - 1)
    last_layer_weights = tf.get_collection(variables_key, scope=last_scope)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='network')
    for update_op in update_ops:
        print('Update {}'.format(update_op))

    # Plain SGD over every variable in the network scope.
    lr_decay = tf.placeholder(dtype=tf.float32, shape=[])
    all_op = tf.train.GradientDescentOptimizer(args.lr * lr_decay)
    all_grads = all_op.compute_gradients(loss=loss, var_list=all_weights)
    all_train_op = all_op.apply_gradients(grads_and_vars=all_grads)

    # Noisy SGD: the same gradient step plus scaled Gaussian noise.
    lr = args.lr * lr_decay
    TEMPERATURE = 1e-8

    def _noisy_step(grad, var):
        descent = var - lr * grad
        noise = tf.sqrt(lr) * TEMPERATURE * tf.random_normal(
            var.shape, stddev=1)
        return tf.assign(var, descent - noise)

    noise_train_ops = [
        _noisy_step(grad, var) for grad, var in all_grads
        if grad is not None
    ]
    all_train_op_noise = tf.group(noise_train_ops)

    # SGD restricted to the last dense layer.
    lst_op = tf.train.GradientDescentOptimizer(args.lr * lr_decay)
    lst_grads = lst_op.compute_gradients(loss=loss,
                                         var_list=last_layer_weights)
    lst_train_op = lst_op.apply_gradients(grads_and_vars=lst_grads)
    reset_lst_op = tf.variables_initializer(lst_op.variables())
    reset_all_op = tf.variables_initializer(all_op.variables())

    # Kernel variables, keyed by variable name.
    weight_dict = {
        var.name: var for var in all_weights if 'kernel' in var.name
    }
    print('weights to be saved')
    print(weight_dict)

    # Only the first-layer kernel gets a feedable placeholder / assign op.
    first_kernel = weight_dict['network/dense_0/kernel:0']
    ph = {'x': x, 'y': y, 'lr_decay': lr_decay, 'is_training': is_training}
    ph['kernel_l0'] = tf.placeholder(dtype=tf.float32,
                                     shape=first_kernel.get_shape())
    assign_first_kernel = tf.assign(first_kernel, ph['kernel_l0'])

    def _train_bundle(train_op):
        return {
            'weights': all_weights,
            'train': train_op,
            'rmse_loss': rmse_loss,
            'update': update_ops,
            'reg_loss': reg,
        }

    targets = {
        'layers': layers,
        'all': _train_bundle(all_train_op),
        'all_noise': _train_bundle(all_train_op_noise),
        'lst': _train_bundle(lst_train_op),
        'eval': {
            'weights': weight_dict,
            'rmse_loss': rmse_loss,
            'out': out,
        },
        'assign_weights': {
            'weights_l0': assign_first_kernel,
        },
        'reset': {
            'lst': reset_lst_op,
            'all': reset_all_op,
        },
    }

    return ph, targets
Esempio n. 54
0
            h1 = tf.nn.relu(h1)
            # [b,256] => [b,128]
            # The bias is broadcast automatically; no explicit broadcast
            # call is needed.
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b,128] => [b,10]
            out=h2@w3 + b3

            # compute loss
            # out: [b,10]
            # y: [b] => [b,10]
            y_onehot = tf.one_hot(y,depth=10)

            # mse = mean(sum(y-out)^2)  (mean squared error)
            # [b,10]
            loss = tf.square(y_onehot - out)
            # mean: scalar (reduce_mean without an axis averages over every
            # element)
            loss=tf.reduce_mean(loss)

        # compute gradients of the loss w.r.t. each parameter, returned in
        # the same order as the list passed in
        grads = tape.gradient(loss,[w1,b1,w2,b2,w3,b3])

        # Manual gradient-descent step: w1 = w1 - lr * grad(w1), applied in
        # place so each parameter keeps its identity.

        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
Esempio n. 55
0
                               stddev=1.0 / math.sqrt(embedding_size))
        )
        
        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
        
    # Noise-contrastive estimation loss, averaged over the batch;
    # num_sampled is passed through as the number of sampled classes.
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                                        biases=nce_biases,
                                         labels=train_labels,
                                         inputs=embed,
                                         num_sampled=num_sampled,
                                         num_classes=vocabulary_size
                                        ))
    
    # Plain SGD with a learning rate of 1.0.
    optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

    # L2 norm of each embedding row (kept as a column so the division
    # broadcasts), then rescale every row to unit length.
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
    normalized_embeddings = embeddings / norm
    valid_embeddings = tf.nn.embedding_lookup(
        normalized_embeddings, valid_dataset
    )
    # Dot products of unit-length rows, i.e. the cosine similarity between
    # each validation embedding and every embedding.
    similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b = True)

    init = tf.global_variables_initializer()

num_steps = 100001
with tf.Session(graph=graph) as session:
    init.run()
    print('Initialized')
    
    average_loss = 0
    for step in range(num_steps):
Esempio n. 56
0
def learn(env,
          q_func,
          optimizer_spec,
          session,
          exploration=LinearSchedule(1000000, 0.1),
          stopping_criterion=None,
          replay_buffer_size=1000000,
          batch_size=32,
          gamma=0.99,
          learning_starts=50000,
          learning_freq=4,
          frame_history_len=4,
          target_update_freq=10000,
          grad_norm_clipping=10):
    """Run Deep Q-learning algorithm.

    You can specify your own convnet using q_func.

    All schedules are w.r.t. total number of steps taken in the environment.

    Parameters
    ----------
    env: gym.Env
        gym environment to train on.
    q_func: function
        Model to use for computing the q function. It should accept the
        following named arguments:
            img_in: tf.Tensor
                tensorflow tensor representing the input image
            num_actions: int
                number of actions
            scope: str
                scope in which all the model related variables
                should be created
            reuse: bool
                whether previously created variables should be reused.
    optimizer_spec: OptimizerSpec
        Specifying the constructor and kwargs, as well as learning rate schedule
        for the optimizer
    session: tf.Session
        tensorflow session to use.
    exploration: rl_algs.deepq.utils.schedules.Schedule
        schedule for probability of choosing random action.
    stopping_criterion: (env, t) -> bool
        should return true when it's ok for the RL algorithm to stop.
        takes in env and the number of steps executed so far.
    replay_buffer_size: int
        How many memories to store in the replay buffer.
    batch_size: int
        How many transitions to sample each time experience is replayed.
    gamma: float
        Discount Factor
    learning_starts: int
        After how many environment steps to start replaying experiences
    learning_freq: int
        How many steps of environment to take between every experience replay
    frame_history_len: int
        How many past frames to include as input to the model.
    target_update_freq: int
        How many experience replay rounds (not steps!) to perform between
        each update to the target Q network
    grad_norm_clipping: float or None
        If not None gradients' norms are clipped to this value.
    """
    assert isinstance(env.observation_space, gym.spaces.Box)
    assert isinstance(env.action_space, gym.spaces.Discrete)

    ###############
    # BUILD MODEL #
    ###############

    if len(env.observation_space.shape) == 1:
        # This means we are running on low-dimensional observations (e.g. RAM)
        input_shape = env.observation_space.shape
    else:
        img_h, img_w, img_c = env.observation_space.shape
        input_shape = (img_h, img_w, frame_history_len * img_c)
    num_actions = env.action_space.n

    # set up placeholders
    # placeholder for current observation (or state)
    obs_t_ph = tf.placeholder(tf.uint8, [None] + list(input_shape))
    # placeholder for current action
    act_t_ph = tf.placeholder(tf.int32, [None])
    # placeholder for current reward
    rew_t_ph = tf.placeholder(tf.float32, [None])
    # placeholder for next observation (or state)
    obs_tp1_ph = tf.placeholder(tf.uint8, [None] + list(input_shape))
    # placeholder for end of episode mask
    # this value is 1 if the next state corresponds to the end of an episode,
    # in which case there is no Q-value at the next state; at the end of an
    # episode, only the current state reward contributes to the target, not the
    # next state Q-value (i.e. target is just rew_t_ph, not rew_t_ph + gamma * q_tp1)
    done_mask_ph = tf.placeholder(tf.float32, [None])

    # casting to float on GPU ensures lower data transfer times.
    obs_t_float = tf.cast(obs_t_ph, tf.float32) / 255.0
    obs_tp1_float = tf.cast(obs_tp1_ph, tf.float32) / 255.0

    # Online network: Q(s_t, .) under scope "q_func".
    q_eval_values = q_func(obs_t_float, num_actions, 'q_func', reuse=False)
    q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='q_func')

    # Target network: Q(s_{t+1}, .) under scope "target_q_func".
    q_target_values = q_func(obs_tp1_float,
                             num_actions,
                             'target_q_func',
                             reuse=False)
    target_q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                           scope='target_q_func')

    # Q-value of the action that was actually taken in each sampled transition.
    eval_acts = tf.one_hot(act_t_ph, num_actions, dtype=tf.float32)
    q_taken = tf.reduce_sum(eval_acts * q_eval_values, axis=1)

    # Bellman target. The (1 - done) factor removes the bootstrap term for
    # transitions that ended an episode, so their target is the reward alone.
    q_next_max = tf.reduce_max(q_target_values, axis=1)
    td_target = rew_t_ph + (1.0 - done_mask_ph) * gamma * q_next_max
    delta_t = td_target - q_taken

    # Scalar loss: mean squared Bellman error over the batch.
    total_error = tf.reduce_mean(tf.square(delta_t))

    # construct optimization op (with gradient clipping)
    learning_rate = tf.placeholder(tf.float32, (), name="learning_rate")
    optimizer = optimizer_spec.constructor(learning_rate=learning_rate,
                                           **optimizer_spec.kwargs)
    # Only the online network's variables are optimized; the target network
    # changes exclusively through update_target_fn.
    train_fn = minimize_and_clip(optimizer,
                                 total_error,
                                 var_list=q_func_vars,
                                 clip_val=grad_norm_clipping)

    # update_target_fn will be called periodically to copy Q network to target Q network
    # Variables are paired by sorting both collections by name.
    update_target_fn = []
    for var, var_target in zip(
            sorted(q_func_vars, key=lambda v: v.name),
            sorted(target_q_func_vars, key=lambda v: v.name)):
        update_target_fn.append(var_target.assign(var))
    update_target_fn = tf.group(*update_target_fn)

    # construct the replay buffer
    replay_buffer = ReplayBuffer(replay_buffer_size, frame_history_len)

    ###############
    # RUN ENV     #
    ###############
    model_initialized = False
    num_param_updates = 0
    mean_episode_reward = -float('nan')
    best_mean_episode_reward = -float('inf')
    last_obs = env.reset()
    LOG_EVERY_N_STEPS = 10000

    for t in itertools.count():
        ### 1. Check stopping criterion
        if stopping_criterion is not None and stopping_criterion(env, t):
            break

        ### 2. Step the env and store the transition
        # The newest frame is pushed first so that the replay buffer can build
        # the stacked-frame network input via encode_recent_observation().
        frame_id = replay_buffer.store_frame(last_obs)
        epsilon = exploration.value(t)

        # Epsilon-greedy action selection; until the network variables have
        # been initialized every action is random.
        if not model_initialized:
            action = env.action_space.sample()
        elif random.random() < epsilon:
            action = env.action_space.sample()
        else:
            img_in = replay_buffer.encode_recent_observation()
            q = session.run(q_eval_values,
                            feed_dict={obs_t_ph: img_in[None, :]})
            action = np.argmax(q)
        next_state, reward, done, _ = env.step(action)
        replay_buffer.store_effect(frame_id,
                                   action=action,
                                   done=done,
                                   reward=reward)

        # Start a fresh episode when the previous one has ended.
        if done:
            next_state = env.reset()

        last_obs = next_state

        # at this point, the environment should have been advanced one step (and
        # reset if done was true), and last_obs should point to the new latest
        # observation

        ### 3. Perform experience replay and train the network.
        # note that this is only done if the replay buffer contains enough samples
        # for us to learn something useful -- until then, the model will not be
        # initialized and random actions should be taken
        if (t > learning_starts and t % learning_freq == 0
                and replay_buffer.can_sample(batch_size)):
            # 3.a: sample a batch of transitions.
            (batch_state, batch_action, batch_reward, batch_next,
             batch_done) = replay_buffer.sample(batch_size)

            # 3.b: one-time variable initialization. The target network is
            # synced right away so that it starts as a copy of the online
            # network, and the flag is flipped so that greedy actions and
            # progress logging become active.
            if not model_initialized:
                initialize_interdependent_variables(session,
                                                    tf.global_variables(), {
                                                        obs_t_ph: batch_state,
                                                        obs_tp1_ph: batch_next,
                                                    })
                session.run(update_target_fn)
                model_initialized = True

            # 3.c: one gradient step on the Bellman error.
            feeding = {
                obs_t_ph: batch_state,
                obs_tp1_ph: batch_next,
                act_t_ph: batch_action,
                rew_t_ph: batch_reward,
                done_mask_ph: batch_done,
                learning_rate: optimizer_spec.lr_schedule.value(t)
            }

            session.run(train_fn, feed_dict=feeding)
            num_param_updates += 1

            # 3.d: sync the target network every target_update_freq replay
            # rounds (counted in parameter updates, not environment steps).
            if num_param_updates % target_update_freq == 0:
                session.run(update_target_fn)

        ### 4. Log progress
        episode_rewards = get_wrapper_by_name(env,
                                              "Monitor").get_episode_rewards()
        if len(episode_rewards) > 0:
            mean_episode_reward = np.mean(episode_rewards[-100:])
        if len(episode_rewards) > 100:
            best_mean_episode_reward = max(best_mean_episode_reward,
                                           mean_episode_reward)
        if t % LOG_EVERY_N_STEPS == 0 and model_initialized:
            print("Timestep %d" % (t, ))
            print("mean reward (100 episodes) %f" % mean_episode_reward)
            print("best mean reward %f" % best_mean_episode_reward)
            print("episodes %d" % len(episode_rewards))
            print("exploration %f" % exploration.value(t))
            print("learning_rate %f" % optimizer_spec.lr_schedule.value(t))
            sys.stdout.flush()
import tensorflow as tf
import numpy as np


# Synthetic training set: 100 random inputs whose targets lie exactly on the
# line y = 0.1 * x + 0.3.
x_data = np.random.rand(100).astype(float)
y_data = 0.1 * x_data + 0.3

# Trainable slope W and intercept b of the model y = W * x_data + b.
# Gradient descent is expected to drive them towards 0.1 and 0.3.
W = tf.Variable(tf.random_uniform([1], minval=-1.0, maxval=1.0))
b = tf.Variable(tf.zeros([1]))
y = W * x_data + b

# Objective: mean squared error between the prediction and the targets,
# minimized with plain gradient descent at step size 0.5.
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train = optimizer.minimize(loss)

# Op that initializes all variables; it has to run before any training step.
init = tf.global_variables_initializer()

# Open a session on the default graph and initialize the variables.
sess = tf.Session()
sess.run(init)

# 201 training steps, reporting the current W and b on every 20th step.
for step in range(201):
    sess.run(train)
    if step % 20 != 0:
        continue
    w_val, b_val = sess.run([W, b])
    print(step, w_val, b_val)
def get_train_model(num_channels, label_len, b, img_size, training=False):
    """Build the recognition graph together with its input placeholders.

    The main branch is a stack of ``conv`` / ``small_basic_block`` layers with
    batch normalization, ReLU, max pooling and dropout. Three earlier feature
    maps (the raw input and two intermediate activations) are average-pooled,
    rescaled by their mean square and concatenated with the main branch output
    before a final 1x1 convolution. The result is averaged over axis 2 to give
    the logits.

    Args:
        num_channels: size of the last axis of the input placeholder; also
            passed to the first ``conv`` as its input channel count.
        label_len: unused by this function; kept for interface compatibility.
        b: size of the first (batch) axis of the input placeholder.
        img_size: indexable; ``img_size[0]`` and ``img_size[1]`` are axes 1
            and 2 of the input placeholder.
        training: forwarded to every ``tf.layers.batch_normalization`` and
            ``tf.layers.dropout`` call. The default ``False`` reproduces the
            previous behavior, in which dropout is a no-op and batch
            normalization uses its moving statistics. When passing ``True``
            the caller must also run the ops in ``tf.GraphKeys.UPDATE_OPS``
            with the train op so the moving statistics are updated.

    Returns:
        Tuple ``(logits, inputs, targets, seq_len)``: the logits tensor, the
        float32 image placeholder, an int32 sparse placeholder for the labels
        and an int32 placeholder of shape ``[None]`` for sequence lengths.
    """

    def _scale_by_mean_square(t):
        # Divide the whole tensor by the mean of its squared entries.
        # NOTE(review): an all-zero tensor (possible after ReLU) makes this a
        # division by zero -- confirm whether an epsilon is wanted here.
        mean_sq = tf.reduce_mean(tf.square(t))
        return tf.div(t, mean_sq)

    inputs = tf.placeholder(
        tf.float32,
        shape=(b, img_size[0], img_size[1], num_channels))

    # Sparse label placeholder, in the form required by ctc_loss.
    targets = tf.sparse_placeholder(tf.int32)

    # 1-D vector of sequence lengths, shape [batch_size,].
    seq_len = tf.placeholder(tf.int32, [None])
    x = inputs

    # --- main branch -------------------------------------------------------
    x = conv(x, num_channels, 64, ksize=[3, 3])
    x = tf.layers.batch_normalization(x, training=training)
    x = tf.nn.relu(x)
    x = tf.nn.max_pool(x,
                       ksize=[1, 3, 3, 1],
                       strides=[1, 1, 1, 1],
                       padding='SAME')
    x = small_basic_block(x, 64, 64)
    x2 = x  # first intermediate feature map kept for the side branch
    x = tf.layers.batch_normalization(x, training=training)
    x = tf.nn.relu(x)

    # Stride 2 along axis 1 only.
    x = tf.nn.max_pool(x,
                       ksize=[1, 3, 3, 1],
                       strides=[1, 2, 1, 1],
                       padding='SAME')
    x = small_basic_block(x, 64, 256)
    x = tf.layers.batch_normalization(x, training=training)
    x = tf.nn.relu(x)
    x = small_basic_block(x, 256, 256)
    x3 = x  # second intermediate feature map kept for the side branch
    x = tf.layers.batch_normalization(x, training=training)

    x = tf.nn.relu(x)
    x = tf.nn.max_pool(x,
                       ksize=[1, 3, 3, 1],
                       strides=[1, 2, 1, 1],
                       padding='SAME')
    x = tf.layers.dropout(x, training=training)

    x = conv(x, 256, 256, ksize=[4, 1])
    x = tf.layers.dropout(x, training=training)
    x = tf.layers.batch_normalization(x, training=training)
    x = tf.nn.relu(x)

    x = conv(x, 256, NUM_CHARS + 1, ksize=[1, 13], pad='SAME')
    x = tf.nn.relu(x)
    x = _scale_by_mean_square(x)

    # --- side branches -----------------------------------------------------
    # Each earlier feature map is average-pooled along axis 1 (factor 4 for
    # the input and x2, factor 2 for x3) and rescaled like the main branch.
    x1 = tf.nn.avg_pool(inputs,
                        ksize=[1, 4, 1, 1],
                        strides=[1, 4, 1, 1],
                        padding='SAME')
    x1 = _scale_by_mean_square(x1)

    x2 = tf.nn.avg_pool(x2,
                        ksize=[1, 4, 1, 1],
                        strides=[1, 4, 1, 1],
                        padding='SAME')
    x2 = _scale_by_mean_square(x2)

    x3 = tf.nn.avg_pool(x3,
                        ksize=[1, 2, 1, 1],
                        strides=[1, 2, 1, 1],
                        padding='SAME')
    x3 = _scale_by_mean_square(x3)

    # --- fusion ------------------------------------------------------------
    # Concatenate along the channel axis, mix with a 1x1 convolution and
    # average over axis 2 to obtain the logits.
    x = tf.concat([x, x1, x2, x3], 3)
    x = conv(x, x.get_shape().as_list()[3], NUM_CHARS + 1, ksize=(1, 1))
    logits = tf.reduce_mean(x, axis=2)

    return logits, inputs, targets, seq_len
Esempio n. 59
0
File: AC.py Progetto: syyunn/PINNs
    def __init__(self, x0, u0, x1, layers, dt, lb, ub, q):
        """Store the problem data, build the TensorFlow graph and initialize it.

        Args:
            x0: 2-D array; its second dimension sets the width of the
                ``x0_tf`` placeholder.
            u0: 2-D array; its second dimension sets the width of the
                ``u0_tf`` placeholder.
            x1: 2-D array; its second dimension sets the width of the
                ``x1_tf`` placeholder. The loss compares rows 0 and 1 of the
                predictions made at these points.
            layers: layer specification passed to ``self.initialize_NN``.
            dt: stored on the instance as ``self.dt``.
            lb: stored on the instance as ``self.lb``.
            ub: stored on the instance as ``self.ub``.
            q: selects the ``Butcher_IRK<q>.txt`` weight table; values below 1
                are clamped to 1. Presumably the number of implicit
                Runge-Kutta stages -- confirm against the weight files.
        """
        self.lb = lb
        self.ub = ub

        self.x0 = x0
        self.x1 = x1

        self.u0 = u0

        self.layers = layers
        self.dt = dt
        self.q = max(q, 1)

        # Initialize NN
        self.weights, self.biases = self.initialize_NN(layers)

        # Load IRK weights. The clamped self.q is used throughout so that the
        # table that is loaded always matches the placeholder widths below.
        tmp = np.float32(
            np.loadtxt("../../Utilities/IRK_weights/Butcher_IRK%d.txt" %
                       (self.q),
                       ndmin=2))
        n_weights = self.q**2 + self.q
        self.IRK_weights = np.reshape(tmp[0:n_weights], (self.q + 1, self.q))
        self.IRK_times = tmp[n_weights:]

        # tf placeholders and graph
        self.sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=True))

        self.x0_tf = tf.placeholder(tf.float32, shape=(None, self.x0.shape[1]))
        self.x1_tf = tf.placeholder(tf.float32, shape=(None, self.x1.shape[1]))
        self.u0_tf = tf.placeholder(tf.float32, shape=(None, self.u0.shape[1]))
        self.dummy_x0_tf = tf.placeholder(
            tf.float32,
            shape=(None, self.q))  # dummy variable for fwd_gradients
        self.dummy_x1_tf = tf.placeholder(
            tf.float32,
            shape=(None, self.q + 1))  # dummy variable for fwd_gradients

        self.U0_pred = self.net_U0(self.x0_tf)  # N x (q+1)
        self.U1_pred, self.U1_x_pred = self.net_U1(self.x1_tf)  # N1 x (q+1)

        # Loss: squared mismatch between u0 and U0_pred, plus the squared
        # difference between rows 0 and 1 of U1_pred and of U1_x_pred.
        self.loss = (
            tf.reduce_sum(tf.square(self.u0_tf - self.U0_pred)) +
            tf.reduce_sum(tf.square(self.U1_pred[0, :] - self.U1_pred[1, :])) +
            tf.reduce_sum(
                tf.square(self.U1_x_pred[0, :] - self.U1_x_pred[1, :])))

        # Two optimizers are built on the same loss: SciPy L-BFGS-B and Adam.
        self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(
            self.loss,
            method="L-BFGS-B",
            options={
                "maxiter": 50000,
                "maxfun": 50000,
                "maxcor": 50,
                "maxls": 50,
                "ftol": 1.0 * np.finfo(float).eps,
            },
        )

        self.optimizer_Adam = tf.train.AdamOptimizer()
        self.train_op_Adam = self.optimizer_Adam.minimize(self.loss)

        init = tf.global_variables_initializer()
        self.sess.run(init)
Esempio n. 60
0
def distanceFunc(X, mu):
    """Return the squared distances between every centroid and every point.

    ``X`` gets a new leading axis and ``mu`` a new axis at position 1, so the
    subtraction broadcasts over all (centroid, point) pairs. The squared
    differences are then summed over axis 2.
    """
    points = tf.expand_dims(X, 0)
    centroids = tf.expand_dims(mu, 1)
    squared_diff = tf.square(tf.subtract(points, centroids))
    return tf.reduce_sum(squared_diff, axis=2)