Code Example #1
    def top1(self, yhat):
        # TOP1 ranking loss (used in session-based recommenders such as GRU4Rec):
        # the target score on the diagonal is ranked against every other column.
        yhatT = tf.transpose(yhat)
        term1 = tf.reduce_mean(
            tf.nn.sigmoid(-tf.diag_part(yhat) + yhatT) + tf.nn.sigmoid(yhatT**2),
            axis=0)
        term2 = tf.nn.sigmoid(tf.diag_part(yhat)**2) / self.batch_size
        return tf.reduce_mean(term1 - term2)
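The method assumes `yhat` is a square score matrix whose diagonal holds the target (positive) score of each example, so tf.diag_part extracts one positive score per row. A minimal illustration with made-up numbers:

# Illustration only: rows are sessions, columns are candidate items, and the
# target item of session i is item i.
scores = tf.constant([[5.0, 1.0, 0.5],
                      [0.2, 4.0, 0.1],
                      [0.3, 0.8, 6.0]])
target_scores = tf.diag_part(scores)  # [5.0, 4.0, 6.0]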
Code Example #2
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
    """"""
    if wts is None:
        wts = [1.0] * sigmas.get_shape()[0]

    # Accept either 2-D (matrix) or 3-D (batched sequence) inputs.
    if len(X.shape) == 2:
        # matrix
        XX = tf.matmul(X, X, transpose_b=True)
        XY = tf.matmul(X, Y, transpose_b=True)
        YY = tf.matmul(Y, Y, transpose_b=True)
    elif len(X.shape) == 3:
        # 3-D tensor -- entries are Frobenius inner products over the last two axes
        XX = tf.tensordot(X, X, axes=[[1, 2], [1, 2]])
        XY = tf.tensordot(X, Y, axes=[[1, 2], [1, 2]])
        YY = tf.tensordot(Y, Y, axes=[[1, 2], [1, 2]])
    else:
        raise ValueError("X must be a 2-D or 3-D tensor, got shape %s" % (X.shape,))

    X_sqnorms = tf.diag_part(XX)
    Y_sqnorms = tf.diag_part(YY)

    r = lambda x: tf.expand_dims(x, 0)
    c = lambda x: tf.expand_dims(x, 1)

    K_XX, K_XY, K_YY = 0, 0, 0
    for sigma, wt in zip(tf.unstack(sigmas, axis=0), wts):
        gamma = 1 / (2 * sigma**2)
        K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
        K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
        K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))

    return K_XX, K_XY, K_YY, tf.reduce_sum(wts)
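A hedged usage sketch (shapes, bandwidths and weights are made up; Code Example #15 below obtains its bandwidth from a median-pairwise-distance heuristic before calling this function):

# Usage sketch: two batches of 64 ten-dimensional samples compared under a
# mixture of three RBF bandwidths. All values are illustrative only.
X = tf.random_normal([64, 10])
Y = tf.random_normal([64, 10])
sigmas = tf.constant([1.0, 5.0, 10.0])
K_XX, K_XY, K_YY, wt_sum = _mix_rbf_kernel(X, Y, sigmas, wts=[1.0, 1.0, 1.0])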
Code Example #3
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    m = tf.cast(K_XX.get_shape()[0], tf.float32)  # Assumes X, Y are same shape

    ### Get the various sums of kernels that we'll use
    # Kts drop the diagonal, but we don't need to compute them explicitly
    if const_diagonal is not False:
        const_diagonal = tf.cast(const_diagonal, tf.float32)
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal
        sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
    else:
        diag_X = tf.diag_part(K_XX)
        diag_Y = tf.diag_part(K_YY)

        sum_diag_X = tf.reduce_sum(diag_X)
        sum_diag_Y = tf.reduce_sum(diag_Y)

        sum_diag2_X = sq_sum(diag_X)
        sum_diag2_Y = sq_sum(diag_Y)

    Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
    Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
    K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
    K_XY_sums_1 = tf.reduce_sum(K_XY, 1)

    Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
    Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
    K_XY_sum = tf.reduce_sum(K_XY_sums_0)

    Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
    Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
    K_XY_2_sum = sq_sum(K_XY)

    if biased:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m) +
                (Kt_YY_sum + sum_diag_Y) / (m * m) -
                2 * K_XY_sum / (m * m))
    else:
        mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m - 1)) +
                (Kt_YY_sum + sum_diag_Y) / (m * (m - 1)) -
                2 * K_XY_sum / (m * m))

    var_est = (
        2 / (m**2 * (m - 1)**2) *
        (2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum +
         2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
        - (4 * m - 6) / (m**3 * (m - 1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
        + 4 * (m - 2) / (m**3 * (m - 1)**2) *
        (sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
        - 4 * (m - 3) / (m**3 * (m - 1)**2) * K_XY_2_sum
        - (8 * m - 12) / (m**5 * (m - 1)) * K_XY_sum**2
        + 8 / (m**3 * (m - 1)) *
        (1 / m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum -
         dot(Kt_XX_sums, K_XY_sums_1) - dot(Kt_YY_sums, K_XY_sums_0)))

    return mmd2, var_est
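`sq_sum` and `dot` are helpers defined elsewhere in the module this snippet comes from; definitions along the following lines are assumed (sum of squared entries and a plain inner product):

def sq_sum(t):
    # Assumed helper: sum of the squared entries of a tensor.
    return tf.reduce_sum(tf.square(t))

def dot(a, b):
    # Assumed helper: inner product of two tensors of the same shape.
    return tf.reduce_sum(a * b)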
Code Example #4
def zero_diag(input):
    """Helper function that zeros matrix diagonal.

    Args:
      input: 2-D float32 `Tensor`.

    Returns:
      2-D float32 `Tensor` with diagonal zeroed.
    """
    return input - tf.diag(tf.diag_part(input))
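A one-line sanity check (illustrative only):

hollow = zero_diag(tf.ones([3, 3]))  # equals tf.ones([3, 3]) - tf.eye(3)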
Code Example #5
File: facenet.py Project: preethiraksha/Temporary
def decov_loss(xs):
    """DeCov loss as described in https://arxiv.org/pdf/1511.06068.pdf,
    'Reducing Overfitting in Deep Networks by Decorrelating Representations'.
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss
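A hedged usage sketch (the activation shape is made up; the first dimension must be the batch dimension, since the covariance is estimated across it):

acts = tf.random_normal([32, 64])  # hypothetical activations: batch of 32, 64 features
loss = decov_loss(acts)            # penalizes the squared off-diagonal feature covariances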
Code Example #6
def rank_loss(sentence_emb, image_emb, margin=0.2):
  """Experimental rank loss, thanks to kkurach@ for the code."""
  with tf.name_scope("rank_loss"):
    # Normalize first as this is assumed in cosine similarity later.
    sentence_emb = tf.nn.l2_normalize(sentence_emb, 1)
    image_emb = tf.nn.l2_normalize(image_emb, 1)
    # Both sentence_emb and image_emb have size [batch, depth].
    scores = tf.matmul(image_emb, tf.transpose(sentence_emb))  # [batch, batch]
    diagonal = tf.diag_part(scores)  # [batch]
    cost_s = tf.maximum(0.0, margin - diagonal + scores)  # [batch, batch]
    cost_im = tf.maximum(
        0.0, margin - tf.reshape(diagonal, [-1, 1]) + scores)  # [batch, batch]
    # Clear diagonals.
    batch_size = tf.shape(sentence_emb)[0]
    empty_diagonal_mat = tf.ones_like(cost_s) - tf.eye(batch_size)
    cost_s *= empty_diagonal_mat
    cost_im *= empty_diagonal_mat
    return tf.reduce_mean(cost_s) + tf.reduce_mean(cost_im)
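Usage sketch with made-up embeddings; both inputs must be [batch, depth] and aligned so that row i of each tensor comes from the same image/caption pair:

sentence_emb = tf.random_normal([16, 256])
image_emb = tf.random_normal([16, 256])
loss = rank_loss(sentence_emb, image_emb, margin=0.2)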
Code Example #7
    def posterior_pred(self, x):
        # Solve K_XX^{-1} y using the cached Cholesky factor L_xx.
        self.Kinv_Y = tf.cholesky_solve(self.L_xx, self.t_Y)

        self.K_xX = self.create_kernel(x, self.t_X)
        self.K_xx = self.create_kernel(x, x)

        # Predictive mean: K_xX K_XX^{-1} y.
        self.y_mu = tf.matmul(self.K_xX, self.Kinv_Y)

        # Predictive variance: diag(K_xx) + noise minus the column-wise squared
        # norm of v = L_xx^{-1} K_Xx.
        self.K_xx_d = tf.diag_part(self.K_xx) + self.noise_var * tf.ones(
            [tf.shape(x)[0]], dtype=self.dtype)
        self.y_var = self.K_xx_d - tf.reduce_sum(
            tf.square(
                tf.matrix_triangular_solve(self.L_xx, tf.transpose(self.K_xX))),
            axis=0)
        self.y_var = self.y_var[:, tf.newaxis]

        return self.y_mu, self.y_var
Code Example #8
def pr_re_fbeta(cm, pos_indices, beta=1):
  """Uses a confusion matrix to compute precision, recall and fbeta."""
  num_classes = cm.shape[0]
  neg_indices = [i for i in range(num_classes) if i not in pos_indices]
  cm_mask = np.ones([num_classes, num_classes])
  cm_mask[neg_indices, neg_indices] = 0
  diag_sum = tf.reduce_sum(tf.diag_part(cm * cm_mask))

  cm_mask = np.ones([num_classes, num_classes])
  cm_mask[:, neg_indices] = 0
  tot_pred = tf.reduce_sum(cm * cm_mask)

  cm_mask = np.ones([num_classes, num_classes])
  cm_mask[neg_indices, :] = 0
  tot_gold = tf.reduce_sum(cm * cm_mask)

  pr = safe_div(diag_sum, tot_pred)
  re = safe_div(diag_sum, tot_gold)
  fbeta_score = safe_div((1. + beta**2) * pr * re, beta**2 * pr + re)

  return pr, re, fbeta_score
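`safe_div` is defined elsewhere; a definition along these lines (returning 0 where the denominator is 0) is assumed:

def safe_div(numerator, denominator):
    # Assumed helper: floating-point division that returns 0 for a zero denominator.
    numerator = tf.to_float(numerator)
    denominator = tf.to_float(denominator)
    return tf.where(tf.greater(denominator, 0),
                    numerator / tf.maximum(denominator, 1e-12),
                    tf.zeros_like(numerator))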
Code Example #9
def regularize_diag_off_diag_dip(covariance_matrix, lambda_od, lambda_d):
    """Compute on and off diagonal regularizers for DIP-VAE models.

  Penalize deviations of covariance_matrix from the identity matrix. Uses
  different weights for the deviations of the diagonal and off diagonal entries.

  Args:
    covariance_matrix: Tensor of size [num_latent, num_latent] to regularize.
    lambda_od: Weight of penalty for off diagonal elements.
    lambda_d: Weight of penalty for diagonal elements.

  Returns:
    dip_regularizer: Regularized deviation from diagonal of covariance_matrix.
  """
    covariance_matrix_diagonal = tf.diag_part(covariance_matrix)
    covariance_matrix_off_diagonal = covariance_matrix - tf.diag(
        covariance_matrix_diagonal)
    dip_regularizer = tf.add(
        lambda_od * tf.reduce_sum(covariance_matrix_off_diagonal**2),
        lambda_d * tf.reduce_sum((covariance_matrix_diagonal - 1)**2))
    return dip_regularizer
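Usage sketch, assuming the covariance of the encoder means has already been estimated; all names, shapes and penalty weights below are hypothetical:

z_mean = tf.random_normal([128, 10])  # hypothetical encoder means, [batch, num_latent]
z_centered = z_mean - tf.reduce_mean(z_mean, axis=0, keepdims=True)
cov = tf.matmul(z_centered, z_centered, transpose_a=True) / 128.0
dip_penalty = regularize_diag_off_diag_dip(cov, lambda_od=10.0, lambda_d=100.0)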
Code Example #10
File: triplet_loss.py Project: AltschulerWu-Lab/MUSE
def _pairwise_distances(embeddings, squared=False):
    """Compute the 2D matrix of distances between all the embeddings.
    Args:
        embeddings: tensor of shape (batch_size, embed_dim)
        squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                 If false, output is the pairwise euclidean distance matrix.
    Returns:
        pairwise_distances: tensor of shape (batch_size, batch_size)
    """
    # Get the dot product between all embeddings
    # shape (batch_size, batch_size)
    dot_product = tf.matmul(embeddings, tf.transpose(embeddings))

    # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`.
    # This also provides more numerical stability (the diagonal of the result will be exactly 0).
    # shape (batch_size,)
    square_norm = tf.diag_part(dot_product)

    # Compute the pairwise distance matrix as we have:
    # ||a - b||^2 = ||a||^2  - 2 <a, b> + ||b||^2
    # shape (batch_size, batch_size)
    distances = tf.expand_dims(
        square_norm, 1) - 2.0 * dot_product + tf.expand_dims(square_norm, 0)

    # Because of computation errors, some distances might be negative so we put everything >= 0.0
    distances = tf.maximum(distances, 0.0)

    if not squared:
        # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal)
        # we need to add a small epsilon where distances == 0.0
        mask = tf.to_float(tf.equal(distances, 0.0))
        distances = distances + mask * 1e-16

        distances = tf.sqrt(distances)

        # Correct the epsilon added: set the distances on the mask to be exactly 0.0
        distances = distances * (1.0 - mask)

    return distances
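Usage sketch (the embedding shape is made up); because the squared norms are read off the diagonal of the same dot-product matrix, the diagonal of the result is exactly zero:

embeddings = tf.random_normal([32, 128])
dist = _pairwise_distances(embeddings)                   # [32, 32] Euclidean distances
sq_dist = _pairwise_distances(embeddings, squared=True)  # squared distances, no sqrt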
Code Example #11
    def bpr(self, yhat):
        # BPR (Bayesian Personalized Ranking) loss over in-batch negatives:
        # the diagonal target score is compared against every other column.
        yhatT = tf.transpose(yhat)
        return tf.reduce_mean(
            -tf.log(tf.nn.sigmoid(tf.diag_part(yhat) - yhatT)))
Code Example #12
def add_contrastive_loss(hidden,
                         hidden_norm=True,
                         temperature=1.0,
                         tpu_context=None,
                         weights=1.0):
    """Compute loss for model.

  Args:
    hidden: hidden vector (`Tensor`) of shape (bsz, dim).
    hidden_norm: whether or not to use normalization on the hidden vector.
    temperature: a `floating` number for temperature scaling.
    tpu_context: context information for tpu.
    weights: a weighting number or vector.

  Returns:
    A loss scalar.
    The logits for contrastive prediction task.
    The labels for contrastive prediction task.
  """
    # Get (normalized) hidden1 and hidden2.
    if hidden_norm:
        hidden = tf.math.l2_normalize(hidden, -1)
    hidden1, hidden2 = tf.split(hidden, 2, 0)
    batch_size = tf.shape(hidden1)[0]

    # Gather hidden1/hidden2 across replicas and create local labels.
    if tpu_context is not None:
        hidden1_large = tpu_cross_replica_concat(hidden1, tpu_context)
        hidden2_large = tpu_cross_replica_concat(hidden2, tpu_context)
        enlarged_batch_size = tf.shape(hidden1_large)[0]
        # TODO(iamtingchen): more elegant way to convert u32 to s32 for replica_id.
        replica_id = tf.cast(tf.cast(xla.replica_id(), tf.uint32), tf.int32)
        labels_idx = tf.range(batch_size) + replica_id * batch_size
        labels = tf.one_hot(labels_idx, enlarged_batch_size * 2)
        masks = tf.one_hot(labels_idx, enlarged_batch_size)
    else:
        hidden1_large = hidden1
        hidden2_large = hidden2
        labels = tf.one_hot(tf.range(batch_size), batch_size * 2)
        masks = tf.one_hot(tf.range(batch_size), batch_size)

    logits_aa = tf.matmul(hidden1, hidden1_large,
                          transpose_b=True) / temperature
    logits_aa = logits_aa - masks * LARGE_NUM
    logits_bb = tf.matmul(hidden2, hidden2_large,
                          transpose_b=True) / temperature
    logits_bb = logits_bb - masks * LARGE_NUM
    logits_ab = tf.matmul(hidden1, hidden2_large,
                          transpose_b=True) / temperature
    logits_ba = tf.matmul(hidden2, hidden1_large,
                          transpose_b=True) / temperature

    logits_a = tf.concat([logits_ab, logits_aa], 1)
    logits_b = tf.concat([logits_ba, logits_bb], 1)

    if FLAGS.loss_func != 'NT-Xent':
        logits_positive = tf.diag_part(logits_ab)
        temp_positive = tf.tile(tf.expand_dims(logits_positive, -1),
                                [1, logits_a.shape[1]])
        masks_a = tf.cast(tf.greater_equal(logits_a, temp_positive - 1e-5),
                          tf.float32)
        masks_b = tf.cast(tf.greater_equal(logits_b, temp_positive - 1e-5),
                          tf.float32)
        logits_a = logits_a - masks_a * LARGE_NUM
        logits_b = logits_b - masks_b * LARGE_NUM
        logits_negative_a = tf.reduce_max(logits_a, axis=1)
        logits_negative_b = tf.reduce_max(logits_b, axis=1)
        #print(logits_negative_a, logits_negative_b)
        if FLAGS.loss_func == 'NT-Logistic':
            loss_a = tf.reduce_mean(
                tf.log(1 + tf.exp(-logits_positive)) +
                tf.log(1 + tf.exp(logits_negative_a)))
            loss_b = tf.reduce_mean(
                tf.log(1 + tf.exp(-logits_positive)) +
                tf.log(1 + tf.exp(logits_negative_b)))
            tf.losses.add_loss(loss_a + loss_b)
            #print(loss_a, loss_b)
            return loss_a + loss_b, logits_ab, labels
        else:
            loss_a = tf.reduce_mean(
                tf.maximum(logits_negative_a - logits_positive + MARGIN, 0))
            loss_b = tf.reduce_mean(
                tf.maximum(logits_negative_b - logits_positive + MARGIN, 0))
            tf.losses.add_loss(loss_a + loss_b)
            return loss_a + loss_b, logits_ab, labels

    loss_a = tf.losses.softmax_cross_entropy(labels, logits_a, weights=weights)
    loss_b = tf.losses.softmax_cross_entropy(labels, logits_b, weights=weights)
    #print(loss_a, loss_b)
    loss = loss_a + loss_b
    return loss, logits_ab, labels
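This function relies on module-level names that are not shown here: LARGE_NUM, MARGIN, FLAGS (with a loss_func flag), xla, and tpu_cross_replica_concat. A sketch of the constants, with values that are assumptions for illustration only:

LARGE_NUM = 1e9  # large constant used to mask out self-similarity / positive logits
MARGIN = 1.0     # margin for the max-margin branch above (assumed value)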
Code Example #13
    def __init__(self,
                 embeddings,
                 latent_inters,
                 latent_varies,
                 degrees,
                 edge_types,
                 edge_type2dim,
                 placeholders,
                 margin=0.1,
                 neg_sample_weights=1.,
                 batch_size=100):
        self.embeddings = embeddings
        self.latent_inters = latent_inters  # intermediate layers of the model
        self.latent_varies = latent_varies  # variation terms of the model
        self.edge_types = edge_types  # edge types
        self.degrees = degrees
        self.edge_type2dim = edge_type2dim
        self.obj_type2n = {
            i: self.edge_type2dim[i, j][0][0]
            for i, j in self.edge_types
        }  #0:500 1:400
        self.margin = margin
        self.neg_sample_weights = neg_sample_weights
        self.batch_size = batch_size

        self.inputs = placeholders['batch']  #0
        self.batch_edge_type_idx = placeholders['batch_edge_type_idx']  #0
        self.batch_row_edge_type = placeholders['batch_row_edge_type']  #0
        self.batch_col_edge_type = placeholders['batch_col_edge_type']  #0

        self.row_inputs = tf.squeeze(gather_cols(self.inputs, [0]))  # → labels
        self.col_inputs = tf.squeeze(gather_cols(self.inputs, [1]))

        obj_type_n = [self.obj_type2n[i] for i in range(len(self.embeddings))]
        self.obj_type_lookup_start = tf.cumsum([0] + obj_type_n[:-1])
        self.obj_type_lookup_end = tf.cumsum(obj_type_n)

        labels = tf.reshape(tf.cast(self.row_inputs, dtype=tf.int64),
                            [self.batch_size, 1])

        # This block implements the negative-sampling scheme from the paper (it can be skipped on a first read).
        # Estimate the model through negative sampling: for each drug-drug edge
        # (vi, r, vj) in the graph, we sample a random edge (vi, r, vn), where
        # vn is chosen according to the sampling distribution Pr.
        neg_samples_list = []
        for i, j in self.edge_types:
            for k in range(self.edge_types[i, j]):
                neg_samples, _, _ = tf.nn.fixed_unigram_candidate_sampler(
                    true_classes=labels,
                    num_true=1,
                    num_sampled=self.batch_size,
                    unique=False,
                    range_max=len(self.degrees[i][k]),
                    distortion=0.75,
                    unigrams=self.degrees[i][k].tolist())
                neg_samples_list.append(neg_samples)
        self.neg_samples = tf.cast(tf.gather(neg_samples_list,
                                             self.batch_edge_type_idx),
                                   dtype=tf.int64)  # tf.int32

        self.preds = self.batch_predict(self.row_inputs, self.col_inputs)
        self.outputs = tf.diag_part(self.preds)  # diagonal elements of the prediction matrix
        self.outputs = tf.reshape(self.outputs, [-1])  # outputs are fed to the cross-entropy loss
        self.neg_preds = self.batch_predict(self.neg_samples, self.col_inputs)
        self.neg_outputs = tf.diag_part(self.neg_preds)
        self.neg_outputs = tf.reshape(self.neg_outputs, [-1])
        self.predict()
        self.build()
Code Example #14
    def _build_graph(
        Npartitions,
        voc_size,
        batch_size,
        gamma_regularizer,
        reg2,
        optimizer_param,
        optimizer_type,
        init_std_dev=.05,
    ):

        graph = tf.Graph()
        with graph.as_default():

            chosen_index_1 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
            chosen_index_2 = tf.placeholder(dtype=tf.int32, shape=(batch_size))
            is_corrections_pl = tf.placeholder_with_default(tf.ones(
                batch_size, dtype=tf.float32),
                                                            shape=(batch_size))

            learning_rate_pl = tf.placeholder(dtype=tf.float32)

            t_weights_free = tf.Variable(tf.truncated_normal(
                [Npartitions, Npartitions], mean=0., stddev=init_std_dev),
                                         dtype=tf.float32)
            t_weights_free_sym = t_weights_free + tf.transpose(t_weights_free)
            t_weights = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(t_weights_free_sym,
                               [Npartitions * Npartitions])),
                [Npartitions, Npartitions])

            t_topics_free = tf.Variable(tf.truncated_normal(
                [Npartitions, voc_size], mean=0., stddev=init_std_dev),
                                        dtype=tf.float32)
            t_topics = tf.nn.softmax(t_topics_free)  #default axis is (-1)

            t_topics_free_pl = tf.placeholder(tf.float32,
                                              shape=[Npartitions, voc_size])
            t_weights_free_pl = tf.placeholder(
                tf.float32, shape=[Npartitions, Npartitions])

            t_weights_free_assign_op = tf.assign(t_weights_free,
                                                 t_weights_free_pl)
            t_topics_free_assign_op = tf.assign(t_topics_free,
                                                t_topics_free_pl)

            t_gamma = gamma_regularizer
            t_gamma2 = reg2

            pre_target = tf.log((tf.reduce_sum((tf.matmul(
                tf.expand_dims(
                    tf.transpose(tf.gather(t_topics, chosen_index_1, axis=1)),
                    -1),
                tf.expand_dims(
                    tf.transpose(tf.gather(t_topics, chosen_index_2, axis=1)),
                    1)) * t_weights),
                                               axis=[1, 2])))
            target = tf.reduce_mean(is_corrections_pl * tf.where(
                tf.is_nan(pre_target), tf.zeros_like(pre_target), pre_target
            )) + t_gamma * tf.reduce_sum(
                tf.diag_part(t_weights)) + t_gamma2 * tf.reduce_sum(
                    tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))

            # now build the optimizer
            t_loss = -target

            #t_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            #t_optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum = .9)
            #t_optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)

            if optimizer_type == 'adam':
                t_optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate_pl, **optimizer_param)
            elif optimizer_type == 'rmsprop':
                t_optimizer = tf.train.RMSPropOptimizer(
                    learning_rate=learning_rate_pl, **optimizer_param)
            else:
                raise ValueError('Unknown optimizer')

            opt_vars = t_optimizer.variables()
            opt_vars_pls = [
                tf.placeholder(dtype=v.dtype, shape=v.shape) for v in opt_vars
            ]
            opt_vars_assigns = [
                tf.assign(v, pl) for v, pl in zip(opt_vars, opt_vars_pls)
            ]

            t_train_op = t_optimizer.minimize(t_loss)

            t_tfinit = tf.global_variables_initializer()
            saver = tf.train.Saver(max_to_keep=2)

            t_loss_to_display = -(target - (t_gamma * tf.reduce_sum(
                tf.diag_part(t_weights)) + t_gamma2 * tf.reduce_sum(
                    tf.diag_part(t_weights) / tf.reduce_sum(t_weights, axis=1))
                                            ))

            return (graph, t_tfinit, t_loss_to_display, t_topics, t_train_op,
                    t_weights, chosen_index_1, chosen_index_2,
                    is_corrections_pl, saver, t_topics_free_pl,
                    t_weights_free_pl, t_weights_free_assign_op,
                    t_topics_free_assign_op, pre_target, learning_rate_pl,
                    t_weights_free, t_topics_free, opt_vars, opt_vars_pls,
                    opt_vars_assigns)
Code Example #15
def invert(
    settings,
    samples,
    para_path,
    g_tolerance=None,
    e_tolerance=0.1,
    n_iter=None,
    max_iter=10000,
    heuristic_sigma=None,
):
    """
    Return the latent space points corresponding to a set of a samples
    ( from gradient descent )
    Note: this function is designed for ONE sample generation
    """
    # num_samples = samples.shape[0]
    # cast samples to float32

    samples = np.float32(samples)

    # get the model
    # if settings is a string, assume it's an identifier and load
    if isinstance(settings, str):
        settings = json.load(open("./experiments/settings/" + settings + ".txt", "r"))

    # print('Inverting', 1, 'samples using model', settings['identifier'], 'at epoch', epoch,)
    # if not g_tolerance is None:
    #     print('until gradient norm is below', g_tolerance)
    # else:
    #     print('until error is below', e_tolerance)

    # get parameters
    parameters = model.load_parameters(para_path)
    # # assertions
    # assert samples.shape[2] == settings['num_generated_features']
    # create VARIABLE Z
    Z = tf.get_variable(
        name="Z",
        shape=[1, settings["seq_length"], settings["latent_dim"]],
        initializer=tf.random_normal_initializer(),
    )
    # create outputs

    G_samples = generator_o(
        Z,
        settings["hidden_units_g"],
        settings["seq_length"],
        1,
        settings["num_generated_features"],
        reuse=False,
        parameters=parameters,
    )
    # generator_vars = ['hidden_units_g', 'seq_length', 'batch_size', 'num_generated_features', 'cond_dim', 'learn_scale']
    # generator_settings = dict((k, settings[k]) for k in generator_vars)
    # G_samples = model.generator(Z, **generator_settings, reuse=True)

    fd = None

    # define loss mmd-based loss
    if heuristic_sigma is None:
        heuristic_sigma = mmd.median_pairwise_distance_o(samples)  # this is noisy
        print("heuristic_sigma:", heuristic_sigma)
    samples = tf.reshape(
        samples, [1, settings["seq_length"], settings["num_generated_features"]]
    )
    Kxx, Kxy, Kyy, wts = mmd._mix_rbf_kernel(
        G_samples, samples, sigmas=tf.constant(value=heuristic_sigma, shape=(1, 1))
    )
    similarity_per_sample = tf.diag_part(Kxy)
    reconstruction_error_per_sample = 1 - similarity_per_sample
    # reconstruction_error_per_sample = tf.reduce_sum((tf.nn.l2_normalize(G_samples, dim=1) - tf.nn.l2_normalize(samples, dim=1))**2, axis=[1,2])
    similarity = tf.reduce_mean(similarity_per_sample)
    reconstruction_error = 1 - similarity
    # updater
    #    solver = tf.train.AdamOptimizer().minimize(reconstruction_error_per_sample, var_list=[Z])
    # solver = tf.train.RMSPropOptimizer(learning_rate=500).minimize(reconstruction_error, var_list=[Z])
    solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(
        reconstruction_error_per_sample, var_list=[Z]
    )
    # solver = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9).minimize(reconstruction_error_per_sample, var_list=[Z])

    grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0]
    grad_per_Z = tf.norm(grad_Z, axis=(1, 2))
    grad_norm = tf.reduce_mean(grad_per_Z)
    # solver = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(reconstruction_error, var_list=[Z])
    print("Finding latent state corresponding to samples...")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        error = sess.run(reconstruction_error, feed_dict=fd)
        g_n = sess.run(grad_norm, feed_dict=fd)
        # print(g_n)
        i = 0
        if n_iter is not None:
            while i < n_iter:
                _ = sess.run(solver, feed_dict=fd)
                error = sess.run(reconstruction_error, feed_dict=fd)
                i += 1
        else:
            if g_tolerance is not None:
                while g_n > g_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error, g_n = sess.run(
                        [reconstruction_error, grad_norm], feed_dict=fd
                    )
                    i += 1
                    print(error, g_n)
                    if i > max_iter:
                        break
            else:
                while np.abs(error) > e_tolerance:
                    _ = sess.run(solver, feed_dict=fd)
                    error = sess.run(reconstruction_error, feed_dict=fd)
                    i += 1
                    # print(error)
                    if i > max_iter:
                        break
        Zs = sess.run(Z, feed_dict=fd)
        Gs = sess.run(G_samples, feed_dict={Z: Zs})
        error_per_sample = sess.run(reconstruction_error_per_sample, feed_dict=fd)
        print("Z found in", i, "iterations with final reconstruction error of", error)
    tf.reset_default_graph()

    return Gs, Zs, error_per_sample, heuristic_sigma
Code Example #16
    def predict(self, k_test_test, sess, get_var=False):

        self.k_test_test = k_test_test

        if self.l_np is None:
            self._build_cholesky()
            start_time = time.time()

            while self.current_stability_eps < 10:
                try:
                    start_time = time.time()
                    self.l_np, self.v_np = sess.run(
                        [self.l, self.v],
                        feed_dict={
                            self.y_pl: self.output_y,
                            self.K_data_data_pl: self.k_data_data,
                            self.stability_eps: self.current_stability_eps
                        })
                    tf.logging.info("Computed L_DD in %.3f secs" %
                                    (time.time() - start_time))
                    break
                except tf.errors.InvalidArgumentError:
                    if self.current_stability_eps < 1:
                        self.current_stability_eps *= 10
                    else:
                        self.current_stability_eps += 1
                    tf.logging.info(
                        "Cholesky decomposition failed, trying larger epsilon"
                        ": {}".format(self.current_stability_eps))
        if self.current_stability_eps > 8:
            raise ArithmeticError("Could not compute Cholesky decomposition.")

        self.K_data_test_pl = tf.placeholder(tf.float64, [1291, 327],
                                             name="K_data_test")
        self.K_test_test_pl = tf.placeholder(tf.float64, [327, 327],
                                             name="K_test_test")

        a = tf.matrix_triangular_solve(self.l, self.K_data_test_pl)
        fmean = tf.matmul(a, self.v, transpose_a=True)

        fvar = tf.diag_part(self.K_test_test_pl) - tf.reduce_sum(
            tf.square(a), 0)
        fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, self.output_y.shape[1]])

        self.fmean = fmean
        self.fvar = fvar

        start_time = time.time()
        mean_pred, var_pred = sess.run(
            [self.fmean, self.fvar],
            feed_dict={
                self.K_data_test_pl: self.k_data_test,
                #self.K_data_data_pl: self.k_data_data,
                self.l: self.l_np,
                self.v: self.v_np,
                self.K_test_test_pl: self.k_test_test
            })
        tf.logging.info("Did regression in %.3f secs" %
                        (time.time() - start_time))

        return mean_pred, var_pred
Code Example #17
    def cross_entropy(self, yhat):
        return tf.reduce_mean(-tf.log(tf.diag_part(yhat) + 1e-24))
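When each row of `yhat` is a softmax distribution over the items in the batch and the target of row i sits in column i, this is the mean negative log-likelihood of the targets, with 1e-24 guarding against log(0). A small illustration with made-up logits:

logits = tf.random_normal([8, 8])  # hypothetical scores of 8 sessions against 8 items
yhat = tf.nn.softmax(logits)       # rows are probability distributions
loss = tf.reduce_mean(-tf.log(tf.diag_part(yhat) + 1e-24))  # same expression as above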
Code Example #18
    def create_model(self, x, y, *args):

        if self.process_y:

            self.f_mu = Regression().fit(x, y)
            self.Ymu = self.f_mu(x)
            self.Ys2 = np.std((y - self.Ymu))

            y = (y - self.Ymu) / self.Ys2

        self.t_X = tf.constant(x, dtype=self.dtype)
        self.t_Y = tf.constant(y, dtype=self.dtype)

        self.t_N = tf.shape(self.t_Y)[0]
        self.t_D = tf.shape(self.t_Y)[1]
        self.t_Q = tf.shape(self.t_X)[0]
        self.t_M = tf.shape(self.t_X)[1]

        self.M = x.shape[1]

        if self.kernel == 'Squared Exponential':

            self.kernel_function = self.sq_exp_kernel

            self.signal_var = self.init_variable(args[0][0], positive=True)
            self.lengthscale = self.init_variable([args[0][1]] * self.M,
                                                  positive=True,
                                                  multi=self.variable_l)
            self.noise_var = self.init_variable(args[0][2], positive=True)

            self.hparamd = ['Signal Variance', 'Lengthscale']
            self.hparams = [self.signal_var, self.lengthscale]

        if self.kernel == 'Periodic':

            self.kernel_function = self.sq_exp_kernel

            self.signal_var = self.init_variable(args[0][0], True)
            self.gamma = self.init_variable(args[0][0], True)
            self.period = self.init_variable(args[0][0], True)
            self.noise_var = self.init_variable(args[0][0], True)

            self.p_mu = self.init_variable(tf.log(self.t_Y), False)
            self.p_s2 = self.init_variable(1.0, True)

            self.hparamd = ['Signal Variance', 'Gamma', 'Period']
            self.hparams = [self.signal_var, self.gamma, self.period]

        self.create_kernel = lambda t_x1, t_x2: self.kernel_function(
            t_x1, t_x2, self.hparams)

        ### CREATING THE TRAINING MATRICES ###

        self.K_xx = self.create_kernel(self.t_X, self.t_X) + (
            self.noise_var + self.jitter) * tf.eye(self.t_N, dtype=self.dtype)

        self.L_xx = tf.cholesky(self.K_xx)

        self.logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.L_xx)))

        self.Kinv_YYt = 0.5 * tf.reduce_sum(
            tf.square(
                tf.matrix_triangular_solve(self.L_xx, self.t_Y, lower=True)))

        ### Initialising loose priors ###

        self.hprior = 0

        if self.variable_l:

            self.hprior += 0.5 * tf.square(tf.log(self.hparams[0]))

            self.hprior += tf.reduce_sum(0.5 *
                                         tf.square(tf.log(self.hparams[1])))

        else:

            for i in self.hparams:

                self.hprior += 0.5 * tf.square(tf.log(i))

        self.noise_prior = 0.5 * tf.square(tf.log(self.noise_var))

        ### Negative marginal log likelihood under Gaussian assumption ###

        if self.distribution == 'Gaussian':

            pi_term = tf.constant(0.5 * np.log(2.0 * np.pi), dtype=self.dtype)

            self.term1 = pi_term * tf.cast(self.t_D, dtype = self.dtype) * tf.cast(self.t_N, dtype = self.dtype) \
                               + 0.5 * tf.cast(self.t_D, dtype = self.dtype) * self.logdet \
                               + self.Kinv_YYt

        if self.distribution == 'Poisson' and self.kernel == 'Periodic':

            self.Kinv = tf.cholesky_solve(self.L_xx,
                                          tf.eye(self.t_N, dtype=self.dtype))

            self.term1 = (
                -tf.reduce_sum(self.t_Y * self.p_mu - tf.exp(self.p_mu + self.p_s2 / 2))
                + 0.5 * (tf.trace(self.Kinv @ (self.p_s2 * tf.eye(self.t_N, dtype=self.dtype)
                                               + self.p_mu @ tf.transpose(self.p_mu)))
                         - tf.cast(self.t_N, dtype=self.dtype) + self.logdet
                         - tf.cast(self.t_N, dtype=self.dtype) * tf.log(self.p_s2)))

        self.objective = self.term1 + self.hprior + self.noise_prior