Example #1
 def log_prior_pdf(self):
     K = self.topic_dim
     new_s1 = np.floatX(self.prior_s1 + K / 2)
     z_mean = tf.reduce_mean(self.z_3d, axis=-1, keep_dims=True)
     new_s2 = self.prior_s2 + tf.reduce_sum(tf.square(self.z_3d - z_mean), axis=-1) / 2 \
              + self.prior_lambda * K / (self.prior_lambda + K) / 2 * tf.square(z_mean - self.prior_mu)
     return tf.lgamma(new_s1) - tf.lgamma(self.prior_s1) + np.floatX(self.prior_s1 * np.log(self.prior_s2)) - new_s1 * tf.log(new_s2) \
         - tf.cast(0.5 * tf.log(1 + K / self.prior_lambda), tf.floatX)
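For reference, this term is the log marginal likelihood of the K coordinates of z under a Normal-Gamma prior, with the constant -K/2 * log(2*pi) dropped. A minimal standalone NumPy sketch of the same computation (normal_gamma_log_marginal is a hypothetical helper, intended only as a sanity check):

import numpy as np
from scipy.special import gammaln

def normal_gamma_log_marginal(z, mu0, lam0, s1, s2):
    # z: 1-D array of length K; mirrors log_prior_pdf above and, like the
    # TF code, drops the constant -K/2 * log(2*pi) term.
    K = len(z)
    z_mean = z.mean()
    new_s1 = s1 + K / 2
    new_s2 = (s2 + np.sum((z - z_mean) ** 2) / 2
              + lam0 * K / (lam0 + K) / 2 * (z_mean - mu0) ** 2)
    return (gammaln(new_s1) - gammaln(s1) + s1 * np.log(s2)
            - new_s1 * np.log(new_s2) - 0.5 * np.log(1 + K / lam0))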
Example #2
    def build_kl_loss(self):
        # The prior.
        self.prior_a = np.floatX(self.cfg["prior_alpha"])
        self.prior_b = np.floatX(self.cfg["prior_beta"])
        if self.cfg["pitman_yor"]:
            # Pitman-Yor process for power-law cluster size distribution.
            # self.prior_a is `1 - a` in the standard parametrization (a is the discount parameter); 0 < prior_a <= 1
            # self.prior_b is `a + b` in the standard parametrization; 0 < prior_b
            # The cluster-size distribution asymptotically follows k^{-1/a}, so the larger prior_a is, the smaller a is and the fewer activated topics the prior encourages.
            self.prior_b = np.floatX(
                np.arange(self.topic_dim - 1) * (1 - self.prior_a) +
                self.prior_b)
        self.KL_loss_3d = self.calc_kl_loss()
        self.KL_loss = tf.reduce_sum(self.KL_loss_3d, axis=-1)

        # Diversity loss.
        dl_type = self.cfg.get("diversity_loss_type", None)
        print("Use diversity regularization: ", dl_type)
        if dl_type == "xie_2015":
            K = []
            beta_norm = tf.sqrt(tf.reduce_sum(tf.square(self.beta), axis=-1))
            for i in range(self.topic_dim):
                Ki = []
                for j in range(i):
                    Ki.append(K[j][i])
                Ki.append(tf.constant(0, dtype=tf.floatX))
                for j in range(i + 1, self.topic_dim):
                    Ki.append(
                        tf.acos(
                            tf.reduce_sum(self.beta[i, :] * self.beta[j, :]) /
                            (beta_norm[i] * beta_norm[j])))

                K.append(tf.stack(Ki))
            K_mat = tf.stack(K)
            self.angle_mean = tf.reduce_mean(K_mat)
            self.angle_v = tf.reduce_mean(tf.square(K_mat - self.angle_mean))
            self.diversity_loss = -self.diversity_weight_placeholder * (
                self.angle_mean - self.angle_v)
        elif dl_type == "dpp":
            K = []
            #beta_norm = tf.sqrt(tf.reduce_sum(tf.square(self.beta), axis=-1))
            for i in range(self.topic_dim):
                Ki = []
                for j in range(i):
                    Ki.append(K[j][i])
                #Ki.append(tf.constant(, dtype=tf.floatX))
                for j in range(i, self.topic_dim):
                    # TODO: other kernels?
                    Ki.append(tf.reduce_sum(self.beta[i, :] * self.beta[j, :]))
                K.append(tf.stack(Ki))
            K_mat = tf.stack(K)
            self.diversity_loss = -2 * logdet(K_mat)

        else:
            self.diversity_loss = tf.constant(0., dtype=tf.floatX)

        self.batch_kl_loss = tf.reduce_mean(self.KL_loss)
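The xie_2015 branch assembles the pairwise topic-angle matrix with a double Python loop. A vectorized NumPy sketch of the same mean-minus-variance angle regularizer (xie_2015_diversity is a hypothetical helper, not part of the model):

import numpy as np

def xie_2015_diversity(beta, weight):
    # beta: (topic_dim, vocab_dim) topic matrix; weight: diversity weight.
    beta_norm = np.linalg.norm(beta, axis=-1, keepdims=True)
    cos = np.clip(beta @ beta.T / (beta_norm * beta_norm.T), -1.0, 1.0)
    angles = np.arccos(cos)
    np.fill_diagonal(angles, 0.0)   # matches the tf.constant(0) diagonal above
    return -weight * (angles.mean() - angles.var())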
Example #3
 def sample_from_prior(self, num):
     # lambda ~ Gamma(s_1, s_2): shape s_1, rate s_2 (NumPy's gamma takes shape and scale = 1/rate)
     lambdas = np.random.gamma(shape=self.prior_s1, scale=1./self.prior_s2, size=(num,))
     # mu ~ Normal(mu_p, 1 / (lambda_p * lambda))
     mus = np.random.normal(loc=self.prior_mu, scale=np.sqrt(1./(self.prior_lambda * lambdas)))
     # x ~ Normal(mu, 1 / lambda)
     return np.floatX(np.random.normal(mus, np.sqrt(1. / lambdas), size=(self.topic_dim, num)).T)
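Note that NumPy's gamma sampler is parameterized by shape and scale = 1/rate, so lambda ~ Gamma(s_1, s_2) corresponds to shape=s_1, scale=1/s_2. A quick moment check with hypothetical prior values:

import numpy as np

s1, s2 = 2.0, 4.0   # hypothetical prior_s1, prior_s2
lambdas = np.random.gamma(shape=s1, scale=1. / s2, size=100000)
print(lambdas.mean())   # close to s1 / s2 = 0.5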
Example #4
    def build_kl_loss(self):
        self.prior_mu = np.floatX(self.cfg["prior_mu"])
        self.prior_lambda = np.floatX(self.cfg["prior_lambda"])
        self.prior_s1 = np.floatX(self.cfg["prior_s1"])
        self.prior_s2 = np.floatX(self.cfg["prior_s2"])

        self.post_mu = tf.get_variable("sgd_post_mu", shape=[], dtype=tf.floatX,
                                       initializer=tf.constant_initializer(self.prior_mu),
                                       trainable=self.cfg["trainable_post_mu"])
        self.log_post_lambda = tf.get_variable("sgd_log_post_lambda", shape=[], dtype=tf.floatX,
                                               initializer=tf.constant_initializer(np.log(self.prior_lambda)),
                                               trainable=self.cfg["trainable_post_lambda"])
        self.invsp_post_gamma1 = tf.get_variable("sgd_invsp_post_gamma1", shape=[], dtype=tf.floatX,
                                                 initializer=tf.constant_initializer(np.log(np.exp(self.prior_s1) - 1)),
                                                 trainable=self.cfg["trainable_post_gamma1"])
        self.invsp_post_gamma2 = tf.get_variable("sgd_invsp_post_gamma2", shape=[], dtype=tf.floatX,
                                                 initializer=tf.constant_initializer(np.log(np.exp(self.prior_s2) - 1)),
                                                 trainable=self.cfg["trainable_post_gamma2"])

        self.post_lambda = tf.exp(self.log_post_lambda)
        self.post_gamma1 = tf.nn.softplus(self.invsp_post_gamma1)
        self.post_gamma2 = tf.nn.softplus(self.invsp_post_gamma2)

        gamma_ratio = self.post_gamma1 / self.post_gamma2

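        # The KL term KL(q(z, mu, lambda) || p(z, mu, lambda)) is assembled from
        # three pieces below: the negative entropy of q(z), the Normal-Gamma KL
        # on (mu, lambda), and the negative expectation -E_q[log p(z | mu, lambda)].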
        neg_log_entro = -self.topic_dim / 2 * (np.log(2 * np.pi) + 1) - tf.reduce_sum(self.z_logvar, axis=-1) / 2
        neg_log_entro_3d = -0.5 * (np.log(2 * np.pi) + 1) - self.z_logvar / 2

        kl_normal_gamma = tf.lgamma(self.prior_s1) - tf.lgamma(self.post_gamma1) - self.prior_s1 * tf.log(self.prior_s2 / self.post_gamma2) \
                                                 - (np.log(self.prior_lambda) - tf.log(self.post_lambda)) / 2 \
                                                 - tf.digamma(self.post_gamma1) * (self.prior_s1 - self.post_gamma1) \
                                                 + gamma_ratio * self.prior_s2 - self.post_gamma1 - 0.5 \
                                                 + self.prior_lambda / self.post_lambda / 2 \
                                                 + self.prior_lambda * gamma_ratio / 2 * (self.prior_mu - self.post_mu) ** 2

        neg_Epz_q_z_u_lambda = self.topic_dim / 2 * (tf.log(2 * np.pi * self.post_gamma2) + 1 / self.post_lambda \
                                                     + self.post_mu ** 2 * gamma_ratio - tf.digamma(self.post_gamma1)) \
                                                 + gamma_ratio / 2 * tf.reduce_sum(tf.square(self.z_mean) + self.z_var - 2 * self.z_mean * self.post_mu, axis=-1)
        neg_Epz_q_z_u_lambda_3d = 0.5 * (tf.log(2 * np.pi * self.post_gamma2) + 1 / self.post_lambda \
                                         + self.post_mu ** 2 * gamma_ratio - tf.digamma(self.post_gamma1)) \
            + gamma_ratio / 2 * (tf.square(self.z_mean) + self.z_var - 2 * self.z_mean * self.post_mu)

        self.KL_loss_3d = neg_log_entro_3d + kl_normal_gamma + neg_Epz_q_z_u_lambda_3d

        self.KL_loss = neg_log_entro + kl_normal_gamma + neg_Epz_q_z_u_lambda
        self.batch_kl_loss = tf.reduce_mean(self.KL_loss)
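In equation form, the kl_normal_gamma term above, with variational posterior q = NG(mu_q, lambda_q, gamma_1, gamma_2) and prior p = NG(mu_0, lambda_0, s_1, s_2), transcribes directly from the code as

\mathrm{KL}(q \,\|\, p) = \ln\Gamma(s_1) - \ln\Gamma(\gamma_1) - s_1 \ln\frac{s_2}{\gamma_2} - \frac{1}{2}\ln\frac{\lambda_0}{\lambda_q} - \psi(\gamma_1)(s_1 - \gamma_1) + \frac{\gamma_1}{\gamma_2}\,s_2 - \gamma_1 - \frac{1}{2} + \frac{\lambda_0}{2\lambda_q} + \frac{\lambda_0\,\gamma_1}{2\gamma_2}(\mu_0 - \mu_q)^2

where \psi is the digamma function.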
Example #5
    def build_kl_loss(self):
        # The prior.
        self.prior_a = np.floatX(self.cfg["prior_alpha"])
        self.prior_b = np.floatX(self.cfg["prior_beta"])
        self.prior_gamma_a = np.floatX(self.cfg["prior_gamma_a"])
        self.prior_gamma = np.floatX(self.cfg["prior_gamma"])
        if self.cfg.get("pitman_yor", False):
            self.prior_b = np.floatX(np.arange(self.cfg["L2_truncation_level"] - 1) * (1 - self.prior_a) + self.prior_b)
            self.prior_gamma = np.floatX(np.arange(self.topic_dim - 1) * (1 - self.prior_gamma_a) + self.prior_gamma)
        if self.cfg["closed_form_update_beta"]:
            self.post_u = tf.get_variable("post_u", shape=[self.topic_dim-1], dtype=tf.floatX,
                                          initializer=tf.constant_initializer(1.0), trainable=False)
            self.post_v = tf.get_variable("post_v", shape=[self.topic_dim-1], dtype=tf.floatX,
                                          initializer=tf.constant_initializer(self.prior_gamma), trainable=False)
        else:
            self.inv_post_u = tf.get_variable("sgd_log_post_u", shape=[self.topic_dim-1], dtype=tf.floatX,
                                              initializer=tf.constant_initializer(np.log(np.exp(1.0)-1)))
            self.inv_post_v = tf.get_variable("sgd_log_post_v", shape=[self.topic_dim-1], dtype=tf.floatX,
                                              initializer=tf.constant_initializer(np.log(np.exp(self.prior_gamma)-1)))
            self.post_u = tf.nn.softplus(self.inv_post_u)
            self.post_v = tf.nn.softplus(self.inv_post_v)
        self.post_e_beta = self.post_u / (self.post_u + self.post_v)

        self.KL_pi = self.calc_kl_loss()
        self.KL_beta = self.calc_kl_loss_beta()
        self.KL_c = self.calc_kl_loss_c()
        # self.KL_loss_3d = self.calc_kl_loss()
        # self.KL_loss = tf.reduce_sum(self.KL_loss_3d, axis=-1)
        self.batch_kl_c = tf.reduce_mean(self.KL_c)
        self.batch_kl_pi = tf.reduce_mean(tf.reduce_sum(self.KL_pi, axis=-1))
        self.KL_loss = tf.reduce_sum(self.KL_pi, axis=-1) + self.KL_c + self.KL_beta / self.dataset_size_placeholder
        if self.cfg["KL_beta_ratio"] > 0:
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.cfg["KL_beta_ratio"] * self.KL_beta)
        # / tf.where(self.training_placeholder,

        # / tf.cast(tf.shape(self.x)[0] * self.cfg["MC_samples"], tf.float32)
        # self.KL_loss = tf.Print(self.KL_loss, [tf.reduce_mean(tf.reduce_sum(self.KL_pi, axis=-1)), tf.reduce_mean(self.KL_c), self.KL_beta], "kl losses")

        self.diversity_loss = tf.constant(0., dtype=tf.floatX)

        self.batch_kl_loss = tf.reduce_mean(self.KL_loss)
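The per-document KL assembled above combines the local stick-breaking and assignment terms with the global KL_beta term divided by the dataset size N (dataset_size_placeholder), so that the global KL is counted once when summed over all documents:

\mathrm{KL} = \sum_k \mathrm{KL}_{\pi,k} + \mathrm{KL}_c + \frac{\mathrm{KL}_\beta}{N}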
Example #6
    def build_stochastic_layer(self, layer):
        self.a = tf.layers.dense(
            layer,
            self.topic_dim - 1,
            activation=self.cfg["dirichlet_ab_fct"],
            use_bias=self.cfg["dirichlet_ab_use_bias"],
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            bias_initializer=tf.zeros_initializer(),
            name="posterior_a_output")
        self.b = tf.layers.dense(
            layer,
            self.topic_dim - 1,
            activation=self.cfg["dirichlet_ab_fct"],
            use_bias=self.cfg["dirichlet_ab_use_bias"],
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            bias_initializer=tf.constant_initializer(self.cfg["b_init"]),
            name="posterior_b_output")
        uniform_samples = tf.random_uniform(
            (self.cfg["MC_samples"], tf.shape(self.x)[0], self.topic_dim - 1),
            minval=0.01,
            maxval=0.99,
            dtype=tf.floatX)
        if self.cfg.get("bias_on_prior", False):
            self.prior_a = np.floatX(self.cfg["prior_alpha"])
            self.prior_b = np.floatX(self.cfg["prior_beta"])
            if self.cfg["pitman_yor"]:
                self.prior_b = np.floatX(
                    np.arange(self.topic_dim - 1) * (1 - self.prior_a) +
                    self.prior_b)
            self.b = self.b + self.prior_b
            self.a = self.a + self.prior_a
        else:
            self.a = self.a + 1e-5
            self.b = self.b + 1e-5
        self.vs = (1 - uniform_samples**(1 / self.b))**(1 / self.a)

        # self.vs = tf.Print(self.vs, [tf.reduce_mean(self.vs), tf.reduce_max(self.vs), self.vs[:, 37, :]], summarize=200, message="print_vs: ")
        # Construct topic vector by stick-breaking process
        # stick_segment = tf.zeros((self.cfg["MC_samples"], tf.shape(self.x)[0]))
        # remaining_stick = tf.ones((self.cfg["MC_samples"], tf.shape(self.x)[0]))
        # def stick_breaking(s, elem):
        #     stick = s[1] * self.vs[:, :, elem]
        #     remain = s[1] * (1 - self.vs[:, :, elem])
        #     return (stick, remain)
        # stick_segments, remaining_sticks = tf.scan(fn=stick_breaking, elems=tf.range(self.topic_dim - 1),
        #                                            initializer=(stick_segment, remaining_stick))
        # self.z = tf.transpose(tf.concat((stick_segments, tf.expand_dims(remaining_sticks[-1, :, :], axis=0)), axis=0), (1, 2, 0))
        # # 0.01 -> 99% stick
        # self.average_used_dims = tf.reduce_mean(tf.reduce_sum(tf.cast(remaining_sticks > self.cfg["stick_epsilon"], tf.floatX), axis=0))

        stick_segments_lst = []
        remaining_sticks = tf.ones(
            (self.cfg["MC_samples"], tf.shape(self.x)[0]), dtype=tf.floatX)
        for i in range(self.topic_dim - 1):
            stick_segments_lst.append(remaining_sticks * self.vs[:, :, i])
            remaining_sticks = remaining_sticks * (1 - self.vs[:, :, i])
        stick_segments = tf.stack(
            stick_segments_lst
        )  # (topic_dim - 1) x (MC samples) x (batch size)
        self.z_3d = tf.transpose(
            tf.concat(
                (stick_segments, tf.expand_dims(remaining_sticks, axis=0)),
                axis=0), (1, 2, 0))
        # Estimate how many topic dimensions are effectively used.
        if self.cfg["effective_indicator"] == "average":
            self.average_of_every_topic = tf.reduce_mean(
                self.z_3d, axis=(0, 1)) * tf.cast(
                    tf.shape(self.x)[0], tf.floatX)
            effective_dims = self.average_of_every_topic > self.cfg[
                "effective_threshold"]
            self.average_used_dims = tf.reduce_sum(
                tf.cast(effective_dims, tf.floatX))
            self.effective_dims = tf.squeeze(tf.where(effective_dims))
        elif self.cfg["effective_indicator"] == "assignment" or self.cfg[
                "effective_indicator"] == "ratio":
            self.assignment_of_every_topic = tf.bincount(
                tf.cast(tf.argmax(self.z_3d, axis=-1), tf.int32),
                minlength=self.topic_dim)
            effective_dims_bool = tf.cast(
                self.assignment_of_every_topic,
                tf.floatX) > self.cfg["assignment_threshold"] * tf.cast(
                    tf.shape(self.x)[0], tf.floatX) * self.cfg["MC_samples"]
            # FIXME: this is not correct when MC_samples is not 1.
            self.average_used_dims = tf.reduce_sum(
                tf.cast(effective_dims_bool, tf.floatX))
            self.effective_dims = tf.squeeze(tf.where(effective_dims_bool))

        # self.average_used_dims = tf.Print(self.average_used_dims, [tf.transpose(remaining_sticks, (1, 2, 0))], "print_remaining", summarize=100, first_n=3)
        # self.z = tf.Print(self.z, [self.z], "print_z", summarize=50)
        # self.z = tf.Print(self.z, [tf.reduce_sum(self.z, axis=-1)], "print_z_sum")
        z = tf.reshape(self.z_3d, [-1, self.topic_dim])
        return z
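A minimal NumPy sketch of the sampling above, inverse-CDF draws from Kumaraswamy(a, b) followed by stick-breaking (kumaraswamy_stick_breaking is a hypothetical standalone function with scalar a and b, for illustration only):

import numpy as np

def kumaraswamy_stick_breaking(a, b, num_samples, topic_dim, rng=np.random):
    u = rng.uniform(0.01, 0.99, size=(num_samples, topic_dim - 1))
    vs = (1 - u ** (1. / b)) ** (1. / a)   # Kumaraswamy(a, b) via the inverse CDF
    segments, remaining = [], np.ones(num_samples)
    for i in range(topic_dim - 1):
        segments.append(remaining * vs[:, i])
        remaining = remaining * (1 - vs[:, i])
    return np.stack(segments + [remaining], axis=-1)

z = kumaraswamy_stick_breaking(a=1.0, b=3.0, num_samples=4, topic_dim=10)
print(z.sum(axis=-1))   # each row sums to 1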
Example #7
 def sample_from_prior(self, num):
     eps = np.floatX(
         np.random.beta(self.prior_a,
                        self.prior_b,
                        size=(num, self.topic_dim - 1)))
     return self.stick_breaking(eps)
Example #8
 def sample_from_prior(self, num):
     eps = np.floatX(np.random.normal(size=(num, self.topic_dim)))
     return eps * np.sqrt(self.prior_var) + self.prior_mu
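This is a draw from N(prior_mu, prior_var * I) written as a scaled standard normal:

x = \mu_{\text{prior}} + \sqrt{\sigma^2_{\text{prior}}}\,\epsilon, \qquad \epsilon \sim \mathcal{N}(0, I)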
Example #9
 def _gaussian_log_pdf(self, x, mu, sigma):
     return -0.5 * (self.topic_dim * tf.cast(tf.log(2 * np.floatX(np.pi)), tf.floatX) + tf.reduce_sum(tf.log(sigma), axis=-1) + tf.reduce_sum(tf.square(x - mu) / sigma, axis=-1))
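Written out, with K = topic_dim and sigma holding the per-dimension variance (not the standard deviation), this is the diagonal-covariance Gaussian log-density:

\log \mathcal{N}(x;\, \mu, \mathrm{diag}(\sigma)) = -\frac{1}{2}\Big(K \ln 2\pi + \sum_k \ln \sigma_k + \sum_k \frac{(x_k - \mu_k)^2}{\sigma_k}\Big)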