def elbo(xn, D, K, alpha, m_o, beta_o, delta_o, lambda_pi, lambda_m,
         lambda_beta, phi):
    """
    ELBO computation

    Evaluates the lower bound from the prior hyperparameters and the
    current variational parameters, accumulating one prior/entropy term
    and one data term per component.

    Args:
        xn: 2-D data array; every row is differenced against
            ``lambda_m[k, :]`` (presumably shape (N, D) -- confirm).
        D: scalar multiplying the ``1 / lambda_beta[k]`` terms and the
            constant ``K * D / 2`` (presumably the data dimension).
        K: number of components; the loop runs over ``range(K)``.
        alpha: prior parameter passed to ``log_beta_function``.
        m_o: prior mean subtracted from each ``lambda_m[k, :]``.
        beta_o: scalar prior factor scaling ``delta_o``.
        delta_o: square matrix used in the quadratic forms and
            determinants.
        lambda_pi: variational parameter passed to
            ``log_beta_function`` and ``dirichlet_expectation``.
        lambda_m: 2-D array indexed as ``lambda_m[k, :]``.
        lambda_beta: sequence indexed as ``lambda_beta[k]``.
        phi: 2-D array indexed as ``phi[:, k]``; its logarithm is taken,
            so entries are expected to be strictly positive.

    Returns:
        The accumulated lower bound value.
    """
    # Both quantities are independent of k, so evaluate them once instead
    # of once per component.
    e_log_pi = dirichlet_expectation(lambda_pi)
    b4 = 1 / 2. * np.log(np.linalg.det(delta_o) / (2. * math.pi))

    lb = log_beta_function(lambda_pi)
    lb -= log_beta_function(alpha)
    lb += np.dot(alpha - lambda_pi, e_log_pi)
    lb += K / 2. * np.log(np.linalg.det(beta_o * delta_o))
    lb += K * D / 2.

    for k in range(K):
        # Terms that depend only on the k-th component parameters.
        mean_shift = lambda_m[k, :] - m_o
        a3 = beta_o / 2. * np.dot(mean_shift, np.dot(delta_o, mean_shift.T))
        a4 = D * beta_o / (2. * lambda_beta[k])
        a5 = 1 / 2. * np.log(np.linalg.det(lambda_beta[k] * delta_o))
        lb -= a3 + a4 + a5

        # Terms weighted by the k-th column of phi.
        b1 = phi[:, k].T
        b3 = np.log(phi[:, k])
        centred = xn - lambda_m[k, :]
        # Row-wise quadratic form centred[n] . delta_o . centred[n].
        # This equals np.diagonal(np.dot(centred, delta_o . centred.T)) but
        # never materialises the full N-by-N product.
        b7 = 1 / 2. * np.einsum('nd,dn->n', centred,
                                np.dot(delta_o, centred.T))
        b8 = D / (2. * lambda_beta[k])
        lb += np.dot(b1, e_log_pi[k] - b3 + b4 - b7 - b8)
    return lb
def elbo(params):
    """
    ELBO computation

    Takes the unconstrained variational parameters as a single 4-tuple
    ``(lambda_pi, lambda_phi, lambda_m, lambda_beta)`` and returns the
    NEGATED lower bound (``-ELBO``).

    The tuple is unpacked in the body rather than in the signature:
    tuple parameters in a ``def`` are Python-2-only syntax (removed by
    PEP 3113). Callers still pass exactly one tuple argument.

    Reads the module-level names ``xn``, ``N``, ``K``, ``D``, ``alpha_o``,
    ``m_o``, ``beta_o``, ``delta_o`` and ``MACHINE_PRECISION``.

    Args:
        params: 4-tuple ``(lambda_pi, lambda_phi, lambda_m, lambda_beta)``.
            ``lambda_pi`` and ``lambda_beta`` are mapped through
            ``log(1 + exp(.) + MACHINE_PRECISION)``; each row of
            ``lambda_phi`` is exponentiated and normalised to sum to one;
            ``lambda_m`` is used as given and indexed as
            ``lambda_m[k, :]``.

    Returns:
        ``-ELBO`` (presumably so that a minimiser can be applied --
        confirm against the caller).
    """
    lambda_pi, lambda_phi, lambda_m, lambda_beta = params

    # Map the unconstrained parameters to their constrained counterparts.
    lambda_pi_aux = agnp.log(1 + agnp.exp(lambda_pi) + MACHINE_PRECISION)
    exp_phi = agnp.exp(lambda_phi) + MACHINE_PRECISION
    phi_aux = exp_phi / agnp.tile(exp_phi.sum(axis=1), (K, 1)).T
    lambda_mu_beta_aux = agnp.log(
        1 + agnp.exp(lambda_beta) + MACHINE_PRECISION)

    # Loop invariants: previously re-evaluated for every (n, k) pair.
    e_log_pi = dirichlet_expectation(lambda_pi_aux)
    log_const = 1 / 2. * agnp.log(1. / (2. * math.pi))

    ELBO = log_beta_function(lambda_pi_aux) - log_beta_function(alpha_o) \
        + agnp.dot(alpha_o - lambda_pi_aux, e_log_pi) \
        + K / 2. * agnp.log(agnp.linalg.det(beta_o * delta_o)) + K * D / 2.

    for k in range(K):
        mean_shift = lambda_m[k, :] - m_o
        ELBO -= beta_o / 2. * agnp.dot(
            mean_shift, agnp.dot(delta_o, mean_shift.T))
        for n in range(N):
            centred = xn[n, :] - lambda_m[k, :]
            ELBO += phi_aux[n, k] * (
                e_log_pi[k] - agnp.log(phi_aux[n, k])
                + log_const
                - 1 / 2. * agnp.dot(centred, agnp.dot(delta_o, centred.T))
                - D / (2. * lambda_mu_beta_aux[k]))
    return -ELBO
def elbo(params):
    """
    ELBO computation

    Takes the variational parameters as a single 6-tuple
    ``(lambda_pi, lambda_phi, lambda_m, lambda_beta, lambda_nu, lambda_w)``
    and returns the sum ``e1 + e2 + e3 + e4 + e5 + h1 + h2 + h4 + h5``.

    The tuple is unpacked in the body rather than in the signature:
    tuple parameters in a ``def`` are Python-2-only syntax (removed by
    PEP 3113). Callers still pass exactly one tuple argument.

    Reads the module-level names ``xn``, ``N``, ``K``, ``alpha_o``,
    ``m_o``, ``beta_o``, ``nu_o`` and ``w_o``.

    Args:
        params: 6-tuple of variational parameters. ``lambda_phi`` is
            indexed as ``lambda_phi[n, :]``, ``lambda_m`` as
            ``lambda_m[k, :]`` and ``lambda_w`` as ``lambda_w[k, :, :]``;
            ``lambda_pi``, ``lambda_beta`` and ``lambda_nu`` are combined
            elementwise with length-K arrays.

    Returns:
        The lower bound value.
    """
    (lambda_pi, lambda_phi, lambda_m,
     lambda_beta, lambda_nu, lambda_w) = params

    # Loop invariants: evaluate once instead of once per data point /
    # once per component.
    e_log_pi = dirichlet_expectation(lambda_pi)
    w_o_inv = agnp.linalg.inv(w_o)

    e3 = e2 = h2 = 0
    e1 = - log_beta_function(alpha_o) \
        + agnp.dot((alpha_o - agnp.ones(K)), e_log_pi)
    h1 = log_beta_function(lambda_pi) \
        - agnp.dot((lambda_pi - agnp.ones(K)), e_log_pi)

    logdet = agnp.log(
        agnp.array([agnp.linalg.det(lambda_w[k, :, :]) for k in range(K)]))
    logDeltak = agscipy.psi(lambda_nu / 2.) \
        + agscipy.psi((lambda_nu - 1.) / 2.) + 2. * agnp.log(2.) + logdet

    # Per-data-point contributions.
    for n in range(N):
        e2 += agnp.dot(lambda_phi[n, :], e_log_pi)
        h2 += -agnp.dot(lambda_phi[n, :], log_(lambda_phi[n, :]))
        data_quad = agnp.array([
            agnp.dot(agnp.dot(xn[n, :] - lambda_m[k, :], lambda_w[k, :, :]),
                     (xn[n, :] - lambda_m[k, :]).T)
            for k in range(K)
        ])
        e3 += 1. / 2 * agnp.dot(
            lambda_phi[n, :],
            (logDeltak - 2. * agnp.log(2 * agnp.pi)
             - lambda_nu * data_quad - 2. / lambda_beta).T)

    # Per-component quadratic form of (lambda_m[k] - m_o) under lambda_w[k].
    prior_quad = agnp.array([
        agnp.dot(agnp.dot(lambda_m[k, :] - m_o, lambda_w[k, :, :]),
                 (lambda_m[k, :] - m_o).T)
        for k in range(K)
    ])
    traces = agnp.array([
        agnp.trace(agnp.dot(w_o_inv, lambda_w[k, :, :]))
        for k in range(K)
    ])

    h4 = agnp.sum((1. + agnp.log(2. * agnp.pi)
                   - 1. / 2 * (agnp.log(lambda_beta) + logdet)))

    # Log-normaliser built from the variational (lambda_nu, lambda_w).
    logB_q = lambda_nu / 2. * logdet + lambda_nu * agnp.log(2.) \
        + 1. / 2 * agnp.log(agnp.pi) \
        + agscipy.gammaln(lambda_nu / 2.) \
        + agscipy.gammaln((lambda_nu - 1) / 2.)
    h5 = agnp.sum((logB_q - (lambda_nu - 3.) / 2. * logDeltak + lambda_nu))

    e4 = agnp.sum(
        (1. / 2 * (agnp.log(beta_o) + logDeltak
                   - 2 * agnp.log(2. * agnp.pi)
                   - beta_o * lambda_nu * prior_quad
                   - 2. * beta_o / lambda_beta)))

    # Log-normaliser built from the prior (nu_o, w_o).
    logB_p = nu_o / 2. * agnp.log(agnp.linalg.det(w_o)) \
        + nu_o * agnp.log(2.) + 1. / 2 * agnp.log(agnp.pi) \
        + agscipy.gammaln(nu_o / 2.) + agscipy.gammaln((nu_o - 1) / 2.)
    e5 = agnp.sum(
        (-logB_p + (nu_o - 3.) / 2. * logDeltak - lambda_nu / 2. * traces))

    return e1 + e2 + e3 + e4 + e5 + h1 + h2 + h4 + h5
def elbo2(xn, alpha_o, lambda_pi, lambda_phi, m_o, lambda_m, beta_o,
          lambda_beta, nu_o, lambda_nu, w_o, lambda_w, N, K):
    """
    ELBO computation

    Evaluates the lower bound as the sum
    ``e1 + e2 + e3 + e4 + e5 + h1 + h2 + h4 + h5`` from explicitly passed
    data, prior hyperparameters and variational parameters.

    Args:
        xn: data array indexed as ``xn[n, :]``.
        alpha_o: prior parameter passed to ``log_beta_function``.
        lambda_pi: variational parameter passed to ``log_beta_function``
            and ``dirichlet_expectation``.
        lambda_phi: array indexed as ``lambda_phi[n, :]``.
        m_o: prior mean subtracted from each ``lambda_m[k, :]``.
        lambda_m: array indexed as ``lambda_m[k, :]``.
        beta_o: prior scalar; its logarithm is taken.
        lambda_beta: array combined elementwise with length-K arrays; its
            logarithm and reciprocal are taken.
        nu_o: prior scalar fed to ``gammaln``.
        lambda_nu: array fed elementwise to ``psi`` and ``gammaln``.
        w_o: square prior matrix; it is inverted and its determinant is
            taken.
        lambda_w: array indexed as ``lambda_w[k, :, :]``.
        N: number of data points iterated over (``range(N)``).
        K: number of components iterated over (``range(K)``).

    Returns:
        The lower bound value.
    """
    # Loop invariants: evaluate once instead of once per data point /
    # once per component.
    e_log_pi = dirichlet_expectation(lambda_pi)
    w_o_inv = inv(w_o)

    e3 = e2 = h2 = 0
    e1 = - log_beta_function(alpha_o) \
        + np.dot((alpha_o - np.ones(K)), e_log_pi)
    h1 = log_beta_function(lambda_pi) \
        - np.dot((lambda_pi - np.ones(K)), e_log_pi)

    logdet = np.log(np.array([det(lambda_w[k, :, :]) for k in range(K)]))
    logDeltak = psi(lambda_nu / 2.) \
        + psi((lambda_nu - 1.) / 2.) + 2. * np.log(2.) + logdet

    # Per-data-point contributions.
    for n in range(N):
        e2 += np.dot(lambda_phi[n, :], e_log_pi)
        h2 += -np.dot(lambda_phi[n, :], log_(lambda_phi[n, :]))
        data_quad = np.array([
            np.dot(np.dot(xn[n, :] - lambda_m[k, :], lambda_w[k, :, :]),
                   (xn[n, :] - lambda_m[k, :]).T)
            for k in range(K)
        ])
        e3 += 1. / 2 * np.dot(
            lambda_phi[n, :],
            (logDeltak - 2. * np.log(2 * math.pi)
             - lambda_nu * data_quad - 2. / lambda_beta).T)

    # Per-component quadratic form of (lambda_m[k] - m_o) under lambda_w[k].
    prior_quad = np.array([
        np.dot(np.dot(lambda_m[k, :] - m_o, lambda_w[k, :, :]),
               (lambda_m[k, :] - m_o).T)
        for k in range(K)
    ])
    traces = np.array(
        [np.trace(np.dot(w_o_inv, lambda_w[k, :, :])) for k in range(K)])

    h4 = np.sum(
        (1. + np.log(2. * math.pi) - 1. / 2 * (np.log(lambda_beta) + logdet)))

    # Log-normaliser built from the variational (lambda_nu, lambda_w).
    logB_q = lambda_nu / 2. * logdet + lambda_nu * np.log(2.) \
        + 1. / 2 * np.log(math.pi) \
        + gammaln(lambda_nu / 2.) + gammaln((lambda_nu - 1) / 2.)
    h5 = np.sum((logB_q - (lambda_nu - 3.) / 2. * logDeltak + lambda_nu))

    e4 = np.sum(
        (1. / 2 * (np.log(beta_o) + logDeltak - 2 * np.log(2. * math.pi)
                   - beta_o * lambda_nu * prior_quad
                   - 2. * beta_o / lambda_beta)))

    # Log-normaliser built from the prior (nu_o, w_o).
    logB_p = nu_o / 2. * np.log(det(w_o)) + nu_o * np.log(2.) \
        + 1. / 2 * np.log(math.pi) + gammaln(nu_o / 2.) \
        + gammaln((nu_o - 1) / 2.)
    e5 = np.sum(
        (-logB_p + (nu_o - 3.) / 2. * logDeltak - lambda_nu / 2. * traces))

    return e1 + e2 + e3 + e4 + e5 + h1 + h2 + h4 + h5
tf.nn.softplus(tf.diag_part(lambda_w_var[k]))) mats.append(tf.matmul(aux1, aux1, transpose_b=True)) lambda_w = tf.convert_to_tensor(mats) alpha_o = tf.convert_to_tensor(alpha_o, dtype=tf.float64) nu_o = tf.convert_to_tensor(nu_o, dtype=tf.float64) w_o = tf.convert_to_tensor(w_o, dtype=tf.float64) m_o = tf.convert_to_tensor(m_o, dtype=tf.float64) beta_o = tf.convert_to_tensor(beta_o, dtype=tf.float64) # Evidence Lower Bound definition e3 = tf.convert_to_tensor(0., dtype=tf.float64) e2 = tf.convert_to_tensor(0., dtype=tf.float64) h2 = tf.convert_to_tensor(0., dtype=tf.float64) e1 = tf.add( -log_beta_function(alpha_o), tf.reduce_sum( tf.multiply(tf.subtract(alpha_o, tf.ones(K, dtype=tf.float64)), dirichlet_expectation(lambda_pi)))) h1 = tf.subtract( log_beta_function(lambda_pi), tf.reduce_sum( tf.multiply(tf.subtract(lambda_pi, tf.ones(K, dtype=tf.float64)), dirichlet_expectation(lambda_pi)))) logdet = tf.log( tf.convert_to_tensor( [tf.matrix_determinant(lambda_w[k, :, :]) for k in xrange(K)])) logDeltak = tf.add( tf.digamma(tf.div(lambda_nu, 2.)), tf.add( tf.digamma(
lambda_pi_aux = alpha_aux + np.sum(lambda_phi_aux, axis=0) lambda_beta_aux = beta_o_aux + np.sum(lambda_phi_aux, axis=0) lambda_m_aux = np.tile(1. / lambda_beta_aux, (2, 1)).T * \ (beta_o_aux * m_o_aux + np.dot(lambda_phi_aux.T, xn)) # Variational parameters lambda_phi = tf.Variable(lambda_phi_aux, dtype=tf.float64) lambda_pi = tf.Variable(lambda_pi_aux, dtype=tf.float64) lambda_beta = tf.Variable(lambda_beta_aux, dtype=tf.float64) lambda_m = tf.Variable(lambda_m_aux, dtype=tf.float64) # Reshapes lambda_mu_beta_res = tf.reshape(lambda_beta, [K, 1]) # Lower Bound definition LB = log_beta_function(lambda_pi) LB = tf.subtract(LB, log_beta_function(alpha_o)) LB = tf.add( LB, tf.matmul(tf.subtract(alpha_o, lambda_pi), tf.reshape(dirichlet_expectation(lambda_pi), [K, 1]))) LB = tf.add( LB, tf.multiply(tf.cast(K / 2., tf.float64), tf.log(tf.matrix_determinant(tf.multiply(beta_o, delta_o))))) LB = tf.add(LB, tf.cast(K * (D / 2.), tf.float64)) for k in range(K): a1 = tf.subtract(lambda_m[k, :], m_o) a2 = tf.matmul(delta_o, tf.transpose(tf.subtract(lambda_m[k, :], m_o))) a3 = tf.multiply(tf.div(beta_o, 2.), tf.matmul(a1, a2)) a4 = tf.div(tf.multiply(tf.cast(D, tf.float64), beta_o),
mats.append(tf.matmul(aux1, aux1, transpose_b=True)) lambda_w = tf.convert_to_tensor(mats) idx_tensor = tf.placeholder(tf.int32, shape=(BATCH_SIZE)) alpha_o = tf.convert_to_tensor(alpha_o, dtype=tf.float64) nu_o = tf.convert_to_tensor(nu_o, dtype=tf.float64) w_o = tf.convert_to_tensor(w_o, dtype=tf.float64) m_o = tf.convert_to_tensor(m_o, dtype=tf.float64) beta_o = tf.convert_to_tensor(beta_o, dtype=tf.float64) # Evidence Lower Bound definition e3 = tf.convert_to_tensor(0., dtype=tf.float64) e2 = tf.convert_to_tensor(0., dtype=tf.float64) h2 = tf.convert_to_tensor(0., dtype=tf.float64) e1 = tf.add(-log_beta_function(alpha_o), tf.reduce_sum(tf.multiply( tf.subtract(alpha_o, tf.ones(K, dtype=tf.float64)), dirichlet_expectation(lambda_pi)))) h1 = tf.subtract(log_beta_function(lambda_pi), tf.reduce_sum(tf.multiply( tf.subtract(lambda_pi, tf.ones(K, dtype=tf.float64)), dirichlet_expectation(lambda_pi)))) logdet = tf.log(tf.convert_to_tensor([ tf.matrix_determinant(lambda_w[k, :, :]) for k in xrange(K)])) logDeltak = tf.add(tf.digamma(tf.div(lambda_nu, 2.)), tf.add(tf.digamma(tf.div(tf.subtract( lambda_nu, tf.cast(1., dtype=tf.float64)), tf.cast(2., dtype=tf.float64))), tf.add(tf.multiply(tf.cast(2., dtype=tf.float64), tf.cast(tf.log(2.),