def _free_energy(self, v):
    K = float(self.n_hidden)
    M = float(self.n_samples)
    with tf.name_scope('free_energy'):
        T1 = -tf.einsum('ij,j->i', v, self._vb)
        T2 = -tf.matmul(v, self._W)
        h_hat = Multinomial(total_count=M, logits=tf.ones([K])).sample()
        T3 = tf.einsum('ij,j->i', T2, h_hat)
        fe = tf.reduce_mean(T1 + T3, axis=0)
        fe += -tf.lgamma(M + K) + tf.lgamma(M + 1) + tf.lgamma(K)
    return fe
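
# A note on the constant term above (my reading, not stated in the source): the
# lgamma expression is, up to sign, the log of a multiset coefficient,
#   -lgamma(M + K) + lgamma(M + 1) + lgamma(K) = -log C(M + K - 1, K - 1),
# i.e. minus the log-count of ways to distribute M samples over K hidden units.
# A quick numerical check of that identity (standalone, with made-up values):
import numpy as np
from scipy.special import gammaln, comb

M, K = 100.0, 16.0
lhs = -gammaln(M + K) + gammaln(M + 1) + gammaln(K)
rhs = -np.log(comb(M + K - 1, K - 1))
assert np.isclose(lhs, rhs)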
def multinomial(policy, game_state):
    ## identify the free positions:
    free_positions = tf.to_float(tf.equal(game_state, tf.zeros((1, 9))))
    fm_mapping = lambda x: tf.diag(tf.reshape(x, (9,)))
    free_matrices = tf.map_fn(fm_mapping, free_positions)
    ## calculate the probability vector:
    pvec_mapping = lambda x: tf.transpose(tf.matmul(x, tf.transpose(policy)))
    prob_vec = tf.map_fn(pvec_mapping, free_matrices)
    prob = prob_vec / (tf.reduce_sum(prob_vec) + tf.constant(1e-5))
    return Multinomial(total_count=1., probs=prob)
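
# A minimal usage sketch for the helper above (my own, not from the source):
# `policy` is assumed to be a (1, 9) row of move probabilities and `game_state`
# a (1, 9) board with zeros marking free cells; the Multinomial import is the
# TF1-era tf.contrib one this code appears to rely on.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.distributions import Multinomial

policy_ph = tf.placeholder(tf.float32, [1, 9])
state_ph = tf.placeholder(tf.float32, [1, 9])
move_dist = multinomial(policy_ph, state_ph)   # distribution over the 9 cells, occupied cells masked out
move = move_dist.sample()                      # one-hot draw, since total_count=1.

with tf.Session() as sess:
    one_hot = sess.run(move, feed_dict={
        policy_ph: np.full((1, 9), 1.0 / 9, dtype=np.float32),            # uniform policy
        state_ph: np.array([[1, 0, 0, 0, -1, 0, 0, 0, 0]], np.float32),   # two occupied cells
    })
    print(np.argmax(one_hot))                  # index of the sampled free cell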
def __call__(self, session, trainX, trainY, testX, testY):
    """ Initialize the actual graph

    Parameters
    ----------
    session : tf.Session
        Tensorflow session
    trainX : sparse array in coo format
        Training input OTU table, where rows are samples and columns are
        observations
    trainY : np.array
        Training output metabolite table
    testX : sparse array in coo format
        Test input OTU table, where rows are samples and columns are
        observations. This is mainly for cross validation.
    testY : np.array
        Test output metabolite table. This is mainly for cross validation.
    """
    self.session = session
    self.nnz = len(trainX.data)
    self.d1 = trainX.shape[1]
    self.d2 = trainY.shape[1]
    self.cv_size = len(testX.data)

    # keep the multinomial sampling on the cpu
    # https://github.com/tensorflow/tensorflow/issues/18058
    with tf.device('/cpu:0'):
        X_ph = tf.SparseTensor(
            indices=np.array([trainX.row, trainX.col]).T,
            values=trainX.data,
            dense_shape=trainX.shape)
        Y_ph = tf.constant(trainY, dtype=tf.float32)

        X_holdout = tf.SparseTensor(
            indices=np.array([testX.row, testX.col]).T,
            values=testX.data,
            dense_shape=testX.shape)
        Y_holdout = tf.constant(testY, dtype=tf.float32)

        total_count = tf.reduce_sum(Y_ph, axis=1)
        batch_ids = tf.multinomial(
            tf.log(tf.reshape(X_ph.values, [1, -1])),
            self.batch_size)
        batch_ids = tf.squeeze(batch_ids)
        X_samples = tf.gather(X_ph.indices, 0, axis=1)
        X_obs = tf.gather(X_ph.indices, 1, axis=1)
        sample_ids = tf.gather(X_samples, batch_ids)

        Y_batch = tf.gather(Y_ph, sample_ids)
        X_batch = tf.gather(X_obs, batch_ids)

    with tf.device(self.device_name):
        self.qUmain = tf.Variable(
            tf.random_normal([self.d1, self.p]), name='qU')
        self.qUbias = tf.Variable(
            tf.random_normal([self.d1, 1]), name='qUbias')
        self.qVmain = tf.Variable(
            tf.random_normal([self.p, self.d2 - 1]), name='qV')
        self.qVbias = tf.Variable(
            tf.random_normal([1, self.d2 - 1]), name='qVbias')

        qU = tf.concat(
            [tf.ones([self.d1, 1]), self.qUbias, self.qUmain], axis=1)
        qV = tf.concat(
            [self.qVbias, tf.ones([1, self.d2 - 1]), self.qVmain], axis=0)

        # regression coefficients distribution
        Umain = Normal(loc=tf.zeros([self.d1, self.p]) + self.u_mean,
                       scale=tf.ones([self.d1, self.p]) * self.u_scale,
                       name='U')
        Ubias = Normal(loc=tf.zeros([self.d1, 1]) + self.u_mean,
                       scale=tf.ones([self.d1, 1]) * self.u_scale,
                       name='biasU')
        Vmain = Normal(loc=tf.zeros([self.p, self.d2 - 1]) + self.v_mean,
                       scale=tf.ones([self.p, self.d2 - 1]) * self.v_scale,
                       name='V')
        Vbias = Normal(loc=tf.zeros([1, self.d2 - 1]) + self.v_mean,
                       scale=tf.ones([1, self.d2 - 1]) * self.v_scale,
                       name='biasV')

        du = tf.gather(qU, X_batch, axis=0, name='du')
        dv = tf.concat([tf.zeros([self.batch_size, 1]), du @ qV],
                       axis=1, name='dv')

        tc = tf.gather(total_count, sample_ids)
        Y = Multinomial(total_count=tc, logits=dv, name='Y')
        num_samples = trainX.shape[0]
        norm = num_samples / self.batch_size
        logprob_vmain = tf.reduce_sum(
            Vmain.log_prob(self.qVmain), name='logprob_vmain')
        logprob_vbias = tf.reduce_sum(
            Vbias.log_prob(self.qVbias), name='logprob_vbias')
        logprob_umain = tf.reduce_sum(
            Umain.log_prob(self.qUmain), name='logprob_umain')
        logprob_ubias = tf.reduce_sum(
            Ubias.log_prob(self.qUbias), name='logprob_ubias')
        logprob_y = tf.reduce_sum(Y.log_prob(Y_batch), name='logprob_y')
        self.log_loss = -(
            logprob_y * norm +
            logprob_umain + logprob_ubias +
            logprob_vmain + logprob_vbias
        )

    # keep the multinomial sampling on the cpu
    # https://github.com/tensorflow/tensorflow/issues/18058
    with tf.device('/cpu:0'):
        # cross validation
        with tf.name_scope('accuracy'):
            cv_batch_ids = tf.multinomial(
                tf.log(tf.reshape(X_holdout.values, [1, -1])),
                self.cv_size)
            cv_batch_ids = tf.squeeze(cv_batch_ids)
            X_cv_samples = tf.gather(X_holdout.indices, 0, axis=1)
            X_cv = tf.gather(X_holdout.indices, 1, axis=1)
            cv_sample_ids = tf.gather(X_cv_samples, cv_batch_ids)

            Y_cvbatch = tf.gather(Y_holdout, cv_sample_ids)
            X_cvbatch = tf.gather(X_cv, cv_batch_ids)
            holdout_count = tf.reduce_sum(Y_cvbatch, axis=1)
            cv_du = tf.gather(qU, X_cvbatch, axis=0, name='cv_du')
            pred = tf.reshape(holdout_count, [-1, 1]) * tf.nn.softmax(
                tf.concat([tf.zeros([self.cv_size, 1]), cv_du @ qV],
                          axis=1, name='pred'))
            # note: self.cv is a mean absolute error on the held-out counts
            self.cv = tf.reduce_mean(tf.squeeze(tf.abs(pred - Y_cvbatch)))

    # keep all summaries on the cpu
    with tf.device('/cpu:0'):
        tf.summary.scalar('logloss', self.log_loss)
        tf.summary.scalar('cv_rmse', self.cv)
        tf.summary.histogram('qUmain', self.qUmain)
        tf.summary.histogram('qVmain', self.qVmain)
        tf.summary.histogram('qUbias', self.qUbias)
        tf.summary.histogram('qVbias', self.qVbias)
        self.merged = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(self.save_path,
                                            self.session.graph)

    with tf.device(self.device_name):
        with tf.name_scope('optimize'):
            optimizer = tf.train.AdamOptimizer(
                self.learning_rate, beta1=self.beta_1, beta2=self.beta_2)
            gradients, self.variables = zip(
                *optimizer.compute_gradients(self.log_loss))
            self.gradients, _ = tf.clip_by_global_norm(
                gradients, self.clipnorm)
            self.train = optimizer.apply_gradients(
                zip(self.gradients, self.variables))

    tf.global_variables_initializer().run()
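
# The tf.multinomial trick above draws nonzero entries of the sparse training
# matrix with probability proportional to their counts (the logits are log
# values), so abundant OTU observations are sampled more often. A standalone
# sketch of just that sampling step, with toy data and made-up names:
import numpy as np
import tensorflow as tf
from scipy.sparse import coo_matrix

train = coo_matrix(np.array([[5, 0, 1, 0],
                             [0, 2, 0, 3],
                             [4, 0, 0, 1]], dtype=np.float32))  # 3 samples x 4 observations

values = tf.constant(train.data)                   # nonzero counts
rows = tf.constant(train.row, dtype=tf.int64)      # sample index of each nonzero entry
batch_size = 6

ids = tf.squeeze(tf.multinomial(tf.log(tf.reshape(values, [1, -1])), batch_size))
sample_ids = tf.gather(rows, ids)                  # which sample each draw came from

with tf.Session() as sess:
    print(sess.run(sample_ids))                    # e.g. six sample indices, biased toward large counts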
def _sample(self, means):
    probs = tf.to_float(means / tf.reduce_sum(means))
    return Multinomial(total_count=self.n_samples, probs=probs)
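
# For context, a tiny sketch of how a distribution built this way behaves
# (the means vector and total count below are made up, not from the source):
import tensorflow as tf
from tensorflow.contrib.distributions import Multinomial  # assumed TF1-era import

means = tf.constant([2.0, 6.0, 2.0])
probs = tf.to_float(means / tf.reduce_sum(means))   # [0.2, 0.6, 0.2]
dist = Multinomial(total_count=10., probs=probs)

with tf.Session() as sess:
    print(sess.run(dist.sample()))   # counts over 3 bins summing to 10, e.g. [2., 7., 1.]
    print(sess.run(dist.mean()))     # [2., 6., 2.]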
def main(_):
    opts = Options(save_path=FLAGS.save_path,
                   train_biom=FLAGS.train_biom,
                   test_biom=FLAGS.test_biom,
                   train_metadata=FLAGS.train_metadata,
                   test_metadata=FLAGS.test_metadata,
                   formula=FLAGS.formula,
                   learning_rate=FLAGS.learning_rate,
                   clipping_size=FLAGS.clipping_size,
                   beta_mean=FLAGS.beta_mean,
                   beta_scale=FLAGS.beta_scale,
                   gamma_mean=FLAGS.gamma_mean,
                   gamma_scale=FLAGS.gamma_scale,
                   epochs_to_train=FLAGS.epochs_to_train,
                   num_neg_samples=FLAGS.num_neg_samples,
                   batch_size=FLAGS.batch_size,
                   min_sample_count=FLAGS.min_sample_count,
                   min_feature_count=FLAGS.min_feature_count,
                   statistics_interval=FLAGS.statistics_interval,
                   summary_interval=FLAGS.summary_interval,
                   checkpoint_interval=FLAGS.checkpoint_interval)

    # preprocessing
    train_table, train_metadata = opts.train_table, opts.train_metadata
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]

    sample_filter = lambda val, id_, md: (
        (id_ in train_metadata.index) and np.sum(val) > opts.min_sample_count)
    read_filter = lambda val, id_, md: np.sum(val) > opts.min_feature_count
    metadata_filter = lambda val, id_, md: id_ in train_metadata.index

    train_table = train_table.filter(metadata_filter, axis='sample')
    train_table = train_table.filter(sample_filter, axis='sample')
    train_table = train_table.filter(read_filter, axis='observation')
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]

    sort_f = lambda xs: [xs[train_metadata.index.get_loc(x)] for x in xs]
    train_table = train_table.sort(sort_f=sort_f, axis='sample')
    train_metadata = dmatrix(opts.formula, train_metadata,
                             return_type='dataframe')

    # hold out data preprocessing
    test_table, test_metadata = opts.test_table, opts.test_metadata
    metadata_filter = lambda val, id_, md: id_ in test_metadata.index
    obs_lookup = set(train_table.ids(axis='observation'))
    feat_filter = lambda val, id_, md: id_ in obs_lookup

    test_table = test_table.filter(metadata_filter, axis='sample')
    test_table = test_table.filter(feat_filter, axis='observation')
    test_metadata = test_metadata.loc[test_table.ids(axis='sample')]

    sort_f = lambda xs: [xs[test_metadata.index.get_loc(x)] for x in xs]
    test_table = test_table.sort(sort_f=sort_f, axis='sample')
    test_metadata = dmatrix(opts.formula, test_metadata,
                            return_type='dataframe')

    p = train_metadata.shape[1]   # number of covariates
    G_data = train_metadata.values
    y_data = np.array(train_table.matrix_data.todense()).T
    y_test = np.array(test_table.matrix_data.todense()).T
    N, D = y_data.shape

    save_path = opts.save_path
    learning_rate = opts.learning_rate
    batch_size = opts.batch_size
    gamma_mean, gamma_scale = opts.gamma_mean, opts.gamma_scale
    beta_mean, beta_scale = opts.beta_mean, opts.beta_scale
    num_iter = (N // batch_size) * opts.epochs_to_train
    holdout_size = test_metadata.shape[0]
    checkpoint_interval = opts.checkpoint_interval

    # Model code
    with tf.Graph().as_default(), tf.Session() as session:
        with tf.device("/cpu:0"):
            # Placeholder variables to accept input data
            G_ph = tf.placeholder(tf.float32, [batch_size, p], name='G_ph')
            Y_ph = tf.placeholder(tf.float32, [batch_size, D], name='Y_ph')
            G_holdout = tf.placeholder(tf.float32, [holdout_size, p],
                                       name='G_holdout')
            Y_holdout = tf.placeholder(tf.float32, [holdout_size, D],
                                       name='Y_holdout')
            total_count = tf.placeholder(tf.float32, [batch_size],
                                         name='total_count')

            # Define PointMass Variables first
            qgamma = tf.Variable(tf.random_normal([1, D]), name='qgamma')
            qbeta = tf.Variable(tf.random_normal([p, D]), name='qB')

            # Distributions
            # species bias
            gamma = Normal(loc=tf.zeros([1, D]) + gamma_mean,
                           scale=tf.ones([1, D]) * gamma_scale,
                           name='gamma')
            # regression coefficients distribution
            beta = Normal(loc=tf.zeros([p, D]) + beta_mean,
                          scale=tf.ones([p, D]) * beta_scale,
                          name='B')

            Bprime = tf.concat([qgamma, qbeta], axis=0)
            # add bias terms for samples
            Gprime = tf.concat([tf.ones([batch_size, 1]), G_ph], axis=1)

            eta = tf.matmul(Gprime, Bprime)
            phi = tf.nn.log_softmax(eta)
            Y = Multinomial(total_count=total_count, logits=phi, name='Y')

            loss = -(tf.reduce_mean(gamma.log_prob(qgamma)) +
                     tf.reduce_mean(beta.log_prob(qbeta)) +
                     tf.reduce_mean(Y.log_prob(Y_ph)) * (N / batch_size))
            loss = tf.Print(loss, [loss])

            optimizer = tf.train.AdamOptimizer(learning_rate)
            gradients, variables = zip(*optimizer.compute_gradients(loss))
            gradients, _ = tf.clip_by_global_norm(gradients,
                                                  opts.clipping_size)
            train = optimizer.apply_gradients(zip(gradients, variables))

            with tf.name_scope('accuracy'):
                holdout_count = tf.reduce_sum(Y_holdout, axis=1)
                pred = tf.reshape(holdout_count, [-1, 1]) * tf.nn.softmax(
                    tf.matmul(G_holdout, qbeta) + qgamma)
                mse = tf.reduce_mean(tf.squeeze(tf.abs(pred - Y_holdout)))
                tf.summary.scalar('mean_absolute_error', mse)

            tf.summary.scalar('loss', loss)
            tf.summary.histogram('qbeta', qbeta)
            tf.summary.histogram('qgamma', qgamma)
            merged = tf.summary.merge_all()

            tf.global_variables_initializer().run()

            writer = tf.summary.FileWriter(save_path, session.graph)
            losses = np.array([0.] * num_iter)
            idx = np.arange(train_metadata.shape[0])
            log_handle = open(os.path.join(save_path, 'run.log'), 'w')
            last_checkpoint_time = 0
            start_time = time.time()
            saver = tf.train.Saver()
            for i in range(num_iter):
                batch_idx = np.random.choice(idx, size=batch_size)
                feed_dict = {
                    Y_ph: y_data[batch_idx].astype(np.float32),
                    G_ph: train_metadata.values[batch_idx].astype(np.float32),
                    Y_holdout: y_test.astype(np.float32),
                    G_holdout: test_metadata.values.astype(np.float32),
                    total_count: y_data[batch_idx].sum(axis=1).astype(np.float32)
                }

                if i % 5000 == 0:
                    _, err, summary, train_loss, grads = session.run(
                        [train, mse, merged, loss, gradients],
                        feed_dict=feed_dict)
                    writer.add_summary(summary, i)
                elif i % 1000 == 0:
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients],
                        feed_dict=feed_dict,
                        options=run_options,
                        run_metadata=run_metadata)
                    writer.add_run_metadata(run_metadata, 'step%d' % i)
                    writer.add_summary(summary, i)
                else:
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients],
                        feed_dict=feed_dict)
                    writer.add_summary(summary, i)

                now = time.time()
                if now - last_checkpoint_time > checkpoint_interval:
                    saver.save(session,
                               os.path.join(opts.save_path, "model.ckpt"),
                               global_step=i)
                    last_checkpoint_time = now
                losses[i] = train_loss

            elapsed_time = time.time() - start_time
            print('Elapsed Time: %f seconds' % elapsed_time)

            # Cross validation
            pred_beta = qbeta.eval()
            pred_gamma = qgamma.eval()
            mse, mrc = cross_validation(
                test_metadata.values, pred_beta, pred_gamma, y_test)
            print("MSE: %f, MRC: %f" % (mse, mrc))
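
# For intuition: the held-out prediction in the 'accuracy' scope above just
# redistributes each sample's sequencing depth according to a softmax
# regression. A NumPy sketch of that step with made-up shapes and data:
import numpy as np
from scipy.special import softmax

G = np.random.randn(5, 3)                        # held-out design matrix (G_holdout)
beta = np.random.randn(3, 8)                     # fitted coefficients (qbeta)
gamma = np.random.randn(1, 8)                    # fitted species biases (qgamma)
depths = np.array([100, 250, 80, 40, 500])       # sequencing depth per sample

probs = softmax(G @ beta + gamma, axis=1)        # per-sample species proportions
pred = depths[:, None] * probs                   # expected counts, as in the 'accuracy' scope
Y_holdout = np.vstack([np.random.multinomial(n, p) for n, p in zip(depths, probs)])
mae = np.mean(np.abs(pred - Y_holdout))          # the 'mean_absolute_error' summary
print(mae)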
def __call__(self, session, trainX, trainY, testX, testY):
    """ Initialize the actual graph

    Parameters
    ----------
    session : tf.Session
        Tensorflow session
    trainX : np.array
        Input training design matrix.
    trainY : np.array
        Output training OTU table, where rows are samples and columns are
        observations.
    testX : np.array
        Input testing design matrix.
    testY : np.array
        Output testing OTU table, where rows are samples and columns are
        observations.
    """
    self.session = session
    self.N, self.p = trainX.shape
    self.D = trainY.shape[1]
    holdout_size = testX.shape[0]

    # Placeholder variables to accept input data
    self.X_ph = tf.constant(trainX, dtype=tf.float32, name='G_ph')
    self.Y_ph = tf.constant(trainY, dtype=tf.float32, name='Y_ph')
    self.X_holdout = tf.constant(testX, dtype=tf.float32, name='G_holdout')
    self.Y_holdout = tf.constant(testY, dtype=tf.float32, name='Y_holdout')

    batch_ids = tf.multinomial(tf.ones([1, self.N]), self.batch_size)
    sample_ids = tf.squeeze(batch_ids)

    Y_batch = tf.gather(self.Y_ph, sample_ids, axis=0)
    X_batch = tf.gather(self.X_ph, sample_ids, axis=0)

    total_count = tf.reduce_sum(Y_batch, axis=1)
    holdout_count = tf.reduce_sum(self.Y_holdout, axis=1)

    # Define PointMass Variables first
    self.qbeta = tf.Variable(tf.random_normal([self.p, self.D - 1]),
                             name='qB')

    # regression coefficients distribution
    beta = Normal(loc=tf.zeros([self.p, self.D - 1]) + self.beta_mean,
                  scale=tf.ones([self.p, self.D - 1]) * self.beta_scale,
                  name='B')

    eta = tf.matmul(X_batch, self.qbeta, name='eta')
    phi = tf.nn.log_softmax(
        tf.concat([tf.zeros([self.batch_size, 1]), eta], axis=1),
        name='phi')
    Y = Multinomial(total_count=total_count, logits=phi, name='Y')

    # cross validation
    with tf.name_scope('accuracy'):
        pred = tf.reshape(holdout_count, [-1, 1]) * tf.nn.softmax(
            tf.concat([tf.zeros([holdout_size, 1]),
                       tf.matmul(self.X_holdout, self.qbeta)],
                      axis=1),
            name='phi')
        self.cv = tf.reduce_mean(tf.squeeze(tf.abs(pred - self.Y_holdout)))
        tf.summary.scalar('mean_absolute_error', self.cv)

    self.loss = -(tf.reduce_sum(beta.log_prob(self.qbeta)) +
                  tf.reduce_sum(Y.log_prob(Y_batch)) *
                  (self.N / self.batch_size))

    optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                       beta1=self.beta_1,
                                       beta2=self.beta_2)
    gradients, variables = zip(*optimizer.compute_gradients(self.loss))
    self.gradients, _ = tf.clip_by_global_norm(gradients, self.clipnorm)
    # apply the clipped gradients
    self.train = optimizer.apply_gradients(zip(self.gradients, variables))

    tf.summary.scalar('loss', self.loss)
    tf.summary.histogram('qbeta', self.qbeta)
    self.merged = tf.summary.merge_all()
    if self.save_path is not None:
        self.writer = tf.summary.FileWriter(self.save_path,
                                            self.session.graph)
    else:
        self.writer = None
    tf.global_variables_initializer().run()
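
# Both graphs above pin the first category by prepending a column of zeros to
# the logits before the softmax (an additive log-ratio style identifiability
# trick, as I read it). A tiny NumPy illustration with made-up numbers:
import numpy as np

def softmax(x):
    z = x - x.max(axis=-1, keepdims=True)
    return np.exp(z) / np.exp(z).sum(axis=-1, keepdims=True)

eta = np.array([[0.5, -1.0, 2.0]])                        # one sample, D-1 = 3 free logits
logits = np.concatenate([np.zeros((1, 1)), eta], axis=1)  # first category fixed as the reference
probs = softmax(logits)                                   # D = 4 probabilities that sum to 1
print(probs, probs.sum())                                 # only D-1 parameters were needed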
class GraphexNMF(object): def __init__(self, edge_idx, edge_vals, U, I, K, hparams, ground_truth=None, simple_graph=False, GPU=False, fix_item_params=False, comp_rem=True, edge_param_splits=1, seed=None, sess=None, device='/cpu:0', ppm=False): """ Model for Sparse Exchangeable bipartite graph """ self.ppm = ppm # Launch the session if sess: self.sess = sess else: if GPU: # For GPU mode config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True config.gpu_options.allocator_type = 'BFC' self.sess = tf.Session(config=config) else: config = tf.ConfigProto(allow_soft_placement=True) self.sess = tf.Session(config=config) self.device = device self.comp_rem = comp_rem self.seed = seed self.K = K self.ground_truth = ground_truth self.simple_graph = simple_graph self.U, self.I = U, I self.fix_item_params = fix_item_params self.hparams = hparams self.edge_param_splits = edge_param_splits # Splitting GPU parameters to fit according to GPU size self.GPU = GPU # store the data here: self.edge_idx_d = edge_idx if self.simple_graph: self.edge_vals_d = np.ones(edge_vals.shape[0], dtype=np.float32) else: self.edge_vals_d = edge_vals.astype(np.float32) # create placeholders for the computational graph with tf.name_scope("placeholders"): with tf.device(self.device): self.edge_idx = tf.placeholder(dtype=tf.int32,shape=(edge_idx.shape[0], edge_idx.shape[1])) self.edge_vals = tf.placeholder(dtype=tf.float32,shape=(edge_idx.shape[0])) if simple_graph: # Degree computation without tensorflow. Only works for simple graphs _,self.user_degree = np.unique(self.edge_idx_d[:,0],return_counts=True) _,self.item_degree = np.unique(self.edge_idx_d[:,1],return_counts=True) self.user_degree = self.user_degree.astype(np.float32) self.item_degree = self.item_degree.astype(np.float32) else: with tf.name_scope("init_deg_comp"): with tf.device(self.device): user_degree, item_degree = compute_degrees2(tf.expand_dims(self.edge_vals, axis=1), self.edge_idx, self.U, self.I) user_degree = tf.squeeze(user_degree) item_degree = tf.squeeze(item_degree) with tf.Session(config=config) as sess: self.user_degree, self.item_degree = sess.run([user_degree, item_degree], feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d}) print repr(np.sum(self.user_degree)) print repr(np.sum(self.item_degree)) self.occupied_pairs = edge_idx.shape[0] # oc_pa self._initialize_parameters(hparams, ppm) # random sample for diagnostics np.random.seed(self.seed) self.included_sample = self.edge_idx_d[np.random.choice(self.edge_idx_d.shape[0], 1000, replace=False)] user_sample = np.random.choice(self.U, 1000) item_sample = np.random.choice(self.I, 1000) self.pair_sample = np.vstack((user_sample, item_sample)).T # appx llhd for assessing convergence with tf.name_scope("appx_llhd"): self._build_appx_elbo() # computational graph for coordinate ascent with tf.name_scope("coordinate_ascent"): self._build_computation_graph() with tf.name_scope("evaluation"): with tf.device(self.device): self._build_predict_edges() self.edge_mean_summary = tf.reduce_mean(self.q_e_aux_vals.mean(), axis=0) with tf.name_scope("recommendation"), tf.device(self.device): self._build_rec_uncensored_edge_pops() self._censored_edge_pops = tf.placeholder(dtype=tf.float32) self._num_rec = tf.placeholder(dtype=tf.int32, shape=()) self._top_k = tf.nn.top_k(self._censored_edge_pops, self._num_rec) # logging self.summary_writer = tf.summary.FileWriter('../logs', graph=self.sess.graph) # Initializing the tensor flow variables with 
tf.device(self.device): init = tf.global_variables_initializer() self.sess.run(init) # qm_du, qm_di were initialized arbitrarily, and are thus inconsistent w initialize value of the edge params # this line fixes that if not(ppm): self.sess.run(self.deg_update, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d}) def _initialize_parameters(self, hparams, ppm): K = np.float32(self.K) su, tu, a, b, self.size_u = (hparams['su'], hparams['tu'], hparams['a'], hparams['b'], hparams['size_u']) si, ti, c, d, self.size_i = (hparams['si'], hparams['ti'], hparams['c'], hparams['d'], hparams['size_i']) with tf.name_scope("hparams"), tf.device(self.device): ## Hyperparameters self.lsu = tf.Variable(softplus_inverse(-hparams['su'] + 1.), dtype=tf.float32, name="lsu") self.su = -tf.nn.softplus(self.lsu) + 1. self.tu = tf.Variable(hparams['tu'], dtype=tf.float32, name="tu") self.a = tf.Variable(hparams['a'], dtype=tf.float32, name="a") self.b = tf.Variable(hparams['b'], dtype=tf.float32, name="b") self.lsi = tf.Variable(softplus_inverse(-hparams['si'] + 1.), dtype=tf.float32, name="lsi") self.si = -tf.nn.softplus(self.lsi) + 1. self.ti = tf.Variable(hparams['ti'], dtype=tf.float32, name="ti") self.c = tf.Variable(hparams['c'], dtype=tf.float32, name="c") self.d = tf.Variable(hparams['d'], dtype=tf.float32, name="d") e = np.sum(self.edge_vals_d, dtype=np.float32) # initial values for total user and total item masses of type K # set st \sum_k tim_k * tum_k = e (which is in fact a bit higher than it oughta be) # and using item_mass / user_mass ~ item_size / user_size (which is only kind of true) tum_init = np.sqrt(self.size_u / self.size_i * e / K) tim_init = np.sqrt(self.size_i / self.size_u * e / K) with tf.name_scope("user_params"), tf.device(self.device): # shape params are read off immediately from update equations # rate params set to be consistent w \gam_i ~ 1, \sum_j beta_jk beta_k ~ \sqrt(e/k) (which is self consistent) if ppm : # If creating the principled predictive (ppm), don't have the user_degree. 
Just create some random initialization for now, we'll update it with a default value self.gam_shp = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_rte") self.gam_rte = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_rte") self.theta_shp = tf.Variable(tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp") self.theta_rte =tf.Variable(tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed), name="theta_rte") self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g") else: user_degs = np.expand_dims(self.user_degree, axis=1) self.gam_shp = tf.Variable((user_degs - su), name="gam_shp") # s^U self.gam_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.U, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="gam_rte") # r^U init_gam_mean = self.gam_shp.initial_value / self.gam_rte.initial_value self.theta_shp = tf.Variable((a + user_degs/K) * tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp") # kap^U self.theta_rte = tf.Variable((b + init_gam_mean * tim_init)*(0.9 + 0.1*tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed)), name="theta_rte") # lam^U self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g") # g with tf.name_scope("item_params"), tf.device(self.device): ## Items if ppm: self.omega_shp = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), name="omega_shp") # s^I self.omega_rte = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="omega_rte") # r^I self.beta_shp = tf.Variable(tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp") # kap^I self.beta_rte = tf.Variable(tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed), name="beta_rte") # lam^I self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w") # w else: item_degs = np.expand_dims(self.item_degree, axis=1) self.omega_shp = tf.Variable((item_degs - si), name="omega_shp") # s^I self.omega_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.I, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="omega_rte") # r^I init_omega_mean = self.omega_shp.initial_value / self.omega_rte.initial_value self.beta_shp = tf.Variable((c + item_degs/K) * tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp") # kap^I self.beta_rte = tf.Variable((d + init_omega_mean*tum_init) * (0.9 + 0.1*tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed)), name="beta_rte") # lam^I self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w") # w with tf.device('/cpu:0'): with tf.variable_scope("edge_params", reuse=None): ## Edges if self.simple_graph: # set init value so there's approximately 1 expected edge between each pair... WARNING: this may be profoundly stupid self.sg_edge_param = tf.get_variable(name="sg_edge_param", shape=[self.occupied_pairs, self.K], dtype=tf.float32, initializer=tf.random_normal_initializer(mean=-np.log(K), stddev=1. / K, seed=self.seed), partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0)) else: self.lphi = tf.get_variable(name="lphi", shape=[self.occupied_pairs, self.K], dtype=tf.float32, initializer=tf.random_normal_initializer(mean=0, stddev=1. 
/ K, seed=self.seed), partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0)) with tf.name_scope("variational_post"), tf.device(self.device): # Variational posterior distributions self.q_gam = Gamma(concentration=self.gam_shp, rate=self.gam_rte, name="q_gam") self.q_theta = Gamma(concentration=self.theta_shp, rate=self.theta_rte, name="q_theta") self.q_g = PointMass(self.g, name="q_g") self.q_omega = Gamma(concentration=self.omega_shp, rate=self.omega_rte, name="q_omega") self.q_beta = Gamma(concentration=self.beta_shp, rate=self.beta_rte, name="q_beta") self.q_w = PointMass(self.w, name="q_w") if self.simple_graph: self.q_e_aux_vals = tPoissonMulti(log_lams=self.sg_edge_param, name="q_e_aux_vals") # q_edges_aux_flat else: self.q_e_aux_vals = Multinomial(total_count=self.edge_vals, logits=self.lphi, name="q_e_aux_vals") # q_edges_aux_flat self.q_e_aux_vals_mean = self.q_e_aux_vals.mean() with tf.name_scope("degree_vars"): # create some structures to make it easy to work with the expected value (wrt q) of the edges # qm_du[u,k] is the expected weighted degree of user u counting only edges of type k # qm_du[u,k] = E_q[e^k_i.] in the language of the paper # initialized arbitrarily, will override at end of init to set to # we use a tf.Variable here to cache the q_e_aux_vals.mean() value self.qm_du = tf.Variable(tf.ones([self.U, self.K], dtype=tf.float32), name="qm_du") self.qm_di = tf.Variable(tf.ones([self.I, self.K], dtype=tf.float32), name="qm_di") # Total Item Mass: self.i_tot_mass_m = self.q_w.mean() + tf.matmul(self.q_beta.mean(), self.q_omega.mean(), transpose_a=True) # Total User Mass: self.u_tot_mass_m = self.q_g.mean() + tf.matmul(self.q_theta.mean(), self.q_gam.mean(), transpose_a=True) def _build_computation_graph(self): with tf.name_scope("user_update"): with tf.device(self.device): [new_gam_shp, new_gam_rte, new_theta_shp, new_theta_rte, new_g] = user_updates( q_gam=self.q_gam, q_theta=self.q_theta, q_omega=self.q_omega, q_beta=self.q_beta, q_w=self.q_w, qm_du=self.qm_du, a=self.a, b=self.b, su=self.su, tu=self.tu, size=self.size_u, comp_rem=self.comp_rem, n_samp=95 ) # observation: gamma_rte depends on theta, and theta_rte depends on gamma # so these shouldn't update simultaneously # logical division: compute gamma update, then compute theta update. 
# we compute theta_shp as part of gamma update to avoid (huge) repeated computation self.u_update_one = assign_list(vars=[self.gam_shp, self.gam_rte, self.theta_shp, self.g], new_values=[new_gam_shp, new_gam_rte, new_theta_shp, new_g]) self.u_update_two = assign_list(vars=[self.theta_rte], new_values=[new_theta_rte]) with tf.name_scope("item_update"): with tf.device(self.device): [new_omega_shp, new_omega_rte, new_beta_shp, new_beta_rte, new_w] = user_updates( self.q_omega, self.q_beta, self.q_gam, self.q_theta, self.q_g, self.qm_di, self.c, self.d, self.si, self.ti, size=self.size_i, comp_rem=self.comp_rem, n_samp=95) # division into two updates for same reason as users self.i_update_one = assign_list(vars=[self.omega_shp, self.omega_rte, self.beta_shp, self.w], new_values=[new_omega_shp, new_omega_rte, new_beta_shp, new_w]) self.i_update_two = assign_list(vars=[self.beta_rte], new_values=[new_beta_rte]) with tf.name_scope("edge_update"): with tf.device(self.device): # split the edge list to avoid memory issues edge_idx_split = tensor_split(self.edge_idx, self.edge_param_splits) if self.simple_graph: new_sg_edge_params = \ [simple_graph_edge_update(self.q_theta, self.q_beta, self.q_gam, self.q_omega, edge_idx) for edge_idx in edge_idx_split] else: new_lphis = \ [multi_graph_edge_update(self.q_theta, self.q_beta, edge_idx) for edge_idx in edge_idx_split] if self.simple_graph: self.sg_edge_param_update = [sg_edge_param.assign(new_sg_edge_param) for sg_edge_param, new_sg_edge_param in zip(self.sg_edge_param._get_variable_list(), new_sg_edge_params)] else: self.lphi_update = [lphi.assign(new_lphi) for (lphi, new_lphi) in zip(self.lphi._get_variable_list(), new_lphis)] with tf.name_scope("qm_deg_update"): with tf.device(self.device): new_qm_du, new_qm_di = compute_degrees2(self.q_e_aux_vals.mean(), self.edge_idx, self.U, self.I) self.deg_update = assign_list(vars=[self.qm_du, self.qm_di], new_values=[new_qm_du, new_qm_di]) def _fix_post_assigns(self, true_omega, true_beta, users=False, items=True): """ Method to be used for debugging: Fix item parameters to ground truth values """ # fix variational posteriors to be tightly concentrated around true values item_assigns = assign_list([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w], [100 * true_omega, 100 * tf.ones_like(true_omega), 100 * true_beta, 100 * tf.ones_like(true_beta), 0.01 * tf.ones_like(self.w)]) # actually, I'm not sure about this one self.sess.run(item_assigns, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d}) def _edge_prob_samples(self, pred_edges, N=100): """ :param pred_edges: edge list :param N: number of samples :param log: if True, return E[log(p(e_ij = 1 | params)] :return: E[p(e_ij = 1 | sampled_params)] for each ij in pred_edges """ users_idx = pred_edges[:, 0] items_idx = pred_edges[:, 1] # MC estimate # this is logically equivalent to drawing samples from q and then gathering the necessary ones, # but much faster when there are many users and items # relevant params for simulation omega_shp = tf.gather(self.omega_shp, items_idx) omega_rte = tf.gather(self.omega_rte, items_idx) beta_shp = tf.gather(self.beta_shp, items_idx) beta_rte = tf.gather(self.beta_rte, items_idx) gam_shp = tf.gather(self.gam_shp, users_idx) gam_rte = tf.gather(self.gam_rte, users_idx) theta_shp = tf.gather(self.theta_shp, users_idx) theta_rte = tf.gather(self.theta_rte, users_idx) # samples for MC estimate omega_smp = tf.random_gamma([N], omega_shp, omega_rte, seed=self.seed) beta_smp = 
tf.random_gamma([N], beta_shp, beta_rte, seed=self.seed) gam_smp = tf.random_gamma([N], gam_shp, gam_rte, seed=self.seed) theta_smp = tf.random_gamma([N], theta_shp, theta_rte, seed=self.seed) user_weights_s = gam_smp * theta_smp item_weights_s = omega_smp * beta_smp edge_weight_s = tf.reduce_sum(user_weights_s * item_weights_s, axis=2) prob_samp = 1. - tf.exp(-edge_weight_s) return prob_samp def _build_predict_edges(self, N=100): """ Only handles SG Returns prob given an edge list """ with tf.device(self.device): self.pred_edges_ph = tf.placeholder(dtype=tf.int32) # MC estimate self.predict_edges = tf.reduce_mean(self._edge_prob_samples(self.pred_edges_ph, N=N), axis=0) def _build_rec_uncensored_edge_pops(self): """ Builds matrix of expected number of edges between all items and self._rec_users """ with tf.device(self.device): self._rec_users = tf.placeholder(dtype=tf.int32) q_gam_mean = self.q_gam.mean() q_theta_mean = self.q_theta.mean() q_omega_mean = self.q_omega.mean() q_beta_mean = self.q_beta.mean() user_params = tf.gather(q_gam_mean, self._rec_users) * tf.gather(q_theta_mean, self._rec_users) item_params = q_omega_mean * q_beta_mean # edge_pops[user,item] gives the affinity of user to item self._rec_uncensored_edge_pops = tf.matmul(user_params, item_params, transpose_b=True) def _build_appx_elbo(self): """ Returns an estimate of \sum_{e in test_idxs} log(prob(e)) + \sum{e not in test_idxs} log(1-prob(e)) this is not actually the log likelihood because it ignores the contribution of uninstantiated atoms (actually, maybe this is handled after all...) :param test_idxs: tensor of shape [e, 2], indices of edges of graph :return: estimate of \sum_{e in test_idxs} log(prob(e)) + \sum{e not in test_idxs} log(1-prob(e)) """ # MC estimate of contribution from edges # obvious choice: uniformly sample terms... but resulting estimator is super high variance # edges_sample = np.copy(self.edge_idx_d[np.random.choice(self.edge_idx_d.shape[0], 3000, replace=False)]).astype(np.int32) # so instead use p-sampling... although it's unclear whether this really represents a major improvement e = self.edge_vals_d.shape[0] p_inc = np.sqrt(5000. / e) #use about 5000 edges for MC est edges_sample = item_p_sample(user_p_sample(self.edge_idx_d, p_inc)[0], p_inc)[0].astype(np.int32) # clip by value because of numerical issues p_edge_samples = tf.clip_by_value(self._edge_prob_samples(edges_sample), 1e-15, 1.) # reduce_mean is MC estimate over params of model, reduce_sum is summing cont from p-samp edge_llhd_est = 1. / p_inc**2 * tf.reduce_sum(tf.reduce_mean(tf.log(p_edge_samples), axis=0)) # log(1-p_ij) = -lambda_ij, so: tot_lam_sum = tf.reduce_sum(self.i_tot_mass_m*self.u_tot_mass_m) # includes contribution from edges as well as non-edges # subtract off edge contribution: user_params = tf.gather(self.q_gam.mean() * self.q_theta.mean(), self.edge_idx[:,0]) item_params = tf.gather(self.q_omega.mean() * self.q_beta.mean(), self.edge_idx[:,1]) edges_lam_sum = tf.reduce_sum(user_params * item_params) nonedge_llhd_term = -(tot_lam_sum - edges_lam_sum) # hopefully lower variance than direct MC est #\sum_edges log(p_ij) = -\sum_edges lam_ij + \sum_ij log(p_ij / (1-p_ij)) # note: the reduce mean here averages over both the sampled params in p_edge_samples, and over the random choice of edges # edge_llhd_est = -edges_lam_sum + e*tf.reduce_mean(tf.reduce_mean(tf.log(p_edge_samples / (1. 
- p_edge_samples)), axis=0)) self.appx_elbo = [edge_llhd_est, nonedge_llhd_term] def load_pretrained_model(self, gam_shp, gam_rte, theta_shp, theta_rte, g, omega_shp, omega_rte, beta_shp, beta_rte, w): user_assign = assign_list([self.gam_shp, self.gam_rte, self.theta_shp, self.theta_rte, self.g], [gam_shp, gam_rte, theta_shp, theta_rte, g]) item_assign = assign_list([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w], [omega_shp, omega_rte, beta_shp, beta_rte, w]) if self.simple_graph: self.sess.run(self.sg_edge_param_update, feed_dict={self.edge_idx: self.edge_idx_d}) else: self.sess.run(self.lphi_update, feed_dict={self.edge_idx: self.edge_idx_d}) self.sess.run(self.deg_update, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d}) pass def infer(self, n_iter=150): """ Runs the co-ordinate ascent inference on the model. """ if self.ppm: print("Running infer is forbidden for principled predictive model.") return if DEBUG: # fix some variables to their true values self._fix_post_assigns(self.ground_truth['true_omega'], self.ground_truth['true_beta']) with self.sess.as_default(): for i in range(n_iter): # users start_time = time.time() self.sess.run(self.u_update_one, feed_dict={self.edge_idx: self.edge_idx_d}) self.sess.run(self.u_update_two, feed_dict={self.edge_idx: self.edge_idx_d}) # items if not(self.fix_item_params): start_time = time.time() self.sess.run(self.i_update_one, feed_dict={self.edge_idx: self.edge_idx_d}) self.sess.run(self.i_update_two, feed_dict={self.edge_idx: self.edge_idx_d}) # edges start_time = time.time() if self.simple_graph: for sg_edge_param_update in self.sg_edge_param_update: self.sess.run(sg_edge_param_update, feed_dict={self.edge_idx: self.edge_idx_d}) else: for lphi_update in self.lphi_update: self.sess.run(lphi_update, feed_dict={self.edge_idx: self.edge_idx_d}) # mean degree (caching) start_time = time.time() self.sess.run(self.deg_update, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d}) ### Print the total item and user mass ### if np.mod(i, 30) == 0: self._logging(i) print("appx_elbo: {}".format(self.sess.run(self.appx_elbo, feed_dict={self.edge_idx: self.edge_idx_d}))) ## DONE TRAINING self.user_affil_est = to_prob(self.theta_shp / self.theta_rte).eval() self.item_affil_est = to_prob(self.beta_shp / self.beta_rte).eval() if DEBUG: self.true_user_affil = to_prob(self.ground_truth['true_theta']).eval() self.true_item_affil = to_prob(self.ground_truth['true_beta']).eval() # User params gam_shp, gam_rte, theta_shp, theta_rte, g = self.sess.run([self.gam_shp, self.gam_rte, self.theta_shp, self.theta_rte, self.g]) # Item params omega_shp, omega_rte, beta_shp, beta_rte, w = self.sess.run([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w]) return gam_shp, gam_rte, theta_shp, theta_rte, g, omega_shp, omega_rte, beta_shp, beta_rte, w def test_llhd(self, test_idxs): """ Returns an estimate of \sum_{e in test_idxs} log(prob(e)) and of \sum{e not in test_idxs} log(1-prob(e)) :param test_idxs: tensor of shape [e, 2], indices of edges of graph :return: estimate of [\sum_{e in test_idxs} log(prob(e)), \sum{e not in test_idxs} log(1-prob(e))] """ test_idxs_ = np.copy(test_idxs) users = np.unique(test_idxs_[:, 0]) train_idxs = np.copy(self.edge_idx_d[np.in1d(self.edge_idx_d[:, 0], users),:]) for en, user in enumerate(users): test_idxs_[test_idxs_[:, 0] == user, 0] = en train_idxs[train_idxs[:,0] == user, 0] = en matrix = np.ones((users.shape[0], self.I)) 
matrix[train_idxs.T.tolist()] = 0 matrix[test_idxs_.T.tolist()] = 0 all_but_test_idxs = np.array(matrix.nonzero()).T # Select 1000 edges randomly from test_idx to get an estimate of the expected value np.random.seed(self.seed) selected_edges = np.random.choice(test_idxs_.shape[0], min(1000, test_idxs_.shape[0]), replace=False) test_idxs_ = test_idxs_[selected_edges] # Select 1000 edges randomly from all_but_traintest_idx to get an estimate of the expected value selected_edges = np.random.choice(all_but_test_idxs.shape[0], min(1000, all_but_test_idxs.shape[0]), replace=False) all_but_test_idxs = all_but_test_idxs[selected_edges] p_test_idx = self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: test_idxs_}) p_not_test_idx = self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: all_but_test_idxs}) return np.mean(np.log(p_test_idx)), np.mean(np.log(1.-p_not_test_idx)) def recommend(self, K, users=None, excluded_items=None): """ Recommend Top-K Items NOTE: Does not censor train edges while recommending Reasoning: If we pass holdout set as train data and do not run infer, then we don't want to censor "train" edges while making recommendations outputs top K recommendations for each user in users Warning: assumes number of items > K :param users: numpy array, users to make recommendations for :param K: number of recommendations to output :param excluded_items: (optional) numpy array, items to exclude from recommendations """ # sort users and remove redundant (for easy 0-indexing later) # uniq_inv will be used to restore original ordering at output if users is None: users_ = np.unique(self.edge_idx_d[:,0]) uniq_inv = range(users_.shape[0]) else: users_, uniq_inv = np.unique(users, return_inverse=True) # probability of connection for each user in users and all items edge_pops = self.sess.run(self._rec_uncensored_edge_pops, feed_dict={self._rec_users: users_}) # do any necessary additional censoring if excluded_items is not None: edge_pops[:, excluded_items] = 0. recs = self.sess.run(self._top_k, feed_dict={self._censored_edge_pops: edge_pops, self._num_rec: K}) # restore original ordering recs_orig_ordering = recs recs_orig_ordering._replace(indices=recs.indices[uniq_inv, :]) return recs_orig_ordering def nDCG(self, p, users=None, test=None, ranks=None, excluded_items=None): """ Computes the normalized Discounted Cumulative Gain at rank p """ # returns a sorted array if users is None: users = np.unique(self.edge_idx_d[:,0]) if test is None: test = self.edge_idx_d if ranks is None: ranks = self.recommend(p, users, excluded_items).indices nDCG = np.zeros(users.shape[0]) for en, user in enumerate(users): user_test = np.copy(test[test[:, 0] == user]) test_ranks = np.isin(ranks[en, :], user_test[:, 1]).nonzero()[0] + 1 DCG = np.sum(np.log(2.) / (np.log(test_ranks + 1))) num_rel = min(p, user_test.shape[0]) # number of relevant itest_ranks = np.array(range(num_rel)) + 1 iDCG = np.sum(np.log(2.) 
/ (np.log(itest_ranks + 1))) nDCG[en] = DCG / iDCG return nDCG def sample_one(self, user_size = None, item_size = None, eps=1e-8): """ Draw a posterior sample from the fitted model :param user_size: float, user size :param item_size: float, item size :param eps: float, approximation level for ggp (default 1e-8); atom weights smaller than this are ignored :return: An approximate sample of the multigraph with the associated parameters a numpy array [user_idx, item_idx, num_edges] of length equal to the total occupied pairs """ if user_size is None: _user_size = self.size_u else: _user_size = user_size if item_size is None: _item_size = self.size_i else: _item_size = item_size i_mass_samp = self.sess.run(self.q_omega.sample(seed=self.seed) * self.q_beta.sample(seed=self.seed)) u_mass_samp = self.sess.run(self.q_gam.sample(seed=self.seed) * self.q_theta.sample(seed=self.seed)) i_mass_tots = np.sum(i_mass_samp, 0) # total mass of each type in items u_mass_tots = np.sum(u_mass_samp, 0) """ edges between instantiated vertices """ # total number of edges of each type tot_edges_mean = u_mass_tots * i_mass_tots tot_edges = np.random.poisson(tot_edges_mean) # K probability distributions over items / users i_probs = i_mass_samp / i_mass_tots i_probs[i_probs < 1e-8] = 0 # numerical precision hack u_probs = u_mass_samp / u_mass_tots u_probs[u_probs < 1e-8] = 0 # numerical precision hack # assign edges to pairs item_assignments = [np.random.choice(self.I, size=tot_edges[k],replace=True,p=i_probs[:,k]) for k in range(self.K)] user_assignments = [np.random.choice(self.U, size=tot_edges[k],replace=True,p=u_probs[:,k]) for k in range(self.K)] edge_list = np.concatenate( [np.vstack([user_assignments[k], item_assignments[k]]) for k in range(self.K)], -1).T """ leftover mass contribution Approximation: uninstantiated points never connect to each other """ if _item_size != 0: # total mass belonging to uninstantiated items in each dimension rem_item_mass = (_item_size / self.size_i) * self.sess.run( self.q_w.sample(seed=self.seed)[:, 0]) # since q_w = size * rate # number of edges between instantiated users and uninstantiated items n_insu_remi = np.random.poisson(u_mass_tots * rem_item_mass) # ids of users connecting to uninstantiated atoms u_assign = np.concatenate([np.random.choice(self.U, size=n_insu_remi[k], replace=True, p=u_probs[:, k]) for k in range(self.K)]) """ it remains to assign the termini to atoms in the uninstantiated part of the marked GGPs strategy: simulate the posterior marked GGPs, and use the same multinomial assignment warning: this is computationally pricey """ # sample from the point process of atoms that failed to connect to anything when the dataset was originally generated si, ti, c, d = self.sess.run([self.si, self.ti, self.c, self.d]) new_ggp = sample_ggp(_item_size, si, ti, eps) sim_marks = np.random.gamma(shape=c, scale=1./d, size=new_ggp.shape + (self.K,)) atom_weights = np.expand_dims(new_ggp,1) * sim_marks # uninstantiated atoms not_inc_prob = np.exp(-np.sum(atom_weights * u_mass_tots, axis=1)) # probability each item atom failed to connect to any user uninstant_atom_weights = atom_weights[np.nonzero(np.random.binomial(1,p=not_inc_prob))] # weights uninstant_atom_dist = uninstant_atom_weights / np.sum(uninstant_atom_weights, 0) # K probability dists # assign edges to these new atoms in the usual multinomial way i_rem_assign = np.concatenate([np.random.choice(uninstant_atom_dist.shape[0], size=n_insu_remi[k], replace=True, p=uninstant_atom_dist[:, k]) for k in range(self.K)]) # 
these atoms should have labels not already taken by any previously instantiated atom i_rem_assign += self.I # and now compile the edge list insu_remi = np.vstack([u_assign , i_rem_assign]).T edge_list = np.concatenate([edge_list, insu_remi], axis=0) # repeat this for instantiated items + remaining users if _user_size != 0: rem_user_mass = (_user_size / self.size_u) * self.sess.run(self.q_g.sample(seed=self.seed)[:, 0]) # number of edges connecting to previously uninstantiated atoms n_insi_remu = np.random.poisson(i_mass_tots * rem_user_mass) # instantiated items, remaining users # ids of atoms connecting to uninstantiated users i_assign = np.concatenate([np.random.choice(self.I, size=n_insi_remu[k], replace=True, p=i_probs[:, k]) for k in range(self.K)]) """ assign the termini to atoms in the uninstantiated part of the marked GGPs """ # sample from the point process of atoms that failed to connect to anything when the dataset was originally generated su, tu, a, b = self.sess.run([self.su, self.tu, self.a, self.b]) new_ggp = sample_ggp(_user_size, su, tu, eps) sim_marks = np.random.gamma(shape=a, scale=1./b, size=new_ggp.shape + (self.K,)) atom_weights = np.expand_dims(new_ggp,1) * sim_marks not_inc_prob = np.exp(-np.sum(atom_weights * i_mass_tots, axis=1)) # probability each user atom failed to connect to any item # uninstantiated atoms uninstant_atom_weights = atom_weights[np.nonzero(np.random.binomial(1,p=not_inc_prob))] # weights uninstant_atom_dist = uninstant_atom_weights / np.sum(uninstant_atom_weights, 0) # K probability dists # now assign to these new atoms in the usual multinomial way u_rem_assign = np.concatenate([np.random.choice(uninstant_atom_dist.shape[0], size=n_insi_remu[k], replace=True, p=uninstant_atom_dist[:, k]) for k in range(self.K)]) # these atoms should have labels not already taken by any previously instantiated atom u_rem_assign += self.U # and now do the edge assignment insi_remu = np.vstack([u_rem_assign, i_assign]).T edge_list = np.concatenate([edge_list, insi_remu], axis=0) # cleanup uniques = np.unique(edge_list, return_counts=True, axis=0) return np.hstack([uniques[0], np.expand_dims(uniques[1], 1)]) def principled_predictive_model(self, test_look_edge_idx, test_look_edge_vals, test_holdout, user_update_iters=100, p=0.8, free_model_resources = True, device='/cpu:0', seed=None): """ Idea: data is originally divided into a test and train set using p-sampling of the users. Test set is further divided into test_lookup and test_holdout using p-sampling of the items in test set. The object owning this method has been trained on the train set. We now further divide the test set into test_look, which will be used to propagate the fitted model to get parameter values for the users in the test set, and test_holdout, which we use to assess our algorithm using item . This function returns a GNMF object on [items in test, users in test] to be used for further prediction. The item parameters are inherited from the trained model. The user parameters are set to be compatible with the item parameters using the usual edge+user update scheme. 
Remark: we return a model that includes all the items (rather than just the ones in test_look) because the items in test_holdout generally contain items not in test_look :param test_look_edge_idx: :param test_look_edge_vals: :param user_update_iters: number of iterations used to set users to be compatible w items :return: """ """ WARNING: self.hparams doesn't reflect any updates that have happened to the hyperparams, so if we ever write tuning code we'll have to be cognizant of this """ U = np.unique(test_look_edge_idx[:, 0]).shape[0] I = np.unique(test_look_edge_idx[:, 1]).shape[0] """ First, propogate the trained item values to the test set users """ lookup_items = np.unique(test_look_edge_idx[:,1]) # items connected to any lookup user lookup_I = lookup_items.shape[0] # make the lookup item labels contiguous for passing into GNMF (zero indexing) [lookup_relabel, convert] = zero_index(test_look_edge_idx, 1) with tf.variable_scope("holdout_fitter"): holdout_hparams = self.hparams.copy() holdout_hparams['size_i'] = p * self.hparams['size_i'] holdout_hparams['size_u'] = (1.-p) / p * self.hparams['size_u'] with GraphexNMF(lookup_relabel, test_look_edge_vals, U, lookup_I, self.K, holdout_hparams, ground_truth=None, simple_graph=self.simple_graph, GPU=self.GPU, comp_rem=False, # comp_rem won't work because item weights are wrong fix_item_params=True, device=device, seed=seed) \ as holdout_fitter: # item parameters for the items in the lookup set omega_shp_lookup_op = tf.gather(self.omega_shp, lookup_items) omega_rte_lookup_op = tf.gather(self.omega_rte, lookup_items) beta_shp_lookup_op = tf.gather(self.beta_shp, lookup_items) beta_rte_lookup_op = tf.gather(self.beta_rte, lookup_items) w_lookup_op = p * self.w # w is implicitly item size times w, size transforms as s -> p*s under p-sampling # run it [omega_shp_lookup, omega_rte_lookup, beta_shp_lookup, beta_rte_lookup, w_lookup] = self.sess.run( [omega_shp_lookup_op, omega_rte_lookup_op, beta_shp_lookup_op, beta_rte_lookup_op, w_lookup_op]) # fix the item parameters to the fitted values item_assign = assign_list([holdout_fitter.omega_shp, holdout_fitter.omega_rte, holdout_fitter.beta_shp, holdout_fitter.beta_rte, holdout_fitter.w], [omega_shp_lookup, omega_rte_lookup, beta_shp_lookup, beta_rte_lookup, w_lookup]) holdout_fitter.sess.run(item_assign) # infer the user parameters holdout_fitter.infer(user_update_iters) [fit_gam_shp, fit_gam_rte, fit_theta_shp, fit_theta_rte, fit_g] = holdout_fitter.sess.run( [holdout_fitter.gam_shp, holdout_fitter.gam_rte, holdout_fitter.theta_shp, holdout_fitter.theta_rte, holdout_fitter.g]) """ Next, return the model that we'll use for prediction by taking the item values from the original trained model, and the user values from the holdout_fitter """ test_holdout_users = np.unique(test_holdout[:,0]) test_holdout_items = np.unique(test_holdout[:,1]) holdout_U = test_holdout_users.shape[0] holdout_I = test_holdout_items.shape[0] # fix the item parameters to the fitted values - only users in holdout [omega_shp, omega_rte, beta_shp, beta_rte, w] = \ self.sess.run([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w]) omega_shp = omega_shp[test_holdout_items] omega_rte = omega_rte[test_holdout_items] beta_shp = beta_shp[test_holdout_items] beta_rte = beta_rte[test_holdout_items] # fix the user parameters to the fitted values - only users in holdout gam_shp = fit_gam_shp[test_holdout_users] gam_rte = fit_gam_rte[test_holdout_users] theta_shp = fit_theta_shp[test_holdout_users] theta_rte = 
fit_theta_rte[test_holdout_users] g = fit_g # make the holdout item labels contiguous for passing into GNMF (zero indexing) [holdout_relabel, convert_users] = zero_index(test_holdout, 0) [holdout_relabel, convert_items] = zero_index(holdout_relabel, 1) # release the session to free up resources to do recommendation with. # this is a bit nasty, and is used in part 'cause sess.close() doesn't work properly # WARNING: I'm not sure what happens if this command is run on a server... might be a good way of making enemies if free_model_resources : tf.Session.reset(None) with tf.variable_scope("ppm_init"): ppm_hparams = self.hparams.copy() ppm_hparams['size_i'] = (1.-p) * ppm_hparams['size_i'] # 1-p of items get into the holdout ppm_hparams['size_u'] = (1.-p) / p * ppm_hparams['size_u'] # 1-p of users get into the holdout (and self.hparams[size_u] is size of *train*) # passing in holdout data so we can compute appx_llhd for holdout. # IMPORTANT: MUST NOT RUN ppm.infer()!! # We do not want to update user and item parameters based on holdout dataset # Holdout is strictly for testing ppm = GraphexNMF(holdout_relabel[:,:2], holdout_relabel[:,2], holdout_U, holdout_I, self.K, ppm_hparams, ground_truth=None, simple_graph=self.simple_graph, GPU=self.GPU, comp_rem=False, fix_item_params=False, device=device, seed=seed, ppm=True) # correct size factor on w w = (1. - p) * w ppm_item_assign = assign_list( [ppm.omega_shp, ppm.omega_rte, ppm.beta_shp, ppm.beta_rte, ppm.w], [omega_shp, omega_rte, beta_shp, beta_rte, w]) # correct size factor on g fit_g = (1. - p) / p * fit_g # fix the user parameters to the fitted values ppm_user_assign = assign_list( [ppm.gam_shp, ppm.gam_rte, ppm.theta_shp, ppm.theta_rte, ppm.g], [gam_shp, gam_rte, theta_shp, theta_rte, g]) ppm.sess.run([ppm_user_assign, ppm_item_assign]) # edge updates... strictly speaking, this doesn't matter for recommendations if ppm.simple_graph: ppm.sess.run(ppm.sg_edge_param_update, feed_dict={ppm.edge_idx: ppm.edge_idx_d}) else: ppm.sess.run(ppm.lphi_update, feed_dict={ppm.edge_idx: ppm.edge_idx_d}) ppm.sess.run(ppm.deg_update, feed_dict={ppm.edge_vals: ppm.edge_vals_d, ppm.edge_idx: ppm.edge_idx_d}) return ppm def _logging(self, itr): print("----------------------------------------------------------------") print("ITERATION #{}".format(itr)) print("mean community edge weights:{}").format( self.sess.run(self.edge_mean_summary, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d})) print("----------------------------------------------------------------") print("P(inclusion | included): {}").format( np.mean(self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: self.included_sample}))) print("----------------------------------------------------------------") print("P(inclusion | random pair): {}").format( np.mean(self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: self.pair_sample}))) print("----------------------------------------------------------------") def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.sess.close()
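
# A usage sketch for GraphexNMF (my own; the toy data, hyperparameter values
# and iteration counts are made up, and the hparams keys are simply the ones
# the constructor reads). It assumes the Python-2-style print statements in
# the class have been ported to Python 3.
import numpy as np

rng = np.random.RandomState(0)
n_pairs = 300
edge_idx = np.unique(np.column_stack([rng.randint(0, 50, n_pairs),
                                      rng.randint(0, 40, n_pairs)]), axis=0)   # (user, item) pairs
edge_vals = (rng.poisson(2.0, edge_idx.shape[0]) + 1).astype(np.float32)       # edge multiplicities

hparams = {'su': 0.5, 'tu': 1.0, 'a': 0.3, 'b': 1.0, 'size_u': 50.,
           'si': 0.5, 'ti': 1.0, 'c': 0.3, 'd': 1.0, 'size_i': 40.}

with GraphexNMF(edge_idx, edge_vals, U=50, I=40, K=5,
                hparams=hparams, seed=0) as model:
    model.infer(n_iter=30)            # coordinate-ascent variational updates
    recs = model.recommend(10)        # top-10 item recommendations per observed user
    print(recs.indices.shape)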