def compute_value_function(q_values, ent_wt=0.0):
    """Return the state value derived from a table of Q-values.

    With a positive entropy weight the value is the entropy-regularized
    "soft" maximum, ent_wt * logsumexp(Q / ent_wt); otherwise it is the
    ordinary hard maximum over actions.

    Args:
        q_values: array of shape (n_states, n_actions).
        ent_wt: entropy regularization weight; 0 (default) selects hard max.

    Returns:
        np.ndarray of shape (n_states,), one value per row of `q_values`.
    """
    if ent_wt > 0:
        # soft max via log-sum-exp, scaled back by the entropy weight
        return ent_wt * lse(q_values * (1.0 / ent_wt), axis=1, keepdims=False)
    # hard max over the action axis
    return np.max(q_values, axis=1)
def predict_b(self, data, return_labels=False, covs=None, bsize=1500):
    """
    Predict posterior probabilities or labels given the test data (means
    and covs), processing documents in batches of `bsize` to bound the
    memory used by the per-doc total covariance matrices.

    Args:
        data (np.ndarray): If cov_type is `diag` then, shape is
            `n_docs x [twice dim]` (every row is twice the dimension of the
            latent variable, where the first half is mean, and second half
            is log std.dev). Otherwise data represents only means of shape
            n_docs x dim.
        return_labels (boolean): Returns labels if True, else returns
            posterior probabilities.
        covs (np.ndarray): Shape is n_docs x dim x dim. If cov_type is
            full then data represents only means and covs are passed as
            parameters.
        bsize (int): batch size

    Returns:
        labels (np.ndarray): Posterior probabilities or predicted labels
        for n_docs.
    """
    # Never let the batch exceed the number of docs.
    if bsize > data.shape[0]:
        bsize = data.shape[0]
    mus = (data[:, :self.dim]).astype(np.float32)
    # class conditional log-likelihoods, one row per doc, one column per
    # class (self.cmus is assumed n_classes x dim — TODO confirm)
    cc_llh = np.zeros(shape=(mus.shape[0], self.cmus.shape[0]),
                      dtype=np.float32)
    # Gaussian normalization constant -d/2 * log(2*pi)
    const = -0.5 * self.dim * np.log(2 * np.pi)
    sdoc = 0
    edoc = bsize
    # Loop while the current window [sdoc, edoc) is non-empty.
    while sdoc < edoc:
        # Per-doc total covariance (shared cov + per-doc cov) for this batch.
        tot_covs = np.zeros(shape=(edoc - sdoc, self.dim, self.dim),
                            dtype=np.float32)
        if self.cov_type == 'diag':
            # covs = np.exp(2 * data[:, self.dim:])
            # exp(2*log_std) turns log std.dev back into a variance.
            for n in range(sdoc, edoc, 1):
                tot_covs[n - sdoc, :, :] = (
                    self.scov + np.diag(np.exp(2. * data[n, self.dim:])))
        else:
            tot_covs = self.scov + covs[sdoc:edoc, :, :]
        # inv_tot_covs = np.linalg.inv(tot_covs)
        # slogdet avoids overflow for high-dim covariances; a negative sign
        # means a non-positive-definite matrix, so bail out.
        sgn, log_dets = np.linalg.slogdet(tot_covs)
        if (sgn < 0).any():
            print("WARNING: Det of tot_covs is Negative.")
            sys.exit()
        for n in range(sdoc, edoc, 1):
            # for each doc in the batch
            tmp = self.cmus - mus[n, :]
            # Outer products tmp_k tmp_k^T for every class k; multiplying
            # elementwise by the inverse covariance and summing both axes
            # yields the quadratic form (cmu_k - mu_n)^T S^-1 (cmu_k - mu_n).
            z = (tmp.reshape(tmp.shape[0], tmp.shape[1], 1) @ tmp.reshape(
                tmp.shape[0], 1, tmp.shape[1]))
            # NOTE: tot_covs/log_dets are batch-local, hence the n - sdoc.
            cc_llh[n, :] = const - (
                0.5 * (log_dets[n - sdoc] +
                       (z * np.linalg.inv(tot_covs[n - sdoc, :, :])).sum(
                           axis=1).sum(axis=1)))
        # Advance the window; clamp the end to the number of docs so the
        # final partial batch is handled.
        sdoc += bsize
        edoc += bsize
        if edoc > mus.shape[0]:
            edoc = mus.shape[0]
    if self.est_prior:
        # Incorporate estimated log class priors (self.priors presumably
        # shaped (n_classes, 1) given the .T — verify against training code).
        if return_labels:
            ret_val = np.argmax(cc_llh + np.log(self.priors).T, axis=1)
        else:
            cc_llh += np.log(self.priors).T
            # Softmax across classes computed in the log domain for stability.
            ret_val = np.exp(cc_llh.T - lse(cc_llh, axis=1)).T
    else:
        if return_labels:
            ret_val = np.argmax(cc_llh, axis=1)
        else:
            ret_val = np.exp(cc_llh.T - lse(cc_llh, axis=1)).T
    return ret_val
def predict(self, data, return_labels=False, covs=None):
    """
    Predict posterior probabilities or labels given the test data (means
    and covs).

    Args:
        data (np.ndarray): If cov_type is `diag` then, shape is
            `n_docs x twice_dim` (every row is twice the dimension of the
            latent variable, where the first half is mean, and second half
            is log std.dev). Otherwise data represents only means of shape
            n_docs x dim.
        return_labels (boolean): Returns labels if True, else returns
            posterior probabilities.
        covs (np.ndarray): Shape is n_docs x dim x dim. If cov_type is
            full then data represents only means and covs are passed as
            parameters.

    Returns:
        np.ndarray: Posterior probabilities (n_docs x n_classes) or
        predicted labels (n_docs,).
    """
    if self.cov_type == 'diag':
        mus = data[:, :self.dim]
        # exp(2*log_std) recovers per-doc per-dim variances.
        covs = np.exp(2 * data[:, self.dim:])
        # Vectorized diag embedding: scov + diag(covs[n]) for every doc at
        # once, replacing the original per-doc Python loop over np.diag.
        tot_covs = (self.scov +
                    covs[:, :, None] * np.eye(self.dim)).astype(np.float32)
    else:
        mus = data
        tot_covs = self.scov + covs
    # slogdet avoids overflow for high-dim covariances; a negative sign
    # means a non-positive-definite total covariance, so bail out.
    sgn, log_dets = np.linalg.slogdet(tot_covs)
    const = -0.5 * self.dim * np.log(2 * np.pi)
    if (sgn < 0).any():
        print("WARNING: Det of tot_covs is Negative.")
        sys.exit()
    # Batched inverse hoisted out of the per-doc loop (previously the
    # inverse of tot_covs[n] was recomputed inside the loop per doc).
    inv_tot_covs = np.linalg.inv(tot_covs)
    # class conditional log-likelihoods, one row per doc, one column per
    # class (self.cmus is assumed n_classes x dim — TODO confirm)
    cc_llh = np.zeros(shape=(mus.shape[0], self.cmus.shape[0]),
                      dtype=np.float32)
    for n in range(mus.shape[0]):  # for each doc
        diff = self.cmus - mus[n, :]
        # Quadratic form (cmu_k - mu_n)^T S_n^-1 (cmu_k - mu_n) for all
        # classes k in a single einsum.
        quad = np.einsum('ki,ij,kj->k', diff, inv_tot_covs[n], diff)
        cc_llh[n, :] = const - 0.5 * (log_dets[n] + quad)
    if self.est_prior:
        # Incorporate estimated log class priors (self.priors presumably
        # shaped (n_classes, 1) given the .T — verify against training code).
        cc_llh = cc_llh + np.log(self.priors).T
    if return_labels:
        return np.argmax(cc_llh, axis=1)
    # Softmax across classes computed in the log domain for stability.
    return np.exp(cc_llh.T - lse(cc_llh, axis=1)).T
def test(self, beam=False, print_paths=False, save_model=True, auc=False):
    """Evaluate the agent on the test environment.

    Rolls out `self.test_rollouts` paths per query for `self.path_length`
    steps (optionally with beam search), ranks the reached entities by
    accumulated log-probability, and accumulates Hits@{1,3,5,10,20} plus
    the mean reciprocal rank (stored in `auc` despite the name).
    Optionally dumps the decoded paths and answers to disk.

    Args:
        beam (bool): If True, keep only the top-k scoring partial paths
            at every step instead of independent rollouts.
        print_paths (bool): If True, record entity/relation trajectories
            and write them (plus answers) to the path logger files.
        save_model (bool): Unused here (checkpointing code is commented out).
        auc (bool): NOTE(review): this parameter is immediately shadowed by
            the local `auc = 0` accumulator below — effectively unused.
    """
    batch_counter = 0
    paths = defaultdict(list)
    answers = []
    # feed_dict = {}
    all_final_reward_1 = 0
    all_final_reward_3 = 0
    all_final_reward_5 = 0
    all_final_reward_10 = 0
    all_final_reward_20 = 0
    auc = 0
    total_examples = self.test_environment.total_no_examples
    for episode in tqdm(self.test_environment.get_episodes()):
        batch_counter += 1
        temp_batch_size = episode.no_examples
        query_relation = episode.get_query_relation()
        query_embedding = self.agent.get_query_embedding(query_relation)
        # set initial beam probs
        beam_probs = np.zeros((temp_batch_size * self.test_rollouts, 1))
        # get initial state
        state = episode.get_state()
        # Zero-initialized LSTM memory; mem is assumed to be
        # (layers, 2, batch, hidden) given how it is unstacked — TODO confirm.
        mem = self.agent.get_mem_shape()
        agent_mem = np.zeros(
            (mem[0], mem[1], temp_batch_size * self.test_rollouts,
             mem[3])).astype('float32')
        # Split per LSTM layer, then into (cell, hidden) pairs.
        layer_state = tf.unstack(agent_mem, self.LSTM_layers)
        model_state = [tf.unstack(s, 2) for s in layer_state]
        previous_relation = np.ones(
            (temp_batch_size * self.test_rollouts, ),
            dtype='int64') * self.relation_vocab['DUMMY_START_RELATION']
        self.range_arr_test = np.arange(temp_batch_size *
                                        self.test_rollouts)
        # feed_dict[self.input_path[0]] = np.zeros(temp_batch_size * self.test_rollouts)
        ####logger code####
        if print_paths:
            self.entity_trajectory = []
            self.relation_trajectory = []
        ####################
        # Accumulated log-probability of each rollout's path so far.
        self.log_probs = np.zeros(
            (temp_batch_size * self.test_rollouts, )) * 1.0
        # for each time step
        for i in range(self.path_length):
            if i == 0:
                self.first_state_of_test = True
            loss, agent_mem, test_scores, test_action_idx, chosen_relation = self.agent.step(
                state['next_relations'], state['next_entities'],
                model_state, previous_relation, query_embedding,
                state['current_entities'], range_arr=self.range_arr_test,
                first_step_of_test=self.first_state_of_test)
            # .numpy() calls imply TF2 eager tensors coming back from step().
            agent_mem = tf.stack(agent_mem)
            agent_mem = agent_mem.numpy()
            test_scores = test_scores.numpy()
            test_action_idx = test_action_idx.numpy()
            chosen_relation = chosen_relation.numpy()
            if beam:
                k = self.test_rollouts
                # Scores of extending every beam by every action.
                new_scores = test_scores + beam_probs
                if i == 0:
                    # First step: all rollouts of a query are identical, so
                    # take the k best actions of each query's first row.
                    idx = np.argsort(new_scores)
                    idx = idx[:, -k:]
                    ranged_idx = np.tile([b for b in range(k)],
                                         temp_batch_size)
                    idx = idx[np.arange(k * temp_batch_size), ranged_idx]
                else:
                    # presumably returns flat indices of the per-query
                    # top-k entries of new_scores — verify self.top_k
                    idx = self.top_k(new_scores, k)
                # Decompose flat index into (source beam y, action x).
                y = idx // self.max_num_actions
                x = idx % self.max_num_actions
                # Shift beam indices into the global batch layout.
                y += np.repeat([b * k for b in range(temp_batch_size)], k)
                # Reorder every piece of state to follow the kept beams.
                state['current_entities'] = state['current_entities'][y]
                state['next_relations'] = state['next_relations'][y, :]
                state['next_entities'] = state['next_entities'][y, :]
                agent_mem = agent_mem[:, :, y, :]
                test_action_idx = x
                chosen_relation = state['next_relations'][
                    np.arange(temp_batch_size * k), x]
                beam_probs = new_scores[y, x]
                beam_probs = beam_probs.reshape((-1, 1))
                if print_paths:
                    # Reorder already-recorded trajectory steps as well.
                    for j in range(i):
                        self.entity_trajectory[j] = self.entity_trajectory[
                            j][y]
                        self.relation_trajectory[
                            j] = self.relation_trajectory[j][y]
            previous_relation = chosen_relation
            layer_state = tf.unstack(agent_mem, self.LSTM_layers)
            model_state = [tf.unstack(s, 2) for s in layer_state]
            ####logger code####
            if print_paths:
                self.entity_trajectory.append(state['current_entities'])
                self.relation_trajectory.append(chosen_relation)
            ####################
            # Advance the environment with the chosen actions.
            state = episode(test_action_idx)
            self.log_probs += test_scores[
                np.arange(self.log_probs.shape[0]), test_action_idx]
        if beam:
            self.log_probs = beam_probs
        ####Logger code####
        if print_paths:
            self.entity_trajectory.append(state['current_entities'])
        # ask environment for final reward
        rewards = episode.get_reward()  # [B*test_rollouts]
        reward_reshape = np.reshape(
            rewards,
            (temp_batch_size, self.test_rollouts))  # [orig_batch, test_rollouts]
        self.log_probs = np.reshape(self.log_probs,
                                    (temp_batch_size, self.test_rollouts))
        # Rollouts sorted by descending log-probability per query.
        sorted_indx = np.argsort(-self.log_probs)
        final_reward_1 = 0
        final_reward_3 = 0
        final_reward_5 = 0
        final_reward_10 = 0
        final_reward_20 = 0
        AP = 0
        ce = episode.state['current_entities'].reshape(
            (temp_batch_size, self.test_rollouts))
        se = episode.start_entities.reshape(
            (temp_batch_size, self.test_rollouts))
        for b in range(temp_batch_size):
            answer_pos = None
            seen = set()
            pos = 0
            if self.pool == 'max':
                # Rank of the first correct rollout, counting each distinct
                # (wrong) entity only once.
                for r in sorted_indx[b]:
                    if reward_reshape[b, r] == self.positive_reward:
                        answer_pos = pos
                        break
                    if ce[b, r] not in seen:
                        seen.add(ce[b, r])
                        pos += 1
            if self.pool == 'sum':
                # Pool rollouts per entity by log-sum-exp of their scores,
                # then rank the correct entity among all reached entities.
                scores = defaultdict(list)
                answer = ''
                for r in sorted_indx[b]:
                    scores[ce[b, r]].append(self.log_probs[b, r])
                    if reward_reshape[b, r] == self.positive_reward:
                        answer = ce[b, r]
                final_scores = defaultdict(float)
                for e in scores:
                    final_scores[e] = lse(scores[e])
                sorted_answers = sorted(final_scores,
                                        key=final_scores.get,
                                        reverse=True)
                if answer in sorted_answers:
                    answer_pos = sorted_answers.index(answer)
                else:
                    answer_pos = None
            # Nested thresholds: a rank < 1 also counts toward 3/5/10/20.
            if answer_pos != None:
                if answer_pos < 20:
                    final_reward_20 += 1
                    if answer_pos < 10:
                        final_reward_10 += 1
                        if answer_pos < 5:
                            final_reward_5 += 1
                            if answer_pos < 3:
                                final_reward_3 += 1
                                if answer_pos < 1:
                                    final_reward_1 += 1
            if answer_pos == None:
                AP += 0
            else:
                # Reciprocal rank; averaged over all examples this is MRR.
                AP += 1.0 / ((answer_pos + 1))
            if print_paths:
                qr = self.train_environment.grapher.rev_relation_vocab[
                    self.qr[b * self.test_rollouts]]
                start_e = self.rev_entity_vocab[episode.start_entities[
                    b * self.test_rollouts]]
                end_e = self.rev_entity_vocab[episode.end_entities[
                    b * self.test_rollouts]]
                paths[str(qr)].append(
                    str(start_e) + "\t" + str(end_e) + "\n")
                paths[str(qr)].append("Reward:" + str(
                    1 if answer_pos != None and answer_pos < 10 else 0) +
                    "\n")
                for r in sorted_indx[b]:
                    indx = b * self.test_rollouts + r
                    if rewards[indx] == self.positive_reward:
                        rev = 1
                    else:
                        rev = -1
                    answers.append(self.rev_entity_vocab[se[b, r]] + '\t' +
                                   self.rev_entity_vocab[ce[b, r]] + '\t' +
                                   str(self.log_probs[b, r]) + '\n')
                    paths[str(qr)].append('\t'.join([
                        str(self.rev_entity_vocab[e[indx]])
                        for e in self.entity_trajectory
                    ]) + '\n' + '\t'.join([
                        str(self.rev_relation_vocab[re[indx]])
                        for re in self.relation_trajectory
                    ]) + '\n' + str(rev) + '\n' + str(
                        self.log_probs[b, r]) + '\n___' + '\n')
                paths[str(qr)].append("#####################\n")
        all_final_reward_1 += final_reward_1
        all_final_reward_3 += final_reward_3
        all_final_reward_5 += final_reward_5
        all_final_reward_10 += final_reward_10
        all_final_reward_20 += final_reward_20
        auc += AP
    # Normalize the accumulated counts into fractions over the test set.
    all_final_reward_1 /= total_examples
    all_final_reward_3 /= total_examples
    all_final_reward_5 /= total_examples
    all_final_reward_10 /= total_examples
    all_final_reward_20 /= total_examples
    auc /= total_examples
    # if save_model:
    #     if all_final_reward_10 >= self.max_hits_at_10:
    #         self.max_hits_at_10 = all_final_reward_10
    #         self.save_path = self.model_saver.save(sess, self.model_dir + "model" + '.ckpt')
    if print_paths:
        logger.info("[ printing paths at {} ]".format(self.output_dir +
                                                      '/test_beam/'))
        for q in paths:
            # '/' is not allowed in file names; replace it in the relation.
            j = q.replace('/', '-')
            with codecs.open(self.path_logger_file_ + '_' + j, 'a',
                             'utf-8') as pos_file:
                for p in paths[q]:
                    pos_file.write(p)
        with open(self.path_logger_file_ + 'answers', 'w') as answer_file:
            for a in answers:
                answer_file.write(a)
    # Append the metrics to the run's score file and mirror them to the log.
    with open(self.output_dir + '/scores.txt', 'a') as score_file:
        score_file.write("Hits@1: {0:7.4f}".format(all_final_reward_1))
        score_file.write("\n")
        score_file.write("Hits@3: {0:7.4f}".format(all_final_reward_3))
        score_file.write("\n")
        score_file.write("Hits@5: {0:7.4f}".format(all_final_reward_5))
        score_file.write("\n")
        score_file.write("Hits@10: {0:7.4f}".format(all_final_reward_10))
        score_file.write("\n")
        score_file.write("Hits@20: {0:7.4f}".format(all_final_reward_20))
        score_file.write("\n")
        score_file.write("auc: {0:7.4f}".format(auc))
        score_file.write("\n")
        score_file.write("\n")
    logger.info("Hits@1: {0:7.4f}".format(all_final_reward_1))
    logger.info("Hits@3: {0:7.4f}".format(all_final_reward_3))
    logger.info("Hits@5: {0:7.4f}".format(all_final_reward_5))
    logger.info("Hits@10: {0:7.4f}".format(all_final_reward_10))
    logger.info("Hits@20: {0:7.4f}".format(all_final_reward_20))
    logger.info("auc: {0:7.4f}".format(auc))