Example No. 1
def compute_value_function(q_values, ent_wt=0.0):
    if ent_wt > 0:
        # soft (entropy-regularized) max: V(s) = ent_wt * logsumexp(Q(s, .) / ent_wt)
        v_fn = ent_wt * lse((1.0 / ent_wt) * q_values, axis=1, keepdims=False)
    else:
        # hard max over actions
        v_fn = np.max(q_values, axis=1)
    return v_fn
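A minimal usage sketch, assuming (as the snippet implies) that `np` is numpy, `lse` is a log-sum-exp routine such as scipy.special.logsumexp, and compute_value_function above is in scope:

import numpy as np
from scipy.special import logsumexp as lse

q = np.array([[1.0, 2.0, 3.0],
              [0.5, 0.5, 0.5]])

print(compute_value_function(q))              # hard max per row -> [3.0, 0.5]
print(compute_value_function(q, ent_wt=1.0))  # soft max: ent_wt * logsumexp(q / ent_wt)
# The soft value is always >= the hard max and tends to it as ent_wt -> 0.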
Example No. 2
    def predict_b(self, data, return_labels=False, covs=None, bsize=1500):
        """ Predict posterior probabilities or labels given the test data
            (means and covs).

        Args:
            data (np.ndarray): If cov_type is `diag`, shape is
                `n_docs x (2 * dim)`: every row is twice the dimension of
                the latent variable, where the first half is the mean and
                the second half is the log std. dev. Otherwise data
                represents only the means, of shape `n_docs x dim`.
            return_labels (boolean): Returns labels if True,
                else returns posterior probabilities.
            covs (np.ndarray): Shape is `n_docs x dim x dim`. If cov_type is
                `full`, data holds only the means and the covariances are
                passed separately.
            bsize (int): batch size

        Returns:
            labels (np.ndarray): Posterior probabilities or predicted labels
                for n_docs.
        """

        if bsize > data.shape[0]:
            bsize = data.shape[0]

        mus = (data[:, :self.dim]).astype(np.float32)

        # class conditional log-likelihoods
        cc_llh = np.zeros(shape=(mus.shape[0], self.cmus.shape[0]),
                          dtype=np.float32)

        const = -0.5 * self.dim * np.log(2 * np.pi)

        sdoc = 0
        edoc = bsize

        while sdoc < edoc:

            tot_covs = np.zeros(shape=(edoc - sdoc, self.dim, self.dim),
                                dtype=np.float32)

            if self.cov_type == 'diag':
                #  covs = np.exp(2 * data[:, self.dim:])
                for n in range(sdoc, edoc, 1):
                    tot_covs[n - sdoc, :, :] = (
                        self.scov + np.diag(np.exp(2. * data[n, self.dim:])))

            else:
                tot_covs = self.scov + covs[sdoc:edoc, :, :]

            # inv_tot_covs = np.linalg.inv(tot_covs)
            sgn, log_dets = np.linalg.slogdet(tot_covs)

            if (sgn < 0).any():
                print("WARNING: Det of tot_covs is Negative.")
                sys.exit()

            for n in range(sdoc, edoc, 1):  # for each doc in the batch
                tmp = self.cmus - mus[n, :]
                z = (tmp.reshape(tmp.shape[0], tmp.shape[1], 1) @ tmp.reshape(
                    tmp.shape[0], 1, tmp.shape[1]))
                cc_llh[n, :] = const - (
                    0.5 * (log_dets[n - sdoc] +
                           (z * np.linalg.inv(tot_covs[n - sdoc, :, :])).sum(
                               axis=1).sum(axis=1)))
            sdoc += bsize
            edoc += bsize
            if edoc > mus.shape[0]:
                edoc = mus.shape[0]

        if self.est_prior:
            if return_labels:
                ret_val = np.argmax(cc_llh + np.log(self.priors).T, axis=1)
            else:
                cc_llh += np.log(self.priors).T
                ret_val = np.exp(cc_llh.T - lse(cc_llh, axis=1)).T

        else:
            if return_labels:
                ret_val = np.argmax(cc_llh, axis=1)
            else:
                ret_val = np.exp(cc_llh.T - lse(cc_llh, axis=1)).T

        return ret_val
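The inner loop builds batched outer products and contracts them against the inverse total covariance to obtain the Gaussian quadratic form for every class at once. A standalone sketch of that trick, with hypothetical shapes and names (cmus, mu, tot_cov) standing in for the class attributes:

import numpy as np

rng = np.random.default_rng(0)
dim, n_classes = 4, 3
cmus = rng.normal(size=(n_classes, dim))      # class means
mu = rng.normal(size=(dim,))                  # one document's mean
A = rng.normal(size=(dim, dim))
tot_cov = A @ A.T + dim * np.eye(dim)         # a well-conditioned SPD total covariance

tmp = cmus - mu                               # (n_classes, dim)
# batched outer products, as in the snippet: (n_classes, dim, dim)
z = tmp.reshape(n_classes, dim, 1) @ tmp.reshape(n_classes, 1, dim)
quad = (z * np.linalg.inv(tot_cov)).sum(axis=1).sum(axis=1)

# per-class reference: (cmus[i] - mu)^T inv(tot_cov) (cmus[i] - mu)
ref = np.array([t @ np.linalg.inv(tot_cov) @ t for t in tmp])
assert np.allclose(quad, ref)

# class-conditional Gaussian log-likelihoods, assembled as in the snippet
const = -0.5 * dim * np.log(2 * np.pi)
sgn, log_det = np.linalg.slogdet(tot_cov)
cc_llh = const - 0.5 * (log_det + quad)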
Example No. 3
    def predict(self, data, return_labels=False, covs=None):
        """ Predict posterior probabilities or labels given the
             test data (means and covs).

        Args:
            data (np.ndarray): If cov_type is `diag`, shape is
                `n_docs x (2 * dim)`: every row is twice the dimension of
                the latent variable, where the first half is the mean and
                the second half is the log std. dev. Otherwise data
                represents only the means, of shape `n_docs x dim`.
            return_labels (boolean): Returns labels if True,
                else returns posterior probabilities.
            covs (np.ndarray): Shape is `n_docs x dim x dim`. If cov_type is
                `full`, data holds only the means and the covariances are
                passed separately.

        Returns:
            labels (np.ndarray):  Posterior probabilities or predicted
                labels for `n_docs`
        """

        if self.cov_type == 'diag':
            mus = data[:, :self.dim]
            covs = np.exp(2 * data[:, self.dim:])

            tot_covs = np.zeros(shape=(mus.shape[0], self.dim, self.dim),
                                dtype=np.float32)
            for n in range(mus.shape[0]):
                tot_covs[n, :, :] = self.scov + np.diag(covs[n, :])

        else:
            mus = data
            tot_covs = self.scov + covs

        # inv_tot_covs = np.linalg.inv(tot_covs)
        sgn, log_dets = np.linalg.slogdet(tot_covs)

        const = -0.5 * self.dim * np.log(2 * np.pi)

        if (sgn < 0).any():
            print("WARNING: Det of tot_covs is Negative.")
            sys.exit()

        # class conditional log-likelihoods
        cc_llh = np.zeros(shape=(mus.shape[0], self.cmus.shape[0]),
                          dtype=np.float32)

        for n in range(mus.shape[0]):  # for each doc
            tmp = self.cmus - mus[n, :]
            z = (tmp.reshape(tmp.shape[0], tmp.shape[1], 1) @ tmp.reshape(
                tmp.shape[0], 1, tmp.shape[1]))
            cc_llh[n, :] = const - (0.5 * (log_dets[n] + (
                z * np.linalg.inv(tot_covs[n, :, :])).sum(axis=1).sum(axis=1)))

            # cc_llh[n, :] = -log_dets[n]
            # for i in range(self.cmus.shape[0]):  # given a class, i
            #    cc_llh[n, i] -= (np.outer(tmp[i, :], tmp[i, :]) *
            #                     inv_tot_covs[n, :, :]).sum()

        if self.est_prior:
            if return_labels:
                ret_val = np.argmax(cc_llh + np.log(self.priors).T, axis=1)
            else:
                cc_llh += np.log(self.priors).T
                ret_val = np.exp(cc_llh.T - lse(cc_llh, axis=1)).T

        else:
            if return_labels:
                ret_val = np.argmax(cc_llh, axis=1)
            else:
                ret_val = np.exp(cc_llh.T - lse(cc_llh, axis=1)).T

        return ret_val
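When return_labels is False, the class-conditional log-likelihoods (plus log priors, when estimated) are turned into posteriors by subtracting the per-document log-sum-exp. A small sketch of that normalization, assuming `lse` in the snippet is scipy.special.logsumexp:

import numpy as np
from scipy.special import logsumexp as lse

cc_llh = np.array([[-3.0, -1.0, -2.0],        # hypothetical class-conditional
                   [-0.5, -4.0, -0.7]])       # log-likelihoods for two docs

posteriors = np.exp(cc_llh.T - lse(cc_llh, axis=1)).T  # softmax over classes, per row
print(posteriors.sum(axis=1))     # -> [1. 1.]
print(posteriors.argmax(axis=1))  # matches np.argmax(cc_llh, axis=1), the return_labels=True path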
Example No. 4
    def test(self, beam=False, print_paths=False, save_model=True, auc=False):
        batch_counter = 0
        paths = defaultdict(list)
        answers = []
        # feed_dict = {}
        all_final_reward_1 = 0
        all_final_reward_3 = 0
        all_final_reward_5 = 0
        all_final_reward_10 = 0
        all_final_reward_20 = 0
        auc = 0  # overwrites the `auc` argument; accumulates per-query reciprocal ranks below

        total_examples = self.test_environment.total_no_examples
        for episode in tqdm(self.test_environment.get_episodes()):
            batch_counter += 1

            temp_batch_size = episode.no_examples

            query_relation = episode.get_query_relation()
            query_embedding = self.agent.get_query_embedding(query_relation)

            # set initial beam probs
            beam_probs = np.zeros((temp_batch_size * self.test_rollouts, 1))
            # get initial state
            state = episode.get_state()

            mem = self.agent.get_mem_shape()
            agent_mem = np.zeros(
                (mem[0], mem[1], temp_batch_size * self.test_rollouts,
                 mem[3])).astype('float32')
            layer_state = tf.unstack(agent_mem, self.LSTM_layers)
            model_state = [tf.unstack(s, 2) for s in layer_state]
            previous_relation = np.ones(
                (temp_batch_size * self.test_rollouts, ),
                dtype='int64') * self.relation_vocab['DUMMY_START_RELATION']
            self.range_arr_test = np.arange(temp_batch_size *
                                            self.test_rollouts)
            # feed_dict[self.input_path[0]] = np.zeros(temp_batch_size * self.test_rollouts)

            ####logger code####
            if print_paths:
                self.entity_trajectory = []
                self.relation_trajectory = []
            ####################

            self.log_probs = np.zeros(temp_batch_size * self.test_rollouts)

            # for each time step
            for i in range(self.path_length):
                if i == 0:
                    self.first_state_of_test = True

                loss, agent_mem, test_scores, test_action_idx, chosen_relation = self.agent.step(
                    state['next_relations'],
                    state['next_entities'],
                    model_state,
                    previous_relation,
                    query_embedding,
                    state['current_entities'],
                    range_arr=self.range_arr_test,
                    first_step_of_test=self.first_state_of_test)
                agent_mem = tf.stack(agent_mem)
                agent_mem = agent_mem.numpy()
                test_scores = test_scores.numpy()
                test_action_idx = test_action_idx.numpy()
                chosen_relation = chosen_relation.numpy()
                if beam:
                    k = self.test_rollouts
                    new_scores = test_scores + beam_probs
                    if i == 0:
                        idx = np.argsort(new_scores)
                        idx = idx[:, -k:]
                        ranged_idx = np.tile([b for b in range(k)],
                                             temp_batch_size)
                        idx = idx[np.arange(k * temp_batch_size), ranged_idx]
                    else:
                        idx = self.top_k(new_scores, k)

                    y = idx // self.max_num_actions
                    x = idx % self.max_num_actions

                    y += np.repeat([b * k for b in range(temp_batch_size)], k)
                    state['current_entities'] = state['current_entities'][y]
                    state['next_relations'] = state['next_relations'][y, :]
                    state['next_entities'] = state['next_entities'][y, :]

                    agent_mem = agent_mem[:, :, y, :]
                    test_action_idx = x
                    chosen_relation = state['next_relations'][
                        np.arange(temp_batch_size * k), x]
                    beam_probs = new_scores[y, x]
                    beam_probs = beam_probs.reshape((-1, 1))
                    if print_paths:
                        for j in range(i):
                            self.entity_trajectory[j] = self.entity_trajectory[
                                j][y]
                            self.relation_trajectory[
                                j] = self.relation_trajectory[j][y]
                previous_relation = chosen_relation
                layer_state = tf.unstack(agent_mem, self.LSTM_layers)
                model_state = [tf.unstack(s, 2) for s in layer_state]
                ####logger code####
                if print_paths:
                    self.entity_trajectory.append(state['current_entities'])
                    self.relation_trajectory.append(chosen_relation)
                ####################
                state = episode(test_action_idx)
                self.log_probs += test_scores[
                    np.arange(self.log_probs.shape[0]), test_action_idx]
            if beam:
                self.log_probs = beam_probs

            ####Logger code####

            if print_paths:
                self.entity_trajectory.append(state['current_entities'])

            # ask environment for final reward
            rewards = episode.get_reward()  # [B*test_rollouts]
            reward_reshape = np.reshape(
                rewards, (temp_batch_size,
                          self.test_rollouts))  # [orig_batch, test_rollouts]
            self.log_probs = np.reshape(self.log_probs,
                                        (temp_batch_size, self.test_rollouts))
            sorted_indx = np.argsort(-self.log_probs)
            final_reward_1 = 0
            final_reward_3 = 0
            final_reward_5 = 0
            final_reward_10 = 0
            final_reward_20 = 0
            AP = 0
            ce = episode.state['current_entities'].reshape(
                (temp_batch_size, self.test_rollouts))
            se = episode.start_entities.reshape(
                (temp_batch_size, self.test_rollouts))
            for b in range(temp_batch_size):
                answer_pos = None
                seen = set()
                pos = 0
                if self.pool == 'max':
                    for r in sorted_indx[b]:
                        if reward_reshape[b, r] == self.positive_reward:
                            answer_pos = pos
                            break
                        if ce[b, r] not in seen:
                            seen.add(ce[b, r])
                            pos += 1
                if self.pool == 'sum':
                    scores = defaultdict(list)
                    answer = ''
                    for r in sorted_indx[b]:
                        scores[ce[b, r]].append(self.log_probs[b, r])
                        if reward_reshape[b, r] == self.positive_reward:
                            answer = ce[b, r]
                    final_scores = defaultdict(float)
                    for e in scores:
                        final_scores[e] = lse(scores[e])
                    sorted_answers = sorted(final_scores,
                                            key=final_scores.get,
                                            reverse=True)
                    if answer in sorted_answers:
                        answer_pos = sorted_answers.index(answer)
                    else:
                        answer_pos = None

                if answer_pos is not None:
                    if answer_pos < 20:
                        final_reward_20 += 1
                        if answer_pos < 10:
                            final_reward_10 += 1
                            if answer_pos < 5:
                                final_reward_5 += 1
                                if answer_pos < 3:
                                    final_reward_3 += 1
                                    if answer_pos < 1:
                                        final_reward_1 += 1
                if answer_pos is not None:
                    AP += 1.0 / (answer_pos + 1)
                if print_paths:
                    qr = self.train_environment.grapher.rev_relation_vocab[
                        self.qr[b * self.test_rollouts]]
                    start_e = self.rev_entity_vocab[episode.start_entities[
                        b * self.test_rollouts]]
                    end_e = self.rev_entity_vocab[episode.end_entities[
                        b * self.test_rollouts]]
                    paths[str(qr)].append(
                        str(start_e) + "\t" + str(end_e) + "\n")
                    paths[str(qr)].append("Reward:" + str(
                        1 if answer_pos != None and answer_pos < 10 else 0) +
                                          "\n")
                    for r in sorted_indx[b]:
                        indx = b * self.test_rollouts + r
                        if rewards[indx] == self.positive_reward:
                            rev = 1
                        else:
                            rev = -1
                        answers.append(self.rev_entity_vocab[se[b, r]] + '\t' +
                                       self.rev_entity_vocab[ce[b, r]] + '\t' +
                                       str(self.log_probs[b, r]) + '\n')
                        paths[str(qr)].append('\t'.join([
                            str(self.rev_entity_vocab[e[indx]])
                            for e in self.entity_trajectory
                        ]) + '\n' + '\t'.join([
                            str(self.rev_relation_vocab[re[indx]])
                            for re in self.relation_trajectory
                        ]) + '\n' + str(rev) + '\n' +
                                              str(self.log_probs[b, r]) +
                                              '\n___' + '\n')
                    paths[str(qr)].append("#####################\n")

            all_final_reward_1 += final_reward_1
            all_final_reward_3 += final_reward_3
            all_final_reward_5 += final_reward_5
            all_final_reward_10 += final_reward_10
            all_final_reward_20 += final_reward_20
            auc += AP

        all_final_reward_1 /= total_examples
        all_final_reward_3 /= total_examples
        all_final_reward_5 /= total_examples
        all_final_reward_10 /= total_examples
        all_final_reward_20 /= total_examples
        auc /= total_examples
        # if save_model:
        #     if all_final_reward_10 >= self.max_hits_at_10:
        #         self.max_hits_at_10 = all_final_reward_10
        #         self.save_path = self.model_saver.save(sess, self.model_dir + "model" + '.ckpt')

        if print_paths:
            logger.info("[ printing paths at {} ]".format(self.output_dir +
                                                          '/test_beam/'))
            for q in paths:
                j = q.replace('/', '-')
                with codecs.open(self.path_logger_file_ + '_' + j, 'a',
                                 'utf-8') as pos_file:
                    for p in paths[q]:
                        pos_file.write(p)
            with open(self.path_logger_file_ + 'answers', 'w') as answer_file:
                for a in answers:
                    answer_file.write(a)

        with open(self.output_dir + '/scores.txt', 'a') as score_file:
            score_file.write("Hits@1: {0:7.4f}".format(all_final_reward_1))
            score_file.write("\n")
            score_file.write("Hits@3: {0:7.4f}".format(all_final_reward_3))
            score_file.write("\n")
            score_file.write("Hits@5: {0:7.4f}".format(all_final_reward_5))
            score_file.write("\n")
            score_file.write("Hits@10: {0:7.4f}".format(all_final_reward_10))
            score_file.write("\n")
            score_file.write("Hits@20: {0:7.4f}".format(all_final_reward_20))
            score_file.write("\n")
            score_file.write("auc: {0:7.4f}".format(auc))
            score_file.write("\n")
            score_file.write("\n")

        logger.info("Hits@1: {0:7.4f}".format(all_final_reward_1))
        logger.info("Hits@3: {0:7.4f}".format(all_final_reward_3))
        logger.info("Hits@5: {0:7.4f}".format(all_final_reward_5))
        logger.info("Hits@10: {0:7.4f}".format(all_final_reward_10))
        logger.info("Hits@20: {0:7.4f}".format(all_final_reward_20))
        logger.info("auc: {0:7.4f}".format(auc))