def tag_values(estimator, tags_array, items_array, tag_to_item,
               seeker_profile, tag_pop, outfile):
    '''
    Writes one row of metrics per tag to `outfile`.

    A header row is printed first, followed by one space separated row for
    each entry of `tags_array`.

    Arguments
    ---------
    estimator: object providing `vect_prob_tag(tags)` and
        `vect_prob_tag_given_item(items, tag)`
    tags_array: the tag ids to evaluate, in output order
    items_array: the items handed to `vect_prob_tag_given_item`
    tag_to_item: mapping tag id -> index usable on `seeker_profile` and on
        the result of `vect_prob_tag_given_item` (presumably the ids of the
        items annotated with the tag; confirm with the caller)
    seeker_profile: per-item probabilities; assumed to be a numpy array
        aligned with `items_array`
    tag_pop: mapping tag id -> popularity value, printed as is
    outfile: file like object which receives the rows
    '''
    columns = ('#tag_id', 'rho', 'surprisal', 'dkl', 'dkl*rho',
               'dkl/surprisal', 'n_items', 'prob_tag', 'pop_tag', 'mean_pti')
    print(*columns, file=outfile)

    all_tag_probs = estimator.vect_prob_tag(tags_array)
    for position, tag_id in enumerate(tags_array):
        # p(tag | item) for every item and p(tag) for the current tag
        tag_given_items = estimator.vect_prob_tag_given_item(items_array,
                                                             tag_id)
        tag_prob = all_tag_probs[position]

        # Seeker profile re-weighted by the tag, renormalized to sum to one
        posterior = (tag_given_items / tag_prob) * seeker_profile
        posterior /= posterior.sum()

        # Seeker probabilities restricted to the entries mapped to this tag
        tagged = tag_to_item[tag_id]
        seeker_probs_tagged = seeker_profile[tagged]

        divergence = entropy.kullback_leiber_divergence(posterior,
                                                        seeker_profile)
        rho = np.mean(seeker_probs_tagged)

        # NOTE: this is the mean of the *inverse* surprisal (1 / -log2(p)).
        # Multiplying the divergence by it is what fills the
        # 'dkl/surprisal' column.
        inv_surprisal = np.mean(1.0 / -np.log2(seeker_probs_tagged))

        mean_pti = np.mean(tag_given_items[tagged])
        popularity = tag_pop[tag_id]

        print(tag_id, rho, inv_surprisal, divergence, rho * divergence,
              divergence * inv_surprisal, len(seeker_probs_tagged),
              tag_prob, popularity, mean_pti, file=outfile)
def test_kl3(self):
    '''
    The divergence must be infinite when the first distribution has mass
    on an outcome (index 1) to which the second one assigns zero.
    '''
    first = np.array([0.25, 0.20, 0, 0.55])
    second = np.array([0.20, 0, 0.25, 0.55])

    result = entropy.kullback_leiber_divergence(first, second)
    self.assertAlmostEqual(result, float('inf'))
def test_kl2(self):
    '''
    Compares the divergence against a hand computed base 2 sum. Both
    distributions end with a zero entry, which is left out of the
    reference sum.
    '''
    first = np.array([0.04, 0.16] * 5 + [0])
    second = np.array([0.02, 0.18] * 5 + [0])

    # Reference value: sum of p * log2(p / q) over all but the last entry
    expected = 0
    for p_i, q_i in zip(first[:-1], second[:-1]):
        ratio = p_i / q_i
        expected += p_i * math.log(ratio, 2)

    result = entropy.kullback_leiber_divergence(first, second)
    self.assertAlmostEqual(result, expected)
def tag_values(estimator, tags_array, items_array, tag_to_item,
               seeker_profile, tag_pop, outfile):
    '''
    Writes one row of metrics per tag to `outfile`.

    A header row is printed first, followed by one space separated row for
    each entry of `tags_array`.

    Arguments
    ---------
    estimator: object providing `vect_prob_tag(tags)` and
        `vect_prob_tag_given_item(items, tag)`
    tags_array: the tag ids to evaluate, in output order
    items_array: the items handed to `vect_prob_tag_given_item`
    tag_to_item: mapping tag id -> index usable on `seeker_profile` and on
        the result of `vect_prob_tag_given_item` (presumably the ids of the
        items annotated with the tag; confirm with the caller)
    seeker_profile: per-item probabilities; assumed to be a numpy array
        aligned with `items_array`
    tag_pop: mapping tag id -> popularity value, printed as is
    outfile: file like object which receives the rows
    '''
    columns = ('#tag_id', 'rho', 'surprisal', 'dkl', 'dkl*rho',
               'dkl/surprisal', 'n_items', 'prob_tag', 'pop_tag', 'mean_pti')
    print(*columns, file=outfile)

    all_tag_probs = estimator.vect_prob_tag(tags_array)
    for position, tag_id in enumerate(tags_array):
        # p(tag | item) for every item and p(tag) for the current tag
        tag_given_items = estimator.vect_prob_tag_given_item(items_array,
                                                             tag_id)
        tag_prob = all_tag_probs[position]

        # Seeker profile re-weighted by the tag, renormalized to sum to one
        posterior = (tag_given_items / tag_prob) * seeker_profile
        posterior /= posterior.sum()

        # Seeker probabilities restricted to the entries mapped to this tag
        tagged = tag_to_item[tag_id]
        seeker_probs_tagged = seeker_profile[tagged]

        divergence = entropy.kullback_leiber_divergence(posterior,
                                                        seeker_profile)
        rho = np.mean(seeker_probs_tagged)

        # NOTE: this is the mean of the *inverse* surprisal (1 / -log2(p)).
        # Multiplying the divergence by it is what fills the
        # 'dkl/surprisal' column.
        inv_surprisal = np.mean(1.0 / -np.log2(seeker_probs_tagged))

        mean_pti = np.mean(tag_given_items[tagged])
        popularity = tag_pop[tag_id]

        print(tag_id, rho, inv_surprisal, divergence, rho * divergence,
              divergence * inv_surprisal, len(seeker_probs_tagged),
              tag_prob, popularity, mean_pti, file=outfile)