def evaluate_distibution(event_name, words, session, ids):
    """
    Do a complete evaluation of the distribution of words for an event:
    compute the Jensen-Shannon divergence and the Jaccard index against
    the reference timelines.

    :param event_name: Name of the event to be evaluated
    :param words: Number of most common words to compare
    :param session: Passed through to calculate_distribution_event
    :param ids: Passed through to calculate_distribution_event
    :return: None; results are printed
    """
    print(event_name, words)
    words_event, distribution_event, pairs_event = calculate_distribution_event(event_name, session, ids, True)
    path_references = Path(LOCAL_DATA_DIR_2, 'data', event_name, 'summaries', 'reference')
    references_list = [reference for reference in path_references.iterdir() if reference.is_file()]
    event_dist = dit.ScalarDistribution(words_event, distribution_event)
    words_set_event = set(words_event[:words])
    print('Most Common words in event: {}'.format(words_set_event))

    total_dist, all_words = global_distribution(references_list)
    all_words_set = set(all_words[:words])
    jaccard = len(words_set_event.intersection(all_words_set)) / len(words_set_event.union(all_words_set))
    print('Most Common words in all timelines: {}'.format(all_words_set))
    print('Jaccard Index with all timelines: {}'.format(jaccard))
    print('Jensen-Shannon with all timelines: {}'.format(jensen_shannon_divergence([total_dist, event_dist])))

    for reference in references_list:
        words_timeline, probs_timeline, pairs_timeline = calculate_distribution_timeline(event_name, reference)
        dist_timeline = dit.ScalarDistribution(words_timeline, probs_timeline)
        print('----------------------------')
        word_set_timeline = set(words_timeline[:words])
        print(reference.name)
        print('Most Common words in timeline: {}'.format(word_set_timeline))
        print('Jensen-Shannon: {}'.format(jensen_shannon_divergence([dist_timeline, event_dist])))
        jaccard = len(words_set_event.intersection(word_set_timeline)) / len(words_set_event.union(word_set_timeline))
        print('Jaccard Index: {}'.format(jaccard))
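# Self-contained sketch of the comparison pattern used above, with made-up word
# lists and probabilities standing in for the outputs of
# calculate_distribution_event / calculate_distribution_timeline; it only
# illustrates the Jaccard / Jensen-Shannon calls.
import dit
from dit.divergences import jensen_shannon_divergence

words_a = ['storm', 'flood', 'rescue']
words_b = ['storm', 'rescue', 'aid']
dist_a = dit.ScalarDistribution(words_a, [0.5, 0.3, 0.2])
dist_b = dit.ScalarDistribution(words_b, [0.4, 0.4, 0.2])
jaccard = len(set(words_a) & set(words_b)) / len(set(words_a) | set(words_b))
print('Jaccard Index: {}'.format(jaccard))
print('Jensen-Shannon: {}'.format(jensen_shannon_divergence([dist_a, dist_b])))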
def fitness(individual, data):
    individual = vector_to_dna(individual)
    # fitness = np.linalg.norm(target - vector(individual))
    fitness = jensen_shannon_divergence([
        dit.ScalarDistribution(target),
        dit.ScalarDistribution(vector(individual))
    ])
    return fitness
def fitness(individual, data):
    individual = vector_to_dna(individual)
    if mode == "JSD":
        return jensen_shannon_divergence([
            dit.ScalarDistribution(target / len(k)),
            dit.ScalarDistribution(vector(individual) / len(k))
        ])
    elif mode == "ED":
        return np.linalg.norm(target - vector(individual))
    else:
        raise Exception("Fitness mode must be JSD or ED")
def calculate_jsd(input_values, input_probabilities):
    """Calculate the Jensen-Shannon divergence between the input distribution and each row of the table."""
    processed_probabilities = scale_table(input_probabilities)
    x = dit.ScalarDistribution(amino_list, input_values, sample_space=amino_list, sort=True)
    jsd_values = []
    for row in processed_probabilities:
        y = dit.ScalarDistribution(amino_list, row, sample_space=amino_list, sort=True)
        jsd_values.append(jensen_shannon_divergence([x, y]))
    return jsd_values
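# Minimal sketch of the same call pattern with a made-up three-letter alphabet
# standing in for amino_list and hand-written rows standing in for the scaled
# table; it only illustrates the dit API used in calculate_jsd.
import dit
from dit.divergences import jensen_shannon_divergence

toy_alphabet = ['A', 'C', 'G']
x = dit.ScalarDistribution(toy_alphabet, [0.5, 0.3, 0.2],
                           sample_space=toy_alphabet, sort=True)
toy_table = [[0.2, 0.3, 0.5], [0.4, 0.4, 0.2]]
jsd_values = []
for row in toy_table:
    y = dit.ScalarDistribution(toy_alphabet, row, sample_space=toy_alphabet, sort=True)
    jsd_values.append(jensen_shannon_divergence([x, y]))
print(jsd_values)  # one divergence (in bits) per row of the toy table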
                label='LEFT')
h_r = ax.hist(seg_t2_r.flatten(), 100, range=(0, 100), histtype=u'step',
              alpha=1, lw=2, label='RIGHT')
ax.set_xlabel('T2 (ms)')
ax.set_ylabel('# COUNTS')
ax.legend()
plt.draw()
plt.savefig(join(joint_save_folder, 'T2_hist.png'), format='png', dpi=300)

# Compute Jensen-Shannon divergence between the left and right T2 histograms
pmf_l = np.divide(h_l[0], np.sum(h_l[0]))
pmf_r = np.divide(h_r[0], np.sum(h_r[0]))
d_l = Distribution.from_ndarray(pmf_l)
d_r = Distribution.from_ndarray(pmf_r)
JSD = jensen_shannon_divergence([d_l, d_r])

pd_header = ['#counts', 'JS_Divergence']
pd_list = [[np.sum(h_l[0]), JSD], [np.sum(h_r[0]), JSD]]
df = pd.DataFrame(pd_list, columns=pd_header)
df.to_csv(join(joint_save_folder, 'histogram_calculation.csv'))
def calc_stats(sequence, verbose):
    """
    INPUT:
        * sequence - Sequence sampled from seq_gen class
        * verbose - If True, print the empirical statistics
    OUTPUT:
        - Summary statistics, including:
          * js_temp - Jensen-Shannon divergence between the regimes' empirical
            distributions of standards between deviants
    """
    sequence_sub = sequence[sequence[:, 2] != 0.5, :]
    deviants, regime_switches = find_deviants(sequence)

    # Catch trial/regime switch prob
    catch_prob = len(sequence[sequence[:, 2] == 0.5, 0]) / sequence.shape[0]
    switch_prob = regime_switches / sequence.shape[0]

    stim_prob_overall = len(sequence[sequence[:, 2] == 1, 2]) / (
        len(sequence[sequence[:, 2] == 1, 2]) +
        len(sequence[sequence[:, 2] == 0, 2]))

    # 0th Order Stimulus probability (empirical)
    stim_prob_reg0 = np.mean(sequence[sequence[:, 1] == 0, 2])
    stim_prob_reg1 = np.mean(sequence[sequence[:, 1] == 1, 2])

    # 1st Order Stimulus prob (empirical)
    alt_prob_reg0 = np.mean(deviants[deviants[:, 0] == 0, 1])
    alt_prob_reg1 = np.mean(deviants[deviants[:, 0] == 1, 1])

    # Empirical pmf of standards between deviants for both regimes
    reg_0_dev = deviants[deviants[:, 0] == 0, :]
    reg_1_dev = deviants[deviants[:, 0] == 1, :]

    # Average train-length per regime
    avg_train_reg0 = (np.sum(deviants[deviants[:, 0] == 0, 3]) /
                      np.count_nonzero(deviants[deviants[:, 0] == 0, 3]))
    avg_train_reg1 = (np.sum(deviants[deviants[:, 0] == 1, 3]) /
                      np.count_nonzero(deviants[deviants[:, 0] == 1, 3]))

    # Time spent in regimes
    trials_in_reg0 = deviants[deviants[:, 0] == 0, 0]
    time_reg0 = trials_in_reg0.shape[0] / deviants.shape[0]

    try:
        epmf_reg_0_dev = np.histogram(reg_0_dev[:, 3],
                                      bins=int(np.max(reg_0_dev[:, 3])),
                                      density=True)
        epmf_reg_1_dev = np.histogram(reg_1_dev[:, 3],
                                      bins=int(np.max(reg_1_dev[:, 3])),
                                      density=True)
        # Calculate symmetric Jensen-Shannon divergence
        d1 = dit.ScalarDistribution(epmf_reg_0_dev[1][:-1], epmf_reg_0_dev[0])
        d2 = dit.ScalarDistribution(epmf_reg_1_dev[1][:-1], epmf_reg_1_dev[0])
        js_temp = jensen_shannon_divergence([d1, d2])
    except Exception:
        js_temp = None

    if verbose:
        print("Empirical Probabilities: \n"
              " Empirical Catch Prob.: {} \n"
              " Empirical Regime Switch Prob.: {} \n"
              " Empirical Overall High-Intensity Stimulus Prob.: {} \n"
              " Empirical Regime 0 High-Intensity Stimulus Prob.: {} \n"
              " Empirical Regime 1 High-Intensity Stimulus Prob.: {} \n"
              " Empirical Regime 0 Alternation Prob.: {} \n"
              " Empirical Regime 1 Alternation Prob.: {} \n"
              " JS Div. Deviant Waiting Time Distr. between Regimes: {} \n"
              " Time in Regime 0: {} \n"
              " Average Train Length in Regime 0: {} \n"
              " Average Train Length in Regime 1: {}".format(
                  catch_prob, switch_prob, stim_prob_overall, stim_prob_reg0,
                  stim_prob_reg1, alt_prob_reg0, alt_prob_reg1, js_temp,
                  time_reg0, avg_train_reg0, avg_train_reg1))
        print("--------------------------------------------")

    stats_out = {
        "emp_catch_prob": catch_prob,
        "emp_overall_sp": stim_prob_overall,
        "emp_reg0_sp": stim_prob_reg0,
        "emp_reg1_sp": stim_prob_reg1,
        "emp_reg0_ap": alt_prob_reg0,
        "emp_reg1_ap": alt_prob_reg1,
        "js_div": js_temp,
        "avg_train_r0": avg_train_reg0,
        "avg_train_r1": avg_train_reg1
    }
    return stats_out, reg_0_dev, reg_1_dev
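# Small, self-contained variant of the try-block above with made-up waiting
# times and explicit normalisation: dividing the counts by their sum guarantees
# a valid pmf for dit.ScalarDistribution (np.histogram's density=True output
# does not sum to one when the bin width differs from 1).
import numpy as np
import dit
from dit.divergences import jensen_shannon_divergence

waits_reg0 = np.array([1, 2, 2, 3, 4, 2, 4])
waits_reg1 = np.array([1, 1, 2, 3, 3, 4, 2])
edges = np.arange(1, 6)  # shared bin edges so both pmfs use the same support
counts0, _ = np.histogram(waits_reg0, bins=edges)
counts1, _ = np.histogram(waits_reg1, bins=edges)
d0 = dit.ScalarDistribution(edges[:-1], counts0 / counts0.sum())
d1 = dit.ScalarDistribution(edges[:-1], counts1 / counts1.sum())
print(jensen_shannon_divergence([d0, d1]))  # symmetric, reported in bits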
rand_seq_1 = create_random_seq(sl)
rand_seq_2 = create_random_seq(sl)
# kmer_freqs_1 = k_mer_frequencies(rand_seq_1, k, include_missing=True)
# kmer_freqs_2 = k_mer_frequencies(rand_seq_2, k, include_missing=True)
# print(kmer_freqs_1)
# print(kmer_freqs_2)
vector_1 = vector(rand_seq_1, [k])
vector_2 = vector(rand_seq_2, [k])
eds.append(np.linalg.norm(vector_1 - vector_2))
jsds.append(
    jensen_shannon_divergence([
        dit.ScalarDistribution(vector_1),
        dit.ScalarDistribution(vector_2)
    ]))

plt.figure()
plt.scatter(eds, jsds, edgecolor='black', linewidth=1, alpha=0.5,
            facecolor='green')
plt.xlabel('Euclidean Distance')
plt.ylabel('Jensen-Shannon Divergence')
plt.title('JSD vs. ED, k = ' + str(k) + ', seq_len = ' + str(sl))
plt.show()
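# Spelled-out version of the two metrics compared above, on a pair of made-up
# 1-mer frequency vectors (order A, C, G, T) instead of the output of vector();
# a sketch of the Euclidean-distance / JSD computation in isolation.
import numpy as np
import dit
from dit.divergences import jensen_shannon_divergence

freq_1 = np.array([0.25, 0.25, 0.25, 0.25])
freq_2 = np.array([0.40, 0.10, 0.30, 0.20])
print(np.linalg.norm(freq_1 - freq_2))  # Euclidean distance
print(jensen_shannon_divergence([dit.ScalarDistribution(freq_1),
                                 dit.ScalarDistribution(freq_2)]))  # JSD in bits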
def js_divergence(logits, ae_logits):
    a = dit.ScalarDistribution([0, 1], logits)
    b = dit.ScalarDistribution([0, 1], ae_logits)
    return jensen_shannon_divergence([a, b])
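# Hypothetical call: despite the parameter names, the inputs must already be
# probability vectors over the two outcomes (e.g. softmax outputs), since
# dit.ScalarDistribution validates that the pmf sums to one.
print(js_divergence([0.9, 0.1], [0.4, 0.6]))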
## for this, using ln (natural log, i.e. nats)
JS_by_hand = 0.5 * (prob1[0][0][0][0] * torch.log(prob1[0][0][0][0] / ensemble_probs[0][0][0][0])
                    + prob1[0][1][0][0] * torch.log(prob1[0][1][0][0] / ensemble_probs[0][1][0][0])
                    + prob3[0][0][0][0] * torch.log(prob3[0][0][0][0] / ensemble_probs[0][0][0][0])
                    + prob3[0][1][0][0] * torch.log(prob3[0][1][0][0] / ensemble_probs[0][1][0][0]))
print('implemented JS by pytorch: ', JS_Div_loss, ' , implemented by hands:', JS_by_hand)

# for this, using log2: dit reports the divergence in bits, so multiply by
# ln(2) to convert it to nats for comparison with the value above
import dit, numpy as np
from dit.divergences import jensen_shannon_divergence

X = dit.ScalarDistribution(['0', '1'], prob1.numpy().ravel())
Y = dit.ScalarDistribution(['0', '1'], prob3.numpy().ravel())
print('JS-div in log2:', jensen_shannon_divergence([X, Y]),
      ' , in ln: ', jensen_shannon_divergence([X, Y]) * np.log(2))

## image examples:
prob1 = F.softmax(torch.rand(1, 2, 256, 256), 1)
# prob2 = F.softmax(torch.rand(1, 2, 256, 256), 1)
prob2 = copy.deepcopy(prob1)
prob3 = F.softmax(torch.rand(1, 2, 256, 256), 1)
# prob3 = copy.deepcopy(prob1)
ensemble_probs = torch.cat([prob1, prob2, prob3], 0)
distribution_number = ensemble_probs.shape[0]
Mixture_dist = ensemble_probs.mean(0, keepdim=True).expand(
    distribution_number, ensemble_probs.shape[1], ensemble_probs.shape[2],
def compute_jensen_shannon(event, reference_name, summary_name):
    reference_dist, summary_dist = create_distribution(event, reference_name, summary_name)
    return jensen_shannon_divergence([summary_dist, reference_dist])