def PerformIca(X, Y, num_components, random_state):
    """Fit a single full FastICA model, then derive truncated models that keep
    only the components with the largest absolute kurtosis.

    For each n in ``num_components`` the fitted estimator is patched in place
    with the n retained mixing/unmixing rows so that ``transform`` /
    ``inverse_transform`` operate on the reduced space.  Returns a dict with
    the per-n mixing matrix ("mm"), unmixing matrix ("umm"), projected data
    ("data"), reconstruction SSE, and the full kurtosis vector ("ica_kt_all").
    ``Y`` is accepted for signature symmetry with sibling helpers but unused.
    """
    result = {}
    algo = FastICA(random_state=random_state, max_iter=800)
    algo.fit(X)
    full_mixing = algo.mixing_
    full_unmixing = algo.components_

    sources = algo.transform(X)
    # Rank components by non-Gaussianity: larger |kurtosis| first.
    abs_kurtosis = np.abs(kt(sources))
    order = np.argsort(abs_kurtosis)[::-1]
    result["ica_kt_all"] = abs_kurtosis

    for n in num_components:
        prefix = "ica_{0}_".format(n)
        keep = order[:n]
        mixing = full_mixing[:, keep]
        unmixing = full_unmixing[keep, :]
        # Patch the fitted estimator so transform/inverse_transform use only
        # the retained components.
        algo.components_ = unmixing
        algo.mixing_ = mixing
        result[prefix + "mm"] = mixing
        result[prefix + "umm"] = unmixing

        reduced = algo.transform(X)
        result[prefix + "data"] = reduced
        reconstructed = algo.inverse_transform(reduced)
        result[prefix + "reconstruction_error"] = ComputeReconstructionSSE(X, reconstructed)

    return result
def rank_loss_kt(self, test_fn, test_rows=[]):
    """Mean per-user Kendall tau between predicted and observed ratings.

    Users with no test ratings, or whose true ratings are all tied, are
    skipped; a user whose predictions are all tied contributes tau = 0.
    Returns 0 early if any user has exactly one rating (tau undefined).
    ``test_rows`` is accepted for interface compatibility but unused.

    BUG FIX: the all-tied-predictions branch previously appended 0 and then
    fell through to also append kt() of constant predictions, which is NaN
    and silently corrupted the mean; it now continues to the next user.
    """
    preds = self.preds(test_fn)
    taus = []
    for u in range(self.nu):
        # test_data rows: column 0 = user id, column 2 = rating.
        u_mask = self.test_data[:, 0] == u
        u_ratings = self.test_data[u_mask, :]
        u_preds = preds[u_mask]
        nrat = u_ratings.shape[0]
        if nrat == 1:
            print("Insufficient ratings per user")
            return 0
        elif nrat == 0:
            continue
        elif np.all(u_ratings[:, 2] == u_ratings[0, 2]):
            # All true ratings tied: tau undefined, skip this user.
            continue
        elif np.all(u_preds == u_preds[0]):
            # All predictions tied: count as zero correlation for this user.
            taus.append(0)
            continue
        taus.append(kt(u_preds, u_ratings[:, 2])[0])
    print(len(taus))
    tau = np.mean(np.array(taus))
    return tau
def get_kt(rsm1, rsm2):
    """Kendall tau between two RDM/RSM matrices.

    Vectorizes each symmetric zero-diagonal matrix into its condensed
    upper-triangle form via ``scipy.spatial.distance.squareform`` and then
    correlates the two vectors with Kendall's tau.

    BUG FIX: the original referenced undefined names (``rdm1``/``rdm2``/
    ``vec_rdm1``), overwrote ``vec_rsm1`` instead of assigning ``vec_rsm2``,
    and compared a vector with itself — it always raised NameError or
    returned tau of identical inputs.
    """
    vec_rsm1 = scipy.spatial.distance.squareform(rsm1)
    vec_rsm2 = scipy.spatial.distance.squareform(rsm2)
    k = kt(vec_rsm1, vec_rsm2).correlation
    return k
def get_kendall_tau_measures(self, rankings, competitors):
    """Mean and std of Kendall tau between the original competitor order
    and the ranked list produced for each query.

    ``rankings`` maps query -> ranked list; ``competitors`` maps query ->
    competitor ids used to translate the list into a rank vector.
    Returns (mean_tau, std_tau) over all queries.
    """
    baseline_rank = range(1, params.number_of_competitors + 1)
    taus = []
    for query, ranked_list in rankings.items():
        rank_vector = self.transition_to_rank_vector(
            query, competitors[query], ranked_list)
        tau, _p_value = kt(baseline_rank, rank_vector)
        taus.append(tau)
    return np.mean(taus), np.std(taus)
def get_kt(rsm1, rsm2):
    """Kendall tau between two RDM/RSM matrices using the project's
    ``vectorize`` helper.

    Zeroes each matrix's diagonal IN PLACE (note: mutates the caller's
    arrays) and correlates the vectorized forms with Kendall's tau.

    BUG FIX: the assignment of ``vec_rsm2`` appeared to be commented out
    while still being used in the kt() call, which would raise NameError;
    both vectors are now assigned before use.
    """
    np.fill_diagonal(rsm1, 0)
    np.fill_diagonal(rsm2, 0)
    vec_rsm1 = vectorize(rsm1)
    vec_rsm2 = vectorize(rsm2)
    k = kt(vec_rsm1, vec_rsm2).correlation
    return k
def PerformIca2(X, Y, num_components, random_state):
    """Fit an independent FastICA model for each component count in
    ``num_components``.

    For each n, stores the fitted estimator ("algo") and the reconstruction
    SSE, and prints the per-n error together with the mean absolute kurtosis
    of the projected data.  ``Y`` is unused.  Returns the result dict.
    """
    result = {}
    for n in num_components:
        prefix = "ica_{0}_".format(n)
        model = FastICA(n_components=n, random_state=random_state)
        model.fit(X)
        result[prefix + "algo"] = model

        projected = model.transform(X)
        reconstructed = model.inverse_transform(projected)
        sse = ComputeReconstructionSSE(X, reconstructed)
        result[prefix + "reconstruction_error"] = sse

        abs_kurtosis = np.abs(kt(projected))
        avg_kt = abs_kurtosis.mean()
        print("ICA num dim {0} : reconstruction error {1} avg kt {2}".format(
            str(n), str(sse), str(avg_kt)))
        print(np.sort(abs_kurtosis))
    return result
def test_kt(self):
    """kt (Pearson definition, fisher=False) must agree with get_kurtosis
    on the fixture data to within 1e-6."""
    actual = kt(self.data, fisher=False)
    expected = get_kurtosis(self.data)
    self.assertAlmostEqual(
        actual,
        expected,
        msg="Kurtosis not within bounds",
        delta=1e-6
    )
def competition(self, cost_model):
    """Run the iterated ranking-competition simulation.

    Over ``self.num_of_iterations`` rounds: build per-query budgets, pick
    document features to change via a knapsack formulation, apply the
    changes, re-rank, and measure how the rankings drift (Kendall tau vs.
    the previous round and vs. the original ranking, winner changes,
    winner rank, features changed).

    Returns {model_name: results} where results maps metric name ->
    (x_axis, series) pairs plus two histogram dicts for the final round.

    NOTE(review): ``cost_model`` is never read in this body — presumably a
    leftover or consumed by an override; confirm before removing.
    """
    results = {}
    query_tagged = {}  # NOTE(review): assigned but never used in this method.
    competitors = self.budget_creator.get_competitors_for_query(
        self.score_file, self.number_of_competitors)
    # cp.loads(cp.dumps(...)) round-trips act as deep copies (cp is
    # presumably cPickle — TODO confirm import).  Snapshot the initial
    # rankings and feature vectors before any mutation.
    reference_of_indexes = cp.loads(cp.dumps(competitors, 1))
    document_feature_index = self.budget_creator.index_features_for_competitors(
        competitors, self.data_set_location, True)
    original_vectors = cp.loads(cp.dumps(document_feature_index, -1))
    model_weights_per_fold_index = self.budget_creator.get_chosen_model_weights_for_fold(
        self.chosen_models)
    # Per-iteration metric series.
    x_axis = []
    y_axis = []
    changed_winner_averages = []
    average_distances = []
    original_reference = []
    average_winner_rank = []
    average_feature_number = []
    # Histograms filled only on the final iteration.
    last_winner_original_rank = {}
    original_winner_final_rank = {}
    for iteration in range(0, self.num_of_iterations):
        print "iteration number ", iteration + 1
        sum_of_kendalltau = 0
        average_distance = self.budget_creator.create_budget_per_query(
            self.fraction, document_feature_index)
        cost_index, value_for_change = self.budget_creator.create_items_for_knapsack(
            competitors, document_feature_index, model_weights_per_fold_index,
            self.query_per_fold, original_vectors)
        print "getting features to change"
        features_to_change, avg_feature_num = self.get_features_to_change(
            competitors, cost_index, value_for_change, document_feature_index,
            original_vectors)
        print "got features to change"
        average_feature_number.append(avg_feature_num)
        print "updating competitors"
        # Pass a deep copy so update_competitors cannot alias the old index.
        document_feature_index = self.update_competitors(
            features_to_change, cp.loads(cp.dumps(document_feature_index, -1)),
            value_for_change)
        print "update complete"
        print "getting new rankings"
        competitors_new = self.get_new_rankings(
            reference_of_indexes, document_feature_index,
            model_weights_per_fold_index, self.query_per_fold)
        print "finished new rankings"
        number_of_time_winner_changed = 0
        denominator = 0  # queries with a defined (non-NaN) tau this round
        sum_of_original_kt = 0
        sum_rank_of_winner = 0
        for query in competitors_new:
            # Rank vectors relative to the fixed reference indexing:
            # previous round, this round, and the untouched original.
            old_rank = self.transition_to_rank_vector(
                query, reference_of_indexes, competitors[query])
            new_rank = self.transition_to_rank_vector(
                query, reference_of_indexes, competitors_new[query])
            orig_rank = self.transition_to_rank_vector(
                query, reference_of_indexes, reference_of_indexes[query])
            if iteration + 1 == self.num_of_iterations:
                # Final round only: histogram of which original position the
                # current winner came from, and of the original winner's
                # final rank.  (.index(1)+1 = 1-based position of rank 1.)
                if not last_winner_original_rank.get(
                        new_rank.index(1) + 1, False):
                    last_winner_original_rank[new_rank.index(1) + 1] = 0
                last_winner_original_rank[new_rank.index(1) + 1] += 1
                if not original_winner_final_rank.get(new_rank[0], False):
                    original_winner_final_rank[new_rank[0]] = 0
                original_winner_final_rank[new_rank[0]] += 1
            kendall_tau, p_value = kt(old_rank, new_rank)
            # NaN tau (e.g. constant ranks) is excluded from the average.
            if not math.isnan(kendall_tau):
                sum_of_kendalltau += kendall_tau
                denominator += 1
            if old_rank.index(1) != new_rank.index(1):
                number_of_time_winner_changed += 1
            sum_rank_of_winner += new_rank[0]
            original_kt, p_val = kt(new_rank, orig_rank)
            if not math.isnan(original_kt):
                sum_of_original_kt += original_kt
        print "number of times winner changed ", number_of_time_winner_changed
        # NOTE(review): denominator can be 0 if every query's tau is NaN,
        # which would raise ZeroDivisionError here — confirm inputs.
        average = sum_of_kendalltau / denominator
        average_distances.append(average_distance)
        changed_winner_averages.append(
            float(number_of_time_winner_changed) / denominator)
        average_winner_rank.append(float(sum_rank_of_winner) / denominator)
        x_axis.append(iteration + 1)
        y_axis.append(average)
        original_reference.append(float(sum_of_original_kt) / denominator)
        # This round's rankings become the baseline for the next round.
        competitors = cp.loads(cp.dumps(competitors_new, -1))
    results["kendall"] = (x_axis, y_axis)
    results["cos"] = (x_axis, average_distances)
    results["winner"] = (x_axis, changed_winner_averages)
    results["orig"] = (x_axis, original_reference)
    results["win_rank"] = (x_axis, average_winner_rank)
    results["whoisthewinner"] = last_winner_original_rank
    results["originalwinnerrank"] = original_winner_final_rank
    results["avg_f"] = (x_axis, average_feature_number)
    meta_results = {}
    meta_results[self.budget_creator.model] = results
    return meta_results
# Score grammatical permutations of `sent` (label 1) and sentences from
# bnc_grammatical.txt (label 0) with the language model, then report
# point-biserial and Kendall correlations between labels and scores in
# both log and exp space.
# NOTE(review): indentation was reconstructed from a collapsed paste; the
# grouping below (scores appended only when gt[i] == 1) is inferred from
# the requirement that x, y_log, y_exp stay parallel for pb()/kt() —
# confirm against the original file.
x = []
y_exp = []
y_log = []
for i, perm in enumerate(itertools.permutations(sent)):
    if gt[i] == 1:
        x.append(1)
        score = get_total_prob(model, perm)
        y_log.append(score)
        y_exp.append(math.exp(score))
# NOTE(review): file handle is never closed and except is bare — flagged,
# left unchanged here.
f = open('bnc_grammatical.txt').read().split('\n')
for ff in f:
    try:
        score = get_total_prob(model, ff.strip().lower().split())
        x.append(0)
        y_log.append(score)
        y_exp.append(math.exp(score))
    except:
        print('error occur!')
# for j, perm in enumerate(itertools.permutations(sent_color)):
#     # if x[j] is not None:
#     y_color.append(math.exp(get_total_prob(model, perm)))
print('LOG', pb(x, y_log), kt(x, y_log))
print('EXP', pb(x, y_exp), kt(x, y_exp))