import load_data
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# Saved result objects, loaded from bin/results/ via load_data.load_object.
gold_set = load_data.load_object("bin/results/goldset.pk1")
feat1 = load_data.load_object("bin/results/feat1.pk1")
feat2 = load_data.load_object("bin/results/feat2.pk1")
feat3 = load_data.load_object("bin/results/feat3.pk1")
rand = load_data.load_object("bin/results/random_dec_search_1ktrials.pk1")
largest_deg_1k = load_data.load_object("bin/results/neighbor_with_largest_degree_1000pairs.pk1")  # 0 successes
largest_deg_100 = load_data.load_object("bin/results/neighbor_with_largest_degree.pk1")
smallest_deg_1k = load_data.load_object("bin/results/neighbor_with_smallest_degree_1ktrials.pk1")  # 0 successes
smallest_deg_100 = load_data.load_object("bin/results/neighbor_with_smallest_degree.pk1")

# text feats 1-3: 100 random article pairs
# random: for each of 100 random pairs, 1000 trials
# graph heuristics: 1000 random article pairs


def get_avg_success_rate(results):
    """Return the pooled success rate across all records in *results*.

    Each record must unpack into five items,
    ``(a1_name, a2_name, suc, fail, path_lengths)``; only the ``suc`` and
    ``fail`` counts are used. The rate is ``sum(suc) / (sum(suc) + sum(fail))``
    over every record, i.e. the counts are pooled before dividing rather than
    averaging per-record rates.

    Args:
        results: iterable of 5-item records as described above.

    Returns:
        The pooled success rate as a float. Returns 0.0 when there are no
        trials at all (empty *results*, or every record has zero successes
        and zero failures) instead of raising ZeroDivisionError.
    """
    total_suc = 0
    total_fail = 0
    for _a1_name, _a2_name, suc, fail, _path_lengths in results:
        total_suc += suc
        total_fail += fail

    total_trials = total_suc + total_fail
    if total_trials == 0:
        # No trials recorded: the rate is undefined, report it as 0.0.
        return 0.0
    # float() keeps this a true division under Python 2 as well.
    return float(total_suc) / total_trials
def load_pairwise_distances():
    """Return the object that load_data.load_object reads from
    ``bin/pairwise_distances.pk1``.
    """
    distances_path = "bin/pairwise_distances.pk1"
    return load_data.load_object(distances_path)
def load_article_pairs():
    """Return the object that load_data.load_object reads from the path
    held in the module-level ``ARTICLE_PAIRS_FILE``.
    """
    pairs_path = ARTICLE_PAIRS_FILE
    return load_data.load_object(pairs_path)
def load_30k_adj_list():
    """Return the object that load_data.load_object reads from the path
    held in the module-level ``ADJ_LIST_30K_FILE``.
    """
    adj_list_path = ADJ_LIST_30K_FILE
    return load_data.load_object(adj_list_path)
def load_30k_articles():
    """Return the object that load_data.load_object reads from the path
    held in the module-level ``ARTICLE_NAMES_30K_FILE``.
    """
    names_path = ARTICLE_NAMES_30K_FILE
    return load_data.load_object(names_path)
import wiki_index import ml import copy import matplotlib.pyplot as plt from collections import Counter print "Starting main.py..." ARTICLE_NAMES_30K_FILE = os.environ['ARTICLE_NAMES_30K'] ADJ_LIST_30K_FILE = os.environ['ADJ_LIST_30K'] ARTICLE_PAIRS_FILE = os.environ['ARTICLE_PAIRS'] GRAPH_OBJECT_FILE = os.environ['GRAPH_OBJECT_FILE'] # Load necessary data structures from file (those computed in load_data) articles = load_data.load_object("bin/article_names.pk1") name_to_type = load_data.load_object("bin/name_to_type.pk1") title_to_linenum = load_data.load_object("bin/title_to_linenum.pk1") linenum_to_title = load_data.load_object("bin/linenum_to_title.pk1") adj_list = load_data.load_object("bin/adj_list.pk1") type_to_depth = load_data.load_object("bin/type_to_depth.pk1") type_to_node = load_data.load_object("bin/type_to_node.pk1") print "Loaded objects from binary files." # Debug: look at adj_list length distribution def print_adj_list_lengths(k): vals = adj_list.values() lengths = [] for v in vals: