import load_data
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# Experiment result objects, loaded at import time through
# load_data.load_object. Presumably each one is a sequence of
# (a1_name, a2_name, suc, fail, path_lengths) records, the shape that
# get_avg_success_rate iterates over -- TODO confirm against the code that
# wrote these files.
gold_set = load_data.load_object("bin/results/goldset.pk1")
# Results for the three text-feature variants.
feat1 = load_data.load_object("bin/results/feat1.pk1")
feat2 = load_data.load_object("bin/results/feat2.pk1")
feat3 = load_data.load_object("bin/results/feat3.pk1")
# Random-decision search results (file name indicates 1k trials).
rand = load_data.load_object("bin/results/random_dec_search_1ktrials.pk1")
# Graph heuristic "neighbor with largest degree" (file name indicates 1000 pairs).
largest_deg_1k = load_data.load_object("bin/results/neighbor_with_largest_degree_1000pairs.pk1")

# Original author's note: 0 successes (presumably refers to the run loaded
# on the next line -- verify).
largest_deg_100 = load_data.load_object("bin/results/neighbor_with_largest_degree.pk1")

# Graph heuristic "neighbor with smallest degree" (file name indicates 1k trials).
smallest_deg_1k = load_data.load_object("bin/results/neighbor_with_smallest_degree_1ktrials.pk1")

# Original author's note: 0 successes (presumably refers to the run loaded
# on the next line -- verify).
smallest_deg_100 = load_data.load_object("bin/results/neighbor_with_smallest_degree.pk1")


# text feats 1-3: 100 random article pairs
# random: for each of 100 random pairs, 1000 trials
# graph heuristics: 1000 random article pairs
def get_avg_success_rate(results):
    """Return the success rate pooled over every record in *results*.

    The rate is computed from the summed counts (total successes divided by
    total attempts), not as a mean of per-record rates.

    Args:
        results: iterable of 5-tuples
            ``(a1_name, a2_name, suc, fail, path_lengths)``. Only the
            ``suc`` and ``fail`` counts are used.

    Returns:
        float: ``total_suc / (total_suc + total_fail)``, or ``0.0`` when no
        trials were recorded at all (empty *results* or all counts zero).
    """
    total_suc = 0
    total_fail = 0
    for _a1_name, _a2_name, suc, fail, _path_lengths in results:
        total_suc += suc
        total_fail += fail

    total_trials = total_suc + total_fail
    if not total_trials:
        # Nothing was attempted; report 0.0 rather than dividing by zero.
        return 0.0
    # float() keeps true division under Python 2 integer semantics.
    return float(total_suc) / total_trials
Exemple #2
0
def load_pairwise_distances():
    """Return the object load_data.load_object reads from the pairwise-distances file."""
    pairwise_distances = load_data.load_object("bin/pairwise_distances.pk1")
    return pairwise_distances
Exemple #3
0
def load_article_pairs():
    """Return the object load_data.load_object reads from ARTICLE_PAIRS_FILE."""
    article_pairs = load_data.load_object(ARTICLE_PAIRS_FILE)
    return article_pairs
Exemple #4
0
def load_30k_adj_list():
    """Return the object load_data.load_object reads from ADJ_LIST_30K_FILE."""
    adj_list_30k = load_data.load_object(ADJ_LIST_30K_FILE)
    return adj_list_30k
Exemple #5
0
def load_30k_articles():
    """Return the object load_data.load_object reads from ARTICLE_NAMES_30K_FILE."""
    articles_30k = load_data.load_object(ARTICLE_NAMES_30K_FILE)
    return articles_30k
Exemple #6
0
import wiki_index
import ml
import copy
import matplotlib.pyplot as plt
from collections import Counter

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("Starting main.py...")

# File locations are taken from the environment; a missing variable raises
# KeyError here, at import time.
ARTICLE_NAMES_30K_FILE = os.environ['ARTICLE_NAMES_30K']
ADJ_LIST_30K_FILE = os.environ['ADJ_LIST_30K']
ARTICLE_PAIRS_FILE = os.environ['ARTICLE_PAIRS']
GRAPH_OBJECT_FILE = os.environ['GRAPH_OBJECT_FILE']

# Load necessary data structures from file (those computed in load_data)

articles = load_data.load_object("bin/article_names.pk1")
name_to_type = load_data.load_object("bin/name_to_type.pk1")
title_to_linenum = load_data.load_object("bin/title_to_linenum.pk1")
linenum_to_title = load_data.load_object("bin/linenum_to_title.pk1")
adj_list = load_data.load_object("bin/adj_list.pk1")
type_to_depth = load_data.load_object("bin/type_to_depth.pk1")
type_to_node = load_data.load_object("bin/type_to_node.pk1")

print("Loaded objects from binary files.")


# Debug: look at adj_list length distribution
def print_adj_list_lengths(k):
    vals = adj_list.values()
    lengths = []
    for v in vals: