def run_search():
    """Run one decentralized search per article pair and save the outcomes.

    Takes the first 100 pairs returned by ``load_article_pairs()``, maps both
    titles to ids through ``title_to_linenum`` and calls
    ``util.run_decentralized_search`` once for each pair.  For every pair a
    tuple ``(article1_name, article2_name, successes, failures,
    path_lengths)`` is appended to the result list, which is finally handed to
    ``load_data.save_object`` with the path ``bin/results/feat3.pk1``.

    The title lookups and the search call are not wrapped in any exception
    handler here, so an exception raised by them propagates to the caller.

    NOTE(review): the source text of this function had lost its line breaks;
    the summary print and the save are assumed to run once, after the loop,
    because the counters they report are running totals -- confirm.
    """
    pairs = load_article_pairs()
    adjacency = load_30k_adj_list()

    total_successes = 0
    total_failures = 0
    results = []

    for index, (article1_name, article2_name) in enumerate(pairs[:100]):
        # Progress indicator: zero-based position of the current pair.
        # (A parenthesized single argument prints the same text under the
        # Python 2 print statement.)
        print(index)

        src_id = int(title_to_linenum[article1_name])
        dst_id = int(title_to_linenum[article2_name])

        outcome, path_length = util.run_decentralized_search(
            src_id, dst_id, adjacency, linenum_to_title,
            util.get_article_distance)

        # Exactly one search is run per pair, so the per-pair counts are
        # either (0 successes, 1 failure) or (1 success, 0 failures).
        if outcome == "FAILURE":
            total_failures += 1
            record = (article1_name, article2_name, 0, 1, [])
        else:
            total_successes += 1
            record = (article1_name, article2_name, 1, 0, [path_length])
        results.append(record)

    print("%d successes, %d failures" % (total_successes, total_failures))

    # Persist the per-pair records.
    load_data.save_object(results, "bin/results/feat3.pk1")
def run_random_search():
    """Run 1000 decentralized-search trials per article pair and save them.

    For each of the first 100 pairs returned by ``load_article_pairs()`` the
    titles are mapped to ids through ``title_to_linenum`` and
    ``util.run_decentralized_search`` is called 1000 times.  Each completed
    pair contributes one tuple ``(article1_name, article2_name,
    number_of_successes, number_of_failures, successful_path_lengths)``.

    If any trial raises ``KeyError`` the whole pair is dropped: nothing is
    recorded or printed for it.  The title lookups themselves happen before
    the ``try`` and are therefore not covered by that handler.

    The collected list is handed to ``load_data.save_object`` with the path
    ``bin/results/random_dec_search_1ktrials.pk1``.

    NOTE(review): the source text of this function had lost its line breaks;
    the per-pair "successes/failures" print is assumed to sit inside the pair
    loop (its counters are reset for every pair) and the final count and save
    after it -- confirm.
    """
    pairs = load_article_pairs()
    adjacency = load_30k_adj_list()

    # One entry per pair whose 1000 trials all ran without a KeyError.
    results = []

    for article1_name, article2_name in pairs[:100]:
        src_id = int(title_to_linenum[article1_name])
        dst_id = int(title_to_linenum[article2_name])

        path_lengths = []
        successes = 0
        failures = 0

        try:
            for _ in range(1000):
                outcome, path_length = util.run_decentralized_search(
                    src_id, dst_id, adjacency, linenum_to_title,
                    util.get_article_distance)
                if outcome == "FAILURE":
                    failures += 1
                    continue
                successes += 1
                path_lengths.append(path_length)
        except KeyError:
            # Abandon this pair entirely, including any partial counts.
            continue

        results.append(
            (article1_name, article2_name, successes, failures, path_lengths))
        # A parenthesized single argument prints the same text under the
        # Python 2 print statement.
        print("%d successes, %d failures" % (successes, failures))

    print("Number of pairs actually completed: %d" % len(results))

    # Persist the per-pair records.
    load_data.save_object(results, "bin/results/random_dec_search_1ktrials.pk1")
def run_experiment(num_pairs=5):
    """Print search distance next to ontology distance for random pairs.

    Repeatedly draws two different titles at random from ``articles``, maps
    them to ids through ``title_to_linenum`` and runs
    ``util.run_decentralized_search`` on the pair.  When the search succeeds,
    ``get_ontology_distance`` is called for the same two titles and both
    distances are printed.  Sampling continues until ``num_pairs`` searches
    have succeeded; failed or errored searches are reported and do not count.

    Args:
        num_pairs: Number of successful searches to collect.  Defaults to 5,
            the value that used to be hard-coded.

    NOTE(review): if ``articles`` holds fewer than two distinct titles the
    re-draw loop below cannot terminate.
    """
    num_success = 0

    while num_success < num_pairs:
        article1_name = random.choice(articles)
        article2_name = random.choice(articles)
        # Re-draw the second title until the pair is two different articles.
        while article1_name == article2_name:
            article2_name = random.choice(articles)

        # A parenthesized single argument prints the same text under the
        # Python 2 print statement.
        print("Article 1: %s, article 2: %s" % (article1_name, article2_name))

        src_id = int(title_to_linenum[article1_name])
        dst_id = int(title_to_linenum[article2_name])

        # Predicted distance: path length found by the decentralized search.
        (success_or_fail, predicted_distance) = util.run_decentralized_search(
            src_id, dst_id, adj_list, linenum_to_title,
            util.get_article_distance)

        # A None status is reported as a KeyError; presumably the search
        # helper returns None when it hits one -- verify against util.
        # Identity test, since None is a singleton.
        if success_or_fail is None:
            print("KeyError...")
            continue

        if success_or_fail == "FAILURE":
            print("%s. Article 1: %s, Article 2: %s" %
                  (success_or_fail, article1_name, article2_name))
            continue

        # Success: count it and compare against the ontology distance.
        num_success += 1
        # NOTE(review): the %d below requires get_ontology_distance to return
        # a single number -- confirm its return type.
        ontology_distance = get_ontology_distance(article1_name, article2_name)
        print("%s. Article 1: %s, Article 2: %s, Predicted distance = %d, Ontology distance = %d" %
              (success_or_fail, article1_name, article2_name,
               predicted_distance, ontology_distance))