def greedy_search(pages): model = train_models(100, ['heuristic'], [classification.get_logistic_regression_model_liblinear])[0] total_states_explored = 0 num_with_paths = 0 total_time = 0 for i in range(100): start_time = datetime.datetime.now() current_article = random.sample(pages, 1)[0] print 'start article: ', current_article print 'goal article: ', GOAL_ARTICLE for j in range(10): links = pages[current_article][1] min_cost = 1000000 for link in links: cost = classification.apply_model([pages[link][2]], model) if cost < min_cost: print 'current min cost: ', min_cost min_cost = cost current_article = link if current_article == GOAL_ARTICLE: break end_time = datetime.datetime.now() total_states_explored += j print 'dist: ',j if j == 9: continue num_with_paths += 1 total_time += int((end_time - start_time).microseconds) print 'av states explored:', float(total_states_explored)/(i+1) print 'percent with paths:', 100*float(num_with_paths)/(i+1), '%' print 'av time:', float(total_time)/(i+1)
def test_models(num_testing_examples, models): print 'generating testing data' training_data = {} for i in range(num_testing_examples): if i % 10 == 0: print 'generated', i, 'examples' start_article = random.sample(pages, 1)[0] search_prob = ucs.SearchProblem(pages, start_article, GOAL_ARTICLE) ucs_prob = ucs.UniformCostSearch() ucs_prob.solve(search_prob) if ucs_prob.totalCost is None: training_data[start_article] = INFINITE_COST continue num_actions = len(ucs_prob.actions) training_data[start_article] = num_actions x = [] y = [] for key, val in training_data.iteritems(): x.append(pages[key][2]) y.append(val) results = {} for i, model in enumerate(models): print 'applying model', i classifications = classification.apply_model(x, model) correct_count = 0 reachable_count = 0 wrong_inf_count = 0 dist = 0 for j in range(len(y)): if y[j] == INFINITE_COST: continue reachable_count += 1 if y[j] == classifications[j]: correct_count += 1 else: if y[j] == INFINITE_COST or classifications[j] == INFINITE_COST: wrong_inf_count += 1 else: dist += abs(y[j] - classifications[j]) results[model] = (correct_count, dist, wrong_inf_count, reachable_count) print type(model) print 'fully correct', 100 * float(correct_count) / reachable_count, '%' print 'dist:', float(dist) / reachable_count print 'wrong inf.s:', 100 * float(wrong_inf_count) / reachable_count, '%' print '' return results
def heuristic(link):
    """Return the model's prediction for the article stored under ``link``.

    ``pages`` and ``model`` are not parameters; they are resolved from the
    surrounding scope.  Index 2 of the page record is wrapped in a
    one-element list before being handed to ``classification.apply_model``
    (presumably a single-sample batch of features -- confirm).  The result is
    returned exactly as ``apply_model`` produced it.
    """
    page_record = pages[link]
    single_sample = [page_record[2]]
    return classification.apply_model(single_sample, model)
def h(v):
    """Return the model's prediction for vertex ``v`` scaled by 1000000.

    ``pv``, ``pages`` and ``model`` are resolved from the surrounding scope.
    ``pv[v]`` is used as the key into ``pages`` (presumably it maps a vertex
    to its article -- confirm).  Index 2 of that page record is passed to
    ``classification.apply_model`` as a one-element list.
    """
    article = pv[v]
    single_sample = [pages[article][2]]
    prediction = classification.apply_model(single_sample, model)
    return 1000000 * prediction