def greedy_search(pages):
    model = train_models(100, ['heuristic'], [classification.get_logistic_regression_model_liblinear])[0]
    total_states_explored = 0
    num_with_paths = 0
    total_time = 0
    for i in range(100):
        start_time = datetime.datetime.now()
        current_article = random.sample(pages, 1)[0]
        print 'start article: ', current_article
        print 'goal article: ', GOAL_ARTICLE
        for j in range(10):
            links = pages[current_article][1]
            min_cost = 1000000
            for link in links:
                cost = classification.apply_model([pages[link][2]], model)
                if cost < min_cost:
                    print 'current min cost: ', min_cost
                    min_cost = cost
                    current_article = link
            if current_article == GOAL_ARTICLE:
                break
        end_time = datetime.datetime.now()
        total_states_explored += j
        print 'dist: ',j 
        if j == 9:
            continue 
        num_with_paths += 1

        total_time += int((end_time - start_time).microseconds)

    print 'av states explored:', float(total_states_explored)/(i+1)
    print 'percent with paths:', 100*float(num_with_paths)/(i+1), '%'
    print 'av time:', float(total_time)/(i+1)
def test_models(num_testing_examples, models):
    print 'generating testing data'

    training_data = {}
    for i in range(num_testing_examples):
        if i % 10 == 0:
            print 'generated', i, 'examples'
        start_article = random.sample(pages, 1)[0]
        search_prob = ucs.SearchProblem(pages, start_article, GOAL_ARTICLE)
        ucs_prob = ucs.UniformCostSearch()
        ucs_prob.solve(search_prob)

        if ucs_prob.totalCost is None:
            training_data[start_article] = INFINITE_COST
            continue
        num_actions = len(ucs_prob.actions)
        training_data[start_article] = num_actions

    x = []
    y = []
    for key, val in training_data.iteritems():
        x.append(pages[key][2])
        y.append(val)

    results = {}
    for i, model in enumerate(models):
        print 'applying model', i
        classifications = classification.apply_model(x, model)
        correct_count = 0
        reachable_count = 0
        wrong_inf_count = 0
        dist = 0
        for j in range(len(y)):
            if y[j] == INFINITE_COST:
                continue
            reachable_count += 1
            if y[j] == classifications[j]:
                correct_count += 1
            else:
                if y[j] == INFINITE_COST or classifications[j] == INFINITE_COST:
                    wrong_inf_count += 1
                else:
                    dist += abs(y[j] - classifications[j])

        results[model] = (correct_count, dist, wrong_inf_count, reachable_count)
        print type(model)
        print 'fully correct', 100 * float(correct_count) / reachable_count, '%'
        print 'dist:', float(dist) / reachable_count
        print 'wrong inf.s:', 100 * float(wrong_inf_count) / reachable_count, '%'
        print ''

    return results
 def heuristic(link):
     """Apply ``model`` to the single entry ``pages[link][2]``.

     ``pages`` and ``model`` come from the enclosing/module scope.  The result
     is returned exactly as ``classification.apply_model`` produces it for a
     one-element input list (it is not unwrapped here).
     """
     page_entry = pages[link][2]
     single_example = [page_entry]
     return classification.apply_model(single_example, model)
 def h(v):
     """Return 1000000 times the model output for the page mapped from ``v``.

     ``v`` is translated through ``pv`` to a key of ``pages``; index 2 of that
     page record is passed to ``classification.apply_model`` as a one-element
     list.  ``pages``, ``pv`` and ``model`` come from the enclosing scope.
     """
     page_key = pv[v]
     prediction = classification.apply_model([pages[page_key][2]], model)
     # NOTE(review): this scales the prediction only if apply_model returns a
     # numeric/array type; a plain list would be repeated instead -- confirm.
     return 1000000 * prediction