Ejemplo n.º 1
0
def distances_tests():
    """Sanity-check the distance and classification helpers of ``image_template``.

    Four checks are made, in order:

    1. ``modified_hausdorff_distance(temp_s, temp_d)`` lies within 1e-4 of a
       stored reference value.
    2. The first element returned by ``list_classification`` for ``temp_s``
       against ``[temp_d, temp_s]`` is the string ``"temp_s"``.
    3. ``list_classification_vec`` and ``list_classification`` return equal
       results for the first template of ``page`` against the following
       templates (slice ``1:50``).
    4. ``distance_matrix([temp_s, temp_d, temp_s])`` matches the matrix
       built from the same reference value, with zeros wherever a template
       is compared with itself.

    NOTE(review): ``temp_s``, ``temp_d`` and ``page`` are read from the
    enclosing module scope; they are presumably fixtures set up elsewhere.
    """
    reference_s_d = 0.0673441488
    tolerance = 0.0001

    measured_s_d = image_template.modified_hausdorff_distance(temp_s, temp_d)
    assert abs(measured_s_d - reference_s_d) < tolerance

    ranking = image_template.list_classification(temp_s, [temp_d, temp_s])
    assert ranking[0] == "temp_s"

    # The vectorised classifier must agree with the plain implementation.
    probe = page.image_templates[0]
    candidates = page.image_templates[1:50]
    vectorised = image_template.list_classification_vec(probe, candidates)
    plain = image_template.list_classification(probe, candidates)
    assert vectorised == plain

    observed_matrix = image_template.distance_matrix([temp_s, temp_d, temp_s])
    expected_matrix = np.array([
        [0.0, reference_s_d, 0.0],
        [reference_s_d, 0.0, reference_s_d],
        [0.0, reference_s_d, 0.0],
    ])
    assert np.allclose(observed_matrix, expected_matrix)
Ejemplo n.º 2
0
def character_rec(dim=48, resample=True):
    """Run hold-out character recognition and report its accuracy.

    Pages are loaded from ``base_directory`` through ``load_pages``. For
    every ``(test, train)`` split produced by ``holdout``, each template of
    the test page is classified with ``list_classification`` against the
    training templates whose ``name`` is not ``"NO LABEL"``. The per-template
    labels are expanded with ``distribute_labels`` and scored against
    ``test.labels`` with ``num_correct_labels``.

    Args:
        dim: Forwarded to ``load_pages`` as ``dim``.
        resample: Forwarded to ``load_pages`` as ``resample``.

    Returns:
        A tuple ``(accuracies, avg_accuracy)``. ``accuracies`` holds, per
        split, the score divided by ``len(test.labels)``; ``avg_accuracy``
        is the summed score divided by the summed ``test.num_temps``.

    Raises:
        ZeroDivisionError: If ``holdout`` yields no splits, because both
            running totals are then still ``0.0``.
    """
    # Parenthesised single-argument prints emit the same text on Python 2
    # and are also valid on Python 3.
    print("Building data")
    pages = load_pages(base_directory, dim=dim, resample=resample)

    accuracies = []
    total_temps = 0.0
    total_right = 0.0
    for i, (test, train) in enumerate(holdout(pages)):
        print("%d Classifiying" % i)
        # Unlabelled templates carry no usable class, so they are excluded
        # from the reference set.
        train_images = [image
                        for train_page in train
                        for image in train_page.image_templates
                        if image.name != "NO LABEL"]
        grouped_labels = [list_classification(template, train_images)[0]
                          for template in test.image_templates]

        predicted_labels = distribute_labels(test.groups,
                                             grouped_labels,
                                             test.num_temps)
        real_labels = test.labels

        # Presumably the number of matching labels -- confirm against
        # num_correct_labels.
        num_right = num_correct_labels(predicted_labels, real_labels)
        total_temps += test.num_temps
        total_right += num_right
        # Force true division: under Python 2 an integer count divided by
        # an integer length would floor to 0 (or 1).
        accuracies.append(num_right / float(len(real_labels)))

    avg_accuracy = total_right / total_temps
    return (accuracies, avg_accuracy)
Ejemplo n.º 3
0
def ensemble_rec():
    """Run hold-out recognition with a learned grouping stage.

    Pages are loaded from ``base_directory`` through ``load_pages``. For
    every ``(test, train)`` split produced by ``holdout``:

    1. A grouping classifier is built from the training pages with
       ``create_grouping_classifier`` using ``tree.DecisionTreeClassifier``
       and applied to the test page with ``group_classify``.
    2. Each item of the grouped test data is classified with
       ``list_classification`` against the training templates whose
       ``name`` is not ``"NO LABEL"``.
    3. The labels are expanded with ``distribute_labels`` and compared
       position by position with ``test.labels``.

    Returns:
        A list with one ``[accuracy, g_acc]`` pair per split. ``accuracy``
        is the number of matching labels divided by ``test.num_temps``;
        ``g_acc`` is the first value returned by ``group_classify``
        (presumably the grouping accuracy -- confirm against that helper).
    """
    # Parenthesised single-argument prints emit the same text on Python 2
    # and are also valid on Python 3.
    print("Building data")
    pages = load_pages(base_directory)

    accuracies = []
    for i, (test, train) in enumerate(holdout(pages)):
        print("%d Grouping" % i)
        group_clf = create_grouping_classifier(train, tree.DecisionTreeClassifier)
        (g_acc, grouped_test) = group_classify(test, group_clf)

        print("%d Classifiying" % i)
        # Unlabelled templates carry no usable class, so they are excluded
        # from the reference set.
        train_images = [image
                        for train_page in train
                        for image in train_page.image_templates
                        if image.name != "NO LABEL"]
        grouped_labels = [list_classification(template, train_images)[0]
                          for template in grouped_test]

        predicted_labels = distribute_labels(test.groups,
                                             grouped_labels,
                                             test.num_temps)
        real_labels = test.labels

        # Use a dedicated index name: Python 2 list comprehensions leak
        # their loop variable, so reusing ``i`` here would overwrite the
        # split counter of the enclosing loop.
        num_right = np.sum([1.0 if predicted_labels[idx] == real_labels[idx]
                            else 0.0
                            for idx in range(len(real_labels))])
        accuracies.append([num_right / test.num_temps, g_acc])

    return accuracies