def test_mnist():
    path = os.path.join(os.getcwd(), 'data/HW5/haar')
    if not os.path.exists(path):
        print(os.getcwd())
    images, labels = load_mnist('training', digits=[0], path=path)
    #print images
    imshow(images.mean(axis=0), cmap=cm.gray)
    show()

    images, labels = load_mnist("training", path=path)
    images /= 255.0
def test_rectangle():
    path = os.path.join(os.getcwd(), 'data/HW5/haar')
    images, labels = load_mnist('training', digits=[4], path=path)
    one_img = images[7]
    one_img /= 128.0
    b = hw5u.count_black(one_img)
    print(one_img)
    print(b)
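# The hw5u helpers used above (count_black, get_features) are project-specific
# and their internals are not shown here.  As a rough, hypothetical sketch of
# the same idea: Haar-like rectangle features can be computed from a
# summed-area (integral) image, where the black-pixel count of any rectangle
# is four table lookups, and the horizontal/vertical "difference" features
# compare the left/right and top/bottom halves of the rectangle.  The names
# below (_integral_image, _rect_sum, _haar_features) are illustrative only.
def _integral_image(img):
    """Summed-area table of a 2-D array of per-pixel black indicators (0/1)."""
    import numpy as np
    return np.asarray(img, dtype=float).cumsum(axis=0).cumsum(axis=1)

def _rect_sum(ii, top, left, bottom, right):
    """Sum of img[top:bottom, left:right] using the integral image ii."""
    total = ii[bottom - 1, right - 1]
    if top > 0:
        total -= ii[top - 1, right - 1]
    if left > 0:
        total -= ii[bottom - 1, left - 1]
    if top > 0 and left > 0:
        total += ii[top - 1, left - 1]
    return total

def _haar_features(img, rect):
    """Horizontal and vertical black-count differences for one rectangle."""
    top, left, bottom, right = rect
    ii = _integral_image(img)
    mid_col = (left + right) // 2
    mid_row = (top + bottom) // 2
    h_diff = (_rect_sum(ii, top, left, bottom, mid_col)
              - _rect_sum(ii, top, mid_col, bottom, right))
    v_diff = (_rect_sum(ii, top, left, mid_row, right)
              - _rect_sum(ii, mid_row, left, bottom, right))
    return h_diff, v_diff
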
def q5():
    """ ECOC for image analysis
    1000 Set: train. Accuracy: 1.000
         Set: test. Accuracy: 0.851
    12,000 (20% of 60,000)
         Set: train. Accuracy: 0.923
         Set: test. Accuracy: 0.905

Process finished with exit code 0
    http://colah.github.io/posts/2014-10-Visualizing-MNIST/
    """
    path = os.path.join(os.getcwd(), 'data/HW5/haar')
    limit = 60000
    images, labels = load_mnist('training', path=path)
    images /= 128.0
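    # MNIST pixels are 0-255, so this maps them to roughly [0, 2); presumably
    # hw5u.count_black thresholds the scaled values to decide black vs. white.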
    X = []
    print('processing images')
    black = [hw5u.count_black(b) for b in images[:limit]]
    #bdf = [pd.DataFrame(bd) for bd in black]
    #with open('save_img_' + str(limit) + '.csv', 'w') as fimg:
    #    pd.concat(bdf, axis=1).to_csv(fimg)
    print('finished processing')

    rects = hw5u.get_rect_coords(100)
    #hw5u.show_rectangles(rects)

    for counts in black:
        row = []
        for rect in rects:
            h_diff, v_diff = hw5u.get_features(counts, rect)
            row.append(h_diff)
            row.append(v_diff)
        X.append(row)
    save(X, labels)
    # Each image is a row in table X.  Columns alternate the horizontal and
    # vertical black-count differences per rectangle:
    # rect_1_h_diff, rect_1_v_diff, rect_2_h_diff, rect_2_v_diff, ...
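    # With hw5u.get_rect_coords(100), presumably 100 rectangles, that gives
    # 2 * 100 = 200 features per image, so X has len(black) rows and 200 columns.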

    data = utils.add_row(X, labels)
    data_split = hw5u.split_test_and_train(data, .2)
    data_test = data_split[0]
    data_train = data_split[1]

    y_train, X_train = utils.split_truth_from_data(data_train)
    y_test, X_test = utils.split_truth_from_data(data_test)

    cls = ec.ECOCClassifier(learner=lambda: adac.AdaboostOptimal(learner=lambda: DecisionTreeClassifier(max_depth=1),
                                                                 max_rounds=200),
                            # learner=LogisticRegression,  # alternative, faster base learner
                            verbose=True,
                            encoding_type='exhaustive').fit(X_train, y_train)
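    # Note: for the 10 digit classes, an exhaustive ECOC code (Dietterich & Bakiri)
    # has 2**(10-1) - 1 = 511 binary columns, so this presumably fits 511 AdaBoost
    # classifiers -- the dominant cost of q5().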
    for set_name, X_eval, y_eval in [('train', X_train, y_train),
                                     ('test', X_test, y_test)]:
        print("Set: {}. Accuracy: {:.3f}".format(set_name, accuracy_score(y_eval, cls.predict(X_eval))))