# this method is slow, because it keeps calling a feature reduction
    # method for each bar and each estimator. We will globally reduce the
    # features before starting

    global_f_select = MultiSelectKBest(f_classif,
                                       pooling_function=np.min,
                                       k=3000)

    res2 = first_layer_predictor2.fit_transform(
        global_f_select.fit_transform(data, stimuli), stimuli)


    # Visualise the two sets of predictions next to the original stimuli.
    from viz import get_bars, draw_words, pad, make_collage

    bars = get_bars(img_size=(50, 50))
    words1 = draw_words(res1, bars)
    words2 = draw_words(res2, bars)
    words = draw_words(stimuli, bars)
    stacked = np.concatenate([words1, words2, words], axis=1)

    # A little padding keeps the three groups visually distinguishable.
    stacked = pad(stacked, [0, 10, 10])

    num_x = 8
    num_y = 12
    start_at = 0

    # Arrange num_x * num_y stacked word images on a single grid.
    selection = stacked[start_at:start_at + num_x * num_y]
    collage = make_collage(
        selection.reshape(num_x, num_y, stacked.shape[1], stacked.shape[2]))
    # Use the layer 1 results to learn a second level classifier on letters:
    # one ExtraTrees forest per letter position, each trained on that
    # letter's slice of the target matrix.
    forests = [ExtraTreesClassifier(n_estimators=200) for _ in range(4)]
    # Floor division: the stimulus vector splits into 4 equal letter slices,
    # and range() / slice bounds require an int ("/" yields a float in
    # Python 3 and would raise a TypeError in range()).
    letter_length = stimuli.shape[1] // 4
    predictions = []
    for i, forest in zip(range(0, stimuli.shape[1], letter_length), forests):
        forest.fit(train_data, train_target[:, i:i + letter_length])
        # predict_proba returns one (n_samples, 2) array per target column;
        # keep the positive-class probability for each column.
        predictions.append(np.array(forest.predict_proba(test_data)).T[1])
    predictions = np.hstack(predictions)

    # Visualise the random-forest results alongside the layer-1 output
    # and the ground-truth stimuli.
    from viz import get_bars, draw_words, pad, make_collage

    bars = get_bars(img_size=(50, 50))
    words_layer1 = draw_words(res1[200:], bars)
    # NOTE(review): `p` is not defined anywhere in this chunk — presumably
    # the whole-word forest predictions; confirm against surrounding code.
    words_forest = draw_words(p, bars)
    words_forest_by_letters = draw_words(predictions, bars)
    words = draw_words(stimuli[200:], bars)

    stacked = np.concatenate(
        [words_layer1, words_forest, words_forest_by_letters, words],
        axis=1)

    # A little padding keeps the four groups visually distinguishable.
    stacked = pad(stacked, [0, 10, 10])

    num_x = 5
    num_y = 8
    start_at = 0
    # --- Example #3: near-duplicate of the snippet above with minor edits ---
    # Running a feature-reduction step per bar and per estimator is slow, so
    # a global feature selector is built up front instead.
    global_f_select = MultiSelectKBest(f_classif,
                                       pooling_function=np.min,
                                       k=3000)

    # NOTE(review): the selector is constructed but never applied here — the
    # first-layer predictor is fit on the unreduced data. Confirm whether
    # global_f_select is used further down or is leftover.
    res2 = first_layer_predictor2.fit_transform(data, stimuli)

    # Visualise the two sets of predictions next to the original stimuli.
    from viz import get_bars, draw_words, pad, make_collage

    bars = get_bars(img_size=(50, 50))
    words1 = draw_words(res1, bars)
    words2 = draw_words(res2, bars)
    words = draw_words(stimuli, bars)
    stacked = np.concatenate([words1, words2, words], axis=1)

    # A little padding keeps the three groups visually distinguishable.
    stacked = pad(stacked, [0, 10, 10])

    num_x = 8
    num_y = 12
    start_at = 0

    # Arrange num_x * num_y stacked word images on a single grid.
    selection = stacked[start_at:start_at + num_x * num_y]
    collage = make_collage(
        selection.reshape(num_x, num_y, stacked.shape[1], stacked.shape[2]))
    # Second-level classifier on letters: one ExtraTrees forest per letter
    # position, each trained on that letter's slice of the target matrix.
    forests = [ExtraTreesClassifier(n_estimators=200) for _ in range(4)]
    # Floor division: the stimulus vector splits into 4 equal letter slices,
    # and range() / slice bounds require an int ("/" yields a float in
    # Python 3 and would raise a TypeError in range()).
    letter_length = stimuli.shape[1] // 4
    predictions = []
    for i, forest in zip(
            range(0, stimuli.shape[1], letter_length),
            forests):
        forest.fit(train_data, train_target[:, i:i + letter_length])
        # predict_proba returns one (n_samples, 2) array per target column;
        # keep the positive-class probability for each column.
        predictions.append(np.array(forest.predict_proba(test_data)).T[1])
    predictions = np.hstack(predictions)

    # Visualise the random-forest results alongside the layer-1 output
    # and the ground-truth stimuli.
    from viz import get_bars, draw_words, pad, make_collage

    bars = get_bars(img_size=(50, 50))
    words_layer1 = draw_words(res1[200:], bars)
    # NOTE(review): `p` is not defined anywhere in this chunk — presumably
    # the whole-word forest predictions; confirm against surrounding code.
    words_forest = draw_words(p, bars)
    words_forest_by_letters = draw_words(predictions, bars)
    words = draw_words(stimuli[200:], bars)

    stacked = np.concatenate(
        [words_layer1, words_forest, words_forest_by_letters, words],
        axis=1)

    # A little padding keeps the four groups visually distinguishable.
    stacked = pad(stacked, [0, 10, 10])

    num_x = 5
    num_y = 8
    start_at = 0