Example #1
def test_sparse_matrices():
    # Test that sparse and dense input matrices yield the same results.
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]
    X_sparse = csr_matrix(X)
    mlp = MLPClassifier(random_state=1, hidden_layer_sizes=15)
    mlp.fit(X, y)
    pred1 = mlp.decision_function(X)
    mlp.fit(X_sparse, y)
    pred2 = mlp.decision_function(X_sparse)
    assert_almost_equal(pred1, pred2)
    pred1 = mlp.predict(X)
    pred2 = mlp.predict(X_sparse)
    assert_array_equal(pred1, pred2)
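# The test above assumes module-level fixtures X_digits_binary / y_digits_binary.
# A minimal sketch of how such fixtures could be built (an assumption -- the
# original test module defines them elsewhere):
from numpy.testing import assert_almost_equal, assert_array_equal
from scipy.sparse import csr_matrix
from sklearn.datasets import load_digits
from sklearn.neural_network import MLPClassifier

digits = load_digits()
mask = digits.target < 2                     # keep only digits 0 and 1 (binary task)
X_digits_binary = digits.data[mask]
y_digits_binary = digits.target[mask]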
Example #2
def test_fit():
    # Test that the algorithm's solution matches a worked-out example.
    X = np.array([[0.6, 0.8, 0.7]])
    y = np.array([0])
    mlp = MLPClassifier(algorithm='sgd',
                        learning_rate_init=0.1,
                        alpha=0.1,
                        activation='logistic',
                        random_state=1,
                        max_iter=1,
                        hidden_layer_sizes=2,
                        momentum=0)
    # set weights
    mlp.coefs_ = [0] * 2
    mlp.intercepts_ = [0] * 2
    mlp.classes_ = [0, 1]
    mlp.n_outputs_ = 1
    mlp.coefs_[0] = np.array([[0.1, 0.2], [0.3, 0.1], [0.5, 0]])
    mlp.coefs_[1] = np.array([[0.1], [0.2]])
    mlp.intercepts_[0] = np.array([0.1, 0.1])
    mlp.intercepts_[1] = np.array([1.0])
    mlp._coef_grads = [0] * 2       # placeholders; properly sized below
    mlp._intercept_grads = [0] * 2

    mlp.label_binarizer_.y_type_ = 'binary'

    # Initialize parameters
    mlp.n_iter_ = 0
    mlp.learning_rate_ = 0.1

    # Compute the number of layers
    mlp.n_layers_ = 3

    # Pre-allocate gradient matrices
    mlp._coef_grads = [0] * (mlp.n_layers_ - 1)
    mlp._intercept_grads = [0] * (mlp.n_layers_ - 1)

    mlp.out_activation_ = 'logistic'
    mlp.t_ = 0
    mlp.best_loss_ = np.inf
    mlp.loss_curve_ = []
    mlp._no_improvement_count = 0
    mlp._intercept_velocity = [
        np.zeros_like(intercepts) for intercepts in mlp.intercepts_
    ]
    mlp._coef_velocity = [np.zeros_like(coefs) for coefs in mlp.coefs_]

    mlp.partial_fit(X, y, classes=[0, 1])
    # Manually worked out example
    # h1 = g(X1 * W_i1 + b11) = g(0.6 * 0.1 + 0.8 * 0.3 + 0.7 * 0.5 + 0.1)
    #       =  0.679178699175393
    # h2 = g(X2 * W_i2 + b12) = g(0.6 * 0.2 + 0.8 * 0.1 + 0.7 * 0 + 0.1)
    #         = 0.574442516811659
    # o1 = g(h * W2 + b21) = g(0.679 * 0.1 + 0.574 * 0.2 + 1)
    #       = 0.7654329236196236
    # d21 = -(0 - 0.765) = 0.765
    # d11 = (1 - 0.679) * 0.679 * 0.765 * 0.1 = 0.01667
    # d12 = (1 - 0.574) * 0.574 * 0.765 * 0.2 = 0.0374
    # W1grad11 = X1 * d11 + alpha * W11 = 0.6 * 0.01667 + 0.1 * 0.1 = 0.0200
    # W1grad12 = X1 * d12 + alpha * W12 = 0.6 * 0.0374 + 0.1 * 0.2 = 0.04244
    # W1grad21 = X2 * d11 + alpha * W13 = 0.8 * 0.01667 + 0.1 * 0.3 = 0.043336
    # W1grad22 = X2 * d12 + alpha * W14 = 0.8 * 0.0374 + 0.1 * 0.1 = 0.03992
    # W1grad31 = X3 * d11 + alpha * W15 = 0.7 * 0.01667 + 0.1 * 0.5 = 0.061669
    # W1grad32 = X3 * d12 + alpha * W16 = 0.7 * 0.0374 + 0.1 * 0 = 0.02618
    # W2grad1 = h1 * d21 + alpha * W21 = 0.679 * 0.765 + 0.1 * 0.1 = 0.5294
    # W2grad2 = h2 * d21 + alpha * W22 = 0.574 * 0.765 + 0.1 * 0.2 = 0.45911
    # b1grad1 = d11 = 0.01667
    # b1grad2 = d12 = 0.0374
    # b2grad = d21 = 0.765
    # W1 = W1 - eta * [W1grad11, .., W1grad32] = [[0.1, 0.2], [0.3, 0.1],
    #          [0.5, 0]] - 0.1 * [[0.0200, 0.04244], [0.043336, 0.03992],
    #          [0.061669, 0.02618]] = [[0.098, 0.195756], [0.2956664,
    #          0.096008], [0.4938331, -0.002618]]
    # W2 = W2 - eta * [W2grad1, W2grad2] = [[0.1], [0.2]] - 0.1 *
    #        [[0.5294], [0.45911]] = [[0.04706], [0.154089]]
    # b1 = b1 - eta * [b1grad1, b1grad2] = 0.1 - 0.1 * [0.01667, 0.0374]
    #         = [0.098333, 0.09626]
    # b2 = b2 - eta * b2grad = 1.0 - 0.1 * 0.765 = 0.9235
    assert_almost_equal(mlp.coefs_[0],
                        np.array([[0.098, 0.195756], [0.2956664, 0.096008],
                                  [0.4938331, -0.002618]]),
                        decimal=3)
    assert_almost_equal(mlp.coefs_[1],
                        np.array([[0.04706], [0.154089]]),
                        decimal=3)
    assert_almost_equal(mlp.intercepts_[0],
                        np.array([0.098333, 0.09626]),
                        decimal=3)
    assert_almost_equal(mlp.intercepts_[1], np.array(0.9235), decimal=3)
    # Testing output
    #  h1 = g(X1 * W_i1 + b11) = g(0.6 * 0.098 + 0.8 * 0.2956664 +
    #               0.7 * 0.4938331 + 0.098333) = 0.677
    #  h2 = g(X2 * W_i2 + b12) = g(0.6 * 0.195756 + 0.8 * 0.096008 +
    #            0.7 * -0.002618 + 0.09626) = 0.572
    #  o1 = h * W2 + b21 = 0.677 * 0.04706 +
    #             0.572 * 0.154089 + 0.9235 = 1.043
    assert_almost_equal(mlp.decision_function(X), 1.043, decimal=3)
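# A standalone NumPy sketch (not part of the original test) that reproduces the
# hand-computed forward/backward pass and SGD update from the comments above:
import numpy as np

def logistic(z):
    return 1.0 / (1.0 + np.exp(-z))

X = np.array([0.6, 0.8, 0.7])
y = 0
W1 = np.array([[0.1, 0.2], [0.3, 0.1], [0.5, 0.0]])
W2 = np.array([[0.1], [0.2]])
b1 = np.array([0.1, 0.1])
b2 = np.array([1.0])
alpha, eta = 0.1, 0.1                           # L2 penalty and learning rate

h = logistic(X @ W1 + b1)                       # hidden activations h1, h2
o = logistic(h @ W2 + b2)                       # output activation o1

d2 = o - y                                      # output delta (log-loss + logistic output)
d1 = h * (1 - h) * (d2 @ W2.T)                  # hidden delta

W1 -= eta * (np.outer(X, d1) + alpha * W1)      # gradients include the L2 term
W2 -= eta * (np.outer(h, d2) + alpha * W2)
b1 -= eta * d1
b2 -= eta * d2

print(W1)      # ~[[0.0980, 0.1958], [0.2957, 0.0960], [0.4938, -0.0026]]
print(W2)      # ~[[0.0470], [0.1540]]
print(b1, b2)  # ~[0.0983, 0.0963] [0.9235]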
Example #4
    finetune_classifier.coefs_ = new_coefs
    finetune_classifier.fit(Xgen_train, ygen_train)

#copy the parameters, then retrain on the 50% generated data to fine-tune them
if (classifier_type == 'svm'):
    #    finetune_classifier.coef_=classifier.coef_
    finetune_classifier = deepcopy(classifier)
    finetune_classifier.fit(Xgen_train, ygen_train)

#---------------finetune end---------------

if (classifier_type == 'svm'):
    #note: for SVMs, predict_proba can be inconsistent with predict;
    #decision_function gives scores consistent with predict
    y_pred_proba = finetune_classifier.decision_function(
        Xgen_test)  #signed distance to the decision boundary

if (classifier_type == 'mlp'):
    y_pred_proba = finetune_classifier.predict_proba(Xgen_test)

all_labels = finetune_classifier.classes_

if (cur_exp_param == 'cpu'):
    K = 10
y_top_K = []
#--pick out the highest-probability labels (by sorting predict_proba or decision_function scores)
#--note this may differ for the RNN case
if (classifier_type == 'mlp' or classifier_type == 'svm'):
    for each_proba in y_pred_proba:
        sort_proba_index = each_proba.argsort()
        #argsort is ascending; reverse it to rank all_labels in descending order of score
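        #(hypothetical completion -- the original extract is cut off here)
        sort_proba_index = sort_proba_index[::-1]           #descending order of score
        y_top_K.append(all_labels[sort_proba_index[:K]])    #keep the top-K labels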
Example #5
    def plot_feature_space_level_set(seed,
                                     dir_out='pycalib/out/synthetic_data/'):
        import sklearn.datasets
        from sklearn.neural_network import MLPClassifier
        import matplotlib.colors

        # Setup
        train_size = 1000
        cal_size = 100
        noise = .25
        contour_levels = 10

        # generate 2d classification dataset
        np.random.seed(seed)
        X, y = sklearn.datasets.make_circles(n_samples=train_size, noise=noise)

        # train classifier
        clf = MLPClassifier(hidden_layer_sizes=[10, 10], alpha=1, max_iter=200)
        clf.fit(X, y)

        # scatter plot, dots colored by class value
        df = pd.DataFrame(dict(x=X[:, 0], y=X[:, 1], label=y))
        markers = {0: 'x', 1: '.'}

        fig, ax = texfig.subplots(width=8,
                                  ratio=.3,
                                  nrows=1,
                                  ncols=3,
                                  sharex=True,
                                  sharey=True)
        # grouped = df.groupby('label')
        # for key, group in grouped:
        #     group.plot(ax=ax[0], kind='scatter', x='x', y='y', label=key, marker=markers[key], color='gray', alpha=.75)

        # Put the result into a color plot
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        h = .02  # step size in the mesh
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
        else:
            p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
            Z = p_pred[:, 1]
        Z0 = Z.reshape(xx.shape)
        cm = plt.cm.RdBu_r  # colormap
        cm_bright = matplotlib.colors.ListedColormap(['#FF0000', '#0000FF'])
        cont0 = ax[0].contourf(xx,
                               yy,
                               Z0,
                               cmap=cm,
                               alpha=.8,
                               levels=contour_levels,
                               vmin=0,
                               vmax=1)
        ax[0].set_title("Classification Uncertainty")

        # calibrate
        X_cal, y_cal = sklearn.datasets.make_circles(n_samples=cal_size,
                                                     noise=noise)
        p_cal = clf.predict_proba(X_cal)
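        # NOTE: `cm` was rebound to the RdBu_r colormap above; in the original
        # module, GPCalibration presumably comes from pycalib's calibration-methods
        # module rather than from this local name (an assumption).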
        clf_cal = cm.GPCalibration(SVGP=True)
        clf_cal.fit(p_cal, y_cal)

        # calibrated contour plot
        Z1 = clf_cal.predict_proba(p_pred)[:, 1].reshape(xx.shape)
        cont1 = ax[1].contourf(xx,
                               yy,
                               Z1,
                               cmap=cm,
                               alpha=.8,
                               levels=contour_levels,
                               vmin=0,
                               vmax=1)
        ax[1].set_title("Calibrated Uncertainty")

        # difference plot
        cm_diff = plt.cm.viridis_r  # colormap
        cont1 = ax[2].contourf(xx, yy, Z1 - Z0, cmap=cm_diff, alpha=.8)
        ax[2].set_title("Uncertainty Difference")

        # color bar
        # fig.subplots_adjust(right=0.8)
        # cbar_ax = fig.add_axes([.96, 0.15, 0.05, 0.7])
        # cbar = fig.colorbar(cont1, cax=cbar_ax)

        # # contour labels
        # ax[0].clabel(cont0, inline=1, fontsize=8)
        # ax[1].clabel(cont1, inline=1, fontsize=8)

        texfig.savefig(dir_out + '/plots/' + 'level_sets')
Example #6
### FIT & PREDICT
clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X, y)

clf.predict([[2., 2.], [-1., -2.]])


### COEFFICIENTS
# MLP can fit a non-linear model to the training data.
# clf.coefs_ contains the weight matrices that constitute the model parameters:
[coef.shape for coef in clf.coefs_]
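# e.g. [(2, 5), (5, 2), (2, 1)] when X has 2 features and y is a single binary
# target: hidden_layer_sizes=(5, 2) adds two hidden layers of 5 and 2 units.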


### PROBABILITIES
# To get the raw values before applying the output activation function, run the following command:
clf.decision_function([[2., 2.], [1., 2.]])

"""
MLP trains using Backpropagation. More precisely, it trains using
some form of gradient descent and the gradients are calculated using Backpropagation.
For classification, it minimizes the Cross-Entropy loss function,
giving a vector of probability estimates P(y|x) per sample x.
"""
clf.predict_proba([[2., 2.], [1., 2.]])
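# Rows of the returned array are probability estimates over clf.classes_ and sum to 1.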


# The algorithm supports multi-label classification in which a sample can belong to more than one class.
# For each class, the output of MLPClassifier.decision_function passes through the logistic function.
# Values greater than or equal to 0.5 are rounded to 1, otherwise to 0.
X = [[0., 0.], [1., 1.]]
y = [[0, 1], [1, 1]]
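# A minimal continuation of the multi-label snippet (assumed -- not part of the
# original extract; it follows the same pattern as the snippets above):
clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5,
                    hidden_layer_sizes=(15,), random_state=1)
clf.fit(X, y)

# Each prediction is a 0/1 indicator vector with one entry per label.
clf.predict([[1., 2.]])
clf.predict([[0., 0.]])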
Example #7
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0],
                   X_train[:, 1],
                   c=y_train,
                   cmap=cm_bright,
                   edgecolors='black',
                   s=25)
        # and testing points
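        # (hypothetical continuation -- the extract is cut off here; based on the
        # standard scikit-learn classifier-comparison example this code follows)
        ax.scatter(X_test[:, 0],
                   X_test[:, 1],
                   c=y_test,
                   cmap=cm_bright,
                   edgecolors='black',
                   s=25,
                   alpha=0.6)
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(name)
        i += 1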