def test_label_binarizer_multilabel():
    lb = LabelBinarizer()

    # test input as lists of tuples
    inp = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 1, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(indicator_mat, got)
    assert_equal(lb.inverse_transform(got), inp)

    # test input as label indicator matrix
    lb.fit(indicator_mat)
    assert_array_equal(indicator_mat,
                       lb.inverse_transform(indicator_mat))

    # regression test for the two-class multilabel case
    lb = LabelBinarizer()

    inp = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([[1, 1],
                         [1, 0],
                         [0, 1],
                         [1, 1]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_equal([set(x) for x in lb.inverse_transform(got)],
                 [set(x) for x in inp])
class LabelBinarizer2:
    def __init__(self):
        self.lb = LabelBinarizer()

    def fit(self, X):
        # Convert X to array
        X = np.array(X)
        # Fit X using the LabelBinarizer object
        self.lb.fit(X)
        # Save the classes
        self.classes_ = self.lb.classes_

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)

    def transform(self, X):
        # Convert X to array
        X = np.array(X)
        # Transform X using the LabelBinarizer object
        Xlb = self.lb.transform(X)

        if len(self.classes_) == 2 and len(np.unique(X)) <= 2:
            Xlb = np.hstack((1 - Xlb, Xlb))
        return Xlb

    def inverse_transform(self, Xlb):
        # Convert Xlb to array
        Xlb = np.array(Xlb)
        if len(self.classes_) == 2:
            # Use the last (positive-class) column so the round trip matches
            # transform, which stacks the columns as [negative, positive].
            X = self.lb.inverse_transform(Xlb[:, -1])
        else:
            X = self.lb.inverse_transform(Xlb)
        return X
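# Usage sketch added for illustration (not part of the original snippet; the
# animal labels below are made up). Unlike the plain LabelBinarizer, which
# returns a single 0/1 column for a two-class problem, LabelBinarizer2 always
# emits one column per class.
lb2 = LabelBinarizer2()
onehot = lb2.fit_transform(["cat", "dog", "dog", "cat"])
print(onehot)                          # shape (4, 2): columns [is_cat, is_dog]
print(lb2.inverse_transform(onehot))   # recovers ['cat' 'dog' 'dog' 'cat']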
def run_cnn26(_x_train, _x_test, _y_train, _y_test, _x_valid, _y_valid):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(20, (3, 3),
                                     activation='relu', input_shape=(100, 100, 3)))
    model.add(tf.keras.layers.Conv2D(20, (3, 3), activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dense(4, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()

    history = model.fit(_x_train, _y_train,
              batch_size=32, epochs=10, verbose=1,
              validation_data=(_x_valid, _y_valid))

    # Evaluate model
    score = model.evaluate(_x_test, _y_test)
    print("score =", score)

    # Plot model

    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs_range = range(10)

    plt.figure(figsize=(8, 8))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()

    # Predict Test set results
    labels = ['apple', 'banana', 'mixed', 'orange']
    y_pred = model.predict(_x_test)

    lb = LabelBinarizer()

    lb.fit(labels)
    predict_class = lb.inverse_transform(y_pred)
    test_y = lb.inverse_transform(_y_test)
    print(np.concatenate((predict_class.reshape(len(predict_class), 1), test_y.reshape(len(test_y), 1)), 1))
    print(classification_report(test_y, predict_class))
Example #5
class AnswerEncoder:
    def __init__(self, num_class=1000, multi_label=False, unknown_answer='idontknow'):
        self.unknown_answer = unknown_answer
        self.num_class = num_class
        self.multi_label = multi_label
        self.classes_ = None
        self.encoder = None

    def fit(self, answers):
        # TODO support multi-label
        answer_map, num = count_freq(answers)
        answer_freq = sorted(answer_map.items(), key=operator.itemgetter(1), reverse=True)
        kept_answers = (list(answer_freq_pair[0] for answer_freq_pair in answer_freq[:self.num_class]))
        if self.unknown_answer == 'most_freq':
            self.unknown_answer = answer_freq[self.num_class][0]
        if self.multi_label:
            self.classes_ = kept_answers
            self.encoder = MultiLabelBinarizer(classes=self.classes_).fit([self.classes_])
        else:
            self.classes_ = kept_answers + [self.unknown_answer]
            self.encoder = LabelBinarizer().fit(self.classes_)
        return self

    def transform(self, answers):
        # TODO efficient transform
        if self.multi_label:
            cleared_answers = []
            for as_list in answers:
                new_as = []
                for a in as_list:
                    if a in self.classes_:
                        new_as.append(a)
                cleared_answers.append(new_as)
        else:
            cleared_answers = []
            for answer in answers:
                if answer in self.classes_:
                    cleared_answers.append(answer)
                else:
                    cleared_answers.append(self.unknown_answer)
        return self.encoder.transform(cleared_answers)

    def inverse_transform(self, answers):
        # TODO efficient inverse_transform
        if not self.multi_label:
            return self.encoder.inverse_transform(answers)
        else:
            # get one most possible label
            t = np.zeros_like(answers)
            gold_answer_index = np.argmax(answers, axis=1)
            t[np.arange(len(answers)), gold_answer_index] = 1
            results = self.encoder.inverse_transform(t)
            return list(result[0] for result in results)
Example #6
class OneHotVector(object):
    def __init__(self, chars: list):
        if not isinstance(chars, list) or len(chars) == 0:
            raise Exception('chars must be a non-empty list, got %s' % chars)

        self.encoder = LabelBinarizer(
            neg_label=0, pos_label=1,
            sparse_output=False)  # TODO: performance test
        self.encoder.fit(chars)

    @property
    def classes(self):
        return self.encoder.classes_

    def __len__(self):
        return self.encoder.classes_.shape[0]

    def to_vector(self, c: str) -> np.ndarray:
        """
        
        :param c: character. len(c)==1
        :return:
        """
        return self.encoder.transform([c])[0]

    def to_vectors(self, chars: list) -> np.ndarray:
        """
        
        :param chars: list of characters. len(chars)>0
        :return:
        """
        # Accept a plain string as well and split it into characters.
        if isinstance(chars, (str, np.str_)):
            chars = [c for c in chars]
        return self.encoder.transform(chars)

    def to_value(self, v: np.ndarray) -> np.ndarray:
        """
        
        :param v: one hot vector 
        :return: 
        """
        return self.encoder.inverse_transform(np.array([v]))[0]

    def to_values(self, vectors: list) -> np.ndarray:
        """

        :param vectors: list of one hot vector 
        :return: 
        """
        return self.encoder.inverse_transform(vectors)

    def to_index(self, c: str) -> int:
        return np.argmax(self.to_vector(c))
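# Usage sketch added for illustration (the character vocabulary is made up).
# With three or more characters LabelBinarizer produces proper one-hot rows;
# with exactly two it would collapse to a single 0/1 column, so keep the
# vocabulary at three characters or more when using this wrapper.
ohv = OneHotVector(list("abc"))
print(len(ohv))                           # 3 classes
print(ohv.to_vector("b"))                 # e.g. [0 1 0]
print(ohv.to_index("b"))                  # 1
print(ohv.to_value(ohv.to_vector("b")))   # 'b'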
def partb():
    def load(file_name):
        file = np.load(file_name)
        X_train = file['X_train'].T
        y_train = file['y_train']
        X_test = file['X_test'].T
        y_test = file['y_test']
        X_cv = file['X_cv'].T
        y_cv = file['y_cv']

        return X_train,y_train,X_cv,y_cv,X_test,y_test

    train_ = [0,0]
    test_ = [0,0]
    overall = []
    for i in range(14):

        X_train,y_train,X_cv,y_cv,X_test,y_test = load('pofa{}.npz'.format(i))

        from sklearn.preprocessing import LabelBinarizer
        binarizer = LabelBinarizer()
        binarizer.fit(y_train)
        Y_train = binarizer.transform(y_train).T
        Y_cv = binarizer.transform(y_cv).T


        #nn.forward(X)
        #nn.backprop(X,Y,gradient_check=True)

        print(X_train.shape[0], Y_train.shape[0])
        nn = NeuralNetwork([X_train.shape[0],30,Y_train.shape[0]], functions=[sigmoid,softmax], derivatives=[derivative_sigmoid])

        nn.fit(X_train,Y_train,eta=0.01,momentum=0.5,minibatch=16,regularizer=0.15,max_iter=200,gradient_check=False,cv = (X_cv,Y_cv),graphs=False, lbfgs=False)

        output = nn.forward(X_train)

        y_train_output = binarizer.inverse_transform(output.T)
        y_test_output = binarizer.inverse_transform(nn.forward(X_test).T)
        print("Iteration: ",i)
        print((y_train_output==y_train).mean())
        print((y_test_output ==y_test).mean())

        overall.append((y_test == y_test_output).mean())

        train_[0] += (y_train_output==y_train).sum()
        train_[1] += y_train.shape[0]
        test_[0] += (y_test_output==y_test).sum()
        test_[1] += y_test.shape[0]

    print("Average train accuracy: ", train_[0]/train_[1],"Average test accuracy: ",test_[0]/test_[1])
    print(train_,test_)
    overall = np.array(overall)
    print(overall.mean())
def train_and_save_model(model='xvector',
                         binary_class=False,
                         single_class='glass'):
    model = define_xvector()
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001),
                  metrics=['acc',
                           km.precision(label=1),
                           km.recall(label=0)])
    model.summary()
    callback_list = [
        ModelCheckpoint(
            'checkpoint-{epoch:02d}.h5',
            monitor='loss',
            verbose=1,
            save_best_only=True,
            period=2
        ),  # checkpoint every 2 epochs, keeping only the best model
        ReduceLROnPlateau(
            monitor='loss', patience=3, verbose=1, min_lr=1e-6
        ),  # reduce the learning rate if the monitored loss stops improving
        CSVLogger(filename='training_log.csv'),  # logger to csv
        EarlyStopping(
            monitor='loss',
            patience=5)  # stop early if the loss shows no improvement
    ]
    tr_data, tr_label, ts_data, ts_label = train_test_split()
    encoder = LabelBinarizer()
    tr_label = encoder.fit_transform(tr_label)
    ts_label = encoder.transform(ts_label)
    print(
        "Start Training process \nTraining data shape {} \nTraining label shape {}"
        .format(tr_data.shape, tr_label.shape))
    model.fit(tr_data,
              tr_label,
              batch_size=16,
              epochs=100,
              verbose=1,
              validation_split=0.2)
    model.save('5class_segmentYoutube_model.h5')
    pred = model.predict(ts_data)
    pred = encoder.inverse_transform(pred)
    ts_label = encoder.inverse_transform(ts_label)
    cm = confusion_matrix(y_target=ts_label, y_predicted=pred, binary=False)
    plt.figure(figsize=(10, 10))
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    ax.set_xticklabels([''] + CLASS_TYPE, rotation=40, ha='right')
    ax.set_yticklabels([''] + CLASS_TYPE)
    plt.savefig("ConfusionMatrix_segment_youtube.png")
    plt.show()
Example #9
def get_labelidx(img_path,model):
    img = cv2.imread(img_path)
    label_list = np.arange(16)
    lb = LabelBinarizer().fit(label_list)
    predictions = model.predict(np.array([img]))
    predict_idx = lb.inverse_transform(predictions)
    return predict_idx
def display_image_predictions(features, labels, predictions):
    label_binarizer = LabelBinarizer()  # one-hot encoder for the class ids
    label_binarizer.fit(range(LABELS_COUNT))  # fit on the full range of class ids
    label_ids = label_binarizer.inverse_transform(np.array(labels))  # decode one-hot labels back to class ids

    fig, axies = plt.subplots(nrows=4, ncols=2)  # create a 4x2 grid of subplots
    fig.tight_layout()  # tight_layout adjusts subplot parameters to fill the figure area
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)  # add the figure title

    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)  # ind = [0, 1, 2]
    width = (1. - 2. * margin) / n_predictions

    for image_i, (feature, label_id, pred_indicies, pred_values) \
        in enumerate(zip(features, label_ids, predictions.indices, predictions.values)):   # zip pairs up the corresponding items; enumerate adds the image index
        pred_names = [LABEL_NAMES[pred_i] for pred_i in pred_indicies]
        correct_name = LABEL_NAMES[label_id]

        axies[image_i][0].imshow(feature)  # show the image
        axies[image_i][0].set_title(correct_name)  # title with the true label
        axies[image_i][0].set_axis_off()  # hide the x and y axes

        # draw a horizontal bar chart of the top predictions
        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
        axies[image_i][1].set_yticks(ind + margin)
        axies[image_i][1].set_yticklabels(pred_names[::-1])
        axies[image_i][1].set_xticks([0, 0.5, 1.0])
class BinaryRelevanceClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, Y):
        # binarize labels
        self.bl = LabelBinarizer()
        Y = self.bl.fit_transform(Y)
        self.classes_ = self.bl.classes_

        # create an estimator for each label
        self.estimators_ = []
        for i in range(self.bl.classes_.shape[0]):
            estimator = clone(self.estimator)
            estimator.fit(X, Y[:, i])
            self.estimators_.append(estimator)

    def predict(self, X):
        self._check_is_fitted()

        X = np.atleast_2d(X)
        Y = np.empty((X.shape[0], self.classes_.shape[0]))
        for i, estimator in enumerate(self.estimators_):
            Y[:, i] = estimator.predict(X).T

        return self.bl.inverse_transform(Y)

    def _check_is_fitted(self):
        if not hasattr(self, "estimators_"):
            raise ValueError("The object hasn't been fitted yet!")
Example #12
def test_proba_classif_convergence():
    X_train, _, y_train, _ = load_scaled_boston()
    y_train = np.round(y_train)
    mc = MondrianTreeClassifier(random_state=0)
    mc.fit(X_train, y_train)

    lb = LabelBinarizer()
    y_bin = lb.fit_transform(y_train)

    le = LabelEncoder()
    y_enc = le.fit_transform(y_train)

    proba = mc.predict_proba(X_train)
    labels = mc.predict(X_train)
    assert_array_equal(proba, y_bin)
    assert_array_equal(labels, lb.inverse_transform(y_bin))

    # For points completely far away from the training data, this
    # should converge to the empirical distribution of labels.
    # X is scaled to between -1.0 and 1.0
    X_inf = np.vstack(
        (30.0 * np.ones(X_train.shape[1]), -30.0 * np.ones(X_train.shape[1])))
    inf_proba = mc.predict_proba(X_inf)
    emp_proba = np.bincount(y_enc) / float(len(y_enc))
    assert_array_almost_equal(inf_proba, [emp_proba, emp_proba])
class GBClassifier(_BaseGB, ClassifierMixin):
    def __init__(self,
                 estimator,
                 n_estimators=100,
                 step_size="line_search",
                 learning_rate=0.1,
                 loss="squared_hinge",
                 subsample=1.0,
                 callback=None,
                 random_state=None):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.step_size = step_size
        self.learning_rate = learning_rate
        self.loss = loss
        self.subsample = subsample
        self.callback = callback
        self.random_state = random_state

    def _get_loss(self):
        losses = dict(squared_hinge=_SquaredHingeLoss(), log=_LogLoss())
        return losses[self.loss]

    def fit(self, X, y):
        self._lb = LabelBinarizer(neg_label=-1)
        Y = self._lb.fit_transform(y)
        return super(GBClassifier, self).fit(X, Y)

    def predict(self, X):
        pred = self.decision_function(X)
        return self._lb.inverse_transform(pred)
Example #14
class MinimalLearningMachineClassifier(MinimalLearningMachine,
                                       ClassifierMixin):
    def __init__(self, selector=None):
        MinimalLearningMachine.__init__(self,
                                        selector,
                                        estimator_type='classifier')
        self.lb = LabelBinarizer()

    def fit(self, X, y=None):
        self.lb.fit(y)
        return MinimalLearningMachine.fit(self, X, self.lb.transform(y))

    def active_(self, dyhat):
        classes = self.lb.transform(self.lb.classes_)

        result = [
            np.argmin(
                list(map(lambda y_class: self.mulat_(y_class, dyh), classes)))
            for dyh in dyhat
        ]

        return self.lb.inverse_transform(self.lb.classes_[result])

    def score(self, X, y, sample_weight=None):
        return ClassifierMixin.score(self, X, y, sample_weight)
Example #15
class NeuralNetworkClassifier(BaseNeuralNetwork, ClassifierMixin):
    def __init__(self,
                 layers=[],
                 loss_function="logistic_loss",
                 learning_rate=0.1,
                 batch_size=100,
                 max_epochs=10,
                 update_algorithm="sgd",
                 verbose="False"):

        sup = super(NeuralNetworkClassifier, self)
        sup.__init__(layers=layers,
                     loss_function=loss_function,
                     learning_rate=learning_rate,
                     batch_size=batch_size,
                     max_epcohs=max_epochs,
                     update_algorithm=update_algorithm,
                     verbose=verbose)

        self.label_binarizer_ = LabelBinarizer()

    def fit(self, X, y):
        y_binarized = self.label_binarizer_.fit_transform(y)
        super(NeuralNetworkClassifier, self).fit(X, y_binarized)

    def predict(self, X):
        y_pred = self._predict(X)

        return self.label_binarizer_.inverse_transform(y_pred)

    def predict_proba(self, X):
        return self._predict(X)
def test_label_binarizer_set_label_encoding():
    lb = LabelBinarizer(neg_label=-2, pos_label=2)

    # two-class case
    inp = np.array([0, 1, 1, 0])
    expected = np.array([[-2, 2, 2, -2]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # multi-class case
    inp = np.array([3, 2, 1, 2, 0])
    expected = np.array([[-2, -2, -2, +2], [-2, -2, +2, -2], [-2, +2, -2, -2], [-2, -2, +2, -2], [+2, -2, -2, -2]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
def test_label_binarizer():
    lb = LabelBinarizer()

    # two-class case
    inp = ["neg", "pos", "pos", "neg"]
    expected = np.array([[0, 1, 1, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # multi-class case
    inp = ["spam", "ham", "eggs", "ham", "0"]
    expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
def display_image_predictions(features, labels, predictions):
    n_classes = 10
    label_names = _load_label_names()
    label_binarizer = LabelBinarizer()
    label_binarizer.fit(range(n_classes))
    label_ids = label_binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)
    width = (1. - 2. * margin) / n_predictions

    for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate(
            zip(features, label_ids, predictions.indices, predictions.values)):
        pred_names = [label_names[pred_i] for pred_i in pred_indicies]
        correct_name = label_names[label_id]

        axies[image_i][0].imshow(feature)
        axies[image_i][0].set_title(correct_name)
        axies[image_i][0].set_axis_off()

        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
        axies[image_i][1].set_yticks(ind + margin)
        axies[image_i][1].set_yticklabels(pred_names[::-1])
        axies[image_i][1].set_xticks([0, 0.5, 1.0])
Example #19
class OneHotEncoder:
    """Joins the two encoders needed for OneHot transform."""

    int_encoder: LabelEncoder
    label_binarizer: LabelBinarizer

    def __init__(self, values: List[T]) -> None:
        self.int_encoder = LabelEncoder().fit(values)
        self.label_binarizer = LabelBinarizer().fit(
            self.int_encoder.transform(values))

    def transform(self, labels: List[T]) -> np.ndarray:
        """One hot encode a list of labels."""
        return self.label_binarizer.transform(
            self.int_encoder.transform(labels))

    def inverse_transform(self, encoded_labels: List[T]) -> List[T]:
        """Inverse transorm a list of one hot encoded labels."""
        return self.int_encoder.inverse_transform(
            self.label_binarizer.inverse_transform(encoded_labels))

    @property
    def classes(self) -> np.ndarray:
        """Return number of classes discovered while fitting transform."""
        return (self.label_binarizer.
                classes_  # pyre-ignore[16]: missing attribute classes_
                )
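# Usage sketch added for illustration (the colour labels are made up). The
# LabelEncoder maps labels to integer ids, the LabelBinarizer one-hot encodes
# those ids, and inverse_transform walks the same chain backwards.
enc = OneHotEncoder(["red", "green", "blue"])
onehot = enc.transform(["green", "blue"])
print(onehot)                          # one row per label, one column per class
print(enc.inverse_transform(onehot))   # ['green' 'blue']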
Example #20
class GBClassifier(_BaseGB, ClassifierMixin):

    def __init__(self, estimator, n_estimators=100,
                 step_size="line_search", learning_rate=0.1,
                 loss="squared_hinge", subsample=1.0,
                 callback=None, random_state=None):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.step_size = step_size
        self.learning_rate = learning_rate
        self.loss = loss
        self.subsample = subsample
        self.callback = callback
        self.random_state = random_state

    def _get_loss(self):
        losses = dict(squared_hinge=_SquaredHingeLoss(),
                      log=_LogLoss())
        return losses[self.loss]

    def fit(self, X, y):
        self._lb = LabelBinarizer(neg_label=-1)
        Y = self._lb.fit_transform(y)
        return super(GBClassifier, self).fit(X, Y)

    def predict(self, X):
        pred = self.decision_function(X)
        return self._lb.inverse_transform(pred)
Example #21
def test_proba_classif_convergence():
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.6,
                                                        test_size=0.4)
    mfc = MondrianForestClassifier(random_state=0)
    mfc.fit(X_train, y_train)

    lb = LabelBinarizer()
    y_bin = lb.fit_transform(y_train)
    le = LabelEncoder()
    y_enc = le.fit_transform(y_train)

    proba = mfc.predict_proba(X_train)
    labels = mfc.predict(X_train)
    assert_array_equal(proba, y_bin)
    assert_array_equal(labels, lb.inverse_transform(y_bin))

    # For points completely far away from the training data, this
    # should converge to the empirical distribution of labels.
    X_inf = np.vstack(
        (30.0 * np.ones(X_train.shape[1]), -30.0 * np.ones(X_train.shape[1])))
    inf_proba = mfc.predict_proba(X_inf)
    emp_proba = np.bincount(y_enc) / float(len(y_enc))
    assert_array_almost_equal(inf_proba, [emp_proba, emp_proba], 3)
class TargetTypeTransformer(FeatureTypeTransformer):
    def __init__(self, needs_label_binarizer=False):
        super().__init__()
        self.needs_label_binarizer = needs_label_binarizer

    def fit(self, y, **fit_kwargs):
        super().fit(y, **fit_kwargs)
        if self.needs_label_binarizer:
            self.label_binarizer_ = LabelBinarizer()
            self.label_binarizer_.fit(y)
        return self

    def transform(self, y, **transform_kwargs):
        y = super().transform(y)
        if self.needs_label_binarizer:
            y = self.label_binarizer_.transform(y)
        else:
            y = y.ravel()
        return y

    def inverse_transform(self, y, **inverse_transform_kwargs):
        if self.needs_label_binarizer:
            y = self.label_binarizer_.inverse_transform(y)
        y = super().inverse_transform(y)
        return y
Example #23
class OneHotEncoder:
    def __init__(self, labels):
        self.encoder = LabelBinarizer()
        self.labels = self.encoder.fit_transform(labels)

    def get(self, onehot):
        return self.encoder.inverse_transform(np.array([onehot]))[0]
Example #24
class MLPClassifier(BaseMLP, ClassifierMixin):
    """ Multilayer Perceptron Classifier.
    Uses a neural network with one hidden layer.
    Parameters
    ----------
    Attributes
    ----------
    Notes
    -----
    References
    ----------"""
    def __init__(self, n_hidden=200, lr=0.1, l2decay=0, loss='cross_entropy',
            output_layer='softmax', batch_size=100, verbose=0):
        super(MLPClassifier, self).__init__(n_hidden, lr, l2decay, loss,
                output_layer, batch_size, verbose)

    def fit(self, X, y, max_epochs=10, shuffle_data=False):
        self.lb = LabelBinarizer()
        one_hot_labels = self.lb.fit_transform(y)
        super(MLPClassifier, self).fit(
                X, one_hot_labels, max_epochs,
                shuffle_data)
        return self

    def predict(self, X):
        prediction = super(MLPClassifier, self).predict(X)
        return self.lb.inverse_transform(prediction)
def display_samples_predictions(input_features, target_labels, samples_predictions):
    num_classes = 10

    cifar10_class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

    label_binarizer = LabelBinarizer()
    label_binarizer.fit(range(num_classes))
    label_inds = label_binarizer.inverse_transform(np.array(target_labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    num_predictions = 4
    margin = 0.05
    ind = np.arange(num_predictions)
    width = (1. - 2. * margin) / num_predictions

    for image_ind, (feature, label_ind, prediction_indicies, prediction_values) in enumerate(
            zip(input_features, label_inds, samples_predictions.indices, samples_predictions.values)):
        prediction_names = [cifar10_class_names[pred_i] for pred_i in prediction_indicies]
        correct_name = cifar10_class_names[label_ind]

        axies[image_ind][0].imshow(feature)
        axies[image_ind][0].set_title(correct_name)
        axies[image_ind][0].set_axis_off()

        axies[image_ind][1].barh(ind + margin, prediction_values[::-1], width)
        axies[image_ind][1].set_yticks(ind + margin)
        axies[image_ind][1].set_yticklabels(prediction_names[::-1])
        axies[image_ind][1].set_xticks([0, 0.5, 1.0])
Example #26
class MLPClassifier(BaseMLP, ClassifierMixin):
    """ Multilayer Perceptron Classifier.

    Uses a neural network with one hidden layer.


    Parameters
    ----------


    Attributes
    ----------

    Notes
    -----


    References
    ----------"""

    def __init__(
        self, n_hidden=200, lr=0.1, l2decay=0, loss="cross_entropy", output_layer="softmax", batch_size=100, verbose=0
    ):
        super(MLPClassifier, self).__init__(n_hidden, lr, l2decay, loss, output_layer, batch_size, verbose)

    def fit(self, X, y, max_epochs=10, shuffle_data=False):
        self.lb = LabelBinarizer()
        one_hot_labels = self.lb.fit_transform(y)
        super(MLPClassifier, self).fit(X, one_hot_labels, max_epochs, shuffle_data)
        return self

    def predict(self, X):
        prediction = super(MLPClassifier, self).predict(X)
        return self.lb.inverse_transform(prediction)
class SingletonLabelBinarizer:
    def __init__(self, *args, **kwargs):
        self.label_binarizer = LabelBinarizer()

    def fit(self, x):
        self.label_binarizer.fit(x)

    def transform(self, x):
        return self.label_binarizer.transform(x)

    def fit_transform(self, x):
        return self.label_binarizer.fit_transform(x)

    def __new__(cls):
        if not hasattr(cls, "instance"):
            cls.instance = super(SingletonLabelBinarizer, cls).__new__(cls)
        return cls.instance

    @property
    def encoder(self):
        return self.label_binarizer

    @encoder.setter
    def encoder(self, x):
        self.label_binarizer = LabelBinarizer()

    def inverse_transform(self, y):
        return self.label_binarizer.inverse_transform(y)
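# Usage sketch added for illustration (the labels are made up). __new__ hands
# back one shared instance, so every "construction" returns the same object.
# Note that __init__ still runs on each call and replaces the wrapped
# LabelBinarizer, so an earlier fit is discarded whenever the singleton is
# constructed again.
enc_a = SingletonLabelBinarizer()
enc_b = SingletonLabelBinarizer()
print(enc_a is enc_b)              # True: both names point at the same object
enc_a.fit(["spam", "ham", "eggs"])
print(enc_b.transform(["ham"]))    # works because enc_b is enc_a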
Example #28
def display_image_predictions(features, labels, predictions):
    n_classes = 10
    label_names = _load_label_names()
    label_binarizer = LabelBinarizer()
    label_binarizer.fit(range(n_classes))
    label_ids = label_binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)
    width = (1. - 2. * margin) / n_predictions

    for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate(zip(features, label_ids, predictions.indices, predictions.values)):
        pred_names = [label_names[pred_i] for pred_i in pred_indicies]
        correct_name = label_names[label_id]

        axies[image_i][0].imshow(feature*255)
        axies[image_i][0].set_title(correct_name)
        axies[image_i][0].set_axis_off()

        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
        axies[image_i][1].set_yticks(ind + margin)
        axies[image_i][1].set_yticklabels(pred_names[::-1])
        axies[image_i][1].set_xticks([0, 0.5, 1.0])
Example #29
def test_label_binarizer_set_label_encoding():
    lb = LabelBinarizer(neg_label=-2, pos_label=2)

    # two-class case
    inp = np.array([0, 1, 1, 0])
    expected = np.array([[-2, 2, 2, -2]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # multi-class case
    inp = np.array([3, 2, 1, 2, 0])
    expected = np.array([[-2, -2, -2, +2], [-2, -2, +2, -2], [-2, +2, -2, -2],
                         [-2, -2, +2, -2], [+2, -2, -2, -2]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
Example #30
def test_label_binarizer():
    lb = LabelBinarizer()

    # two-class case
    inp = ["neg", "pos", "pos", "neg"]
    expected = np.array([[0, 1, 1, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # multi-class case
    inp = ["spam", "ham", "eggs", "ham", "0"]
    expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0],
                         [0, 0, 1, 0], [1, 0, 0, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
Example #31
def test_label_binarizer():
    lb = LabelBinarizer()

    # two-class case
    inp = np.array([0, 1, 1, 0])
    expected = np.array([[0, 1, 1, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # multi-class case
    inp = np.array([3, 2, 1, 2, 0])
    expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0],
                         [0, 0, 1, 0], [1, 0, 0, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
Example #32
def parta():
    def load(file_name):
        file = np.load(file_name)
        X_train = file['X_train']
        y_train = file['y_train']
        X_test = file['X_test']
        y_test = file['y_test']

        return X_train, y_train, X_test, y_test

    X_train, y_train, X_test, y_test = load('simnim.npz')
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)

    X_train = X_train.T

    from sklearn.preprocessing import LabelBinarizer
    binarizer = LabelBinarizer()
    binarizer.fit(y_train)
    Y_train_hat = binarizer.transform(y_train)
    Y_train = Y_train_hat.T

    nn = NeuralNetwork([X_train.shape[0], 30, Y_train.shape[0]],
                       functions=[sigmoid, softmax],
                       derivatives=[derivative_sigmoid])

    #nn.forward(X)
    #nn.backprop(X,Y,gradient_check=True)
    nn.fit(X_train,
           Y_train,
           eta=0.01,
           momentum=0.5,
           minibatch=32,
           regularizer=0.2,
           max_iter=150,
           gradient_check=False,
           lbfgs=True)

    output = nn.forward(X_train)

    y_train_output = binarizer.inverse_transform(output.T)
    y_test_output = binarizer.inverse_transform(nn.forward(X_test.T).T)
    print((y_train_output == y_train).mean())
    print((y_test_output == y_test).mean())
Example #33
def to_categorical(y):
    nb_classes = [
        'negativo_maioria', 'negativo_unânime', 'positivo_maioria',
        'positivo_unânime'
    ]
    binarize = LabelBinarizer()
    binarize.fit_transform(nb_classes)
    return binarize.inverse_transform(y)
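# Usage sketch added for illustration (the one-hot rows are made up).
# fit_transform on the class-name list fixes the column order, which for
# LabelBinarizer is alphabetical, so a one-hot or softmax row can be decoded
# back to its class name via argmax.
import numpy as np
y_onehot = np.array([[0, 0, 0, 1],
                     [1, 0, 0, 0]])
print(to_categorical(y_onehot))    # ['positivo_unânime' 'negativo_maioria']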
Example #34
def test_label_binarizer_multilabel():
    lb = LabelBinarizer()

    inp = [(2, 3), (1, ), (1, 2)]
    expected = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_equal(lb.inverse_transform(got), inp)

    # regression test for the two-class multilabel case
    lb = LabelBinarizer()

    inp = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([[1, 1], [1, 0], [0, 1], [1, 1]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_equal([set(x) for x in lb.inverse_transform(got)],
                 [set(x) for x in inp])
Example #35
def test_fit_reg_squared_multiple_outputs():
    reg = CDRegressor(C=0.05, random_state=0, penalty="l1/l2",
                      loss="squared", max_iter=100)
    lb = LabelBinarizer()
    Y = lb.fit_transform(mult_target)
    reg.fit(mult_dense, Y)
    y_pred = lb.inverse_transform(reg.predict(mult_dense))
    assert_almost_equal(np.mean(y_pred == mult_target), 0.797, 3)
    assert_almost_equal(reg.n_nonzero(percentage=True), 0.5)
Example #36
class BasicExtreamLearningMachine(BaseEstimator, ClassifierMixin):
    def __init__(self):
        self.L = None  # number of hidden nodes
        self.a = None  # weights between the input and hidden layer
        self.b = None  # weights between the hidden and output layer
        self.g_func = np.tanh  # tanh is used as the hidden-node activation

    def _append_bias(self, X):
        # append a constant 1.0 (bias term) to each input row
        return np.append(X, np.ones((X.shape[0], 1)), axis=1)

    def _set_L(self, X):
        #self.L = X.shape[0]
        # Set the number of hidden nodes:
        # use one hidden node per seven training samples
        # (there is no particular justification for this choice).
        self.L = int(X.shape[0] / 7.)

    def fit(self, X, y):
        # Prepare the input data:
        # standardize the features and append the bias column.
        stdsc = StandardScaler()
        X = stdsc.fit_transform(X)
        X = self._append_bias(X)

        # set the number of hidden nodes
        self._set_L(X)

        # prepare the target data (classes)
        self.classes_ = np.unique(y)
        self.n_class = len(self.classes_)
        self.binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
        T = self.binarizer.fit_transform(y)

        # Training:
        # 1. randomly initialize the input-to-hidden weights
        self.a = np.random.random((self.L, X.shape[1])) * 2.0 - 1.0
        # 2. compute the hidden-node outputs H
        H = self.g_func(X.dot(self.a.T))
        # 3. solve for the hidden-to-output weights b via the Moore-Penrose pseudoinverse (pinv2)
        self.b = pinv2(H).dot(T)
        return self

    def decision_function(self, X):
        stdsc = StandardScaler()
        X = stdsc.fit_transform(X)
        X = self._append_bias(X)

        H = self.g_func(X.dot(self.a.T))
        raw_prediction = H.dot(self.b)
        normalized_prediction = stdsc.fit_transform(raw_prediction)
        class_prediction = self.binarizer.inverse_transform(
            normalized_prediction)
        return class_prediction

    def predict(self, X):
        return self.decision_function(X)
Example #38
class AnprLabelProcessor:
    # init the label binarizers. Maps classes to a set of one-hot vectors
    def __init__(self, plateChars, plateLens):
        # convert the labels from integers to vectors
        self.plate_lb = LabelBinarizer().fit(plateChars)
        self.charCnt_lb = LabelBinarizer().fit(plateLens)
        self.numClassesPerChar = len(plateChars)
        self.maxPlateLen = plateLens[-1]

    # Generate one-hot vectors for every plate
    def transform(self, labels):
        # Create a list of chars for each plate
        plateLabel = np.empty((len(labels), self.maxPlateLen),
                              dtype=np.unicode_)
        for (i, label) in enumerate(labels):
            for j in range(0, self.maxPlateLen):
                plateLabel[i, j] = label[j]

        # Create a list of plate lengths for each plate
        #plateLenLabel = np.zeros((len(labels), 1), dtype=int)
        #for (i, label) in enumerate(labels):
        #  plateLenLabel[i, 0] = label[7]

        # Create the one hot labels for each plate
        #plateLabelsOneHot = np.zeros((len(labels), (37 * 7) + 7), dtype=int)
        plateLabelsOneHot = np.zeros(
            (len(labels), (self.numClassesPerChar * self.maxPlateLen)),
            dtype=int)
        for i in range(len(labels)):
            oneHotText = self.plate_lb.transform(plateLabel[i])
            #oneHotCharCnt = self.charCnt_lb.transform(plateLenLabel[i])
            #plateLabelsOneHot[i] = np.concatenate((oneHotText.flatten(), oneHotCharCnt.flatten()))
            plateLabelsOneHot[i] = oneHotText.flatten()

        return plateLabelsOneHot

    # for every plate generate license plate chars, and license plate length
    def inverse_transform(self, oneHotLabels):
        plates = []
        plateLens = []
        oneHotLenDemuxed = []
        for i in range(len(oneHotLabels)):
            oneHotDemuxed = []
            for j in range(self.maxPlateLen):
                onehotDemux = np.array(oneHotLabels[i, j])
                oneHotDemuxed.append(onehotDemux)
            oneHotDemuxed = np.array(oneHotDemuxed)
            plate = self.plate_lb.inverse_transform(oneHotDemuxed)
            plates.append(plate)
            #oneHotLenDemux = np.array(oneHotLabels[i, 37 * 7:])
            #oneHotLenDemuxed.append(oneHotLenDemux)
        #oneHotLenDemuxed = np.array(oneHotLenDemuxed)
        #plateLens = (self.charCnt_lb.inverse_transform(oneHotLenDemuxed))

        #return plates, plateLens
        return plates
Example #39
class BaseClassifier(BaseEstimator):

    def predict_proba(self, X):
        if len(self.classes_) != 2:
            raise NotImplementedError("predict_(log_)proba only supported"
                                      " for binary classification")

        if self.loss == "log":
            df = self.decision_function(X).ravel()
            prob = 1.0 / (1.0 + np.exp(-df))
        elif self.loss == "modified_huber":
            df = self.decision_function(X).ravel()
            prob = np.minimum(1, np.maximum(-1, df))
            prob += 1
            prob /= 2
        else:
            raise NotImplementedError("predict_(log_)proba only supported when"
                                      " loss='log' or loss='modified_huber' "
                                      "(%s given)" % self.loss)

        out = np.zeros((X.shape[0], 2), dtype=np.float64)
        out[:, 1] = prob
        out[:, 0] = 1 - prob

        return out

    def _set_label_transformers(self, y, reencode=False, neg_label=-1):
        if reencode:
            self.label_encoder_ = LabelEncoder()
            y = self.label_encoder_.fit_transform(y).astype(np.int32)
        else:
            y = y.astype(np.int32)

        self.label_binarizer_ = LabelBinarizer(neg_label=neg_label,
                                               pos_label=1)
        self.label_binarizer_.fit(y)
        self.classes_ = self.label_binarizer_.classes_.astype(np.int32)
        n_classes = len(self.label_binarizer_.classes_)
        n_vectors = 1 if n_classes <= 2 else n_classes

        return y, n_classes, n_vectors

    def decision_function(self, X):
        pred = safe_sparse_dot(X, self.coef_.T)
        if hasattr(self, "intercept_"):
            pred += self.intercept_
        return pred

    def predict(self, X):
        pred = self.decision_function(X)
        out = self.label_binarizer_.inverse_transform(pred)

        if hasattr(self, "label_encoder_"):
            out = self.label_encoder_.inverse_transform(out)

        return out
def test_label_binarizer_iris():
    lb = LabelBinarizer()
    Y = lb.fit_transform(iris.target)
    clfs = [SGDClassifier().fit(iris.data, Y[:, k]) for k in range(len(lb.classes_))]
    Y_pred = np.array([clf.decision_function(iris.data) for clf in clfs]).T
    y_pred = lb.inverse_transform(Y_pred)
    accuracy = np.mean(iris.target == y_pred)
    y_pred2 = SGDClassifier().fit(iris.data, iris.target).predict(iris.data)
    accuracy2 = np.mean(iris.target == y_pred2)
    assert_almost_equal(accuracy, accuracy2)
Example #41
def test_label_binarizer_multilabel():
    lb = LabelBinarizer()

    inp = [(2, 3), (1,), (1, 2)]
    expected = np.array([[0, 1, 1],
                         [1, 0, 0],
                         [1, 1, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_equal(lb.inverse_transform(got), inp)
Example #42
class _CategoricalEncoder:
    """OneHotEncoder that can handle categorical variables."""

    def __init__(self):
        """Convert labeled categories into one-hot encoded features."""
        self._lb = LabelBinarizer()

    def fit(self, X):
        """Fit a list or array of categories.

        Parameters
        ----------
        * `X` [array-like, shape=(n_categories,)]:
            List of categories.
        """
        self.mapping_ = {v: i for i, v in enumerate(X)}
        self.inverse_mapping_ = {i: v for v, i in self.mapping_.items()}
        self._lb.fit([self.mapping_[v] for v in X])
        self.n_classes = len(self._lb.classes_)

        return self

    def transform(self, X):
        """Transform an array of categories to a one-hot encoded representation.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples,)]:
            List of categories.

        Returns
        -------
        * `Xt` [array-like, shape=(n_samples, n_categories)]:
            The one-hot encoded categories.
        """
        return self._lb.transform([self.mapping_[v] for v in X])

    def inverse_transform(self, Xt):
        """Inverse transform one-hot encoded categories back to their original
           representation.

        Parameters
        ----------
        * `Xt` [array-like, shape=(n_samples, n_categories)]:
            One-hot encoded categories.

        Returns
        -------
        * `X` [array-like, shape=(n_samples,)]:
            The original categories.
        """
        Xt = np.asarray(Xt)
        return [
            self.inverse_mapping_[i] for i in self._lb.inverse_transform(Xt)
        ]
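# Usage sketch added for illustration (the size categories are made up). The
# mapping_ dict turns arbitrary category values into integer ids, the wrapped
# LabelBinarizer one-hot encodes the ids, and inverse_transform maps the ids
# back to the original values.
enc = _CategoricalEncoder().fit(["small", "medium", "large"])
Xt = enc.transform(["large", "small"])
print(Xt)                          # one-hot rows, shape (2, 3)
print(enc.inverse_transform(Xt))   # ['large', 'small']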
Example #43
class AdaBoostClassifier(BaseEstimator, ClassifierMixin):

    def __init__(self, estimator, n_estimators=10):
        self.estimator = estimator
        self.n_estimators = n_estimators

    def fit(self, X, y):
        n_samples = X.shape[0]

        weights = np.ones(n_samples, dtype=np.float64) / n_samples

        self._lb = LabelBinarizer(neg_label=-1)
        y = self._lb.fit_transform(y).ravel()

        self.estimators_ = np.zeros(self.n_estimators, dtype=object)
        self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)

        y_pred_ = np.zeros(n_samples, dtype=np.float64)

        for it in range(self.n_estimators):
            est = clone(self.estimator)
            est = est.fit(X, y, sample_weight=weights)

            y_pred = est.predict(X)
            err = 1 - accuracy_score(y, y_pred, sample_weight=weights)

            if err == 0:
                self.estimator_weights_[it] = 1
                self.estimators_[it] = est
                break

            alpha = 0.5 * np.log((1 - err) / err)

            #weights *= np.exp(- alpha * y * y_pred)
            #weights /= weights.sum()

            y_pred_ += alpha * y_pred
            weights = np.exp(-y * y_pred_)
            #weights = 1.0 / (1 + np.exp(y * y_pred_))  # logit boost
            weights /= weights.sum()

            self.estimator_weights_[it] = alpha
            self.estimators_[it] = est


        return self

    def predict(self, X):
        y_pred = np.zeros(X.shape[0], dtype=np.float64)
        for it in range(self.n_estimators):
            if self.estimator_weights_[it] != 0:
                pred = self.estimators_[it].predict(X)
                y_pred += self.estimator_weights_[it] * pred
        y_pred = np.sign(y_pred)
        return self._lb.inverse_transform(y_pred.reshape(-1, 1))
Example #44
class NN_Classifier(NNBase):

  def __init__(self,layers = [], lr=0.01, epochs=None, noisy=None, verbose=False):
    
    super(NN_Classifier, self).__init__(layers=layers, lr=lr, epochs=epochs, noisy=noisy, verbose=verbose)
    self.type = 'C'
    self.error_func = CrossEntropyError
    self.accuracy_score = AccuracyScore
    self.label_binarizer = LabelBinarizer()

  def predict(self, X):
    predictions = []
    for el in X:
      current_prediction = NNBase._predict(self, row(el))
      predictions.append(current_prediction)
    predictions = np.vstack(predictions)
    current_results = coalesce(predictions)
    return self.label_binarizer.inverse_transform(current_results)

  def predict_proba(self, X):
    predictions = []
    for el in X:
      current_prediction = NNBase._predict(self, row(el))
      predictions.append(current_prediction)
    predictions = np.vstack(predictions)
    return predictions

  def fit(self, X, T):
    T_impl = self.label_binarizer.fit_transform(T)
    if not self.epochs:
      self.epochs = 1

    for num in range(self.epochs):
      if self.verbose:
        print("Epoch: %d" % num)
      for i in range(len(X)):
        NNBase._update(self, row(X[i]), row(T_impl[i]))

  def error(self, X, T):
    T_impl = self.label_binarizer.transform(T)
    Y = self.predict_proba(X)
    return self.error_func.func(Y, T_impl)

  def score(self, X, T):
    Y = self.predict(X)
    return self.accuracy_score.func(Y,T)

  def analytical_gradient(self, X, T):
    T_impl = self.label_binarizer.transform(T)
    return NNBase._analytical_gradient(self, X, T_impl)

  def numerical_gradient(self, X, T):
    T_impl = self.label_binarizer.transform(T)
    return NNBase._numerical_gradient(self, X, T_impl)
def test_label_binarizer():
    lb = LabelBinarizer()

    # two-class case
    inp = np.array([0, 1, 1, 0])
    expected = np.array([[0, 1, 1, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # multi-class case
    inp = np.array([3, 2, 1, 2, 0])
    expected = np.array([[0, 0, 0, 1],
                         [0, 0, 1, 0],
                         [0, 1, 0, 0],
                         [0, 0, 1, 0],
                         [1, 0, 0, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
Example #46
class DualLinearSVC(BaseEstimator, ClassifierMixin):

    def __init__(self, C=1.0, loss="l1", max_iter=1000, tol=1e-3,
                 termination="convergence", sv_upper_bound=1000,
                 shrinking=True, warm_start=False, random_state=None,
                 callback=None,
                 verbose=0, n_jobs=1):
        self.C = C
        self.loss = loss
        self.max_iter = max_iter
        self.tol = tol
        self.termination = termination
        self.sv_upper_bound = sv_upper_bound
        self.shrinking = shrinking
        self.warm_start = warm_start
        self.random_state = random_state
        self.callback = callback
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.coef_ = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        rs = check_random_state(self.random_state)
        self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1)
        Y = self.label_binarizer_.fit_transform(y)
        n_vectors = Y.shape[1]

        if not self.warm_start or self.coef_ is None:
            self.coef_ = np.zeros((n_vectors, n_features),
                                  dtype=np.float64)
            self.dual_coef_ = np.zeros((n_vectors, n_samples),
                                       dtype=np.float64)

        kernel = get_kernel("linear")
        kcache = KernelCache(kernel, n_samples, 0, 0, self.verbose)

        for i in range(n_vectors):
            _dual_cd(self, self.coef_[i], self.dual_coef_[i],
                     X, Y[:, i], kcache, True,
                     "permute", 60, self.termination, self.sv_upper_bound,
                     self.C, self.loss, self.max_iter, rs, self.tol,
                     self.shrinking, self.callback, verbose=self.verbose)

        return self

    def decision_function(self, X):
        return np.dot(X, self.coef_.T)

    def predict(self, X):
        pred = self.decision_function(X)
        return self.label_binarizer_.inverse_transform(pred, threshold=0)
def parta():

    def load(file_name):
        file = np.load(file_name)
        X_train = file['X_train']
        y_train = file['y_train']
        X_test = file['X_test']
        y_test = file['y_test']

        return X_train,y_train,X_test,y_test

    X_train,y_train,X_test,y_test = load('simnim.npz')
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)


    X_train = X_train.T

    from sklearn.preprocessing import LabelBinarizer
    binarizer = LabelBinarizer()
    binarizer.fit(y_train)
    Y_train_hat = binarizer.transform(y_train)
    Y_train = Y_train_hat.T


    nn = NeuralNetwork([X_train.shape[0],30,Y_train.shape[0]], functions=[sigmoid,softmax], derivatives=[derivative_sigmoid])

    #nn.forward(X)
    #nn.backprop(X,Y,gradient_check=True)
    nn.fit(X_train,Y_train,eta=0.01,momentum=0.5,minibatch=32,regularizer=0.2,max_iter=150,gradient_check=False, lbfgs=True)

    output = nn.forward(X_train)

    y_train_output = binarizer.inverse_transform(output.T)
    y_test_output = binarizer.inverse_transform(nn.forward(X_test.T).T)
    print((y_train_output==y_train).mean())
    print((y_test_output ==y_test).mean())
def test_label_binarizer_multilabel():
    lb = LabelBinarizer()

    inp = [(2, 3), (1,), (1, 2)]
    expected = np.array([[0, 1, 1],
                         [1, 0, 0],
                         [1, 1, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_equal(lb.inverse_transform(got), inp)

    # regression test for the two-class multilabel case
    lb = LabelBinarizer()

    inp = [[1, 0], [0], [1], [0, 1]]
    expected = np.array([[1, 1],
                         [1, 0],
                         [0, 1],
                         [1, 1]])
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_equal([set(x) for x in lb.inverse_transform(got)],
                 [set(x) for x in inp])
Example #49
class MLPClassifier(BaseMLP, ClassifierMixin):
    """ Multilayer Perceptron Classifier.

    Uses a neural network with one hidden layer.


    Parameters
    ----------


    Attributes
    ----------

    Notes
    -----


    References
    ----------"""
    def __init__(self, n_hidden=200, lr=0.1, l2decay=0, loss='cross_entropy',
            output_layer='softmax', batch_size=100, verbose=0):
        super(MLPClassifier, self).__init__(n_hidden, lr, l2decay, loss,
                output_layer, batch_size, verbose)

    def fit(self, X, y, max_epochs=10, shuffle_data=False):
        self.lb = LabelBinarizer()
        one_hot_labels = self.lb.fit_transform(y)
        super(MLPClassifier, self).fit(
                X, one_hot_labels, max_epochs,
                shuffle_data)
        return self

    def predict(self, X):
        prediction = super(MLPClassifier, self).predict(X)
        return self.lb.inverse_transform(prediction)


# def test_classification():
#     from sklearn.datasets import load_digits
#     digits = load_digits()
#     X, y = digits.data, digits.target
#     mlp = MLPClassifier()
#     mlp.fit(X, y)
#     training_score = mlp.score(X, y)
#     print("training accuracy: %f" % training_score)
#     assert(training_score > .95)
#
#
# if __name__ == "__main__":
#     test_classification()
Example #50
class ELM(BaseEstimator):

    def __init__(self, h=60, activation='linear', random_state=None, C=100):
        self.name = 'elm'
        self.h = h
        self.activation = activation
        self.random_state = random_state
        self.C = C

        assert self.activation in ['rbf', 'sigmoid', 'linear']

    def fit(self, X, y):

        if self.random_state is None:
            self.random_state = np.random.RandomState(np.random.randint(0, np.iinfo(np.int32).max))
        elif type(self.random_state) == int:
            self.random_state = np.random.RandomState(self.random_state)

        self.lb = LabelBinarizer()
        self.W = self.random_state.normal(size=(X.shape[1], self.h))
        self.B = self.random_state.normal(size=self.h)

        if self.activation == 'rbf':
            H = _elm_vectorized_rbf(X, self.W, self.B)
        elif self.activation == 'sigmoid':
            H = _elm_sigmoid(X, self.W, self.B)
        else :
            H = X.dot(self.W)

        self.lb.fit(y)

        lam = np.eye(H.shape[1]) * (1./self.C)
        H_inv = np.linalg.inv(H.T.dot(H) + lam)
        self.beta = H_inv.dot(H.T.dot(self.lb.transform(y)))

        return self


    def decision_function(self, X):
        if self.activation == 'rbf':
            return _elm_vectorized_rbf(X, self.W, self.B).dot(self.beta)
        elif self.activation == 'sigmoid':
            return _elm_sigmoid(X, self.W, self.B).dot(self.beta)
        else :
            return X.dot(self.W).dot(self.beta)


    def predict(self, X):
        return self.lb.inverse_transform(self.decision_function(X))
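# Usage sketch added for illustration (the toy data is made up). Only the
# activation='linear' branch is exercised here, because the rbf/sigmoid paths
# rely on the external _elm_vectorized_rbf / _elm_sigmoid helpers not shown in
# this snippet.
import numpy as np
rng = np.random.RandomState(0)
X_toy = rng.normal(size=(30, 5))
y_toy = np.repeat([0, 1, 2], 10)          # three classes, ten samples each
elm = ELM(h=20, activation='linear', random_state=0)
elm.fit(X_toy, y_toy)
print(elm.predict(X_toy[:5]))             # labels decoded via inverse_transform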
Example #51
class KMPClassifier(KMPBase, ClassifierMixin):

    def fit(self, X, y):
        n_nonzero_coefs, K, y, norms = self._pre_fit(X, y)

        self.lb_ = LabelBinarizer()
        Y = self.lb_.fit_transform(y)
        self._fit(K, y, Y, n_nonzero_coefs, norms)

        self._post_fit()

        return self

    def predict(self, X):
        pred = self.decision_function(X)
        return self.lb_.inverse_transform(pred, threshold=0.5)
Example #52
class TWELM(ProjectorMixin, BaseEstimator):

    def __str__(self):
        if self.C is None:
            solver =  self.solve.__name__
        else:
            solver = 'algebraic,C='+str(self.C)

        return 'TWELM(h='+str(self.h)+',f='+self.f.__name__+',balanced=true,solver='+solver+',extreme='+str(self.extreme)+')'

    def __init__(self, projector, h=100, C=None, solver=la.lstsq, random_state=0, extreme=True):
        self.h = h
        self.C = C
        self.projector = projector
        self.random_state = random_state
        self.solve = solver

        self.extreme = extreme

    def fit(self, X, y ):
        self.labeler = LabelBinarizer()
        rng = check_random_state(self.random_state)
        self.projector.set_params(h=self.h, rng=rng)
        H = self.projector.fit(X).project(X)

        y = y.tolist()
        # balanced class weights: each sample is weighted by sqrt(max_count / its_class_count)
        s = {l: float(y.count(l)) for l in set(y)}
        ms = max(s.values())
        s = {l: ms / s[l] for l in s}
        w = np.array([[np.sqrt(s[a]) for a in y]]).T

        T = self.labeler.fit_transform(y)
        start = time.time()
        if self.C is None:
            self.beta, _, _, _ = self.solve(np.multiply(H, w), np.multiply(T, w))
        else:
            H = np.multiply(H, w)
            self.beta = la.inv(np.eye(H.shape[1]) / self.C + H.T.dot(H)).dot(H.T.dot(np.multiply(T, w)))

        self.train_time = time.time()-start
        return self

    def predict(self, X ):
        return self.labeler.inverse_transform(np.dot(self.projector.project(X), self.beta)).T

    def decision_function(self, X):
        return np.dot(self.projector.project(X), self.beta)
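

# Illustration (added): the per-sample weights built in TWELM.fit above balance
# the classes by weighting each sample with sqrt(max_class_count / its_class_count).
# For y = [0, 0, 0, 1] the minority sample gets weight sqrt(3) and the majority
# samples get weight 1.
def _demo_twelm_weights(y=(0, 0, 0, 1)):
    y = list(y)
    s = {l: float(y.count(l)) for l in set(y)}
    ms = max(s.values())
    s = {l: ms / s[l] for l in s}
    return np.array([[np.sqrt(s[a]) for a in y]]).T  # shape (n_samples, 1)
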
class ELMClassifier(BaseELM, ClassifierMixin):

    def __init__(self, n_hidden=20, regularized=False):

        super(ELMClassifier, self).__init__(n_hidden, regularized)

        self.classes_ = None

    def fit(self, X, y):

        self.classes_ = np.unique(y)

        self._lbin = LabelBinarizer()
        y = self._lbin.fit_transform(y)

        super(ELMClassifier, self).fit(X, y)
        return self

    def predict(self, X):
        X = atleast2d_or_csr(X)
        scores = self.decision_function(X)

        # if len(scores.shape) == 1:
        #scores = logistic_sigmoid(scores)
        #results = (scores > 0.5).astype(np.int)

        # else:
            #scores = _softmax(scores)
            #results = scores.argmax(axis=1)
            # self.classes_[results]
        return self._lbin.inverse_transform(scores)

    def predict_proba(self, X):
        scores = self.decision_function(X)

        if len(scores.shape) == 1:
            scores = logistic_sigmoid(scores)
            return np.vstack([1 - scores, scores]).T
        else:
            return _softmax(scores)
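

# Illustration (added): in the binary case, predict_proba above squashes the
# one-column decision scores through a sigmoid and stacks the complement so that
# each row sums to one.  A local sigmoid stands in for logistic_sigmoid here.
def _demo_binary_predict_proba(scores=(-2.0, 0.0, 3.0)):
    scores = np.asarray(scores, dtype=float)
    probs = 1.0 / (1.0 + np.exp(-scores))
    return np.vstack([1 - probs, probs]).T  # shape (n_samples, 2)
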
class NeuralNetworkClassifier(BaseNeuralNetwork, ClassifierMixin):
    def __init__(self, layers=[], loss_function="logistic_loss", learning_rate=0.1, batch_size=100, max_epochs=10,
                 update_algorithm="sgd", verbose=False):

        sup = super(NeuralNetworkClassifier, self)
        sup.__init__(layers=layers, loss_function=loss_function, learning_rate=learning_rate,
                     batch_size=batch_size, max_epochs=max_epochs, update_algorithm=update_algorithm,
                     verbose=verbose)

        self.label_binarizer_ = LabelBinarizer()

    def fit(self, X, y):
        y_binarized = self.label_binarizer_.fit_transform(y)
        super(NeuralNetworkClassifier, self).fit(X, y_binarized)
        return self

    def predict(self, X):
        y_pred = self._predict(X)

        return self.label_binarizer_.inverse_transform(y_pred)

    def predict_proba(self, X):
        return self._predict(X)
class encode_cat:
    """Wraps a LabelEncoder and a LabelBinarizer together so that arbitrary
    category labels can be one-hot encoded and decoded again."""

    def __init__(self):
        self.LB = LabelBinarizer()
        self.LE = LabelEncoder()

    def fit(self, X, y=None):
        self.LE.fit(X)
        self.LB.fit(self.LE.transform(X))
        return self

    def transform(self, X, y=None):
        return self.LB.transform(self.LE.transform(X))

    def fit_transform(self, X, y=None):
        return self.fit(X).transform(X)

    def inverse_transform(self, X, y=None):
        return self.LE.inverse_transform(self.LB.inverse_transform(X))
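

# Usage sketch (added): encode_cat chains a LabelEncoder and a LabelBinarizer so
# that string categories round-trip through a one-hot encoding and back.
def _demo_encode_cat():
    enc = encode_cat()
    onehot = enc.fit_transform(['red', 'green', 'blue', 'green'])  # shape (4, 3)
    labels = enc.inverse_transform(onehot)                         # back to strings
    return onehot, labels
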
class ActualTreatmentPredictor(_BasePredictor):
    """Returns the most likely treatments for a patient.

    Args:
        prediction_model: The model used to make predictions.
        preprocessor: The preprocessor used to transform the patient features into a format that can be
            used by the prediction_model.
        recommendation_probability_threshold: The probability threshold that a potential recommendation
            must exceed to be considered a possible treatment.
    """

    def __init__(self, prediction_model, preprocessor, recommendation_probability_threshold=0.05):
        super().__init__(prediction_model, preprocessor)

        self._treatment_label_binarizer = LabelBinarizer()
        self._recommendation_probability_threshold = recommendation_probability_threshold

    def _pre_fit_hook(self, data):
        self._treatment_label_binarizer.fit(data.treatment.unique())

    def _get_outcome_data_for_training(self, data):
        return self._treatment_label_binarizer.transform(data.treatment.values)

    def _get_predicted_value(self, prediction):
        return self._treatment_label_binarizer.inverse_transform(prediction)

    def get_possible_treatments(self, data):
        """Returns the most likely treatments for a patient.

        Args:
            data: A dataframe containing patient features as well as a sample_id column. The sample_id
                column is needed because a sample_id can have several likely treatments, and the column
                is used to reconcile each treatment with its record.

        Returns:
            A dataframe with the following columns:
            sample_id: The sample_id the treatment is for.
            treatment: The treatment category

        """
        self._checked_is_trained()

        # predict_proba returns one array per treatment class (one-vs-rest), each holding
        # per-sample probability pairs of the form [P(not treatment), P(treatment)]
        probabilities_sectioned_by_treatment = self._pipeline.predict_proba(data)
        ordered_treatments = self._treatment_label_binarizer.classes_
        treatment_dfs = []

        for (treatment, probabilities_for_treatment) in zip(ordered_treatments, probabilities_sectioned_by_treatment):
            probability_of_treatment = [prob[1] if len(prob) > 1 else 0 for prob in probabilities_for_treatment]
            df = pd.DataFrame({
                "treatment": treatment,
                "probability_of_treatment": probability_of_treatment,
                "sample_id": range(len(probability_of_treatment))
            })
            treatment_dfs.append(df)

        combined_df = pd.concat(treatment_dfs)

        # Get all treatments that have a probability greater than the threshold
        sample_with_high_probability = \
            combined_df[combined_df.probability_of_treatment > self._recommendation_probability_threshold]

        # Get the top probability for each sample_id. This treatment is used when the sample_id has
        # no treatment above the threshold
        top_treatment_per_sample_id = combined_df.groupby("sample_id")["probability_of_treatment"].nlargest(
            1).reset_index().drop('level_1', axis=1)

        # Find the top treatments for samples that have no treatment above the threshold. This is a rare
        # case but can happen.
        samples_ids_with_high_prob = set(sample_with_high_probability.sample_id.unique())
        all_sample_ids = set(combined_df.sample_id.unique())
        ids_not_in_high_prob = all_sample_ids - samples_ids_with_high_prob

        top_treatments_for_samples_missing_high_prob =\
            top_treatment_per_sample_id[top_treatment_per_sample_id.sample_id.isin(ids_not_in_high_prob)]

        return pd.concat([sample_with_high_probability, top_treatments_for_samples_missing_high_prob])
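

# Toy illustration (added): the selection logic above keeps every treatment whose
# probability clears the threshold and falls back to the single most likely
# treatment for any sample_id with nothing above it.  The numbers below are made
# up purely to exercise both branches.
def _demo_treatment_selection(threshold=0.05):
    import pandas as pd

    combined_df = pd.DataFrame({
        "sample_id": [0, 0, 1, 1],
        "treatment": ["A", "B", "A", "B"],
        "probability_of_treatment": [0.60, 0.30, 0.03, 0.02],
    })
    high = combined_df[combined_df.probability_of_treatment > threshold]
    top = (combined_df.groupby("sample_id")["probability_of_treatment"]
           .nlargest(1).reset_index().drop("level_1", axis=1))
    fallback_ids = set(combined_df.sample_id) - set(high.sample_id)
    fallback = top[top.sample_id.isin(fallback_ids)]
    # sample 0 keeps both treatments; sample 1 keeps only its top-probability row
    return pd.concat([high, fallback])
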
class ELMClassifier(BaseELM, ClassifierMixin):
    """Extreme learning machine classifier.

    The algorithm trains a single-hidden layer feedforward network by computing
    the hidden layer values using randomized parameters, then solving
    for the output weights using least-square solutions. For prediction,
    after computing the forward pass, the continuous output values pass
    through a gate function converting them to integers that represent classes.

    This implementation works with data represented as dense and sparse numpy
    arrays of floating point values for the features.

    Parameters
    ----------
    C : float, optional, default 1
        A regularization term that controls the linearity of the decision
        function. Smaller values of C make the decision boundary more linear.

    class_weight : {dict, 'auto', None}, default None
        If 'auto', class weights will be given inversely proportional
        to the frequency of the class in the data.
        If a dictionary is given, keys are the class labels and the
        corresponding values are the class weights.
        If None is given, then no class weights will be applied.

    weight_scale : float, default 1.
        Initializes and scales the input-to-hidden weights.
        The weight values will range between plus and minus
        'sqrt(weight_scale * 6. / (n_features + n_hidden))' based on the
        uniform distribution.

    n_hidden : int, default 100
        The number of units in the hidden layer.

    activation : {'logistic', 'tanh', 'relu'}, default 'relu'
        Activation function for the hidden layer.

         - 'logistic', the logistic sigmoid function,
            returns f(x) = 1 / (1 + exp(-x)).

         - 'tanh', the hyperbolic tan function,
            returns f(x) = tanh(x).

         - 'relu', the rectified linear unit function,
            returns f(x) = max(0, x).

    batch_size : int, optional, default None
        If None is given, batch_size is set as the number of samples.
        Otherwise, it will be set as the given integer.

    verbose : bool, optional, default False
        Whether to print the training score.

    warm_start : bool, optional, default False
        When set to True, reuse the solution of the previous
        call to fit as initialization, otherwise, just erase the
        previous solution.

    random_state : int or RandomState, optional, default None
        State or seed for the random number generator.

    Attributes
    ----------
    `classes_` : array-like, shape (n_classes,)
        Class labels for each output.

    `n_outputs_` : int
        Number of output neurons.

    `coef_hidden_` : array-like, shape (n_features, n_hidden)
        The input-to-hidden weights.

    `intercept_hidden_` : array-like, shape (n_hidden,)
        The bias added to the hidden layer neurons.

    `coef_output_` : array-like, shape (n_hidden, n_outputs_)
        The hidden-to-output weights.

    `label_binarizer_` : LabelBinarizer
        A LabelBinarizer object trained on the training set.

    References
    ----------
    Liang, Nan-Ying, et al.
        "A fast and accurate online sequential learning algorithm for
        feedforward networks." Neural Networks, IEEE Transactions on
        17.6 (2006): 1411-1423.
        http://www.ntu.edu.sg/home/egbhuang/pdf/OS-ELM-TNN.pdf

    Zong, Weiwei, Guang-Bin Huang, and Yiqiang Chen.
        "Weighted extreme learning machine for imbalance learning."
        Neurocomputing 101 (2013): 229-242.

    Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of
        training deep feedforward neural networks." International Conference
        on Artificial Intelligence and Statistics. 2010.
    """
    def __init__(self, n_hidden=100, activation='relu', C=1,
                 class_weight=None, weight_scale=1.0, batch_size=None,
                 verbose=False, warm_start=False, random_state=None):
        super(ELMClassifier, self).__init__(n_hidden=n_hidden,
                                            activation=activation,
                                            C=C, class_weight=class_weight,
                                            weight_scale=weight_scale,
                                            batch_size=batch_size,
                                            verbose=verbose,
                                            warm_start=warm_start,
                                            random_state=random_state)

        self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1)

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """Fit the model to the data X and target y.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        y : array-like, shape (n_samples,)
            Subset of the target values.

        classes : array-like, shape (n_classes,)
            List of all the classes that can possibly appear in the y vector.

            Must be provided at the first call to partial_fit, can be omitted
            in subsequent calls.

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights. Rescale C per sample. Higher weights
            force the classifier to put more emphasis on these points.

        Returns
        -------
        self : returns a trained elm usable for prediction.
        """
        # only overwrite classes_ when classes is given (first call); subsequent
        # calls may omit it, as documented above
        if classes is not None:
            self.classes_ = classes

        super(ELMClassifier, self).partial_fit(X, y, sample_weight)

        return self

    def decision_function(self, X):
        """Decision function of the elm model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y : array-like, shape (n_samples,) or (n_samples, n_classes)
            The predicted values.
        """
        y_scores = self._decision_scores(X)

        if self.n_outputs_ == 1:
            return y_scores.ravel()
        else:
            return y_scores

    def predict(self, X):
        """Predict using the ELM model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y : array-like, shape (n_samples,) or (n_samples, n_classes)
            The predicted classes, or the predicted values.
        """
        y_scores = self._decision_scores(X)

        return self.label_binarizer_.inverse_transform(y_scores)

    def predict_proba(self, X):
        """Probability estimates.

        Warning: the estimates aren't calibrated since the model optimizes a
        penalized least squares objective function based on the One Vs Rest
        binary encoding of the class membership.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y_prob : array-like, shape (n_samples, n_classes)
            The predicted probability of the sample for each class in the
            model, where classes are ordered as they are in
            `self.classes_`.
        """
        y_scores = self._decision_scores(X)

        if len(self.classes_) == 2:
            y_scores = logistic(y_scores)
            return np.hstack([1 - y_scores, y_scores])
        else:
            return softmax(y_scores)
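

# Illustration (added): the ELMClassifier above constructs
# LabelBinarizer(neg_label=-1, pos_label=1), so its least-squares output layer
# regresses onto +/-1 targets instead of the default 0/1 indicator matrix.
# LabelBinarizer is assumed imported at the top of this file as elsewhere.
def _demo_plus_minus_one_targets():
    lb = LabelBinarizer(neg_label=-1, pos_label=1)
    # binary case: one column of -1/+1; multiclass: one +/-1 column per class
    return lb.fit_transform(['cat', 'dog', 'cat'])
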
def exp(solvers, penalties, single_target, n_samples=30000, max_iter=20,
        dataset='rcv1', n_jobs=1, skip_slow=False):
    mem = Memory(cachedir=expanduser('~/cache'), verbose=0)

    if dataset == 'rcv1':
        rcv1 = fetch_rcv1()

        lbin = LabelBinarizer()
        lbin.fit(rcv1.target_names)

        X = rcv1.data
        y = rcv1.target
        y = lbin.inverse_transform(y)
        le = LabelEncoder()
        y = le.fit_transform(y)
        if single_target:
            y_n = y.copy()
            y_n[y > 16] = 1
            y_n[y <= 16] = 0
            y = y_n

    elif dataset == 'digits':
        digits = load_digits()
        X, y = digits.data, digits.target
        if single_target:
            y_n = y.copy()
            y_n[y < 5] = 1
            y_n[y >= 5] = 0
            y = y_n
    elif dataset == 'iris':
        iris = load_iris()
        X, y = iris.data, iris.target
    elif dataset == '20newspaper':
        ng = fetch_20newsgroups_vectorized()
        X = ng.data
        y = ng.target
        if single_target:
            # binarize the 20 newsgroups targets the same way as for rcv1 above
            y_n = y.copy()
            y_n[y > 16] = 1
            y_n[y <= 16] = 0
            y = y_n

    X = X[:n_samples]
    y = y[:n_samples]

    cached_fit = mem.cache(fit_single)
    out = Parallel(n_jobs=n_jobs, mmap_mode=None)(
        delayed(cached_fit)(solver, X, y,
                            penalty=penalty, single_target=single_target,
                            C=1, max_iter=max_iter, skip_slow=skip_slow)
        for solver in solvers
        for penalty in penalties)

    res = []
    idx = 0
    for solver in solvers:
        for penalty in penalties:
            if not (skip_slow and solver == 'lightning' and penalty == 'l1'):
                lr, times, train_scores, test_scores, accuracies = out[idx]
                this_res = dict(solver=solver, penalty=penalty,
                                single_target=single_target,
                                times=times, train_scores=train_scores,
                                test_scores=test_scores,
                                accuracies=accuracies)
                res.append(this_res)
            idx += 1

    with open('bench_saga.json', 'w+') as f:
        json.dump(res, f)
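

# Example invocation (added, left commented out like the test driver earlier in
# this file): a small single-target run on the digits dataset.  It assumes
# fit_single and the dataset loaders used inside exp() are defined elsewhere in
# the benchmark script; the parameter values are illustrative only.
# exp(solvers=['saga', 'liblinear'], penalties=['l2'], single_target=True,
#     n_samples=5000, max_iter=10, dataset='digits', n_jobs=2, skip_slow=True)
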
class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
    """Multi-layer Perceptron classifier.

    This algorithm optimizes the log-loss function using l-bfgs or gradient
    descent.

    Parameters
    ----------
    hidden_layer_sizes : tuple, length = n_layers - 2, default (100,)
        The ith element represents the number of neurons in the ith
        hidden layer.

    activation : {'logistic', 'tanh', 'relu'}, default 'relu'
        Activation function for the hidden layer.

        - 'logistic', the logistic sigmoid function,
          returns f(x) = 1 / (1 + exp(-x)).

        - 'tanh', the hyperbolic tan function,
          returns f(x) = tanh(x).

        - 'relu', the rectified linear unit function,
          returns f(x) = max(0, x)

    algorithm : {'l-bfgs', 'sgd', 'adam'}, default 'adam'
        The algorithm for weight optimization.

        - 'l-bfgs' is an optimization algorithm in the family of
          quasi-Newton methods.

        - 'sgd' refers to stochastic gradient descent.

        - 'adam' refers to a stochastic gradient-based optimization algorithm
          proposed by Kingma, Diederik, and Jimmy Ba

        Note: The default algorithm 'adam' works pretty well on relatively
        large datasets (with thousands of training samples or more) in terms of
        both training time and validation score.
        For small datasets, however, 'l-bfgs' can converge faster and perform
        better.

    alpha : float, optional, default 0.0001
        L2 penalty (regularization term) parameter.

    batch_size : int, optional, default 'auto'
        Size of minibatches for stochastic optimizers.
        If the algorithm is 'l-bfgs', the classifier will not use minibatch.
        When set to "auto", `batch_size=min(200, n_samples)`

    learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
        Learning rate schedule for weight updates.

        - 'constant', is a constant learning rate given by
          'learning_rate_init'.

        - 'invscaling' gradually decreases the learning rate ``learning_rate_``
          at each time step 't' using an inverse scaling exponent of 'power_t'.
          effective_learning_rate = learning_rate_init / pow(t, power_t)

        - 'adaptive', keeps the learning rate constant to
          'learning_rate_init' as long as training loss keeps decreasing.
          Each time two consecutive epochs fail to decrease training loss by at
          least tol, or fail to increase validation score by at least tol if
          'early_stopping' is on, the current learning rate is divided by 5.

         Only used when algorithm='sgd'.

    max_iter : int, optional, default 200
        Maximum number of iterations. The algorithm iterates until convergence
        (determined by 'tol') or this number of iterations.

    random_state : int or RandomState, optional, default None
        State or seed for random number generator.

    shuffle : bool, optional, default True
        Whether to shuffle samples in each iteration. Only used when
        algorithm='sgd' or 'adam'.

    tol : float, optional, default 1e-4
        Tolerance for the optimization. When the loss or score is not improving
        by at least tol for two consecutive iterations, unless `learning_rate`
        is set to 'adaptive', convergence is considered to be reached and
        training stops.

    learning_rate_init : double, optional, default 0.001
        The initial learning rate used. It controls the step-size
        in updating the weights. Only used when algorithm='sgd' or 'adam'.

    power_t : double, optional, default 0.5
        The exponent for inverse scaling learning rate.
        It is used in updating effective learning rate when the learning_rate
        is set to 'invscaling'. Only used when algorithm='sgd'.

    verbose : bool, optional, default False
        Whether to print progress messages to stdout.

    warm_start : bool, optional, default False
        When set to True, reuse the solution of the previous
        call to fit as initialization, otherwise, just erase the
        previous solution.

    momentum : float, default 0.9
        Momentum for gradient descent update. Should be between 0 and 1. Only
        used when algorithm='sgd'.

    nesterovs_momentum : boolean, default True
        Whether to use Nesterov's momentum. Only used when algorithm='sgd' and
        momentum > 0.

    early_stopping : bool, default False
        Whether to use early stopping to terminate training when validation
        score is not improving. If set to true, it will automatically set
        aside 10% of training data as validation and terminate training when
        validation score is not improving by at least tol for two consecutive
        epochs.
        Only effective when algorithm='sgd' or 'adam'

    validation_fraction : float, optional, default 0.1
        The proportion of training data to set aside as validation set for
        early stopping. Must be between 0 and 1.
        Only used if early_stopping is True

    beta_1 : float, optional, default 0.9
        Exponential decay rate for estimates of first moment vector in adam,
        should be in [0, 1). Only used when algorithm='adam'

    beta_2 : float, optional, default 0.999
        Exponential decay rate for estimates of second moment vector in adam,
        should be in [0, 1). Only used when algorithm='adam'

    epsilon : float, optional, default 1e-8
        Value for numerical stability in adam. Only used when algorithm='adam'

    Attributes
    ----------
    `classes_` : array or list of array of shape (n_classes,)
        Class labels for each output.

    `loss_` : float
        The current loss computed with the loss function.

    `label_binarizer_` : LabelBinarizer
        A LabelBinarizer object trained on the training set.

    `coefs_` : list, length n_layers - 1
        The ith element in the list represents the weight matrix corresponding
        to layer i.

    `intercepts_` : list, length n_layers - 1
        The ith element in the list represents the bias vector corresponding to
        layer i + 1.

    `n_iter_` : int
        The number of iterations the algorithm has run.

    `n_layers_` : int
        Number of layers.

    `n_outputs_` : int
        Number of outputs.

    `out_activation_` : string
        Name of the output activation function.

    Notes
    -----
    MLPClassifier trains iteratively since at each time step
    the partial derivatives of the loss function with respect to the model
    parameters are computed to update the parameters.

    It can also have a regularization term added to the loss function
    that shrinks model parameters to prevent overfitting.

    This implementation works with data represented as dense numpy arrays or
    sparse scipy arrays of floating point values.

    References
    ----------
    Hinton, Geoffrey E.
        "Connectionist learning procedures." Artificial intelligence 40.1
        (1989): 185-234.

    Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of
        training deep feedforward neural networks." International Conference
        on Artificial Intelligence and Statistics. 2010.

    He, Kaiming, et al. "Delving deep into rectifiers: Surpassing human-level
        performance on imagenet classification." arXiv preprint
        arXiv:1502.01852 (2015).

    Kingma, Diederik, and Jimmy Ba. "Adam: A method for stochastic
        optimization." arXiv preprint arXiv:1412.6980 (2014).
    """
    def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                 algorithm='adam', alpha=0.0001,
                 batch_size='auto', learning_rate="constant",
                 learning_rate_init=0.001, power_t=0.5, max_iter=200,
                 shuffle=True, random_state=None, tol=1e-4,
                 verbose=False, warm_start=False, momentum=0.9,
                 nesterovs_momentum=True, early_stopping=False,
                 validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-8):

        sup = super(MLPClassifier, self)
        sup.__init__(hidden_layer_sizes=hidden_layer_sizes,
                     activation=activation, algorithm=algorithm, alpha=alpha,
                     batch_size=batch_size, learning_rate=learning_rate,
                     learning_rate_init=learning_rate_init, power_t=power_t,
                     max_iter=max_iter, loss='log_loss', shuffle=shuffle,
                     random_state=random_state, tol=tol, verbose=verbose,
                     warm_start=warm_start, momentum=momentum,
                     nesterovs_momentum=nesterovs_momentum,
                     early_stopping=early_stopping,
                     validation_fraction=validation_fraction,
                     beta_1=beta_1, beta_2=beta_2, epsilon=epsilon)

        self.label_binarizer_ = LabelBinarizer()

    def _validate_input(self, X, y, incremental):
        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                         multi_output=True)
        if y.ndim == 2 and y.shape[1] == 1:
            y = column_or_1d(y, warn=True)
        self.label_binarizer_.fit(y)

        if not hasattr(self, 'classes_') or not incremental:
            self.classes_ = self.label_binarizer_.classes_
        else:
            classes = self.label_binarizer_.classes_
            if not np.all(np.in1d(classes, self.classes_)):
                raise ValueError("`y` has classes not in `self.classes_`."
                                 " `self.classes_` has %s. 'y' has %s." %
                                 (self.classes_, classes))

        y = self.label_binarizer_.transform(y)
        return X, y

    def decision_function(self, X):
        """Decision function of the mlp model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y : array-like, shape (n_samples,) or (n_samples, n_classes)
            The values of decision function for each class in the model.
        """
        check_is_fitted(self, "coefs_")
        y_scores = self._decision_scores(X)

        if self.n_outputs_ == 1:
            return y_scores.ravel()
        else:
            return y_scores

    def predict(self, X):
        """Predict using the multi-layer perceptron classifier

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y : array-like, shape (n_samples,) or (n_samples, n_classes)
            The predicted classes.
        """
        check_is_fitted(self, "coefs_")
        y_scores = self.decision_function(X)
        y_scores = ACTIVATIONS[self.out_activation_](y_scores)

        return self.label_binarizer_.inverse_transform(y_scores)

    @property
    def partial_fit(self):
        """Fit the model to data matrix X and target y.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        y : array-like, shape (n_samples,)
            The target values.

        classes : array, shape (n_classes)
            Classes across all calls to partial_fit.
            Can be obtained via `np.unique(y_all)`, where y_all is the
            target vector of the entire dataset.
            This argument is required for the first call to partial_fit
            and can be omitted in the subsequent calls.
            Note that y doesn't need to contain all labels in `classes`.

        Returns
        -------
        self : returns a trained MLP model.
        """
        if self.algorithm not in _STOCHASTIC_ALGOS:
            raise AttributeError("partial_fit is only available for stochastic"
                                 " optimization algorithms. %s is not"
                                 " stochastic" % self.algorithm)
        return self._partial_fit

    def _partial_fit(self, X, y, classes=None):
        _check_partial_fit_first_call(self, classes)

        super(MLPClassifier, self)._partial_fit(X, y)

        return self

    def predict_log_proba(self, X):
        """Return the log of probability estimates.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        log_y_prob : array-like, shape (n_samples, n_classes)
            The predicted log-probability of the sample for each class
            in the model, where classes are ordered as they are in
            `self.classes_`. Equivalent to log(predict_proba(X))
        """
        y_prob = self.predict_proba(X)
        return np.log(y_prob, out=y_prob)

    def predict_proba(self, X):
        """Probability estimates.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y_prob : array-like, shape (n_samples, n_classes)
            The predicted probability of the sample for each class in the
            model, where classes are ordered as they are in `self.classes_`.
        """
        y_scores = self.decision_function(X)

        if y_scores.ndim == 1:
            y_scores = logistic(y_scores)
            return np.vstack([1 - y_scores, y_scores]).T
        else:
            return softmax(y_scores)
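

# Usage sketch (added): fitting the MLPClassifier defined above on the digits
# data, mirroring the commented-out test near the top of this listing.  Only
# this estimator and scikit-learn's bundled dataset are assumed; the
# hyperparameters are illustrative.
def _demo_mlp_classifier():
    from sklearn.datasets import load_digits

    digits = load_digits()
    X, y = digits.data, digits.target
    clf = MLPClassifier(hidden_layer_sizes=(50,), algorithm='adam',
                        max_iter=200, random_state=0)
    clf.fit(X, y)
    return clf.score(X, y), clf.predict_proba(X[:5])
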