def test_label_binarizer_multilabel(): lb = LabelBinarizer() # test input as lists of tuples inp = [(2, 3), (1,), (1, 2)] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) got = lb.fit_transform(inp) assert_array_equal(indicator_mat, got) assert_equal(lb.inverse_transform(got), inp) # test input as label indicator matrix lb.fit(indicator_mat) assert_array_equal(indicator_mat, lb.inverse_transform(indicator_mat)) # regression test for the two-class multilabel case lb = LabelBinarizer() inp = [[1, 0], [0], [1], [0, 1]] expected = np.array([[1, 1], [1, 0], [0, 1], [1, 1]]) got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_equal([set(x) for x in lb.inverse_transform(got)], [set(x) for x in inp])
class LabelBinarizer2:
    def __init__(self):
        self.lb = LabelBinarizer()

    def fit(self, X):
        # Convert X to array
        X = np.array(X)
        # Fit X using the LabelBinarizer object
        self.lb.fit(X)
        # Save the classes
        self.classes_ = self.lb.classes_

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)

    def transform(self, X):
        # Convert X to array
        X = np.array(X)
        # Transform X using the LabelBinarizer object
        Xlb = self.lb.transform(X)
        # In the two-class case LabelBinarizer returns a single column;
        # prepend its complement so there is one column per class.
        if len(self.classes_) == 2 and len(np.unique(X)) <= 2:
            Xlb = np.hstack((1 - Xlb, Xlb))
        return Xlb

    def inverse_transform(self, Xlb):
        # Convert Xlb to array
        Xlb = np.array(Xlb)
        if len(self.classes_) == 2:
            # Column 1 holds the original single-column encoding produced by
            # the wrapped LabelBinarizer (column 0 is its complement).
            X = self.lb.inverse_transform(Xlb[:, 1])
        else:
            X = self.lb.inverse_transform(Xlb)
        return X
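# A minimal sketch (not part of the snippet above) of why the wrapper exists:
# for exactly two classes, scikit-learn's LabelBinarizer returns a single 0/1
# column rather than one column per class, which trips up code that expects a
# full one-hot matrix. Values are illustrative; it assumes the LabelBinarizer2
# class above is defined.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
print(lb.fit_transform(["cat", "dog", "dog", "cat"]))
# -> shape (4, 1): [[0], [1], [1], [0]] -- one column, not two

lb2 = LabelBinarizer2()
print(lb2.fit_transform(["cat", "dog", "dog", "cat"]))
# -> shape (4, 2): one column per class, as a softmax output layer expects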
def run_cnn26(_x_train, _x_test, _y_train, _y_test, _x_valid, _y_valid): model = tf.keras.Sequential() model.add(tf.keras.layers.Conv2D(20, (3, 3), activation='relu', input_shape=(100, 100, 3))) model.add(tf.keras.layers.Conv2D(20, (3, 3), activation='relu')) model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) model.add(tf.keras.layers.Flatten()) model.add(tf.keras.layers.Dropout(0.5)) model.add(tf.keras.layers.Dense(128, activation='relu')) model.add(tf.keras.layers.Dense(4, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model.summary() history = model.fit(_x_train, _y_train, batch_size=32, epochs=10, verbose=1, validation_data=(_x_valid, _y_valid)) # Evaluate model score = model.evaluate(_x_test, _y_test) print("score =", score) # Plot model acc = history.history['accuracy'] val_acc = history.history['val_accuracy'] loss = history.history['loss'] val_loss = history.history['val_loss'] epochs_range = range(10) plt.figure(figsize=(8, 8)) plt.subplot(1, 2, 1) plt.plot(epochs_range, acc, label='Training Accuracy') plt.plot(epochs_range, val_acc, label='Validation Accuracy') plt.legend(loc='lower right') plt.title('Training and Validation Accuracy') plt.subplot(1, 2, 2) plt.plot(epochs_range, loss, label='Training Loss') plt.plot(epochs_range, val_loss, label='Validation Loss') plt.legend(loc='upper right') plt.title('Training and Validation Loss') plt.show() # Predict Test set results labels = ['apple', 'banana', 'mixed', 'orange'] y_pred = model.predict(_x_test) lb = LabelBinarizer() lb.fit(labels) predict_class = lb.inverse_transform(y_pred) test_y = lb.inverse_transform(_y_test) print(np.concatenate((predict_class.reshape(len(predict_class), 1), test_y.reshape(len(test_y), 1)), 1)) print(classification_report(test_y, predict_class))
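# Standalone sketch (illustrative values) of the label round trip used above:
# fit the binarizer on the class-name strings, then inverse_transform maps each
# row of softmax probabilities back to the highest-scoring class name.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(['apple', 'banana', 'mixed', 'orange'])
probs = np.array([[0.70, 0.10, 0.10, 0.10],
                  [0.05, 0.05, 0.10, 0.80]])
print(lb.inverse_transform(probs))  # -> ['apple' 'orange'] (argmax per row)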
class AnswerEncoder: def __init__(self, num_class=1000, multi_label=False, unknown_answer='idontknow'): self.unknown_answer = unknown_answer self.num_class = num_class self.multi_label = multi_label self.classes_ = None self.encoder = None def fit(self, answers): # TODO support multi-label answer_map, num = count_freq(answers) answer_freq = sorted(answer_map.items(), key=operator.itemgetter(1), reverse=True) kept_answers = (list(answer_freq_pair[0] for answer_freq_pair in answer_freq[:self.num_class])) if self.unknown_answer == 'most_freq': self.unknown_answer = answer_freq[self.num_class][0] if self.multi_label: self.classes_ = kept_answers self.encoder = MultiLabelBinarizer(classes=self.classes_).fit([self.classes_]) else: self.classes_ = kept_answers + [self.unknown_answer] self.encoder = LabelBinarizer().fit(self.classes_) return self def transform(self, answers): # TODO efficient transform if self.multi_label: cleared_answers = [] for as_list in answers: new_as = [] for a in as_list: if a in self.classes_: new_as.append(a) cleared_answers.append(new_as) else: cleared_answers = [] for answer in answers: if answer in self.classes_: cleared_answers.append(answer) else: cleared_answers.append(self.unknown_answer) return self.encoder.transform(cleared_answers) def inverse_transform(self, answers): # TODO efficient inverse_transform if not self.multi_label: return self.encoder.inverse_transform(answers) else: # get one most possible label t = np.zeros_like(answers) gold_answer_index = np.argmax(answers, axis=1) t[np.arange(len(answers)), gold_answer_index] = 1 results = self.encoder.inverse_transform(t) return list(result[0] for result in results)
class OneHotVector(object):
    def __init__(self, chars: list):
        if not chars or type(chars) is not list or len(chars) == 0:
            raise Exception('values must be list and len(values)>0 %s' % chars)
        self.encoder = LabelBinarizer(
            neg_label=0, pos_label=1, sparse_output=False)  # TODO: performance test
        self.encoder.fit(chars)

    @property
    def classes(self):
        return self.encoder.classes_

    def __len__(self):
        return self.encoder.classes_.shape[0]

    def to_vector(self, c: str) -> np.ndarray:
        """
        :param c: character. len(c)==1
        :return:
        """
        return self.encoder.transform([c])[0]

    def to_vectors(self, chars: list) -> np.ndarray:
        """
        :param chars: list of characters. len(chars)>0
        :return:
        """
        if isinstance(chars, (str, np.str_)):  # split a plain string into characters
            chars = [c for c in chars]
        return self.encoder.transform(chars)

    def to_value(self, v: np.ndarray) -> np.ndarray:
        """
        :param v: one hot vector
        :return:
        """
        return self.encoder.inverse_transform(np.array([v]))[0]

    def to_values(self, vectors: list) -> np.ndarray:
        """
        :param vectors: list of one hot vector
        :return:
        """
        return self.encoder.inverse_transform(vectors)

    def to_index(self, c: str) -> int:
        return np.argmax(self.to_vector(c))
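# Minimal usage sketch for OneHotVector, assuming the class above (and its
# numpy/scikit-learn imports) is available; the character set is illustrative.
ohv = OneHotVector(list("abc"))
print(len(ohv), ohv.classes)              # 3, ['a' 'b' 'c']
print(ohv.to_vector("b"))                 # [0 1 0]
print(ohv.to_vectors("cab"))              # 3x3 one-hot matrix, one row per char
print(ohv.to_value(np.array([0, 0, 1])))  # 'c'
print(ohv.to_index("a"))                  # 0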
def partb(): def load(file_name): file = np.load(file_name) X_train =file['X_train'].T y_train =file['y_train'] X_test =file['X_test'].T y_test =file['y_test'] X_cv =file['X_cv'].T y_cv =file['y_cv'] return X_train,y_train,X_cv,y_cv,X_test,y_test train_ = [0,0] test_ = [0,0] overall = [] for i in range(14): X_train,y_train,X_cv,y_cv,X_test,y_test = load('pofa{}.npz'.format(i)) from sklearn.preprocessing import LabelBinarizer binarizer = LabelBinarizer() binarizer.fit(y_train) Y_train = binarizer.transform(y_train).T Y_cv = binarizer.transform(y_cv).T #nn.forward(X) #nn.backprop(X,Y,graient_check=True) print(X_train.shape[0], Y_train.shape[0]) nn = NeuralNetwork([X_train.shape[0],30,Y_train.shape[0]], functions=[sigmoid,softmax], derivatives=[derivative_sigmoid]) nn.fit(X_train,Y_train,eta=0.01,momentum=0.5,minibatch=16,regularizer=0.15,max_iter=200,gradient_check=False,cv = (X_cv,Y_cv),graphs=False, lbfgs=False) output = nn.forward(X_train) y_train_output = binarizer.inverse_transform(output.T) y_test_output = binarizer.inverse_transform(nn.forward(X_test).T) print("Iteration: ",i) print((y_train_output==y_train).mean()) print((y_test_output ==y_test).mean()) overall.append((y_test == y_test_output).mean()) train_[0] += (y_train_output==y_train).sum() train_[1] += y_train.shape[0] test_[0] += (y_test_output==y_test).sum() test_[1] += y_test.shape[0] print("Average train accuracy: ", train_[0]/train_[1],"Average test accuracy: ",test_[0]/test_[1]) print(train_,test_) overall = np.array(overall) print(overall.mean())
def train_and_save_model(model='xvector', binary_class=False, single_class='glass'):
    model = define_xvector()
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001),
                  metrics=['acc', km.precision(label=1), km.recall(label=0)])
    model.summary()
    callback_list = [
        ModelCheckpoint(
            'checkpoint-{epoch:02d}.h5',
            monitor='loss',
            verbose=1,
            save_best_only=True,
            period=2
        ),  # checkpoint every second epoch and keep the best model
        ReduceLROnPlateau(
            monitor='loss', patience=3, verbose=1, min_lr=1e-6
        ),  # reduce the learning rate if the loss is not improving
        CSVLogger(filename='training_log.csv'),  # log training to csv
        EarlyStopping(
            monitor='loss', patience=5)  # early stop if there's no improvement of the loss
    ]
    tr_data, tr_label, ts_data, ts_label = train_test_split()
    encoder = LabelBinarizer()
    tr_label = encoder.fit_transform(tr_label)
    ts_label = encoder.transform(ts_label)
    print(
        "Start Training process \nTraining data shape {} \nTraining label shape {}"
        .format(tr_data.shape, tr_label.shape))
    model.fit(tr_data,
              tr_label,
              batch_size=16,
              epochs=100,
              verbose=1,
              validation_split=0.2)
    model.save('5class_segmentYoutube_model.h5')
    pred = model.predict(ts_data)
    pred = encoder.inverse_transform(pred)
    ts_label = encoder.inverse_transform(ts_label)
    cm = confusion_matrix(y_target=ts_label, y_predicted=pred, binary=False)
    plt.figure(figsize=(10, 10))
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    ax.set_xticklabels([''] + CLASS_TYPE, rotation=40, ha='right')
    ax.set_yticklabels([''] + CLASS_TYPE)
    plt.savefig("ConfusionMatrix_segment_youtube.png")
    plt.show()
def get_labelidx(img_path,model): img = cv2.imread(img_path) label_list = np.arange(16) lb = LabelBinarizer().fit(label_list) predictions = model.predict(np.array([img])) predict_idx = lb.inverse_transform(predictions) return predict_idx
def display_image_predictions(features, labels, predictions):
    label_binarizer = LabelBinarizer()
    # fit on the integer class ids so the binarizer has one column per class
    label_binarizer.fit(range(LABELS_COUNT))
    # map the one-hot label rows back to integer class ids
    label_ids = label_binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)  # a 4x2 grid of subplots
    fig.tight_layout()  # tight_layout adjusts subplot params to fill the figure area
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)  # figure title

    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)  # ind = [0, 1, 2]
    width = (1. - 2. * margin) / n_predictions

    for image_i, (feature, label_id, pred_indicies, pred_values) \
            in enumerate(zip(features, label_ids, predictions.indices, predictions.values)):
        # enumerate adds a running index; zip pairs up the corresponding elements
        pred_names = [LABEL_NAMES[pred_i] for pred_i in pred_indicies]
        correct_name = LABEL_NAMES[label_id]

        axies[image_i][0].imshow(feature)            # show the image
        axies[image_i][0].set_title(correct_name)    # title it with the true class
        axies[image_i][0].set_axis_off()             # hide the x and y axes

        # horizontal bar chart of the top predicted probabilities
        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
        axies[image_i][1].set_yticks(ind + margin)
        axies[image_i][1].set_yticklabels(pred_names[::-1])
        axies[image_i][1].set_xticks([0, 0.5, 1.0])
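# The labelling trick above, in isolation (illustrative values): fit the
# binarizer on the integer class ids, then inverse_transform turns one-hot
# ground-truth rows back into those ids so they can index the class names.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(range(3))
one_hot = np.array([[1, 0, 0],
                    [0, 0, 1]])
print(lb.inverse_transform(one_hot))  # -> [0 2]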
class BinaryRelevanceClassifier(BaseEstimator, ClassifierMixin): def __init__(self, estimator): self.estimator = estimator def fit(self, X, Y): # binarize labels self.bl = LabelBinarizer() Y = self.bl.fit_transform(Y) self.classes_ = self.bl.classes_ # create an estimator for each label self.estimators_ = [] for i in xrange(self.bl.classes_.shape[0]): estimator = clone(self.estimator) estimator.fit(X, Y[:, i]) self.estimators_.append(estimator) def predict(self, X): self._check_is_fitted() X = np.atleast_2d(X) Y = np.empty((X.shape[0], self.classes_.shape[0])) for i, estimator in enumerate(self.estimators_): Y[:, i] = estimator.predict(X).T return self.bl.inverse_transform(Y) def _check_is_fitted(self): if not hasattr(self, "estimators_"): raise ValueError("The object hasn't been fitted yet!")
def test_proba_classif_convergence(): X_train, _, y_train, _ = load_scaled_boston() y_train = np.round(y_train) mc = MondrianTreeClassifier(random_state=0) mc.fit(X_train, y_train) lb = LabelBinarizer() y_bin = lb.fit_transform(y_train) le = LabelEncoder() y_enc = le.fit_transform(y_train) proba = mc.predict_proba(X_train) labels = mc.predict(X_train) assert_array_equal(proba, y_bin) assert_array_equal(labels, lb.inverse_transform(y_bin)) # For points completely far away from the training data, this # should converge to the empirical distribution of labels. # X is scaled between to -1.0 and 1.0 X_inf = np.vstack( (30.0 * np.ones(X_train.shape[1]), -30.0 * np.ones(X_train.shape[1]))) inf_proba = mc.predict_proba(X_inf) emp_proba = np.bincount(y_enc) / float(len(y_enc)) assert_array_almost_equal(inf_proba, [emp_proba, emp_proba])
class GBClassifier(_BaseGB, ClassifierMixin): def __init__(self, estimator, n_estimators=100, step_size="line_search", learning_rate=0.1, loss="squared_hinge", subsample=1.0, callback=None, random_state=None): self.estimator = estimator self.n_estimators = n_estimators self.step_size = step_size self.learning_rate = learning_rate self.loss = loss self.subsample = subsample self.callback = callback self.random_state = random_state def _get_loss(self): losses = dict(squared_hinge=_SquaredHingeLoss(), log=_LogLoss()) return losses[self.loss] def fit(self, X, y): self._lb = LabelBinarizer(neg_label=-1) Y = self._lb.fit_transform(y) return super(GBClassifier, self).fit(X, Y) def predict(self, X): pred = self.decision_function(X) return self._lb.inverse_transform(pred)
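# Standalone sketch (illustrative values) of the ±1 encoding used above: with
# neg_label=-1 the binarized targets are in {-1, +1}, matching margin-based
# losses, and inverse_transform applied to raw decision values returns the
# highest-scoring class per row (or thresholds at 0 in the binary case).
import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer(neg_label=-1)
print(lb.fit_transform([0, 2, 1, 2]))
# -> [[ 1 -1 -1]
#     [-1 -1  1]
#     [-1  1 -1]
#     [-1 -1  1]]
scores = np.array([[0.3, -0.2, 0.1],
                   [-1.0, 0.4, 0.2]])
print(lb.inverse_transform(scores))  # -> [0 1]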
class MinimalLearningMachineClassifier(MinimalLearningMachine, ClassifierMixin): def __init__(self, selector=None): MinimalLearningMachine.__init__(self, selector, estimator_type='classifier') self.lb = LabelBinarizer() def fit(self, X, y=None): self.lb.fit(y) return MinimalLearningMachine.fit(self, X, self.lb.transform(y)) def active_(self, dyhat): classes = self.lb.transform(self.lb.classes_) result = [ np.argmin( list(map(lambda y_class: self.mulat_(y_class, dyh), classes))) for dyh in dyhat ] return self.lb.inverse_transform(self.lb.classes_[result]) def score(self, X, y, sample_weight=None): return ClassifierMixin.score(self, X, y, sample_weight)
class NeuralNetworkClassifier(BaseNeuralNetwork, ClassifierMixin):
    def __init__(self, layers=[], loss_function="logistic_loss", learning_rate=0.1,
                 batch_size=100, max_epochs=10, update_algorithm="sgd", verbose=False):
        sup = super(NeuralNetworkClassifier, self)
        sup.__init__(layers=layers,
                     loss_function=loss_function,
                     learning_rate=learning_rate,
                     batch_size=batch_size,
                     max_epochs=max_epochs,
                     update_algorithm=update_algorithm,
                     verbose=verbose)
        self.label_binarizer_ = LabelBinarizer()

    def fit(self, X, y):
        y_binarized = self.label_binarizer_.fit_transform(y)
        super(NeuralNetworkClassifier, self).fit(X, y_binarized)

    def predict(self, X):
        y_pred = self._predict(X)
        return self.label_binarizer_.inverse_transform(y_pred)

    def predict_proba(self, X):
        return self._predict(X)
def test_label_binarizer_set_label_encoding(): lb = LabelBinarizer(neg_label=-2, pos_label=2) # two-class case inp = np.array([0, 1, 1, 0]) expected = np.array([[-2, 2, 2, -2]]).T got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp) # multi-class case inp = np.array([3, 2, 1, 2, 0]) expected = np.array([[-2, -2, -2, +2], [-2, -2, +2, -2], [-2, +2, -2, -2], [-2, -2, +2, -2], [+2, -2, -2, -2]]) got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp)
def test_label_binarizer(): lb = LabelBinarizer() # two-class case inp = ["neg", "pos", "pos", "neg"] expected = np.array([[0, 1, 1, 0]]).T got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp) # multi-class case inp = ["spam", "ham", "eggs", "ham", "0"] expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]) got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp)
def display_image_predictions(features, labels, predictions): n_classes = 10 label_names = _load_label_names() label_binarizer = LabelBinarizer() label_binarizer.fit(range(n_classes)) label_ids = label_binarizer.inverse_transform(np.array(labels)) fig, axies = plt.subplots(nrows=4, ncols=2) fig.tight_layout() fig.suptitle('Softmax Predictions', fontsize=20, y=1.1) n_predictions = 3 margin = 0.05 ind = np.arange(n_predictions) width = (1. - 2. * margin) / n_predictions for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate( zip(features, label_ids, predictions.indices, predictions.values)): pred_names = [label_names[pred_i] for pred_i in pred_indicies] correct_name = label_names[label_id] axies[image_i][0].imshow(feature) axies[image_i][0].set_title(correct_name) axies[image_i][0].set_axis_off() axies[image_i][1].barh(ind + margin, pred_values[::-1], width) axies[image_i][1].set_yticks(ind + margin) axies[image_i][1].set_yticklabels(pred_names[::-1]) axies[image_i][1].set_xticks([0, 0.5, 1.0])
class OneHotEncoder:
    """Joins the two encoders needed for OneHot transform."""

    int_encoder: LabelEncoder
    label_binarizer: LabelBinarizer

    def __init__(self, values: List[T]) -> None:
        self.int_encoder = LabelEncoder().fit(values)
        self.label_binarizer = LabelBinarizer().fit(
            self.int_encoder.transform(values))

    def transform(self, labels: List[T]) -> np.ndarray:
        """One hot encode a list of labels."""
        return self.label_binarizer.transform(
            self.int_encoder.transform(labels))

    def inverse_transform(self, encoded_labels: List[T]) -> List[T]:
        """Inverse transform a list of one hot encoded labels."""
        return self.int_encoder.inverse_transform(
            self.label_binarizer.inverse_transform(encoded_labels))

    @property
    def classes(self) -> np.ndarray:
        """Return the classes discovered while fitting the transform."""
        return (self.label_binarizer.
                classes_  # pyre-ignore[16]: missing attribute classes_
                )
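# Minimal usage sketch, assuming the OneHotEncoder class above and its imports;
# the labels are illustrative. LabelEncoder assigns integer codes in sorted
# label order and LabelBinarizer turns those codes into one-hot columns, so the
# column order follows the sorted labels.
enc = OneHotEncoder(["red", "green", "blue"])
encoded = enc.transform(["blue", "red"])
print(encoded)                         # [[1 0 0], [0 0 1]] -- blue, green, red columns
print(enc.inverse_transform(encoded))  # -> ['blue' 'red']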
def test_proba_classif_convergence(): X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6, test_size=0.4) mfc = MondrianForestClassifier(random_state=0) mfc.fit(X_train, y_train) lb = LabelBinarizer() y_bin = lb.fit_transform(y_train) le = LabelEncoder() y_enc = le.fit_transform(y_train) proba = mfc.predict_proba(X_train) labels = mfc.predict(X_train) assert_array_equal(proba, y_bin) assert_array_equal(labels, lb.inverse_transform(y_bin)) # For points completely far away from the training data, this # should converge to the empirical distribution of labels. X_inf = np.vstack( (30.0 * np.ones(X_train.shape[1]), -30.0 * np.ones(X_train.shape[1]))) inf_proba = mfc.predict_proba(X_inf) emp_proba = np.bincount(y_enc) / float(len(y_enc)) assert_array_almost_equal(inf_proba, [emp_proba, emp_proba], 3)
class TargetTypeTransformer(FeatureTypeTransformer): def __init__(self, needs_label_binarizer=False): super().__init__() self.needs_label_binarizer = needs_label_binarizer def fit(self, y, **fit_kwargs): super().fit(y, **fit_kwargs) if self.needs_label_binarizer: self.label_binarizer_ = LabelBinarizer() self.label_binarizer_.fit(y) return self def transform(self, y, **transform_kwargs): y = super().transform(y) if self.needs_label_binarizer: y = self.label_binarizer_.transform(y) else: y = y.ravel() return y def inverse_transform(self, y, **inverse_transform_kwargs): if self.needs_label_binarizer: y = self.label_binarizer_.inverse_transform(y) y = super().inverse_transform(y) return y
class OneHotEncoder: def __init__(self, labels): self.encoder = LabelBinarizer() self.labels = self.encoder.fit_transform(labels) def get(self, onehot): return self.encoder.inverse_transform(np.array([onehot]))[0]
class MLPClassifier(BaseMLP, ClassifierMixin): """ Multilayer Perceptron Classifier. Uses a neural network with one hidden layer. Parameters ---------- Attributes ---------- Notes ----- References ----------""" def __init__(self, n_hidden=200, lr=0.1, l2decay=0, loss='cross_entropy', output_layer='softmax', batch_size=100, verbose=0): super(MLPClassifier, self).__init__(n_hidden, lr, l2decay, loss, output_layer, batch_size, verbose) def fit(self, X, y, max_epochs=10, shuffle_data=False): self.lb = LabelBinarizer() one_hot_labels = self.lb.fit_transform(y) super(MLPClassifier, self).fit( X, one_hot_labels, max_epochs, shuffle_data) return self def predict(self, X): prediction = super(MLPClassifier, self).predict(X) return self.lb.inverse_transform(prediction)
def display_samples_predictions(input_features, target_labels, samples_predictions): num_classes = 10 cifar10_class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] label_binarizer = LabelBinarizer() label_binarizer.fit(range(num_classes)) label_inds = label_binarizer.inverse_transform(np.array(target_labels)) fig, axies = plt.subplots(nrows=4, ncols=2) fig.tight_layout() fig.suptitle('Softmax Predictions', fontsize=20, y=1.1) num_predictions = 4 margin = 0.05 ind = np.arange(num_predictions) width = (1. - 2. * margin) / num_predictions for image_ind, (feature, label_ind, prediction_indicies, prediction_values) in enumerate( zip(input_features, label_inds, samples_predictions.indices, samples_predictions.values)): prediction_names = [cifar10_class_names[pred_i] for pred_i in prediction_indicies] correct_name = cifar10_class_names[label_ind] axies[image_ind][0].imshow(feature) axies[image_ind][0].set_title(correct_name) axies[image_ind][0].set_axis_off() axies[image_ind][1].barh(ind + margin, prediction_values[::-1], width) axies[image_ind][1].set_yticks(ind + margin) axies[image_ind][1].set_yticklabels(prediction_names[::-1]) axies[image_ind][1].set_xticks([0, 0.5, 1.0])
class SingletonLabelBinarizer: def __init__(self, *args, **kwargs): self.label_binarizer = LabelBinarizer() def fit(self, x): self.label_binarizer.fit(x) def transform(self, x): return self.label_binarizer.transform(x) def fit_transform(self, x): return self.label_binarizer.fit_transform(x) def __new__(cls): if not hasattr(cls, "instance"): cls.instance = super(SingletonLabelBinarizer, cls).__new__(cls) return cls.instance @property def encoder(self): return self.label_binarizer @encoder.setter def encoder(self, x): self.label_binarizer = LabelBinarizer() def inverse_transform(self, y): return self.label_binarizer.inverse_transform(y)
def display_image_predictions(features, labels, predictions): n_classes = 10 label_names = _load_label_names() label_binarizer = LabelBinarizer() label_binarizer.fit(range(n_classes)) label_ids = label_binarizer.inverse_transform(np.array(labels)) fig, axies = plt.subplots(nrows=4, ncols=2) fig.tight_layout() fig.suptitle('Softmax Predictions', fontsize=20, y=1.1) n_predictions = 3 margin = 0.05 ind = np.arange(n_predictions) width = (1. - 2. * margin) / n_predictions for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate(zip(features, label_ids, predictions.indices, predictions.values)): pred_names = [label_names[pred_i] for pred_i in pred_indicies] correct_name = label_names[label_id] axies[image_i][0].imshow(feature*255) axies[image_i][0].set_title(correct_name) axies[image_i][0].set_axis_off() axies[image_i][1].barh(ind + margin, pred_values[::-1], width) axies[image_i][1].set_yticks(ind + margin) axies[image_i][1].set_yticklabels(pred_names[::-1]) axies[image_i][1].set_xticks([0, 0.5, 1.0])
def test_label_binarizer(): lb = LabelBinarizer() # two-class case inp = np.array([0, 1, 1, 0]) expected = np.array([[0, 1, 1, 0]]).T got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp) # multi-class case inp = np.array([3, 2, 1, 2, 0]) expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]) got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_array_equal(lb.inverse_transform(got), inp)
def parta(): def load(file_name): file = np.load(file_name) X_train = file['X_train'] y_train = file['y_train'] X_test = file['X_test'] y_test = file['y_test'] return X_train, y_train, X_test, y_test X_train, y_train, X_test, y_test = load('simnim.npz') print(X_train.shape) print(y_train.shape) print(X_test.shape) print(y_test.shape) X_train = X_train.T from sklearn.preprocessing import LabelBinarizer binarizer = LabelBinarizer() binarizer.fit(y_train) Y_train_hat = binarizer.transform(y_train) Y_train = Y_train_hat.T nn = NeuralNetwork([X_train.shape[0], 30, Y_train.shape[0]], functions=[sigmoid, softmax], derivatives=[derivative_sigmoid]) #nn.forward(X) #nn.backprop(X,Y,graient_check=True) nn.fit(X_train, Y_train, eta=0.01, momentum=0.5, minibatch=32, regularizer=0.2, max_iter=150, gradient_check=False, lbfgs=True) output = nn.forward(X_train) y_train_output = binarizer.inverse_transform(output.T) y_test_output = binarizer.inverse_transform(nn.forward(X_test.T).T) print((y_train_output == y_train).mean()) print((y_test_output == y_test).mean())
def to_categorical(y): nb_classes = [ 'negativo_maioria', 'negativo_unânime', 'positivo_maioria', 'positivo_unânime' ] binarize = LabelBinarizer() binarize.fit_transform(nb_classes) return binarize.inverse_transform(y)
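# Note on the helper above (standalone sketch, illustrative input):
# LabelBinarizer stores classes_ in sorted order, so the columns of y must
# follow the sorted class names; inverse_transform then maps each one-hot (or
# probability) row back to the corresponding name.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
lb.fit(['negativo_maioria', 'negativo_unânime',
        'positivo_maioria', 'positivo_unânime'])
print(lb.classes_)  # sorted class names, defining the column order
print(lb.inverse_transform(np.array([[0, 0, 1, 0]])))  # -> ['positivo_maioria']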
def test_label_binarizer_multilabel(): lb = LabelBinarizer() inp = [(2, 3), (1, ), (1, 2)] expected = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_equal(lb.inverse_transform(got), inp) # regression test for the two-class multilabel case lb = LabelBinarizer() inp = [[1, 0], [0], [1], [0, 1]] expected = np.array([[1, 1], [1, 0], [0, 1], [1, 1]]) got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_equal([set(x) for x in lb.inverse_transform(got)], [set(x) for x in inp])
def test_fit_reg_squared_multiple_outputs(): reg = CDRegressor(C=0.05, random_state=0, penalty="l1/l2", loss="squared", max_iter=100) lb = LabelBinarizer() Y = lb.fit_transform(mult_target) reg.fit(mult_dense, Y) y_pred = lb.inverse_transform(reg.predict(mult_dense)) assert_almost_equal(np.mean(y_pred == mult_target), 0.797, 3) assert_almost_equal(reg.n_nonzero(percentage=True), 0.5)
class BasicExtreamLearningMachine(BaseEstimator, ClassifierMixin):
    def __init__(self):
        self.L = None           # number of hidden nodes
        self.a = None           # input-to-hidden weights
        self.b = None           # hidden-to-output weights
        self.g_func = np.tanh   # tanh is used as the hidden-node activation function

    def _append_bias(self, X):
        # append 1. to the end of each input row as a bias term
        return np.append(X, np.ones((X.shape[0], 1)), axis=1)

    def _set_L(self, X):
        #self.L = X.shape[0]  # set the number of hidden nodes
        # use one hidden node per 7 samples;
        # there is no particular justification for this choice.
        self.L = int(X.shape[0] / 7.)

    def fit(self, X, y):
        # prepare the input data:
        # standardize it and append the bias term
        stdsc = StandardScaler()
        X = stdsc.fit_transform(X)
        X = self._append_bias(X)

        # set the number of hidden nodes
        self._set_L(X)

        # prepare the target (class) data
        self.classes_ = np.unique(y)
        self.n_class = len(self.classes_)
        self.binarizer = LabelBinarizer(-1, 1)
        T = self.binarizer.fit_transform(y)

        # training phase
        # 1. randomly initialize the input-to-hidden weights
        self.a = np.random.random((self.L, X.shape[1])) * 2.0 - 1.0
        # 2. compute the hidden-node outputs H
        H = self.g_func(X.dot(self.a.T))
        # 3. solve for the hidden-to-output weights b using the
        #    Moore-Penrose pseudo-inverse (pinv2)
        self.b = pinv2(H).dot(T)
        return self

    def decision_function(self, X):
        stdsc = StandardScaler()
        X = stdsc.fit_transform(X)
        X = self._append_bias(X)
        H = self.g_func(X.dot(self.a.T))
        raw_prediction = H.dot(self.b)
        normalized_prediction = stdsc.fit_transform(raw_prediction)
        class_prediction = self.binarizer.inverse_transform(
            normalized_prediction)
        return class_prediction

    def predict(self, X):
        return self.decision_function(X)
class AnprLabelProcessor: # init the label binarizers. Maps classes to a set of one-hot vectors def __init__(self, plateChars, plateLens): # convert the labels from integers to vectors self.plate_lb = LabelBinarizer().fit(plateChars) self.charCnt_lb = LabelBinarizer().fit(plateLens) self.numClassesPerChar = len(plateChars) self.maxPlateLen = plateLens[-1] # Generate one-hot vectors for every plate def transform(self, labels): # Create a list of chars for each plate plateLabel = np.empty((len(labels), self.maxPlateLen), dtype=np.unicode_) for (i, label) in enumerate(labels): for j in range(0, self.maxPlateLen): plateLabel[i, j] = label[j] # Create a list of plate lengths for each plate #plateLenLabel = np.zeros((len(labels), 1), dtype=int) #for (i, label) in enumerate(labels): # plateLenLabel[i, 0] = label[7] # Create the one hot labels for each plate #plateLabelsOneHot = np.zeros((len(labels), (37 * 7) + 7), dtype=int) plateLabelsOneHot = np.zeros( (len(labels), (self.numClassesPerChar * self.maxPlateLen)), dtype=int) for i in range(len(labels)): oneHotText = self.plate_lb.transform(plateLabel[i]) #oneHotCharCnt = self.charCnt_lb.transform(plateLenLabel[i]) #plateLabelsOneHot[i] = np.concatenate((oneHotText.flatten(), oneHotCharCnt.flatten())) plateLabelsOneHot[i] = oneHotText.flatten() return plateLabelsOneHot # for every plate generate license plate chars, and license plate length def inverse_transform(self, oneHotLabels): plates = [] plateLens = [] oneHotLenDemuxed = [] for i in range(len(oneHotLabels)): oneHotDemuxed = [] for j in range(self.maxPlateLen): onehotDemux = np.array(oneHotLabels[i, j]) oneHotDemuxed.append(onehotDemux) oneHotDemuxed = np.array(oneHotDemuxed) plate = self.plate_lb.inverse_transform(oneHotDemuxed) plates.append(plate) #oneHotLenDemux = np.array(oneHotLabels[i, 37 * 7:]) #oneHotLenDemuxed.append(oneHotLenDemux) #oneHotLenDemuxed = np.array(oneHotLenDemuxed) #plateLens = (self.charCnt_lb.inverse_transform(oneHotLenDemuxed)) #return plates, plateLens return plates
class BaseClassifier(BaseEstimator): def predict_proba(self, X): if len(self.classes_) != 2: raise NotImplementedError("predict_(log_)proba only supported" " for binary classification") if self.loss == "log": df = self.decision_function(X).ravel() prob = 1.0 / (1.0 + np.exp(-df)) elif self.loss == "modified_huber": df = self.decision_function(X).ravel() prob = np.minimum(1, np.maximum(-1, df)) prob += 1 prob /= 2 else: raise NotImplementedError("predict_(log_)proba only supported when" " loss='log' or loss='modified_huber' " "(%s given)" % self.loss) out = np.zeros((X.shape[0], 2), dtype=np.float64) out[:, 1] = prob out[:, 0] = 1 - prob return out def _set_label_transformers(self, y, reencode=False, neg_label=-1): if reencode: self.label_encoder_ = LabelEncoder() y = self.label_encoder_.fit_transform(y).astype(np.int32) else: y = y.astype(np.int32) self.label_binarizer_ = LabelBinarizer(neg_label=neg_label, pos_label=1) self.label_binarizer_.fit(y) self.classes_ = self.label_binarizer_.classes_.astype(np.int32) n_classes = len(self.label_binarizer_.classes_) n_vectors = 1 if n_classes <= 2 else n_classes return y, n_classes, n_vectors def decision_function(self, X): pred = safe_sparse_dot(X, self.coef_.T) if hasattr(self, "intercept_"): pred += self.intercept_ return pred def predict(self, X): pred = self.decision_function(X) out = self.label_binarizer_.inverse_transform(pred) if hasattr(self, "label_encoder_"): out = self.label_encoder_.inverse_transform(out) return out
def test_label_binarizer_iris(): lb = LabelBinarizer() Y = lb.fit_transform(iris.target) clfs = [SGDClassifier().fit(iris.data, Y[:, k]) for k in range(len(lb.classes_))] Y_pred = np.array([clf.decision_function(iris.data) for clf in clfs]).T y_pred = lb.inverse_transform(Y_pred) accuracy = np.mean(iris.target == y_pred) y_pred2 = SGDClassifier().fit(iris.data, iris.target).predict(iris.data) accuracy2 = np.mean(iris.target == y_pred2) assert_almost_equal(accuracy, accuracy2)
def test_label_binarizer_multilabel(): lb = LabelBinarizer() inp = [(2, 3), (1,), (1, 2)] expected = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) got = lb.fit_transform(inp) assert_array_equal(expected, got) assert_equal(lb.inverse_transform(got), inp)
class _CategoricalEncoder: """OneHotEncoder that can handle categorical variables.""" def __init__(self): """Convert labeled categories into one-hot encoded features.""" self._lb = LabelBinarizer() def fit(self, X): """Fit a list or array of categories. Parameters ---------- * `X` [array-like, shape=(n_categories,)]: List of categories. """ self.mapping_ = {v: i for i, v in enumerate(X)} self.inverse_mapping_ = {i: v for v, i in self.mapping_.items()} self._lb.fit([self.mapping_[v] for v in X]) self.n_classes = len(self._lb.classes_) return self def transform(self, X): """Transform an array of categories to a one-hot encoded representation. Parameters ---------- * `X` [array-like, shape=(n_samples,)]: List of categories. Returns ------- * `Xt` [array-like, shape=(n_samples, n_categories)]: The one-hot encoded categories. """ return self._lb.transform([self.mapping_[v] for v in X]) def inverse_transform(self, Xt): """Inverse transform one-hot encoded categories back to their original representation. Parameters ---------- * `Xt` [array-like, shape=(n_samples, n_categories)]: One-hot encoded categories. Returns ------- * `X` [array-like, shape=(n_samples,)]: The original categories. """ Xt = np.asarray(Xt) return [ self.inverse_mapping_[i] for i in self._lb.inverse_transform(Xt) ]
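# Minimal usage sketch for _CategoricalEncoder, assuming the class above and
# its imports; the categories are illustrative. Unlike a bare LabelBinarizer,
# the columns follow the order the categories were passed to fit (via the
# explicit integer mapping), not sorted order.
enc = _CategoricalEncoder().fit(["red", "green", "blue"])
Xt = enc.transform(["blue", "red"])
print(Xt)                         # [[0 0 1], [1 0 0]] -- red, green, blue columns
print(enc.inverse_transform(Xt))  # -> ['blue', 'red']
print(enc.n_classes)              # -> 3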
class AdaBoostClassifier(BaseEstimator, ClassifierMixin): def __init__(self, estimator, n_estimators=10): self.estimator = estimator self.n_estimators = n_estimators def fit(self, X, y): n_samples = X.shape[0] weights = np.ones(n_samples, dtype=np.float64) / n_samples self._lb = LabelBinarizer(neg_label=-1) y = self._lb.fit_transform(y).ravel() self.estimators_ = np.zeros(self.n_estimators, dtype=np.object) self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64) y_pred_ = np.zeros(n_samples, dtype=np.float64) for it in xrange(self.n_estimators): est = clone(self.estimator) est = est.fit(X, y, sample_weight=weights) y_pred = est.predict(X) err = 1 - accuracy_score(y, y_pred, sample_weight=weights) if err == 0: self.estimator_weights_[it] = 1 self.estimators_[it] = est break alpha = 0.5 * np.log((1 - err) / err) #weights *= np.exp(- alpha * y * y_pred) #weights /= weights.sum() y_pred_ += alpha * y_pred weights = np.exp(-y * y_pred_) #weights = 1.0 / (1 + np.exp(y * y_pred_)) # logit boost weights /= weights.sum() self.estimator_weights_[it] = alpha self.estimators_[it] = est return self def predict(self, X): y_pred = np.zeros(X.shape[0], dtype=np.float64) for it in xrange(self.n_estimators): if self.estimator_weights_[it] != 0: pred = self.estimators_[it].predict(X) y_pred += self.estimator_weights_[it] * pred y_pred = np.sign(y_pred) return self._lb.inverse_transform(y_pred.reshape(-1, 1))
class NN_Classifier(NNBase): def __init__(self,layers = [], lr=0.01, epochs=None, noisy=None, verbose=False): super(NN_Classifier, self).__init__(layers=layers, lr=lr, epochs=epochs, noisy=noisy, verbose=verbose) self.type = 'C' self.error_func = CrossEntropyError self.accuracy_score = AccuracyScore self.label_binarizer = LabelBinarizer() def predict(self, X): predictions = [] for el in X: current_prediction = NNBase._predict(self, row(el)) predictions.append(current_prediction) predictions = np.vstack(predictions) current_results = coalesce(predictions) return self.label_binarizer.inverse_transform(current_results) def predict_proba(self, X): predictions = [] for el in X: current_prediction = NNBase._predict(self, row(el)) predictions.append(current_prediction) predictions = np.vstack(predictions) return predictions def fit(self, X, T): T_impl = self.label_binarizer.fit_transform(T) if not self.epochs: self.epochs = 1 for num in xrange(self.epochs): if self.verbose: print "Epoch: %d" % num for i in xrange(len(X)): NNBase._update(self, row(X[i]), row(T_impl[i])) def error(self, X, T): T_impl = self.label_binarizer.transform(T) Y = self.predict_proba(X) return self.error_func.func(Y, T_impl) def score(self, X, T): Y = self.predict(X) return self.accuracy_score.func(Y,T) def analytical_gradient(self, X, T): T_impl = self.label_binarizer.transform(T) return NNBase._analytical_gradient(self, X, T_impl) def numerical_gradient(self, X, T): T_impl = self.label_binarizer.transform(T) return NNBase._numerical_gradient(self, X, T_impl)
class DualLinearSVC(BaseEstimator, ClassifierMixin): def __init__(self, C=1.0, loss="l1", max_iter=1000, tol=1e-3, termination="convergence", sv_upper_bound=1000, shrinking=True, warm_start=False, random_state=None, callback=None, verbose=0, n_jobs=1): self.C = C self.loss = loss self.max_iter = max_iter self.tol = tol self.termination = termination self.sv_upper_bound = sv_upper_bound self.shrinking = shrinking self.warm_start = warm_start self.random_state = random_state self.callback = callback self.verbose = verbose self.n_jobs = n_jobs self.coef_ = None def fit(self, X, y): n_samples, n_features = X.shape rs = check_random_state(self.random_state) self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1) Y = self.label_binarizer_.fit_transform(y) n_vectors = Y.shape[1] if not self.warm_start or self.coef_ is None: self.coef_ = np.zeros((n_vectors, n_features), dtype=np.float64) self.dual_coef_ = np.zeros((n_vectors, n_samples), dtype=np.float64) kernel = get_kernel("linear") kcache = KernelCache(kernel, n_samples, 0, 0, self.verbose) for i in xrange(n_vectors): _dual_cd(self, self.coef_[i], self.dual_coef_[i], X, Y[:, i], kcache, True, "permute", 60, self.termination, self.sv_upper_bound, self.C, self.loss, self.max_iter, rs, self.tol, self.shrinking, self.callback, verbose=self.verbose) return self def decision_function(self, X): return np.dot(X, self.coef_.T) def predict(self, X): pred = self.decision_function(X) return self.label_binarizer_.inverse_transform(pred, threshold=0)
class MLPClassifier(BaseMLP, ClassifierMixin): """ Multilayer Perceptron Classifier. Uses a neural network with one hidden layer. Parameters ---------- Attributes ---------- Notes ----- References ----------""" def __init__(self, n_hidden=200, lr=0.1, l2decay=0, loss='cross_entropy', output_layer='softmax', batch_size=100, verbose=0): super(MLPClassifier, self).__init__(n_hidden, lr, l2decay, loss, output_layer, batch_size, verbose) def fit(self, X, y, max_epochs=10, shuffle_data=False): self.lb = LabelBinarizer() one_hot_labels = self.lb.fit_transform(y) super(MLPClassifier, self).fit( X, one_hot_labels, max_epochs, shuffle_data) return self def predict(self, X): prediction = super(MLPClassifier, self).predict(X) return self.lb.inverse_transform(prediction) # def test_classification(): # from sklearn.datasets import load_digits # digits = load_digits() # X, y = digits.data, digits.target # mlp = MLPClassifier() # mlp.fit(X, y) # training_score = mlp.score(X, y) # print("training accuracy: %f" % training_score) # assert(training_score > .95) # # # if __name__ == "__main__": # test_classification()
class ELM(BaseEstimator): def __init__(self, h=60, activation='linear', random_state=None, C=100): self.name = 'elm' self.h = h self.activation = activation self.random_state = random_state self.C = C assert self.activation in ['rbf', 'sigmoid', 'linear'] def fit(self, X, y): if self.random_state is None: self.random_state = np.random.RandomState(np.random.randint(0, np.iinfo(np.int32).max)) elif type(self.random_state) == int: self.random_state = np.random.RandomState(self.random_state) self.lb = LabelBinarizer() self.W = self.random_state.normal(size=(X.shape[1], self.h)) self.B = self.random_state.normal(size=self.h) if self.activation == 'rbf': H = _elm_vectorized_rbf(X, self.W, self.B) elif self.activation == 'sigmoid': H = _elm_sigmoid(X, self.W, self.B) else : H = X.dot(self.W) self.lb.fit(y) lam = np.eye(H.shape[1]) * (1./self.C) H_inv = np.linalg.inv(H.T.dot(H) + lam) self.beta = H_inv.dot(H.T.dot(self.lb.transform(y))) return self def decision_function(self, X): if self.activation == 'rbf': return _elm_vectorized_rbf(X, self.W, self.B).dot(self.beta) elif self.activation == 'sigmoid': return _elm_sigmoid(X, self.W, self.B).dot(self.beta) else : return X.dot(self.W).dot(self.beta) def predict(self, X): return self.lb.inverse_transform(self.decision_function(X))
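# Minimal usage sketch (illustrative data), assuming the ELM class above and
# its imports. With activation='linear' the model is a random linear projection
# followed by a ridge-style least-squares readout onto the binarized targets;
# predict() maps the continuous outputs back to labels with inverse_transform.
import numpy as np

X = np.random.RandomState(0).normal(size=(20, 5))
y = np.array([0, 1] * 10)
clf = ELM(h=10, activation='linear', random_state=0, C=100).fit(X, y)
print(clf.predict(X)[:5])  # labels recovered via LabelBinarizer.inverse_transform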
class KMPClassifier(KMPBase, ClassifierMixin): def fit(self, X, y): n_nonzero_coefs, K, y, norms = self._pre_fit(X, y) self.lb_ = LabelBinarizer() Y = self.lb_.fit_transform(y) self._fit(K, y, Y, n_nonzero_coefs, norms) self._post_fit() return self def predict(self, X): pred = self.decision_function(X) return self.lb_.inverse_transform(pred, threshold=0.5)
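# Note on the explicit threshold used above (standalone sketch, illustrative
# scores): in a binary problem fit_transform yields a single 0/1 column, and
# inverse_transform(pred, threshold=0.5) assigns the positive class only to
# scores above 0.5.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(["neg", "pos"])
scores = np.array([[0.2], [0.9]])
print(lb.inverse_transform(scores, threshold=0.5))  # -> ['neg' 'pos']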
class TWELM(ProjectorMixin, BaseEstimator): def __str__(self): if self.C==None: solver = self.solve.__name__ else: solver = 'algebraic,C='+str(self.C) return 'TWELM(h='+str(self.h)+',f='+self.f.__name__+',balanced=true,solver='+solver+',extreme='+str(self.extreme)+')' def __init__(self, projector, h=100, C=None, solver=la.lstsq, random_state=0, extreme=True): self.h = h self.C = C self.projector = projector self.random_state = random_state self.solve = solver self.extreme = extreme def fit(self, X, y ): self.labeler = LabelBinarizer() rng = check_random_state(self.random_state) self.projector.set_params(h=self.h, rng=rng) H = self.projector.fit(X).project(X) y = y.tolist() s = { l : float(y.count(l)) for l in set(y) } ms= max([ s[k] for k in s ]) s = { l : ms/s[l] for l in s } w = np.array( [[ np.sqrt( s[a] ) for a in y ]] ).T T = self.labeler.fit_transform(y) start = time.time() if self.C==None: self.beta, _, _, _ = self.solve( np.multiply(H,w), np.multiply(T,w) ) else: H = np.multiply(H,w) self.beta = ( la.inv( np.eye(H.shape[1])/self.C + H.T.dot(H) ) ).dot( H.T.dot(np.multiply(T,w)) ) self.train_time = time.time()-start return self def predict(self, X ): return self.labeler.inverse_transform(np.dot(self.projector.project(X), self.beta)).T def decision_function(self, X): return np.dot(self.projector.project(X), self.beta)
class ELMClassifier(BaseELM, ClassifierMixin): def __init__(self, n_hidden=20, regularized=False): super(ELMClassifier, self).__init__(n_hidden, regularized) self.classes_ = None def fit(self, X, y): self.classes_ = np.unique(y) self._lbin = LabelBinarizer() y = self._lbin.fit_transform(y) super(ELMClassifier, self).fit(X, y) return self def predict(self, X): X = atleast2d_or_csr(X) scores = self.decision_function(X) # if len(scores.shape) == 1: #scores = logistic_sigmoid(scores) #results = (scores > 0.5).astype(np.int) # else: #scores = _softmax(scores) #results = scores.argmax(axis=1) # self.classes_[results] return self._lbin.inverse_transform(scores) def predict_proba(self, X): scores = self.decision_function(X) if len(scores.shape) == 1: scores = logistic_sigmoid(scores) return np.vstack([1 - scores, scores]).T else: return _softmax(scores)
class encode_cat: """Wraps labelbinarizer and encoder together""" def __init__(self): self.LB = LabelBinarizer() self.LE = LabelEncoder() return def fit(self, X, y=None): self.LE.fit(X) self.LB.fit(self.LE.transform(X)) return def transform(self, X, y=None): return self.LB.transform(self.LE.transform(X)) def fit_transform(self, X, y=None): self.LE.fit(X) self.LB.fit(self.LE.transform(X)) return self.LB.transform(self.LE.transform(X)) def inverse_transform(self, X, y=None): return self.LE.inverse_transform(self.LB.inverse_transform(X))
class ActualTreatmentPredictor(_BasePredictor): """Returns the most likely treatments for a patient. Args: prediction_model: The model used to make predictions preprocessor: The preprocessor used to transform the patient features into a format that can be used by the prediction_model recommendation_probability_threshold: The probability threshold that a potential recommendation needs to have a higher probability than to be considered a possible treatment. """ def __init__(self, prediction_model, preprocessor, recommendation_probability_threshold=0.05): super().__init__(prediction_model, preprocessor) self._treatment_label_binarizer = LabelBinarizer() self._recommendation_probability_threshold = recommendation_probability_threshold def _pre_fit_hook(self, data): self._treatment_label_binarizer.fit(data.treatment.unique()) def _get_outcome_data_for_training(self, data): return self._treatment_label_binarizer.transform(data.treatment.values) def _get_predicted_value(self, prediction): return self._treatment_label_binarizer.inverse_transform(prediction) def get_possible_treatments(self, data): """Returns the most likely treatments for a patient. Args: data: A dataframe containing patient features as well as a sample_id column. The sample_id column is needed because there can be many most likely treatments for a sample_id and the column is used to reconcile the treatment with the record. Returns: A dataframe with the following columns: sample_id: The sample_id the treatment is for. treatment: The treatment category """ self._checked_is_trained() # leave comment on structure probabilities_sectioned_by_treatment = self._pipeline.predict_proba(data) ordered_treatments = self._treatment_label_binarizer.classes_ treatment_dfs = [] for (treatment, probabilities_for_treatment) in zip(ordered_treatments, probabilities_sectioned_by_treatment): probability_of_treatment = [prob[1] if len(prob) > 1 else 0 for prob in probabilities_for_treatment] df = pd.DataFrame({ "treatment": treatment, "probability_of_treatment": probability_of_treatment, "sample_id": range(len(probability_of_treatment)) }) treatment_dfs.append(df) combined_df = pd.concat(treatment_dfs) # Get all treatments that have a probability greater than the threshold sample_with_high_probability = \ combined_df[combined_df.probability_of_treatment > self._recommendation_probability_threshold] # Get the top probability for a sample_id. This treatment will be used if there is no treatment for the # sample_id greater than the threshold top_treatment_per_sample_id = combined_df.groupby("sample_id")["probability_of_treatment"].nlargest( 1).reset_index().drop('level_1', axis=1) # Find top treatments for samples that have not treatment above the threshold. This is a rare case but can # happen. samples_ids_with_high_prob = set(sample_with_high_probability.sample_id.unique()) all_sample_ids = set(combined_df.sample_id.unique()) ids_not_in_high_prob = all_sample_ids - samples_ids_with_high_prob top_treatments_for_samples_missing_high_prob =\ top_treatment_per_sample_id[top_treatment_per_sample_id.sample_id.isin(ids_not_in_high_prob)] return pd.concat([sample_with_high_probability, top_treatments_for_samples_missing_high_prob])
class ELMClassifier(BaseELM, ClassifierMixin): """Extreme learning machine classifier. The algorithm trains a single-hidden layer feedforward network by computing the hidden layer values using randomized parameters, then solving for the output weights using least-square solutions. For prediction, after computing the forward pass, the continuous output values pass through a gate function converting them to integers that represent classes. This implementation works with data represented as dense and sparse numpy arrays of floating point values for the features. Parameters ---------- C : float, optional, default 100 A regularization term that controls the linearity of the decision function. Smaller value of C makes the decision boundary more linear. class_weight : {dict, 'auto', None}, default None If 'auto', class weights will be given inversely proportional to the frequency of the class in the data. If a dictionary is given, keys are the class labels and the corresponding values are the class weights. If None is given, then no class weights will be applied. weight_scale : float, default 1. Initializes and scales the input-to-hidden weights. The weight values will range between plus and minus 'sqrt(weight_scale * 6. / (n_features + n_hidden))' based on the uniform distribution. n_hidden : int, default 100 The number of units in the hidden layer. activation : {'logistic', 'tanh', 'relu'}, default 'relu' Activation function for the hidden layer. - 'logistic', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(x)). - 'tanh', the hyperbolic tan function, returns f(x) = tanh(x). - 'relu', the rectified linear unit function, returns f(x) = max(0, x). batch_size : int, optional, default None If None is given, batch_size is set as the number of samples. Otherwise, it will be set as the given integer. verbose : bool, optional, default False Whether to print the training score. warm_start : bool, optional, default False When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. random_state : int or RandomState, optional, default None State of or seed for random number generator. Attributes ---------- `classes_` : array-list, shape (n_classes,) Class labels for each output. `n_outputs_` : int Number of output neurons. `coef_hidden_` : array-like, shape (n_features, n_hidden) The input-to-hidden weights. `intercept_hidden_` : array-like, shape (n_hidden,) The bias added to the hidden layer neurons. `coef_output_` : array-like, shape (n_hidden, n_outputs_) The hidden-to-output weights. `label_binarizer_` : LabelBinarizer A LabelBinarizer object trained on the training set. References ---------- Liang, Nan-Ying, et al. "A fast and accurate online sequential learning algorithm for feedforward networks." Neural Networks, IEEE Transactions on 17.6 (2006): 1411-1423. http://www.ntu.edu.sg/home/egbhuang/pdf/OS-ELM-TNN.pdf Zong, Weiwei, Guang-Bin Huang, and Yiqiang Chen. "Weighted extreme learning machine for imbalance learning." Neurocomputing 101 (2013): 229-242. Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of training deep feedforward neural networks." International Conference on Artificial Intelligence and Statistics. 2010. 
""" def __init__(self, n_hidden=100, activation='relu', C=1, class_weight=None, weight_scale=1.0, batch_size=None, verbose=False, warm_start=False, random_state=None): super(ELMClassifier, self).__init__(n_hidden=n_hidden, activation=activation, C=C, class_weight=class_weight, weight_scale=weight_scale, batch_size=batch_size, verbose=verbose, warm_start=warm_start, random_state=random_state) self.label_binarizer_ = LabelBinarizer(-1, 1) def partial_fit(self, X, y, classes=None, sample_weight=None): """Fit the model to the data X and target y. Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. y : array-like, shape (n_samples,) Subset of the target values. classes : array-like, shape (n_classes,) List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls. sample_weight : array-like, shape (n_samples,) Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points. Returns ------- self : returns a trained elm usable for prediction. """ self.classes_ = classes super(ELMClassifier, self).partial_fit(X, y, sample_weight) return self def decision_function(self, X): """Decision function of the elm model Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y : array-like, shape (n_samples,) or (n_samples, n_classes) The predicted values. """ y_scores = self._decision_scores(X) if self.n_outputs_ == 1: return y_scores.ravel() else: return y_scores def predict(self, X): """Predict using the ELM model Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y : array-like, shape (n_samples,) or (n_samples, n_classes) The predicted classes, or the predicted values. """ y_scores = self._decision_scores(X) return self.label_binarizer_.inverse_transform(y_scores) def predict_proba(self, X): """Probability estimates. Warning: the estimates aren't callibrated since the model optimizes a penalized least squares objective function based on the One Vs Rest binary encoding of the class membership. Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y_prob : array-like, shape (n_samples, n_classes) The predicted probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. """ y_scores = self._decision_scores(X) if len(self.classes_) == 2: y_scores = logistic(y_scores) return np.hstack([1 - y_scores, y_scores]) else: return softmax(y_scores)
def exp(solvers, penalties, single_target, n_samples=30000, max_iter=20, dataset='rcv1', n_jobs=1, skip_slow=False): mem = Memory(cachedir=expanduser('~/cache'), verbose=0) if dataset == 'rcv1': rcv1 = fetch_rcv1() lbin = LabelBinarizer() lbin.fit(rcv1.target_names) X = rcv1.data y = rcv1.target y = lbin.inverse_transform(y) le = LabelEncoder() y = le.fit_transform(y) if single_target: y_n = y.copy() y_n[y > 16] = 1 y_n[y <= 16] = 0 y = y_n elif dataset == 'digits': digits = load_digits() X, y = digits.data, digits.target if single_target: y_n = y.copy() y_n[y < 5] = 1 y_n[y >= 5] = 0 y = y_n elif dataset == 'iris': iris = load_iris() X, y = iris.data, iris.target elif dataset == '20newspaper': ng = fetch_20newsgroups_vectorized() X = ng.data y = ng.target if single_target: y_n = y.copy() y_n[y > 4] = 1 y_n[y <= 16] = 0 y = y_n X = X[:n_samples] y = y[:n_samples] cached_fit = mem.cache(fit_single) out = Parallel(n_jobs=n_jobs, mmap_mode=None)( delayed(cached_fit)(solver, X, y, penalty=penalty, single_target=single_target, C=1, max_iter=max_iter, skip_slow=skip_slow) for solver in solvers for penalty in penalties) res = [] idx = 0 for solver in solvers: for penalty in penalties: if not (skip_slow and solver == 'lightning' and penalty == 'l1'): lr, times, train_scores, test_scores, accuracies = out[idx] this_res = dict(solver=solver, penalty=penalty, single_target=single_target, times=times, train_scores=train_scores, test_scores=test_scores, accuracies=accuracies) res.append(this_res) idx += 1 with open('bench_saga.json', 'w+') as f: json.dump(res, f)
class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
    """Multi-layer Perceptron classifier.

    This algorithm optimizes the log-loss function using l-bfgs or gradient
    descent.

    Parameters
    ----------
    hidden_layer_sizes : tuple, length = n_layers - 2, default (100,)
        The ith element represents the number of neurons in the ith
        hidden layer.

    activation : {'logistic', 'tanh', 'relu'}, default 'relu'
        Activation function for the hidden layer.

        - 'logistic', the logistic sigmoid function,
          returns f(x) = 1 / (1 + exp(-x)).

        - 'tanh', the hyperbolic tan function,
          returns f(x) = tanh(x).

        - 'relu', the rectified linear unit function,
          returns f(x) = max(0, x).

    algorithm : {'l-bfgs', 'sgd', 'adam'}, default 'adam'
        The algorithm for weight optimization.

        - 'l-bfgs' is an optimization algorithm in the family of
          quasi-Newton methods.

        - 'sgd' refers to stochastic gradient descent.

        - 'adam' refers to a stochastic gradient-based optimization algorithm
          proposed by Kingma, Diederik, and Jimmy Ba.

        Note: The default algorithm 'adam' works pretty well on relatively
        large datasets (with thousands of training samples or more) in terms
        of both training time and validation score. For small datasets,
        however, 'l-bfgs' can converge faster and perform better.

    alpha : float, optional, default 0.0001
        L2 penalty (regularization term) parameter.

    batch_size : int, optional, default 'auto'
        Size of minibatches for stochastic optimizers.
        If the algorithm is 'l-bfgs', the classifier will not use minibatch.
        When set to "auto", `batch_size=min(200, n_samples)`.

    learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
        Learning rate schedule for weight updates.

        - 'constant' is a constant learning rate given by
          'learning_rate_init'.

        - 'invscaling' gradually decreases the learning rate
          ``learning_rate_`` at each time step 't' using an inverse scaling
          exponent of 'power_t'.
          effective_learning_rate = learning_rate_init / pow(t, power_t)

        - 'adaptive' keeps the learning rate constant to 'learning_rate_init'
          as long as training loss keeps decreasing. Each time two consecutive
          epochs fail to decrease training loss by at least tol, or fail to
          increase validation score by at least tol if 'early_stopping' is on,
          the current learning rate is divided by 5.

        Only used when algorithm='sgd'.

    max_iter : int, optional, default 200
        Maximum number of iterations. The algorithm iterates until convergence
        (determined by 'tol') or this number of iterations.

    random_state : int or RandomState, optional, default None
        State or seed for random number generator.

    shuffle : bool, optional, default True
        Whether to shuffle samples in each iteration. Only used when
        algorithm='sgd' or 'adam'.

    tol : float, optional, default 1e-4
        Tolerance for the optimization. When the loss or score is not
        improving by at least tol for two consecutive iterations, unless
        `learning_rate` is set to 'adaptive', convergence is considered to be
        reached and training stops.

    learning_rate_init : double, optional, default 0.001
        The initial learning rate used. It controls the step-size in updating
        the weights. Only used when algorithm='sgd' or 'adam'.

    power_t : double, optional, default 0.5
        The exponent for inverse scaling learning rate.
        It is used in updating effective learning rate when the learning_rate
        is set to 'invscaling'. Only used when algorithm='sgd'.

    verbose : bool, optional, default False
        Whether to print progress messages to stdout.

    warm_start : bool, optional, default False
        When set to True, reuse the solution of the previous call to fit as
        initialization, otherwise, just erase the previous solution.

    momentum : float, default 0.9
        Momentum for gradient descent update. Should be between 0 and 1. Only
        used when algorithm='sgd'.

    nesterovs_momentum : boolean, default True
        Whether to use Nesterov's momentum. Only used when algorithm='sgd' and
        momentum > 0.

    early_stopping : bool, default False
        Whether to use early stopping to terminate training when validation
        score is not improving. If set to true, it will automatically set
        aside 10% of training data as validation and terminate training when
        validation score is not improving by at least tol for two consecutive
        epochs. Only effective when algorithm='sgd' or 'adam'.

    validation_fraction : float, optional, default 0.1
        The proportion of training data to set aside as validation set for
        early stopping. Must be between 0 and 1.
        Only used if early_stopping is True.

    beta_1 : float, optional, default 0.9
        Exponential decay rate for estimates of first moment vector in adam,
        should be in [0, 1). Only used when algorithm='adam'.

    beta_2 : float, optional, default 0.999
        Exponential decay rate for estimates of second moment vector in adam,
        should be in [0, 1). Only used when algorithm='adam'.

    epsilon : float, optional, default 1e-8
        Value for numerical stability in adam. Only used when
        algorithm='adam'.

    Attributes
    ----------
    `classes_` : array or list of array of shape (n_classes,)
        Class labels for each output.

    `loss_` : float
        The current loss computed with the loss function.

    `label_binarizer_` : LabelBinarizer
        A LabelBinarizer object trained on the training set.

    `coefs_` : list, length n_layers - 1
        The ith element in the list represents the weight matrix corresponding
        to layer i.

    `intercepts_` : list, length n_layers - 1
        The ith element in the list represents the bias vector corresponding
        to layer i + 1.

    n_iter_ : int
        The number of iterations the algorithm has run.

    n_layers_ : int
        Number of layers.

    `n_outputs_` : int
        Number of outputs.

    `out_activation_` : string
        Name of the output activation function.

    Notes
    -----
    MLPClassifier trains iteratively since at each time step the partial
    derivatives of the loss function with respect to the model parameters are
    computed to update the parameters.

    It can also have a regularization term added to the loss function that
    shrinks model parameters to prevent overfitting.

    This implementation works with data represented as dense numpy arrays or
    sparse scipy arrays of floating point values.

    References
    ----------
    Hinton, Geoffrey E. "Connectionist learning procedures." Artificial
        intelligence 40.1 (1989): 185-234.

    Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of
        training deep feedforward neural networks." International Conference
        on Artificial Intelligence and Statistics. 2010.

    He, Kaiming, et al. "Delving deep into rectifiers: Surpassing human-level
        performance on imagenet classification." arXiv preprint
        arXiv:1502.01852 (2015).

    Kingma, Diederik, and Jimmy Ba. "Adam: A method for stochastic
        optimization." arXiv preprint arXiv:1412.6980 (2014).
""" def __init__(self, hidden_layer_sizes=(100,), activation="relu", algorithm='adam', alpha=0.0001, batch_size='auto', learning_rate="constant", learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=1e-4, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-8): sup = super(MLPClassifier, self) sup.__init__(hidden_layer_sizes=hidden_layer_sizes, activation=activation, algorithm=algorithm, alpha=alpha, batch_size=batch_size, learning_rate=learning_rate, learning_rate_init=learning_rate_init, power_t=power_t, max_iter=max_iter, loss='log_loss', shuffle=shuffle, random_state=random_state, tol=tol, verbose=verbose, warm_start=warm_start, momentum=momentum, nesterovs_momentum=nesterovs_momentum, early_stopping=early_stopping, validation_fraction=validation_fraction, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon) self.label_binarizer_ = LabelBinarizer() def _validate_input(self, X, y, incremental): X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], multi_output=True) if y.ndim == 2 and y.shape[1] == 1: y = column_or_1d(y, warn=True) self.label_binarizer_.fit(y) if not hasattr(self, 'classes_') or not incremental: self.classes_ = self.label_binarizer_.classes_ else: classes = self.label_binarizer_.classes_ if not np.all(np.in1d(classes, self.classes_)): raise ValueError("`y` has classes not in `self.classes_`." " `self.classes_` has %s. 'y' has %s." % (self.classes_, classes)) y = self.label_binarizer_.transform(y) return X, y def decision_function(self, X): """Decision function of the mlp model Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y : array-like, shape (n_samples,) or (n_samples, n_classes) The values of decision function for each class in the model. """ check_is_fitted(self, "coefs_") y_scores = self._decision_scores(X) if self.n_outputs_ == 1: return y_scores.ravel() else: return y_scores def predict(self, X): """Predict using the multi-layer perceptron classifier Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y : array-like, shape (n_samples,) or (n_samples, n_classes) The predicted classes. """ check_is_fitted(self, "coefs_") y_scores = self.decision_function(X) y_scores = ACTIVATIONS[self.out_activation_](y_scores) return self.label_binarizer_.inverse_transform(y_scores) @property def partial_fit(self): """Fit the model to data matrix X and target y. Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. y : array-like, shape (n_samples,) The target values. classes : array, shape (n_classes) Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`. Returns ------- self : returns a trained MLP model. """ if self.algorithm not in _STOCHASTIC_ALGOS: raise AttributeError("partial_fit is only available for stochastic" "optimization algorithms. 
%s is not" " stochastic" % self.algorithm) return self._partial_fit def _partial_fit(self, X, y, classes=None): _check_partial_fit_first_call(self, classes) super(MLPClassifier, self)._partial_fit(X, y) return self def predict_log_proba(self, X): """Return the log of probability estimates. Parameters ---------- X : array-like, shape (n_samples, n_features) The input data. Returns ------- log_y_prob : array-like, shape (n_samples, n_classes) The predicted log-probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. Equivalent to log(predict_proba(X)) """ y_prob = self.predict_proba(X) return np.log(y_prob, out=y_prob) def predict_proba(self, X): """Probability estimates. Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y_prob : array-like, shape (n_samples, n_classes) The predicted probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. """ y_scores = self.decision_function(X) if y_scores.ndim == 1: y_scores = logistic(y_scores) return np.vstack([1 - y_scores, y_scores]).T else: return softmax(y_scores)