def test_multi_logistic_regression_gd_acc():
    """Full-batch gradient descent must predict every training label."""
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X, y)
    predicted = model.predict(X)
    assert (y == predicted).all()
def test_binary_logistic_regression_gd():
    """Two-class, full-batch fit: check learned weights and predictions."""
    expected_weights = np.array([[0.13, -0.12],
                                 [-3.07, 3.05]])
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X_bin, y_bin)

    # Weights are compared to two decimal places.
    np.testing.assert_almost_equal(model.w_, expected_weights, 2)
    predicted = model.predict(X_bin)
    assert (y_bin == predicted).all()
def test_multi_logistic_probas():
    """Check predicted class probabilities for one sample of each label."""
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X, y)

    sample_rows = [0, 50, 149]  # sample labels: 0, 1, 2
    expected = np.array([[1.0, 0.0, 0.0],
                         [0.08, 0.60, 0.32],
                         [0.0, 0.00, 0.99]])
    probabilities = model.predict_proba(X[sample_rows])
    np.testing.assert_almost_equal(probabilities, expected, 2)
def test_progress_2():
    """Smoke test: a single epoch with print_progress=2 must not raise."""
    model = SoftmaxRegression(print_progress=2,
                              random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=1)
    # 0, 1 class
    model.fit(X_bin, y_bin)
def test_progress_3():
    """Smoke test: a single epoch with print_progress=3 must not raise."""
    model = SoftmaxRegression(print_progress=3,
                              random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=1)
    # 0, 1 class
    model.fit(X_bin, y_bin)
def test_score_function():
    """``score`` must report perfect accuracy on the training data."""
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X, y)
    accuracy = model.score(X, y)
    # The observed accuracy is the failure message if the check fails.
    assert accuracy == 1.0, accuracy
def test_multi_logistic_regression_gd_weights():
    """Full-batch multi-class fit must reproduce the reference weights."""
    expected_weights = np.array([[-0.95, -2.45, 3.4],
                                 [-3.95, 2.34, 1.59]])
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X, y)
    # Compared to two decimal places.
    np.testing.assert_almost_equal(model.w_, expected_weights, 2)
def test_binary_logistic_regression_sgd():
    """Two-class fit with as many minibatches as samples."""
    expected_weights = np.array([[-0.68, 0.68],
                                 [-3.2, 3.2]])
    n_samples = len(y_bin)
    model = SoftmaxRegression(random_seed=1,
                              minibatches=n_samples,
                              eta=0.005,
                              epochs=200)
    # 0, 1 class
    model.fit(X_bin, y_bin)

    np.testing.assert_almost_equal(model.w_, expected_weights, 2)
    predicted = model.predict(X_bin)
    assert (y_bin == predicted).all()
def test_binary_logistic_regression_gd():
    """Two-class, full-batch fit: check learned weights and predictions."""
    expected_weights = np.array([[-0.2, 0.2],
                                 [-3.09, 3.09]])
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X_bin, y_bin)

    # Weights are compared to two decimal places.
    np.testing.assert_almost_equal(model.w_, expected_weights, 2)
    predicted = model.predict(X_bin)
    assert (y_bin == predicted).all()
def test_binary_logistic_regression_sgd():
    """Two-class fit with as many minibatches as samples."""
    expected_weights = np.array([[0.13, -0.12],
                                 [-3.06, 3.05]])
    n_samples = len(y_bin)
    model = SoftmaxRegression(random_seed=1,
                              minibatches=n_samples,
                              eta=0.005,
                              epochs=200)
    # 0, 1 class
    model.fit(X_bin, y_bin)

    np.testing.assert_almost_equal(model.w_, expected_weights, 2)
    predicted = model.predict(X_bin)
    assert (y_bin == predicted).all()
def test_binary_l2_regularization_gd():
    """Two-class, full-batch fit with ``l2=1.0``: check weights and labels."""
    expected_weights = np.array([[-0.17, 0.17],
                                 [-2.26, 2.26]])
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              l2=1.0,
                              eta=0.005,
                              epochs=200)
    model.fit(X_bin, y_bin)

    np.testing.assert_almost_equal(model.w_, expected_weights, 2)
    predicted = model.predict(X_bin)
    assert (y_bin == predicted).all()
def test_multi_logistic_probas():
    """Check predicted class probabilities for one sample of each label."""
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X, y)

    sample_rows = [0, 50, 149]  # sample labels: 0, 1, 2
    expected = np.array([[0.99, 0.01, 0.00],
                         [0.01, 0.88, 0.11],
                         [0.00, 0.02, 0.98]])
    probabilities = model.predict_proba(X[sample_rows])
    np.testing.assert_almost_equal(probabilities, expected, 2)
def test_binary_l2_regularization_gd():
    """Two-class fit with ``l2_lambda=1.0``: check weights and accuracy."""
    expected_weights = np.array([[-0.316, 0.317],
                                 [-2.265, 2.265]])
    model = SoftmaxRegression(random_seed=1,
                              l2_lambda=1.0,
                              minibatches=1,
                              epochs=200,
                              eta=0.005)
    model.fit(X_bin, y_bin)
    predicted = model.predict(X_bin)

    # Weights are compared to three decimal places.
    np.testing.assert_almost_equal(model.w_, expected_weights, 3)

    n_correct = sum(predicted == y_bin)
    accuracy = n_correct / len(y_bin)
    assert accuracy == 1.0
def test_binary_l2_regularization_gd():
    """Two-class fit with ``l2_lambda=1.0``: check weights and accuracy."""
    expected_weights = np.array([[-0.316, 0.317],
                                 [-2.265, 2.265]])
    model = SoftmaxRegression(random_seed=1,
                              l2_lambda=1.0,
                              minibatches=1,
                              epochs=200,
                              eta=0.005)
    model.fit(X_bin, y_bin)
    predicted = model.predict(X_bin)

    # Weights are compared to three decimal places.
    np.testing.assert_almost_equal(model.w_, expected_weights, 3)

    n_correct = sum(predicted == y_bin)
    accuracy = n_correct / len(y_bin)
    assert accuracy == 1.0
def __init__(self, eta=0.01, epochs=50, l2=0.0, minibatches=1,
             n_classes=None, random_seed=None, print_progress=0):
    """Initialise both base classes of the wrapper.

    ``epochs`` is coerced with ``int()`` before being forwarded; every
    other argument is passed positionally to ``_SoftmaxRegression``
    unchanged.
    """
    # Install an 'ignore' filter for FutureWarning using the module
    # pattern 'mlxtend*'.
    warnings.filterwarnings(action='ignore',
                            category=FutureWarning,
                            module='mlxtend*')

    _SoftmaxRegression.__init__(self, eta, int(epochs), l2, minibatches,
                                n_classes, random_seed, print_progress)
    BaseWrapperClf.__init__(self)
def test_refit_weights():
    """Refitting with ``init_params=False`` must continue from current weights.

    The model is fitted for 100 epochs, two weights are recorded, and the
    model is fitted again without re-initialising. Both recorded weights
    must have changed, and the final weights must match the reference
    values to two decimal places.
    """
    t = np.array([[0.13, -0.12],
                  [-3.07, 3.05]])
    lr = SoftmaxRegression(epochs=100,
                           eta=0.005,
                           minibatches=1,
                           random_seed=1)

    lr.fit(X_bin, y_bin)
    w1 = lr.w_[0][0]
    # Record the element that is compared below. Reading w_[0][0] here
    # meant the second assertion compared two unrelated weights.
    w2 = lr.w_[1][0]
    lr.fit(X_bin, y_bin, init_params=False)

    assert w1 != lr.w_[0][0]
    assert w2 != lr.w_[1][0]
    np.testing.assert_almost_equal(lr.w_, t, 2)
def test_labels():
    """Fitting on a label array containing -1 must raise AttributeError."""
    features = np.array([[1, 2], [3, 4]])
    labels = np.array([-1, 1])
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    expected_message = ('y array must not contain negative labels.'
                        '\nFound [-1 1]')
    assert_raises(AttributeError,
                  expected_message,
                  model.fit,
                  features,
                  labels)
def test_clone_params_pass():
    """Exhaustive selection of two iris features with ``clone_estimator=False``."""
    iris = load_iris()
    features, target = iris.data, iris.target
    estimator = SoftmaxRegression(random_seed=1)
    selector = EFS(estimator,
                   min_features=2,
                   max_features=2,
                   scoring='accuracy',
                   cv=0,
                   clone_estimator=False,
                   print_progress=False,
                   n_jobs=1)
    selector = selector.fit(features, target)
    assert selector.best_idx_ == (1, 3)
def test_clone_params_pass():
    """Forward selection of two iris features with ``clone_estimator=True``."""
    iris = load_iris()
    features, target = iris.data, iris.target
    estimator = SoftmaxRegression(random_seed=1)
    selector = SFS(estimator,
                   k_features=2,
                   forward=True,
                   floating=False,
                   scoring='accuracy',
                   cv=0,
                   clone_estimator=True,
                   verbose=0,
                   n_jobs=1)
    selector = selector.fit(features, target)
    assert selector.k_feature_idx_ == (1, 3)
def test_clone_params_pass():
    """Forward selection of three iris features with ``clone_estimator=False``."""
    iris = load_iris()
    features, target = iris.data, iris.target
    estimator = SoftmaxRegression(random_seed=1)
    selector = SFS(estimator,
                   k_features=3,
                   forward=True,
                   floating=False,
                   scoring='accuracy',
                   cv=0,
                   skip_if_stuck=True,
                   clone_estimator=False,
                   print_progress=False,
                   n_jobs=1)
    selector = selector.fit(features, target)
    assert selector.k_feature_idx_ == (0, 1, 2)
def test_check_pandas_dataframe_fit_backward():
    """Backward selection on a NumPy array and on a DataFrame.

    Runs once with and once without floating selection. Each pass fits on
    the raw array, then on a DataFrame, and finally fits again with
    ``_TESTING_INTERRUPT_MODE`` switched on and checks the selector state.
    """
    top_three_names = ('sepal len', 'sepal width', 'petal len')
    top_two_names = ('sepal width', 'petal len')

    for floating in (True, False):
        iris = load_iris()
        X, y = iris.data, iris.target
        estimator = SoftmaxRegression(random_seed=1)
        selector = SFS(estimator,
                       k_features=2,
                       forward=False,
                       floating=floating,
                       scoring='accuracy',
                       cv=0,
                       verbose=0,
                       n_jobs=1)
        frame = pd.DataFrame(
            X,
            columns=['sepal len', 'sepal width', 'petal len', 'petal width'])

        # Array input: the expected feature names are the indices as strings.
        selector = selector.fit(X, y)
        assert selector.k_feature_idx_ == (1, 2)
        assert selector.k_feature_names_ == ('1', '2')
        assert selector.subsets_[2]['feature_names'] == ('1', '2')

        # DataFrame input: the expected feature names are the column labels.
        selector = selector.fit(frame, y)
        assert selector.subsets_[3]['feature_names'] == top_three_names
        assert selector.subsets_[2]['feature_names'] == top_two_names
        assert selector.subsets_[3]['feature_idx'] == (0, 1, 2)
        assert selector.subsets_[2]['feature_idx'] == (1, 2)
        assert selector.k_feature_idx_ == (1, 2)
        assert selector.k_feature_names_ == top_two_names

        # Fit once more with the testing-interrupt flag set.
        selector._TESTING_INTERRUPT_MODE = True
        interrupted_fit = selector.fit(frame, y)
        assert len(interrupted_fit.subsets_.keys()) > 0
        assert selector.interrupted_
        assert selector.subsets_[3]['feature_names'] == top_three_names
        assert selector.k_feature_idx_ == (0, 1, 2)
        assert selector.k_feature_names_ == top_three_names
def test_check_pandas_dataframe_transform():
    """``transform`` on a DataFrame keeps only the two selected columns."""
    iris = load_iris()
    features, target = iris.data, iris.target
    estimator = SoftmaxRegression(random_seed=1)
    selector = SFS(estimator,
                   k_features=2,
                   forward=True,
                   floating=False,
                   scoring='accuracy',
                   cv=0,
                   verbose=0,
                   n_jobs=1)
    frame = pd.DataFrame(features,
                         columns=['sepal length', 'sepal width',
                                  'petal length', 'petal width'])
    selector = selector.fit(frame, target)
    assert selector.k_feature_idx_ == (1, 3)

    reduced = selector.transform(frame)
    assert (150, 2) == reduced.shape
def test_custom_feature_names():
    """Names passed via ``custom_feature_names`` appear in the results."""
    iris = load_iris()
    features, target = iris.data, iris.target
    estimator = SoftmaxRegression(random_seed=1)
    selector = SFS(estimator,
                   k_features=2,
                   forward=True,
                   floating=False,
                   scoring='accuracy',
                   cv=0,
                   verbose=0,
                   n_jobs=1)
    names = ('sepal length', 'sepal width', 'petal length', 'petal width')
    selector = selector.fit(features, target, custom_feature_names=names)

    selected_names = ('sepal width', 'petal width')
    assert selector.k_feature_idx_ == (1, 3)
    assert selector.k_feature_names_ == selected_names
    assert selector.subsets_[2]['feature_names'] == selected_names
# Split the table into labels (last column) and features (all other
# columns, cast to float).
# NOTE(review): `data` is defined above this fragment; presumably the
# 150 x 5 iris table — confirm.
y = data[:, data.shape[1] - 1]
X = data[:, 0:data.shape[1] - 1].astype(float)

# The first 105 rows are used for training, the remaining rows for testing.
X_train = X[0:105, :]
X_test = X[105:X.shape[0], :]
y_train = y[0:105]
y_test = y[105:y.shape[0]]
del data, X, y

# Map the string labels to the integer classes 0, 1, 2.
classes = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
y_train = np.asarray([classes[item] for item in y_train])
y_test = np.asarray([classes[item] for item in y_test])

# Softmax
# `^` is bitwise XOR in Python (10 ^ 4 == 14, giving eta = 1/14); the
# power operator `**` yields the evidently intended learning rate of 1e-4.
softmax = SoftmaxRegression(eta=1 / (10 ** 4),
                            epochs=500,
                            minibatches=1,
                            random_seed=0,
                            print_progress=3)
softmax.fit(X_train, y_train, init_params=True)

# Disabled cost-curve plot, kept for reference:
# plt.plot(range(len(softmax.cost_)), softmax.cost_)
# plt.xlabel('Iterations')
# plt.ylabel('Cost')
# plt.show()

accuracy = softmax.score(X_test, y_test)
print(accuracy)
from mlxtend.plotting import plot_decision_regions
from mlxtend.classifier import SoftmaxRegression

# Load data
np.random.seed(0)  # fix the result of np.random so the shuffle is repeatable
data = pd.read_csv(r"D:\NAL\IRIS\iris.data", sep=',', header=None)
data = data.values  # presumably shape (150, 5) for the iris file — confirm
np.random.shuffle(data)

# Labels are the last column, features are all other columns.
y = data[:, data.shape[1] - 1]
X = data[:, 0:data.shape[1] - 1]

# The first 105 rows are used for training, the remaining rows for testing.
X_train = X[0:105, :]
X_test = X[105:X.shape[0], :]
y_train = y[0:105]
y_test = y[105:y.shape[0]]
del data, X, y

# One-hot vector label
encoder = LabelBinarizer()
y_train = encoder.fit_transform(y_train)
# Reuse the encoding learned from the training labels. Re-fitting on the
# test labels would build a separate encoding whose columns need not match
# (for example when a class is absent from the test split).
y_test = encoder.transform(y_test)
print(y_train)

# Softmax
softmax = SoftmaxRegression(eta=0.01,
                            epochs=500,
                            minibatches=1,
                            random_seed=0,
                            print_progress=3)

# Disabled fit/plot code, kept for reference. X and y are deleted above,
# so it cannot be re-enabled as written.
# softmax.fit(X_train, y_train)
# plot_decision_regions(X, y, clf=softmax)
# plt.title('Softmax Regression - Gradient Descent')
# plt.show()
from mlxtend.plotting import plot_decision_regions
from mlxtend.classifier import SoftmaxRegression
import matplotlib.pyplot as plt

# Loading Data
X, y = iris_data()
X = X[:, [0, 3]]  # sepal length and petal width

# Standardize each of the two columns in place: subtract the column mean
# and divide by the column standard deviation.
for col in (0, 1):
    X[:, col] = (X[:, col] - X[:, col].mean()) / X[:, col].std()

lr = SoftmaxRegression(random_seed=1,
                       print_progress=3,
                       minibatches=1,
                       epochs=500,
                       eta=0.01)
lr.fit(X, y)

# First figure: decision regions of the fitted model.
plot_decision_regions(X, y, clf=lr)
plt.title('Softmax Regression - Gradient Descent')
plt.show()

# Second figure: the recorded cost values against their index.
plt.plot(range(len(lr.cost_)), lr.cost_)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()

# In[75]:
# Names of the six dataset splits, in the order they are saved and loaded.
_SPLIT_NAMES = ("X_train", "Y_train", "X_dev", "Y_dev", "X_test", "Y_test")


def _split_path(split_name, tag):
    """Return the pickle path of one dataset split for the cache tag ``tag``."""
    return FOLDER + "/" + split_name + "_politic" + tag + ".npy"


def _load_splits(tag):
    """Unpickle the six cached splits and return them in ``_SPLIT_NAMES`` order."""
    splits = []
    for split_name in _SPLIT_NAMES:
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # this is only safe while FOLDER holds files written by this script.
        with open(_split_path(split_name, tag), "rb") as f:
            splits.append(pickle.load(f))
    return splits


def _save_splits(tag, splits):
    """Pickle ``splits`` (given in ``_SPLIT_NAMES`` order) under cache tag ``tag``."""
    for split_name, split in zip(_SPLIT_NAMES, splits):
        with open(_split_path(split_name, tag), "wb") as f:
            pickle.dump(split, f)


def main():
    """Train a softmax regression on tweets and print its accuracies.

    If an ``update_SM<U>.txt`` marker exists in ``FOLDER`` (``U`` being the
    number of users), the six pickled dataset splits named by the marker
    are loaded. Otherwise the tweets are loaded, shuffled, split into
    train/dev/test, vectorised, optionally standardised, pickled, and the
    marker is written. The model is then fitted on the training split and
    its accuracy on the training and development splits is printed.
    """
    # SETUP
    train = 0.9  # fraction of the data used for training
    dev = 0.05  # fraction of the data used for development
    test = 0.05  # fraction of the data used for test
    n_features = 1500  # lowered below if the vectoriser yields fewer columns

    # CountVectorizer from sklearn.feature_extraction.text
    vectorizer = CountVectorizer(
        min_df=20,  # you may want to adjust this
        max_features=n_features,
        lowercase=False)

    DO_STANDARDIZE_DATA = 1  # 1 yes, 0 no

    regularization_lambda = 0.1
    ETA = 0.00005
    EPOCHS = 50

    model_sm = SoftmaxRegression(
        eta=ETA,
        epochs=EPOCHS,
        l2=regularization_lambda,
        #n_classes=U,
        minibatches=1,
        random_seed=1,
        print_progress=3)

    print("-----------------------------")
    print("METHOD - SOFTMAX REGRESSION")
    print("-----------------------------")
    print("Hello,\nwe will use Softmax Regression to classify twitter users\n")

    setpath()

    # Get the users
    screen_names = get_users(FILE_USERS)
    info_data = get_info()
    U = len(screen_names)  # number of users

    for i, screen_name in enumerate(screen_names):
        print("For", screen_name, " one has ", info_data[i, 1], "tweets")

    marker_path = FOLDER + "/update_SM" + str(U) + ".txt"
    if os.path.isfile(marker_path):
        # The marker file stores the tag of the most recently saved dataset.
        with open(marker_path, "r") as h:
            update = h.read()
        print("We load the dataset.")
        X_train, Y_train, X_dev, Y_dev, X_test, Y_test = _load_splits(update)
    else:
        all_tweets = load_data()
        random.shuffle(all_tweets)
        # Always shuffle your opponent cards when you play :)
        # (The second shuffle is redundant but kept so the random stream is
        # consumed exactly as before.)
        random.shuffle(all_tweets)

        # Element 2 of each record is used as the text, element 0 as the label.
        tweets = [record[2] for record in all_tweets]
        labels = [record[0] for record in all_tweets]
        print("We load the data and we create the data set!")
        Y = np.array(labels)  # this is the output label vector
        print("-----------------------------")

        # Hold out dev + test first, then split the held-out part in two.
        X_train_1, x_appoggio, Y_train, y_appoggio = train_test_split(
            tweets, Y, test_size=(dev + test))
        X_dev_1, X_test_1, Y_dev, Y_test = train_test_split(
            x_appoggio, y_appoggio, test_size=(test / (dev + test)))

        print("We will train with the", train * 100, " % of the data;")
        print(dev * 100, "% of the data is reserve for the method development;")
        print(test * 100, "% of the data is for the test.")

        # The vocabulary is learned from the training texts only.
        vectorizer.fit(X_train_1)
        X_train = vectorizer.transform(X_train_1)
        X_dev = vectorizer.transform(X_dev_1)
        X_test = vectorizer.transform(X_test_1)

        if DO_STANDARDIZE_DATA == 0:
            print("We don't standardize data")
        else:
            print(
                "We will provide to the model with standardize data, mean zero and variance 1"
            )
            X_train, X_dev, X_test = standardize_data(X_train, X_dev, X_test)

        del all_tweets
        del X_train_1, X_dev_1, X_test_1, x_appoggio, y_appoggio

        today_string = date.today().strftime("%y_%b_%d")
        tag = today_string + "_SM" + str(U)

        # Save the prepared data (each split once), then record the tag so
        # the next run takes the load branch above.
        _save_splits(tag, (X_train, Y_train, X_dev, Y_dev, X_test, Y_test))
        with open(marker_path, "w") as h:
            h.write(tag)

    D = X_test.toarray().shape[1]  # this is the length of the input vector

    print("\n")
    if n_features > D:
        n_features = D
    print("The # of features is", n_features)
    print("The regularization parameter is", regularization_lambda)
    print("The learning step is", ETA)
    print("The # of cycle is", EPOCHS)
    print("\n")

    # Train the model on dense arrays
    model_sm.fit(X_train.toarray(), Y_train)

    acc = model_sm.score(X_train.toarray(), Y_train)
    acc_dev = model_sm.score(X_dev.toarray(), Y_dev)
    print("\n")
    print("Accuracy on the training set", acc)
    print("Accuracy on the development set", acc_dev)

    # Compute some statistics about the model
    df_score, df_fp, df_pre = compute_accuracies(model_sm, 1, screen_names,
                                                 X_train, X_dev, Y_train,
                                                 Y_dev)
def test_clone():
    """Smoke test: ``clone`` on a default-constructed estimator must not raise."""
    estimator = SoftmaxRegression()
    clone(estimator)
clf8 = RandomForestClassifier(max_features=0.34808889858456293, criterion='entropy', min_samples_split=2, n_estimators=4401) clf9 = KNeighborsClassifier(leaf_size=11, n_neighbors=10) clf10 = svm.SVC(kernel='rbf', probability=True) clf11 = ExtraTreesClassifier(max_features=0.4537270875668709, criterion='entropy', min_samples_leaf=1, min_samples_split=2, n_estimators=3138) # eclf = EnsembleVoteClassifier(clfs=[clf11], weights=[1], voting='soft') lr = SoftmaxRegression() stacks = StackingClassifier( classifiers=[clf1, clf2, clf3, clf7, clf8, clf9, clf10, clf11], meta_classifier=lr) labels = [ 'model_1', 'model_2', 'model_3', 'model_4', 'model_5', 'model_6', 'model_7', 'model_8', 'model_9', 'model_10', 'model_11', 'ensemble' ] for clf, label in zip([ clf1, clf2, clf3, clf4, clf5, clf6, clf7, clf8, clf9, clf10, clf11, stacks ], labels): scores = model_selection.cross_val_score(clf, X[best_columns], Y['target'],
from mlxtend.data import iris_data
from mlxtend.plotting import plot_decision_regions
from mlxtend.classifier import SoftmaxRegression
import matplotlib.pyplot as plt

# Loading Data
X, y = iris_data()
X = X[:, [0, 3]]  # sepal length and petal width

# Standardize each of the two columns in place: subtract the column mean
# and divide by the column standard deviation.
for col in (0, 1):
    X[:, col] = (X[:, col] - X[:, col].mean()) / X[:, col].std()

lr = SoftmaxRegression(random_seed=1,
                       print_progress=3,
                       minibatches=1,
                       epochs=500,
                       eta=0.01)
lr.fit(X, y)

# First figure: decision regions of the fitted model.
plot_decision_regions(X, y, clf=lr)
plt.title('Softmax Regression - Gradient Descent')
plt.show()

# Second figure: the recorded cost values against their index.
plt.plot(range(len(lr.cost_)), lr.cost_)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()
def test_multi_logistic_regression_gd_weights():
    """Full-batch multi-class fit must reproduce the reference weights."""
    expected_weights = np.array([[0.58, -3.72, 3.15],
                                 [-3.52, 3.21, 0.28]])
    model = SoftmaxRegression(random_seed=1,
                              minibatches=1,
                              eta=0.005,
                              epochs=200)
    model.fit(X, y)
    # Compared to two decimal places.
    np.testing.assert_almost_equal(model.w_, expected_weights, 2)
# Sebastian Raschka 2014-2020 # mlxtend Machine Learning Library Extensions # Author: Sebastian Raschka <sebastianraschka.com> # # License: BSD 3 clause import numpy as np from mlxtend.utils import assert_raises from mlxtend.plotting import plot_decision_regions from mlxtend.data import iris_data from mlxtend.classifier import SoftmaxRegression import matplotlib.pyplot as plt plt.switch_backend('agg') X, y = iris_data() sr = SoftmaxRegression(epochs=15, eta=0.01, random_seed=1) def test_pass(): sr.fit(X[:, :2], y) plot_decision_regions(X=X[:, :2], y=y, clf=sr) def test_ylist(): sr.fit(X[:, :2], y) assert_raises(ValueError, 'y must be a NumPy array. Found {}'.format(type([])), plot_decision_regions, X[:, :2], list(y), sr) def test_filler_feature_values_fail():
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from mlxtend.classifier import SoftmaxRegression
from sklearn import datasets

iris = datasets.load_iris()
#X = iris.data[:, [2, 3]]
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.3,
)

############# Softmax Regression ###############
# Fit softmax regression to the training set
foft_regressor = SoftmaxRegression()
foft_regressor.fit(X_train, y_train)

# Predict the test set result
y_pred = foft_regressor.predict(X_test)

print("############ softmax Regression ############")
# Print the confusion matrix first, then the classification report.
for report in (confusion_matrix, classification_report):
    print(report(y_test, y_pred))