Example #1
def test_binary_logistic_regression_gd():
    t = np.array([[0.13, -0.12], [-3.07, 3.05]])
    lr = SoftmaxRegression(epochs=200, eta=0.005, minibatches=1, random_seed=1)

    lr.fit(X_bin, y_bin)
    np.testing.assert_almost_equal(lr.w_, t, 2)
    assert (y_bin == lr.predict(X_bin)).all()
def test_multi_logistic_regression_gd_acc():
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=1,
                           random_seed=1)
    lr.fit(X, y)
    assert (y == lr.predict(X)).all()
Example #4
def test_multi_logistic_probas():
    lr = SoftmaxRegression(epochs=200, eta=0.005, minibatches=1, random_seed=1)
    lr.fit(X, y)
    idx = [0, 50, 149]  # sample labels: 0, 1, 2
    y_pred = lr.predict_proba(X[idx])
    exp = np.array([[1.0, 0.0, 0.0], [0.08, 0.60, 0.32], [0.0, 0.00, 0.99]])
    np.testing.assert_almost_equal(y_pred, exp, 2)
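predict_proba returns one softmax-normalized row per sample, so every row sums to 1 and predict is just the argmax of each row. A self-contained sketch of the softmax itself (not mlxtend's internal code):

import numpy as np

def softmax(z):
    # subtract the row max first for numerical stability
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

# net inputs for 2 samples and 3 classes
print(softmax(np.array([[2.0, 1.0, 0.1],
                        [0.5, 2.5, 0.2]])))  # each row sums to 1.0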
def test_score_function():
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=1,
                           random_seed=1)
    lr.fit(X, y)
    acc = lr.score(X, y)
    assert acc == 1.0, acc
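score returns the mean prediction accuracy, so asserting acc == 1.0 is the same as requiring every sample to be classified correctly:

# equivalent manual accuracy check (assuming score is mean accuracy,
# which is how mlxtend documents it)
acc_manual = (lr.predict(X) == y).mean()
assert acc_manual == lr.score(X, y)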
Example #7
def test_progress_3():
    lr = SoftmaxRegression(epochs=1,
                           eta=0.005,
                           minibatches=1,
                           print_progress=3,
                           random_seed=1)

    lr.fit(X_bin, y_bin)  # 0, 1 class
def test_progress_2():
    lr = SoftmaxRegression(epochs=1,
                           eta=0.005,
                           minibatches=1,
                           print_progress=2,
                           random_seed=1)

    lr.fit(X_bin, y_bin)  # 0, 1 class
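The two tests above only exercise the progress-logging paths for a single epoch. In mlxtend, print_progress controls output to stderr during fit: 0 disables it, and increasing values (up to 3) add epochs elapsed and cost, elapsed time, and estimated time to completion.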
def test_multi_logistic_regression_gd_weights():
    t = np.array([[-0.95, -2.45, 3.4],
                  [-3.95, 2.34, 1.59]])
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=1,
                           random_seed=1)
    lr.fit(X, y)
    np.testing.assert_almost_equal(lr.w_, t, 2)
def test_binary_logistic_regression_sgd():
    t = np.array([[-0.68, 0.68], [-3.2, 3.2]])
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=len(y_bin),
                           random_seed=1)

    lr.fit(X_bin, y_bin)  # 0, 1 class
    np.testing.assert_almost_equal(lr.w_, t, 2)
    assert (y_bin == lr.predict(X_bin)).all()
def test_binary_logistic_regression_gd():
    t = np.array([[-0.2, 0.2],
                  [-3.09, 3.09]])
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=1,
                           random_seed=1)

    lr.fit(X_bin, y_bin)
    np.testing.assert_almost_equal(lr.w_, t, 2)
    assert (y_bin == lr.predict(X_bin)).all()
Example #13
def test_binary_l2_regularization_gd():
    t = np.array([[-0.17, 0.17], [-2.26, 2.26]])
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           l2=1.0,
                           minibatches=1,
                           random_seed=1)

    lr.fit(X_bin, y_bin)
    np.testing.assert_almost_equal(lr.w_, t, 2)
    assert (y_bin == lr.predict(X_bin)).all()
def test_binary_logistic_regression_sgd():
    t = np.array([[0.13, -0.12],
                  [-3.06, 3.05]])
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=len(y_bin),
                           random_seed=1)

    lr.fit(X_bin, y_bin)  # 0, 1 class
    np.testing.assert_almost_equal(lr.w_, t, 2)
    assert (y_bin == lr.predict(X_bin)).all()
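Note the minibatches convention used throughout these tests: minibatches=1 means one update per epoch over the full training set (batch gradient descent), while minibatches=len(y_bin) puts one sample in each batch (stochastic gradient descent). That is why the GD and SGD variants of the same test converge to slightly different weights under the same seed:

# batch gradient descent: one update per epoch
gd = SoftmaxRegression(epochs=200, eta=0.005, minibatches=1, random_seed=1)

# stochastic gradient descent: one update per sample
sgd = SoftmaxRegression(epochs=200, eta=0.005, minibatches=len(y_bin), random_seed=1)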
Example #15
def test_refit_weights():
    t = np.array([[0.13, -0.12], [-3.07, 3.05]])
    lr = SoftmaxRegression(epochs=100, eta=0.005, minibatches=1, random_seed=1)

    lr.fit(X_bin, y_bin)
    w1 = lr.w_[0][0]
    w2 = lr.w_[1][0]
    lr.fit(X_bin, y_bin, init_params=False)

    assert w1 != lr.w_[0][0]
    assert w2 != lr.w_[1][0]
    np.testing.assert_almost_equal(lr.w_, t, 2)
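Passing init_params=False to a second fit call continues training from the current weights instead of reinitializing them; the test stores one weight from each row before refitting and asserts both have moved afterwards.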
def test_multi_logistic_probas():
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=1,
                           random_seed=1)
    lr.fit(X, y)
    idx = [0, 50, 149]  # sample labels: 0, 1, 2
    y_pred = lr.predict_proba(X[idx])
    exp = np.array([[0.99, 0.01, 0.00],
                    [0.01, 0.88, 0.11],
                    [0.00, 0.02, 0.98]])
    np.testing.assert_almost_equal(y_pred, exp, 2)
def test_binary_l2_regularization_gd():
    lr = SoftmaxRegression(eta=0.005,
                           epochs=200,
                           minibatches=1,
                           l2_lambda=1.0,
                           random_seed=1)
    lr.fit(X_bin, y_bin)
    y_pred = lr.predict(X_bin)
    expect_weights = np.array([[-0.316, 0.317], [-2.265, 2.265]])

    np.testing.assert_almost_equal(lr.w_, expect_weights, 3)
    acc = sum(y_pred == y_bin) / len(y_bin)
    assert acc == 1.0
Example #21
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from mlxtend.classifier import SoftmaxRegression
from sklearn import datasets

iris = datasets.load_iris()
#X = iris.data[:, [2, 3]]
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

############# Softmax Regression ###############

# Fit softmax regression to the training set
softmax_regressor = SoftmaxRegression()
softmax_regressor.fit(X_train, y_train)

# Predict the test set results
y_pred = softmax_regressor.predict(X_test)

print("############ Softmax Regression ############")
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
Example #22
y = data[:, data.shape[1] - 1]  # labels - shape: (150,)
X = data[:, 0:data.shape[1] - 1].astype(float)  # features - shape: (150, 4)
X_train = X[0:105, :]  # shape: (105, 4)
X_test = X[105:X.shape[0], :]  # shape: (45, 4)
y_train = y[0:105]  # shape: (105,)
y_test = y[105:y.shape[0]]  # shape: (45,)
del data, X, y

# Map labels to 0, 1, 2
classes = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
y_train = [classes[item] for item in y_train]
y_test = [classes[item] for item in y_test]
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

# Softmax
softmax = SoftmaxRegression(eta=1e-4,  # was 1 / (10 ^ 4); '^' is XOR in Python
                            epochs=500,
                            minibatches=1,
                            random_seed=0,
                            print_progress=3)
softmax.fit(X_train, y_train, init_params=True)
"""
plt.plot(range(len(softmax.cost_)), softmax.cost_)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()
"""
accuracy = softmax.score(X_test, y_test)
print(accuracy)
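One pitfall worth remembering from the learning-rate fix above: in Python, ^ is bitwise XOR, not exponentiation, so 1 / (10 ^ 4) evaluates to 1/14 (about 0.071) rather than the intended 1e-4:

print(10 ^ 4)   # 14: bitwise XOR of 0b1010 and 0b0100
print(10 ** 4)  # 10000: exponentiation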
Example #23
def main():
    #SETUP!!!
    train = 0.9  # percentage of data for training
    dev = 0.05  # percentage of data for development
    test = 0.05  # percentage of data for test

    n_features = 1500  #this could be adjusted later by the algorithm

    # this sets up the CountVectorizer from sklearn.feature_extraction.text
    vectorizer = CountVectorizer(
        min_df=20,  #you may want to adjust this
        max_features=n_features,
        lowercase=False)

    DO_STANDARDIZE_DATA = 1  #1 yes, 0 no

    regularization_lambda = 0.1
    ETA = 0.00005
    EPOCHS = 50
    model_sm = SoftmaxRegression(
        eta=ETA,
        epochs=EPOCHS,
        l2=regularization_lambda,
        #n_classes=U,
        minibatches=1,
        random_seed=1,
        print_progress=3)

    print("-----------------------------")
    print("METHOD - SOFTMAX REGRESSION")
    print("-----------------------------")

    print("Hello,\nwe will use Softmax Regression to classify twitter users\n")
    setpath()

    #get the users
    screen_names = get_users(FILE_USERS)
    info_data = get_info()
    U = len(screen_names)  #number of users
    for i in range(U):
        print("For", screen_names[i], " one has ", info_data[i, 1], "tweets")

    if os.path.isfile(FOLDER + "/update_SM" + str(U) + ".txt"):
        with open(FOLDER + "/update_SM" + str(U) + ".txt", "r") as h:
            update = h.read()

        print("We load the dataset.")
        file = FOLDER + "/X_train_politic" + update + ".npy"
        with open(file, 'rb') as f:
            X_train = pickle.load(f)

        file = FOLDER + "/Y_train_politic" + update + ".npy"
        with open(file, 'rb') as f:
            Y_train = pickle.load(f)

        file = FOLDER + "/X_dev_politic" + update + ".npy"
        with open(file, 'rb') as f:
            X_dev = pickle.load(f)

        file = FOLDER + "/Y_dev_politic" + update + ".npy"
        with open(file, 'rb') as f:
            Y_dev = pickle.load(f)

        file = FOLDER + "/X_test_politic" + update + ".npy"
        with open(file, 'rb') as f:
            X_test = pickle.load(f)

        file = FOLDER + "/Y_test_politic" + update + ".npy"
        with open(file, 'rb') as f:
            Y_test = pickle.load(f)
    else:
        all_tweets = load_data()
        random.shuffle(all_tweets)
        random.shuffle(
            all_tweets)  # always shuffle your opponent's cards when you play :)

        tweets = []
        YY = []
        for i in range(len(all_tweets)):
            tweets.append(all_tweets[i][2])
            YY.append(all_tweets[i][0])

        if len(tweets) == len(all_tweets):
            print("We load the data and we create the data set!")

        Y = np.array(YY)  #this is the output label vector

        print("-----------------------------")
        m = len(tweets)
        X_train_1, x_appoggio, Y_train, y_appoggio = train_test_split(
            tweets, Y, test_size=(dev + test))
        X_dev_1, X_test_1, Y_dev, Y_test = train_test_split(
            x_appoggio, y_appoggio, test_size=(test / (dev + test)))
        print("We will train with the", train * 100, " % of the data;")
        print(dev * 100,
              "% of the data is reserve for the method development;")
        print(test * 100, "% of the data is for the test.")

        vectorizer.fit(X_train_1)
        X_train = vectorizer.transform(X_train_1)
        X_dev = vectorizer.transform(X_dev_1)
        X_test = vectorizer.transform(X_test_1)

        if DO_STANDARDIZE_DATA == 0:
            print("We don't standardize the data")
        else:
            print("We will provide the model with standardized data "
                  "(zero mean, unit variance)")
            X_train, X_dev, X_test = standardize_data(X_train, X_dev, X_test)

        del all_tweets
        del X_train_1, X_dev_1, X_test_1, x_appoggio, y_appoggio

        today = date.today()
        today_string = today.strftime("%y_%b_%d")
        # we save the data we have prepared
        file = FOLDER + "/X_train_politic" + today_string + "_SM" + str(
            U) + ".npy"
        with open(file, "wb") as f:
            pickle.dump(X_train, f)

        file = FOLDER + "/Y_train_politic" + today_string + "_SM" + str(
            U) + ".npy"
        with open(file, "wb") as f:
            pickle.dump(Y_train, f)

        file = FOLDER + "/X_dev_politic" + today_string + "_SM" + str(
            U) + ".npy"
        with open(file, "wb") as f:
            pickle.dump(X_dev, f)

        file = FOLDER + "/Y_dev_politic" + today_string + "_SM" + str(
            U) + ".npy"
        with open(file, "wb") as f:
            pickle.dump(Y_dev, f)

        file = FOLDER + "/X_test_politic" + today_string + "_SM" + str(
            U) + ".npy"
        with open(file, "wb") as f:
            pickle.dump(X_test, f)

        file = FOLDER + "/Y_test_politic" + today_string + "_SM" + str(
            U) + ".npy"
        with open(file, "wb") as f:
            pickle.dump(Y_test, f)

        with open(FOLDER + "/update_SM" + str(U) + ".txt", "w") as h:
            h.write(today_string + "_SM" + str(U))
            h.close()

    D = X_test.toarray().shape[1]  # this is the length of the input vector

    print("\n")
    if n_features > D:
        n_features = D
    print("The # of features is", n_features)
    print("The regularization parameter is", regularization_lambda)
    print("The learning step is", ETA)
    print("The # of cycle is", EPOCHS)
    print("\n")

    #WE START TRAINING THE MODEL
    model_sm.fit(X_train.toarray(), Y_train)

    acc = model_sm.score(X_train.toarray(), Y_train)
    acc_dev = model_sm.score(X_dev.toarray(), Y_dev)
    print("\n")
    print("Accuracy on the training set", acc)
    print("Accuracy on the development set", acc_dev)

    #print some statistics about the model
    df_score, df_fp, df_pre = compute_accuracies(model_sm, 1, screen_names,
                                                 X_train, X_dev, Y_train,
                                                 Y_dev)
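Note: setpath, get_users, get_info, load_data, standardize_data, and compute_accuracies, along with the constants FILE_USERS and FOLDER, are project-specific helpers that are not shown here, and the snippet additionally assumes imports of os, pickle, random, date (from datetime), CountVectorizer, and train_test_split; it will not run standalone without them.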
Example #24
from mlxtend.data import iris_data
from mlxtend.plotting import plot_decision_regions
from mlxtend.classifier import SoftmaxRegression
import matplotlib.pyplot as plt

# Loading Data

X, y = iris_data()
X = X[:, [0, 3]]  # sepal length and petal width

# standardize
X[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

lr = SoftmaxRegression(eta=0.01,
                       epochs=500,
                       minibatches=1,
                       random_seed=1,
                       print_progress=3)
lr.fit(X, y)

plot_decision_regions(X, y, clf=lr)
plt.title('Softmax Regression - Gradient Descent')
plt.show()

plt.plot(range(len(lr.cost_)), lr.cost_)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()
Example #25
def test_multi_logistic_regression_gd_weights():
    t = np.array([[0.58, -3.72, 3.15], [-3.52, 3.21, 0.28]])
    lr = SoftmaxRegression(epochs=200, eta=0.005, minibatches=1, random_seed=1)
    lr.fit(X, y)
    np.testing.assert_almost_equal(lr.w_, t, 2)
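For reference, the weights w_ that these tests check against come from minimizing the softmax cross-entropy loss by gradient descent. A minimal from-scratch sketch of that procedure (independent of mlxtend; the initialization and bias handling are textbook choices, not necessarily mlxtend's exact internals):

import numpy as np

def one_hot(y, n_classes):
    out = np.zeros((y.shape[0], n_classes))
    out[np.arange(y.shape[0]), y] = 1.0
    return out

def fit_softmax(X, y, eta=0.005, epochs=200, seed=1):
    # plain batch gradient descent on the mean cross-entropy
    n_classes = int(y.max()) + 1
    rng = np.random.RandomState(seed)
    W = rng.normal(scale=0.01, size=(X.shape[1], n_classes))
    b = np.zeros(n_classes)
    Y = one_hot(y, n_classes)
    for _ in range(epochs):
        z = X @ W + b
        z = z - z.max(axis=1, keepdims=True)  # numerical stability
        P = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
        W -= eta * X.T @ (P - Y) / X.shape[0]  # dL/dW for mean cross-entropy
        b -= eta * (P - Y).mean(axis=0)        # dL/db
    return W, b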