def Highlighting_Test_Data_Points():
    from mlxtend.plotting import plot_decision_regions
    from mlxtend.preprocessing import shuffle_arrays_unison
    import matplotlib.pyplot as plt
    import pandas as pd
    from sklearn.svm import SVC

    # Loading some example data
    data = pd.read_csv('2clstrain1200.csv', header=None)

    X, y = data.iloc[:, 0:2].values, data.iloc[:, 2].values
    X = X.astype(int)
    y = y.astype(int)
    X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)

    X_train, y_train = X[:700], y[:700]
    X_test, y_test = X[700:], y[700:]

    # Training a classifier
    svm = SVC(C=0.5, kernel='linear')
    svm.fit(X_train, y_train)

    # Plotting decision regions
    plot_decision_regions(X, y, clf=svm, legend=2, X_highlight=X_test)

    # Adding axes annotations
    plt.xlabel('feature 1')
    plt.ylabel('feature 2')
    plt.title('SVM on 2clstrain1200 data')
    plt.show()
Example #2
    def main(self):

        print(__doc__)
        # creating an object instance of the sat class (defined elsewhere)
        obj = sat()
        # extracting features and output
        x, y = obj.load_data("gpa.csv")
        # splitting the data
        x_train, x_test, y_train, y_test = obj.split(x, y)
        # scaling the data
        # x_train, x_test, y_train, y_test = obj.scale(x_train, x_test, y_train, y_test)
        # missing-value imputation
        x_train = obj.missing_val(x_train)
        x_test = obj.missing_val(x_test)
        y_train = obj.missing_val(y_train)
        y_test = obj.missing_val(y_test)
        # generating the classifier
        clf = obj.classifier()
        # fitting the model on the training features
        clf.fit(x_train, y_train)
        # plotting the training set
        obj.plot(clf, x_train, y_train, "orange", "blue",
                 "sat score (Training set)", "GPA", "SAT SCORE")
        # plotting the testing set
        obj.plot(clf, x_test, y_test, "orange", "blue",
                 "sat score (Testing set)", "GPA", "SAT SCORE")
        # saving the classifier to disk
        obj.save_classifier(clf, "sat_score.pkl", "wb")
        # loading the classifier back from disk
        clf = obj.load_classifier("sat_score.pkl", "rb")

        # shuffle_arrays_unison and plot_learning_curves are assumed to be
        # imported at module level (mlxtend.preprocessing / mlxtend.evaluate)
        x, y = shuffle_arrays_unison(arrays=[x, y], random_seed=5)

        plot_learning_curves(x_train, y_train, x, y, clf)
        plt.show()
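# The save_classifier/load_classifier helpers used above are presumably thin
# pickle wrappers; a minimal sketch of what they might look like (hypothetical
# -- the sat class itself is not part of this snippet):
import pickle

def save_classifier(clf, path, mode="wb"):
    # serialize the fitted model to disk
    with open(path, mode) as f:
        pickle.dump(clf, f)

def load_classifier(path, mode="rb"):
    # restore a previously saved model
    with open(path, mode) as f:
        return pickle.load(f)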
def test_shuffle_arrays_unison():
    import numpy as np
    from mlxtend.preprocessing import shuffle_arrays_unison

    X1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y1 = np.array([1, 2, 3])

    X2, y2 = shuffle_arrays_unison(arrays=[X1, y1], random_seed=3)

    # compare element-wise; the original `X2.all() == expected.all()` compared
    # two scalars and would pass for almost any result
    assert np.array_equal(X2, np.array([[4, 5, 6], [1, 2, 3], [7, 8, 9]]))
    assert np.array_equal(y2, np.array([2, 1, 3]))
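# The same check using numpy's testing helpers, which raise with a readable
# diff on mismatch instead of a bare AssertionError:
def test_shuffle_arrays_unison_verbose():
    import numpy as np
    from mlxtend.preprocessing import shuffle_arrays_unison

    X2, y2 = shuffle_arrays_unison(
        arrays=[np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                np.array([1, 2, 3])],
        random_seed=3)
    np.testing.assert_array_equal(X2, [[4, 5, 6], [1, 2, 3], [7, 8, 9]])
    np.testing.assert_array_equal(y2, [2, 1, 3])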
Example #5
import os
from glob import glob
import numpy as np
from mlxtend.preprocessing import shuffle_arrays_unison

batch_count = 0  # cycles through the training files across calls


def batch_generator(file_size):
    # the 1000 samples per file must split evenly into file_size batches
    if 1000 % file_size != 0:
        print('wrong size!')
        os._exit(1)

    global batch_count
    file_num = int(batch_count % 11)
    batch_count += 1

    filenames = glob('data/radarTrend.train.*.input.npy')
    filenames.sort()
    x_train = np.load(filenames[file_num])

    filenames = glob('data/radarTrend.train.*.label.npy')
    filenames.sort()
    y_train = np.load(filenames[file_num])

    x_train, y_train = shuffle_arrays_unison(arrays=[x_train, y_train])
    x_train = np.split(x_train, file_size)
    y_train = np.split(y_train, file_size)
    x_train = np.array(x_train)
    y_train = np.array(y_train)

    file_num = np.random.choice(10)

    filenames = glob('data/radarTrend.test.*.input.npy')
    filenames.sort()  # sort so file_num indexes the files reproducibly, as above
    x_valid = np.load(filenames[file_num])

    filenames = glob('data/radarTrend.test.*.label.npy')
    filenames.sort()
    y_valid = np.load(filenames[file_num])

    x_valid, y_valid = shuffle_arrays_unison(arrays=[x_valid, y_valid])
    x_valid = np.split(x_valid, file_size)
    y_valid = np.split(y_valid, file_size)
    x_valid = np.array(x_valid)
    y_valid = np.array(y_valid)

    return x_train, y_train, x_valid, y_valid
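# Minimal usage sketch (assumes data/ holds the eleven training and ten test
# .npy files globbed above):
# x_tr, y_tr, x_va, y_va = batch_generator(file_size=10)  # 1000 % 10 == 0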
def Customizing_the_plotting_style():
    from mlxtend.plotting import plot_decision_regions
    from mlxtend.preprocessing import shuffle_arrays_unison
    import matplotlib.pyplot as plt
    import pandas as pd
    from sklearn.svm import SVC

    # Loading some example data
    data = pd.read_csv('2clstrain1200.csv', header=None)

    X, y = data.iloc[:, 0:2].values, data.iloc[:, 2].values
    X = X.astype(int)
    y = y.astype(int)
    X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)

    X_train, y_train = X[:700], y[:700]
    X_test, y_test = X[700:], y[700:]

    # Training a classifier
    svm = SVC(C=0.5, kernel='linear')
    svm.fit(X_train, y_train)

    # Specify keyword arguments to be passed to underlying plotting functions
    scatter_kwargs = {'s': 120, 'edgecolor': None, 'alpha': 0.7}
    contourf_kwargs = {'alpha': 0.2}
    scatter_highlight_kwargs = {'s': 120, 'label': 'Test data', 'alpha': 0.7}
    # Plotting decision regions
    plot_decision_regions(X,
                          y,
                          clf=svm,
                          legend=2,
                          X_highlight=X_test,
                          scatter_kwargs=scatter_kwargs,
                          contourf_kwargs=contourf_kwargs,
                          scatter_highlight_kwargs=scatter_highlight_kwargs)

    # Adding axes annotations
    plt.xlabel('feature 1')
    plt.ylabel('feature 2')
    plt.title('SVM on 2clstrain1200 data')
    plt.show()
Example #7
import numpy as np
from sklearn import datasets
from mlxtend.preprocessing import shuffle_arrays_unison


# dropout function
def dropout(a, prob):
    shape = a.shape[0]
    vec = np.random.choice([0, 1], size=(shape, 1), p=[prob, 1 - prob])
    return vec * a
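# Note: this is plain (non-inverted) dropout -- units are zeroed with
# probability prob, but the surviving activations are not rescaled by
# 1 / (1 - prob), so mean activation magnitude shrinks during training.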

# load dataset
iris = datasets.load_iris()
data = iris.data
actual = iris.target

# normalize the inputs (inputn is defined elsewhere in the original source)
data = inputn(data)
actual = inputn(actual)
# shuffle both arrays in unison
data, actual = shuffle_arrays_unison(arrays=[data, actual], random_seed=3)

# initialize weights; divide by sqrt(4) = 2 to prevent exploding gradients
syn0 = np.random.randn(4, 4) / 2
syn1 = np.random.randn(3, 4) / 2
bias0 = np.random.randn(4, 1) / 2
bias1 = np.random.randn(3, 1) / 2

# store values for plotting the training curve
xarr = []
yarr = []

rate_syn = 0.1
rate_bias = 0.1

for epoch in range(200):
    # training-loop body truncated in the original snippet
    ...
Example #8
import numpy as np

# Load the dataset: 60 feature columns plus a class label ('R' or 'M') in the
# last column; 208 rows in total.
X = np.loadtxt('sonar.txt', usecols=range(0, 60), delimiter=',')
labels = np.genfromtxt('sonar.txt', delimiter=',', usecols=-1, dtype=str)

# Traverse the labels array and replace 'R' with 1 and 'M' with 0.
for i, label in enumerate(labels):
    if label == 'R':
        labels[i] = 1
    else:
        labels[i] = 0

y = np.array(labels)
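# The loop above can equivalently be written as one vectorized expression,
# which yields integers directly:
# y = np.where(labels == 'R', 1, 0)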

# Shuffle the data so that the test data towards the end does not end up with only 0's
from mlxtend.preprocessing import shuffle_arrays_unison
X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)

# Convert y from an array of strings to an array of integers
y = y.astype(int)

# Initialize the MLP class (defined elsewhere in the original source) and fit
# the multi-layer perceptron on X and y; the class splits the data into
# training and test sets internally. predict() then returns predictions for
# the held-out test data. The default number of iterations is 100 (set via
# n_iters) and the default learning rate is 0.01; each iteration prints
# accuracy metrics for the training predictions.
mlp = MLP()
mlp.fit(X, y)
y_predicted = mlp.predict()

# An accuracy of about 70% is reached on the test data after 100 epochs.
Example #9
# mnist.py
from mlxtend.data import mnist_data
from mlxtend.classifier import MultiLayerPerceptron as MLP
from mlxtend.preprocessing import shuffle_arrays_unison
X, y = mnist_data()
X, y = shuffle_arrays_unison((X, y), random_seed=1)
X_train, y_train = X[:500], y[:500]
X_test, y_test = X[500:], y[500:]

import matplotlib.pyplot as plt


def plot_digit(X, y, idx):
    img = X[idx].reshape(28, 28)
    plt.imshow(img, cmap='Greys', interpolation='nearest')
    plt.title('true label: %d' % y[idx])
    plt.show()


plot_digit(X, y, 3500)

from mlxtend.preprocessing import standardize

X_train_std, params = standardize(X_train,
                                  columns=range(X_train.shape[1]),
                                  return_params=True)
X_test_std = standardize(X_test, columns=range(X_test.shape[1]), params=params)
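# Note: the test split is standardized with the parameters learned on the
# training split (params), so both splits share the same transformation.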
nn1 = MLP(hidden_layers=[150],
          l2=0.00,
          l1=0.0,
          epochs=100)  # further keyword arguments omitted in the original snippet
Example #10
# In[47]:

# nn2 is a second MLP fitted in earlier notebook cells (not shown here)
plt.plot(range(len(nn2.cost_)), nn2.cost_)
plt.ylabel('Cost')
plt.xlabel('Epochs')
plt.show()

# ## Example 2 - Classifying Handwritten Digits from a 10% MNIST Subset

# In[53]:

from mlxtend.data import mnist_data
from mlxtend.preprocessing import shuffle_arrays_unison

X, y = mnist_data()
X, y = shuffle_arrays_unison((X, y), random_seed=1)
X_train, y_train = X[:500], y[:500]
X_test, y_test = X[500:], y[500:]

# Visualize a sample from the MNIST dataset to check if it was loaded correctly:

# In[55]:

import matplotlib.pyplot as plt


def plot_digit(X, y, idx):
    img = X[idx].reshape(28, 28)
    plt.imshow(img, cmap='Greys', interpolation='nearest')
    plt.title('true label: %d' % y[idx])
    plt.show()
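# The notebook presumably calls the helper next, as in the mnist.py example
# above:
# plot_digit(X, y, 3500)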
Example #11
# TODO: error

from mlxtend.evaluate import plot_learning_curves
import matplotlib.pyplot as plt
from mlxtend.data import iris_data
from mlxtend.preprocessing import shuffle_arrays_unison
from sklearn.neighbors import KNeighborsClassifier


# Loading some example data
X, y = iris_data()
X, y = shuffle_arrays_unison(arrays=[X, y])
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = KNeighborsClassifier(n_neighbors=5)
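# plot_learning_curves plots training and test error for increasing
# training-set sizes, which helps diagnose over- vs. underfitting.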

plot_learning_curves(X_train, y_train, X_test, y_test, clf)
plt.show()
Example #12
import pickle
from sklearn.datasets import fetch_mldata  # removed in scikit-learn 0.22; fetch_openml('mnist_784') replaces it
from mlxtend.preprocessing import shuffle_arrays_unison

# b0 and b1 are file handles opened earlier in the original source
bias0 = pickle.load(b0)
bias1 = pickle.load(b1)

# load MNIST dataset
mnist = fetch_mldata("MNIST original")

# learning rates for weights and biases
rate_syn = .0001
rate_bias = .0001

# data set
xarr = mnist.data
yarr = mnist.target

# shuffle arrays
xarr, yarr = shuffle_arrays_unison(arrays=[xarr, yarr], random_seed=4)
# only use the first 1000 samples for training
xarr = xarr[:1000]
yarr = yarr[:1000]

# graphing variables
xbar = []
ybar = []
correct = 0
loss = 0

# training loop (body truncated in the original snippet; `layer` is a
# convolution helper defined elsewhere in the original source)
for epoch in range(10):
    for image in range(1000):
        # convolution operation with max pooling
        input = layer.convert_to_2d_image(xarr[image])
Example #13
import matplotlib.pyplot as plt
from mlxtend.classifier import MultiLayerPerceptron as MLP

# nn2 is an MLP fitted earlier in the original script (not shown here)
plt.plot(range(len(nn2.cost_)), nn2.cost_)
plt.ylabel('Cost')
plt.xlabel('Epochs')
plt.show()


# Classify handwritten digits from a 10% mnist subset

from mlxtend.data import mnist_data
from mlxtend.preprocessing import shuffle_arrays_unison, standardize


X, y = mnist_data()
X, y = shuffle_arrays_unison((X, y), random_seed=1)
X_train, y_train = X[:500], y[:500]
X_test, y_test = X[500:], y[500:]

def plot_digit(X, y, idx):
    img = X[idx].reshape(28, 28)  # 784 => 28 * 28
    plt.imshow(img, cmap='Greys', interpolation='nearest')
    plt.title('true label: %d' % y[idx])
    plt.show()

plot_digit(X, y, 3500)

X_train_std, params = standardize(X_train, columns=range(X_train.shape[1]), return_params=True)
X_test_std = standardize(X_test, columns=range(X_test.shape[1]), params=params)

nn1 = MLP(hidden_layers=[150])  # further keyword arguments omitted in the original snippet