Example #1
0
# Random forest on the synth7 data: fit on all observations, print the
# training error rate and plot the decision boundaries with the dbprobplot /
# dbplot helpers (defined outside this snippet).

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth7.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Number of rounds of bagging (used as the number of trees in the forest)
L = 100

# scikit-learn rejects np.matrix input (TypeError from version 1.2 on), so the
# estimator is always given plain ndarrays: a 2-D feature array and a flat
# 1-D label vector.
X_arr = np.asarray(X)
y_arr = np.asarray(y).ravel()

# Fit model using random forest classifier:
rf_classifier = RandomForestClassifier(n_estimators=L)
rf_classifier.fit(X_arr, y_arr)
y_est = rf_classifier.predict(X_arr)
y_est_prob = rf_classifier.predict_proba(X_arr).T

# Compute classification error on the training data; both operands are flat
# vectors, so the comparison is element-wise (np.mat is gone in NumPy 2.0).
ErrorRate = np.sum(y_arr != y_est, dtype=float) / N
print('Error rate: {:.2f}%'.format(ErrorRate * 100))

# Plot decision boundaries
figure(1)
dbprobplot(rf_classifier, X, y, 'auto', resolution=400)
figure(2)
dbplot(rf_classifier, X, y, 'auto', resolution=400)

show()
from sklearn.ensemble import RandomForestClassifier

# Random forest on the synth7 data: fit on all observations, print the
# training error rate and plot the decision boundaries with the dbprobplot /
# dbplot helpers (defined outside this snippet).

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth7.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Number of rounds of bagging (used as the number of trees in the forest)
L = 100

# scikit-learn rejects np.matrix input (TypeError from version 1.2 on), so the
# estimator is always given plain ndarrays: a 2-D feature array and a flat
# 1-D label vector.
X_arr = np.asarray(X)
y_arr = np.asarray(y).ravel()

# Fit model using random forest classifier:
rf_classifier = RandomForestClassifier(n_estimators=L)
rf_classifier.fit(X_arr, y_arr)
y_est = rf_classifier.predict(X_arr)
y_est_prob = rf_classifier.predict_proba(X_arr).T

# Compute classification error on the training data; both operands are flat
# vectors, so the comparison is element-wise (np.mat is gone in NumPy 2.0).
ErrorRate = np.sum(y_arr != y_est, dtype=float) / N
print('Error rate: {:.2f}%'.format(ErrorRate*100))

# Plot decision boundaries
figure(1)
dbprobplot(rf_classifier, X, y, 'auto', resolution=400)
figure(2)
dbplot(rf_classifier, X, y, 'auto', resolution=400)

show()
Example #3
0
# Bagging of logistic regression classifiers: fit L models on bootstrap
# resamples of (X, y), accumulate their votes, and print the error rate of
# every single model and of the majority-vote ensemble.
# NOTE(review): L, N, X, y, weights, logits and votes are set up before this
# snippet. The code below assumes y and votes are N x 1 columns and logits is
# a list of length L -- confirm against the setup code.

# For each round of bagging
for l in range(L):

    # Extract training set by random sampling with replacement from X and y
    X_train, y_train = bootstrap(X, y, N, weights)

    # Fit logistic regression model to training data and save result.
    # scikit-learn needs plain ndarrays (np.matrix raises TypeError from
    # version 1.2 on) and a flat 1-D label vector.
    logit_classifier = LogisticRegression()
    logit_classifier.fit(np.asarray(X_train), np.asarray(y_train).ravel())
    logits[l] = logit_classifier

    # predict() returns a 1-D array. Turn it into an N x 1 column so that it
    # lines up element-wise with y and votes; a 1-D vector would broadcast
    # against the columns to an N x N result and corrupt both the vote count
    # and the error rate.
    y_est = np.asmatrix(logit_classifier.predict(np.asarray(X))).T
    votes = votes + y_est

    ErrorRate = (y != y_est).sum(dtype=float) / N
    print('Error rate: {:2.2f}%'.format(ErrorRate*100))

# Estimated value of class labels (using 0.5 as threshold) by majority voting
y_est_ensemble = votes > (L / 2)

# Compute error rate of the ensemble
ErrorRate = (y != y_est_ensemble).sum(dtype=float) / N
print('Error rate: {:3.2f}%'.format(ErrorRate*100))

# Wrap the fitted models so the plotting helpers can treat them as one model
ce = BinClassifierEnsemble(logits)
figure(1)
dbprobplot(ce, X, y, 'auto', resolution=200)
figure(2)
dbplot(ce, X, y, 'auto', resolution=200)

show()

print('Ran Exercise 9.2.1')
Example #4
0
# Bagging of logistic regression classifiers: fit L models on bootstrap
# resamples of (X, y), accumulate their votes, and print the error rate of
# every single model and of the majority-vote ensemble.
# NOTE(review): L, N, X, y, weights, logits and votes are set up before this
# snippet -- confirm their shapes against the setup code.

# For each round of bagging
for l in range(L):

    # Extract training set by random sampling with replacement from X and y
    X_train, y_train = bootstrap(X, y, N, weights)

    # Fit logistic regression model to training data and save result.
    # scikit-learn needs plain ndarrays (np.matrix raises TypeError from
    # version 1.2 on) and a flat 1-D label vector.
    logit_classifier = LogisticRegression()
    logit_classifier.fit(np.asarray(X_train), np.asarray(y_train).ravel())
    logits[l] = logit_classifier

    # Column-shaped predictions so they compare element-wise with y.
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    y_est = np.asmatrix(logit_classifier.predict(np.asarray(X))).T
    votes = votes + y_est

    ErrorRate = (y != y_est).sum(dtype=float) / N
    print('Error rate: {0}%'.format(ErrorRate * 100))

# Estimated value of class labels (using 0.5 as threshold) by majority voting
y_est_ensemble = votes > (L / 2)

# Compute error rate of the ensemble
ErrorRate = (y != y_est_ensemble).sum(dtype=float) / N
print('Error rate: {:.1f}%'.format(ErrorRate * 100))

# Wrap the fitted models so the plotting helpers can treat them as one model
ce = BinClassifierEnsemble(logits)
figure(1)
dbprobplot(ce, X, y, 'auto', resolution=200)
figure(2)
dbplot(ce, X, y, 'auto', resolution=200)

show()
# One-vs-rest and multi-class logistic regression on a train/test split.
# NOTE(review): mat_data, X and X_train are defined before this snippet.
X_test = np.matrix(mat_data['X_test'])
y = np.matrix(mat_data['y'])
y_train = np.matrix(mat_data['y_train'])
y_test = np.matrix(mat_data['y_test'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# scikit-learn needs plain ndarrays (np.matrix raises TypeError from version
# 1.2 on) and flat 1-D label vectors.
X_train_arr = np.asarray(X_train)
X_test_arr = np.asarray(X_test)
y_train_arr = np.asarray(y_train).ravel()

# Fit and plot one-vs-rest classifiers; column c of y_test_est holds the
# test-set predictions of the "class c vs. everything else" model.
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
y_test_est = np.asmatrix(np.zeros((y_test.shape[0], C)))
for c in range(C):
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train_arr, (y_train_arr == c).astype(int))
    y_test_est[:, c] = np.asmatrix(logit_classifier.predict(X_test_arr)).T
    figure(c+1)
    dbplot(logit_classifier, X_test, (y_test == c).astype(int), 'auto')

# Plot results for multinomial fit (softmax)
figure(C+1)
logit_classifier = LogisticRegression()
logit_classifier.fit(X_train_arr, y_train_arr)
dbplot(logit_classifier, X_test, y_test, 'auto')

# Compute error rate of the multi-class model on the test set
y_test_ensemble = np.asmatrix(logit_classifier.predict(X_test_arr)).T
ErrorRate = (y_test != y_test_ensemble).sum(dtype=float) / y_test.shape[0]
# Report before show(): show() may block until the figure windows are closed.
print('Error rate (ensemble): {0}%'.format(100*ErrorRate))
show()

Example #6
0
# One-vs-rest and multi-class logistic regression on a train/test split.
# NOTE(review): mat_data and X are defined before this snippet.
X_train = np.matrix(mat_data['X_train'])
X_test = np.matrix(mat_data['X_test'])
y = np.matrix(mat_data['y'])
y_train = np.matrix(mat_data['y_train'])
y_test = np.matrix(mat_data['y_test'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# scikit-learn needs plain ndarrays (np.matrix raises TypeError from version
# 1.2 on) and flat 1-D label vectors.
X_train_arr = np.asarray(X_train)
X_test_arr = np.asarray(X_test)
y_train_arr = np.asarray(y_train).ravel()

# Fit and plot one-vs-rest classifiers; column c of y_test_est holds the
# test-set predictions of the "class c vs. everything else" model.
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
y_test_est = np.asmatrix(np.zeros((y_test.shape[0], C)))
for c in range(C):
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train_arr, (y_train_arr == c).astype(int))
    y_test_est[:, c] = np.asmatrix(logit_classifier.predict(X_test_arr)).T
    figure(c + 1)
    dbplot(logit_classifier, X_test, (y_test == c).astype(int), 'auto')

# Plot results for multinomial fit (softmax)
figure(C + 1)
logit_classifier = LogisticRegression()
logit_classifier.fit(X_train_arr, y_train_arr)
dbplot(logit_classifier, X_test, y_test, 'auto')

# Compute error rate of the multi-class model on the test set
y_test_ensemble = np.asmatrix(logit_classifier.predict(X_test_arr)).T
ErrorRate = (y_test != y_test_ensemble).sum(dtype=float) / y_test.shape[0]
# Report before show(): show() may block until the figure windows are closed.
print('Error rate (ensemble): {0}%'.format(100 * ErrorRate))
show()
# Bagging of logistic regression classifiers: fit L models on bootstrap
# resamples of (X, y), accumulate their votes, and print the error rate of
# every single model and of the majority-vote ensemble.
# NOTE(review): L, N, X, y and weights are defined before this snippet.

# One slot per fitted model, and an N x 1 running count of "class 1" votes
logits = [0]*L
votes = np.zeros((N, 1))

# For each round of bagging
for l in range(L):

    # Extract training set by random sampling with replacement from X and y
    X_train, y_train = bootstrap(X, y, N, weights)

    # Fit logistic regression model to training data and save result.
    # scikit-learn needs plain ndarrays (np.matrix raises TypeError from
    # version 1.2 on) and a flat 1-D label vector.
    logit_classifier = LogisticRegression()
    logit_classifier.fit(np.asarray(X_train), np.asarray(y_train).ravel())
    logits[l] = logit_classifier

    # Column-shaped predictions so they line up element-wise with y and votes.
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    y_est = np.asmatrix(logit_classifier.predict(np.asarray(X))).T
    votes = votes + y_est

    ErrorRate = (y != y_est).sum(dtype=float) / N
    print('Error rate: {0}%'.format(ErrorRate*100))

# Estimated value of class labels (using 0.5 as threshold) by majority voting
y_est_ensemble = votes > (L / 2)

# Compute error rate of the ensemble
ErrorRate = (y != y_est_ensemble).sum(dtype=float) / N
print('Error rate: {:.1f}%'.format(ErrorRate*100))

# Wrap the fitted models so the plotting helpers can treat them as one model
ce = BinClassifierEnsemble(logits)
figure(1)
dbprobplot(ce, X, y, 'auto', resolution=200)
figure(2)
dbplot(ce, X, y, 'auto', resolution=200)

show()
Example #8
0
from sklearn.ensemble import RandomForestClassifier

# Random forest on the synth7 data: fit on all observations, print the
# training error rate and plot the decision boundaries with the dbprobplot /
# dbplot helpers (defined outside this snippet).

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth7.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Number of rounds of bagging (used as the number of trees in the forest)
L = 100

# scikit-learn rejects np.matrix input (TypeError from version 1.2 on), so the
# estimator is always given plain ndarrays: a 2-D feature array and a flat
# 1-D label vector.
X_arr = np.asarray(X)
y_arr = np.asarray(y).ravel()

# Fit model using random forest classifier:
rf_classifier = RandomForestClassifier(n_estimators=L)
rf_classifier.fit(X_arr, y_arr)
y_est = rf_classifier.predict(X_arr)
y_est_prob = rf_classifier.predict_proba(X_arr).T

# Compute classification error on the training data; both operands are flat
# vectors, so the comparison is element-wise (np.mat is gone in NumPy 2.0).
ErrorRate = np.sum(y_arr != y_est, dtype=float) / N
print('Error rate: {:.2f}%'.format(ErrorRate*100))

# Plot decision boundaries
figure(1)
dbprobplot(rf_classifier, X, y, 'auto', resolution=400)
figure(2)
dbplot(rf_classifier, X, y, 'auto', resolution=400)

show()