def visualizeFit(X, mu, sigma2):
    """Plot the points in X together with contours of the fitted density.

    The density is evaluated by ``multivariateGaussian`` on a square grid
    that covers [0, 35) in both directions with a step of 0.5.

    Args:
        X: 2-D array; columns 0 and 1 are used as plot coordinates.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    n = np.arange(0, 35, 0.5)
    X1, X2 = np.meshgrid(n, n)
    # Flatten the grid and reshape the result in the same row-major order so
    # that Z[i, j] belongs to the point (X1[i, j], X2[i, j]), which is the
    # layout plt.contour expects. Flattening the transposed grids while
    # reshaping row-major would mirror the contours across the diagonal.
    Z = multivariateGaussian(np.column_stack((X1.ravel(), X2.ravel())),
                             mu, sigma2)
    # assumes multivariateGaussian returns one value per input row
    Z = Z.reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx', markersize=5)
    # Skip the contours when the summed density is infinite.
    if not isinf(np.sum(Z)):
        plt.contour(X1, X2, Z, 10.0**np.arange(-20, 0, 3))
def visualizeFit(X, mu, sigma2):
    """Draw the data set and contour lines of its estimated distribution.

    ``multivariateGaussian`` is evaluated at every node of a grid spanning
    [0, 35) x [0, 35) with spacing 0.5, and the result is contoured at the
    levels 1e-20, 1e-17, ..., 1e-2.

    Args:
        X: 2-D array; its first two columns are plotted as blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    n = np.arange(0, 35, 0.5)
    X1, X2 = np.meshgrid(n, n)
    # The grid must be flattened in the same order that is later used to
    # reshape Z; otherwise Z ends up transposed relative to X1/X2 and the
    # contour plot has its x and y axes swapped.
    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    # assumes one density value is returned per grid point
    Z = Z.reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx', markersize=5)
    # Only draw contours when the summed density is finite.
    if not isinf(np.sum(Z)):
        plt.contour(X1, X2, Z, 10.0**np.arange(-20, 0, 3))
Example #3
0
def visualizeFit(p, mu, sigma2):
    """Draw contour lines of the fitted density on the current axes.

    The density is evaluated by ``multivariateGaussian`` on a 70 x 70 grid
    over [0, 35] x [0, 35]. Axis limits and labels are set as well.

    Args:
        p: accepted for interface compatibility; not used in the body.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    ticks = np.linspace(0, 35, num=70)
    X1, X2 = np.meshgrid(ticks, ticks)

    # One grid node per row: (x1, x2)
    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    density = multivariateGaussian(grid_points, mu, sigma2)
    density = np.reshape(density, X1.shape)

    # Levels 1e-20, 1e-17, ..., 1e-2
    levels = np.power(10, np.arange(-20., 1, 3))
    plt.contour(X1, X2, density, levels)

    plt.xlim(0, 35)
    plt.ylim(0, 35)
    plt.xlabel("Latency (ms)")
    plt.ylabel("Throughput (mb/s)")
Example #4
0
def visualizeFit(X, mu, sigma2):
    """Plot the data set and contours of the density from multivariateGaussian.

    The grid covers 0 to 35 in steps of 0.5 along both axes. It is flattened
    and the result reshaped in column-major order, so both stay aligned.

    Args:
        X: 2-D array; columns 0 and 1 are plotted as blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    ticks = np.arange(0, 35.1, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    grid_points = np.column_stack((X1.ravel(order='F'), X2.ravel(order='F')))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    Z = np.reshape(Z, X1.shape, order='F')

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Contours are drawn only when no grid value is infinite
    if not np.isinf(Z).any():
        levels = np.power(10, np.arange(-20, 0.1, 3))
        plt.contour(X1, X2, Z, levels)
def visualizeFit(X, mu, sigma2):
    """Plot the data set with contours of the fitted Gaussian density.

    This visualization shows the probability density function of the
    Gaussian distribution. Each example has a location (x1, x2) that depends
    on its feature values.

    Args:
        X: 2-D array; columns 0 and 1 are plotted as blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    n = np.linspace(0, 35, 71)
    X1, X2 = np.meshgrid(n, n)
    # Flatten and reshape in the same row-major order so Z[i, j] is the
    # density at (X1[i, j], X2[i, j]). Flattening the transposes while
    # reshaping row-major would transpose Z and mirror the contours.
    Z = multivariateGaussian(np.column_stack((X1.ravel(), X2.ravel())),
                             mu, sigma2)
    # assumes multivariateGaussian returns one value per input row
    Z = Z.reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')
    # Do not plot if the summed density is infinite
    if not isinf(np.sum(Z)):
        plt.contour(X1, X2, Z, 10.0**np.arange(-20, 0, 3))
def visualizeFit(X, mu, sigma2):
    """Plot the data set with contours of the fitted density and show it.

    This visualization shows the probability density function of the
    Gaussian distribution. Each example has a location (x1, x2) that depends
    on its feature values. ``show()`` is only called when the contours are
    actually drawn.

    Args:
        X: 2-D array; columns 0 and 1 are plotted as blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    n = np.linspace(0, 35, 71)
    X1, X2 = np.meshgrid(n, n)
    # The grid is flattened row-major and Z is reshaped row-major, which
    # keeps Z[i, j] aligned with (X1[i, j], X2[i, j]). Mixing a transposed
    # flatten with a row-major reshape would swap the axes of the contours.
    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    # assumes one density value is returned per grid point
    Z = Z.reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')
    # Do not plot if the summed density is infinite
    if not isinf(np.sum(Z)):
        plt.contour(X1, X2, Z, 10.0**np.arange(-20, 0, 3))
        show()
def visualizeFit(X, mu, sigma2):
    """Visualize the dataset and its estimated distribution.

    Shows the probability density function of the Gaussian distribution as
    contour lines over a grid from 0 to 35 (step 0.5). Each example has a
    location (x1, x2) that depends on its feature values.

    Args:
        X: 2-D array; columns 0 and 1 are plotted as blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    ticks = np.arange(0, 35.5, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    # One grid node per row, flattened row-major
    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    # Use reshape rather than the in-place ndarray.resize: it leaves the
    # object returned by multivariateGaussian untouched and also works when
    # that result is not a single contiguous segment.
    Z = np.reshape(Z, X1.shape)
    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Do not plot if there are infinities
    if np.sum(np.isinf(Z)) == 0:
        plt.contour(X1, X2, Z, 10.0**np.arange(-20, 0, 3))
Example #8
0
def visualizeFit(X, mu, Sigma2):
    """Open a new figure showing the data set and the fitted density contours.

    The density comes from ``multivariateGaussian`` evaluated on a 71 x 71
    grid over [0, 35] x [0, 35]; the visible axes are limited to [0, 30].

    Args:
        X: 2-D array; columns 0 and 1 are plotted as blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        Sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    ticks = np.linspace(0, 35, 71)
    X1, X2 = np.meshgrid(ticks, ticks)

    # Stack the flattened coordinates as two rows, then transpose to get
    # one (x1, x2) pair per row.
    grid_points = np.vstack((X1.ravel(), X2.ravel())).T
    Z = multivariateGaussian(grid_points, mu, Sigma2)
    Z = np.reshape(Z, (X1.shape[0], -1))

    plt.figure()
    plt.plot(X[:, 0], X[:, 1], 'bx', markersize=4)
    plt.axis([0, 30, 0, 30])

    # Contours are skipped when any grid value is infinite
    if not np.isinf(Z).any():
        levels = 10 ** np.linspace(-20, 0, 7)
        plt.contour(X1, X2, Z, levels)
def visualizeFit(X, mu, sigma2):
    """Visualize the dataset and its estimated distribution.

    Shows the probability density function of the Gaussian distribution as
    contour lines. Each example has a location (x1, x2) that depends on its
    feature values. The grid is flattened and the result reshaped in
    column-major order, so both stay aligned.

    Args:
        X: 2-D array; columns 0 and 1 are plotted as large blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    ticks = np.arange(0, 35.1, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    grid_points = np.column_stack((X1.ravel(order='F'), X2.ravel(order='F')))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    Z = np.reshape(Z, X1.shape, order='F')

    plt.plot(X[:, 0], X[:, 1], 'bx', markersize=13, markeredgewidth=1)

    # Do not plot if there are infinities
    if not np.isinf(Z).any():
        levels = np.power(10, np.arange(-20, 0.1, 3))
        plt.contour(X1, X2, Z, levels)
def visualizeFit(X, mu, sigma2):
    """Visualize the dataset and its estimated distribution.

    Plots the first two columns of X and overlays contour lines of the
    density returned by ``multivariateGaussian`` on a grid from 0 to 35
    (step 0.5). Each example has a location (x1, x2) that depends on its
    feature values.
    """
    ticks = np.arange(0, 35.5, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    # One grid node per row, flattened row-major to match the reshape below
    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    Z = np.reshape(multivariateGaussian(grid_points, mu, sigma2), X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Levels 1e-20, 1e-17, ..., 1e-2
    exponents = range(-20, 0, 3)
    cont_levels = [10 ** exponent for exponent in exponents]

    plt.contour(X1, X2, Z, cmap=plt.cm.Paired, alpha=0.9, levels=cont_levels)
Example #11
0
def visualizeFit(X, mu, sigma2):
    """Show the data points and the contour lines of the fitted density.

    The density is obtained from ``multivariateGaussian`` at every node of
    a grid running from 0 to 35 in steps of 0.5 on both axes. Each example
    has a location (x1, x2) that depends on its feature values.
    """
    axis_values = np.arange(0, 35.5, 0.5)
    X1, X2 = np.meshgrid(axis_values, axis_values)

    # Row-major flatten here, row-major reshape afterwards
    nodes = np.column_stack((X1.ravel(), X2.ravel()))
    density = multivariateGaussian(nodes, mu, sigma2)
    density = density.reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Powers of ten from 1e-20 up to 1e-2, three decades apart
    cont_levels = [10 ** power for power in range(-20, 0, 3)]

    plt.contour(X1, X2, density, cmap=plt.cm.Paired, alpha=0.9,
                levels=cont_levels)
Example #12
0
def visualizeFit(X, mu, sigma2):
    """Visualize the dataset and its estimated distribution.

    Shows the probability density function of the Gaussian distribution as
    contour lines drawn above the data points (zorder=100). Each example
    has a location (x1, x2) that depends on its feature values.
    """
    ticks = np.arange(0, 35.5, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    Z = np.reshape(Z, X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Do not plot if there are infinities (of either sign)
    if not np.isinf(Z).any():
        levels = np.power(10, np.arange(-20., 1, 3))
        plt.contour(X1, X2, Z, levels=levels, zorder=100)
Example #13
0
def visualizeFit(X, mu, sigma2):
    """Open a new figure with the data set and the fitted density contours.

    This visualization shows the probability density function of the
    Gaussian distribution. Each example has a location (x1, x2) that depends
    on its feature values. The grid is flattened and the result reshaped in
    column-major order, so both stay aligned.
    """
    ticks = np.arange(0, 35.5, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    grid_points = np.column_stack((X1.ravel(order='F'), X2.ravel(order='F')))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    Z = Z.reshape(X1.shape, order='F')

    plt.figure()
    plt.plot(X[:, 0], X[:, 1], 'bx', markersize=3, markeredgewidth=0.5)

    # Do not plot if the summed density is infinite
    if not isinf(np.sum(Z)):
        levels = np.power(10, np.arange(-20, 0, 3, dtype=float))
        plt.contour(X1, X2, Z, levels)
Example #14
0
def visualizeFit(X, mu, sigma2):
    """Visualize the dataset and its estimated distribution.

    Shows the probability density function of the Gaussian distribution as
    contour lines over a 61 x 61 grid on [0, 30] x [0, 30]. Each example has
    a location (x1, x2) that depends on its feature values.

    Args:
        X: 2-D array; columns 0 and 1 are plotted as blue crosses.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    coords = linspace(0, 30, 61)
    X1, X2 = meshgrid(coords, coords)
    Z = multivariateGaussian(column_stack((X1.ravel(), X2.ravel())), mu, sigma2)
    Z = reshape(Z, shape(X1))

    # No hold(True)/hold(False) calls: hold() was removed in Matplotlib 3.0,
    # and successive plot calls always draw onto the same axes.
    plot(X[:, 0], X[:, 1], 'bx')
    # Do not plot if there are infinities. The array method is used so the
    # check does not depend on whether `any` is the builtin (which raises on
    # a 2-D array) or the NumPy function.
    if not isinf(Z).any():
        contour(X1, X2, Z, power(10., arange(-20, 0, 3)))
def visualizeFit(X, mu, sigma2):
    """Draw contour lines of the estimated distribution.

    Shows the probability density function of the Gaussian distribution,
    evaluated by ``multivariateGaussian`` on a grid covering [0, 35) in both
    directions with a step of 0.5.

    Args:
        X: accepted for interface compatibility; not used in the body.
        mu: forwarded unchanged to ``multivariateGaussian``.
        sigma2: forwarded unchanged to ``multivariateGaussian``.
    """
    ticks = np.arange(0, 35, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    density = multivariateGaussian(grid_points, mu, sigma2)
    density = np.reshape(density, X1.shape)

    # These levels are the reference values given in the assignment; they
    # could also be derived from the computed probabilities.
    levels = [10 ** exponent for exponent in range(-20, 0, 3)]

    plt.contour(X1, X2, density, levels)
def visualizeFit(X, mu, sigma2):
    """Visualize the dataset and its estimated distribution.

    Shows the probability density function of the Gaussian distribution as
    contour lines. Each example has a location (x1, x2) that depends on its
    feature values. The grid is flattened and the result reshaped in
    column-major order, so both stay aligned.

    Args:
        X: 2-D array; columns 0 and 1 are plotted as large blue crosses.
        mu: forwarded unchanged to ``mvg.multivariateGaussian``.
        sigma2: forwarded unchanged to ``mvg.multivariateGaussian``.
    """
    ticks = np.arange(0, 35.1, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)
    grid_points = np.column_stack((X1.reshape(X1.size, order='F'),
                                   X2.reshape(X2.size, order='F')))
    Z = mvg.multivariateGaussian(grid_points, mu, sigma2)
    Z = Z.reshape(X1.shape, order='F')

    plt.plot(X[:, 0], X[:, 1], 'bx', markersize=13, markeredgewidth=1)

    # No plt.hold(True)/plt.hold(False) calls: pyplot.hold was removed in
    # Matplotlib 3.0, and the contour call always draws onto the same axes.
    # Do not plot if there are infinities
    if np.sum(np.isinf(Z)) == 0:
        plt.contour(X1, X2, Z, np.power(10, np.arange(-20, 0.1, 3)))
Example #17
0
def visualizeFit(X, mu, sigma2):
    """Create a figure with the data set and the fitted density contours.

    This visualization shows the probability density function of the
    Gaussian distribution. Each example has a location (x1, x2) that depends
    on its feature values. A new figure is created, limited to
    [0, 30] x [0, 30] and labelled.
    """
    import numpy as np
    from multivariateGaussian import multivariateGaussian
    import matplotlib.pyplot as plt

    ticks = np.arange(0, 35.5, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    # Two rows of flattened coordinates, transposed to one point per row
    grid_points = np.vstack((X1.ravel(), X2.ravel())).T
    density = multivariateGaussian(grid_points, mu, sigma2)
    Z = density.reshape(X1.shape)

    _, ax = plt.subplots()
    plt.plot(X[:, 0], X[:, 1], 'bx')
    levels = np.power(10., np.arange(-20, 0, 3))
    plt.contour(X1, X2, Z, levels)
    ax.set_xlim(0, 30)
    ax.set_ylim(0, 30)
    plt.xlabel('Latency (ms)')
    plt.ylabel('Throughput (mb/s)')
Example #18
0
def visualizeFit(X, mu, sigma2):
    """Visualize the dataset and its estimated distribution, then show it.

    Shows the probability density function of the Gaussian distribution as
    contour lines. Each example has a location (x1, x2) that depends on its
    feature values. The contours are drawn without an infinity check, and
    ``plt.show()`` is called at the end.
    """
    ticks = np.arange(0, 35, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    # One grid node per row, flattened row-major to match the reshape below
    grid_points = np.column_stack((X1.ravel(), X2.ravel()))
    Z = multivariateGaussian(grid_points, mu, sigma2)
    Z = np.reshape(Z, X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Levels 1e-20, 1e-17, ..., 1e-2
    levels = np.power(10., np.arange(-20, 0, 3))
    plt.contour(X1, X2, Z, levels)
    plt.xlabel('Latency (ms)')
    plt.ylabel('Throughput (mb/s)')
    plt.show()
Example #19
0
def visualizeFit(X, mu, sigma2):
    """Scatter the data set, overlay the fitted density contours and show it.

    The density is evaluated by ``mG.multivariateGaussian`` on a grid
    covering [0, 35) in both directions with a step of 0.5. The visible
    axes are limited to [0, 30] x [0, 30].

    Args:
        X: 2-D array; columns 0 and 1 are drawn as blue crosses.
        mu: forwarded unchanged to ``mG.multivariateGaussian``.
        sigma2: forwarded unchanged to ``mG.multivariateGaussian``.
    """
    X1, X2 = np.meshgrid(np.arange(0, 35, 0.5), np.arange(0, 35, 0.5))

    # One grid node per row, flattened row-major to match the reshape below
    grid_points = np.c_[X1.ravel(), X2.ravel()]
    Z = mG.multivariateGaussian(grid_points, mu, sigma2)
    Z = Z.reshape(np.shape(X1))

    # Plot the points in the bottom layer
    plt.scatter(X[:, 0], X[:, 1], marker="x", color='b', linewidths=0.01)
    plt.axis([0, 30, 0, 30])
    plt.xlabel('Latency (ms)')
    plt.ylabel('Throughput (mb/s)')

    # Plot contours; do not plot if there are infinities.
    # contour takes `linewidths` (plural); a singular `linewidth` keyword is
    # not applied to the contour lines.
    if np.sum(np.isinf(Z)) == 0:
        plt.contour(X1, X2, Z, 10, colors='black', linewidths=0.5)
    plt.show()
Example #20
0
## ================== Part 2: Estimate the dataset statistics ===================
#  For this exercise, we assume a Gaussian distribution for the dataset.
#
#  We first estimate the parameters of our assumed Gaussian distribution,
#  then compute the probabilities for each of the points and then visualize
#  both the overall distribution and where each of the points falls in
#  terms of that distribution.
#
#  The call form of print behaves the same for a single string on Python 2
#  and is required on Python 3.
print('Visualizing Gaussian fit.')

#  Estimate mu and sigma2
mu, sigma2 = estimateGaussian(X)

#  Returns the density of the multivariate normal at each data point (row)
#  of X
p = multivariateGaussian(X, mu, sigma2)

#  Visualize the fit
visualizeFit(X,  mu, sigma2)
plt.xlabel('Latency (ms)')
plt.ylabel('Throughput (mb/s)')
show()

#  raw_input only exists on Python 2; fall back to input on Python 3.
try:
    input_func = raw_input
except NameError:
    input_func = input
input_func("Program paused. Press Enter to continue...")

## ================== Part 3: Find Outliers ===================
#  Now you will find a good epsilon threshold using a cross-validation set
#  probabilities given the estimated Gaussian distribution
#

pval = multivariateGaussian(Xval, mu, sigma2)
from sklearn.svm import SVC
from estimateGaussian import estimateGaussian
from multivariateGaussian import multivariateGaussian
from visualizeFit import visualizeFit
from selectThreshold import selectThreshold

# Load the first data file and pull out the three arrays it provides.
data = loadmat('ex8data1.mat')
X = data['X']
Xval = data["Xval"]
yval = data['yval']
# Plot the first two columns of X as blue crosses.
plt.plot(X[:, 0], X[:, 1], 'bx')
#plt.show()

#1.2
# Estimate the distribution parameters from X and evaluate the density at
# every row of X. (presumably mu is the mean and sig2 the variance -- confirm
# against estimateGaussian)
(mu, sig2) = estimateGaussian(X)
p = multivariateGaussian(X, mu, sig2)
#visualizeFit(p,mu,sig2)
#plt.show()

#1.3

# Evaluate the same fitted distribution on Xval.
pcv = multivariateGaussian(Xval, mu, sig2)

# selectThreshold returns a pair; the first element is used as the cut-off
# below. (presumably the second is the matching F1 score -- confirm)
(epi, f1) = selectThreshold(pcv, yval)
# Boolean mask: True where the value in p is below the threshold.
outliers = (p < epi)
#plt.scatter(X[outliers,0],X[outliers,1],marker ="o",facecolor="none",edgecolor="r",s=70)
#plt.show()

#1.4
# Load the second data file; only its 'X' entry is read here.
mat2 = loadmat("ex8data2.mat")
X2 = mat2["X"]
Example #22
0
import scipy.io as spio
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
import pandas as pd
from estimateGaussian import estimateGaussian
from multivariateGaussian import multivariateGaussian

# Load the data file; squeeze_me=True drops singleton dimensions from the
# loaded arrays.
mat = spio.loadmat('ex8data1.mat', squeeze_me=True)
# print(mat.keys())
data = mat['X']
xval = mat['Xval']
yval = mat['yval']

# Wrap the data in a DataFrame with two named columns so seaborn can plot
# one column against the other.
df = pd.DataFrame(data, columns=["Latency (ms)", "Throughput (mb/s)"])
sns.scatterplot(x="Latency (ms)", y="Throughput (mb/s)", data=df)

plt.show()

# Estimate the distribution parameters from the data and print the shape and
# contents of sigma2 for inspection.
[mu, sigma2] = estimateGaussian(data)
# print(mu)
print(np.shape(sigma2))
print(sigma2)
# Evaluate the fitted distribution at every row of the data.
p = multivariateGaussian(data, mu, sigma2)
Example #23
0
## ================== Part 2: Estimate the dataset statistics ===================
#  For this exercise, we assume a Gaussian distribution for the dataset.
#
#  We first estimate the parameters of our assumed Gaussian distribution,
#  then compute the probabilities for each of the points and then visualize
#  both the overall distribution and where each of the points falls in
#  terms of that distribution.
#
#  The call form of print behaves the same for a single string on Python 2
#  and is required on Python 3.
print('Visualizing Gaussian fit.\n')

#  Estimate mu and sigma2
mu, sigma2 = estimateGaussian(X)

#  Returns the density of the multivariate normal at each data point (row)
#  of X
p = multivariateGaussian(X, mu, sigma2)

#  Visualize the fit
fig = figure()
visualizeFit(X,  mu, sigma2)
xlabel('Latency (ms)')
ylabel('Throughput (mb/s)')
fig.show()

print('Program paused. Press enter to continue.')
#  raw_input only exists on Python 2; fall back to input on Python 3.
try:
    input_func = raw_input
except NameError:
    input_func = input
input_func()

## ================== Part 3: Find Outliers ===================
#  Now you will find a good epsilon threshold using a cross-validation set
#  probabilities given the estimated Gaussian distribution
#
Example #24
0
def ex8():
    """Run the anomaly-detection part of Exercise 8 from start to finish.

    Machine Learning Online Class
    Exercise 8 | Anomaly Detection and Collaborative Filtering

    Steps (progress is printed to stdout; nothing actually pauses):
      1. Load ``ex8data1.mat`` and save a scatter plot as ``figure1.png``.
      2. Fit the distribution with ``estimateGaussian``, draw it with
         ``visualizeFit`` and save ``figure2.png``.
      3. Pick a threshold with ``selectThreshold`` on the cross-validation
         set, circle the training points whose density is below it and save
         ``figure3.png``.
      4. Repeat the estimation and threshold selection on ``ex8data2.mat``
         and print how many training points fall below the threshold.

    The exercise expects ``estimateGaussian``, ``selectThreshold`` and
    ``cofiCostFunc`` to be completed by the student; this driver itself does
    not need to be changed.

    Returns:
        None.
    """

    ## ================== Part 1: Load Example Dataset  ===================
    #  We start this exercise by using a small dataset that is easy to
    #  visualize.
    #
    #  Our example case consists of 2 network server statistics across
    #  several machines: the latency and throughput of each machine.
    #  This exercise will help us find possibly faulty (or very fast) machines.
    #

    print('Visualizing example dataset for outlier detection.\n')

    #  Load the dataset and read the arrays X, Xval and yval from it
    mat = scipy.io.loadmat('ex8data1.mat')
    X = mat['X']
    Xval = mat['Xval']
    yval = mat['yval']

    #  Visualize the example dataset
    plt.plot(X[:, 0], X[:, 1], 'bx')
    plt.axis([0, 30, 0, 30])
    plt.xlabel('Latency (ms)')
    plt.ylabel('Throughput (mb/s)')
    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')

    ## ================== Part 2: Estimate the dataset statistics ===================
    #  For this exercise, we assume a Gaussian distribution for the dataset.
    #
    #  We first estimate the parameters of our assumed Gaussian distribution,
    #  then compute the probabilities for each of the points and then visualize
    #  both the overall distribution and where each of the points falls in
    #  terms of that distribution.
    #
    print('Visualizing Gaussian fit.\n')

    #  Estimate mu and sigma2
    mu, sigma2 = estimateGaussian(X)

    #  Returns the density of the multivariate normal at each data point (row)
    #  of X
    p = multivariateGaussian(X, mu, sigma2)

    #  Visualize the fit
    visualizeFit(X, mu, sigma2)
    plt.xlabel('Latency (ms)')
    plt.ylabel('Throughput (mb/s)')
    plt.savefig('figure2.png')

    print('Program paused. Press enter to continue.\n')

    ## ================== Part 3: Find Outliers ===================
    #  Now you will find a good epsilon threshold using a cross-validation set
    #  probabilities given the estimated Gaussian distribution
    #

    pval = multivariateGaussian(Xval, mu, sigma2)

    epsilon, F1 = selectThreshold(yval, pval)
    print('Best epsilon found using cross-validation: %e' % epsilon)
    print('Best F1 on Cross Validation Set:  %f' % F1)
    print('   (you should see a value epsilon of about 8.99e-05)\n')

    #  Find the outliers in the training set and plot them
    #  (assumes p is 1-D so the mask can index the rows of X -- confirm
    #  against multivariateGaussian)
    outliers = p < epsilon

    #  Draw a red circle around those outliers. Matplotlib fills 'o' markers
    #  by default, which would hide the data point underneath, so the face is
    #  left empty and the edge is drawn 2 points wide instead.
    plt.plot(X[outliers, 0], X[outliers, 1], 'ro', mfc='none', mew=2, ms=10)
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')

    ## ================== Part 4: Multidimensional Outliers ===================
    #  We will now use the code from the previous part and apply it to a
    #  harder problem in which more features describe each datapoint and only
    #  some features indicate whether a point is an outlier.
    #

    #  Load the second dataset; X, Xval and yval are replaced by its arrays
    mat = scipy.io.loadmat('ex8data2.mat')
    X = mat['X']
    Xval = mat['Xval']
    yval = mat['yval']

    #  Apply the same steps to the larger dataset
    mu, sigma2 = estimateGaussian(X)

    #  Training set
    p = multivariateGaussian(X, mu, sigma2)

    #  Cross-validation set
    pval = multivariateGaussian(Xval, mu, sigma2)

    #  Find the best threshold
    epsilon, F1 = selectThreshold(yval, pval)

    print('Best epsilon found using cross-validation: %e' % epsilon)
    print('Best F1 on Cross Validation Set:  %f' % F1)
    print('# Outliers found: %d ' % np.sum(p < epsilon))
    print('   (you should see a value epsilon of about 1.38e-18)\n')