#!/usr/bin/env python
# Scatter-plot the 'heightWeightData' samples, one marker style per sex code,
# and save the figure.  The ellipse helper at the bottom is only partially
# visible in this excerpt.

import matplotlib.pyplot as pl
import numpy as np
from matplotlib.patches import Ellipse

from utils import util

data = util.load_mat('heightWeight/heightWeight')
data = data['heightWeightData']

# Column 0 is the sex code; columns 1 and 2 are the plotted coordinates.
sex, x, y = data[:, 0], data[:, 1], data[:, 2]

# Boolean masks: code 1 selects the "male" rows, code 2 the "female" rows.
male_arg = (sex == 1)
female_arg = (sex == 2)
x_male, y_male = x[male_arg], y[male_arg]
x_female, y_female = x[female_arg], y[female_arg]

fig = pl.figure()
ax = fig.add_subplot(111)
ax.plot(x_male, y_male, 'bx')      # blue crosses
ax.plot(x_female, y_female, 'ro')  # red circles
pl.savefig('gaussHeightWeight_1.png')


def draw_ell(cov, xy, color):
    # NOTE(review): only the opening statements of this helper are visible
    # here; presumably it goes on to draw an Ellipse patch -- confirm
    # against the full file.
    # Eigen-decomposition of the symmetric matrix `cov`.
    u, v = np.linalg.eigh(cov)
    # Angle from the first row of the eigenvector matrix, converted from
    # radians to degrees.
    angle = np.arctan2(v[0, 1], v[0, 0])
    angle = (180 * angle / np.pi)
    # (original note) here we multiply u2 by 5, assuming 95% are in this
    # ellipse
#!/usr/bin/env python
# Show the 1000 longest documents of the '20news_w100' data as a grey-scale
# image, with rows ordered by newsgroup label.

import matplotlib.pyplot as pl
import numpy as np
from scipy import ndimage

import utils.util as util

data = util.load_mat('20news_w100')
# Densify the sparse matrix and transpose it so axis 0 indexes documents.
documents = data['documents'].toarray().T
newsgroups = data['newsgroups'][0]

# Rank documents by their total word count and keep the top 1000,
# largest first.
word_totals = np.sum(documents, axis=1)
chosen_docs_arg = np.argsort(word_totals)[-1000:][::-1]
documents = documents[chosen_docs_arg]
newsgroups = newsgroups[chosen_docs_arg]

# Reorder the kept documents by newsgroup label.
sorted_arg = np.argsort(newsgroups)
documents = documents[sorted_arg]
newsgroups = newsgroups[sorted_arg]

# Stretch the image by a factor of 10 along axis 1 so it is wide enough
# to look at.
image = ndimage.zoom(documents, (1, 10))
pl.imshow(image, cmap=pl.cm.gray, interpolation='none')

# Draw a red line between different newsgroups (the drawing call itself is
# past the end of this excerpt).
groups_label = np.unique(newsgroups)
for i in range(len(groups_label) - 1):
    # Row index of the first document belonging to the next group.
    y, = np.where(newsgroups == groups_label[i + 1])
    y = y[0]
#!/usr/bin/env python
# Fit logistic model to SAT scores.

import matplotlib.pyplot as pl
import numpy as np
from scipy.special import logit
from sklearn.linear_model import LogisticRegressionCV

import utils.util as util

data = util.load_mat('sat/sat.mat')
sat = data['sat']

# Column index 3 holds the SAT scores (the feature); column index 0 is used
# as the target.
X, y = sat[:, 3], sat[:, 0]
X = X.reshape((len(X), 1))
y = y.reshape((len(X), 1))

logistic = LogisticRegressionCV()
# Function-call form of print: valid on both Python 2 and Python 3
# (the bare `print X` statement is a SyntaxError on Python 3).
print(X)
# scikit-learn estimators expect a 1-D target vector; flatten only for the
# fit so `y` keeps its column shape for the code below.
model = logistic.fit(X, y.ravel())

# Solve for the decision boundary: the score at which the predicted
# probability equals 0.5.
a = model.coef_
b = model.intercept_
threshold = (logit(0.5) - b) / a

pl.axis([450, 655, -.05, 1.05])
pl.plot(X, y, 'ko')                              # observed targets
pl.plot(X, model.predict_proba(X)[:, 1], 'ro')   # fitted probabilities
# Blue crosses at score 525 for both target values.
pl.plot(525, 0, 'bx', linewidth=2, markersize=14)
pl.plot(525, 1, 'bx', linewidth=2, markersize=14)
#!/usr/bin/env python
# Fit a multinomial naive Bayes classifier to the 'XwindowsDocData' training
# set and save one bar chart of per-feature frequencies for each class.

import matplotlib.pyplot as pl
import numpy as np
from sklearn.naive_bayes import MultinomialNB

import utils.util as util

data = util.load_mat('XwindowsDocData')
xtrain = data['xtrain']
ytrain = data['ytrain']

clf = MultinomialNB()
clf.fit(xtrain, ytrain.ravel())

counts = clf.feature_count_   # one row of feature counts per class
y_counts = clf.class_count_   # one sample count per class
for i, (class_counts, class_total) in enumerate(zip(counts, y_counts)):
    pl.figure()
    # Feature counts normalised by the number of samples in the class.
    pl.bar(np.arange(len(class_counts)), class_counts / class_total)
    # Titles are numbered from 1, file names from 0.
    pl.title('p(xj=1|y=%d)' % (i + 1))
    pl.savefig('naiveBayesBowDemo_%d.png' % i)
pl.show()
#!/usr/bin/env python
# Display nine MNIST test digits (figure 1) next to copies whose pixels have
# been randomly permuted (figure 2).

import matplotlib.pyplot as pl
import numpy as np

from utils import util

data = util.load_mat('mnistAll')
mnist = data['mnist']
# Shape notes carried over from the original author.
train_images = mnist['train_images'][0][0]  # 28*28*60000
train_labels = mnist['train_labels'][0][0]  # 60000*1
test_images = mnist['test_images'][0][0]    # 28*28*10000
test_labels = mnist['test_labels'][0][0]    # 10000*1

fig1 = pl.figure(1)
fig2 = pl.figure(2)
np.random.seed(seed=10)  # fixed seed so the permutations are repeatable

# Subplot positions in a 3x3 grid are numbered 1..9; the same number is used
# to pick the test image.
for i in range(1, 10):
    img = test_images[:, :, i]

    ax1 = fig1.add_subplot(3, 3, i)
    ax1.imshow(img)
    ax1.set_xticks(())
    ax1.set_yticks(())
    ax1.set_title('true class = %s' % test_labels[i])

    img_shuffled = img.copy()
    # np.random.shuffle only permutes along the first axis, so shuffle the
    # flattened view of the copy in place.
    np.random.shuffle(img_shuffled.ravel())
    img_shuffled = img_shuffled.reshape(img.shape)

    ax2 = fig2.add_subplot(3, 3, i)
    ax2.imshow(img_shuffled)
#!/usr/bin/env python
# Plot the 'heightWeightData' samples split by sex code and save the scatter
# plot.  Only the first statements of the ellipse helper are visible in this
# excerpt.

import matplotlib.pyplot as pl
import numpy as np
from matplotlib.patches import Ellipse

import utils.util as util

data = util.load_mat('heightWeight')
data = data['heightWeightData']

# First column: sex code.  Second and third columns: plotted coordinates.
sex = data[:, 0]
x, y = data[:, 1], data[:, 2]

# Row masks for sex code 1 ("male") and sex code 2 ("female").
male_arg = (sex == 1)
female_arg = (sex == 2)

x_male, x_female = x[male_arg], x[female_arg]
y_male, y_female = y[male_arg], y[female_arg]

fig = pl.figure()
ax = fig.add_subplot(111)
ax.plot(x_male, y_male, 'bx')
ax.plot(x_female, y_female, 'ro')
pl.savefig('gaussHeightWeight_1.png')


def draw_ell(cov, xy, color):
    # NOTE(review): the rest of this helper lies outside the visible text;
    # presumably it builds an Ellipse patch from these values -- confirm.
    # Eigenvalues `u` and eigenvectors `v` of the symmetric matrix `cov`.
    u, v = np.linalg.eigh(cov)
    # Orientation in radians from the first row of `v`, then in degrees.
    angle = np.arctan2(v[0, 1], v[0, 0])
    angle = (180 * angle / np.pi)
    # (original note) here we multiply u2 by 5, assuming 95% are in this
    # ellipse
#!/usr/bin/env python
# Fit linear and quadratic surfaces to data
# Based on code by Romain Thibaux <*****@*****.**>

import matplotlib.pyplot as pl
import numpy as np
# Imported for its side effect as well: older Matplotlib versions register
# the '3d' projection only when this module is loaded.
from mpl_toolkits.mplot3d import Axes3D

import utils.util as util

data = util.load_mat('moteData/moteData.mat')
X = data['X']
y = data['y']
X_pad = util.add_ones(X)

# One figure per model: first the plain design matrix, then the design
# matrix extended with squared features.
for use_quad in (False, True):
    phi = X_pad
    if use_quad:
        phi = np.column_stack((X_pad, X**2))

    fig = pl.figure()
    # Create the 3-D axes through the figure.  A bare `Axes3D(fig)` is no
    # longer attached to the figure automatically on current Matplotlib,
    # which leaves the saved/shown figure empty.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_zlim(15, 19)
    ax.scatter(X[:, 0], X[:, 1], y)

    # 10x10 evaluation grid spanning the observed range of both features.
    # NOTE: `xrange` shadows the Python 2 builtin; the name is kept because
    # code past the end of this excerpt may refer to it.
    xrange = np.linspace(min(X[:, 0]), max(X[:, 0]), 10)
    yrange = np.linspace(min(X[:, 1]), max(X[:, 1]), 10)
    xx, yy = np.meshgrid(xrange, yrange)
    # Column vector of grid x-coordinates; -1 follows the grid size instead
    # of hard-coding 100.
    flatxx = xx.reshape((-1, 1))
#!/usr/bin/env python
# Render the 1000 documents with the most words from '20news_w100' as an
# image whose rows are grouped by newsgroup label.

import matplotlib.pyplot as pl
import numpy as np
from scipy import ndimage

from utils import util

data = util.load_mat('20news_w100')
documents = data['documents']
# Convert to a dense array with one row per document.
documents = documents.toarray().T
newsgroups = data['newsgroups'][0]

# Select the 1000 documents with the largest row sums, in descending order.
order_by_length = np.argsort(np.sum(documents, axis=1))
chosen_docs_arg = order_by_length[-1000:][::-1]
documents, newsgroups = documents[chosen_docs_arg], newsgroups[chosen_docs_arg]

# Group the selected documents by newsgroup label.
sorted_arg = np.argsort(newsgroups)
documents, newsgroups = documents[sorted_arg], newsgroups[sorted_arg]

# Zoom by 10 along axis 1 so the image is wide enough to display.
image = ndimage.zoom(documents, (1, 10))
pl.imshow(image, cmap=pl.cm.gray, interpolation='none')

# Draw a red line between different newsgroups (the plotting call is beyond
# the visible part of this script).
groups_label = np.unique(newsgroups)
for i in range(len(groups_label) - 1):
    # First row at which the next label appears.
    y, = np.where(newsgroups == groups_label[i + 1])
    y = y[0]
#!/usr/bin/env python
# Fit logistic model to SAT scores.

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import logit
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression, LinearRegression

from utils import util

data = util.load_mat('sat/sat.mat')
sat = data['sat']

# Column index 3 holds the SAT scores; column index 0 is the target.
# X becomes a single-feature column matrix, y is left 1-D.
X, y = sat[:, 3], sat[:, 0]
X = X.reshape((len(X), 1))

# A very large C effectively turns regularization off.  (The original also
# noted LogisticRegressionCV as an alternative: "by default, cv=None, C=10".)
logistic = LogisticRegression(C=1e9)
model = logistic.fit(X, y)

# 100 evenly spaced scores between 400 and 700, as a column matrix.
xtest = np.linspace(400, 700, 100).reshape((-1, 1))

# Solve for the decision boundary: the score where the predicted
# probability is 0.5.
a, b = model.coef_, model.intercept_
threshold = (logit(0.5) - b) / a

fig, ax = plt.subplots()
# Fixed axis limits are currently disabled:
#plt.axis([450, 655, -.05, 1.05])
#!/usr/bin/env python
# Fit linear and quadratic surfaces to data
# Based on code by Romain Thibaux <*****@*****.**>

import matplotlib.pyplot as pl
import numpy as np
# Kept even though the class is not called directly: importing it registers
# the '3d' projection on older Matplotlib versions.
from mpl_toolkits.mplot3d import Axes3D

import utils.util as util

data = util.load_mat('moteData/moteData.mat')
X = data['X']
y = data['y']
X_pad = util.add_ones(X)

# Two passes: the padded features alone, then padded features plus squares.
for use_quad in (False, True):
    phi = X_pad
    if use_quad:
        phi = np.column_stack((X_pad, X**2))

    fig = pl.figure()
    # Ask the figure for the 3-D axes.  Constructing `Axes3D(fig)` directly
    # does not add the axes to the figure on current Matplotlib, so nothing
    # would be drawn.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_zlim(15, 19)
    ax.scatter(X[:, 0], X[:, 1], y)

    # Evaluation grid: 10 points per feature over the observed range.
    # NOTE: `xrange` hides the Python 2 builtin of the same name; it is left
    # unchanged because later code (not visible here) may use it.
    xrange = np.linspace(min(X[:, 0]), max(X[:, 0]), 10)
    yrange = np.linspace(min(X[:, 1]), max(X[:, 1]), 10)
    xx, yy = np.meshgrid(xrange, yrange)
    # Flatten the grid x-coordinates into a column; -1 tracks the grid size
    # rather than assuming 100 points.
    flatxx = xx.reshape((-1, 1))
#!/usr/bin/env python
# Show nine MNIST test digits (figure 1) together with pixel-shuffled copies
# of the same digits (figure 2).

import matplotlib.pyplot as pl
import numpy as np

import utils.util as util

data = util.load_mat('mnistAll')
mnist = data['mnist']
# Shape notes carried over from the original author.
train_images = mnist['train_images'][0][0]  # 28*28*60000
train_labels = mnist['train_labels'][0][0]  # 60000*1
test_images = mnist['test_images'][0][0]    # 28*28*10000
test_labels = mnist['test_labels'][0][0]    # 10000*1

fig1 = pl.figure(1)
fig2 = pl.figure(2)
np.random.seed(seed=10)  # fixed seed so the shuffles are reproducible

# add_subplot positions in a 3x3 grid run from 1 to 9; starting the loop at
# 0 (as range(10) did) makes the very first add_subplot call fail.
for i in range(1, 10):
    img = test_images[:, :, i]

    ax1 = fig1.add_subplot(3, 3, i)
    ax1.imshow(img)
    ax1.set_xticks(())
    ax1.set_yticks(())
    ax1.set_title('true class = %s' % test_labels[i])

    img_shuffled = img.copy()
    # np.random.shuffle only permutes along the first axis, so shuffle the
    # flattened view of the copy in place.
    np.random.shuffle(img_shuffled.ravel())
    img_shuffled = img_shuffled.reshape(img.shape)

    ax2 = fig2.add_subplot(3, 3, i)
    ax2.imshow(img_shuffled)