import matplotlib.pyplot as plt


def plot_learning_curves(num_points, X_train, Y_train, X_test, Y_test,
                         positive_class=1, negative_class=0):
    """Plot training and test accuracy as a function of training set size."""
    # integer division so the sizes can be used to slice the training set
    train_set_sizes = [len(X_train) // k for k in range(num_points + 1, 0, -1)]
    test_accuracies = []
    training_accuracies = []
    for training_set_size in train_set_sizes:
        model = train(X_train, Y_train, training_set_size)
        test_accuracy = evaluate(model, X_test, Y_test, positive_class, negative_class)
        training_accuracy = evaluate(model, X_train, Y_train, positive_class, negative_class)
        test_accuracies.append(test_accuracy)
        training_accuracies.append(training_accuracy)
    plt.plot(train_set_sizes, training_accuracies, 'bs-', label='Training accuracy')
    plt.plot(train_set_sizes, test_accuracies, 'g^-', label='Test accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Number of training samples')
    plt.title('Augmented Logistic Regression Learning Curve')
    plt.legend(loc='lower right')
    plt.savefig('../Figures/accuracyPlotAugmented.png', dpi=100)
    plt.show()
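# The `train` and `evaluate` helpers used above are not shown. A minimal
# sketch of what they might look like, assuming scikit-learn's
# LogisticRegression (an assumption based on the plot title; the original
# implementation may differ):
from sklearn.linear_model import LogisticRegression


def train(X_train, Y_train, training_set_size):
    """Fit a logistic regression model on the first `training_set_size` samples."""
    model = LogisticRegression()
    model.fit(X_train[:training_set_size], Y_train[:training_set_size])
    return model


def evaluate(model, X, Y, positive_class=1, negative_class=0):
    """Return mean accuracy; the class arguments are unused in this sketch."""
    return model.score(X, Y)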
import pandas as pd
import matplotlib.pyplot as plt


def plot_sdss(cat_path):
    # `find_files` and `spz_jy_to_mags` are helpers defined elsewhere in this
    # codebase; a sketch of the magnitude conversion follows this function.
    for catfile in find_files(cat_path, "*merged+sdss.txt"):
        # for now ignore the channel 2 files
        if catfile.split('/')[-1].split('_')[1] != '1':
            continue
        print("\nreading catalog: {}".format(catfile))
        df = pd.read_table(catfile, sep=' ')
        # get rid of negative flux sources, if any
        df = df[df.flux > 0]
        # convert mJy to Jy, then to IRAC1 magnitudes
        mags = spz_jy_to_mags(df.flux * 1e-3, 1)
        # print counts per magnitude bin
        for i in range(10, 15):
            sc = ((df.cl == 3) & (mags > i) & (mags < i + 1)).sum()
            xc = ((df.xsc == 1) & (mags > i) & (mags < i + 1)).sum()
            msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
            print(msg.format(i, i + 1, sc, xc))
        # print the level of agreement between the two catalogs
        agree = ((df.xsc == 1) & (df.cl == 3)).sum()
        disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
        na = ((df.xsc == 1) & (df.cl == 0)).sum()
        print("{} 2MASS XSC sources classified as galaxies by SDSS".format(agree))
        print("{} 2MASS XSC sources classified as stars by SDSS".format(disagree))
        print("{} 2MASS XSC sources not matched to SDSS".format(na))
        # plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
        xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
        sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
        # mags[xsc_gals].hist(label='2MASS XSC', normed=True)
        # mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
        plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
                 bins=5, label=['2MASS', 'SDSS'])
        plt.xlabel('IRAC1 [mag]')
        plt.ylabel('Number Count')
        reg = catfile.split('/')[-1].split('_')[0]
        plt.title('{} Extended Sources / Galaxies'.format(reg))
        plt.legend(loc=2)
        name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=100)
        plt.close()
        print("created file: {}".format(outpath))
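# A minimal sketch of the `spz_jy_to_mags` conversion assumed above, using
# Vega zero-points from the IRAC Instrument Handbook (280.9 Jy for channel 1,
# 179.7 Jy for channel 2); the actual implementation may differ.
import numpy as np

IRAC_ZERO_POINTS_JY = {1: 280.9, 2: 179.7}


def spz_jy_to_mags(flux_jy, channel):
    """Convert Spitzer/IRAC flux density in Jy to Vega magnitudes."""
    return -2.5 * np.log10(flux_jy / IRAC_ZERO_POINTS_JY[channel])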
import numpy as np
from scipy.stats import bayes_mvs, gaussian_kde


def submit_time_histogram(arr):
    """Use Matplotlib to plot a normalized histogram of submit times."""
    from math import ceil, log
    try:
        import matplotlib.mlab as mlab
        from prettyplotlib import plt
    except ImportError:
        print('You must have Matplotlib and Prettyplotlib installed to plot a histogram.')
        return
    # Use Sturges' formula for number of bins: k = ceiling(log2(n) + 1)
    k = ceil(log(len(arr), 2) + 1)
    # `normed=1` was renamed `density=True` in newer Matplotlib releases
    n, bins, patches = plt.hist(arr, k, density=True, facecolor='green', alpha=0.75)
    # throw a PDF plot on top of it
    # y = mlab.normpdf(bins, np.mean(arr), np.std(arr))
    # l = plt.plot(bins, y, 'r--', linewidth=1)
    # Get a Bayesian confidence interval for mean, variance, standard deviation
    dmean, dvar, dsd = bayes_mvs(arr)
    # drop a line in at the mean for fun
    plt.axvline(dmean[0], color='blue', alpha=0.5)
    plt.axvspan(dmean[1][0], dmean[1][1], color='blue', alpha=0.5)
    plt.axvline(np.median(arr), color='y', alpha=0.5)
    # Calculate a Kernel Density Estimate
    density = gaussian_kde(arr)
    xs = np.arange(0., np.max(arr), 0.1)
    density.covariance_factor = lambda: .25
    density._compute_covariance()
    plt.plot(xs, density(xs), color='m')
    # FIXME: come up with better legend names
    # plt.legend(('Normal Curve', 'Mean', 'Median', 'KDE'))
    plt.legend(('Mean', 'Median', 'KDE'))
    plt.xlabel('Submit Times (in Seconds)')
    plt.ylabel('Probability')
    plt.title('Histogram of Worker submit times')
    plt.grid(True)
    plt.show()
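# A minimal usage sketch; the submit-time deltas below are synthetic, purely
# for illustration.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    synthetic_deltas = rng.gamma(shape=2.0, scale=3.0, size=500)
    submit_time_histogram(synthetic_deltas)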
ax1 = plt.gcf().add_subplot(1, 1, 1)
ax1.plot(times, s, 'r', label='True State')
# m = np.average(particles, weights=weights, axis=1)
# st = np.std(particles, weights=weights, axis=1)
# ext = (0.0, dt * timewindow, code.neurons[-1].theta, code.neurons[0].theta)
# plt.imshow(rates.T, extent=ext, cmap=cm.gist_yarg, aspect='auto', interpolation='nearest')
thetas = [code.neurons[i].theta for i in sptrain]
ax1.plot(times[sptimes], thetas, 'yo', label='Observed Spikes')
ax1.plot(times, m, 'b', label='Posterior Mean')
# posterior mean plus/minus one standard deviation
ax1.plot(times, m - st, 'gray', times, m + st, 'gray')
# ax2 = plt.gcf().add_subplot(1, 2, 2)
# ax2.plot(times, s)
plt.xlabel('Time (in seconds)')
plt.ylabel('Space (in cm)')
plt.legend()
plt.title('State Estimation in a Diffusion System')

if plotting:
    # matplotlib.rcParams['font.size'] = 10
    if gaussian:
        fig, (ax1, ax2) = ppl.subplots(1, 2, figsize=(12, 6))
    else:
        fig, ax2 = ppl.subplots(1)
    times = np.arange(0.0, dt * timewindow, dt)
    if gaussian:
        if sum(sum(spsg)) != 0:
plt.plot(X, Y, label='iSVD u={}'.format(num))

"""
print 'Testing raw SVD => exact reconstruction'
svT = scipy.linalg.diagsvd(s, u.shape[0], vT.shape[1]).dot(vT)
for y in xrange(train.shape[0]):
    for x in xrange(train.shape[1]):
        colU = u[y, :]
        rowV = svT[:, x]
        assert np.allclose(train[y, x], single_dot(u, svT, x, y))
"""

##
plt.title('SVD reconstruction error on {}x{} matrix'.format(*train.shape))
plt.xlabel('Low rank approximation (k)')
plt.ylabel('Frobenius norm')
plt.ylim(0, max(svdY))
plt.legend(loc='best')
plt.savefig('reconstruct_fro_{}x{}.pdf'.format(*train.shape))
plt.show(block=True)

##
plt.plot(orthoX, orthoY, label="SVD", color='black', linewidth=2, linestyle='--')
for label, X, Y in incr_ortho:
    plt.plot(X, Y, label=label)
plt.title('SVD orthogonality error on {}x{} matrix'.format(*train.shape))
plt.xlabel('Low rank approximation (k)')
plt.ylabel('Deviation from orthogonality')
plt.semilogy()
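# The orthogonality series plotted above (`orthoY`, `incr_ortho`) are computed
# elsewhere. One common metric -- an assumption, the original is not shown --
# is the Frobenius-norm distance of U^T U from the identity:
def ortho_error(U):
    """Frobenius-norm deviation of U's columns from orthonormality."""
    import numpy as np
    return np.linalg.norm(U.T.dot(U) - np.eye(U.shape[1]))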
# average per-node relative error; the +1 in the denominator guards against
# nodes with zero pageviews
rerr = sum(abs(approxG.node[n]['pageviews'] - G.node[n]['pageviews']) /
           (G.node[n]['pageviews'] + 1)
           for n in G.nodes()) / G.number_of_nodes()
np.random.shuffle(nodes)

print('Pageviews from "real" edge weights')
print('-=-=-=-=-')
display_graph(G)
print()
print('Pageviews from evenly distributed edge weights')
print('-=-=-=-=-')
display_graph(approxG)

plt.plot(np.arange(0, len(rerrs)), rerrs, label='Relative error over time')
plt.xlabel('Iteration')
plt.ylabel('Average pageview relative error per node')
plt.legend()
plt.savefig('error_over_time.pdf')
plt.show(block=True)

plt.plot(np.arange(0, len(werrs)), werrs, label='Weight error over time')
plt.xlabel('Iteration')
plt.ylabel('Average weight error per edge')
plt.legend()
plt.savefig('weight_over_time.pdf')
plt.show(block=True)

fig, ax = plt.subplots(1)
ppl.bar(ax, *orig_weight_data, alpha=0.5, color='black', label='Weight error before')
ppl.bar(ax, np.arange(0, G.number_of_edges()),
        [abs(G[u][v]['weight'] - approxG[u][v]['weight']) for u, v in G.edges()],
        alpha=0.8, label='Weight error after')
# plt.ylim(-1, 1)
plt.legend(loc='best')