Example #1
import matplotlib.pyplot as plt


def plot_learning_curves(num_points,
                         X_train,
                         Y_train,
                         X_test,
                         Y_test,
                         positive_class=1,
                         negative_class=0):
    # Use integer division so train() receives whole sample counts.
    train_set_sizes = [len(X_train) // k for k in range(num_points + 1, 0, -1)]
    test_errors = []
    training_errors = []
    for training_set_size in train_set_sizes:
        # train() and evaluate() are helpers defined elsewhere (stubs sketched
        # after this example); evaluate() is assumed to return accuracy, which
        # is how the plot labels below describe it.
        model = train(X_train, Y_train, training_set_size)
        test_error = evaluate(model, X_test, Y_test, positive_class,
                              negative_class)
        training_error = evaluate(model, X_train, Y_train, positive_class,
                                  negative_class)
        test_errors.append(test_error)
        training_errors.append(training_error)

    plt.plot(train_set_sizes,
             training_errors,
             'bs-',
             label='Training accuracy')
    plt.plot(train_set_sizes, test_errors, 'g^-', label='Test accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Number of training samples')
    plt.title('Augmented Logistic Regression Learning Curve')
    plt.legend(loc='lower right')
    plt.savefig('../Figures/accuracyPlotAugmented.png', dpi=100)
    plt.show()
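
The train() and evaluate() helpers are not shown on this page. A minimal sketch of compatible stubs, assuming a scikit-learn-style classifier and that evaluate() returns plain accuracy (the names and signatures here are assumptions, not the original project's API):

from sklearn.linear_model import LogisticRegression


def train(X_train, Y_train, training_set_size):
    # Fit on the first `training_set_size` samples only (assumption).
    model = LogisticRegression()
    model.fit(X_train[:training_set_size], Y_train[:training_set_size])
    return model


def evaluate(model, X, Y, positive_class, negative_class):
    # Plain accuracy; the class arguments would only matter for
    # per-class metrics such as precision or recall.
    return model.score(X, Y)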
Example #2
import pandas as pd
import matplotlib.pyplot as plt


def plot_sdss(cat_path):
    # find_files() and spz_jy_to_mags() are project helpers (sketched below).
    for catfile in find_files(cat_path, "*merged+sdss.txt"):

        # for now ignore the channel 2 files
        if catfile.split('/')[-1].split('_')[1] != '1':
            continue

        print("\nreading catalog: {}".format(catfile))
        df = pd.read_table(catfile, sep=' ')

        # get rid of negative flux sources, if any
        df = df[df.flux > 0]

        # convert to magnitudes (the flux column is presumably mJy, hence 1e-3 to Jy)
        mags = spz_jy_to_mags(df.flux * 1e-3, 1)

        # print counts per magnitude bin
        for i in range(10, 15):
            sc = ((df.cl == 3) & (mags > i) & (mags < i + 1)).sum()
            xc = ((df.xsc == 1) & (mags > i) & (mags < i + 1)).sum()
            msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
            print(msg.format(i, i + 1, sc, xc))

        # print number of sources agreed upon
        agree = ((df.xsc == 1) & (df.cl == 3)).sum()
        disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
        na = ((df.xsc == 1) & (df.cl == 0)).sum()
        msg = "{} 2MASS XSC sources classified as galaxies by SDSS"
        print(msg.format(agree))
        msg = "{} 2MASS XSC sources classified as stars by SDSS"
        print(msg.format(disagree))
        msg = "{} 2MASS XSC sources not matched to SDSS"
        print(msg.format(na))

        # plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
        xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
        sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
        # mags[xsc_gals].hist(label='2MASS XSC', normed=True)
        # mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
        plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
                 bins=5,
                 label=['2MASS', 'SDSS'])
        plt.xlabel('IRAC1 [mag]')
        plt.ylabel('Number Count')
        reg = catfile.split('/')[-1].split('_')[0]
        plt.title('{} Extended Sources / Galaxies'.format(reg))
        plt.legend(loc=2)
        name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=100)
        plt.close()
        print("created file: {}".format(outpath))
Example #3
def submit_time_histogram(arr):
    """
    Use Matplotlib to plot a normalized histogram of submit times
    """
    from math import ceil, log

    import numpy as np
    from scipy.stats import bayes_mvs, gaussian_kde
    try:
        import matplotlib.mlab as mlab
        from prettyplotlib import plt
    except ImportError:
        print('You must have Matplotlib and Prettyplotlib installed '
              'to plot a histogram.')
        return

    # Use Sturges' formula for number of bins: k = ceiling(log2 n + 1)
    k = ceil(log(len(arr), 2) + 1)
    n, bins, patches = plt.hist(arr,
                                k,
                                density=True,  # `normed` was removed in Matplotlib 3.x
                                facecolor='green',
                                alpha=0.75)
    # throw a PDF plot on top of it
    #y = mlab.normpdf(bins, np.mean(arr), np.std(arr))
    #l = plt.plot(bins, y, 'r--', linewidth=1)

    # Get a Bayesian confidence interval for mean, variance, standard deviation
    dmean, dvar, dsd = bayes_mvs(arr)

    # drop a line in at the mean for fun
    plt.axvline(dmean[0], color='blue', alpha=0.5)
    plt.axvspan(dmean[1][0], dmean[1][1], color='blue', alpha=0.5)
    plt.axvline(np.median(arr), color='y', alpha=0.5)

    # Calculate a Kernel Density Estimate
    density = gaussian_kde(arr)
    xs = np.arange(0., np.max(arr), 0.1)
    # narrow the KDE bandwidth (set_bandwidth is the cleaner modern API)
    density.covariance_factor = lambda: .25
    density._compute_covariance()
    plt.plot(xs, density(xs), color='m')

    #FIXME: come up with better legend names
    #plt.legend(('Normal Curve', 'Mean', 'Median', 'KDE'))
    plt.legend(('Mean', 'Median', 'KDE'))

    plt.xlabel('Submit Times (in Seconds)')
    plt.ylabel('Probability')
    plt.title('Histogram of Worker submit times')
    plt.grid(True)

    plt.show()
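
A quick usage sketch (the synthetic lognormal data is only for illustration):

import numpy as np

submit_times = np.random.lognormal(mean=1.0, sigma=0.5, size=500)
submit_time_histogram(submit_times)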
Example #4
ax1 = plt.gcf().add_subplot(1, 1, 1)
ax1.plot(times, s, 'r', label='True State')

#m = np.average(particles,weights=weights,axis=1)
#st = np.std(particles,weights=weights,axis=1)
#ext = (0.0,dt*timewindow,code.neurons[-1].theta,code.neurons[0].theta)
#plt.imshow(rates.T,extent=ext,cmap = cm.gist_yarg,aspect = 'auto',interpolation ='nearest')
thetas = [code.neurons[i].theta for i in sptrain]
ax1.plot(times[sptimes], thetas, 'yo', label='Observed Spikes')
ax1.plot(times, m, 'b', label='Posterior Mean')
ax1.plot(times, m - st, 'gray', times, m + st, 'gray')
#ax2 = plt.gcf().add_subplot(1,2,2)
#ax2.plot(times,s)
plt.xlabel('Time (in seconds)')
plt.ylabel('Space (in cm)')
plt.legend()
plt.title('State Estimation in a Diffusion System')
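
If a shaded uncertainty band is preferred over the two gray curves, fill_between does the same job in one call; a sketch assuming times, m, and st are NumPy arrays as above:

ax1.fill_between(times, m - st, m + st, color='gray', alpha=0.3,
                 label='Posterior ±1 std')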



if plotting:
    
    #matplotlib.rcParams['font.size']=10
    
    if gaussian:
        fig, (ax1, ax2) = ppl.subplots(1, 2, figsize=(12, 6))
    else:
        fig, ax2 = ppl.subplots(1)
    times = np.arange(0.0, dt * timewindow, dt)
    if gaussian:
        if sum(sum(spsg)) != 0:
Example #5
    plt.plot(X, Y, label='iSVD u={}'.format(num))
    """
    Disabled sanity check: a full-rank SVD should reconstruct `train` exactly.
    print('Testing raw SVD => exact reconstruction')
    svT = scipy.linalg.diagsvd(s, u.shape[0], vT.shape[1]).dot(vT)
    for y in range(train.shape[0]):
        for x in range(train.shape[1]):
            colU = u[y, :]
            rowV = svT[:, x]
            assert np.allclose(train[y, x], single_dot(u, svT, x, y))
    """
   ##
   plt.title('SVD reconstruction error on {}x{} matrix'.format(*train.shape))
   plt.xlabel('Low rank approximation (k)')
   plt.ylabel('Frobenius norm')
   plt.ylim(0, max(svdY))
   plt.legend(loc='best')
   plt.savefig('reconstruct_fro_{}x{}.pdf'.format(*train.shape))
   plt.show(block=True)
   ##
   plt.plot(orthoX,
            orthoY,
            label="SVD",
            color='black',
            linewidth=2,
            linestyle='--')
   for label, X, Y in incr_ortho:
       plt.plot(X, Y, label=label)
   plt.title('SVD orthogonality error on {}x{} matrix'.format(*train.shape))
   plt.xlabel('Low rank approximation (k)')
   plt.ylabel('Deviation from orthogonality')
   plt.semilogy()

    # From the (elided) optimization loop: the per-iteration relative pageview
    # error, presumably appended to the `rerrs` plotted below, with the node
    # order reshuffled on each pass:
    #   rerr = sum(abs(approxG.node[n]['pageviews'] - G.node[n]['pageviews'])
    #              / (G.node[n]['pageviews'] + 1)
    #              for n in G.nodes()) / G.number_of_nodes()
    #   np.random.shuffle(nodes)

    print('Pageviews from "real" edge weights')
    print('-=-=-=-=-')
    display_graph(G)
    print()
    print('Pageviews from evenly distributed edge weights')
    print('-=-=-=-=-')
    display_graph(approxG)

    plt.plot(np.arange(0, len(rerrs)), rerrs, label='Relative error over time')
    plt.xlabel('Iteration')
    plt.ylabel('Average pageview relative error per node')
    plt.legend()
    plt.savefig('error_over_time.pdf')
    plt.show(block=True)

    plt.plot(np.arange(0, len(werrs)), werrs, label='Weight error over time')
    plt.xlabel('Iteration')
    plt.ylabel('Average weight error per edge')
    plt.legend()
    plt.savefig('weight_over_time.pdf')
    plt.show(block=True)

    fig, ax = plt.subplots(1)
    ppl.bar(ax, *orig_weight_data, alpha=0.5, color='black',
            label='Weight error before')
    ppl.bar(ax, np.arange(0, G.number_of_edges()),
            [abs(G[u][v]['weight'] - approxG[u][v]['weight'])
             for u, v in G.edges()],
            alpha=0.8, label='Weight error after')
    #plt.ylim(-1, 1)
    plt.legend(loc='best')
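
display_graph() is not defined in this fragment; a hypothetical stand-in, assuming each node carries the 'pageviews' attribute used above:

def display_graph(G):
    # Hypothetical helper: print each node's pageview count.
    for n in sorted(G.nodes()):
        print('{}: {:.1f}'.format(n, G.node[n]['pageviews']))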