Example #1
0
def plot_wise(cat_path):

    for catfile in find_files(cat_path, "*merged+wise.csv"):

        print("\nreading catalog: {}".format(catfile))
        df = pd.read_csv(catfile)

        # convert to magnitudes
        nbadflux = (df.flux <= 0).sum()
        try:
            assert nbadflux == 0
        except:
            print("warning: {} negative flux source(s)".format(nbadflux))
        ch = catfile.split('/')[-1].split('_')[1]
        mags = spz_jy_to_mags(df.flux * 1e-3, float(ch))
        if ch == '1':
            plt.scatter(df.W1mag, mags)
            plt.xlabel('W1 [mag]')
            plt.ylabel('I1 [mag]')
        elif ch == '2':
            plt.scatter(df.W2mag, mags)
            plt.xlabel('W2 [mag]')
            plt.ylabel('I2 [mag]')
        ax = plt.gca()
        xlim, ylim = ax.get_xlim(), ax.get_ylim()
        plt.plot([-5, ylim[1] * 2], [-5, ylim[1] * 2], 'r-')
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        reg = catfile.split('/')[-1].split('_')[0]
        name = '{}_{}_IRAC_vs_WISE.png'.format(reg, ch)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=120)
        plt.close()
Example #2
0
def plot_learning_curves(num_points,
                         X_train,
                         Y_train,
                         X_test,
                         Y_test,
                         positive_class=1,
                         negative_class=0):
    train_set_sizes = [len(X_train) / k for k in range(num_points + 1, 0, -1)]
    test_errors = []
    training_errors = []
    for training_set_size in train_set_sizes:
        model = train(X_train, Y_train, training_set_size)
        test_error = evaluate(model, X_test, Y_test, positive_class,
                              negative_class)
        training_error = evaluate(model, X_train, Y_train, positive_class,
                                  negative_class)
        test_errors.append(test_error)
        training_errors.append(training_error)

    plt.plot(train_set_sizes,
             training_errors,
             'bs-',
             label='Training accuracy')
    plt.plot(train_set_sizes, test_errors, 'g^-', label='Test accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Number of training samples')
    plt.title('Augmented Logistic Regression Learning Curve')
    plt.legend(loc='lower right')
    plt.savefig('../Figures/accuracyPlotAugmented.png', dpi=100)
    pylab.show()
Example #3
0
def plot(x, y, outpath, xlabel, ylabel, plot_style, plot_type):
	if plot_type == 'mag-mag':
		xlim = (10, 18)
		ylim = (10, 18)
	elif plot_type == 'color-mag':
		xlim = (10, 18)
		ylim = (-1, 1)
	elif plot_type == 'color-color':
		xlim = (-1, 1)
		ylim = (-1, 1)
	else:
		raise(ValueError("plot_type should be one of ['mag-mag', 'color-mag', 'color-color'] "))
	isinrange = lambda a,b: (a>=b[0]) & (a<=b[1])
	g = isinrange(x, xlim) & isinrange(y, ylim)
	if plot_style == 'scatter':
		plt.scatter(x[g], y[g])
	elif plot_style == 'hexbin':
		plt.hexbin(x[g], y[g])
	elif plot_style == 'hist2d':
		plt.hist2d(x[g], y[g], bins=100)
	else:
		raise(ValueError("plot_style should be one of ['scatter', 'hexbin', 'hist2d'] "))
	plt.xlabel(xlabel)
	plt.ylabel(ylabel)
	ax = plt.gca()
	ax.set_xlim(xlim)
	ax.set_ylim(ylim)
	plt.savefig(outpath, dpi=120)
	plt.close()
	print("created file: {}".format(outpath))
Example #4
0
def plot_wise(cat_path):

	for catfile in find_files(cat_path, "*merged+wise.csv"):

		print("\nreading catalog: {}".format(catfile))
		df = pd.read_csv(catfile)

		# convert to magnitudes
		nbadflux = (df.flux <= 0).sum()
		try:
			assert nbadflux == 0
		except:
			print("warning: {} negative flux source(s)".format(nbadflux))
		ch = catfile.split('/')[-1].split('_')[1]
		mags = spz_jy_to_mags(df.flux*1e-3, float(ch))
		if ch == '1':
			plt.scatter(df.W1mag, mags)
			plt.xlabel('W1 [mag]')
			plt.ylabel('I1 [mag]')
		elif ch == '2':
			plt.scatter(df.W2mag, mags)
			plt.xlabel('W2 [mag]')
			plt.ylabel('I2 [mag]')
		ax = plt.gca()
		xlim, ylim = ax.get_xlim(), ax.get_ylim()
		plt.plot([-5, ylim[1]*2], [-5, ylim[1]*2], 'r-')
		ax.set_xlim(xlim) ; ax.set_ylim(ylim)
		reg = catfile.split('/')[-1].split('_')[0]
		name = '{}_{}_IRAC_vs_WISE.png'.format(reg, ch)
		outpath = '/'.join(catfile.split('/')[:-1]+[name])
		plt.savefig(outpath, dpi=120)
		plt.close()
Example #5
0
def plot(x, y, outpath, xlabel, ylabel, plot_style, plot_type):
    if plot_type == 'mag-mag':
        xlim = (10, 18)
        ylim = (10, 18)
    elif plot_type == 'color-mag':
        xlim = (10, 18)
        ylim = (-1, 1)
    elif plot_type == 'color-color':
        xlim = (-1, 1)
        ylim = (-1, 1)
    else:
        raise (ValueError(
            "plot_type should be one of ['mag-mag', 'color-mag', 'color-color'] "
        ))
    isinrange = lambda a, b: (a >= b[0]) & (a <= b[1])
    g = isinrange(x, xlim) & isinrange(y, ylim)
    if plot_style == 'scatter':
        plt.scatter(x[g], y[g])
    elif plot_style == 'hexbin':
        plt.hexbin(x[g], y[g])
    elif plot_style == 'hist2d':
        plt.hist2d(x[g], y[g], bins=100)
    else:
        raise (ValueError(
            "plot_style should be one of ['scatter', 'hexbin', 'hist2d'] "))
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    ax = plt.gca()
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    plt.savefig(outpath, dpi=120)
    plt.close()
    print("created file: {}".format(outpath))
Example #6
0
def plot_sdss(cat_path):
    for catfile in find_files(cat_path, "*merged+sdss.txt"):

        # for now ignore the channel 2 files
        if catfile.split('/')[-1].split('_')[1] != '1':
            continue

        print("\nreading catalog: {}".format(catfile))
        df = pd.read_table(catfile, sep=' ')

        # get rid of negative flux sources, if any
        df = df[df.flux > 0]

        # convert to magnitudes
        mags = spz_jy_to_mags(df.flux * 1e-3, 1)

        # print counts per magnitude bin
        for i in range(10, 15):
            sc = ((df.cl == 3) & (mags > i) & (mags < i + 1)).sum()
            xc = ((df.xsc == 1) & (mags > i) & (mags < i + 1)).sum()
            msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
            print(msg.format(i, i + 1, sc, xc))

        # print number of sources agreed upon
        agree = ((df.xsc == 1) & (df.cl == 3)).sum()
        disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
        na = ((df.xsc == 1) & (df.cl == 0)).sum()
        msg = "{} 2MASS XSC sources classified as galaxies by SDSS"
        print(msg.format(agree))
        msg = "{} 2MASS XSC sources classified as stars by SDSS"
        print(msg.format(disagree))
        msg = "{} 2MASS XSC sources not matched to SDSS"
        print(msg.format(na))

        # plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
        xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
        sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
        # mags[xsc_gals].hist(label='2MASS XSC', normed=True)
        # mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
        plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
                 bins=5,
                 label=['2MASS', 'SDSS'])
        plt.xlabel('IRAC1 [mag]')
        plt.ylabel('Number Count')
        reg = catfile.split('/')[-1].split('_')[0]
        plt.title('{} Extended Sources / Galaxies'.format(reg))
        plt.legend(loc=2)
        name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
        outpath = '/'.join(catfile.split('/')[:-1] + [name])
        plt.savefig(outpath, dpi=100)
        plt.close()
        print("created file: {}".format(outpath))
Example #7
0
def plot_sdss(cat_path):
	for catfile in find_files(cat_path, "*merged+sdss.txt"):

		# for now ignore the channel 2 files
		if catfile.split('/')[-1].split('_')[1] != '1':
			continue

		print("\nreading catalog: {}".format(catfile))
		df = pd.read_table(catfile, sep=' ')

		# get rid of negative flux sources, if any
		df = df[df.flux > 0]

		# convert to magnitudes
		mags = spz_jy_to_mags(df.flux*1e-3, 1)

		# print counts per magnitude bin
		for i in range(10,15):
			sc = ((df.cl == 3) & (mags > i) & (mags < i+1)).sum()
			xc = ((df.xsc == 1) & (mags > i) & (mags < i+1)).sum() 
			msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
			print(msg.format(i, i+1, sc, xc))

		# print number of sources agreed upon
		agree = ((df.xsc == 1) & (df.cl == 3)).sum()
		disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
		na = ((df.xsc == 1) & (df.cl == 0)).sum()
		msg = "{} 2MASS XSC sources classified as galaxies by SDSS"
		print(msg.format(agree))
		msg = "{} 2MASS XSC sources classified as stars by SDSS"
		print(msg.format(disagree))
		msg = "{} 2MASS XSC sources not matched to SDSS"
		print(msg.format(na))

		# plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
		xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
		sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
		# mags[xsc_gals].hist(label='2MASS XSC', normed=True)
		# mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
		plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
			bins=5, label=['2MASS', 'SDSS'])
		plt.xlabel('IRAC1 [mag]')
		plt.ylabel('Number Count')
		reg = catfile.split('/')[-1].split('_')[0]
		plt.title('{} Extended Sources / Galaxies'.format(reg))
		plt.legend(loc=2)
		name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
		outpath = '/'.join(catfile.split('/')[:-1]+[name])
		plt.savefig(outpath, dpi=100)
		plt.close()
		print("created file: {}".format(outpath))
Example #8
0
def submit_time_histogram(arr):
    """
    Use Matplotlib to plot a normalized histogram of submit times
    """
    from math import ceil, log
    try:
        import matplotlib.mlab as mlab
        from prettyplotlib import plt
    except ImportError:
        print(
            'You must have Matplotlib and Prettyplotlib installed to plot a histogram.'
        )

    # Use Sturges' formula for number of bins: k = ceiling(log2 n + 1)
    k = ceil(log(len(arr), 2) + 1)
    n, bins, patches = plt.hist(arr,
                                k,
                                normed=1,
                                facecolor='green',
                                alpha=0.75)
    # throw a PDF plot on top of it
    #y = mlab.normpdf(bins, np.mean(arr), np.std(arr))
    #l = plt.plot(bins, y, 'r--', linewidth=1)

    # Get a Bayesian confidence interval for mean, variance, standard deviation
    dmean, dvar, dsd = bayes_mvs(deltas)

    # drop a line in at the mean for fun
    plt.axvline(dmean[0], color='blue', alpha=0.5)
    plt.axvspan(dmean[1][0], dmean[1][1], color='blue', alpha=0.5)
    plt.axvline(np.median(deltas), color='y', alpha=0.5)

    # Caclulate a Kernel Density Estimate
    density = gaussian_kde(deltas)
    xs = np.arange(0., np.max(deltas), 0.1)
    density.covariance_factor = lambda: .25
    density._compute_covariance()
    plt.plot(xs, density(xs), color='m')

    #FIXME: come up with better legend names
    #plt.legend(('Normal Curve', 'Mean', 'Median', 'KDE'))
    plt.legend(('Mean', 'Median', 'KDE'))

    plt.xlabel('Submit Times (in Seconds)')
    plt.ylabel('Probability')
    plt.title('Histogram of Worker submit times')
    plt.grid(True)

    plt.show()
Example #9
0
def show_timeorder_info(Dt, mesh_sizes, errors):
    '''Performs consistency check for the given problem/method combination and
    show some information about it. Useful for debugging.
    '''
    # Compute the numerical order of convergence.
    orders = {}
    for key in errors:
        orders[key] = _compute_numerical_order_of_convergence(Dt, errors[key])

    # Print the data to the screen
    for i, mesh_size in enumerate(mesh_sizes):
        print
        print('Mesh size %d:' % mesh_size)
        print('dt = %e' % Dt[0]),
        for label, e in errors.items():
            print('   err_%s = %e' % (label, e[i][0])),
        print
        for j in range(len(Dt) - 1):
            print('                 '),
            for label, o in orders.items():
                print('   ord_%s = %e' % (label, o[i][j])),
            print
            print('dt = %e' % Dt[j+1]),
            for label, e in errors.items():
                print('   err_%s = %e' % (label, e[i][j+1])),
            print

    # Create a figure
    for label, err in errors.items():
        pp.figure()
        ax = pp.axes()
        # Plot the actual data.
        for i, mesh_size in enumerate(mesh_sizes):
            pp.loglog(Dt, err[i], '-o', label=mesh_size)
        # Compare with order curves.
        pp.autoscale(False)
        e0 = err[-1][0]
        for o in range(7):
            pp.loglog([Dt[0], Dt[-1]],
                      [e0, e0 * (Dt[-1] / Dt[0]) ** o],
                      color='0.7')
        pp.xlabel('dt')
        pp.ylabel('||%s-%s_h||' % (label, label))
        # pp.title('Method: %s' % method['name'])
        ppl.legend(ax, loc=4)
    pp.show()
    return
Example #10
0
    def hero_stats_bar(self, thresh):
        '''
        Creates a bar chart of the heroes with highest
        Wins / #Games played ratio
        '''
        # Create individual hero wins and losses
        self.hero_stats()

        fig, ax = plt.subplots(1, figsize=(9, 7))
        # Compute the ratio of #Wins to the #Games played by that hero
        # Most relevant statistic, better than W/L Ratio, better than
        # just wins, all of them can be statistically insignificant
        # in edge cases, but this can be the least of all
        val = [
            (k, self.wstat[k] / float(self.dstat[k] + self.wstat[k]))
            for k in self.wstat
            if self.wstat[k] / float(self.dstat[k] + self.wstat[k]) >= thresh
        ]
        plt.title('Hero ID vs. Win Ratio (Matches from 01/23 - 02/24)')
        plt.xlabel('Hero ID')
        plt.ylabel('Win Ratio')
        ax.set_xlim([0, len(val)])
        ann = [round(k[1], 2) for k in val]
        # Extract the xticklabels
        xtl = [k[0] for k in val]
        # Extract the individual values to be plotted
        val = [k[1] for k in val]
        ppl.bar(ax,
                np.arange(len(val)),
                val,
                annotate=ann,
                xticklabels=xtl,
                grid='y',
                color=ppl.colors.set2[2])
        fig.savefig('../Figures/HIDvs#Wins#Games.png')
        plt.show()
        plt.clf()
lambda_1_samples = mcmc.trace('lambda_1')[:]
lambda_2_samples = mcmc.trace('lambda_2')[:]

tau_samples = mcmc.trace('tau')[:]

fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, ncols=1)

plt.rc('font', **{'family': 'DejaVu Sans'})
plt.subplot(311)
plt.title(u'''Distribución posterior de las variables
            $\lambda_1,\;\lambda_2,\;tau$''')
plt.hist(lambda_1_samples, histtype="stepfilled", bins=30, alpha=0.85,
        normed=True)
plt.xlim([150000,250000])
plt.xlabel("valor de $\lambda_1$")


plt.subplot(312)
#ax.set_autoscaley_on(False)
plt.hist(lambda_2_samples, histtype="stepfilled", bins=30, alpha=0.85,
        normed=True)
plt.xlim([150000,250000])
plt.xlabel("valor de $\lambda_2$")
plt.tick_params(axis="both", which="mayor", labelsize=4)

plt.subplot(313)
w = 1.0/tau_samples.shape[0]*np.ones_like(tau_samples)
plt.hist(tau_samples, bins=n_datos, alpha=1, weights=w, rwidth=2.0)
plt.xticks(np.arange(n_datos))
plt.ylim([0, 1.5])
Example #12
0
plt.figure()

ax1 = plt.gcf().add_subplot(1,1,1)
ax1.plot(times,s,'r',label = 'True Sate')

#m = np.average(particles,weights=weights,axis=1)
#st = np.std(particles,weights=weights,axis=1)
#ext = (0.0,dt*timewindow,code.neurons[-1].theta,code.neurons[0].theta)
#plt.imshow(rates.T,extent=ext,cmap = cm.gist_yarg,aspect = 'auto',interpolation ='nearest')
thetas = [code.neurons[i].theta for i in sptrain]
ax1.plot(times[sptimes],thetas,'yo',label='Observed Spikes')
ax1.plot(times,m,'b',label='Posterior Mean')
ax1.plot(times,m-st,'gray',times,m+st,'gray')
#ax2 = plt.gcf().add_subplot(1,2,2)
#ax2.plot(times,s)
plt.xlabel('Time (in seconds)')
plt.ylabel('Space (in cm)')
plt.legend()
plt.title('State Estimation in a Diffusion System')



if plotting:
    
    #matplotlib.rcParams['font.size']=10
    
    if gaussian:
        fig, (ax1,ax2) = ppl.subplots(1,2,figsize = (12,6))
    else:
        fig, ax2 = ppl.subplots(1)
    times = np.arange(0.0,dt*timewindow,dt)
Example #13
0
width = 0.5

font = {'family' : 'normal',
        'weight' : 'normal',
        'size'   : 10}

#          k      c      b      m       r       o        char
vals = [ 45174 , 48810 , 41719 , 255017 , 642090 , 3053979 , 3066135 , 1731655 , 582226 , 126045 , 6738 ]
total = float(sum(vals))
print(vals)
vals = [(i / total)*100 for i in vals]
print(vals)
colors = ['LightGray','Gray','LightSkyBlue','RoyalBlue','LightSeaGreen','MediumSeaGreen','Khaki',\
        'Goldenrod','Tomato','MediumVioletRed','DarkViolet']

#plt.bar(ind, vals, width)
ppl.bar(ax, ind, vals, color=colors)
plt.xticks(ind+width/2.,\
        ('1e-7','1e-6','1e-5','1e-4','0.001','0.005','0.01', '0.05', '0.1', '0.2', '0.5')\
        ,fontsize=11)
plt.yticks(fontsize=11)
plt.title("Distribution of cells by mutation rate across resistant populations",fontsize=11)
#plt.ylim((0,100))
plt.ylabel("Percent of Total",fontsize=11)
plt.xlabel("Initial Mutation rate: u",fontsize=11)
plt.savefig(opt.filename+"_bar")
plt.clf()


###
  for spine in spines_to_remove:
     ax.spines[spine].set_visible(True)
  # For all the spines, make their line thicker and return them to be black
  all_spines = ['top', 'left', 'bottom', 'right']
  for spine in all_spines:
      ax.spines[spine].set_linewidth(0.5)
      #ax.spines[spine].set_color('black')

  # Get back the ticks. The position of the numbers is informative enough of
  # the position of the value.
  ax.xaxis.set_ticks_position('both')
  ax.yaxis.set_ticks_position('both')
  
  ax.tick_params('both', length=15, width=1, which='major')
  ax.tick_params('both', length=5, width=1, which='minor')
  plt.xlabel("SDSS "+band+" magnitude, mag")
  plt.ylabel("GC "+band+" magnitude, mag")
  plt.savefig('test/'+band+'_SDSS_vs_GC')
  #print band, "mean magnitude difference: ", np.mean(petroMags[:, i]) - np.mean(mags[:, i])


fig = plt.figure(figsize=(14, 14))  
for i, band in enumerate(bands):
  lim_lo = limits[i][0]
  lim_hi = limits[i][1]
  x, m, b = makeDiagonalLine([lim_lo, lim_hi])
  print i, lim_lo, lim_hi
  ax = plt.subplot(5, 1, i+1)
  c = ax.scatter(petroMags[:, i], mags[:, i], c='k', s=8, edgecolor='k') 
  ax.axis([lim_lo, lim_hi, lim_lo, lim_hi])
  ax.errorbar(petroMags[:, i], mags[:, i], yerr=mag_err[:, i], mew=0, linestyle = "none", color="black")
Example #15
0
for i in sorted(counter, key=counter.get, reverse=True):
    if counter[i] != 1:
        x.append(i)
        y.append(counter[i])

for i in range(len(x)):
    print str(y[i]) + "," + str(x[i])


plt.rc('font', **{'family': 'DejaVu Sans'})
fig, ax = plt.subplots(1, figsize=(20,6))

width = 0.35
ind = np.arange(len(y))
xdata = ind + 0.05 + width
ax.bar(ind, y)
ax.set_xticks(ind + 0.5)
ax.set_xticklabels(x, rotation="vertical")
ax.autoscale()
ax.set_title(u'Ranking de canciones "Top 10"\n Radio Inspiración FM',
        fontdict = {'fontsize':24}
        )

plt.ylabel('Frecuencia en "Top 10"', fontdict={'fontsize':18})
plt.xlabel(u"Canción", fontdict={'fontsize':22})

ppl.bar(ax, np.arange(len(y)), y, grid="y")
fig.tight_layout()
fig.savefig("top10.png")

##
import sys

if __name__ == '__main__':
  # Accumulate the counts fed into standard in (stdin)
  counts = []
  for line in sys.stdin:
    topic, count = line.split()
    # Shift the ones up by a tiny amount to allow them to be visible on the graph
    counts.append(int(count) + 0.05)

  print 'Total page view counts: {}'.format(len(counts))

  # Display the hourly page view distribution on a log-log plot
  # This matches our friendly and well understood Zipf distribution
  fig = plt.figure(figsize=(9, 5))
  ax = fig.add_subplot(1, 1, 1)
  plt.plot(xrange(len(counts)), counts, linewidth=3, label='Pageviews per page')
  #
  ax.set_xscale('log')
  ax.set_yscale('log')
  #
  plt.title('Log-log plot of hourly Wikipedia page view distribution')
  plt.xlabel('Rank order')
  plt.ylabel('Frequency')
  plt.grid(color='black')
  plt.legend()
  #
  plt.savefig('hourly_wikipedia_zipf.pdf')
  plt.show(block=True)
Example #17
0
def run_xsc_phot(bcdphot_out_path, mosaic_path):
	replaced = {}
	for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

		print("\n======================================================")
		print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
		print("------------------------------------------------------")
		outpath = cat.replace('combined_hdr_catalog.txt','2mass_xsc.tbl')

		# retrieve 2mass data if file doesn't already exist (from previous run)
		if not os.path.isfile(outpath):
			# get url and retrieve data
			url = query_2mass_xsc_polygon(*get_region_corners(cat))
			print("\ndownloading 2MASS photometry from: {}".format(url))
			text = urllib2.urlopen(url).read()
			# write to disk
			with open(outpath, 'w') as f:
				f.write(text)
			print("\ncreated file: {}".format(outpath))

		# read back in as recarray	
		print("\nreading: {}".format(outpath))
		names = open(outpath).read().split('\n')[76].split('|')[1:-1]
		da = np.recfromtxt(outpath, skip_header=80, names=names)

		# write input file for xsc_phot.pro
		infile_outpath = '/'.join(cat.split('/')[:-1])+'/xsc.txt'
		with open(infile_outpath,'w') as w:
			for i in range(da.shape[0]):
				w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i], da.dec[i], da.r_ext[i]))
		print("\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))

		# locate the FITS mosaic file for xsc_phot.pro to do photometry on
		reg, ch = cat.split('/')[-1].split('_')[:2]
		mosaicfile = filter(lambda x: 'dirbe{}/ch{}/long/full/Combine'\
			.format(reg,ch) in x, find_files(mosaic_path, '*mosaic.fits'))[0]
		print("\nfound mosaic file: {}".format(mosaicfile))

		# spawn IDL subprocess running xsc_phot.pro and catch stdout in file
		outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
		if not os.path.isfile(outpath):
			outfile = open(outpath,'w')
			print("\nspawning xsc_phot.pro IDL subprocess")
			cmd = "xsc_phot,'"+mosaicfile+"','"+infile_outpath+"','long'"
			rc = subprocess.call(['/usr/local/itt/idl71/bin/idl','-quiet','-e',cmd], 
				stderr = subprocess.PIPE, stdout = outfile)
			outfile.close()

		# read in output to recarray
		print("\nreading: {}".format(outpath))
		phot = np.recfromtxt(outpath, names=['id','flux','unc','sky','skyunc'])

		# make sure rows are aligned
		assert (da.designation == phot.id).all()

		# ignore xsc sources we got a NaN or negative flux for
		bad = np.isnan(phot.flux) | (phot.flux < 0)
		print("\naper.pro returned NaN or negative flux for {} sources".format(bad.sum()))
		if bad.sum() > 0:
			for i in phot[bad].id:
				print(i)
			outpath = cat.replace('combined_hdr_catalog.txt','xsc_nan_phot.csv')
			with open(outpath,'w') as f:
				w = csv.writer(f)
				w.writerow(da.dtype.names)
				w.writerows(da[bad].tolist())
			print('\ncreated file: {}'.format(outpath))
		phot = phot[~bad]
		da = da[~bad]

		# read in pipeline catalog
		print("\nreading: {}".format(cat))
		names = open(cat).readline().split()[1:]
		c = np.recfromtxt(cat, names=names)

		# loop through xsc sources and find matches in pipeline catalog
		print("\nfinding records associated with XSC sources in pipeline catalog")
		c_flux_total = []
		n_in_aper = []
		c_idx = []
		coords = radec_to_coords(c.ra, c.dec)
		kdt = KDT(coords)
		for i in range(phot.size):
			radius = da.r_ext[i]/3600.
			# idx1, idx2, ds = spherematch(da.ra[i], da.dec[i], 
			# 	c.ra, c.dec, tolerance=radius)
			idx, ds = spherematch2(da.ra[i], da.dec[i], c.ra, c.dec,
				kdt, tolerance=radius, k=500)
			# c_flux_total.append(c.flux[idx2].sum())
			# n_in_aper.append(c.flux[idx2].size)
			# c_idx.append(idx2.tolist())
			c_flux_total.append(c.flux[idx].sum())
			n_in_aper.append(ds.size)
			c_idx.append(idx.tolist())
		print("\nhistogram of source counts in r_ext aperture")
		for i in [(i,n_in_aper.count(i)) for i in set(n_in_aper)]:
			print i

		# create new version of catalog file with xsc-associated entries replaced
		c_idx = np.array(flatten(c_idx))
		print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
		replaced[cat] = {'old':c_idx.size, 'new':phot.size}
		replaced[cat]['hist'] = [(i,n_in_aper.count(i)) for i in set(n_in_aper)]
		c = np.delete(c, c_idx)
		newrows = np.rec.array([(-i, da.ra[i], da.dec[i], 
			phot.flux[i], phot.unc[i], 1) for i in \
			range(phot.size)], dtype=c.dtype)
		newcat = np.hstack((c, newrows))

		# write new version of catalog to disk
		fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']
		outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
		np.savetxt(outpath, newcat, fmt = fmt, header = ' '.join(names))
		print('\ncreated file: {}'.format(outpath))

		# make plot of total old vs. new flux
		plt.scatter(c_flux_total, phot.flux)
		ylim = plt.gca().get_ylim()
		plt.xlim(*ylim)
		max_y = ylim[1]
		plt.plot(ylim, ylim, 'r-')
		plt.xlabel('old flux [mJy]')
		plt.ylabel('new flux [mJy]')
		name = ' '.join(cat.split('/')[-1].split('_')[:2])
		plt.title(name)
		outpath = cat.replace('combined_hdr_catalog.txt','xsc_new_vs_old_phot.png')
		plt.savefig(outpath, dpi=200)
		plt.close()
		print('\ncreated file: {}'.format(outpath))

	outfile = 'xsc_replaced.json'
	json.dump(replaced, open(outfile,'w'))
	print("\ncreated file: {}".format(outfile))
	print("\nremoved / added")
	for k,v in replaced.iteritems():
		print k.split('/')[-1], v['old'], v['new']
	m = np.mean([i['old']/float(i['new']) for i in replaced.values()])
	print("average ratio: {}".format(m))
	print("\nK mag and r_ext of sources with NaN photometry:")
	for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
		reg = i.split('/')[-1]
		rec = np.recfromcsv(i)
		bad_id = rec.designation.tolist()
		bad_k = rec.k_m_k20fe.tolist()
		bad_r_ext = rec.r_ext.tolist()
		print reg
		print ("\tid\t\t\tKmag\tr_ext")
		if type(bad_id) is list:
			seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
			for j,k,l in seq:
				print("\t{}\t{}\t{}".format(j,k,l))
		else:
			print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
Example #18
0
import sys
import csv
import prettyplotlib as ppl
from prettyplotlib import plt
import numpy as np

blast_data = sys.argv[1].strip()

ident = []
with open(blast_data, "r") as handle:
    reader = csv.reader(handle)
    for row in reader:
        ident.append(float(row[2]))


fig, ax = plt.subplots(1)

ppl.hist(ax, ident)
plt.xlabel(u"Percentage of identity", fontdict={'fontsize':14})
plt.ylabel(u"Frecuency", fontdict={'fontsize':14})
ax.set_title("Plotted Blast results: % of identity")

fig.savefig('fig_blast_identity.png')
print "Made a plot of the percentags of identity from the blast data."
print "It is in the file `fig_blast_identity.png`"


Example #19
0
def run_xsc_phot(bcdphot_out_path, mosaic_path):
    replaced = {}
    for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

        print("\n======================================================")
        print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
        print("------------------------------------------------------")
        outpath = cat.replace('combined_hdr_catalog.txt', '2mass_xsc.tbl')

        # retrieve 2mass data if file doesn't already exist (from previous run)
        if not os.path.isfile(outpath):
            # get url and retrieve data
            url = query_2mass_xsc_polygon(*get_region_corners(cat))
            print("\ndownloading 2MASS photometry from: {}".format(url))
            text = urllib2.urlopen(url).read()
            # write to disk
            with open(outpath, 'w') as f:
                f.write(text)
            print("\ncreated file: {}".format(outpath))

        # read back in as recarray
        print("\nreading: {}".format(outpath))
        names = open(outpath).read().split('\n')[76].split('|')[1:-1]
        da = np.recfromtxt(outpath, skip_header=80, names=names)

        # write input file for xsc_phot.pro
        infile_outpath = '/'.join(cat.split('/')[:-1]) + '/xsc.txt'
        with open(infile_outpath, 'w') as w:
            for i in range(da.shape[0]):
                w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i],
                                               da.dec[i], da.r_ext[i]))
        print(
            "\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))

        # locate the FITS mosaic file for xsc_phot.pro to do photometry on
        reg, ch = cat.split('/')[-1].split('_')[:2]
        mosaicfile = filter(lambda x: 'dirbe{}/ch{}/long/full/Combine'\
         .format(reg,ch) in x, find_files(mosaic_path, '*mosaic.fits'))[0]
        print("\nfound mosaic file: {}".format(mosaicfile))

        # spawn IDL subprocess running xsc_phot.pro and catch stdout in file
        outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
        if not os.path.isfile(outpath):
            outfile = open(outpath, 'w')
            print("\nspawning xsc_phot.pro IDL subprocess")
            cmd = "xsc_phot,'" + mosaicfile + "','" + infile_outpath + "','long'"
            rc = subprocess.call(
                ['/usr/local/itt/idl71/bin/idl', '-quiet', '-e', cmd],
                stderr=subprocess.PIPE,
                stdout=outfile)
            outfile.close()

        # read in output to recarray
        print("\nreading: {}".format(outpath))
        phot = np.recfromtxt(outpath,
                             names=['id', 'flux', 'unc', 'sky', 'skyunc'])

        # make sure rows are aligned
        assert (da.designation == phot.id).all()

        # ignore xsc sources we got a NaN or negative flux for
        bad = np.isnan(phot.flux) | (phot.flux < 0)
        print("\naper.pro returned NaN or negative flux for {} sources".format(
            bad.sum()))
        if bad.sum() > 0:
            for i in phot[bad].id:
                print(i)
            outpath = cat.replace('combined_hdr_catalog.txt',
                                  'xsc_nan_phot.csv')
            with open(outpath, 'w') as f:
                w = csv.writer(f)
                w.writerow(da.dtype.names)
                w.writerows(da[bad].tolist())
            print('\ncreated file: {}'.format(outpath))
        phot = phot[~bad]
        da = da[~bad]

        # read in pipeline catalog
        print("\nreading: {}".format(cat))
        names = open(cat).readline().split()[1:]
        c = np.recfromtxt(cat, names=names)

        # loop through xsc sources and find matches in pipeline catalog
        print(
            "\nfinding records associated with XSC sources in pipeline catalog"
        )
        c_flux_total = []
        n_in_aper = []
        c_idx = []
        coords = radec_to_coords(c.ra, c.dec)
        kdt = KDT(coords)
        for i in range(phot.size):
            radius = da.r_ext[i] / 3600.
            # idx1, idx2, ds = spherematch(da.ra[i], da.dec[i],
            # 	c.ra, c.dec, tolerance=radius)
            idx, ds = spherematch2(da.ra[i],
                                   da.dec[i],
                                   c.ra,
                                   c.dec,
                                   kdt,
                                   tolerance=radius,
                                   k=500)
            # c_flux_total.append(c.flux[idx2].sum())
            # n_in_aper.append(c.flux[idx2].size)
            # c_idx.append(idx2.tolist())
            c_flux_total.append(c.flux[idx].sum())
            n_in_aper.append(ds.size)
            c_idx.append(idx.tolist())
        print("\nhistogram of source counts in r_ext aperture")
        for i in [(i, n_in_aper.count(i)) for i in set(n_in_aper)]:
            print i

        # create new version of catalog file with xsc-associated entries replaced
        c_idx = np.array(flatten(c_idx))
        print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
        replaced[cat] = {'old': c_idx.size, 'new': phot.size}
        replaced[cat]['hist'] = [(i, n_in_aper.count(i))
                                 for i in set(n_in_aper)]
        c = np.delete(c, c_idx)
        newrows = np.rec.array([(-i, da.ra[i], da.dec[i],
         phot.flux[i], phot.unc[i], 1) for i in \
         range(phot.size)], dtype=c.dtype)
        newcat = np.hstack((c, newrows))

        # write new version of catalog to disk
        fmt = ['%i'] + ['%0.8f'] * 2 + ['%.4e'] * 2 + ['%i']
        outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
        np.savetxt(outpath, newcat, fmt=fmt, header=' '.join(names))
        print('\ncreated file: {}'.format(outpath))

        # make plot of total old vs. new flux
        plt.scatter(c_flux_total, phot.flux)
        ylim = plt.gca().get_ylim()
        plt.xlim(*ylim)
        max_y = ylim[1]
        plt.plot(ylim, ylim, 'r-')
        plt.xlabel('old flux [mJy]')
        plt.ylabel('new flux [mJy]')
        name = ' '.join(cat.split('/')[-1].split('_')[:2])
        plt.title(name)
        outpath = cat.replace('combined_hdr_catalog.txt',
                              'xsc_new_vs_old_phot.png')
        plt.savefig(outpath, dpi=200)
        plt.close()
        print('\ncreated file: {}'.format(outpath))

    outfile = 'xsc_replaced.json'
    json.dump(replaced, open(outfile, 'w'))
    print("\ncreated file: {}".format(outfile))
    print("\nremoved / added")
    for k, v in replaced.iteritems():
        print k.split('/')[-1], v['old'], v['new']
    m = np.mean([i['old'] / float(i['new']) for i in replaced.values()])
    print("average ratio: {}".format(m))
    print("\nK mag and r_ext of sources with NaN photometry:")
    for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
        reg = i.split('/')[-1]
        rec = np.recfromcsv(i)
        bad_id = rec.designation.tolist()
        bad_k = rec.k_m_k20fe.tolist()
        bad_r_ext = rec.r_ext.tolist()
        print reg
        print("\tid\t\t\tKmag\tr_ext")
        if type(bad_id) is list:
            seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
            for j, k, l in seq:
                print("\t{}\t{}\t{}".format(j, k, l))
        else:
            print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
Example #20
0
###
# Kid friendly stats (i.e. console)
for ftype, fsize in size.items():
  sys.stderr.write('{} files contain {} bytes over {} files\n'.format(ftype.upper(), sum(fsize), len(files[ftype])))
###
# To upload to the correct spot on S3
# gzip *.paths
# s3cmd put --acl-public *.paths.gz s3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-YYYY-WW/

###
# Plot
for ftype, fsize in size.items():
  if not fsize:
    continue
  plt.hist(fsize, bins=50)
  plt.xlabel('Size (bytes)')
  plt.ylabel('Count')
  plt.title('Distribution for {}'.format(ftype.upper()))
  plt.savefig(prefix + '{}_dist.pdf'.format(ftype))
  #plt.show(block=True)
  plt.close()

###
# Find missing WAT / WET files
warc = set([x.strip() for x in open(prefix + 'warc.paths').readlines()])
wat = [x.strip() for x in open(prefix + 'wat.paths').readlines()]
wat = set([x.replace('.warc.wat.', '.warc.').replace('/wat/', '/warc/') for x in wat])
# Work out the missing files and segments
missing = sorted(warc - wat)
missing_segments = defaultdict(list)
for fn in missing:
          generate_pageviews(approxG)
          prev_rerr = rerr
          rerr = sum(abs(approxG.node[n]['pageviews'] - G.node[n]['pageviews']) / (G.node[n]['pageviews'] + 1) for n in G.nodes()) / G.number_of_nodes()

    np.random.shuffle(nodes)

  print 'Pageviews from "real" edge weights'
  print '-=-=-=-=-'
  display_graph(G)
  print
  print 'Pageviews from evenly distributed edge weights'
  print '-=-=-=-=-'
  display_graph(approxG)

  plt.plot(np.arange(0, len(rerrs)), rerrs, label='Relative error over time')
  plt.xlabel('Iteration')
  plt.ylabel('Average pageview relative error per node')
  plt.legend()
  plt.savefig('error_over_time.pdf')
  plt.show(block=True)

  plt.plot(np.arange(0, len(werrs)), werrs, label='Weight error over time')
  plt.xlabel('Iteration')
  plt.ylabel('Average weight error per edge')
  plt.legend()
  plt.savefig('weight_over_time.pdf')
  plt.show(block=True)

  fig, ax = plt.subplots(1)
  ppl.bar(ax, *orig_weight_data, alpha=0.5, color='black', label='Weight error before')
  ppl.bar(ax, np.arange(0, G.number_of_edges()), [abs(G[u][v]['weight'] - approxG[u][v]['weight']) for u, v in G.edges()], alpha=0.8, label='Weight error after')
Example #22
0
     Y.append(err)
     incr_orthoY.append(check_orthogonality(uL[i]))
   incr_ortho.append(['iSVD u={}'.format(num), X, incr_orthoY])
   plt.plot(X, Y, label='iSVD u={}'.format(num))
 """
 print 'Testing raw SVD => exact reconstruction'
 svT = scipy.linalg.diagsvd(s, u.shape[0], vT.shape[1]).dot(vT)
 for y in xrange(train.shape[0]):
   for x in xrange(train.shape[1]):
     colU = u[y, :]
     rowV = svT[:, x]
     assert np.allclose(train[y, x], single_dot(u, svT, x, y))
 """
 ##
 plt.title('SVD reconstruction error on {}x{} matrix'.format(*train.shape))
 plt.xlabel('Low rank approximation (k)')
 plt.ylabel('Frobenius norm')
 plt.ylim(0, max(svdY))
 plt.legend(loc='best')
 plt.savefig('reconstruct_fro_{}x{}.pdf'.format(*train.shape))
 plt.show(block=True)
 ##
 plt.plot(orthoX, orthoY, label="SVD", color='black', linewidth=2, linestyle='--')
 for label, X, Y in incr_ortho:
   plt.plot(X, Y, label=label)
 plt.title('SVD orthogonality error on {}x{} matrix'.format(*train.shape))
 plt.xlabel('Low rank approximation (k)')
 plt.ylabel('Deviation from orthogonality')
 plt.semilogy()
 #plt.ylim(0, max(orthoY))
 plt.legend(loc='best')
Example #23
0
tau_samples = mcmc.trace('tau')[:]

fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, ncols=1)

plt.rc('font', **{'family': 'DejaVu Sans'})
plt.subplot(311)
plt.title(u'''Distribución posterior de las variables
            $\lambda_1,\;\lambda_2,\;tau$''')
plt.hist(lambda_1_samples,
         histtype="stepfilled",
         bins=30,
         alpha=0.85,
         normed=True)
plt.xlim([150000, 250000])
plt.xlabel("valor de $\lambda_1$")

plt.subplot(312)
#ax.set_autoscaley_on(False)
plt.hist(lambda_2_samples,
         histtype="stepfilled",
         bins=30,
         alpha=0.85,
         normed=True)
plt.xlim([150000, 250000])
plt.xlabel("valor de $\lambda_2$")
plt.tick_params(axis="both", which="mayor", labelsize=4)

plt.subplot(313)
w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
plt.hist(tau_samples, bins=n_datos, alpha=1, weights=w, rwidth=2.0)
ax.set_xticks(ind + 0.4)
ax.set_xticklabels(["principales y vitalicios\n(" + numero_socios[0] + " socios)", 
                    "otros socios\n(" + numero_socios[1] + " socios)", 
                    ],
                    rotation="horizontal", multialignment="center")
ax.autoscale()
ax.set_title(u'Ganancias de socios principales y vitalicios\n comparados con el resto de socios',
        fontdict = {'fontsize':22}
        )

y_labels = ["0", "1,000,000", "2,000,000", "3,000,000", "4,000,000",
                "5,000,000", "6,000,000", "7,000,000", "8,000,000"]
ax.set_yticklabels(y_labels)

plt.ylabel(u'Regalías en S/.', fontdict={'fontsize':18})
plt.xlabel(u'Beneficiarios', fontdict={'fontsize':22})

ppl.bar(ax, np.arange(len(y)), y, grid="y", annotate=annotate, color=bar_color)
fig.tight_layout()
fig.savefig("output/socios_principales.png")
output = "Plot de socios Principales + Vitalicios guardados en archivo "
output += "``output/socios_principales.png``\n"
print output


## DO principales + vitalicios + activos
## numero de socios por categoria
numero_socios = [str(len(principales) + len(vitalicios) + len(activos)),
                 str(250-len(principales) - len(vitalicios) - len(activos))]

# Porcentaje de socios principales+vitalicios+activos versus otros
#print len(c)

for b in range(len(k)):
    za = []
    for a in range(len(e)):
        if a != len(e) - 1:
            za.append(c[b + len(k) * a])
    z.append(za)

z = np.array(z)

#print z

#se consegue x e y (matrices) directamente conseguia
#X,Y = np.meshgrid(e,k)

#   print z

fig, ax = plt.subplots()

cax = ax.pcolor(e, k, z, vmin=-25, vmax=175., cmap=('hot_r'))
plt.xscale('log')
plt.axis([1, 0.00223872113857, 2, 21])
plt.ylabel('<k>', rotation='vertical')
plt.xlabel(u'\u03B5')
plt.yticks([2, 4, 6, 8, 10, 12, 14, 16, 18, 20])
cbar = fig.colorbar(cax, ticks=[0, 25, 50, 75, 100, 125, 150, 175])
cbar.ax.set_yticklabels([0, 25, 50, 75, 100, 125, 150, 'N.C.'])

plt.show()
y = []
with open("congresistas.txt", "rb") as csvfile:
    f = csv.reader(csvfile, delimiter=",")
    for row in f:
        x.append(row[1].decode("utf-8"))
        y.append(row[0])

y = map(int, y)

plt.rc('font', **{'family': 'DejaVu Sans'})
fig, ax = plt.subplots(1, figsize=(20,16))

width = 0.35
ind = np.arange(len(y))
xdata = ind + 0.05 + width
ax.bar(ind, y)
ax.set_xticks(ind + 0.5)
ax.set_xticklabels(x, rotation="vertical")
ax.autoscale()
ax.set_title(u'Ranking de congresistas',
        fontdict = {'fontsize':36}
        )

plt.ylabel(u'Número de saludos oficiales', fontdict={'fontsize':32})
plt.xlabel(u'Congresista', fontdict={'fontsize':32})
plt.tick_params(axis="y", which="major", labelsize=24)

ppl.bar(ax, np.arange(len(y)), y, grid="y")
plt.tight_layout()
fig.savefig("ranking_congresista.png")
Example #27
0
           Y.append(err)
           incr_orthoY.append(check_orthogonality(uL[i]))
       incr_ortho.append(['iSVD u={}'.format(num), X, incr_orthoY])
       plt.plot(X, Y, label='iSVD u={}'.format(num))
   """
 print 'Testing raw SVD => exact reconstruction'
 svT = scipy.linalg.diagsvd(s, u.shape[0], vT.shape[1]).dot(vT)
 for y in xrange(train.shape[0]):
   for x in xrange(train.shape[1]):
     colU = u[y, :]
     rowV = svT[:, x]
     assert np.allclose(train[y, x], single_dot(u, svT, x, y))
 """
   ##
   plt.title('SVD reconstruction error on {}x{} matrix'.format(*train.shape))
   plt.xlabel('Low rank approximation (k)')
   plt.ylabel('Frobenius norm')
   plt.ylim(0, max(svdY))
   plt.legend(loc='best')
   plt.savefig('reconstruct_fro_{}x{}.pdf'.format(*train.shape))
   plt.show(block=True)
   ##
   plt.plot(orthoX,
            orthoY,
            label="SVD",
            color='black',
            linewidth=2,
            linestyle='--')
   for label, X, Y in incr_ortho:
       plt.plot(X, Y, label=label)
   plt.title('SVD orthogonality error on {}x{} matrix'.format(*train.shape))
def main():
    description = """Simple bar plot of table. Input is a table from a file or
    standard input."""

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=RawTextHelpFormatter
    )
    parser.add_argument(
        '-f',
        '--filename',
        action='store',
        metavar='table.csv',
        help='''A table of the format:
                x_title,  y_title,  Main_title
                x_label1, value1
                x_label2, value2''',
        required=False,
        dest='filename',
    )

    args = parser.parse_args()

    if args.filename:
        filename = args.filename.strip()
    else:
        parser.print_help()
        sys.exit(1)

    y = []
    x = []
    for line in open(filename, "r").readlines():
        line = line.strip()

        res = re.search("[0-9]+", line)
        if not res:
            line = line.split(",")
            x_lab = line[0]
            y_lab = line[1]
            main = line[2]
        else:
            line = line.split(",")
            if len(line) < 2:
                y.append(float(line[0]))
            else:
                x.append(line[0])
                y.append(float(line[1]))
            print(line)

    if len(x) < 1:
        x = range(1, len(y) + 1)

    plt.rc('font', **{'family': 'DejaVu Sans'})
    fig, ax = plt.subplots(1, figsize=(8, 6))

    width = 0.2
    ind = np.arange(len(y))
    xdata = ind + 0.05 + width
    ax.bar(ind, y)
    ax.set_xticks(ind + 0.5)
    ax.set_xticklabels(x)
    ax.autoscale()
    ax.set_title(
        main,
        fontdict={'fontsize': 20},
    )

    plt.ylabel(y_lab, fontdict={'fontsize': 15})
    plt.xlabel(x_lab, fontdict={'fontsize': 15})
    plt.tick_params(axis="y", which="major", labelsize=10)
    plt.tick_params(axis="x", which="major", labelsize=10)

    ppl.bar(ax, np.arange(len(y)), y, grid="y")

    plt.tight_layout()
    print("The plot has been saved as ``out.png``")
    fig.savefig("out.png")