Example #1
def test_compile_update_catalogs(app, status, warning):
    app.builder.compile_update_catalogs()

    catalog_dir = locale_dir / app.config.language / 'LC_MESSAGES'
    expect = {
        x.replace('.po', '.mo')
        for x in find_files(catalog_dir, '.po')
    }
    actual = set(find_files(catalog_dir, '.mo'))
    assert actual  # not empty
    assert actual == expect
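Note: these snippets come from different projects, so find_files is not one function. Examples #2 and #11 call it as find_files(pattern, directory), the Sphinx test snippets (#1, #5, #8) pass a bare suffix such as '.po', and most of the rest use find_files(directory, glob_pattern). As a rough reference, a minimal sketch of the glob variant (an assumed implementation, not taken from any of these projects):

import fnmatch
import os

def find_files(directory, pattern):
    """Yield paths of files under directory whose names match the glob pattern."""
    for root, _dirs, names in os.walk(directory):
        for name in names:
            if fnmatch.fnmatch(name, pattern):
                yield os.path.join(root, name)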
Example #2
def dir_walk(target_dir=None, quiet=None):
    '''Recursively walk a directory containing CTI files and return the stats.'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = ['Directory Walk: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
                   ' ', ETA()]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(),
                        'exploit_targets': set(), 'incidents': set(),
                        'indicators': set(), 'threat_actors': set(),
                        'ttps': set()}
    cooked_cybox_objs = dict()
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = process_stix_pkg(stix_package)
            for k in raw_stix_objs:
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs:
                if k not in cooked_cybox_objs:
                    cooked_cybox_objs[k] = set()
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i + 1)
        except Exception:
            # skip files that fail to parse rather than aborting the walk
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
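A hypothetical invocation of dir_walk (the path is illustrative; find_files, resolve_path, file_to_stix and process_stix_pkg are project helpers, and the progress widgets come from the progressbar package):

stix_objs, cybox_objs = dir_walk('/data/cti-feeds', quiet=True)  # illustrative path
print("unique indicators: {}".format(len(stix_objs['indicators'])))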
Example #3
def check_n_in_aper(radius_factor=1, k=100):

	for catfile in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

		print("")
		print(catfile)
		names = open(catfile).readline().split()[1:]
		cat = np.recfromtxt(catfile, names=names)

		xscfile = catfile.replace('combined_hdr_catalog.txt','2mass_xsc.tbl')
		print(xscfile)
		names = open(xscfile).read().split('\n')[76].split('|')[1:-1]
		xsc = np.recfromtxt(xscfile, skip_header=80, names=names)

		n_in_aper = []
		coords = radec_to_coords(cat.ra, cat.dec)
		kdt = KDT(coords)
		for i in range(xsc.size):
			r_deg = xsc.r_ext[i]/3600.

			idx, ds = spherematch2(xsc.ra[i], xsc.dec[i], cat.ra, cat.dec,
				kdt, tolerance=radius_factor*r_deg, k=k)
			n_in_aper.append(ds.size)
		for count in sorted(set(n_in_aper)):
			print((count, n_in_aper.count(count)))
Example #4
def plot_wise(cat_path):

	for catfile in find_files(cat_path, "*merged+wise.csv"):

		print("\nreading catalog: {}".format(catfile))
		df = pd.read_csv(catfile)

		# warn about non-positive flux sources before converting to magnitudes
		nbadflux = (df.flux <= 0).sum()
		if nbadflux > 0:
			print("warning: {} non-positive flux source(s)".format(nbadflux))
		ch = catfile.split('/')[-1].split('_')[1]
		mags = spz_jy_to_mags(df.flux*1e-3, float(ch))
		if ch == '1':
			plt.scatter(df.W1mag, mags)
			plt.xlabel('W1 [mag]')
			plt.ylabel('I1 [mag]')
		elif ch == '2':
			plt.scatter(df.W2mag, mags)
			plt.xlabel('W2 [mag]')
			plt.ylabel('I2 [mag]')
		ax = plt.gca()
		xlim, ylim = ax.get_xlim(), ax.get_ylim()
		plt.plot([-5, ylim[1]*2], [-5, ylim[1]*2], 'r-')
		ax.set_xlim(xlim)
		ax.set_ylim(ylim)
		reg = catfile.split('/')[-1].split('_')[0]
		name = '{}_{}_IRAC_vs_WISE.png'.format(reg, ch)
		outpath = '/'.join(catfile.split('/')[:-1]+[name])
		plt.savefig(outpath, dpi=120)
		plt.close()
Example #5
def setup_test():
    # delete remnants left over after failed build
    root.rmtree(True)
    (rootdir / 'roots' / 'test-intl').copytree(root)
    # copy all catalogs into locale layout directory
    for po in find_files(root, '.po'):
        copy_po = (locale_dir / 'en' / 'LC_MESSAGES' / po)
        if not copy_po.parent.exists():
            copy_po.parent.makedirs()
        shutil.copy(root / po, copy_po)
Example #6
def plot_sdss(cat_path):
	for catfile in find_files(cat_path, "*merged+sdss.txt"):

		# for now ignore the channel 2 files
		if catfile.split('/')[-1].split('_')[1] != '1':
			continue

		print("\nreading catalog: {}".format(catfile))
		df = pd.read_table(catfile, sep=' ')

		# get rid of negative flux sources, if any
		df = df[df.flux > 0]

		# convert to magnitudes
		mags = spz_jy_to_mags(df.flux*1e-3, 1)

		# print counts per magnitude bin
		for i in range(10,15):
			sc = ((df.cl == 3) & (mags > i) & (mags < i+1)).sum()
			xc = ((df.xsc == 1) & (mags > i) & (mags < i+1)).sum() 
			msg = "{}th to {}th mag: {} SDSS galaxy sources, {} 2MASS XSC sources"
			print(msg.format(i, i+1, sc, xc))

		# print number of sources agreed upon
		agree = ((df.xsc == 1) & (df.cl == 3)).sum()
		disagree = ((df.xsc == 1) & (df.cl == 6)).sum()
		na = ((df.xsc == 1) & (df.cl == 0)).sum()
		msg = "{} 2MASS XSC sources classified as galaxies by SDSS"
		print(msg.format(agree))
		msg = "{} 2MASS XSC sources classified as stars by SDSS"
		print(msg.format(disagree))
		msg = "{} 2MASS XSC sources not matched to SDSS"
		print(msg.format(na))

		# plot normed histograms of 2MASS XSC and SDSS galaxy magnitudes
		xsc_gals = (mags > 10) & (mags < 15) & (df.xsc == 1)
		sdss_gals = (mags > 10) & (mags < 15) & (df.cl == 3)
		# mags[xsc_gals].hist(label='2MASS XSC', normed=True)
		# mags[sdss_gals].hist(label='SDSS galaxies', normed=True)
		plt.hist([mags[xsc_gals].values, mags[sdss_gals].values],
			bins=5, label=['2MASS', 'SDSS'])
		plt.xlabel('IRAC1 [mag]')
		plt.ylabel('Number Count')
		reg = catfile.split('/')[-1].split('_')[0]
		plt.title('{} Extended Sources / Galaxies'.format(reg))
		plt.legend(loc=2)
		name = '{}_2mass_xsc_vs_sdss_hist.png'.format(reg)
		outpath = '/'.join(catfile.split('/')[:-1]+[name])
		plt.savefig(outpath, dpi=100)
		plt.close()
		print("created file: {}".format(outpath))
Example #7
def parseDir(path):
    # set up class and results dictionary
    log.info("Performing recursive search for smali files")
    classes = {}
    sharedobj_strings = {}

    for smali in util.find_files(path, '*.smali'):
        log.info("Parsing " + smali)
        with open(smali, 'r') as f:
            smali_class = parseSmaliFiles(f)
        classes[smali_class['ClassName']] = smali_class

    for sharedobj in util.find_files(path, '*.so'):
        log.info("Processing: " + sharedobj)
        # collect printable strings from the shared object; no smali parsing here
        sharedobj_strings[sharedobj] = util.unique_strings_from_file(sharedobj)

    log.info("Parsing Complete")
    return { 'classes' : classes,
             'sharedobjs' : sharedobj_strings }
Example #8
def get_actual():
    return set(find_files(catalog_dir, '.mo'))
Example #9
def find_vct_files(root_dir):
    """Return a chronologically sorted list of VCT file paths in root_dir."""
    files = util.find_files(root_dir)
    files = parse_file_list(files)
    return seaflowfile.sorted_files(files)
Example #10
import importlib, re
from util import find_files

modules = []

for file in find_files("modules"):
	if file.endswith(".py") and file != "__init__.py":
		filename = 'modules.' + file[:-3]
		modules.append(importlib.import_module(filename))

"""
for module in modules:
	print(module)
"""

pattern = re.compile(r'(http://i\.imgur\.com/(.*?))(\?.*)?$')  # escape the dots; lazy (.*?) plus $ lets the optional query-string group match

result = pattern.search("http://i.imgur.com/test")  # using regex here instead of BeautifulSoup because we are parsing a url, not html
if result:
	print(result.group(0))
	print(len(result.groups()))
Example #11
def dir_walk(target_dir=None, quiet=None):
    '''Recursively walk a directory containing CTI files and return the stats.'''
    files = find_files('*.xml', resolve_path(target_dir))
    if not quiet:
        widgets = ['Directory Walk: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
                   ' ', ETA()]
        progress = ProgressBar(widgets=widgets, maxval=len(files)).start()
    cooked_stix_objs = {'campaigns': set(), 'courses_of_action': set(),
                        'exploit_targets': set(), 'incidents': set(),
                        'indicators': set(), 'threat_actors': set(),
                        'ttps': set()}
    cooked_cybox_objs = {'AccountObjectType': set(),
                      'AddressObjectType': set(),
                      'APIObjectType': set(),
                      'ArchiveFileObjectType': set(),
                      'ARPCacheObjectType': set(),
                      'ArtifactObjectType': set(),
                      'ASObjectType': set(),
                      'CodeObjectType': set(),
                      'CustomObjectType': set(),
                      'DeviceObjectType': set(),
                      'DiskObjectType': set(),
                      'DiskPartitionObjectType': set(),
                      'DNSCacheObjectType': set(),
                      'DNSQueryObjectType': set(),
                      'DNSRecordObjectType': set(),
                      'DomainNameObjectType': set(),
                      'EmailMessageObjectType': set(),
                      'FileObjectType': set(),
                      'GUIDialogboxObjectType': set(),
                      'GUIObjectType': set(),
                      'GUIWindowObjectType': set(),
                      'HostnameObjectType': set(),
                      'HTTPSessionObjectType': set(),
                      'ImageFileObjectType': set(),
                      'LibraryObjectType': set(),
                      'LinkObjectType': set(),
                      'LinuxPackageObjectType': set(),
                      'MemoryObjectType': set(),
                      'MutexObjectType': set(),
                      'NetworkConnectionObjectType': set(),
                      'NetworkFlowObjectType': set(),
                      'NetworkPacketObjectType': set(),
                      'NetworkRouteEntryObjectType': set(),
                      'NetRouteObjectType': set(),
                      'NetworkSocketObjectType': set(),
                      'NetworkSubnetObjectType': set(),
                      'PDFFileObjectType': set(),
                      'PipeObjectType': set(),
                      'PortObjectType': set(),
                      'ProcessObjectType': set(),
                      'ProductObjectType': set(),
                      'SemaphoreObjectType': set(),
                      'SMSMessageObjectType': set(),
                      'SocketAddressObjectType': set(),
                      'SystemObjectType': set(),
                      'UnixFileObjectType': set(),
                      'UnixNetworkRouteEntryObjectType': set(),
                      'UnixPipeObjectType': set(),
                      'UnixProcessObjectType': set(),
                      'UnixUserAccountObjectType': set(),
                      'UnixVolumeObjectType': set(),
                      'URIObjectType': set(),
                      'URLHistoryObjectType': set(),
                      'UserAccountObjectType': set(),
                      'UserSessionObjectType': set(),
                      'VolumeObjectType': set(),
                      'WhoisObjectType': set(),
                      'WindowsComputerAccountObjectType': set(),
                      'WindowsCriticalSectionObjectType': set(),
                      'WindowsDriverObjectType': set(),
                      'WindowsEventLogObjectType': set(),
                      'WindowsEventObjectType': set(),
                      'WindowsExecutableFileObjectType': set(),
                      'WindowsFilemappingObjectType': set(),
                      'WindowsFileObjectType': set(),
                      'WindowsHandleObjectType': set(),
                      'WindowsHookObjectType': set(),
                      'WindowsKernelHookObjectType': set(),
                      'WindowsKernelObjectType': set(),
                      'WindowsMailslotObjectType': set(),
                      'WindowsMemoryPageRegionObjectType': set(),
                      'WindowsMutexObjectType': set(),
                      'WindowsNetworkRouteEntryObjectType': set(),
                      'WindowsNetworkShareObjectType': set(),
                      'WindowsPipeObjectType': set(),
                      'WindowsPrefetchObjectType': set(),
                      'WindowsProcessObjectType': set(),
                      'WindowsRegistryKeyObjectType': set(),
                      'WindowsSemaphoreObjectType': set(),
                      'WindowsServiceObjectType': set(),
                      'WindowsSystemObjectType': set(),
                      'WindowsSystemRestoreObjectType': set(),
                      'WindowsTaskObjectType': set(),
                      'WindowsThreadObjectType': set(),
                      'WindowsUserAccountObjectType': set(),
                      'WindowsVolumeObjectType': set(),
                      'WindowsWaitableTimerObjectType': set(),
                      'X509CertificateObjectType': set(),
    }
    for i, file_ in enumerate(files):
        try:
            stix_package = file_to_stix(file_)
            (raw_stix_objs, raw_cybox_objs) = process_stix_pkg(stix_package)
            for k in raw_stix_objs:
                cooked_stix_objs[k].update(raw_stix_objs[k])
            for k in raw_cybox_objs:
                cooked_cybox_objs[k].update(raw_cybox_objs[k])
            if not quiet:
                progress.update(i + 1)
        except Exception:
            # skip files that fail to parse rather than aborting the walk
            continue
    if not quiet:
        progress.finish()
    return (cooked_stix_objs, cooked_cybox_objs)
Example #12
def run_xsc_phot(bcdphot_out_path, mosaic_path):
	replaced = {}
	for cat in find_files(bcdphot_out_path, "*_combined_hdr_catalog.txt"):

		print("\n======================================================")
		print("\nadjusting photometry in: {}".format(cat.split('/')[-1]))
		print("------------------------------------------------------")
		outpath = cat.replace('combined_hdr_catalog.txt','2mass_xsc.tbl')

		# retrieve 2mass data if file doesn't already exist (from previous run)
		if not os.path.isfile(outpath):
			# get url and retrieve data
			url = query_2mass_xsc_polygon(*get_region_corners(cat))
			print("\ndownloading 2MASS photometry from: {}".format(url))
			text = urllib2.urlopen(url).read()
			# write to disk
			with open(outpath, 'w') as f:
				f.write(text)
			print("\ncreated file: {}".format(outpath))

		# read back in as recarray	
		print("\nreading: {}".format(outpath))
		names = open(outpath).read().split('\n')[76].split('|')[1:-1]
		da = np.recfromtxt(outpath, skip_header=80, names=names)

		# write input file for xsc_phot.pro
		infile_outpath = '/'.join(cat.split('/')[:-1])+'/xsc.txt'
		with open(infile_outpath,'w') as w:
			for i in range(da.shape[0]):
				w.write("{} {} {} {}\n".format(da.designation[i], da.ra[i], da.dec[i], da.r_ext[i]))
		print("\ncreated input file for xsc_phot.pro: {}".format(infile_outpath))

		# locate the FITS mosaic file for xsc_phot.pro to do photometry on
		reg, ch = cat.split('/')[-1].split('_')[:2]
		mosaicfile = [x for x in find_files(mosaic_path, '*mosaic.fits')
			if 'dirbe{}/ch{}/long/full/Combine'.format(reg, ch) in x][0]
		print("\nfound mosaic file: {}".format(mosaicfile))

		# spawn IDL subprocess running xsc_phot.pro and catch stdout in file
		outpath = infile_outpath.replace('xsc.txt', 'xsc_phot_out.txt')
		if not os.path.isfile(outpath):
			outfile = open(outpath,'w')
			print("\nspawning xsc_phot.pro IDL subprocess")
			cmd = "xsc_phot,'"+mosaicfile+"','"+infile_outpath+"','long'"
			rc = subprocess.call(['/usr/local/itt/idl71/bin/idl','-quiet','-e',cmd], 
				stderr = subprocess.PIPE, stdout = outfile)
			outfile.close()

		# read in output to recarray
		print("\nreading: {}".format(outpath))
		phot = np.recfromtxt(outpath, names=['id','flux','unc','sky','skyunc'])

		# make sure rows are aligned
		assert (da.designation == phot.id).all()

		# ignore xsc sources we got a NaN or negative flux for
		bad = np.isnan(phot.flux) | (phot.flux < 0)
		print("\naper.pro returned NaN or negative flux for {} sources".format(bad.sum()))
		if bad.sum() > 0:
			for i in phot[bad].id:
				print(i)
			outpath = cat.replace('combined_hdr_catalog.txt','xsc_nan_phot.csv')
			with open(outpath,'w') as f:
				w = csv.writer(f)
				w.writerow(da.dtype.names)
				w.writerows(da[bad].tolist())
			print('\ncreated file: {}'.format(outpath))
		phot = phot[~bad]
		da = da[~bad]

		# read in pipeline catalog
		print("\nreading: {}".format(cat))
		names = open(cat).readline().split()[1:]
		c = np.recfromtxt(cat, names=names)

		# loop through xsc sources and find matches in pipeline catalog
		print("\nfinding records associated with XSC sources in pipeline catalog")
		c_flux_total = []
		n_in_aper = []
		c_idx = []
		coords = radec_to_coords(c.ra, c.dec)
		kdt = KDT(coords)
		for i in range(phot.size):
			radius = da.r_ext[i]/3600.
			# idx1, idx2, ds = spherematch(da.ra[i], da.dec[i], 
			# 	c.ra, c.dec, tolerance=radius)
			idx, ds = spherematch2(da.ra[i], da.dec[i], c.ra, c.dec,
				kdt, tolerance=radius, k=500)
			# c_flux_total.append(c.flux[idx2].sum())
			# n_in_aper.append(c.flux[idx2].size)
			# c_idx.append(idx2.tolist())
			c_flux_total.append(c.flux[idx].sum())
			n_in_aper.append(ds.size)
			c_idx.append(idx.tolist())
		print("\nhistogram of source counts in r_ext aperture")
		for count in sorted(set(n_in_aper)):
			print((count, n_in_aper.count(count)))

		# create new version of catalog file with xsc-associated entries replaced
		c_idx = np.array(flatten(c_idx))
		print("\nremoving {}, adding {}".format(c_idx.size, phot.size))
		replaced[cat] = {'old':c_idx.size, 'new':phot.size}
		replaced[cat]['hist'] = [(i,n_in_aper.count(i)) for i in set(n_in_aper)]
		c = np.delete(c, c_idx)
		newrows = np.rec.array([(-i, da.ra[i], da.dec[i], 
			phot.flux[i], phot.unc[i], 1) for i in \
			range(phot.size)], dtype=c.dtype)
		newcat = np.hstack((c, newrows))

		# write new version of catalog to disk
		fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']
		outpath = cat.replace('catalog.txt', 'catalog_xsc_cor.txt')
		np.savetxt(outpath, newcat, fmt = fmt, header = ' '.join(names))
		print('\ncreated file: {}'.format(outpath))

		# make plot of total old vs. new flux
		plt.scatter(c_flux_total, phot.flux)
		ylim = plt.gca().get_ylim()
		plt.xlim(*ylim)
		max_y = ylim[1]
		plt.plot(ylim, ylim, 'r-')
		plt.xlabel('old flux [mJy]')
		plt.ylabel('new flux [mJy]')
		name = ' '.join(cat.split('/')[-1].split('_')[:2])
		plt.title(name)
		outpath = cat.replace('combined_hdr_catalog.txt','xsc_new_vs_old_phot.png')
		plt.savefig(outpath, dpi=200)
		plt.close()
		print('\ncreated file: {}'.format(outpath))

	outfile = 'xsc_replaced.json'
	with open(outfile, 'w') as f:
		json.dump(replaced, f)
	print("\ncreated file: {}".format(outfile))
	print("\nremoved / added")
	for k, v in replaced.items():
		print("{} {} {}".format(k.split('/')[-1], v['old'], v['new']))
	m = np.mean([i['old']/float(i['new']) for i in replaced.values()])
	print("average ratio: {}".format(m))
	print("\nK mag and r_ext of sources with NaN photometry:")
	for i in find_files(bcdphot_out_path, "*xsc_nan_phot.csv"):
		reg = i.split('/')[-1]
		rec = np.recfromcsv(i)
		bad_id = rec.designation.tolist()
		bad_k = rec.k_m_k20fe.tolist()
		bad_r_ext = rec.r_ext.tolist()
		print(reg)
		print("\tid\t\t\tKmag\tr_ext")
		if type(bad_id) is list:
			seq = sorted(zip(bad_id, bad_k, bad_r_ext), key=lambda x: x[0])
			for j,k,l in seq:
				print("\t{}\t{}\t{}".format(j,k,l))
		else:
			print("\t{}\t{}\t{}".format(bad_id, bad_k, bad_r_ext))
Example #13
def match_sdss(cat_path):
	for catfile in find_files(cat_path, "*merged.txt"):

		# read pipeline catalog
		print("\nreading catalog: {}".format(catfile))
		cat = pd.read_table(catfile, sep=' ')

		# retrieve SDSS data from ViZieR if not already downloaded
		ch = catfile.split('/')[-1].split('_')[1]
		outpath = catfile.replace('{}_merged.txt'.format(ch), 'sdss.vot')
		if not os.path.isfile(outpath):
			cntr_ra = np.median(cat.ra)
			cntr_dec = np.median(cat.dec)
			# get source from one corner of the mosaic to calculate radius
			c1 = (cat.ra.min(), cat.dec[cat.ra==cat.ra.min()].values[0])
			# make radius 10% bigger just to be on safe side
			radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
			url = get_url(cntr_ra, cntr_dec, radius)
			print("retrieving URL: {}".format(url))
			handler = urllib2.urlopen(url)
			raw = handler.read()
			with open(outpath,'wb') as f:
				f.write(raw)
			print("created file: {}".format(outpath))

		# parse VOTable
		print("reading VOTable: {}".format(outpath))
		table = parse_single_table(outpath)

		# if this is one of the southern hemisphere regions, delete and continue
		if table.array.size == 0:
			os.remove(outpath)
			print("outside of SDSS coverage")
			continue

		# make sure no missing data
		for name in table.array.dtype.names:
			assert table.array[name].mask.sum() == 0

		# get unmasked array
		sdss = table.array.data

		# make sure sky coverage is big enough
		assert sdss['RAJ2000'].min() < cat.ra.min()
		assert sdss['RAJ2000'].max() > cat.ra.max()
		assert sdss['DEJ2000'].min() < cat.dec.min()
		assert sdss['DEJ2000'].max() > cat.dec.max()

		# match to catalog
		assert cat.shape[0] < sdss.shape[0]
		tol = 2/3600.
		idx1, idx2, ds = spherematch(cat.ra, cat.dec, 
			sdss['RAJ2000'], sdss['DEJ2000'], tolerance = tol)
		print("matched {} out of {} sources with {} arcsec tolerance".format(ds.size, 
			cat.shape[0], tol*3600))

		# create vector of star/galaxy class (0=missing, 3=galaxy, 6=star)
		cl = np.zeros(cat.shape[0]).astype('int')
		cl[idx1] = sdss['cl'][idx2]

		# add the column to the dataset
		cat['cl'] = cl

		# write to new file
		outpath = catfile.replace('merged.txt', 'merged+sdss.txt')
		# fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']*2
		# hdr = ' '.join(names)+' cl'
		# np.savetxt(outpath, df.to_records(index=False), fmt=fmt, header=hdr)
		cat.to_csv(outpath, index=False, sep=' ', float_format='%.8f')
		print("created file: {}".format(outpath))
Example #14
def merge_subarray(vg_dir, bcdphot_dir):
	out_dir = vg_dir.replace('clean', 'plots_catalogs')
	if not os.path.isdir(out_dir):
		os.mkdir(out_dir)

	hdr_files = find_files(bcdphot_dir, '*combined_hdr_*xsc_cor.txt')
	# hdr_file = list(hdr_files)[0]
	for hdr_file in hdr_files:
		reg, ch = hdr_file.split('/')[-1].split('_')[:2]
		sub_file = '/'.join([vg_dir, "d{}_ch{}_agg.csv".format(reg, ch)])

		hdr_names = open(hdr_file).readline().split()[1:]
		hdr = np.recfromtxt(hdr_file, names=hdr_names)
		sub = np.recfromcsv(sub_file)
		# sub.flux *= 1e-3	# convert from uJy to mJy

		idx1, idx2, ds = spherematch(sub.ra, sub.dec, hdr.ra, hdr.dec, tolerance=3/3600.)
		df = pd.DataFrame({'sub_flux': sub.flux[idx1], 'hdr_flux':hdr.flux[idx2]})

		slope = fit_line(df, int(ch))
		with open("{}/linefits.txt".format(out_dir),'a') as f:
			f.write("{} {} {}\n".format(reg, ch, slope))

		fig = df.plot(x='hdr_flux',y='sub_flux', kind='scatter')
		fig.plot([0, fig.get_xlim()[1]], [0, slope * fig.get_xlim()[1]], 'r-')
		fig.set_title("region {} channel {}".format(reg, ch))
		fig.text(fig.get_xlim()[1]*0.2, fig.get_ylim()[1]*0.8, 
			"slope: {0:3f}".format(slope), fontsize=24)
		plt.savefig("{}/{}_{}_linefit.png".format(out_dir, reg, ch), dpi=300)
		plt.close()

		# now save the (uncorrected) matched data to disk
		sub_matched = pd.DataFrame.from_records(sub[idx1])
		# rename the columns
		cols = sub_matched.columns.tolist()
		cols_new = ['sub_'+i for i in cols]
		sub_matched.columns = cols_new
		# set hdr_matched dataframe index equal to sub_matched index, this is
		# necessary for concatenation using pandas.concat
		hdr_matched = pd.DataFrame.from_records(hdr[idx2]).set_index(sub_matched.index)
		# rename the columns
		cols = hdr_matched.columns.tolist()
		cols_new = ['hdr_'+i for i in cols]
		hdr_matched.columns = cols_new
		# concatenate
		concat = pd.concat([sub_matched, hdr_matched], axis=1)
		# # convert subarray flux to mJy
		# concat.sub_flux = concat.sub_flux*1e3
		# concat.sub_unc = concat.sub_unc*1e3
		concat.to_csv("{}/{}_{}_hdr_vs_sub.csv".format(out_dir, reg, ch), 
			index=False, float_format='%.8f')

		# now correct all the subarray flux values with the slope
		sub.flux /= slope

		# now merge hdr and subarray into one dataset:
		# want to keep all the hdr photometry that is not saturated, and
		# keep only the subarray photometry above the hdr saturation limit
		cutoff = get_cutoff(ch)
		bad = hdr.flux > cutoff
		hdr_subset = pd.DataFrame.from_records(hdr[~bad])
		bad = sub.flux < cutoff
		sub_subset = pd.DataFrame.from_records(sub[~bad])
		# add n_obs column to subarray data so it has same format as hdr
		sub_subset['n_obs'] = 4
		# add column indicating whether if it came from subarray
		hdr_subset['sub'] = np.zeros(hdr_subset.shape[0]).astype(int)
		sub_subset['sub'] = np.ones(sub_subset.shape[0]).astype(int)
		# concatenate them
		concat = pd.concat([hdr_subset, sub_subset], axis=0, ignore_index=True)
		# get rid of the 'id' field since it is no longer relevant
		# but add a column indicating if it was a 2MASS XSC measurement
		concat['xsc'] = np.zeros(concat.shape[0]).astype(int)
		concat.loc[concat.id < 1, 'xsc'] = 1
		concat = concat.drop('id', axis=1)
		# apply 1% flux reduction to correct for stray light (only to >100 mJy sources);
		# compute the mask once so unc is corrected for the same sources as flux
		bright = concat.flux > 100
		concat.loc[bright, 'flux'] *= 0.99
		concat.loc[bright, 'unc'] *= 0.99
		# write to disk
		concat.to_csv("{}/{}_{}_merged.txt".format(out_dir, reg, ch), 
			index=False, sep=' ', float_format='%.8f')
Example #15
def match_wise(cat_path, sdss=True):
	if sdss:
		search_pattern = "*merged+sdss.txt"
	else:
		search_pattern = "*merged.txt"

	for catfile in find_files(cat_path, search_pattern):

		# read pipeline catalog
		print("\nreading catalog: {}".format(catfile))
		cat = pd.read_table(catfile, sep=' ')

		# retrieve WISE data from ViZieR if not already downloaded
		ch = catfile.split('/')[-1].split('_')[1]
		if sdss:
			outpath = catfile.replace('{}_merged+sdss.txt'.format(ch), 'wise.vot')
		else:
			outpath = catfile.replace('{}_merged.txt'.format(ch), 'wise.vot')
		if not os.path.isfile(outpath):
			cntr_ra = np.median(cat.ra)
			cntr_dec = np.median(cat.dec)
			# get source from one corner of the mosaic to calculate radius
			c1 = (cat.ra.min(), cat.dec[cat.ra==cat.ra.min()].values[0])
			# make radius 10% bigger just to be on safe side
			radius = great_circle_distance(cntr_ra, cntr_dec, *c1) * 1.1
			url = get_url(cntr_ra, cntr_dec, radius)
			print("retrieving URL: {}".format(url))
			handler = urllib2.urlopen(url)
			raw = handler.read()
			with open(outpath,'wb') as f:
				f.write(raw)
			print("created file: {}".format(outpath))

		# parse VOTable
		print("reading VOTable: {}".format(outpath))
		table = parse_single_table(outpath)

		# if this is one of the southern hemisphere regions, delete and continue
		if table.array.size == 0:
			os.remove(outpath)
			print("no WISE coverage")
			continue

		# get unmasked array
		wise = table.array.data

		# make sure sky coverage is big enough
		assert wise['RAJ2000'].min() < cat.ra.min()
		assert wise['RAJ2000'].max() > cat.ra.max()
		assert wise['DEJ2000'].min() < cat.dec.min()
		assert wise['DEJ2000'].max() > cat.dec.max()

		# match to catalog
		tol = 2/3600.
		if cat.shape[0] < wise.shape[0]:
			idx1, idx2, ds = spherematch(cat.ra, cat.dec, 
				wise['RAJ2000'], wise['DEJ2000'], tolerance = tol)
		else:
			idx2, idx1, ds = spherematch(wise['RAJ2000'], wise['DEJ2000'],
				cat.ra, cat.dec, tolerance = tol)
		print("matched {} out of {} sources with {} arcsec tolerance".format(ds.size, 
			cat.shape[0], tol*3600))

		# add WISE to the catalog (the freshly read catalog has a default
		# RangeIndex, so .loc labels line up with the spherematch indices)
		if ch == '1':
			cat['W1mag'] = np.repeat(np.nan, cat.shape[0])
			cat['e_W1mag'] = np.repeat(np.nan, cat.shape[0])
			cat.loc[idx1, 'W1mag'] = wise['W1mag'][idx2]
			cat.loc[idx1, 'e_W1mag'] = wise['e_W1mag'][idx2]
		elif ch == '2':
			cat['W2mag'] = np.repeat(np.nan, cat.shape[0])
			cat['e_W2mag'] = np.repeat(np.nan, cat.shape[0])
			cat.loc[idx1, 'W2mag'] = wise['W2mag'][idx2]
			cat.loc[idx1, 'e_W2mag'] = wise['e_W2mag'][idx2]
		else:
			print("unexpected channel '{}': WISE data not added".format(ch))

		# write to new file
		outpath = catfile.replace('.txt', '+wise.csv')
		# fmt = ['%i']+['%0.8f']*2+['%.4e']*2+['%i']*2
		# hdr = ' '.join(names)+' cl'
		# np.savetxt(outpath, df.to_records(index=False), fmt=fmt, header=hdr)
		cat.to_csv(outpath, index=False, float_format='%.8f')
		print("created file: {}".format(outpath))
Example #16
def plot_spz_vs_wise_sdss_class(cat_path, plot_style='scatter'):

	catfiles = list(find_files(cat_path, "*merged+sdss+wise.csv"))
	ch1_files = catfiles[::2]
	ch2_files = catfiles[1::2]

	for ch1, ch2 in zip(ch1_files, ch2_files):

		reg1 = ch1.split('/')[-1].split('_')[0]
		reg2 = ch2.split('/')[-1].split('_')[0]
		assert reg1 == reg2

		print("\nreading catalog: {}".format(ch1))
		print("reading catalog: {}".format(ch2))
		df1 = pd.read_csv(ch1)
		df2 = pd.read_csv(ch2)

		# convert to magnitudes
		mags1 = spz_jy_to_mags(df1.flux*1e-3, 1)
		mags2 = spz_jy_to_mags(df2.flux*1e-3, 2)

		# match ch1 / ch2
		idx1, idx2 = match_cats(df1, df2, tol=2/3600.)

		# save matched catalogs
		matched1 = df1.loc[idx1]
		matched2 = df2.loc[idx2]
		ch1_cols = [i+'_1' for i in df1.columns.tolist()]
		ch2_cols = [i+'_2' for i in df2.columns.tolist()]
		# matched1.columns = ch1_cols	
		# matched2.columns = ch2_cols
		# matched = pd.concat([matched1, matched2], 1, ignore_index=True)	# weird error
		matched = np.concatenate([matched1.values, matched2.values], 1)
		df_matched = pd.DataFrame(matched, columns=ch1_cols+ch2_cols)
		df_matched['I1'] = mags1[idx1].values
		df_matched['I2'] = mags2[idx2].values
		outpath = '/'.join(ch1.split('/')[:-1])+'/{}_2ch_matched+sdss.csv'.format(reg1)
		df_matched.to_csv(outpath, index=False, float_format='%.8f')
		print("created file: {}".format(outpath))

		# identify SDSS galaxies and stars
		galaxies = (df1.cl[idx1].values == 3) & (df2.cl[idx2].values == 3)
		stars = (df1.cl[idx1].values == 6) & (df2.cl[idx2].values == 6)

		# plot I1-I2 vs. W1-W2
		color1 = df1.W1mag[idx1].values - df2.W2mag[idx2].values
		color2 = mags1[idx1].values - mags2[idx2].values
		# galaxies
		name = '{}_I1-I2_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(color1[galaxies], color2[galaxies], outpath, 'W1-W2 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-color')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(color1[stars], color2[stars], outpath, 'W1-W2 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-color')

		# plot I1-W1 vs. I2-W2
		color1 = mags1[idx1].values - df1.W1mag[idx1].values
		color2 = mags2[idx2].values - df2.W2mag[idx2].values
		# galaxies
		name = '{}_I1-W1_vs_I2-W2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(color1[galaxies], color2[galaxies], outpath, 'I1-W1 [mag]', 'I2-W2 [mag]', 
			plot_style=plot_style, plot_type='color-color')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(color1[stars], color2[stars], outpath, 'I1-W1 [mag]', 'I2-W2 [mag]', 
			plot_style=plot_style, plot_type='color-color')

		# plot spz color-magnitude diagrams
		color = mags1[idx1].values - mags2[idx2].values
		mags = mags1[idx1].values
		# galaxies
		name = '{}_I1_vs_I1-I2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(mags[galaxies], color[galaxies], outpath, 'I1 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(mags[stars], color[stars], outpath, 'I1 [mag]', 'I1-I2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')

		# plot wise color-magnitude diagrams
		color = df1.W1mag[idx1].values - df2.W2mag[idx2].values
		mags = df1.W1mag[idx1].values
		# galaxies
		name = '{}_W1_vs_W1-W2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(mags[galaxies], color[galaxies], outpath, 'W1 [mag]', 'W1-W2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(mags[stars], color[stars], outpath, 'W1 [mag]', 'W1-W2 [mag]', 
			plot_style=plot_style, plot_type='color-mag')
	
		# plot I1 vs I2
		mags1_matched = mags1[idx1].values
		mags2_matched = mags2[idx2].values
		# galaxies
		name = '{}_I1_vs_I2_galaxies_plot_style.png'.format(reg1)
		name = name.replace('plot_style', plot_style)
		outpath = '/'.join(ch1.split('/')[:-1]+[name])
		plot(mags1_matched[galaxies], mags2_matched[galaxies], outpath, 'I1 [mag]', 'I2 [mag]', 
			plot_style=plot_style, plot_type='mag-mag')
		# stars
		outpath = '/'.join(ch1.split('/')[:-1]+[name]).replace('galaxies', 'stars')
		plot(mags1_matched[stars], mags2_matched[stars], outpath, 'I1 [mag]', 'I2 [mag]', 
			plot_style=plot_style, plot_type='mag-mag')