Example #1
def SDSS_select(sql):
    '''Pass an SQL query to SDSS and return a pandas DataFrame.
    In case of error, wait 10 seconds and try again; give up after 5 tries.'''
    br = mechanize.Browser()
    br.set_handle_robots(False)
    tryCount = 0
    while True:
        tryCount += 1
        try:
            br.open('http://skyserver.sdss.org/dr13/en/tools/search/sql.aspx',
                    timeout=4)
            br.select_form(name='sql')
            br['cmd'] = sql
            br['format'] = ['csv']
            response = br.submit()
            file_like = StringIO.StringIO(response.get_data())
            df = pd.read_csv(file_like, skiprows=1)
            break
        except (mechanize.URLError, mechanize.HTTPError, httplib.BadStatusLine,
                pd.parser.CParserError) as e:
            if tryCount > 5:
                message = 'Unable to connect to SkyServer; trying again in 10 min'
                logging.exception(message)
                print message
                raise fn.DataAccessError(message)
            logging.exception(e)
            time.sleep(10)
    return df
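A minimal usage sketch for SDSS_select, assuming the imports the snippet relies on (mechanize, StringIO, pandas as pd, logging, time, and the module fn providing DataAccessError) are already in place; the query itself is hypothetical:

#hypothetical query: fetch a handful of photometric objects from SDSS
sample_query = 'SELECT TOP 10 objID, ra, dec, r FROM PhotoObj WHERE type = 3'

try:
    df = SDSS_select(sample_query)
    print df.head()
except fn.DataAccessError:
    print 'SkyServer unreachable after 5 tries'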
Example #2
def getWISE(entry):
    '''
    Get IR data from the AllWISE Source Catalog.
    Attempts to query IRSA 5 times; if the queries keep failing, abort.
    Returns the AllWISE match as a dict, or None if no match is found.
    '''

    ir_pos = coord.SkyCoord(entry['consensus']['ir_ra'],
                            entry['consensus']['ir_dec'],
                            unit=(u.deg, u.deg),
                            frame='icrs')

    tryCount = 0
    #in case of error, wait 10 sec and try again; give up after 5 tries
    while True:
        tryCount += 1
        try:
            table = Irsa.query_region(ir_pos,
                                      catalog='allwise_p3as_psd',
                                      radius=3. * u.arcsec)
            break
        except (astroquery.exceptions.TimeoutError,
                astroquery.exceptions.TableParseError) as e:
            if tryCount > 5:
                message = 'Unable to connect to IRSA; trying again in 10 min'
                logging.exception(message)
                print message
                raise fn.DataAccessError(message)
            logging.exception(e)
            time.sleep(10)
        except Exception as e:
            if 'Query failed' in str(e) or 'timed out' in str(e):
                if tryCount > 5:
                    message = 'Unable to connect to IRSA; trying again in 10 min'
                    logging.exception(message)
                    print message
                    raise fn.DataAccessError(message)
                logging.exception(e)
                time.sleep(10)
            else:
                raise

    if len(table):
        #keep the nearest entry with w1snr > 5, counting qualifying matches as they are found
        number_matches = 0
        if table[0]['w1snr'] > 5:
            match = table[0]
            dist = match['dist']
            number_matches += 1
        else:
            match = None
            dist = np.inf
        if len(table) > 1:
            for row in table:
                if row['dist'] < dist and row['w1snr'] > 5:
                    match = row
                    dist = match['dist']
                    number_matches += 1
        if match:
            wise_match = {'designation': 'WISEA' + match['designation'],
                          'ra': match['ra'], 'dec': match['dec'],
                          'number_matches': np.int16(number_matches),
                          'w1mpro': match['w1mpro'], 'w1sigmpro': match['w1sigmpro'], 'w1snr': match['w1snr'],
                          'w2mpro': match['w2mpro'], 'w2sigmpro': match['w2sigmpro'], 'w2snr': match['w2snr'],
                          'w3mpro': match['w3mpro'], 'w3sigmpro': match['w3sigmpro'], 'w3snr': match['w3snr'],
                          'w4mpro': match['w4mpro'], 'w4sigmpro': match['w4sigmpro'], 'w4snr': match['w4snr']}
        else:
            wise_match = None
    else:
        wise_match = None

    if wise_match:
        logging.info('AllWISE match found')
        #drop masked values and convert numpy scalars to native Python types
        #so the match can be serialized; keys() returns a list in Python 2,
        #so popping during iteration is safe here
        for key in wise_match.keys():
            if wise_match[key] is np.ma.masked:
                wise_match.pop(key)
            elif wise_match[key] and type(wise_match[key]) is not str:
                wise_match[key] = wise_match[key].item()
            elif wise_match[key] == 0:
                #numpy zeros are falsy and skip the branch above; replace them explicitly
                wise_match[key] = 0
    else:
        logging.info('No AllWISE match found')

    return wise_match
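A minimal usage sketch for getWISE, assuming the snippet's implied imports (astroquery's Irsa, astropy coord and u, numpy as np, logging, time, fn); the entry and its coordinates are illustrative only:

#illustrative entry containing only the fields getWISE reads
entry = {'consensus': {'ir_ra': 206.4682, 'ir_dec': 23.3573}}

wise_match = getWISE(entry)
if wise_match:
    print 'AllWISE match %s, W1 = %.2f mag' % (wise_match['designation'], wise_match['w1mpro'])
else:
    print 'no AllWISE counterpart within 3 arcsec'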
Example #3
def RGZcatalog():
	
	#start timer
	starttime = time.time()
	
	#begin logging even if not run from command line
	logging.basicConfig(filename='{}/{}'.format(rgz_path,logfile), level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
	logging.captureWarnings(True)
	
	#connect to database of subjects
	subjects = db['radio_subjects']
	consensus = db['consensus{}'.format(version)]
	catalog = db['catalog{}'.format(version)] #this is being populated by this program
	if catalog.count():
		logging.info('Catalog contains entries; appending')
	else:
		catalog.create_index('catalog_id', unique=True)
	
	#get dictionary for finding the path to FITS files and WCS headers
	with open('%s/first_fits.txt' % rgz_path) as f:
		lines = f.readlines()
	
	pathdict = {}
	for l in lines:
		spl = l.split(' ')
		pathdict[spl[1].strip()] = '%s/rgz/raw_images/RGZ-full.%i/FIRST-IMGS/%s.fits' % (data_path, int(spl[0]), spl[1].strip())
	
	#count the number of entries from this run and how many entries are in the catalog total
	count = 0
	if catalog.count() != 0:
		for entry in catalog.find().sort('catalog_id', -1).limit(1):
			IDnumber = entry['catalog_id']
	else:
		IDnumber = 0
	
	#find completed catalog entries so they can be skipped
	consensus_set = set()
	for source in consensus.find():
		consensus_set.add(source['zooniverse_id'])
	catalog_set = set()
	for entry in catalog.find():
		catalog_set.add(entry['zooniverse_id'])
	to_be_completed = consensus_set.difference(catalog_set)
	if os.path.exists(in_progress_file):
		with open(in_progress_file, 'r') as f:
			in_progress_zid = f.read()
		to_be_completed = to_be_completed.union({in_progress_zid}) #wrap in a set: union with a bare string would add each character separately
	to_be_completed = list(to_be_completed)
	
	#iterate through all noncompleted subjects
	for subject in subjects.find({'zooniverse_id': {'$in':to_be_completed} }).batch_size(10):
	#for subject in subjects.find({'zooniverse_id': {'$in': ['ARG00000sl', 'ARG0003f9l']} }):
	#for subject in subjects.find({'zooniverse_id':'ARG00000sl'}): #sample subject with distinct sources
	#for subject in subjects.find({'zooniverse_id':'ARG0003f9l'}): #sample subject with multiple-component source
		
		#mark subject as being in-progress
		with open(in_progress_file, 'w') as f:
			f.write(subject['zooniverse_id'])
		
		#iterate through all consensus groupings
		for source in consensus.find({'zooniverse_id':subject['zooniverse_id'], 'first_id':{'$exists':True}}):
			
			#do not process if this object in this source is already in the catalog
			process = True
			for i in catalog.find({'zooniverse_id':subject['zooniverse_id']}):
				if i['consensus']['label'] == source['label']:
					process = False
			
			if process:
				
				logging.info('Processing consensus object %s within subject field %s', source['label'], subject['zooniverse_id'])
				
				count += 1
				IDnumber += 1
				
				#display which entry is being processed to see how far the program is
				print 'Processing entry %i (consensus %s in subject %s)' % (IDnumber, source['label'], subject['zooniverse_id'])
				entry = {'catalog_id':IDnumber, 'zooniverse_id':str(subject['zooniverse_id'])}
				
				#find location of FITS file; once non-FIRST sources are included, modify this
				fid = source['first_id']
				#if fid[0] == 'F':
				fits_loc = pathdict[fid]
				entry.update({'first_id':str(fid)})
				#else:
				#	raise RuntimeError('Not expecting non-FIRST data')
				#	fits_loc = '%s/rgz/raw_images/ATLAS/2x2/%s_radio.fits' % (data_path, fid)
				#	entry.update({'atlas_id':str(fid)})
				
				#find IR counterpart from consensus data, if present
				w = wcs.WCS(fits.getheader(fits_loc, 0)) #gets pixel-to-WCS conversion from header
				ir_coords = source['ir_peak']
				if ir_coords[0] == -99: #sentinel value indicating no IR counterpart was identified
					ir_pos = None
					wise_match = None
					sdss_match = None
				else:
					#this only works for FIRST images; will need changing when ATLAS is added
					p2w = w.wcs_pix2world
					ir_ra_pixels = ir_coords[0]*w._naxis1/500.
					ir_dec_pixels = 1 + w._naxis2 - ir_coords[1]*w._naxis2/500.
					ir_peak = p2w( np.array([[ir_ra_pixels, ir_dec_pixels]]), 1)
					ir_pos = coord.SkyCoord(ir_peak[0][0], ir_peak[0][1], unit=(u.deg,u.deg), frame='icrs')
				
				entry.update({'consensus': {'n_radio': source['n_votes'], 'n_total': source['n_total'],
											'n_ir': source['n_ir'], 'ir_flag': source['ir_flag'],
											'ir_level': source['ir_level'], 'radio_level': source['consensus_level'],
											'label': source['label']}})
				if ir_pos:
					logging.info('IR counterpart found')
					entry['consensus'].update({'ir_ra':ir_pos.ra.deg, 'ir_dec':ir_pos.dec.deg})
				else:
					logging.info('No IR counterpart found')
				
				#if an IR peak exists, search AllWISE and SDSS for counterparts
				if ir_pos:
					
					wise_match = p.getWISE(entry)
					if wise_match:
						designation = wise_match['designation'][5:]
						pz = db['wise_pz'].find_one({'wiseX':designation})
						if pz is not None:
							wise_match['photo_redshift'] = pz['zPhoto_Corr']
						entry.update({'AllWISE':wise_match})
					
					#SDSS matching is currently disabled; the retry logic is kept below for reference
					'''tryCount = 0
					while(True):
						tryCount += 1
						try:
							sdss_match = p.getSDSS(entry)
							if sdss_match:
								entry.update({'SDSS':sdss_match})
							break
						except KeyError as e:
							if tryCount>5:
								message = 'Bad response from SkyServer; trying again in 10 min'
								output(message, logging.exception)
								raise fn.DataAccessError(message)
							elif e.message == 'ra':
								#unable to reproduce; no error when I try again, so let's just do that
								logging.exception(e)
								time.sleep(10)
							else:
								raise e'''
					sdss_match = None
				
				#try block attempts to read JSON from web; if it exists, calculate data
				try:
					link = subject['location']['contours'] #gets url as Unicode string
					
					# Use local file if available
					
					jsonfile = link.split("/")[-1]
					jsonfile_path = "{0}/rgz/contours/{1}".format(data_path,jsonfile)
					if os.path.exists(jsonfile_path):
						with open(jsonfile_path,'r') as jf:
							data = json.load(jf)
					
					# Otherwise, read from web
					
					else:
						
						# Reform weblink to point to the direct S3 URL, which will work even with older SSLv3
						
						link_s3 = "http://zooniverse-static.s3.amazonaws.com/"+link.split('http://')[-1]
						
						tryCount = 0
						while(True): #in case of error, wait 10 sec and try again; give up after 5 tries
							tryCount += 1
							try:
								compressed = urllib2.urlopen(str(link_s3)).read() #reads contents of url to str
								break
							except (urllib2.URLError, urllib2.HTTPError) as e:
								if tryCount>5:
									message = 'Unable to connect to Amazon Web Services; trying again in 10 min'
									output(message, logging.exception)
									raise fn.DataAccessError(message)
								logging.exception(e)
								time.sleep(10)
						
						tempfile = StringIO.StringIO(compressed) #temporarily stores contents as file (emptied after unzipping)
						uncompressed = gzip.GzipFile(fileobj=tempfile, mode='r').read() #unzips contents to str
						data = json.loads(uncompressed) #loads JSON object
					
					radio_data = p.getRadio(data, fits_loc, source)
					entry.update(radio_data)
					
					#check if a component is straddling the edge of the image
					entry.update({'overedge':0})
					source_bbox = np.array(source['bbox'])
					for c in data['contours']:
						bbox = np.array(c[0]['bbox'])
						#test whether this contour's bbox is one of the source's bboxes (full-row match)
						if np.any(np.all(source_bbox == bbox, axis=-1)):
							vertices = []
							for pos in c[0]['arr']:
								vertices.append([pos['x'], pos['y']])
							vertices = np.array(vertices)
							#an unclosed contour whose start lies near the image border straddles the edge
							diff = vertices[0] - vertices[-1]
							if np.sqrt(diff[0]**2 + diff[1]**2) > 1 and (np.any(vertices[0] <= 4) or np.any(vertices[0] >= 128)):
								entry.update({'overedge':1})
								break
					
					#use WISE catalog name if available
					if wise_match:
						entry.update({'rgz_name':'RGZ{}{}'.format(wise_match['designation'][5:14], wise_match['designation'][15:22])})
						
					else:
						#if not, try consensus IR position
						if ir_pos:
							ra = ir_pos.ra.deg
							dec = ir_pos.dec.deg
						#finally, just use radio center
						else:
							ra = radio_data['radio']['ra']
							dec = radio_data['radio']['dec']
						
						#convert decimal degrees to sexagesimal for the IAU-style name
						ra_h = int(ra/15.)
						ra_m = int((ra - ra_h*15)*4)
						ra_s = (ra - ra_h*15 - ra_m/4.)*240
						dec_d = int(dec)
						dec_m = int((dec - dec_d)*60)
						dec_s = int((dec - dec_d - dec_m/60.)*3600)
						entry.update({'rgz_name':'RGZJ{:0=2}{:0=2}{:0=4.1f}{:0=+3}{:0=2}{:0=2}'.format(ra_h, ra_m, ra_s, dec_d, dec_m, dec_s)})
					
					#calculate physical parameters using redshift from SDSS
					if sdss_match:
						z = 0
						if 'spec_redshift' in sdss_match:
							z = sdss_match['spec_redshift']
						elif 'photo_redshift' in sdss_match:
							z = sdss_match['photo_redshift']
						if z>0:
							physical = p.getPhysical(z, radio_data)
							entry['radio'].update(physical)
					
					logging.info('Radio data added')
									   
				#if the link doesn't have a JSON, no data can be determined
				except urllib2.HTTPError as e:
					if e.code == 404:
						logging.info('No radio JSON detected')
					else:
						logging.exception(e)
						raise
				
				catalog.insert(entry)
				find_duplicates(entry['zooniverse_id'])
				logging.info('Entry %i added to catalog', IDnumber)
		
		with open(in_progress_file, 'w') as f:
			f.write('')
		
	#end timer
	endtime = time.time()
	output('Time taken: %f' % (endtime-starttime))
	
	return count
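All three examples repeat the same retry scaffolding: catch a known transient error, log it, sleep 10 seconds, and give up after 5 tries. A minimal sketch of how that pattern could be factored into one helper; the name retry and its signature are illustrative, not part of the original code:

import logging
import time

def retry(func, exceptions, max_tries=5, wait=10):
    '''Call func() until it succeeds; on a listed exception, log it,
    sleep `wait` seconds, and try again, giving up after max_tries attempts.'''
    for attempt in range(1, max_tries + 1):
        try:
            return func()
        except exceptions as e:
            logging.exception(e)
            if attempt == max_tries:
                raise
            time.sleep(wait)

#usage sketch, substituting for the loop in Example #2:
#table = retry(lambda: Irsa.query_region(ir_pos, catalog='allwise_p3as_psd', radius=3.*u.arcsec),
#              (astroquery.exceptions.TimeoutError, astroquery.exceptions.TableParseError))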