def set_first_max_light(self):
    """Fill in derived maximum-light and discovery-date quantities.

    Three independent steps are performed, each only when the relevant
    quantity is missing (or, for the discovery date, incomplete):

    1. If no maximum apparent magnitude is stored, take the values
       returned by `_get_max_light` and add the date, magnitude and band
       of maximum.
    2. If no discovery date is stored, or none of the stored dates has
       three '/'-separated components, add the date returned by
       `_get_first_light`.
    3. If there is still no discovery date but spectra exist, use the
       earliest spectrum time (given in MJD or JD) as the discovery date.

    Every quantity added here is flagged `derived=True` and credited to
    the self source plus the sources of the underlying data.
    """
    # --- 1. maximum light ------------------------------------------------
    if ENTRY.MAX_APP_MAG not in self:
        peak_dt, peak_mag, peak_band, peak_src = self._get_max_light()
        if peak_dt or peak_mag or peak_band:
            self_src = self.add_self_source()
            peak_sources = uniq_cdl([self_src] + peak_src.split(','))
            if peak_dt:
                peak_date = make_date_string(
                    peak_dt.year, peak_dt.month, peak_dt.day)
                self.add_quantity(
                    ENTRY.MAX_DATE, peak_date, peak_sources, derived=True)
            if peak_mag:
                peak_mag = pretty_num(peak_mag)
                self.add_quantity(
                    ENTRY.MAX_APP_MAG, peak_mag, peak_sources, derived=True)
            if peak_band:
                self.add_quantity(
                    ENTRY.MAX_BAND, peak_band, peak_sources, derived=True)

    # --- 2. discovery date from first light ------------------------------
    disc_key = self._KEYS.DISCOVER_DATE
    want_first_light = disc_key not in self
    if not want_first_light:
        # A date with fewer than three components lacks day precision.
        most_parts = max(
            len(entry[QUANTITY.VALUE].split('/')) for entry in self[disc_key])
        want_first_light = most_parts < 3
    if want_first_light:
        first_dt, first_src = self._get_first_light()
        if first_dt:
            self_src = self.add_self_source()
            first_date = make_date_string(
                first_dt.year, first_dt.month, first_dt.day)
            self.add_quantity(
                disc_key, first_date,
                uniq_cdl([self_src] + first_src.split(',')),
                derived=True)

    # --- 3. discovery date from the earliest spectrum --------------------
    if disc_key not in self and self._KEYS.SPECTRA in self:
        earliest_mjd = float("+inf")
        for spec in self[self._KEYS.SPECTRA]:
            if 'time' not in spec or 'u_time' not in spec:
                continue
            time_unit = spec['u_time']
            if time_unit == 'MJD':
                spec_mjd = float(spec['time'])
            elif time_unit == 'JD':
                spec_mjd = float(jd_to_mjd(Decimal(spec['time'])))
            else:
                # Unknown time unit: cannot be compared, ignore it.
                continue
            if spec_mjd < earliest_mjd:
                earliest_mjd = spec_mjd
                earliest_src = spec['source']

        if earliest_mjd < float("+inf"):
            spec_dt = astrotime(earliest_mjd, format='mjd').datetime
            self_src = self.add_self_source()
            spec_date = make_date_string(
                spec_dt.year, spec_dt.month, spec_dt.day)
            self.add_quantity(
                disc_key, spec_date,
                uniq_cdl([self_src] + earliest_src.split(',')),
                derived=True)
    return
def append_sources_from(self, other):
    """Merge the source alias lists of two CatDicts.

    The comma-delimited source aliases of `other` are appended to those of
    `self`, passed through `uniq_cdl`, and stored back on `self`. `other`
    is not modified.
    """
    source_key = self._KEYS.SOURCE
    combined = self[source_key].split(',') + other[source_key].split(',')
    self[source_key] = uniq_cdl(combined)
def append_sources_from(self, other):
    """Merge the source alias lists of two CatDicts.

    Both alias strings are split on ',', concatenated (own aliases first),
    passed through `uniq_cdl`, and the result is written back to `self`
    only.
    """
    alias_key = self._KEYS.SOURCE
    own = self[alias_key].split(',')
    theirs = other[alias_key].split(',')
    self[alias_key] = uniq_cdl(own + theirs)
def copy_entry_to_entry(self, fromentry, destentry):
    """Copy all sourced data of `fromentry` into `destentry`.

    Used by `merge_duplicates`.

    Source aliases are local to an entry, so each source of `fromentry` is
    re-registered on `destentry` and every copied item has its 'source'
    field rewritten to the aliases returned by `destentry.add_source`.

    Note that `fromentry` is modified in the process: the alias is popped
    from each of its source dicts and the 'source' field of each copied
    item is overwritten.

    Raises
    ------
    ValueError
        If an item has no 'source' field, or references an alias that is
        not among the sources of `fromentry`.
    """
    self.log.info("Copy entry object '{}' to '{}'".format(
        fromentry[fromentry._KEYS.NAME], destentry[destentry._KEYS.NAME]))

    proto_keys = self.proto._KEYS

    # Map each old alias to its source description (alias stripped so the
    # dict can later be splatted into `add_source`).
    sources_by_alias = {}
    if proto_keys.SOURCES in fromentry:
        for src in fromentry[proto_keys.SOURCES]:
            old_alias = src.pop(SOURCE.ALIAS)
            sources_by_alias[old_alias] = src

    if proto_keys.ERRORS in fromentry:
        for error in fromentry[proto_keys.ERRORS]:
            destentry.setdefault(proto_keys.ERRORS, []).append(error)

    for key in fromentry:
        # Keys flagged `no_source` carry no per-item sources; skip them.
        if fromentry._KEYS.get_key_by_name(key).no_source:
            continue

        for item in fromentry[key]:
            if 'source' not in item:
                raise ValueError("Item has no source!")

            new_aliases = []
            for old_alias in item['source'].split(','):
                if old_alias not in sources_by_alias:
                    raise ValueError("Couldn't find source alias!")
                new_aliases.append(
                    destentry.add_source(**sources_by_alias[old_alias]))
            item['source'] = uniq_cdl(new_aliases)

            if key == ENTRY.PHOTOMETRY:
                destentry.add_photometry(**item)
            elif key == ENTRY.SPECTRA:
                destentry.add_spectrum(**item)
            elif key == ENTRY.ERRORS:
                destentry.add_error(**item)
            else:
                destentry.add_quantity(
                    check_for_dupes=False, quantity=key, **item)
    return
def set_first_max_light(self):
    """Add derived maximum-light quantities when none are stored yet.

    Does nothing if a maximum apparent magnitude is already present.
    Otherwise the values returned by `_get_max_light` (the maximum amongst
    all bands) are used to add whichever of the date, magnitude and band
    of maximum are available, each flagged `derived=True` and credited to
    the self source plus the sources of the underlying data.
    """
    if FASTSTARS.MAX_APP_MAG in self:
        return

    # Get the maximum amongst all bands
    peak_dt, peak_mag, peak_band, peak_src = self._get_max_light()
    if not (peak_dt or peak_mag or peak_band):
        return

    self_src = self.add_self_source()
    peak_sources = uniq_cdl([self_src] + peak_src.split(','))

    if peak_dt:
        peak_date = make_date_string(peak_dt.year, peak_dt.month, peak_dt.day)
        self.add_quantity(
            FASTSTARS.MAX_DATE, peak_date, peak_sources, derived=True)
    if peak_mag:
        peak_mag = pretty_num(peak_mag)
        self.add_quantity(
            FASTSTARS.MAX_APP_MAG, peak_mag, peak_sources, derived=True)
    if peak_band:
        self.add_quantity(
            FASTSTARS.MAX_BAND, peak_band, peak_sources, derived=True)
def do_cleanup(catalog):
    """Cleanup catalog after importing all data.

    For every entry currently in `catalog.entries` this task sets the
    preferred name, purges bandless photometry, and then tries to fill in
    quantities that are missing but can be derived from data already in
    the entry:

    * discovery date, from digits embedded in an alias;
    * RA/Dec (and host RA/Dec), from coordinates embedded in a name;
    * E(B-V), from an IRSA dust query at the entry's coordinates;
    * redshift, from a velocity or from NED-D host distances;
    * velocity, luminosity/comoving distance and absolute magnitudes,
      from the best redshift or luminosity distance;
    * host angular and physical offsets, from the two coordinate pairs.

    Each entry is then sanitized and journaled. Caches are saved once the
    loop finishes.

    Parameters
    ----------
    catalog : catalog object
        Must provide `entries`, `args`, `log`, `extinctions_dict`,
        `nedd_dict` and the methods called below.
    """
    task_str = catalog.get_current_task_str()

    # Set preferred names, calculate some columns based on imported data,
    # sanitize some fields
    # Snapshot the keys: entries may be renamed/merged while we iterate.
    keys = list(catalog.entries.keys())

    cleanupcnt = 0
    for oname in pbar(keys, task_str):
        # Some events may be merged in cleanup process, skip them if
        # non-existent.
        try:
            name = catalog.add_entry(oname)
        except Exception:
            catalog.log.warning(
                '"{}" was not found, suggests merge occurred in cleanup '
                'process.'.format(oname))
            continue

        # Set the preferred name, switching to that name if name changed.
        name = catalog.entries[name].set_preferred_name()

        aliases = catalog.entries[name].get_aliases()
        catalog.entries[name].purge_bandless_photometry()
        catalog.entries[name].set_first_max_light()

        # ---- Discovery date from alias: <prefix>YYMMDD -> 20YY/MM/DD ----
        if SUPERNOVA.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['MLS', 'SSS', 'CSS', 'GRB ']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2])):
                        discoverdate = ('/'.join([
                            '20' + alias.replace(prefix, '')[:2],
                            alias.replace(prefix, '')[2:4],
                            alias.replace(prefix, '')[4:6]
                        ]))
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias +
                                   ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.DISCOVER_DATE,
                            discoverdate,
                            source,
                            derived=True)
                        break
                # Stop scanning aliases once one of them yielded a date.
                if SUPERNOVA.DISCOVER_DATE in catalog.entries[name]:
                    break

        # ---- Discovery year from alias: <prefix>YY... -> 20YY ----
        if SUPERNOVA.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = [
                'ASASSN-', 'PS1-', 'PS1', 'PS', 'iPTF', 'PTF', 'SCP-', 'SNLS-',
                'SPIRITS', 'LSQ', 'DES', 'SNHiTS', 'Gaia', 'GND', 'GNW', 'GSD',
                'GSW', 'EGS', 'COS', 'OGLE', 'HST'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2]) and
                            is_number(alias.replace(prefix, '')[:1])):
                        discoverdate = '20' + alias.replace(prefix, '')[:2]
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias +
                                   ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.DISCOVER_DATE,
                            discoverdate,
                            source,
                            derived=True)
                        break
                if SUPERNOVA.DISCOVER_DATE in catalog.entries[name]:
                    break

        # ---- Discovery date from alias: SNFYYYYMMDD -> YYYY/MM/DD ----
        if SUPERNOVA.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['SNF']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:4])):
                        discoverdate = ('/'.join([
                            alias.replace(prefix, '')[:4],
                            alias.replace(prefix, '')[4:6],
                            alias.replace(prefix, '')[6:8]
                        ]))
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias +
                                   ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.DISCOVER_DATE,
                            discoverdate,
                            source,
                            derived=True)
                        break
                if SUPERNOVA.DISCOVER_DATE in catalog.entries[name]:
                    break

        # ---- Discovery year/month from alias: <prefix>YYMM -> 20YY/MM ----
        if SUPERNOVA.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['PTFS', 'SNSDF']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2])):
                        discoverdate = ('/'.join([
                            '20' + alias.replace(prefix, '')[:2],
                            alias.replace(prefix, '')[2:4]
                        ]))
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias +
                                   ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.DISCOVER_DATE,
                            discoverdate,
                            source,
                            derived=True)
                        break
                if SUPERNOVA.DISCOVER_DATE in catalog.entries[name]:
                    break

        # ---- Discovery year from alias: <prefix><digits>, e.g. a year ----
        if SUPERNOVA.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['AT', 'SN', 'OGLE-', 'SM ', 'KSN']
            for alias in aliases:
                for prefix in prefixes:
                    if alias.startswith(prefix):
                        # Only accept aliases containing exactly one run of
                        # digits ...
                        year = re.findall(r'\d+', alias)
                        if len(year) == 1:
                            year = year[0]
                        else:
                            continue
                        # ... which must directly follow the prefix.
                        if alias.replace(prefix, '').index(year) != 0:
                            continue
                        if (year and is_number(year) and '.' not in year and
                                len(year) <= 4):
                            discoverdate = year
                            if catalog.args.verbose:
                                tprint('Added discoverdate from name [' +
                                       alias + ']: ' + discoverdate)
                            source = catalog.entries[name].add_self_source()
                            catalog.entries[name].add_quantity(
                                SUPERNOVA.DISCOVER_DATE,
                                discoverdate,
                                source,
                                derived=True)
                            break
                if SUPERNOVA.DISCOVER_DATE in catalog.entries[name]:
                    break

        # ---- RA/Dec from coordinates embedded in an alias ----
        if (SUPERNOVA.RA not in catalog.entries[name] or
                SUPERNOVA.DEC not in catalog.entries[name]):
            prefixes = [
                'PSN J', 'MASJ', 'CSS', 'SSS', 'MASTER OT J', 'HST J', 'TCP J',
                'MACS J', '2MASS J', 'EQ J', 'CRTS J', 'SMT J'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:6])):
                        # Drop anything before the last ':', the prefix and
                        # any decimal points, leaving "<ra digits>+/-<dec
                        # digits>".
                        noprefix = alias.split(':')[-1].replace(
                            prefix, '').replace('.', '')
                        decsign = '+' if '+' in noprefix else '-'
                        noprefix = noprefix.replace('+', '|').replace('-', '|')
                        nops = noprefix.split('|')
                        if len(nops) < 2:
                            continue
                        rastr = nops[0]
                        decstr = nops[1]
                        # First six digits become XX:XX:XX, any remaining
                        # digits become the fractional part.
                        ra = ':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) + \
                            ('.' + rastr[6:] if len(rastr) > 6 else '')
                        dec = (
                            decsign + ':'.join(
                                [decstr[:2], decstr[2:4], decstr[4:6]]) +
                            ('.' + decstr[6:] if len(decstr) > 6 else ''))
                        if catalog.args.verbose:
                            tprint('Added ra/dec from name: ' + ra + ' ' + dec)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.RA, ra, source, derived=True)
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.DEC, dec, source, derived=True)
                        break
                if SUPERNOVA.RA in catalog.entries[name]:
                    break

        # ---- E(B-V) from IRSA, skipped when a host is 'Milky Way' ----
        no_host = (SUPERNOVA.HOST not in catalog.entries[name] or not any([
            x[QUANTITY.VALUE] == 'Milky Way'
            for x in catalog.entries[name][SUPERNOVA.HOST]
        ]))
        if (SUPERNOVA.RA in catalog.entries[name] and
                SUPERNOVA.DEC in catalog.entries[name] and no_host):
            from astroquery.irsa_dust import IrsaDust
            # Only query IRSA when the value is not already cached.
            if name not in catalog.extinctions_dict:
                try:
                    ra_dec = catalog.entries[name][
                        SUPERNOVA.RA][0][QUANTITY.VALUE] + \
                        " " + \
                        catalog.entries[name][SUPERNOVA.DEC][0][QUANTITY.VALUE]
                    result = IrsaDust.get_query_table(ra_dec, section='ebv')
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception:
                    # Best effort: a failed lookup only produces a warning.
                    warnings.warn("Coordinate lookup for " + name +
                                  " failed in IRSA.")
                else:
                    ebv = result['ext SandF mean'][0]
                    ebverr = result['ext SandF std'][0]
                    catalog.extinctions_dict[name] = [ebv, ebverr]
            if name in catalog.extinctions_dict:
                sources = uniq_cdl([
                    catalog.entries[name].add_self_source(),
                    catalog.entries[name].add_source(
                        bibcode='2011ApJ...737..103S')
                ])
                (catalog.entries[name].add_quantity(
                    SUPERNOVA.EBV,
                    str(catalog.extinctions_dict[name][0]),
                    sources,
                    e_value=str(catalog.extinctions_dict[name][1]),
                    derived=True))

        # ---- Host RA/Dec from coordinates embedded in a host name ----
        if ((SUPERNOVA.HOST in catalog.entries[name] and
             (SUPERNOVA.HOST_RA not in catalog.entries[name] or
              SUPERNOVA.HOST_DEC not in catalog.entries[name]))):
            for host in catalog.entries[name][SUPERNOVA.HOST]:
                alias = host[QUANTITY.VALUE]
                if ' J' in alias and is_number(alias.split(' J')[-1][:6]):
                    noprefix = alias.split(' J')[-1].split(':')[-1].replace(
                        '.', '')
                    decsign = '+' if '+' in noprefix else '-'
                    noprefix = noprefix.replace('+', '|').replace('-', '|')
                    nops = noprefix.split('|')
                    if len(nops) < 2:
                        continue
                    rastr = nops[0]
                    decstr = nops[1]
                    hostra = (':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) +
                              ('.' + rastr[6:] if len(rastr) > 6 else ''))
                    hostdec = decsign + ':'.join([
                        decstr[:2], decstr[2:4], decstr[4:6]
                    ]) + ('.' + decstr[6:] if len(decstr) > 6 else '')
                    if catalog.args.verbose:
                        tprint('Added hostra/hostdec from name: ' + hostra +
                               ' ' + hostdec)
                    source = catalog.entries[name].add_self_source()
                    catalog.entries[name].add_quantity(
                        SUPERNOVA.HOST_RA, hostra, source, derived=True)
                    catalog.entries[name].add_quantity(
                        SUPERNOVA.HOST_DEC, hostdec, source, derived=True)
                    break
                if SUPERNOVA.HOST_RA in catalog.entries[name]:
                    break

        # ---- Redshift from velocity ----
        if (SUPERNOVA.REDSHIFT not in catalog.entries[name] and
                SUPERNOVA.VELOCITY in catalog.entries[name]):
            # Find the "best" velocity to use for this
            # ("best" = the value with the most significant digits).
            bestsig = 0
            for hv in catalog.entries[name][SUPERNOVA.VELOCITY]:
                sig = get_sig_digits(hv[QUANTITY.VALUE])
                if sig > bestsig:
                    besthv = hv[QUANTITY.VALUE]
                    bestsrc = hv['source']
                    bestsig = sig
            if bestsig > 0 and is_number(besthv):
                # v/c; the 1.e5 factor presumably converts km/s to cm/s to
                # match the units of CLIGHT -- TODO confirm.
                voc = float(besthv) * 1.e5 / CLIGHT
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # z = sqrt((1 + v/c) / (1 - v/c)) - 1
                (catalog.entries[name].add_quantity(
                    SUPERNOVA.REDSHIFT,
                    pretty_num(
                        sqrt((1. + voc) / (1. - voc)) - 1., sig=bestsig),
                    sources,
                    kind='heliocentric',
                    derived=True))

        # ---- Redshift from the median NED-D distance of a host ----
        if (SUPERNOVA.REDSHIFT not in catalog.entries[name] and
                len(catalog.nedd_dict) > 0 and
                SUPERNOVA.HOST in catalog.entries[name]):
            reference = "NED-D"
            refurl = "http://ned.ipac.caltech.edu/Library/Distances/"
            refbib = "1991ASSL..171...89H"
            for host in catalog.entries[name][SUPERNOVA.HOST]:
                if host[QUANTITY.VALUE] in catalog.nedd_dict:
                    source = catalog.entries[name].add_source(
                        bibcode='2016A&A...594A..13P')
                    secondarysource = catalog.entries[name].add_source(
                        name=reference,
                        url=refurl,
                        bibcode=refbib,
                        secondary=True)
                    meddist = statistics.median(
                        catalog.nedd_dict[host[QUANTITY.VALUE]])
                    # Invert the comoving-distance relation of `cosmo`;
                    # the median distance is taken to be in Mpc.
                    redz = z_at_value(cosmo.comoving_distance,
                                      float(meddist) * un.Mpc)
                    redshift = pretty_num(
                        redz, sig=get_sig_digits(str(meddist)))
                    catalog.entries[name].add_quantity(
                        [SUPERNOVA.REDSHIFT, SUPERNOVA.HOST_REDSHIFT],
                        redshift,
                        uniq_cdl([source, secondarysource]),
                        kind='host',
                        derived=True)

        # ---- Absolute magnitude from an existing luminosity distance ----
        if (SUPERNOVA.MAX_ABS_MAG not in catalog.entries[name] and
                SUPERNOVA.MAX_APP_MAG in catalog.entries[name] and
                SUPERNOVA.LUM_DIST in catalog.entries[name]):
            # Find the "best" distance to use for this
            bestsig = 0
            for ld in catalog.entries[name][SUPERNOVA.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld[QUANTITY.SOURCE]
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                bestldz = z_at_value(cosmo.luminosity_distance,
                                     float(bestld) * un.Mpc)
                # M = m - 5 (log10(d / pc) - 1) + 2.5 log10(1 + z), with
                # the distance converted from Mpc to pc via the 1.0e6 factor.
                pnum = (float(catalog.entries[name][SUPERNOVA.MAX_APP_MAG][0][
                    QUANTITY.VALUE]) - 5.0 *
                    (log10(float(bestld) * 1.0e6) - 1.0) +
                    2.5 * log10(1.0 + bestldz))
                pnum = pretty_num(pnum, sig=bestsig + 1)
                catalog.entries[name].add_quantity(
                    SUPERNOVA.MAX_ABS_MAG, pnum, sources, derived=True)

        # ---- Visual absolute magnitude from a luminosity distance ----
        if (SUPERNOVA.MAX_VISUAL_ABS_MAG not in catalog.entries[name] and
                SUPERNOVA.MAX_VISUAL_APP_MAG in catalog.entries[name] and
                SUPERNOVA.LUM_DIST in catalog.entries[name]):
            # Find the "best" distance to use for this
            bestsig = 0
            for ld in catalog.entries[name][SUPERNOVA.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld[QUANTITY.SOURCE]
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # FIX: what's happening here?!
                # The expression is the distance modulus only,
                # M = m - 5 (log10(d / pc) - 1); unlike the MAX_ABS_MAG case
                # above there is no 2.5 log10(1 + z) term.
                pnum = (float(catalog.entries[name][
                    SUPERNOVA.MAX_VISUAL_APP_MAG][0][QUANTITY.VALUE]) - 5.0 *
                    (log10(float(bestld) * 1.0e6) - 1.0))
                pnum = pretty_num(pnum, sig=bestsig + 1)
                catalog.entries[name].add_quantity(
                    SUPERNOVA.MAX_VISUAL_ABS_MAG,
                    pnum,
                    sources,
                    derived=True)

        # ---- Quantities derived from the best redshift ----
        if SUPERNOVA.REDSHIFT in catalog.entries[name]:
            # Find the "best" redshift to use for this
            bestz, bestkind, bestsig, bestsrc = catalog.entries[
                name].get_best_redshift()
            if bestsig > 0:
                try:
                    bestz = float(bestz)
                except Exception:
                    # Dump the offending entry before re-raising.
                    print(catalog.entries[name])
                    raise
                if SUPERNOVA.VELOCITY not in catalog.entries[name]:
                    source = catalog.entries[name].add_self_source()
                    # FIX: what's happening here?!
                    # The expression is
                    # v = c * ((1 + z)^2 - 1) / ((1 + z)^2 + 1), the inverse
                    # of the velocity -> redshift relation used above.
                    # CLIGHT / KM is presumably c in km/s -- TODO confirm.
                    pnum = CLIGHT / KM * \
                        ((bestz + 1.)**2. - 1.) / ((bestz + 1.)**2. + 1.)
                    pnum = pretty_num(pnum, sig=bestsig)
                    catalog.entries[name].add_quantity(
                        SUPERNOVA.VELOCITY,
                        pnum,
                        source,
                        kind=(SUPERNOVA.VELOCITY.kind_preference[bestkind]
                              if bestkind else ''))
                if bestz > 0.:
                    if SUPERNOVA.LUM_DIST not in catalog.entries[name]:
                        dl = cosmo.luminosity_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name].add_source(
                                bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.LUM_DIST,
                            pretty_num(
                                dl.value, sig=bestsig + 1),
                            sources,
                            kind=(SUPERNOVA.LUM_DIST.kind_preference[bestkind]
                                  if bestkind else ''),
                            derived=True)

                        # The magnitude derivations below reuse `dl` and
                        # `sources` computed just above.
                        if (SUPERNOVA.MAX_ABS_MAG not in
                                catalog.entries[name] and
                                SUPERNOVA.MAX_APP_MAG in
                                catalog.entries[name]):
                            source = catalog.entries[name].add_self_source()
                            pnum = pretty_num(
                                float(catalog.entries[name][
                                    SUPERNOVA.MAX_APP_MAG][0][QUANTITY.VALUE])
                                - 5.0 * (log10(dl.to('pc').value) - 1.0) +
                                2.5 * log10(1.0 + bestz),
                                sig=bestsig + 1)
                            catalog.entries[name].add_quantity(
                                SUPERNOVA.MAX_ABS_MAG,
                                pnum,
                                sources,
                                derived=True)
                        if (SUPERNOVA.MAX_VISUAL_ABS_MAG not in
                                catalog.entries[name] and
                                SUPERNOVA.MAX_VISUAL_APP_MAG in
                                catalog.entries[name]):
                            source = catalog.entries[name].add_self_source()
                            pnum = pretty_num(
                                float(catalog.entries[name][
                                    SUPERNOVA.MAX_VISUAL_APP_MAG][0][
                                        QUANTITY.VALUE]) - 5.0 *
                                (log10(dl.to('pc').value) - 1.0),
                                sig=bestsig + 1)
                            catalog.entries[name].add_quantity(
                                SUPERNOVA.MAX_VISUAL_ABS_MAG,
                                pnum,
                                sources,
                                derived=True)
                    if SUPERNOVA.COMOVING_DIST not in catalog.entries[name]:
                        cd = cosmo.comoving_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name].add_source(
                                bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.COMOVING_DIST,
                            pretty_num(
                                cd.value, sig=bestsig),
                            sources,
                            derived=True)

        # ---- Host quantities derived from the best host redshift ----
        if SUPERNOVA.HOST_REDSHIFT in catalog.entries[name]:
            # Find the "best" redshift to use for this
            bestz, bestkind, bestsig, bestsrc = catalog.entries[
                name].get_best_redshift(SUPERNOVA.HOST_REDSHIFT)
            if bestsig > 0:
                try:
                    bestz = float(bestz)
                except Exception:
                    print(catalog.entries[name])
                    raise
                if SUPERNOVA.HOST_VELOCITY not in catalog.entries[name]:
                    source = catalog.entries[name].add_self_source()
                    # FIX: what's happening here?!
                    # Same redshift -> velocity expression as for the
                    # entry's own redshift above.
                    pnum = CLIGHT / KM * \
                        ((bestz + 1.)**2. - 1.) / ((bestz + 1.)**2. + 1.)
                    pnum = pretty_num(pnum, sig=bestsig)
                    catalog.entries[name].add_quantity(
                        SUPERNOVA.HOST_VELOCITY,
                        pnum,
                        source,
                        kind=(SUPERNOVA.HOST_VELOCITY.kind_preference[bestkind]
                              if bestkind else ''))
                if bestz > 0.:
                    if SUPERNOVA.HOST_LUM_DIST not in catalog.entries[name]:
                        dl = cosmo.luminosity_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name].add_source(
                                bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.HOST_LUM_DIST,
                            pretty_num(
                                dl.value, sig=bestsig + 1),
                            sources,
                            kind=(SUPERNOVA.HOST_LUM_DIST.
                                  kind_preference[bestkind]
                                  if bestkind else ''),
                            derived=True)
                    if SUPERNOVA.HOST_COMOVING_DIST not in catalog.entries[
                            name]:
                        cd = cosmo.comoving_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name].add_source(
                                bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.HOST_COMOVING_DIST,
                            pretty_num(
                                cd.value, sig=bestsig),
                            sources,
                            derived=True)

        # ---- Host offsets from the entry and host coordinates ----
        if all([
                x in catalog.entries[name]
                for x in [
                    SUPERNOVA.RA, SUPERNOVA.DEC, SUPERNOVA.HOST_RA,
                    SUPERNOVA.HOST_DEC
                ]
        ]):
            # For now just using first coordinates that appear in entry
            try:
                c1 = coord(
                    ra=catalog.entries[name][SUPERNOVA.RA][0][QUANTITY.VALUE],
                    dec=catalog.entries[name][SUPERNOVA.DEC][0][
                        QUANTITY.VALUE],
                    unit=(un.hourangle, un.deg))
                c2 = coord(
                    ra=catalog.entries[name][SUPERNOVA.HOST_RA][0][
                        QUANTITY.VALUE],
                    dec=catalog.entries[name][SUPERNOVA.HOST_DEC][0][
                        QUANTITY.VALUE],
                    unit=(un.hourangle, un.deg))
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                # Unparseable coordinates: silently skip the offsets.
                pass
            else:
                sources = uniq_cdl(
                    [catalog.entries[name].add_self_source()] +
                    catalog.entries[name][SUPERNOVA.RA][0][
                        QUANTITY.SOURCE].split(',') +
                    catalog.entries[name][SUPERNOVA.DEC][0][
                        QUANTITY.SOURCE].split(',') +
                    catalog.entries[name][SUPERNOVA.HOST_RA][0][
                        QUANTITY.SOURCE].split(',') +
                    catalog.entries[name][SUPERNOVA.HOST_DEC][0]
                    [QUANTITY.SOURCE].split(','))
                if SUPERNOVA.HOST_OFFSET_ANG not in catalog.entries[name]:
                    hosa = Decimal(c1.separation(c2).arcsecond)
                    hosa = pretty_num(hosa)
                    catalog.entries[name].add_quantity(
                        SUPERNOVA.HOST_OFFSET_ANG,
                        hosa,
                        sources,
                        derived=True,
                        u_value='arcseconds')
                if (SUPERNOVA.COMOVING_DIST in catalog.entries[name] and
                        SUPERNOVA.REDSHIFT in catalog.entries[name] and
                        SUPERNOVA.HOST_OFFSET_DIST not in
                        catalog.entries[name]):
                    offsetsig = get_sig_digits(catalog.entries[name][
                        SUPERNOVA.HOST_OFFSET_ANG][0][QUANTITY.VALUE])
                    sources = uniq_cdl(
                        sources.split(',') +
                        (catalog.entries[name][SUPERNOVA.COMOVING_DIST][0][
                            QUANTITY.SOURCE]).split(',') +
                        (catalog.entries[name][SUPERNOVA.REDSHIFT][0][
                            QUANTITY.SOURCE]).split(','))
                    # offset = angle [arcsec -> rad] * comoving distance
                    #          * 1000 / (1 + z)
                    # (the factor 1000 presumably converts Mpc to kpc --
                    # TODO confirm). Note: no `derived=True` is passed here.
                    (catalog.entries[name].add_quantity(
                        SUPERNOVA.HOST_OFFSET_DIST,
                        pretty_num(
                            float(catalog.entries[name][
                                SUPERNOVA.HOST_OFFSET_ANG][0][QUANTITY.VALUE])
                            / 3600. * (pi / 180.) *
                            float(catalog.entries[name][
                                SUPERNOVA.COMOVING_DIST][0][QUANTITY.VALUE]) *
                            1000. / (1.0 + float(catalog.entries[name][
                                SUPERNOVA.REDSHIFT][0][QUANTITY.VALUE])),
                            sig=offsetsig),
                        sources))

        catalog.entries[name].sanitize()
        catalog.journal_entries(bury=True, final=True, gz=True)
        cleanupcnt = cleanupcnt + 1
        # In travis (test) mode stop after every 1000th processed entry.
        if catalog.args.travis and cleanupcnt % 1000 == 0:
            break

    catalog.save_caches()

    return
def do_simbad(catalog):
    """Import names, coordinates and claimed types from SIMBAD.

    SIMBAD is queried (trying each mirror in turn) for all objects whose
    main type is `No*` or `No?`. For every row that carries bibliographic
    information an entry is created and its aliases, coordinates and
    spectral type are added, credited to SIMBAD as a secondary source plus
    the bibcodes SIMBAD reports.

    Parameters
    ----------
    catalog : catalog object
        Must provide `entries`, `log`, `add_entry`, `journal_entries` and
        `get_current_task_str`.
    """
    # Simbad.list_votable_fields()
    # Some coordinates that SIMBAD claims belong to the SNe actually belong to
    # the host.
    task_str = catalog.get_current_task_str()
    simbadmirrors = ['http://simbad.harvard.edu/simbad/sim-script',
                     'http://simbad.u-strasbg.fr/simbad/sim-script']
    # Bibcodes whose coordinates / names are known to be unreliable.
    simbadbadcoordbib = ['2013ApJ...770..107C']
    simbadbadnamebib = ['2004AJ....127.2809W', '2005MNRAS.364.1419Z',
                        '2015A&A...574A.112D', '2011MNRAS.417..916G',
                        '2002ApJ...566..880G']
    simbadbannedcats = ['[TBV2008]', 'OGLE-MBR']
    customSimbad = Simbad()
    customSimbad.ROW_LIMIT = -1
    customSimbad.TIMEOUT = 120
    customSimbad.add_votable_fields('otype', 'sptype', 'sp_bibcode', 'id')

    table = []
    for mirror in simbadmirrors:
        customSimbad.SIMBAD_URL = mirror
        try:
            table = customSimbad.query_criteria('maintype=No* | maintype="No?"')
        except Exception:
            # This mirror failed; fall through to the next one. Only
            # `Exception` is caught so that KeyboardInterrupt / SystemExit
            # abort the task instead of silently switching mirrors.
            continue
        else:
            break

    if not table:
        catalog.log.warning('SIMBAD unable to load, probably offline.')
        # Normalise a falsy result (e.g. `None`) so the loop below is a
        # no-op rather than an error.
        table = []

    # 2000A&AS..143....9W
    for brow in pbar(table, task_str):
        # Stringify every column, unwrapping values rendered as b'...'.
        row = {x: re.sub(r'b\'(.*)\'', r'\1', str(brow[x]))
               for x in brow.colnames}
        # Skip items with no bibliographic info aside from SIMBAD, too
        # error-prone
        if row['OTYPE'] == 'Candidate_No*' and not row['SP_TYPE']:
            continue
        if (not row['COO_BIBCODE'] and not row['SP_BIBCODE'] and
                not row['SP_BIBCODE_2']):
            continue
        if any(x in row['MAIN_ID'] for x in simbadbannedcats):
            continue
        if row['COO_BIBCODE'] and row['COO_BIBCODE'] in simbadbadnamebib:
            continue

        # Strip bracketed catalog tags from the main identifier.
        # NOTE(review): the character class is `[^)]`, not `[^\]]`, so the
        # match runs to the last ']' in the string -- confirm intended.
        name = single_spaces(re.sub(r'\[[^)]*\]', '', row['MAIN_ID']).strip())
        if name == 'SN':
            continue
        if is_number(name):
            continue
        name = catalog.add_entry(name)
        source = (catalog.entries[name]
                  .add_source(name='SIMBAD astronomical database',
                              bibcode="2000A&AS..143....9W",
                              url="http://simbad.u-strasbg.fr/",
                              secondary=True))

        aliases = row['ID'].split(',')
        for alias in aliases:
            if any(x in alias for x in simbadbannedcats):
                continue
            ali = single_spaces(re.sub(r'\[[^)]*\]', '', alias).strip())
            if is_number(ali):
                continue
            ali = name_clean(ali)
            catalog.entries[name].add_quantity(NOVA.ALIAS, ali, source)

        if row['COO_BIBCODE'] and row['COO_BIBCODE'] not in simbadbadcoordbib:
            csources = ','.join(
                [source, catalog.entries[name].add_source(
                    bibcode=row['COO_BIBCODE'])])
            catalog.entries[name].add_quantity(NOVA.RA, row['RA'], csources)
            catalog.entries[name].add_quantity(NOVA.DEC, row['DEC'], csources)

        if row['SP_BIBCODE']:
            ssources = uniq_cdl([source,
                                 catalog.entries[name]
                                 .add_source(bibcode=row['SP_BIBCODE'])] +
                                ([catalog.entries[name]
                                  .add_source(bibcode=row['SP_BIBCODE_2'])]
                                 if row['SP_BIBCODE_2'] else []))
            catalog.entries[name].add_quantity(
                NOVA.CLAIMED_TYPE,
                (row['SP_TYPE']
                 .replace('SN.', '')
                 .replace('SN', '')
                 .replace('(~)', '')
                 .strip(': ')),
                ssources)

    catalog.journal_entries()
    return
def do_cpcs(catalog):
    """Import data from CPCS.

    The CPCS alert index is loaded (via `catalog.load_url`, which is also
    handed a path inside the task repository). For each alert whose
    normalised name matches the survey whitelist and already exists in the
    catalog, the alert's coordinates and light curve are added to that
    entry. Returns early, doing nothing, when the index cannot be loaded.
    """
    task_str = catalog.get_current_task_str()
    index_url = ('http://gsaweb.ast.cam.ac.uk/'
                 'followup/list_of_alerts?format=json&num=100000&'
                 'published=1&observed_only=1'
                 '&hashtag=JG_530ad9462a0b8785bfb385614bf178c6')
    index_path = os.path.join(catalog.get_current_task_repo(),
                              'CPCS/index.json')
    index_txt = catalog.load_url(index_url, index_path)
    if not index_txt:
        return

    alertindex = json.loads(index_txt, object_pairs_hook=OrderedDict)
    alert_ids = [alert['id'] for alert in alertindex]

    # Just use a whitelist for now since naming seems inconsistent
    surveys = [
        'GAIA', 'OGLE', 'ASASSN', 'MASTER', 'OTJ', 'PS1', 'IPTF', 'CSS'
    ]
    deg_unit = 'floatdegrees'

    for ii, ai in enumerate(pbar(alert_ids, task_str)):
        alert = alertindex[ii]
        name = alert['ivorn'].split('/')[-1].strip()

        # Skip a few weird entries
        if name == 'ASASSNli':
            continue
        if not any(tag in name.upper() for tag in surveys):
            continue

        # Normalise the alert name to the catalog's naming conventions.
        name = name.replace('Verif', '').replace('_', ' ')
        if 'ASASSN' in name and name[6] != '-':
            name = 'ASASSN-' + name[6:].lower()
        if 'MASTEROTJ' in name:
            name = name.replace('MASTEROTJ', 'MASTER OT J')
        if 'OTJ' in name:
            name = name.replace('OTJ', 'MASTER OT J')
        if name.upper().startswith('IPTF'):
            name = 'iPTF' + name[4:].lower()
        if name.upper().startswith('PS1'):
            name = 'PS1' + name[3:].lower()

        # Only add events that are classified as SN.
        if not catalog.entry_exists(name):
            continue
        oldname = name
        name = catalog.add_entry(name)
        entry = catalog.entries[name]

        sec_source = entry.add_source(
            name='Cambridge Photometric Calibration Server',
            url='http://gsaweb.ast.cam.ac.uk/followup/',
            secondary=True)
        entry.add_quantity(SUPERNOVA.ALIAS, oldname, sec_source)
        entry.add_quantity(SUPERNOVA.RA, str(alert[SUPERNOVA.RA]),
                           sec_source, u_value=deg_unit)
        entry.add_quantity(SUPERNOVA.DEC, str(alert[SUPERNOVA.DEC]),
                           sec_source, u_value=deg_unit)

        alerturl = ('http://gsaweb.ast.cam.ac.uk/'
                    'followup/get_alert_lc_data?alert_id=' + str(ai))
        source = entry.add_source(name='CPCS Alert ' + str(ai), url=alerturl)
        fname = os.path.join(catalog.get_current_task_repo(),
                             'CPCS/alert-') + str(ai).zfill(2) + '.json'
        jsonstr = catalog.load_url(
            alerturl + '&hashtag=JG_530ad9462a0b8785bfb385614bf178c6', fname)

        try:
            cpcsalert = json.loads(jsonstr)
        except Exception:
            catalog.log.warning('Mangled CPCS data for alert {}.'.format(ai))
            continue

        mjds = [round_sig(val, sig=9) for val in cpcsalert['mjd']]
        mags = [round_sig(val, sig=6) for val in cpcsalert['mag']]
        # Non-numeric or non-positive errors are stored as empty strings.
        errs = [
            round_sig(val, sig=6)
            if (is_number(val) and float(val) > 0.0) else ''
            for val in cpcsalert['magerr']
        ]
        bnds = cpcsalert['filter']
        obs = cpcsalert['observatory']
        for mi, mjd in enumerate(mjds):
            entry.add_photometry(
                time=mjd,
                u_time='MJD',
                magnitude=mags[mi],
                e_magnitude=errs[mi],
                band=bnds[mi],
                observatory=obs[mi],
                source=uniq_cdl([source, sec_source]))

        if catalog.args.update:
            catalog.journal_entries()
        if catalog.args.travis and ii >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()
    return
def do_wiserep_spectra(catalog):
    """Import spectra from the local WISeREP mirror.

    Every sub-folder of the task repository is treated as one event. Its
    `README.json` supplies per-file metadata; each remaining file in the
    folder is parsed as a whitespace-separated spectrum whose columns are
    wavelength, flux and (optionally) flux error.

    Parameters
    ----------
    catalog : catalog object
        Must provide `entries`, `args`, `log`, `add_entry`,
        `journal_entries`, `get_current_task_str`,
        `get_current_task_repo` and `TRAVIS_QUERY_LIMIT`.
    """
    # if not catalog.args.travis:
    #     from ..input.WISeWEBSpider.wisewebspider import spider
    #     try:
    #         spider(update=True, daysago=7,
    #                path="/../../sne-external-WISEREP/")
    #     except:
    #         catalog.log.warning(
    #             'Spider errored, continuing without letting it complete.')

    task_str = catalog.get_current_task_str()
    secondaryreference = 'WISeREP'
    secondaryrefurl = 'http://wiserep.weizmann.ac.il/'
    secondarybibcode = '2012PASP..124..668Y'
    wiserepcnt = 0

    # These are known to be in error on the WISeREP page, either fix or ignore
    # them.
    wiserepbibcorrectdict = {
        '2000AJ....120..367G]': '2000AJ....120..367G',
        'Harutyunyan et al. 2008': '2008A&A...488..383H',
        '0609268': '2007AJ....133...58K',
        '2006ApJ...636...400Q': '2006ApJ...636..400Q',
        '2011ApJ...741...76': '2011ApJ...741...76C',
        '2016PASP...128...961': '2016PASP..128...961',
        '2002AJ....1124..417H': '2002AJ....1124.417H',
        '2013ApJ…774…58D': '2013ApJ...774...58D',
        '2011Sci.333..856S': '2011Sci...333..856S',
        '2014MNRAS.438,368': '2014MNRAS.438..368T',
        '2012MNRAS.420.1135': '2012MNRAS.420.1135S',
        '2012Sci..337..942D': '2012Sci...337..942D',
        'stt1839': '2013MNRAS.436.3614S',
        'arXiv:1605.03136': '2016MNRAS.460.3447T',
        '10.1093/mnras/stt1839': '2013MNRAS.436.3614S'
    }

    file_names = list(glob(os.path.join(catalog.get_current_task_repo(), '*')))
    for folder in pbar_strings(file_names, task_str):
        # Plain files at the top level are not event folders.
        if '.txt' in folder or '.json' in folder:
            continue

        # Normalise the folder name to the catalog's naming conventions.
        name = os.path.basename(folder).strip()
        if name.startswith('sn'):
            name = 'SN' + name[2:]
        if (name.startswith(('CSS', 'SSS', 'MLS')) and ':' not in name):
            name = name.replace('-', ':', 1)
        if name.startswith('MASTERJ'):
            name = name.replace('MASTERJ', 'MASTER OT J')
        if name.startswith('PSNJ'):
            name = name.replace('PSNJ', 'PSN J')
        name = catalog.add_entry(name)

        secondarysource = catalog.entries[name].add_source(
            name=secondaryreference,
            url=secondaryrefurl,
            bibcode=secondarybibcode,
            secondary=True)
        catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name,
                                           secondarysource)

        readme_path = os.path.join(folder, 'README.json')
        if not os.path.exists(readme_path):
            catalog.log.warning(
                'Metadata file not found for event "{}"'.format(name))
            continue

        with open(readme_path, 'r') as f:
            fileinfo = json.loads(f.read())

        files = list(
            set(glob(folder + '/*')) - set(glob(folder + '/README.json')))
        for fname in pbar(files, task_str):
            specfile = os.path.basename(fname)
            if specfile not in fileinfo:
                catalog.log.warning(
                    'Metadata not found for "{}"'.format(fname))
                continue
            claimedtype = fileinfo[specfile]["Type"]
            instrument = fileinfo[specfile]["Instrument"]
            epoch = fileinfo[specfile]["Obs. Date"]
            observer = fileinfo[specfile]["Observer"]
            reducer = fileinfo[specfile]["Reducer"]
            bibcode = fileinfo[specfile]["Bibcode"]
            redshift = fileinfo[specfile]["Redshift"]
            survey = fileinfo[specfile]["Program"]
            reduction = fileinfo[specfile]["Reduction Status"]

            if bibcode:
                newbibcode = bibcode
                if bibcode in wiserepbibcorrectdict:
                    newbibcode = wiserepbibcorrectdict[bibcode]
                # Only strings of length 19 are accepted as bibcodes here.
                if newbibcode and len(newbibcode) == 19:
                    source = catalog.entries[name].add_source(
                        bibcode=unescape(newbibcode))
                else:
                    bibname = unescape(bibcode)
                    source = catalog.entries[name].add_source(name=bibname)
                    catalog.log.warning('Bibcode "{}" is invalid, using as '
                                        '`{}` instead'.format(
                                            bibname, SOURCE.NAME))
                sources = uniq_cdl([source, secondarysource])
            else:
                sources = secondarysource

            if claimedtype not in ['Other']:
                catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                                   claimedtype,
                                                   secondarysource)
            catalog.entries[name].add_quantity(SUPERNOVA.REDSHIFT, redshift,
                                               secondarysource)

            with open(fname, 'r') as f:
                data = [x.split() for x in f]

            # Keep numeric rows only, dropping comment rows and rows that
            # repeat the previous row's flux value.
            newdata = []
            oldval = ''
            for row in data:
                if row and '#' not in row[0]:
                    if (len(row) >= 2 and is_number(row[0]) and
                            is_number(row[1]) and row[1] != oldval):
                        newdata.append(row)
                        oldval = row[1]

            if not newdata:
                warnings.warn('Skipped adding spectrum file ' + specfile)
                continue

            # Transpose rows into columns: wavelength, flux[, error].
            data = [list(i) for i in zip(*newdata)]
            wavelengths = data[0]
            fluxes = data[1]
            errors = ''
            if len(data) == 3:
                # The third column holds the flux errors (previously the
                # flux column was mistakenly reused here).
                errors = data[2]
            time = str(astrotime(epoch).mjd)

            # Very small peak flux values are taken to be calibrated.
            if max([float(x) for x in fluxes]) < 1.0e-5:
                fluxunit = 'erg/s/cm^2/Angstrom'
            else:
                fluxunit = 'Uncalibrated'

            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                errors=errors,
                u_fluxes=fluxunit,
                u_errors=fluxunit if errors else '',
                wavelengths=wavelengths,
                fluxes=fluxes,
                u_time='MJD',
                time=time,
                instrument=instrument,
                source=sources,
                observer=observer,
                reducer=reducer,
                reduction=reduction,
                filename=specfile,
                survey=survey,
                redshift=redshift)

        catalog.journal_entries()

        wiserepcnt = wiserepcnt + 1
        if (catalog.args.travis and
                wiserepcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
            break

    return
def do_nedd(catalog):
    """Import distances, redshifts and hosts from the NED-D CSV table.

    The table is read from the current task repository, sorted by the
    supernova-name and distance-object columns, and walked row by row.  For
    rows that name a supernova, the entry is created/loaded and, when the
    distance refers to the supernova itself, the tabulated redshift and
    comoving distance are recorded.  If a distance is given without a
    redshift, a redshift is derived from the distance with ``z_at_value``.
    A cleaned host name is recorded when the distance object differs from
    the supernova.

    Parameters
    ----------
    catalog
        Catalog object providing the task repository path, the entries and
        the journaling methods used below.
    """
    task_str = catalog.get_current_task_str()
    nedd_path = os.path.join(
        catalog.get_current_task_repo(), 'NED26.05.1-D-12.1.0-20160501.csv')

    # Read the whole table up front inside a context manager so the file
    # handle is released even if processing a row raises later on.
    with open(nedd_path, 'r') as f:
        rows = list(csv.reader(f, delimiter=',', quotechar='"'))

    # The first 13 lines of the file are dropped before sorting (presumably
    # the file preamble -- confirm against the data file).
    data = sorted(rows[13:], key=lambda x: (x[9], x[3]))
    reference = "NED-D"
    refurl = "http://ned.ipac.caltech.edu/Library/Distances/"
    # NOTE(review): this dictionary is filled below but never read or stored
    # on `catalog` within this function -- confirm whether it was meant to
    # populate a catalog-level host-distance table.
    nedd_dict = OrderedDict()
    olddistname = ''
    for r, row in enumerate(pbar(data, task_str)):
        # NOTE(review): 13 rows were already removed by the slice above, so
        # this skips 13 further rows of the *sorted* data.  Kept as-is to
        # preserve current output; likely unintended -- confirm.
        if r <= 12:
            continue
        distname = row[3]
        name = name_clean(distname)
        # distmod = row[4]
        # moderr = row[5]
        dist = row[6]
        bibcode = unescape(row[8])
        snname = name_clean(row[9])
        redshift = row[10]
        cleanhost = ''
        if name != snname and (name + ' HOST' != snname):
            cleanhost = host_clean(distname)
            if cleanhost.endswith(' HOST'):
                cleanhost = ''
            if not is_number(dist):
                print(dist)
            if dist:
                nedd_dict.setdefault(cleanhost, []).append(Decimal(dist))
        if snname and 'HOST' not in snname:
            snname, secondarysource = catalog.new_entry(
                snname, srcname=reference, url=refurl, secondary=True)
            entry = catalog.entries[snname]
            if bibcode:
                source = entry.add_source(bibcode=bibcode)
                sources = uniq_cdl([source, secondarysource])
            else:
                sources = secondarysource
            if name == snname:
                if redshift:
                    entry.add_quantity('redshift', redshift, sources)
                if dist:
                    entry.add_quantity('comovingdist', dist, sources)
                    if not redshift:
                        # Best effort: a failed inversion simply leaves the
                        # entry without a derived redshift.
                        try:
                            zatval = z_at_value(
                                cosmo.comoving_distance,
                                float(dist) * un.Mpc, zmax=5.0)
                            sigd = get_sig_digits(str(dist))
                            redshift = pretty_num(zatval, sig=sigd)
                        except (KeyboardInterrupt, SystemExit):
                            raise
                        except Exception:
                            pass
                        else:
                            # `name == snname` in this branch, so this is
                            # the same entry the original looked up by
                            # `name`.
                            cosmosource = entry.add_source(
                                bibcode='2016A&A...594A..13P')
                            combsources = uniq_cdl(
                                sources.split(',') + [cosmosource])
                            entry.add_quantity(
                                'redshift', redshift, combsources)
            if cleanhost:
                entry.add_quantity('host', cleanhost, sources)
            if catalog.args.update and olddistname != distname:
                catalog.journal_entries()
        olddistname = distname
    catalog.journal_entries()

    return
def do_ucb_spectra(catalog):
    """Import the public spectra of the UCB Filippenko Group's SNDB.

    Loads the index of public spectra, then for every listed spectrum adds
    the object's alias, claimed type(s), discovery date and host, loads the
    spectrum file itself and attaches it to the object's entry.

    Parameters
    ----------
    catalog
        Catalog object providing ``load_url``, the entries and journaling.

    Returns
    -------
    None
        Returns early (without journaling) when the index cannot be loaded.

    Raises
    ------
    ValueError
        If an index record has no filename or no ``SpecID``.
    """
    task_str = catalog.get_current_task_str()
    sec_reference = 'UCB Filippenko Group\'s Supernova Database (SNDB)'
    sec_refurl = 'http://heracles.astro.berkeley.edu/sndb/info'
    sec_refbib = '2012MNRAS.425.1789S'
    ucbspectracnt = 0

    jsontxt = catalog.load_url(
        'http://heracles.astro.berkeley.edu/sndb/download?id=allpubspec',
        os.path.join(catalog.get_current_task_repo(), 'UCB/allpubspec.json'),
        json_sort='SpecID')
    if not jsontxt:
        return

    spectra = sorted(json.loads(jsontxt), key=lambda kk: kk['SpecID'])
    oldname = ''
    for spectrum in pbar(spectra, task_str):
        name = spectrum['ObjName']
        # Flush what has been accumulated whenever the object changes.
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        entry = catalog.entries[name]

        sec_source = entry.add_source(
            name=sec_reference, url=sec_refurl, bibcode=sec_refbib,
            secondary=True)
        entry.add_quantity(SUPERNOVA.ALIAS, name, sec_source)
        sources = [sec_source]
        if spectrum['Reference']:
            sources += [entry.add_source(bibcode=spectrum['Reference'])]
        sources = uniq_cdl(sources)

        if spectrum['Type'] and spectrum['Type'].strip() != 'NoMatch':
            for ct in spectrum['Type'].strip().split(','):
                entry.add_quantity(
                    SUPERNOVA.CLAIMED_TYPE,
                    ct.replace('-norm', '').strip(), sources)
        if spectrum['DiscDate']:
            ddate = spectrum['DiscDate'].replace('-', '/')
            entry.add_quantity(SUPERNOVA.DISCOVER_DATE, ddate, sources)
        if spectrum['HostName']:
            host = urllib.parse.unquote(spectrum['HostName']).replace('*', '')
            entry.add_quantity(SUPERNOVA.HOST, host, sources)

        # Reset for every spectrum: previously a record without `UT_Date`
        # silently inherited the time of the preceding spectrum (or raised
        # NameError when it was the first record).
        mjd = ''
        if spectrum['UT_Date']:
            # `UT_Date` is sliced as YYYYMMDD followed by an optional
            # fractional day.
            epoch = str(spectrum['UT_Date'])
            year = epoch[:4]
            month = epoch[4:6]
            day = epoch[6:]
            sig = get_sig_digits(day) + 5
            whole_day = floor(float(day))
            mjd = astrotime(
                year + '-' + month + '-' + str(whole_day).zfill(2)).mjd
            mjd = pretty_num(mjd + float(day) - whole_day, sig=sig)

        filename = spectrum['Filename'] if spectrum['Filename'] else ''
        instrument = spectrum['Instrument'] if spectrum['Instrument'] else ''
        reducer = spectrum['Reducer'] if spectrum['Reducer'] else ''
        observer = spectrum['Observer'] if spectrum['Observer'] else ''
        snr = str(spectrum['SNR']) if spectrum['SNR'] else ''

        if not filename:
            raise ValueError('Filename not found for SNDB spectrum!')
        if not spectrum['SpecID']:
            raise ValueError('ID not found for SNDB spectrum!')

        filepath = os.path.join(catalog.get_current_task_repo(),
                                'UCB/') + filename
        spectxt = catalog.load_url(
            'http://heracles.astro.berkeley.edu/sndb/download?id=ds:' +
            str(spectrum['SpecID']),
            filepath,
            archived_mode=True)

        # Drop comment lines, and also blank rows / rows with an empty first
        # field, which would otherwise raise IndexError on `row[0][0]`.
        specdata = [
            row for row in csv.reader(
                spectxt.splitlines(), delimiter=' ', skipinitialspace=True)
            if row and row[0] and row[0][0] != '#'
        ]

        haserrors = (len(specdata[0]) == 3 and specdata[0][2] and
                     specdata[0][2] != 'NaN')
        # Transpose rows into columns: wavelength, flux[, error].
        specdata = [list(ii) for ii in zip(*specdata)]

        wavelengths = specdata[0]
        fluxes = specdata[1]
        errors = specdata[2] if haserrors else ''
        # An error column made up solely of empty strings carries nothing.
        if not any(errors):
            errors = ''

        units = 'Uncalibrated'
        entry.add_spectrum(
            u_wavelengths='Angstrom',
            u_fluxes=units,
            u_time='MJD' if mjd else '',
            time=mjd,
            wavelengths=wavelengths,
            filename=filename,
            fluxes=fluxes,
            errors=errors,
            u_errors=units,
            instrument=instrument,
            source=sources,
            snr=snr,
            observer=observer,
            reducer=reducer,
            deredshifted=('-noz' in filename))
        ucbspectracnt = ucbspectracnt + 1
        if catalog.args.travis and ucbspectracnt >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()
    return
def _cleanup_coords_from_packed(packed):
    """Turn packed name coordinates into colon-separated RA/Dec strings.

    `packed` is the coordinate part of a designation with any '.' already
    removed, e.g. ``'123456+654321'``.  The part before the sign and the part
    after it are each cut into three two-character fields joined by ':', with
    any remaining characters appended after a '.'.

    Returns ``(ra, dec)``, or ``None`` when `packed` contains no '+'/'-'
    separator.
    """
    decsign = '+' if '+' in packed else '-'
    parts = packed.replace('+', '|').replace('-', '|').split('|')
    if len(parts) < 2:
        return None
    rastr = parts[0]
    decstr = parts[1]
    ra = (':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) +
          ('.' + rastr[6:] if len(rastr) > 6 else ''))
    dec = (decsign + ':'.join([decstr[:2], decstr[2:4], decstr[4:6]]) +
           ('.' + decstr[6:] if len(decstr) > 6 else ''))
    return ra, dec


def _cleanup_add_name_date(catalog, entry, alias, discoverdate):
    """Record a discovery date that was inferred from the alias `alias`."""
    if catalog.args.verbose:
        tprint('Added discoverdate from name [' + alias + ']: ' +
               discoverdate)
    source = entry.add_self_source()
    entry.add_quantity(
        TIDALDISRUPTION.DISCOVER_DATE, discoverdate, source, derived=True)


def do_cleanup(catalog):
    """Task to cleanup catalog before final write.

    For every entry: set the preferred name, derive first/max light, then
    fill in quantities that are missing but can be derived from what is
    already there -- discovery date and coordinates from the aliases, E(B-V)
    from an IRSA dust query, host coordinates from the host name, redshift
    from velocity or from NED-D host distances, velocity and distances from
    the best redshift, absolute magnitude, and the host offset.  Each entry
    is then sanitized and journaled.

    Parameters
    ----------
    catalog
        Catalog object providing the entries, caches and journaling.
    """
    # Bound up front on purpose: an import statement anywhere in a function
    # makes the name local to the *whole* function.  With the import sitting
    # only in the redshift step further down, the NED-D and
    # luminosity-distance steps raised UnboundLocalError whenever they ran
    # before that import had been executed.
    from astropy.cosmology import Planck15 as cosmo

    task_str = catalog.get_current_task_str()

    # Set preferred names, calculate some columns based on imported data,
    # sanitize some fields
    keys = catalog.entries.copy().keys()

    cleanupcnt = 0
    for oname in pbar(keys, task_str):
        name = catalog.add_entry(oname)

        # Set the preferred name, switching to that name if name changed.
        name = catalog.entries[name].set_preferred_name()
        entry = catalog.entries[name]

        aliases = entry.get_aliases()
        entry.set_first_max_light()

        # --- Discovery date from the alias ------------------------------
        # Each group below is tried only while no discovery date exists.
        # After a successful match the alias loop stops as soon as the date
        # is actually present on the entry.

        # Prefix followed by a two-digit year, month and day.
        if TIDALDISRUPTION.DISCOVER_DATE not in entry:
            prefixes = ['MLS', 'SSS', 'CSS', 'GRB ']
            for alias in aliases:
                for prefix in prefixes:
                    rest = alias.replace(prefix, '')
                    if alias.startswith(prefix) and is_number(rest[:2]):
                        discoverdate = '/'.join(
                            ['20' + rest[:2], rest[2:4], rest[4:6]])
                        _cleanup_add_name_date(
                            catalog, entry, alias, discoverdate)
                        break
                if TIDALDISRUPTION.DISCOVER_DATE in entry:
                    break

        # Prefix followed by a two-digit year only.
        if TIDALDISRUPTION.DISCOVER_DATE not in entry:
            prefixes = [
                'ASASSN-', 'PS1-', 'PS1', 'PS', 'iPTF', 'PTF', 'SCP-', 'SNLS-',
                'SPIRITS', 'LSQ', 'DES', 'SNHiTS', 'Gaia', 'GND', 'GNW', 'GSD',
                'GSW', 'EGS', 'COS', 'OGLE', 'HST'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    rest = alias.replace(prefix, '')
                    if (alias.startswith(prefix) and is_number(rest[:2]) and
                            is_number(rest[:1])):
                        discoverdate = '20' + rest[:2]
                        _cleanup_add_name_date(
                            catalog, entry, alias, discoverdate)
                        break
                if TIDALDISRUPTION.DISCOVER_DATE in entry:
                    break

        # Prefix followed by a four-digit year, month and day.
        if TIDALDISRUPTION.DISCOVER_DATE not in entry:
            prefixes = ['SNF']
            for alias in aliases:
                for prefix in prefixes:
                    rest = alias.replace(prefix, '')
                    if alias.startswith(prefix) and is_number(rest[:4]):
                        discoverdate = '/'.join(
                            [rest[:4], rest[4:6], rest[6:8]])
                        _cleanup_add_name_date(
                            catalog, entry, alias, discoverdate)
                        break
                if TIDALDISRUPTION.DISCOVER_DATE in entry:
                    break

        # Prefix followed by a two-digit year and month.
        if TIDALDISRUPTION.DISCOVER_DATE not in entry:
            prefixes = ['PTFS', 'SNSDF']
            for alias in aliases:
                for prefix in prefixes:
                    rest = alias.replace(prefix, '')
                    if alias.startswith(prefix) and is_number(rest[:2]):
                        discoverdate = '/'.join(['20' + rest[:2], rest[2:4]])
                        _cleanup_add_name_date(
                            catalog, entry, alias, discoverdate)
                        break
                if TIDALDISRUPTION.DISCOVER_DATE in entry:
                    break

        # Prefix followed directly by the alias' only run of digits, used as
        # the year when it has at most four digits.
        if TIDALDISRUPTION.DISCOVER_DATE not in entry:
            prefixes = ['AT', 'SN', 'OGLE-', 'SM ', 'KSN-']
            for alias in aliases:
                for prefix in prefixes:
                    if not alias.startswith(prefix):
                        continue
                    digit_runs = re.findall(r'\d+', alias)
                    if len(digit_runs) != 1:
                        continue
                    year = digit_runs[0]
                    if alias.replace(prefix, '').index(year) != 0:
                        continue
                    if (year and is_number(year) and '.' not in year and
                            len(year) <= 4):
                        _cleanup_add_name_date(catalog, entry, alias, year)
                        break
                if TIDALDISRUPTION.DISCOVER_DATE in entry:
                    break

        # --- Coordinates from the alias ---------------------------------
        if (TIDALDISRUPTION.RA not in entry or
                TIDALDISRUPTION.DEC not in entry):
            prefixes = [
                'PSN J', 'MASJ', 'CSS', 'SSS', 'MASTER OT J', 'HST J', 'TCP J',
                'MACS J', '2MASS J', 'EQ J', 'CRTS J', 'SMT J'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:6])):
                        packed = alias.split(':')[-1].replace(
                            prefix, '').replace('.', '')
                        coords = _cleanup_coords_from_packed(packed)
                        if coords is None:
                            continue
                        ra, dec = coords
                        if catalog.args.verbose:
                            tprint('Added ra/dec from name: ' + ra + ' ' +
                                   dec)
                        source = entry.add_self_source()
                        entry.add_quantity(
                            TIDALDISRUPTION.RA, ra, source, derived=True)
                        entry.add_quantity(
                            TIDALDISRUPTION.DEC, dec, source, derived=True)
                        break
                if TIDALDISRUPTION.RA in entry:
                    break

        # --- E(B-V) from IRSA -------------------------------------------
        # Skipped when any listed host is the Milky Way.
        no_host = (TIDALDISRUPTION.HOST not in entry or not any([
            x[QUANTITY.VALUE] == 'Milky Way'
            for x in entry[TIDALDISRUPTION.HOST]
        ]))
        if (TIDALDISRUPTION.RA in entry and TIDALDISRUPTION.DEC in entry and
                no_host):
            from astroquery.irsa_dust import IrsaDust
            # Query only when the result is not already cached.
            if name not in catalog.extinctions_dict:
                try:
                    ra_dec = (entry[TIDALDISRUPTION.RA][0][QUANTITY.VALUE] +
                              " " +
                              entry[TIDALDISRUPTION.DEC][0][QUANTITY.VALUE])
                    result = IrsaDust.get_query_table(ra_dec, section='ebv')
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception:
                    warnings.warn("Coordinate lookup for " + name +
                                  " failed in IRSA.")
                else:
                    ebv = result['ext SandF mean'][0]
                    ebverr = result['ext SandF std'][0]
                    catalog.extinctions_dict[name] = [ebv, ebverr]
            if name in catalog.extinctions_dict:
                sources = uniq_cdl([
                    entry.add_self_source(),
                    entry.add_source(bibcode='2011ApJ...737..103S')
                ])
                entry.add_quantity(
                    TIDALDISRUPTION.EBV,
                    str(catalog.extinctions_dict[name][0]),
                    sources,
                    e_value=str(catalog.extinctions_dict[name][1]),
                    derived=True)

        # --- Host coordinates from the host name ------------------------
        if (TIDALDISRUPTION.HOST in entry and
                (TIDALDISRUPTION.HOST_RA not in entry or
                 TIDALDISRUPTION.HOST_DEC not in entry)):
            for host in entry[TIDALDISRUPTION.HOST]:
                alias = host[QUANTITY.VALUE]
                if ' J' in alias and is_number(alias.split(' J')[-1][:6]):
                    packed = alias.split(' J')[-1].split(':')[-1].replace(
                        '.', '')
                    coords = _cleanup_coords_from_packed(packed)
                    if coords is None:
                        continue
                    hostra, hostdec = coords
                    if catalog.args.verbose:
                        tprint('Added hostra/hostdec from name: ' + hostra +
                               ' ' + hostdec)
                    source = entry.add_self_source()
                    entry.add_quantity(
                        TIDALDISRUPTION.HOST_RA, hostra, source, derived=True)
                    entry.add_quantity(
                        TIDALDISRUPTION.HOST_DEC, hostdec, source,
                        derived=True)
                    break
                if TIDALDISRUPTION.HOST_RA in entry:
                    break

        # --- Redshift from velocity -------------------------------------
        if (TIDALDISRUPTION.REDSHIFT not in entry and
                TIDALDISRUPTION.VELOCITY in entry):
            # Find the "best" velocity to use for this: the value quoted
            # with the most significant digits.
            bestsig = 0
            for hv in entry[TIDALDISRUPTION.VELOCITY]:
                sig = get_sig_digits(hv[QUANTITY.VALUE])
                if sig > bestsig:
                    besthv = hv[QUANTITY.VALUE]
                    bestsrc = hv['source']
                    bestsig = sig
            if bestsig > 0 and is_number(besthv):
                # v/c; the 1e5 factor presumably converts km/s to the cm/s
                # of CLIGHT -- confirm against the constants' definitions.
                voc = float(besthv) * 1.e5 / CLIGHT
                source = entry.add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # Relativistic Doppler: 1 + z = sqrt((1 + v/c) / (1 - v/c)).
                entry.add_quantity(
                    TIDALDISRUPTION.REDSHIFT,
                    pretty_num(
                        sqrt((1. + voc) / (1. - voc)) - 1., sig=bestsig),
                    sources,
                    kind='heliocentric',
                    derived=True)

        # --- Redshift from NED-D host distances -------------------------
        if (TIDALDISRUPTION.REDSHIFT not in entry and
                len(catalog.nedd_dict) > 0 and
                TIDALDISRUPTION.HOST in entry):
            reference = "NED-D"
            refurl = "http://ned.ipac.caltech.edu/Library/Distances/"
            for host in entry[TIDALDISRUPTION.HOST]:
                if host[QUANTITY.VALUE] in catalog.nedd_dict:
                    source = entry.add_source(bibcode='2016A&A...594A..13P')
                    secondarysource = entry.add_source(
                        name=reference, url=refurl, secondary=True)
                    # Median of all distances listed for this host.
                    meddist = statistics.median(
                        catalog.nedd_dict[host[QUANTITY.VALUE]])
                    redz = z_at_value(cosmo.comoving_distance,
                                      float(meddist) * un.Mpc)
                    redshift = pretty_num(
                        redz, sig=get_sig_digits(str(meddist)))
                    entry.add_quantity(
                        TIDALDISRUPTION.REDSHIFT,
                        redshift,
                        uniq_cdl([source, secondarysource]),
                        kind='host',
                        derived=True)

        # --- Absolute magnitude from a known luminosity distance --------
        if (TIDALDISRUPTION.MAX_ABS_MAG not in entry and
                TIDALDISRUPTION.MAX_APP_MAG in entry and
                TIDALDISRUPTION.LUM_DIST in entry):
            # Find the "best" distance to use for this
            bestsig = 0
            for ld in entry[TIDALDISRUPTION.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld['source']
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = entry.add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                bestldz = z_at_value(cosmo.luminosity_distance,
                                     float(bestld) * un.Mpc)
                # Distance modulus (distance scaled by 1e6 before the log)
                # plus a 2.5 log10(1 + z) term.
                pnum = (
                    float(entry[TIDALDISRUPTION.MAX_APP_MAG][0][
                        QUANTITY.VALUE]) -
                    5.0 * (log10(float(bestld) * 1.0e6) - 1.0) +
                    2.5 * log10(1.0 + bestldz))
                pnum = pretty_num(pnum, sig=bestsig)
                entry.add_quantity(
                    TIDALDISRUPTION.MAX_ABS_MAG, pnum, sources, derived=True)

        # --- Velocity and distances from the best redshift --------------
        if TIDALDISRUPTION.REDSHIFT in entry:
            # Find the "best" redshift to use for this
            bestz, bestkind, bestsig, bestsrc = entry.get_best_redshift()
            if bestsig > 0:
                try:
                    bestz = float(bestz)
                except Exception:
                    # Dump the offending entry before propagating.
                    print(entry)
                    raise
                if TIDALDISRUPTION.VELOCITY not in entry:
                    source = entry.add_self_source()
                    # Inverse of the Doppler relation used above:
                    # v/c = ((1 + z)^2 - 1) / ((1 + z)^2 + 1).
                    pnum = (CLIGHT / KM * ((bestz + 1.)**2. - 1.) /
                            ((bestz + 1.)**2. + 1.))
                    pnum = pretty_num(pnum, sig=bestsig)
                    entry.add_quantity(
                        TIDALDISRUPTION.VELOCITY,
                        pnum,
                        source,
                        kind=PREF_KINDS[bestkind],
                        derived=True)
                if bestz > 0.:
                    if TIDALDISRUPTION.LUM_DIST not in entry:
                        dl = cosmo.luminosity_distance(bestz)
                        sources = [
                            entry.add_self_source(),
                            entry.add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        entry.add_quantity(
                            TIDALDISRUPTION.LUM_DIST,
                            pretty_num(dl.value, sig=bestsig),
                            sources,
                            kind=PREF_KINDS[bestkind],
                            derived=True)
                        if (TIDALDISRUPTION.MAX_ABS_MAG not in entry and
                                TIDALDISRUPTION.MAX_APP_MAG in entry):
                            pnum = pretty_num(
                                float(entry[TIDALDISRUPTION.MAX_APP_MAG][0][
                                    QUANTITY.VALUE]) -
                                5.0 * (log10(dl.to('pc').value) - 1.0) +
                                2.5 * log10(1.0 + bestz),
                                sig=bestsig + 1)
                            entry.add_quantity(
                                TIDALDISRUPTION.MAX_ABS_MAG,
                                pnum,
                                sources,
                                derived=True)
                    if TIDALDISRUPTION.COMOVING_DIST not in entry:
                        cd = cosmo.comoving_distance(bestz)
                        sources = [
                            entry.add_self_source(),
                            entry.add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        entry.add_quantity(
                            TIDALDISRUPTION.COMOVING_DIST,
                            pretty_num(cd.value, sig=bestsig),
                            sources,
                            derived=True)

        # --- Host offset ------------------------------------------------
        if all([
                x in entry
                for x in [
                    TIDALDISRUPTION.RA, TIDALDISRUPTION.DEC,
                    TIDALDISRUPTION.HOST_RA, TIDALDISRUPTION.HOST_DEC
                ]
        ]):
            # For now just using first coordinates that appear in entry
            try:
                c1 = coord(
                    ra=entry[TIDALDISRUPTION.RA][0][QUANTITY.VALUE],
                    dec=entry[TIDALDISRUPTION.DEC][0][QUANTITY.VALUE],
                    unit=(un.hourangle, un.deg))
                c2 = coord(
                    ra=entry[TIDALDISRUPTION.HOST_RA][0][QUANTITY.VALUE],
                    dec=entry[TIDALDISRUPTION.HOST_DEC][0][QUANTITY.VALUE],
                    unit=(un.hourangle, un.deg))
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                # Unparseable coordinates: no offset is derived.
                pass
            else:
                sources = uniq_cdl(
                    [entry.add_self_source()] +
                    entry[TIDALDISRUPTION.RA][0]['source'].split(',') +
                    entry[TIDALDISRUPTION.DEC][0]['source'].split(',') +
                    entry[TIDALDISRUPTION.HOST_RA][0]['source'].split(',') +
                    entry[TIDALDISRUPTION.HOST_DEC][0]['source'].split(','))
                if 'hostoffsetang' not in entry:
                    # Plain Euclidean separation of the coordinate
                    # differences in degrees, converted to arcseconds.
                    hosa = Decimal(
                        hypot(c1.ra.degree - c2.ra.degree,
                              c1.dec.degree - c2.dec.degree))
                    hosa = pretty_num(hosa * Decimal(3600.))
                    entry.add_quantity(
                        TIDALDISRUPTION.HOST_OFFSET_ANG,
                        hosa,
                        sources,
                        derived=True,
                        u_value='arcseconds')
                if (TIDALDISRUPTION.COMOVING_DIST in entry and
                        TIDALDISRUPTION.REDSHIFT in entry and
                        TIDALDISRUPTION.HOST_OFFSET_DIST not in entry):
                    offsetang = entry[TIDALDISRUPTION.HOST_OFFSET_ANG][0][
                        QUANTITY.VALUE]
                    comoving = entry[TIDALDISRUPTION.COMOVING_DIST][0]
                    redshift = entry[TIDALDISRUPTION.REDSHIFT][0]
                    offsetsig = get_sig_digits(offsetang)
                    sources = uniq_cdl(
                        sources.split(',') +
                        comoving['source'].split(',') +
                        redshift['source'].split(','))
                    # Angle (arcsec -> rad) times comoving distance times
                    # 1000, divided by (1 + z).
                    entry.add_quantity(
                        TIDALDISRUPTION.HOST_OFFSET_DIST,
                        pretty_num(
                            float(offsetang) / 3600. * (pi / 180.) *
                            float(comoving[QUANTITY.VALUE]) * 1000. /
                            (1.0 + float(redshift[QUANTITY.VALUE])),
                            sig=offsetsig),
                        sources)

        entry.sanitize()
        catalog.journal_entries(bury=True, final=True, gz=True)
        cleanupcnt = cleanupcnt + 1
        if catalog.args.travis and cleanupcnt % 1000 == 0:
            break

    catalog.save_caches()

    return
def do_snf_specta(catalog):
    """Load the Nearby Supernova Factory spectra found in the task repo.

    Each sub-folder of ``SNFactory`` is treated as one event; every ``*.dat``
    file inside it is parsed into wavelength / flux (/ error) columns and
    attached to the event's entry.  Observation metadata is taken from
    ``# KEY = value`` header lines, except for a few files whose observation
    time is hard-coded below.

    Raises
    ------
    ValueError
        If no observation time could be determined for a spectrum.
    KeyError
        If an event folder has no entry in the bibcode table.
    """
    task_str = catalog.get_current_task_str()
    bibcodes = {
        'SN2005gj': '2006ApJ...650..510A',
        'SN2006D': '2007ApJ...654L..53T',
        'SN2007if': '2010ApJ...713.1073S',
        'SN2011fe': '2013A&A...554A..27P'
    }
    # (file-name marker, calendar date) for spectra dated by hand; the first
    # matching marker wins.
    dated_files = (
        ('Spectrum05_276', '2005-10-03'),
        ('Spectrum05_329', '2005-11-25'),
        ('Spectrum05_336', '2005-12-02'),
    )
    # Header keyword -> keyword argument of `add_spectrum` it fills.
    header_targets = {
        'OBSERVER': 'observer',
        'OBSERVAT': 'observatory',
        'TELESCOP': 'telescope',
        'INSTRUME': 'instrument',
    }
    previous = ''
    snfcnt = 0
    snf_root = os.path.join(catalog.get_current_task_repo(), 'SNFactory')
    eventfolders = next(os.walk(snf_root))[1]
    for eventfolder in pbar(eventfolders, task_str):
        name = catalog.get_preferred_name(eventfolder)
        # Journal accumulated entries whenever the event changes.
        if previous and name != previous:
            catalog.journal_entries()
        previous = name
        name = catalog.add_entry(name)
        sec_source = catalog.entries[name].add_source(
            name='Nearby Supernova Factory',
            url='http://snfactory.lbl.gov/',
            bibcode='2002SPIE.4836...61A',
            secondary=True)
        catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, eventfolder,
                                           sec_source)
        source = catalog.entries[name].add_source(
            bibcode=bibcodes[eventfolder])
        sources = uniq_cdl([source, sec_source])
        pattern = os.path.join(catalog.get_current_task_repo(), 'SNFactory',
                               eventfolder, '*.dat')
        for spectrum in pbar(glob(pattern), task_str):
            filename = os.path.basename(spectrum)
            with open(spectrum) as spec_file:
                rows = [
                    row for row in csv.reader(
                        spec_file, delimiter=' ', skipinitialspace=True)
                    if row
                ]

            time = ''
            if 'Keck_20060202_R' in spectrum:
                time = '53768.23469'
            else:
                for marker, date in dated_files:
                    if marker in spectrum:
                        time = pretty_num(astrotime(date).mjd, sig=5)
                        break

            header = dict.fromkeys(header_targets.values(), '')
            datarows = []
            for row in rows:
                if row[0][0] != '#':
                    datarows.append(row)
                    continue
                # Header line of the form "# FIELD = value / comment".
                pieces = ' '.join(row).split('=')
                if len(pieces) < 2:
                    continue
                field = pieces[0].strip('# ')
                value = pieces[1].split('/')[0].strip('\' ')
                # The first usable time wins; hard-coded times take
                # precedence over the header.
                if not time:
                    if field == 'JD':
                        time = str(jd_to_mjd(Decimal(value)))
                    elif field in ('MJD', 'MJD-OBS'):
                        time = value
                if field in header_targets:
                    header[header_targets[field]] = value.capitalize()

            if not time:
                raise ValueError('Time missing from spectrum.')

            first = datarows[0]
            haserrors = len(first) == 3 and first[2] and first[2] != 'NaN'
            # Transpose rows into columns.
            columns = list(map(list, zip(*datarows)))
            wavelengths = columns[0]
            fluxes = columns[1]
            errors = columns[2] if haserrors else ''

            unit_flx = 'erg/s/cm^2/Angstrom'
            unit_err = 'Variance' if previous == 'SN2011fe' else unit_flx
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes=unit_flx,
                u_time='MJD',
                time=time,
                wavelengths=wavelengths,
                fluxes=fluxes,
                errors=errors,
                observer=header['observer'],
                observatory=header['observatory'],
                telescope=header['telescope'],
                instrument=header['instrument'],
                u_errors=unit_err,
                source=sources,
                filename=filename)
            snfcnt += 1
            if (catalog.args.travis and
                    snfcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                break

    catalog.journal_entries()
    return
def do_snax(catalog):
    """Import from the SNaX X-ray database.

    Loads the tab-separated SNaX export and, for every data row whose first
    reference column holds a 19-character bibcode, records the claimed type,
    explosion date, coordinates, luminosity distance, host, redshift and one
    X-ray flux measurement on the corresponding entry.

    Parameters
    ----------
    catalog
        Catalog object providing ``load_url``, the entries and journaling.

    Returns
    -------
    None
        Returns early (without journaling) when the export cannot be loaded.
    """
    task_str = catalog.get_current_task_str()
    dlurl = 'http://kronos.uchicago.edu/snax/export.php?exportType=TSV&exportFields=standard&objid=&name=&typeid=&type=&galaxyid=&galaxy=&fluxMin=&fluxMax=&fluxEnergyLMin=&fluxEnergyLMax=&fluxEnergyHMin=&fluxEnergyHMax=&lumMin=&lumMax=&instrumentid=&instrument=&ageMin=&ageMax=&dateMin=&dateMax=&sortA=dateExploded'  # noqa: E501

    file_path = os.path.join(catalog.get_current_task_repo(), 'SNaX.TSV')

    tsv = catalog.load_url(dlurl, file_path)
    # Same guard the other URL-based importers apply: nothing to do when the
    # download (and any cached copy) is unavailable.
    if not tsv:
        return

    data = [x.split('\t') for x in tsv.split('\n')]

    for r, row in enumerate(pbar(data, task_str)):
        # Skip the header line and blank lines.
        if r == 0 or not row[0]:
            continue
        (name, source) = catalog.new_entry(
            row[0],
            srcname='SNaX',
            url='http://kronos.uchicago.edu/snax/',
            secondary=True)
        entry = catalog.entries[name]
        sources = [source]
        # Reference columns are addressed from the end of the row; rows
        # whose first reference is not a 19-character bibcode are skipped.
        bibcode = row[-6].strip()
        if len(bibcode) != 19:
            continue
        expsrc = uniq_cdl(sources + [entry.add_source(bibcode=bibcode)])
        coosrc = uniq_cdl(
            sources + [entry.add_source(bibcode=row[-5].strip())])
        dissrc = uniq_cdl(
            sources + [entry.add_source(bibcode=row[-4].strip())])
        flxsrc = uniq_cdl(sources + [
            entry.add_source(bibcode=row[-3].strip()),
            entry.add_source(bibcode=row[-2].strip())
        ])

        entry.add_quantity(SUPERNOVA.CLAIMED_TYPE, row[1], source)
        date = astrotime(float(row[2]), format='jd').datetime
        entry.add_quantity(
            SUPERNOVA.EXPLOSION_DATE,
            make_date_string(date.year, date.month, date.day), expsrc)
        # Column 3 holds RA and Dec together: the first three
        # whitespace-separated tokens are RA, the remainder is Dec.
        coord_tokens = row[3].split()
        entry.add_quantity(SUPERNOVA.RA, ' '.join(coord_tokens[:3]), coosrc)
        entry.add_quantity(SUPERNOVA.DEC, ' '.join(coord_tokens[3:]), coosrc)
        entry.add_quantity(SUPERNOVA.LUM_DIST, row[4], dissrc)
        entry.add_quantity(SUPERNOVA.HOST, row[5], source)
        # A zero or empty redshift error is treated as "no error given".
        entry.add_quantity(
            SUPERNOVA.REDSHIFT,
            row[6],
            source,
            e_value=row[7] if (row[7] and float(row[7]) != 0.0) else '')
        # Fluxes and their errors are scaled by 1e-13 before being stored.
        flux_scale = Decimal('1.0e-13')
        photodict = {
            PHOTOMETRY.TIME: jd_to_mjd(Decimal(row[8])),
            PHOTOMETRY.U_TIME: 'MJD',
            PHOTOMETRY.ENERGY: row[15:17],
            PHOTOMETRY.U_ENERGY: 'keV',
            PHOTOMETRY.FLUX: str(flux_scale * Decimal(row[11])),
            PHOTOMETRY.U_FLUX: 'ergs/s/cm^2',
            PHOTOMETRY.E_LOWER_FLUX: str(flux_scale * Decimal(row[13])),
            PHOTOMETRY.E_UPPER_FLUX: str(flux_scale * Decimal(row[14])),
            PHOTOMETRY.INSTRUMENT: row[9],
            PHOTOMETRY.SOURCE: flxsrc
        }
        if row[12] == '1':
            photodict[PHOTOMETRY.UPPER_LIMIT] = True
        entry.add_photometry(**photodict)

    catalog.journal_entries()
    return
def do_ucb_spectra(catalog):
    """Import the public spectra of the UCB Filippenko Group's SNDB.

    Loads the index of public spectra, then for every listed spectrum adds
    the object's alias, claimed type(s), discovery date and host, loads the
    spectrum file itself and attaches it to the object's entry.

    Parameters
    ----------
    catalog
        Catalog object providing ``load_url``, the entries and journaling.

    Returns
    -------
    None
        Returns early (without journaling) when the index cannot be loaded.

    Raises
    ------
    ValueError
        If an index record has no filename or no ``SpecID``.
    """
    task_str = catalog.get_current_task_str()
    sec_reference = 'UCB Filippenko Group\'s Supernova Database (SNDB)'
    sec_refurl = 'http://heracles.astro.berkeley.edu/sndb/info'
    sec_refbib = '2012MNRAS.425.1789S'
    ucbspectracnt = 0

    jsontxt = catalog.load_url(
        'http://heracles.astro.berkeley.edu/sndb/download?id=allpubspec',
        os.path.join(catalog.get_current_task_repo(), 'UCB/allpubspec.json'),
        json_sort='SpecID')
    if not jsontxt:
        return

    spectra = sorted(json.loads(jsontxt), key=lambda kk: kk['SpecID'])
    oldname = ''
    for spectrum in pbar(spectra, task_str):
        name = spectrum['ObjName']
        # Flush what has been accumulated whenever the object changes.
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        entry = catalog.entries[name]

        sec_source = entry.add_source(name=sec_reference,
                                      url=sec_refurl,
                                      bibcode=sec_refbib,
                                      secondary=True)
        entry.add_quantity(SUPERNOVA.ALIAS, name, sec_source)
        sources = [sec_source]
        if spectrum['Reference']:
            sources += [entry.add_source(bibcode=spectrum['Reference'])]
        sources = uniq_cdl(sources)

        if spectrum['Type'] and spectrum['Type'].strip() != 'NoMatch':
            for ct in spectrum['Type'].strip().split(','):
                entry.add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                   ct.replace('-norm', '').strip(), sources)
        if spectrum['DiscDate']:
            ddate = spectrum['DiscDate'].replace('-', '/')
            entry.add_quantity(SUPERNOVA.DISCOVER_DATE, ddate, sources)
        if spectrum['HostName']:
            host = urllib.parse.unquote(spectrum['HostName']).replace('*', '')
            entry.add_quantity(SUPERNOVA.HOST, host, sources)

        # Reset for every spectrum: previously a record without `UT_Date`
        # silently inherited the time of the preceding spectrum (or raised
        # NameError when it was the first record).
        mjd = ''
        if spectrum['UT_Date']:
            # `UT_Date` is sliced as YYYYMMDD followed by an optional
            # fractional day.
            epoch = str(spectrum['UT_Date'])
            year = epoch[:4]
            month = epoch[4:6]
            day = epoch[6:]
            sig = get_sig_digits(day) + 5
            whole_day = floor(float(day))
            mjd = astrotime(year + '-' + month + '-' +
                            str(whole_day).zfill(2)).mjd
            mjd = pretty_num(mjd + float(day) - whole_day, sig=sig)

        filename = spectrum['Filename'] if spectrum['Filename'] else ''
        instrument = spectrum['Instrument'] if spectrum['Instrument'] else ''
        reducer = spectrum['Reducer'] if spectrum['Reducer'] else ''
        observer = spectrum['Observer'] if spectrum['Observer'] else ''
        snr = str(spectrum['SNR']) if spectrum['SNR'] else ''

        if not filename:
            raise ValueError('Filename not found for SNDB spectrum!')
        if not spectrum['SpecID']:
            raise ValueError('ID not found for SNDB spectrum!')

        filepath = os.path.join(catalog.get_current_task_repo(),
                                'UCB/') + filename
        spectxt = catalog.load_url(
            'http://heracles.astro.berkeley.edu/sndb/download?id=ds:' +
            str(spectrum['SpecID']),
            filepath,
            archived_mode=True)

        # Keep data rows only: drop blank rows, rows with an empty first
        # field, and '#' comment lines.
        specdata = [
            row for row in csv.reader(
                spectxt.splitlines(), delimiter=' ', skipinitialspace=True)
            if row and row[0] and row[0][0] != '#'
        ]

        haserrors = (len(specdata[0]) == 3 and specdata[0][2] and
                     specdata[0][2] != 'NaN')
        # Transpose rows into columns: wavelength, flux[, error].
        specdata = [list(ii) for ii in zip(*specdata)]

        wavelengths = specdata[0]
        fluxes = specdata[1]
        errors = specdata[2] if haserrors else ''
        # An error column made up solely of empty strings carries nothing.
        if not any(errors):
            errors = ''

        units = 'Uncalibrated'
        entry.add_spectrum(u_wavelengths='Angstrom',
                           u_fluxes=units,
                           u_time='MJD' if mjd else '',
                           time=mjd,
                           wavelengths=wavelengths,
                           filename=filename,
                           fluxes=fluxes,
                           errors=errors,
                           u_errors=units,
                           instrument=instrument,
                           source=sources,
                           snr=snr,
                           observer=observer,
                           reducer=reducer,
                           deredshifted=('-noz' in filename))
        ucbspectracnt = ucbspectracnt + 1
        if catalog.args.travis and ucbspectracnt >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()
    return
def do_ucb_photo(catalog):
    """Import the public photometry of the UCB Filippenko Group's SNDB.

    Loads the index of public photometry sets, then for every listed set
    adds the object's aliases, claimed type(s), discovery date and host,
    loads the photometry file and adds one photometry point per usable row.

    Parameters
    ----------
    catalog
        Catalog object providing ``load_url``, the entries and journaling.

    Returns
    -------
    None
        Returns early when the index cannot be loaded.

    Raises
    ------
    ValueError
        If an index record has no filename or no ``PhotID``.
    """
    task_str = catalog.get_current_task_str()
    sec_ref = 'UCB Filippenko Group\'s Supernova Database (SNDB)'
    sec_refurl = 'http://heracles.astro.berkeley.edu/sndb/info'
    sec_refbib = '2012MNRAS.425.1789S'

    jsontxt = catalog.load_url(
        'http://heracles.astro.berkeley.edu/sndb/download?id=allpubphot',
        os.path.join(catalog.get_current_task_repo(), 'SNDB/allpubphot.json'),
        json_sort='PhotID')
    if not jsontxt:
        return

    photom = sorted(json.loads(jsontxt), key=lambda kk: kk['PhotID'])
    for phot in pbar(photom, task_str):
        oldname = phot['ObjName']
        name = catalog.add_entry(oldname)
        entry = catalog.entries[name]

        sec_source = entry.add_source(name=sec_ref,
                                      url=sec_refurl,
                                      bibcode=sec_refbib,
                                      secondary=True)
        entry.add_quantity(SUPERNOVA.ALIAS, oldname, sec_source)
        if phot['AltObjName']:
            entry.add_quantity(SUPERNOVA.ALIAS, phot['AltObjName'],
                               sec_source)
        sources = [sec_source]
        if phot['Reference']:
            sources += [entry.add_source(bibcode=phot['Reference'])]
        sources = uniq_cdl(sources)

        if phot['Type'] and phot['Type'].strip() != 'NoMatch':
            for ct in phot['Type'].strip().split(','):
                entry.add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                   ct.replace('-norm', '').strip(), sources)
        if phot['DiscDate']:
            entry.add_quantity(SUPERNOVA.DISCOVER_DATE,
                               phot['DiscDate'].replace('-', '/'), sources)
        if phot['HostName']:
            host = urllib.parse.unquote(phot['HostName']).replace('*', '')
            entry.add_quantity(SUPERNOVA.HOST, host, sources)

        filename = phot['Filename'] if phot['Filename'] else ''

        if not filename:
            raise ValueError('Filename not found for SNDB phot!')
        if not phot['PhotID']:
            raise ValueError('ID not found for SNDB phot!')

        filepath = os.path.join(catalog.get_current_task_repo(),
                                'SNDB/') + filename
        phottxt = catalog.load_url(
            'http://heracles.astro.berkeley.edu/sndb/'
            'download?id=dp:' + str(phot['PhotID']), filepath)

        tsvin = csv.reader(phottxt.splitlines(),
                           delimiter=' ',
                           skipinitialspace=True)

        for row in tsvin:
            # Skip blank lines and lines whose first token is a lone '#'.
            if not len(row) > 0 or row[0] == "#":
                continue
            # An HTML page instead of data: stop reading this file.
            if 'DOCTYPE' in ''.join(row):
                break
            photodict = {
                PHOTOMETRY.TIME: row[0],
                PHOTOMETRY.U_TIME: 'MJD',
                PHOTOMETRY.TELESCOPE: row[5],
                PHOTOMETRY.BAND: row[4],
                PHOTOMETRY.SOURCE: sources
            }
            # Values of 99.0 and above are treated as "no measurement".
            if is_number(row[1]) and float(row[1]) < 99.0:
                # Detection: magnitude in column 1, its error in column 2.
                photodict[PHOTOMETRY.MAGNITUDE] = row[1]
                photodict[PHOTOMETRY.E_MAGNITUDE] = row[2]
            elif is_number(row[3]) and float(row[3]) < 99.0:
                # No detection: column 3 is stored as an upper limit.  The
                # threshold must be tested on column 3 itself; testing
                # column 1 here (as before) could never succeed once the
                # first branch had rejected it.
                photodict[PHOTOMETRY.MAGNITUDE] = row[3]
                photodict[PHOTOMETRY.UPPER_LIMIT] = True
            else:
                continue
            entry.add_photometry(**photodict)

        catalog.journal_entries()

    return
def do_snax(catalog):
    """Import supernova X-ray data from the local `SNaX.TSV` table.

    The first row of the table is treated as a header.  For every other row
    an entry is created and the claimed type, explosion date, coordinates,
    luminosity distance, host, redshift and one X-ray flux point are added.
    """
    task_str = catalog.get_current_task_str()
    file_path = os.path.join(catalog.get_current_task_repo(), 'SNaX.TSV')

    # Read the whole table up front so the file handle is closed promptly.
    with open(file_path, 'r') as tsv_file:
        data = list(
            csv.reader(
                tsv_file,
                delimiter='\t',
                quotechar='"',
                skipinitialspace=True))

    for r, row in enumerate(pbar(data, task_str)):
        if r == 0:
            continue
        (name, source) = catalog.new_entry(
            row[0], srcname='SNaX', url='http://kronos.uchicago.edu/snax/')
        entry = catalog.entries[name]
        sources = [source]

        # The trailing columns hold the bibcodes backing each quantity group.
        expsrc = uniq_cdl(
            sources + [entry.add_source(bibcode=row[-6].strip())])
        coosrc = uniq_cdl(
            sources + [entry.add_source(bibcode=row[-5].strip())])
        dissrc = uniq_cdl(
            sources + [entry.add_source(bibcode=row[-4].strip())])
        flxsrc = uniq_cdl(sources + [
            entry.add_source(bibcode=row[-3].strip()),
            entry.add_source(bibcode=row[-2].strip())
        ])

        entry.add_quantity(SUPERNOVA.CLAIMED_TYPE, row[1], source)

        date = astrotime(float(row[2]), format='jd').datetime
        entry.add_quantity(
            SUPERNOVA.EXPLOSION_DATE,
            make_date_string(date.year, date.month, date.day), expsrc)

        # Column 3 holds RA and Dec in one whitespace-separated string: the
        # first three tokens are the RA, the remaining tokens are the Dec.
        coords = row[3].split()
        entry.add_quantity(SUPERNOVA.RA, ' '.join(coords[:3]), coosrc)
        entry.add_quantity(SUPERNOVA.DEC, ' '.join(coords[3:]), coosrc)

        entry.add_quantity(SUPERNOVA.LUM_DIST, row[4], dissrc)
        entry.add_quantity(SUPERNOVA.HOST, row[5], source)
        entry.add_quantity(
            SUPERNOVA.REDSHIFT,
            row[6],
            source,
            e_value=row[7] if (row[7] and float(row[7]) != 0.0) else '')

        # Fluxes and their errors are scaled by 1e-13 before being stored.
        flux_scale = Decimal('1.0e-13')
        photodict = {
            PHOTOMETRY.TIME: jd_to_mjd(Decimal(row[8])),
            PHOTOMETRY.U_TIME: 'MJD',
            PHOTOMETRY.ENERGY: row[15:17],
            PHOTOMETRY.U_ENERGY: 'keV',
            PHOTOMETRY.FLUX: str(flux_scale * Decimal(row[11])),
            PHOTOMETRY.U_FLUX: 'ergs/s/cm^2',
            PHOTOMETRY.E_LOWER_FLUX: str(flux_scale * Decimal(row[13])),
            PHOTOMETRY.E_UPPER_FLUX: str(flux_scale * Decimal(row[14])),
            PHOTOMETRY.INSTRUMENT: row[9],
            PHOTOMETRY.SOURCE: flxsrc
        }
        if row[12] == '1':
            photodict[PHOTOMETRY.UPPER_LIMIT] = True
        entry.add_photometry(**photodict)

    catalog.journal_entries()
    return
def do_ogle(catalog):
    """Import transients and I-band light curves from the OGLE-IV pages.

    For each OGLE transient listing page, the page is loaded through
    `catalog.load_url`, the `.dat` light-curve links are collected, and the
    text following each `<br>` tag is scanned for `Ra,Dec=` markers that
    introduce an object.  Each new object gets an entry, an optional ATel
    source, a discovery year derived from its name, a claimed type and its
    photometry.
    """
    task_str = catalog.get_current_task_str()
    basenames = [
        'transients', 'transients/2015', 'transients/2014b',
        'transients/2014', 'transients/2013', 'transients/2012'
    ]
    oglenames = set()
    # One flag per entry of `basenames`: whether that page is re-read in
    # update mode.  The list must be as long as `basenames`, otherwise update
    # mode fails with an IndexError on the last page.
    ogleupdate = [True, False, False, False, False, False]
    for b, bn in enumerate(pbar(basenames, task_str)):
        if catalog.args.update and not ogleupdate[b]:
            continue

        filepath = os.path.join(catalog.get_current_task_repo(), 'OGLE-')
        filepath += bn.replace('/', '-') + '-transients.html'
        htmltxt = catalog.load_url(
            'http://ogle.astrouw.edu.pl/ogle4/' + bn + '/transients.html',
            filepath)
        if not htmltxt:
            continue

        soup = BeautifulSoup(htmltxt, 'html5lib')
        links = soup.findAll('a')
        breaks = soup.findAll('br')

        # Light-curve URLs and their local cache file names, in page order.
        datalinks = []
        datafnames = []
        for a in links:
            if a.has_attr('href') and '.dat' in a['href']:
                datalinks.append('http://ogle.astrouw.edu.pl/ogle4/' + bn +
                                 '/' + a['href'])
                datafnames.append(
                    bn.replace('/', '-') + '-' + a['href'].replace('/', '-'))

        # `ec` counts objects on the page and indexes the two lists above; it
        # is advanced for skipped objects too so the lists stay aligned.
        ec = -1
        reference = 'OGLE-IV Transient Detection System'
        refurl = 'http://ogle.astrouw.edu.pl/ogle4/transients/transients.html'
        for bi, br in enumerate(pbar(breaks, task_str)):
            sibling = br.nextSibling
            # A `<br>` with nothing after it has no sibling to inspect.
            if sibling is None or 'Ra,Dec=' not in sibling:
                continue

            line = sibling.replace('\n', '').split('Ra,Dec=')
            name = line[0].strip()
            ec += 1

            if 'NOVA' in name or 'dupl' in name:
                continue
            if name in oglenames:
                continue
            oglenames.add(name)

            name = catalog.add_entry(name)

            # Walk the following nodes, up to the next object, looking for a
            # `class=` text fragment and an ATel link.
            mySibling = sibling.nextSibling
            atelref = ''
            atelurl = ''
            claimedtype = ''
            while mySibling is not None and 'Ra,Dec=' not in mySibling:
                if isinstance(mySibling, NavigableString):
                    if not claimedtype and 'class=' in str(mySibling):
                        claimedtype = re.sub(r'\([^)]*\)', '',
                                             str(mySibling).split('=')[-1])
                        claimedtype = claimedtype.replace('SN', '').strip()
                        if claimedtype == '-':
                            claimedtype = ''
                if isinstance(mySibling, Tag):
                    atela = mySibling
                    if (atela and atela.has_attr('href') and
                            'astronomerstelegram' in atela['href']):
                        atelref = atela.contents[0].strip()
                        atelurl = atela['href']
                mySibling = mySibling.nextSibling

            fname = os.path.join(catalog.get_current_task_repo(),
                                 'OGLE/') + datafnames[ec]
            csvtxt = catalog.load_url(datalinks[ec], fname)
            lcdat = csvtxt.splitlines()

            sources = [
                catalog.entries[name].add_source(name=reference, url=refurl)
            ]
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name,
                                               sources[0])
            if atelref and atelref != 'ATel#----':
                sources.append(catalog.entries[name].add_source(
                    name=atelref, url=atelurl))
            sources = uniq_cdl(sources)

            # The discovery year is taken from the name: either the four
            # digits after `OGLE-` or the two digits directly after `OGLE`.
            if name.startswith('OGLE'):
                if name[4] == '-':
                    if is_number(name[5:9]):
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.DISCOVER_DATE, name[5:9], sources)
                else:
                    if is_number(name[4:6]):
                        catalog.entries[name].add_quantity(
                            SUPERNOVA.DISCOVER_DATE, '20' + name[4:6],
                            sources)

            # RA and Dec from OGLE pages currently not reliable, so they are
            # deliberately not imported.
            if claimedtype and claimedtype != '-':
                catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                                   claimedtype, sources)
            elif ('SN' not in name and
                  SUPERNOVA.CLAIMED_TYPE not in catalog.entries[name]):
                catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                                   'Candidate', sources)

            for row in lcdat:
                row = row.split()
                mjd = str(jd_to_mjd(Decimal(row[0])))
                magnitude = row[1]
                if float(magnitude) > 90.0:
                    continue
                e_mag = row[2]
                upperlimit = False
                # A missing (-1) or very large error marks an upper limit.
                if e_mag == '-1' or float(e_mag) > 10.0:
                    e_mag = ''
                    upperlimit = True
                catalog.entries[name].add_photometry(
                    time=mjd,
                    u_time='MJD',
                    band='I',
                    magnitude=magnitude,
                    e_magnitude=e_mag,
                    system='Vega',
                    source=sources,
                    upperlimit=upperlimit)

            if catalog.args.update:
                catalog.journal_entries()
            if catalog.args.travis and bi >= catalog.TRAVIS_QUERY_LIMIT:
                break

    catalog.journal_entries()
    return
def do_ucb_photo(catalog):
    """Import photometry from the UCB Filippenko Group's SNDB.

    Loads the SNDB index of published photometry (via `catalog.load_url`),
    adds the metadata of every index record (aliases, claimed types,
    discovery date, host) to the matching entry, then loads the record's
    data file and adds one photometry point per usable row.

    Raises:
        ValueError: if an index record has no filename or no `PhotID`.
    """
    task_str = catalog.get_current_task_str()
    sec_ref = 'UCB Filippenko Group\'s Supernova Database (SNDB)'
    sec_refurl = 'http://heracles.astro.berkeley.edu/sndb/info'
    sec_refbib = '2012MNRAS.425.1789S'

    jsontxt = catalog.load_url(
        'http://heracles.astro.berkeley.edu/sndb/download?id=allpubphot',
        os.path.join(catalog.get_current_task_repo(), 'SNDB/allpubphot.json'),
        json_sort='PhotID')
    if not jsontxt:
        return

    photom = sorted(json.loads(jsontxt), key=lambda kk: kk['PhotID'])
    for phot in pbar(photom, task_str):
        oldname = phot['ObjName']
        name = catalog.add_entry(oldname)
        entry = catalog.entries[name]

        sec_source = entry.add_source(
            name=sec_ref, url=sec_refurl, bibcode=sec_refbib, secondary=True)
        entry.add_quantity(SUPERNOVA.ALIAS, oldname, sec_source)
        if phot['AltObjName']:
            entry.add_quantity(SUPERNOVA.ALIAS, phot['AltObjName'],
                               sec_source)

        sources = [sec_source]
        if phot['Reference']:
            sources += [entry.add_source(bibcode=phot['Reference'])]
        sources = uniq_cdl(sources)

        if phot['Type'] and phot['Type'].strip() != 'NoMatch':
            for ct in phot['Type'].strip().split(','):
                entry.add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                   ct.replace('-norm', '').strip(), sources)
        if phot['DiscDate']:
            entry.add_quantity(SUPERNOVA.DISCOVER_DATE,
                               phot['DiscDate'].replace('-', '/'), sources)
        if phot['HostName']:
            host = urllib.parse.unquote(phot['HostName']).replace('*', '')
            entry.add_quantity(SUPERNOVA.HOST, host, sources)

        filename = phot['Filename'] if phot['Filename'] else ''
        if not filename:
            raise ValueError('Filename not found for SNDB phot!')
        if not phot['PhotID']:
            raise ValueError('ID not found for SNDB phot!')

        filepath = os.path.join(catalog.get_current_task_repo(),
                                'SNDB/') + filename
        phottxt = catalog.load_url('http://heracles.astro.berkeley.edu/sndb/'
                                   'download?id=dp:' + str(phot['PhotID']),
                                   filepath)
        # `load_url` can come back empty (see the index load above); skip the
        # data file rather than crash on `.splitlines()`.
        if not phottxt:
            continue

        tsvin = csv.reader(
            phottxt.splitlines(), delimiter=' ', skipinitialspace=True)
        for row in tsvin:
            if not row or row[0] == "#":
                continue
            # An HTML error page was returned instead of data.
            if 'DOCTYPE' in ''.join(row):
                break
            photodict = {
                PHOTOMETRY.TIME: row[0],
                PHOTOMETRY.U_TIME: 'MJD',
                PHOTOMETRY.TELESCOPE: row[5],
                PHOTOMETRY.BAND: row[4],
                PHOTOMETRY.SOURCE: sources
            }
            if is_number(row[1]) and float(row[1]) < 99.0:
                # Detection: magnitude in column 1, uncertainty in column 2.
                photodict[PHOTOMETRY.MAGNITUDE] = row[1]
                photodict[PHOTOMETRY.E_MAGNITUDE] = row[2]
            elif is_number(row[3]) and float(row[3]) < 99.0:
                # Upper limit: column 3 holds the limiting magnitude.  The
                # range check must use column 3 as well; testing column 1
                # here could never succeed.
                photodict[PHOTOMETRY.MAGNITUDE] = row[3]
                photodict[PHOTOMETRY.UPPER_LIMIT] = True
            else:
                continue
            entry.add_photometry(**photodict)

    catalog.journal_entries()
    return
def _cfa_epoch_mjd(year, month, day):
    """Return the MJD, as a string, of a date whose day may be fractional."""
    whole_day = floor(float(day))
    date_str = year + '-' + month + '-' + str(whole_day).zfill(2)
    return str(astrotime(date_str).mjd + float(day) - whole_day)


def _cfa_read_columns(fname):
    """Read a space-delimited spectrum file and return its columns."""
    with open(fname, 'r') as spec_file:
        rows = csv.reader(spec_file, delimiter=' ', skipinitialspace=True)
        return [list(col) for col in zip(*rows)]


def _cfa_add_archive_source(catalog, name):
    """Add the CfA archive secondary source and alias; return the source."""
    source = catalog.entries[name].add_source(
        name='CfA Supernova Archive',
        url='https://www.cfa.harvard.edu/supernova/SNarchive.html',
        secondary=True,
        acknowledgment=ACKN_CFA)
    catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name, source)
    return source


def do_cfa_spectra(catalog):
    """Import spectra from the CfA archive.

    Four sub-directories of the task repository are processed in turn
    (`CfA_SNII`, `CfA_SNIa`, `CfA_SNIbc`, `CfA_Extra`).  Each contains one
    folder per object; the observation date and instrument of every spectrum
    are parsed from its file name.  Entries are journaled whenever the
    object name changes and at the end of each section.
    """
    task_str = catalog.get_current_task_str()

    # II spectra
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_SNII')))[1]
    for ni, name in enumerate(pbar_strings(file_names, task_str)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_SNII/') + name
        origname = name
        if name.startswith('sn') and is_number(name[2:6]):
            name = 'SN' + name[2:]
        name = catalog.get_preferred_name(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        source = _cfa_add_archive_source(catalog, name)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            filename = os.path.basename(fname)
            fileparts = filename.split('-')
            # The date part sits one position later when the folder name is
            # not of the `snYYYY...` form.
            if origname.startswith('sn') and is_number(origname[2:6]):
                idx = 1
            else:
                idx = 2
            year = fileparts[idx][:4]
            month = fileparts[idx][4:6]
            day = fileparts[idx][6:]
            instrument = fileparts[idx + 1].split('.')[0]
            time = _cfa_epoch_mjd(year, month, day)
            data = _cfa_read_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            errors = data[2]
            sources = uniq_cdl([
                source,
                catalog.entries[name].add_source(
                    bibcode='2017arXiv170601030H')
            ])
            # NOTE(review): `u_errors` is spelled 'ergs/...' while `u_fluxes`
            # is 'erg/...'; kept as found -- confirm which is intended.
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                filename=filename,
                wavelengths=wavelengths,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                u_errors='ergs/s/cm^2/Angstrom',
                errors=errors,
                source=sources,
                dereddened=False,
                deredshifted=False)
        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break
    catalog.journal_entries()

    # Ia spectra
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_SNIa')))[1]
    for ni, name in enumerate(pbar_strings(file_names, task_str)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_SNIa/') + name
        origname = name
        if name.startswith('sn') and is_number(name[2:6]):
            name = 'SN' + name[2:]
        if name.startswith('snf') and is_number(name[3:7]):
            name = 'SNF' + name[3:]
        name = catalog.get_preferred_name(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        source = _cfa_add_archive_source(catalog, name)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            filename = os.path.basename(fname)
            fileparts = filename.split('-')
            if origname.startswith('sn') and is_number(origname[2:6]):
                idx = 1
            else:
                idx = 2
            year = fileparts[idx][:4]
            month = fileparts[idx][4:6]
            day = fileparts[idx][6:]
            instrument = fileparts[idx + 1].split('.')[0]
            time = _cfa_epoch_mjd(year, month, day)
            data = _cfa_read_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            errors = data[2]
            sources = uniq_cdl([
                source,
                catalog.entries[name].add_source(
                    bibcode='2012AJ....143..126B'),
                catalog.entries[name].add_source(
                    bibcode='2008AJ....135.1598M')
            ])
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                filename=filename,
                wavelengths=wavelengths,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                u_errors='ergs/s/cm^2/Angstrom',
                errors=errors,
                source=sources,
                dereddened=False,
                deredshifted=False)
        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break
    catalog.journal_entries()

    # Ibc spectra
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_SNIbc')))[1]
    # NOTE(review): this section iterates with `pbar` while the other three
    # use `pbar_strings`; kept as found.
    for ni, name in enumerate(pbar(file_names, task_str)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_SNIbc/') + name
        if name.startswith('sn') and is_number(name[2:6]):
            name = 'SN' + name[2:]
        name = catalog.get_preferred_name(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        source = _cfa_add_archive_source(catalog, name)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            filename = os.path.basename(fname)
            fileparts = filename.split('-')
            instrument = ''
            year = fileparts[1][:4]
            month = fileparts[1][4:6]
            day = fileparts[1][6:].split('.')[0]
            if len(fileparts) > 2:
                instrument = fileparts[-1].split('.')[0]
            time = _cfa_epoch_mjd(year, month, day)
            data = _cfa_read_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            sources = uniq_cdl([
                source,
                catalog.entries[name].add_source(
                    bibcode='2014AJ....147...99M')
            ])
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                wavelengths=wavelengths,
                filename=filename,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                source=sources,
                dereddened=False,
                deredshifted=False)
        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break
    catalog.journal_entries()

    # Other spectra
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_Extra')))[1]
    # Built from a string: `Decimal(1.0e-15)` would carry the binary float's
    # rounding error into every scaled flux value.
    flux_scale = Decimal('1.0e-15')
    for ni, name in enumerate(pbar_strings(file_names, task_str)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_Extra/') + name
        if name.startswith('sn') and is_number(name[2:6]):
            name = 'SN' + name[2:]
        name = catalog.get_preferred_name(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        source = _cfa_add_archive_source(catalog, name)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            if not os.path.isfile(fname):
                continue
            filename = os.path.basename(fname)
            # Only `sn*flm` files are imported, and files whose names carry
            # one of the suffixes below are skipped.
            if ((not filename.startswith('sn') or
                 not filename.endswith('flm') or any(
                     x in filename
                     for x in ['-interp', '-z', '-dered', '-obj', '-gal']))):
                continue
            fileparts = filename.split('.')[0].split('-')
            instrument = ''
            time = ''
            if len(fileparts) > 1:
                year = fileparts[1][:4]
                month = fileparts[1][4:6]
                day = fileparts[1][6:]
                if is_number(year) and is_number(month) and is_number(day):
                    if len(fileparts) > 2:
                        instrument = fileparts[-1]
                    time = _cfa_epoch_mjd(year, month, day)
            data = _cfa_read_columns(fname)
            wavelengths = data[0]
            fluxes = [str(Decimal(x) * flux_scale) for x in data[1]]
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                wavelengths=wavelengths,
                filename=filename,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                source=source,
                dereddened=False,
                deredshifted=False)
        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break
    catalog.journal_entries()
    return
def do_wiserep_spectra(catalog):
    """Import spectra from the local WISeREP dump.

    Every folder of the task repository is treated as one object.  Its
    `README.json` maps each spectrum file name to metadata (type, instrument,
    observation date, bibcode, redshift, ...).  Each spectrum file is parsed
    into wavelength / flux (/ error) columns and added to the entry; entries
    are journaled after each folder.
    """
    task_str = catalog.get_current_task_str()
    secondaryreference = 'WISeREP'
    secondaryrefurl = 'http://wiserep.weizmann.ac.il/'
    secondarybibcode = '2012PASP..124..668Y'
    wiserepcnt = 0

    # These are known to be in error on the WISeREP page, either fix or ignore
    # them.
    wiserepbibcorrectdict = {'2000AJ....120..367G]': '2000AJ....120..367G',
                             'Harutyunyan et al. 2008': '2008A&A...488..383H',
                             '0609268': '2007AJ....133...58K',
                             '2006ApJ...636...400Q': '2006ApJ...636..400Q',
                             '2011ApJ...741...76': '2011ApJ...741...76C',
                             '2016PASP...128...961': '2016PASP..128...961',
                             '2002AJ....1124..417H': '2002AJ....1124.417H',
                             '2013ApJ…774…58D': '2013ApJ...774...58D',
                             '2011Sci.333..856S': '2011Sci...333..856S',
                             '2014MNRAS.438,368': '2014MNRAS.438..368T',
                             '2012MNRAS.420.1135': '2012MNRAS.420.1135S',
                             '2012Sci..337..942D': '2012Sci...337..942D',
                             'stt1839': '2013MNRAS.436.3614S',
                             'arXiv:1605.03136': '2016arXiv160503136T',
                             '10.1093/mnras/stt1839': '2013MNRAS.436.3614S'}

    file_names = list(
        glob(os.path.join(
            catalog.get_current_task_repo(), '*')))
    for folder in pbar_strings(file_names, task_str):
        if '.txt' in folder:
            continue

        # Normalise the folder name into a catalog name.
        name = os.path.basename(folder).strip()
        if name.startswith('sn'):
            name = 'SN' + name[2:]
        if (name.startswith(('CSS', 'SSS', 'MLS')) and
                ':' not in name):
            name = name.replace('-', ':', 1)
        if name.startswith('MASTERJ'):
            name = name.replace('MASTERJ', 'MASTER OT J')
        if name.startswith('PSNJ'):
            name = name.replace('PSNJ', 'PSN J')
        name = catalog.add_entry(name)
        entry = catalog.entries[name]

        secondarysource = entry.add_source(
            name=secondaryreference,
            url=secondaryrefurl,
            bibcode=secondarybibcode,
            secondary=True)
        entry.add_quantity(SUPERNOVA.ALIAS, name, secondarysource)

        with open(os.path.join(folder, 'README.json'), 'r') as f:
            fileinfo = json.loads(f.read())

        files = list(set(glob(folder + '/*')) -
                     set(glob(folder + '/README.json')))
        for fname in pbar(files, task_str):
            specfile = os.path.basename(fname)
            info = fileinfo[specfile]
            claimedtype = info["Type"]
            instrument = info["Instrument"]
            epoch = info["Obs. Date"]
            observer = info["Observer"]
            reducer = info["Reducer"]
            bibcode = info["Bibcode"]
            redshift = info["Redshift"]
            survey = info["Program"]
            reduction = info["Reduction Status"]

            if bibcode:
                newbibcode = bibcode
                if bibcode in wiserepbibcorrectdict:
                    newbibcode = wiserepbibcorrectdict[bibcode]
                # Only 19-character strings are accepted as bibcodes; anything
                # else is stored as a plain source name.
                if newbibcode and len(newbibcode) == 19:
                    source = entry.add_source(bibcode=unescape(newbibcode))
                else:
                    bibname = unescape(bibcode)
                    source = entry.add_source(name=bibname)
                    catalog.log.warning('Bibcode "{}" is invalid, using as '
                                        '`{}` instead'.format(bibname,
                                                              SOURCE.NAME))
                sources = uniq_cdl([source, secondarysource])
            else:
                sources = secondarysource

            if claimedtype not in ['Other']:
                entry.add_quantity(SUPERNOVA.CLAIMED_TYPE, claimedtype,
                                   secondarysource)
            entry.add_quantity(SUPERNOVA.REDSHIFT, redshift, secondarysource)

            with open(fname, 'r') as f:
                data = [x.split() for x in f]

            # Keep numeric rows only, dropping comment rows and rows whose
            # flux repeats the previously kept flux value.
            newdata = []
            oldval = ''
            for row in data:
                if row and '#' not in row[0]:
                    if (len(row) >= 2 and is_number(row[0]) and
                            is_number(row[1]) and row[1] != oldval):
                        newdata.append(row)
                        oldval = row[1]

            if not newdata:
                warnings.warn('Skipped adding spectrum file ' + specfile)
                continue

            # Transpose rows into columns: wavelength, flux, [error].
            data = [list(i) for i in zip(*newdata)]
            wavelengths = data[0]
            fluxes = data[1]
            errors = ''
            if len(data) == 3:
                # The third column holds the errors (column 1 is the flux).
                errors = data[2]
            time = str(astrotime(epoch).mjd)

            # Fluxes that are all below 1e-5 are taken to be calibrated.
            if max(float(x) for x in fluxes) < 1.0e-5:
                fluxunit = 'erg/s/cm^2/Angstrom'
            else:
                fluxunit = 'Uncalibrated'

            entry.add_spectrum(
                u_wavelengths='Angstrom',
                errors=errors,
                u_fluxes=fluxunit,
                u_errors=fluxunit if errors else '',
                wavelengths=wavelengths,
                fluxes=fluxes,
                u_time='MJD',
                time=time,
                instrument=instrument,
                source=sources,
                observer=observer,
                reducer=reducer,
                reduction=reduction,
                filename=specfile,
                survey=survey,
                redshift=redshift)

        catalog.journal_entries()

        wiserepcnt = wiserepcnt + 1
        if (catalog.args.travis and
                wiserepcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
            break

    return
def do_asiago_spectra(catalog):
    """Import object metadata from the Asiago spectra classification table.

    Every `<tr>` of the cached HTML page is read cell by cell.  Rows whose
    first cell has no `<button>` are ignored.  For the others an entry is
    created and the claimed type, coordinates, redshift, discoverer and host
    are added.  The epoch (column 9) and FITS link (column 12) are not
    imported.
    """
    task_str = catalog.get_current_task_str()
    html = catalog.load_cached_url(
        ('http://sngroup.oapd.inaf.it./'
         'cgi-bin/output_class.cgi?sn=1990'),
        os.path.join(catalog.get_current_task_repo(), 'Asiago/spectra.html'))
    if not html:
        return

    bs = BeautifulSoup(html, 'html5lib')
    trs = bs.findAll('tr')
    for tr in pbar(trs, task_str):
        tds = tr.findAll('td')
        # Reset every per-row field so that a short row can neither reuse the
        # previous row's values nor hit an unbound name.
        name = ''
        alias = ''
        host = ''
        source = ''
        reference = ''
        discoverer = ''
        ra = ''
        dec = ''
        claimedtype = ''
        redshift = ''
        secondarysource = ''
        sources = ''
        for tdi, td in enumerate(tds):
            if tdi == 0:
                butt = td.find('button')
                if not butt:
                    break
                alias = butt.text.strip()
                alias = alias.replace('PSNJ', 'PSN J').replace('GAIA', 'Gaia')
            elif tdi == 1:
                name = (td.text.strip()
                        .replace('PSNJ', 'PSN J')
                        .replace('GAIA', 'Gaia'))
                if name.startswith('SN '):
                    name = 'SN' + name[3:]
                if not name:
                    name = alias
                if is_number(name[:4]):
                    name = 'SN' + name
                oldname = name
                name = catalog.add_entry(name)
                reference = 'Asiago Supernova Catalogue'
                refurl = 'http://graspa.oapd.inaf.it/cgi-bin/sncat.php'
                secondarysource = catalog.entries[name].add_source(
                    name=reference, url=refurl, secondary=True)
                catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, oldname,
                                                   secondarysource)
                if alias != name:
                    catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, alias,
                                                       secondarysource)
            elif tdi == 2:
                host = td.text.strip()
                if host == 'anonymous':
                    host = ''
            elif tdi == 3:
                discoverer = td.text.strip()
            elif tdi == 5:
                ra = td.text.strip()
            elif tdi == 6:
                dec = td.text.strip()
            elif tdi == 7:
                claimedtype = td.text.strip()
            elif tdi == 8:
                redshift = td.text.strip()
            elif tdi == 10:
                # Use the last link whose text is not the literal 'REF' as
                # the primary reference, if there is one.
                refs = td.findAll('a')
                source = ''
                reference = ''
                refurl = ''
                for ref in refs:
                    if ref.text != 'REF':
                        reference = ref.text
                        refurl = ref['href']
                if reference:
                    source = catalog.entries[name].add_source(
                        name=reference, url=refurl)
                catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name,
                                                   secondarysource)
                sources = uniq_cdl(
                    list(filter(None, [source, secondarysource])))

        if name:
            # A row that ended before the reference column still has the
            # catalogue itself as a source.
            if not sources:
                sources = secondarysource
            catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                               claimedtype, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.RA, ra, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.DEC, dec, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.REDSHIFT, redshift,
                                               sources)
            catalog.entries[name].add_quantity(SUPERNOVA.DISCOVERER,
                                               discoverer, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.HOST, host, sources)

    catalog.journal_entries()
    return
def do_nedd(catalog):
    """Import redshift-independent distances from the NED-D table.

    Reads the local NED-D CSV, drops its 13 leading non-data lines and
    processes the remaining rows sorted by (supernova name, object name).
    Distances of host galaxies are accumulated in `catalog.nedd_dict`; rows
    tied to a supernova add redshift, comoving distance and host to the
    corresponding entry.  When a distance has no redshift, one is derived
    from the cosmology via `z_at_value`.
    """
    task_str = catalog.get_current_task_str()
    nedd_path = os.path.join(
        catalog.get_current_task_repo(), 'NED26.10.1-D-13.1.0-20160930.csv')

    # The 13 leading lines are removed here, before sorting, so the loop
    # below must not skip rows by index again.
    with open(nedd_path, 'r') as f:
        rows = list(csv.reader(f, delimiter=',', quotechar='"'))[13:]
    data = sorted(rows, key=lambda x: (x[9], x[3]))

    reference = "NED-D v" + nedd_path.split('-')[-2]
    refurl = "http://ned.ipac.caltech.edu/Library/Distances/"
    nedbib = "1991ASSL..171...89H"
    olddistname = ''
    loopcnt = 0
    for row in pbar(data, task_str):
        distname = row[3]
        name = name_clean(distname)
        # distmod = row[4]
        # moderr = row[5]
        dist = row[6]
        bibcode = unescape(row[8])
        snname = name_clean(row[9])
        redshift = row[10]

        cleanhost = ''
        if name != snname and (name + ' HOST' != snname):
            cleanhost = host_clean(distname)
            if cleanhost.endswith(' HOST') or cleanhost.startswith('SN'):
                cleanhost = ''
            if not is_number(dist):
                print(dist)
            if dist and cleanhost:
                catalog.nedd_dict.setdefault(
                    cleanhost, []).append(Decimal(dist))

        if snname and 'HOST' not in snname:
            snname, secondarysource = catalog.new_entry(
                snname, srcname=reference, bibcode=nedbib, url=refurl,
                secondary=True)
            if bibcode:
                source = catalog.entries[snname].add_source(bibcode=bibcode)
                sources = uniq_cdl([source, secondarysource])
            else:
                sources = secondarysource

            # The distance refers to the supernova itself (not its host).
            if name == snname:
                if redshift:
                    catalog.entries[snname].add_quantity(
                        SUPERNOVA.REDSHIFT, redshift, sources)
                if dist:
                    catalog.entries[snname].add_quantity(
                        SUPERNOVA.COMOVING_DIST, dist, sources)
                    if not redshift:
                        # Best effort: a failed inversion just means no
                        # derived redshift is stored.
                        try:
                            zatval = z_at_value(cosmo.comoving_distance,
                                                float(dist) * un.Mpc,
                                                zmax=5.0)
                            sigd = get_sig_digits(str(dist))
                            redshift = pretty_num(zatval, sig=sigd)
                        except (KeyboardInterrupt, SystemExit):
                            raise
                        except Exception:
                            pass
                        else:
                            cosmosource = catalog.entries[name].add_source(
                                bibcode='2016A&A...594A..13P')
                            combsources = uniq_cdl(sources.split(',') +
                                                   [cosmosource])
                            catalog.entries[snname].add_quantity(
                                SUPERNOVA.REDSHIFT, redshift, combsources,
                                derived=True)

            if cleanhost:
                catalog.entries[snname].add_quantity(
                    SUPERNOVA.HOST, cleanhost, sources)
            if catalog.args.update and olddistname != distname:
                catalog.journal_entries()
        olddistname = distname

        loopcnt = loopcnt + 1
        if catalog.args.travis and loopcnt % catalog.TRAVIS_QUERY_LIMIT == 0:
            break

    catalog.journal_entries()
    return
def do_nedd(catalog):
    """Import redshift-independent distances from the NED-D table.

    Reads the local NED-D CSV, drops its 13 leading non-data lines and
    processes the remaining rows sorted by (supernova name, object name).
    Distances of host galaxies are accumulated in `catalog.nedd_dict`; rows
    tied to a supernova add redshift, comoving distance and host to the
    corresponding entry.  When a distance has no redshift, one is derived
    from the cosmology via `z_at_value`.
    """
    task_str = catalog.get_current_task_str()
    nedd_path = os.path.join(catalog.get_current_task_repo(),
                             'NED26.10.1-D-13.1.0-20160930.csv')

    # The 13 leading lines are removed here, before sorting, so the loop
    # below must not skip rows by index again.
    with open(nedd_path, 'r') as f:
        rows = list(csv.reader(f, delimiter=',', quotechar='"'))[13:]
    data = sorted(rows, key=lambda x: (x[9], x[3]))

    reference = "NED-D v" + nedd_path.split('-')[-2]
    refurl = "http://ned.ipac.caltech.edu/Library/Distances/"
    nedbib = "1991ASSL..171...89H"
    olddistname = ''
    loopcnt = 0
    for row in pbar(data, task_str):
        distname = row[3]
        name = name_clean(distname)
        # distmod = row[4]
        # moderr = row[5]
        dist = row[6]
        bibcode = unescape(row[8])
        snname = name_clean(row[9])
        redshift = row[10]

        cleanhost = ''
        if name != snname and (name + ' HOST' != snname):
            cleanhost = host_clean(distname)
            if cleanhost.endswith(' HOST'):
                cleanhost = ''
            if not is_number(dist):
                print(dist)
            # Skip rows whose host name was blanked above: an empty key would
            # pool distances of unrelated objects.
            if dist and cleanhost:
                catalog.nedd_dict.setdefault(cleanhost,
                                             []).append(Decimal(dist))

        if snname and 'HOST' not in snname:
            snname, secondarysource = catalog.new_entry(snname,
                                                        srcname=reference,
                                                        bibcode=nedbib,
                                                        url=refurl,
                                                        secondary=True)
            if bibcode:
                source = catalog.entries[snname].add_source(bibcode=bibcode)
                sources = uniq_cdl([source, secondarysource])
            else:
                sources = secondarysource

            # The distance refers to the supernova itself (not its host).
            if name == snname:
                if redshift:
                    catalog.entries[snname].add_quantity(
                        SUPERNOVA.REDSHIFT, redshift, sources)
                if dist:
                    catalog.entries[snname].add_quantity(
                        SUPERNOVA.COMOVING_DIST, dist, sources)
                    if not redshift:
                        # Best effort: a failed inversion just means no
                        # derived redshift is stored.
                        try:
                            zatval = z_at_value(cosmo.comoving_distance,
                                                float(dist) * un.Mpc,
                                                zmax=5.0)
                            sigd = get_sig_digits(str(dist))
                            redshift = pretty_num(zatval, sig=sigd)
                        except (KeyboardInterrupt, SystemExit):
                            raise
                        except Exception:
                            pass
                        else:
                            cosmosource = catalog.entries[name].add_source(
                                bibcode='2016A&A...594A..13P')
                            combsources = uniq_cdl(
                                sources.split(',') + [cosmosource])
                            catalog.entries[snname].add_quantity(
                                SUPERNOVA.REDSHIFT, redshift, combsources,
                                derived=True)

            if cleanhost:
                catalog.entries[snname].add_quantity(SUPERNOVA.HOST,
                                                     cleanhost, sources)
            if catalog.args.update and olddistname != distname:
                catalog.journal_entries()
        olddistname = distname

        loopcnt = loopcnt + 1
        if catalog.args.travis and loopcnt % catalog.TRAVIS_QUERY_LIMIT == 0:
            break

    catalog.journal_entries()
    return
def do_rochester(catalog):
    """Import data from the Latest Supernovae page.

    Two inputs are processed:

    1. The HTML tables served under ``rochesterpaths`` (each mirror is tried
       in turn).  Every table row yields aliases, claimed type, host,
       coordinates, discovery date, one photometry point at the date of
       maximum, redshift(s) and discoverer.
    2. Unless running in update mode, the local ``latestsne.dat`` file in the
       task repository, whose space-separated rows yield one photometry point
       each.

    ``catalog`` supplies the task repository path, the URL loader, the entry
    store, the logger and the ``args.update`` / ``args.travis`` run flags.
    """
    rochestermirrors = [
        'http://www.rochesterastronomy.org/',
        'http://www.supernova.thistlethwaites.com/'
    ]
    rochesterpaths = [
        'snimages/snredshiftall.html', 'sn2017/snredshift.html',
        'snimages/snredboneyard.html', 'snimages/snredboneyard-old.html'
    ]
    # Which of the pages above are re-read when running in update mode.
    rochesterupdate = [False, True, True, False]
    task_str = catalog.get_current_task_str()
    # Placeholder dates used by the page for "unknown".
    baddates = ['2440587', '2440587.292', '0001/01/01']
    # Defined up front so the local-file section below can use them even when
    # every HTML page was skipped or failed to load.
    sec_ref = 'Latest Supernovae'
    sec_refurl = ('http://www.rochesterastronomy.org/'
                  'snimages/snredshiftall.html')

    for pp, path in enumerate(pbar(rochesterpaths, task_str)):
        if catalog.args.update and not rochesterupdate[pp]:
            continue

        # Column layout: the boneyard page orders its columns differently.
        if 'snredboneyard.html' in path:
            cns = {
                'name': 0,
                'host': 1,
                'ra': 2,
                'dec': 3,
                'type': 7,
                'z': 8,
                'mmag': 9,
                'max': 10,
                'disc': 11,
                'ref': 12,
                'dver': 13,
                'aka': 14
            }
        else:
            cns = {
                'name': 0,
                'type': 1,
                'host': 2,
                'ra': 3,
                'dec': 4,
                'disc': 6,
                'max': 7,
                'mmag': 8,
                'z': 11,
                'zh': 12,
                'ref': 13,
                'dver': 14,
                'aka': 15
            }

        filepath = (
            os.path.join(catalog.get_current_task_repo(), 'rochester/') +
            path.replace('/', '-'))
        for mirror in rochestermirrors:
            html = catalog.load_url(
                mirror + path,
                filepath,
                fail=(mirror != rochestermirrors[-1]))
            if html:
                break

        if not html:
            continue

        soup = BeautifulSoup(html, 'html5lib')
        rows = soup.findAll('tr')
        loopcnt = 0
        for rr, row in enumerate(pbar(rows, task_str)):
            if rr == 0:
                continue
            cols = row.findAll('td')
            if not len(cols):
                continue

            # A purely numeric alias is an IAU-style designation; prefer it as
            # the entry name when present.
            name = ''
            if cols[cns['aka']].contents:
                for rawaka in str(cols[cns['aka']].contents[0]).split(','):
                    aka = rawaka.strip()
                    if is_number(aka.strip('?')):
                        aka = 'SN' + aka.strip('?') + 'A'
                        oldname = aka
                        name = catalog.add_entry(aka)
                    elif len(aka) == 4 and is_number(aka[:4]):
                        aka = 'SN' + aka
                        oldname = aka
                        name = catalog.add_entry(aka)

            ra = str(cols[cns['ra']].contents[0]).strip()
            dec = str(cols[cns['dec']].contents[0]).strip()

            # Strip any HTML markup from the name cell.
            sn = re.sub('<[^<]+?>', '',
                        str(cols[cns['name']].contents[0])).strip()
            if is_number(sn.strip('?')):
                sn = 'SN' + sn.strip('?') + 'A'
            elif len(sn) == 4 and is_number(sn[:4]):
                sn = 'SN' + sn
            if not name:
                if not sn or sn in ['Transient']:
                    continue
                if sn[:8] == 'MASTER J':
                    sn = sn.replace('MASTER J', 'MASTER OT J').replace(
                        'SNHunt', 'SNhunt')
                if 'POSSIBLE' in sn.upper() and ra and dec:
                    sn = 'PSN J' + ra.replace(':', '').replace('.', '')
                    sn += dec.replace(':', '').replace('.', '')
                oldname = sn
                name = catalog.add_entry(sn)
            sec_source = catalog.entries[name].add_source(
                name=sec_ref, url=sec_refurl, secondary=True)
            sources = []
            if 'ref' in cns:
                reftag = cols[cns['ref']].findAll('a')
                # An anchor without text would otherwise raise IndexError.
                if len(reftag) and len(reftag[0].contents):
                    reference = reftag[0].contents[0].strip()
                    refurl = reftag[0]['href'].strip()
                    sources.append(catalog.entries[name].add_source(
                        name=reference, url=refurl))
            sources.append(sec_source)
            sources = uniq_cdl(list(filter(None, sources)))
            catalog.entries[name].add_quantity(TIDALDISRUPTION.ALIAS, oldname,
                                               sources)
            catalog.entries[name].add_quantity(
                TIDALDISRUPTION.ALIAS, sn, sources)

            if cols[cns['aka']].contents:
                for rawaka in str(cols[cns['aka']].contents[0]).split(','):
                    aka = rawaka.strip()
                    if aka == 'SNR G1.9+0.3':
                        aka = 'G001.9+00.3'
                    if aka[:4] == 'PS1 ':
                        aka = 'PS1-' + aka[4:]
                    if aka[:8] == 'MASTER J':
                        aka = aka.replace('MASTER J', 'MASTER OT J').replace(
                            'SNHunt', 'SNhunt')
                    if 'POSSIBLE' in aka.upper() and ra and dec:
                        aka = 'PSN J' + ra.replace(':', '').replace('.', '')
                        aka += dec.replace(':', '').replace('.', '')
                    catalog.entries[name].add_quantity(
                        TIDALDISRUPTION.ALIAS, aka, sources)

            # An empty type cell is treated like an unknown type.
            if (len(cols[cns['type']].contents) > 0 and
                    str(cols[cns['type']].contents[0]).strip() != 'unk'):
                ctype = str(cols[cns['type']].contents[0]).strip(' :,')
                catalog.entries[name].add_quantity(
                    TIDALDISRUPTION.CLAIMED_TYPE, ctype, sources)
            if (len(cols[cns['host']].contents) > 0 and
                    str(cols[cns['host']].contents[0]).strip() != 'anonymous'):
                catalog.entries[name].add_quantity(
                    TIDALDISRUPTION.HOST,
                    str(cols[cns['host']].contents[0]).strip(), sources)
            catalog.entries[name].add_quantity(TIDALDISRUPTION.RA, ra, sources)
            catalog.entries[name].add_quantity(
                TIDALDISRUPTION.DEC, dec, sources)

            # Discovery date: either a Julian date or an already formatted
            # "Y/M/D" string.
            discstr = str(cols[cns['disc']].contents[0]).strip()
            if discstr and discstr not in baddates:
                if '/' not in discstr:
                    disctime = astrotime(float(discstr), format='jd').datetime
                    ddate = make_date_string(disctime.year, disctime.month,
                                             disctime.day)
                else:
                    ddate = discstr
                catalog.entries[name].add_quantity(
                    TIDALDISRUPTION.DISCOVER_DATE, ddate, sources)

            maxstr = str(cols[cns.get('max', '')].contents[0]).strip()
            if maxstr and maxstr not in baddates:
                # Dedicated variable: after a failed conversion the photometry
                # point is skipped instead of reusing a stale or undefined
                # time from earlier code.
                maxtime = None
                try:
                    if '/' not in maxstr:
                        maxtime = astrotime(float(maxstr), format='jd')
                    else:
                        maxtime = astrotime(
                            maxstr.replace('/', '-'), format='iso')
                except Exception:
                    catalog.log.info(
                        'Max date conversion failed for `{}`.'.format(maxstr))
                # Magnitudes above 90 are placeholders; GRB afterglows are
                # excluded from this photometry.
                if (maxtime is not None and
                        float(str(cols[cns['mmag']].contents[0]).strip()) <=
                        90.0 and not any(
                            'GRB' in xx
                            for xx in catalog.entries[name].get_aliases())):
                    mag = str(cols[cns['mmag']].contents[0]).strip()
                    catalog.entries[name].add_photometry(
                        time=str(maxtime.mjd),
                        u_time='MJD',
                        magnitude=mag,
                        source=sources)
            if 'z' in cns and cols[cns['z']].contents[0] != 'n/a':
                catalog.entries[name].add_quantity(
                    TIDALDISRUPTION.REDSHIFT,
                    str(cols[cns['z']].contents[0]).strip(), sources)
            if 'zh' in cns:
                zhost = str(cols[cns['zh']].contents[0]).strip()
                if is_number(zhost):
                    catalog.entries[name].add_quantity(
                        TIDALDISRUPTION.REDSHIFT, zhost, sources)
            if 'dver' in cns:
                catalog.entries[name].add_quantity(
                    TIDALDISRUPTION.DISCOVERER,
                    str(cols[cns['dver']].contents[0]).strip(), sources)
            if catalog.args.update:
                catalog.journal_entries()
            loopcnt = loopcnt + 1
            if (catalog.args.travis and
                    loopcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                break

    if not catalog.args.update:
        vsnetfiles = ['latestsne.dat']
        for vsnetfile in vsnetfiles:
            file_name = os.path.join(catalog.get_current_task_repo(),
                                     vsnetfile)
            with open(file_name, 'r', encoding='latin1') as csv_file:
                tsvin = csv.reader(
                    csv_file, delimiter=' ', skipinitialspace=True)
                loopcnt = 0
                for row in tsvin:
                    if (not row or row[0] in ['Transient'] or
                            row[0][:4] in ['http', 'www.'] or len(row) < 3):
                        continue
                    name = row[0].strip()
                    if name[:4].isdigit():
                        name = 'SN' + name
                    if name.startswith('PSNJ'):
                        name = 'PSN J' + name[4:]
                    if name.startswith('MASTEROTJ'):
                        name = name.replace('MASTEROTJ', 'MASTER OT J')
                    name = catalog.add_entry(name)
                    sec_source = catalog.entries[name].add_source(
                        name=sec_ref, url=sec_refurl, secondary=True)
                    catalog.entries[name].add_quantity(
                        TIDALDISRUPTION.ALIAS, name, sec_source)

                    if not is_number(row[1]):
                        continue
                    # Date is YYYYMMDD with an optional fractional day.
                    year = row[1][:4]
                    month = row[1][4:6]
                    day = row[1][6:]
                    if '.' not in day:
                        day = day[:2] + '.' + day[2:]
                    mjd = astrotime(year + '-' + month + '-' + str(
                        floor(float(day))).zfill(2)).mjd
                    mjd += float(day) - floor(float(day))
                    magnitude = row[2].rstrip(ascii_letters)
                    if not is_number(magnitude):
                        continue
                    # Integers above 100 are magnitudes written without the
                    # decimal point.
                    if magnitude.isdigit():
                        if int(magnitude) > 100:
                            magnitude = magnitude[:2] + '.' + magnitude[2:]

                    # Skip placeholder magnitudes.  The test is made on this
                    # row's own magnitude, not on a leftover HTML table cell.
                    if float(magnitude) >= 90.0:
                        continue

                    # Rows with only three fields carry no error and no
                    # reference.
                    e_magnitude = ''
                    if len(row) >= 4:
                        if is_number(row[3]):
                            e_magnitude = row[3]
                            refind = 4
                        else:
                            refind = 3

                        if refind >= len(row):
                            sources = sec_source
                        else:
                            reference = ' '.join(row[refind:])
                            source = catalog.entries[name].add_source(
                                name=reference)
                            catalog.entries[name].add_quantity(
                                TIDALDISRUPTION.ALIAS, name, sec_source)
                            sources = uniq_cdl([source, sec_source])
                    else:
                        sources = sec_source

                    band = row[2].lstrip('1234567890.')

                    catalog.entries[name].add_photometry(
                        time=mjd,
                        u_time='MJD',
                        band=band,
                        magnitude=magnitude,
                        e_magnitude=e_magnitude,
                        source=sources)

                    # Count imported rows so the travis limit matches the one
                    # applied to the HTML tables above.
                    loopcnt = loopcnt + 1
                    if (catalog.args.travis and
                            loopcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                        break

    catalog.journal_entries()
    return
def do_rochester(catalog):
    """Import data from latest supernova page.

    Two inputs are processed:

    1. The HTML tables served under ``rochesterpaths`` (each mirror is tried
       in turn).  Every table row yields aliases, claimed type, host,
       coordinates, discovery date, one photometry point at the date of
       maximum, redshift(s) and discoverer.
    2. Unless running in update mode, the local ``latestsne.dat`` file in the
       task repository, whose space-separated rows yield one photometry point
       each.

    ``catalog`` supplies the task repository path, the URL loader, the entry
    store, the logger and the ``args.update`` / ``args.travis`` run flags.
    """
    rochestermirrors = [
        'http://www.rochesterastronomy.org/',
        'http://www.supernova.thistlethwaites.com/'
    ]
    rochesterpaths = [
        'snimages/snredshiftall.html', 'sn2020/snredshift.html',
        'snimages/snredboneyard.html', 'snimages/snredboneyard-old.html'
    ]
    # Which of the pages above are re-read when running in update mode.
    rochesterupdate = [False, True, True, False]
    task_str = catalog.get_current_task_str()
    # Placeholder dates used by the page for "unknown".
    baddates = ['2440587', '2440587.292', '0001/01/01']
    # Defined up front so the local-file section below can use them even when
    # every HTML page was skipped or failed to load.
    sec_ref = 'Latest Supernovae'
    sec_refurl = ('http://www.rochesterastronomy.org/'
                  'snimages/snredshiftall.html')

    for pp, path in enumerate(pbar(rochesterpaths, task_str)):
        if catalog.args.update and not rochesterupdate[pp]:
            continue

        # Column layout: the boneyard page orders its columns differently.
        if 'snredboneyard.html' in path:
            cns = {
                'name': 0,
                'host': 1,
                'ra': 2,
                'dec': 3,
                'type': 7,
                'z': 8,
                'mmag': 9,
                'max': 10,
                'disc': 11,
                'ref': 12,
                'dver': 13,
                'aka': 14
            }
        else:
            cns = {
                'name': 0,
                'type': 1,
                'host': 2,
                'ra': 3,
                'dec': 4,
                'disc': 6,
                'max': 7,
                'mmag': 8,
                'z': 11,
                'zh': 12,
                'ref': 13,
                'dver': 14,
                'aka': 15
            }

        filepath = (
            os.path.join(catalog.get_current_task_repo(), 'rochester/') +
            path.replace('/', '-'))
        for mirror in rochestermirrors:
            html = catalog.load_url(mirror + path, filepath,
                                    fail=(mirror != rochestermirrors[-1]))
            if html:
                break

        if not html:
            continue

        soup = BeautifulSoup(html, 'html5lib')
        rows = soup.findAll('tr')
        loopcnt = 0
        for rr, row in enumerate(pbar(rows, task_str)):
            if rr == 0:
                continue
            cols = row.findAll('td')
            if not len(cols):
                continue

            # A purely numeric alias is an IAU-style designation; prefer it as
            # the entry name when present.
            name = ''
            if cols[cns['aka']].contents:
                for rawaka in str(cols[cns['aka']].contents[0]).split(','):
                    aka = rawaka.strip()
                    if is_number(aka.strip('?')):
                        aka = 'SN' + aka.strip('?') + 'A'
                        oldname = aka
                        name = catalog.add_entry(aka)
                    elif len(aka) == 4 and is_number(aka[:4]):
                        aka = 'SN' + aka
                        oldname = aka
                        name = catalog.add_entry(aka)

            # Strip any HTML markup from the name cell.
            sn = re.sub('<[^<]+?>', '',
                        str(cols[cns['name']].contents[0])).strip()
            if is_number(sn.strip('?')):
                sn = 'SN' + sn.strip('?') + 'A'
            elif len(sn) == 4 and is_number(sn[:4]):
                sn = 'SN' + sn
            # Bail out before touching the coordinate cells of junk rows.
            if not name:
                if not sn or sn in ['Transient']:
                    continue

            ra = str(cols[cns['ra']].contents[0]).strip().replace(':.', '.')
            dec = str(cols[cns['dec']].contents[0]).strip().replace(':.', '.')

            if not name:
                if sn[:8] == 'MASTER J':
                    sn = sn.replace('MASTER J', 'MASTER OT J').replace(
                        'SNHunt', 'SNhunt')
                if 'POSSIBLE' in sn.upper() and ra and dec:
                    sn = 'PSN J' + ra.replace(':', '').replace('.', '')
                    sn += dec.replace(':', '').replace('.', '')
                oldname = sn
                name = catalog.add_entry(sn)
            sec_source = catalog.entries[name].add_source(
                name=sec_ref, url=sec_refurl, secondary=True)
            sources = []
            if 'ref' in cns:
                reftag = cols[cns['ref']].findAll('a')
                if len(reftag) and len(reftag[0].contents):
                    reference = reftag[0].contents[0].strip()
                    refurl = reftag[0]['href'].strip()
                    sources.append(catalog.entries[name].add_source(
                        name=reference, url=refurl))
            sources.append(sec_source)
            sources = uniq_cdl(list(filter(None, sources)))
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, oldname,
                                               sources)
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, sn, sources)

            if cols[cns['aka']].contents:
                for rawaka in str(cols[cns['aka']].contents[0]).split(','):
                    aka = rawaka.strip()
                    if aka == 'SNR G1.9+0.3':
                        aka = 'G001.9+00.3'
                    if aka[:4] == 'PS1 ':
                        aka = 'PS1-' + aka[4:]
                    if aka[:8] == 'MASTER J':
                        aka = aka.replace('MASTER J', 'MASTER OT J').replace(
                            'SNHunt', 'SNhunt')
                    if 'POSSIBLE' in aka.upper() and ra and dec:
                        aka = 'PSN J' + ra.replace(':', '').replace('.', '')
                        aka += dec.replace(':', '').replace('.', '')
                    catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, aka,
                                                       sources)

            if (len(cols[cns['type']].contents) > 0 and
                    str(cols[cns['type']].contents[0]).strip() != 'unk'):
                # Named ``ctype`` so the builtin ``type`` is not shadowed.
                ctype = str(cols[cns['type']].contents[0]).strip(' :,')
                catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                                   ctype, sources)
            if (len(cols[cns['host']].contents) > 0 and
                    str(cols[cns['host']].contents[0]).strip() != 'anonymous'):
                catalog.entries[name].add_quantity(
                    SUPERNOVA.HOST,
                    str(cols[cns['host']].contents[0]).strip(), sources)
            catalog.entries[name].add_quantity(SUPERNOVA.RA, ra, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.DEC, dec, sources)

            # Discovery date: either a Julian date or an already formatted
            # "Y/M/D" string.
            discstr = str(cols[cns['disc']].contents[0]).strip()
            if discstr and discstr not in baddates:
                if '/' not in discstr:
                    disctime = astrotime(float(discstr), format='jd').datetime
                    ddate = make_date_string(disctime.year, disctime.month,
                                             disctime.day)
                else:
                    ddate = discstr
                catalog.entries[name].add_quantity(SUPERNOVA.DISCOVER_DATE,
                                                   ddate, sources)

            maxstr = str(cols[cns.get('max', '')].contents[0]).strip()
            if maxstr and maxstr not in baddates:
                # Dedicated variable: after a failed conversion the photometry
                # point is skipped instead of reusing a stale or undefined
                # time from earlier code.
                maxtime = None
                try:
                    if '/' not in maxstr:
                        maxtime = astrotime(float(maxstr), format='jd')
                    else:
                        maxtime = astrotime(maxstr.replace('/', '-'),
                                            format='iso')
                except Exception:
                    # ``Exception`` (not a bare except) so that Ctrl-C and
                    # SystemExit still propagate.
                    catalog.log.info(
                        'Max date conversion failed for `{}`.'.format(maxstr))
                # Magnitudes above 90 are placeholders; GRB afterglows are
                # excluded from this photometry.
                if (maxtime is not None and
                        float(str(cols[cns['mmag']].contents[0]).strip()) <=
                        90.0 and not any(
                            'GRB' in xx
                            for xx in catalog.entries[name].get_aliases())):
                    mag = str(cols[cns['mmag']].contents[0]).strip()
                    catalog.entries[name].add_photometry(
                        time=str(maxtime.mjd),
                        u_time='MJD',
                        magnitude=mag,
                        source=sources)
            if 'z' in cns and cols[cns['z']].contents[0] != 'n/a':
                catalog.entries[name].add_quantity(
                    SUPERNOVA.REDSHIFT,
                    str(cols[cns['z']].contents[0]).strip(), sources)
            if 'zh' in cns:
                zhost = str(cols[cns['zh']].contents[0]).strip()
                if is_number(zhost):
                    catalog.entries[name].add_quantity(SUPERNOVA.REDSHIFT,
                                                       zhost, sources)
            if 'dver' in cns:
                catalog.entries[name].add_quantity(
                    SUPERNOVA.DISCOVERER,
                    str(cols[cns['dver']].contents[0]).strip(), sources)
            if catalog.args.update:
                catalog.journal_entries()
            loopcnt = loopcnt + 1
            if (catalog.args.travis and
                    loopcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                break

    if not catalog.args.update:
        vsnetfiles = ['latestsne.dat']
        for vsnetfile in vsnetfiles:
            file_name = os.path.join(catalog.get_current_task_repo(),
                                     vsnetfile)
            with open(file_name, 'r', encoding='latin1') as csv_file:
                tsvin = csv.reader(csv_file, delimiter=' ',
                                   skipinitialspace=True)
                loopcnt = 0
                for row in tsvin:
                    if (not row or row[0] in ['Transient'] or
                            row[0][:4] in ['http', 'www.'] or len(row) < 3):
                        continue
                    name = row[0].strip()
                    if name[:4].isdigit():
                        name = 'SN' + name
                    if name.startswith('PSNJ'):
                        name = 'PSN J' + name[4:]
                    if name.startswith('MASTEROTJ'):
                        name = name.replace('MASTEROTJ', 'MASTER OT J')
                    name = catalog.add_entry(name)
                    sec_source = catalog.entries[name].add_source(
                        name=sec_ref, url=sec_refurl, secondary=True)
                    catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name,
                                                       sec_source)

                    if not is_number(row[1]):
                        continue
                    # Date is YYYYMMDD with an optional fractional day.
                    year = row[1][:4]
                    month = row[1][4:6]
                    day = row[1][6:]
                    if '.' not in day:
                        day = day[:2] + '.' + day[2:]
                    mjd = astrotime(year + '-' + month + '-' +
                                    str(floor(float(day))).zfill(2)).mjd
                    mjd += float(day) - floor(float(day))
                    magnitude = row[2].rstrip(ascii_letters)
                    if not is_number(magnitude):
                        continue
                    # Integers above 100 are magnitudes written without the
                    # decimal point.
                    if magnitude.isdigit():
                        if int(magnitude) > 100:
                            magnitude = magnitude[:2] + '.' + magnitude[2:]

                    # Skip placeholder magnitudes.  The test is made on this
                    # row's own magnitude, not on a leftover HTML table cell.
                    if float(magnitude) >= 90.0:
                        continue

                    # Rows with only three fields carry no error and no
                    # reference.
                    e_magnitude = ''
                    if len(row) >= 4:
                        if is_number(row[3]):
                            e_magnitude = row[3]
                            refind = 4
                        else:
                            refind = 3

                        if refind >= len(row):
                            sources = sec_source
                        else:
                            reference = ' '.join(row[refind:])
                            source = catalog.entries[name].add_source(
                                name=reference)
                            catalog.entries[name].add_quantity(
                                SUPERNOVA.ALIAS, name, sec_source)
                            sources = uniq_cdl([source, sec_source])
                    else:
                        sources = sec_source

                    band = row[2].lstrip('1234567890.')

                    catalog.entries[name].add_photometry(
                        time=mjd,
                        u_time='MJD',
                        band=band,
                        magnitude=magnitude,
                        e_magnitude=e_magnitude,
                        source=sources)

                    # Count imported rows so the travis limit matches the one
                    # applied to the HTML tables above.
                    loopcnt = loopcnt + 1
                    if (catalog.args.travis and
                            loopcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                        break

    catalog.journal_entries()
    return
def do_ps_threepi(catalog):
    """Import data from Pan-STARRS' 3pi page.

    The public candidate list is read page by page (from the web, or from the
    copies cached under ``3pi/`` in the task repository).  Candidates are only
    imported when one of their names already exists in the catalog; for those
    the aliases, coordinates, claimed type, light-curve photometry, host name
    and host redshift are added.

    ``catalog`` supplies the task repository path, the URL loader, the entry
    store and the ``args.update`` / ``args.travis`` run flags.
    """
    task_str = catalog.get_current_task_str()
    # Cross-reference names that must never be chosen as the entry name.
    bad_aliases = ['SN1994J']
    teles = 'Pan-STARRS1'
    fname = os.path.join(catalog.get_current_task_repo(), '3pi/page00.html')
    ps_url = ("https://star.pst.qub.ac.uk/"
              "ps1threepi/psdb/public/?page=1&sort=followup_flag_date")
    html = catalog.load_url(ps_url, fname, write=False, update_mode=True)

    # Check if offline.
    # The site counts as offline when nothing was returned or when the page
    # lacks the pagination block (and its links) that a real listing carries.
    offline = False
    if not html:
        offline = True
    else:
        # Clean some common HTML manglings
        html = html.replace('ahref=', 'a href=')

        bs = BeautifulSoup(html, 'html5lib')
        div = bs.find('div', {'class': 'pagination'})
        if not div:
            offline = True
        else:
            links = div.findAll('a')
            if not links:
                offline = True

    if offline:
        # Nothing new can be learned in update mode without the live site.
        if catalog.args.update:
            return
        warnings.warn('Pan-STARRS 3pi offline, using local files only.')
        # Fall back on the cached first page to recover the page count.
        with open(fname, 'r') as f:
            html = f.read()
        bs = BeautifulSoup(html, 'html5lib')
        div = bs.find('div', {'class': 'pagination'})
        links = div.findAll('a')
    else:
        # The load above was made with write=False; cache the (cleaned) page
        # only now that it is known to be valid.
        with open(fname, 'w') as f:
            f.write(html)

    # The second-to-last pagination link holds the last page number.
    numpages = int(links[-2].contents[0]) + 1
    oldnumpages = len(
        glob(os.path.join(catalog.get_current_task_repo(), '3pi/page*')))
    for page in pbar(range(1, numpages), task_str):
        fname = os.path.join(catalog.get_current_task_repo(), '3pi/page') + \
            str(page).zfill(2) + '.html'
        if offline:
            if not os.path.isfile(fname):
                continue
            with open(fname, 'r') as f:
                html = f.read()
        else:
            # Reuse the cached copy only for pages below the number of files
            # already on disk; anything else is downloaded and cached.
            if (catalog.current_task.load_archive(catalog.args) and
                    page < oldnumpages and os.path.isfile(fname)):
                with open(fname, 'r') as f:
                    html = f.read()
            else:
                response = urllib.request.urlopen(
                    "https://star.pst.qub.ac.uk/ps1threepi/psdb/public/?page=" +
                    str(page) + "&sort=followup_flag_date")
                with open(fname, 'w') as f:
                    html = response.read().decode('utf-8')
                    f.write(html)

        bs = BeautifulSoup(html, 'html5lib')
        trs = bs.findAll('tr')
        for tr in pbar(trs, task_str):
            tds = tr.findAll('td')
            if not tds:
                continue
            refs = []
            aliases = []
            ttype = ''
            ctype = ''
            # Pick the interesting cells out of the row by column index.
            for tdi, td in enumerate(tds):
                if tdi == 0:
                    # First cell is a link to the candidate page.
                    psname = td.contents[0]
                    pslink = psname['href']
                    psname = psname.text
                elif tdi == 1:
                    ra = td.contents[0]
                elif tdi == 2:
                    dec = td.contents[0]
                elif tdi == 3:
                    # NOTE(review): ``ttype`` is read but never used below.
                    ttype = td.contents[0]
                elif tdi == 6:
                    if not td.contents:
                        continue
                    ctype = td.contents[0]
                    if ctype == 'Observed':
                        ctype = ''
                elif tdi == 17:
                    # Cross references: ATel links become sources, links whose
                    # text starts with a number are ignored, everything else
                    # is an alias.
                    if td.contents:
                        crossrefs = td.findAll('a')
                        for cref in crossrefs:
                            if 'atel' in cref.contents[0].lower():
                                refs.append([cref.contents[0], cref['href']])
                            elif is_number(cref.contents[0][:4]):
                                continue
                            else:
                                aliases.append(cref.contents[0])

            # Prefer an "AT..." alias (the last one listed) as the entry name,
            # otherwise fall back on the Pan-STARRS name.
            name = ''
            for alias in aliases:
                if alias in bad_aliases:
                    continue
                if alias[:2] == 'AT':
                    name = alias
            if not name:
                name = psname

            # Only candidates already known to the catalog are imported.
            if not any([catalog.entry_exists(x) for x in (aliases + [psname])]):
                continue

            name = catalog.add_entry(name)
            sources = [
                catalog.entries[name].add_source(
                    name='Pan-STARRS 3Pi',
                    url=('https://star.pst.qub.ac.uk/'
                         'ps1threepi/psdb/'))
            ]
            catalog.entries[name].add_quantity(TIDALDISRUPTION.ALIAS, name,
                                               sources[0])
            for ref in refs:
                sources.append(catalog.entries[name].add_source(
                    name=ref[0], url=ref[1]))
            source = uniq_cdl(sources)
            for alias in aliases:
                newalias = alias
                # These survey names get their first '-' replaced by ':'.
                if alias[:3] in ['CSS', 'SSS', 'MLS']:
                    newalias = alias.replace('-', ':', 1)
                newalias = newalias.replace('PSNJ', 'PSN J')
                catalog.entries[name].add_quantity(TIDALDISRUPTION.ALIAS,
                                                   newalias, source)
            catalog.entries[name].add_quantity(TIDALDISRUPTION.RA, ra, source)
            catalog.entries[name].add_quantity(TIDALDISRUPTION.DEC, dec,
                                               source)
            catalog.entries[name].add_quantity(TIDALDISRUPTION.CLAIMED_TYPE,
                                               ctype, source)

            # Fetch (or read from cache) the candidate's own page, which holds
            # the light curve and host association.
            fname2 = os.path.join(catalog.get_current_task_repo(),
                                  '3pi/candidate-')
            fname2 += pslink.rstrip('/').split('/')[-1] + '.html'
            if offline:
                if not os.path.isfile(fname2):
                    continue
                with open(fname2, 'r') as f:
                    html2 = f.read()
            else:
                if (catalog.current_task.load_archive(catalog.args) and
                        os.path.isfile(fname2)):
                    with open(fname2, 'r') as f:
                        html2 = f.read()
                else:
                    pslink = ('https://star.pst.qub.ac.uk/'
                              'ps1threepi/psdb/public/') + pslink
                    try:
                        session2 = requests.Session()
                        response2 = session2.get(pslink)
                    except Exception:
                        # A failed request switches the rest of the run to
                        # cached files only.
                        offline = True
                        if not os.path.isfile(fname2):
                            continue
                        with open(fname2, 'r') as f:
                            html2 = f.read()
                    else:
                        html2 = response2.text
                        with open(fname2, 'w') as f:
                            f.write(html2)

            bs2 = BeautifulSoup(html2, 'html5lib')
            scripts = bs2.findAll('script')
            # The light curve is embedded in the page's JavaScript as JSON
            # arguments of ``jslcdata.push(...)`` (data series) and
            # ``jslabels.push(...)`` (series labels).
            nslines = []
            nslabels = []
            for script in scripts:
                if 'jslcdata.push' not in script.text:
                    continue
                slines = script.text.splitlines()
                for line in slines:
                    if 'jslcdata.push' in line:
                        json_fname = (line.strip()
                                      .replace('jslcdata.push(', '')
                                      .replace(');', ''))
                        nslines.append(json.loads(json_fname))
                    if ('jslabels.push' in line and 'blanks' not in line and
                            'non det' not in line):
                        json_fname = (line.strip()
                                      .replace('jslabels.push(', '')
                                      .replace(');', ''))
                        nslabels.append(json.loads(json_fname)['label'])
            # Only the first len(nslabels) series are imported; each series is
            # paired with the label at the same index, used as the band.
            for li, line in enumerate(nslines[:len(nslabels)]):
                if not line:
                    continue
                for obs in line:
                    # presumably obs is (MJD, magnitude, error) -- TODO
                    # confirm against the page's JavaScript.
                    catalog.entries[name].add_photometry(
                        time=str(obs[0]),
                        u_time='MJD',
                        band=nslabels[li],
                        instrument='GPC',
                        magnitude=str(obs[1]),
                        e_magnitude=str(obs[2]),
                        source=source,
                        telescope=teles)
            # Ignoring upper limits as they are usually spurious chip gaps.
            # for li, line in enumerate(nslines[2 * len(nslabels):]):
            #     if not line:
            #         continue
            #     for obs in line:
            #         catalog.entries[name].add_photometry(
            #             time=str(obs[0]),
            #             u_time='MJD',
            #             band=nslabels[li],
            #             instrument='GPC',
            #             magnitude=str(obs[1]),
            #             upperlimit=True,
            #             source=source,
            #             telescope=teles)
            assoctab = bs2.find('table', {'class': 'generictable'})
            hostname = ''
            redshift = ''
            if assoctab:
                # NOTE(review): this reuses the names ``trs`` and ``tds`` of
                # the enclosing loops, and both the "header" cells and the
                # data cells are taken from the same row (index 1) -- confirm
                # that the header was not meant to come from another row.
                trs = assoctab.findAll('tr')
                headertds = [x.contents[0] for x in trs[1].findAll('td')]
                tds = trs[1].findAll('td')
                for tdi, td in enumerate(tds):
                    if tdi == 1:
                        hostname = td.contents[0].strip()
                    elif tdi == 4:
                        if 'z' in headertds:
                            redshift = td.contents[0].strip()
            # Skip galaxies with just SDSS id
            if is_number(hostname):
                continue
            catalog.entries[name].add_quantity(TIDALDISRUPTION.HOST, hostname,
                                               source)
            if redshift:
                catalog.entries[name].add_quantity(
                    [TIDALDISRUPTION.REDSHIFT, TIDALDISRUPTION.HOST_REDSHIFT],
                    redshift,
                    source,
                    kind='host')
            if catalog.args.update:
                catalog.journal_entries()
        catalog.journal_entries()
        # Only run first page for Travis
        if catalog.args.travis:
            break

    return
def do_ucb_photo(catalog):
    """Import published photometry from the UCB supernova database (SNDB).

    The JSON index of all public photometry sets is loaded first.  For every
    set an entry is created (alias, claimed types, discovery date, host), and
    the set's data file is read from the ``SNDB/`` cache in the task
    repository or downloaded and cached there.  Each data row then becomes
    one photometry point.

    ``catalog`` supplies the task repository path, the URL loader and the
    entry store.

    Raises:
        ValueError: if an index record lacks a file name or a photometry ID.
    """
    task_str = catalog.get_current_task_str()
    sec_ref = 'UCB Filippenko Group\'s Supernova Database (SNDB)'
    sec_refurl = 'http://heracles.astro.berkeley.edu/sndb/info'
    sec_refbib = '2012MNRAS.425.1789S'

    jsontxt = catalog.load_cached_url(
        'http://heracles.astro.berkeley.edu/sndb/download?id=allpubphot',
        os.path.join(catalog.get_current_task_repo(), 'SNDB/allpub.json'),
        jsonsort='PhotID')
    if not jsontxt:
        return

    photom = json.loads(jsontxt)
    photom = sorted(photom, key=lambda kk: kk['PhotID'])
    for phot in pbar(photom, task_str):
        oldname = phot['ObjName']
        name = catalog.add_entry(oldname)

        sec_source = catalog.entries[name].add_source(
            name=sec_ref, url=sec_refurl, bibcode=sec_refbib, secondary=True)
        catalog.entries[name].add_quantity(
            SUPERNOVA.ALIAS, oldname, sec_source)
        sources = [sec_source]
        if phot['Reference']:
            sources += [catalog.entries[name]
                        .add_source(bibcode=phot['Reference'])]
        sources = uniq_cdl(sources)
        if phot['Type'] and phot['Type'].strip() != 'NoMatch':
            for ct in phot['Type'].strip().split(','):
                catalog.entries[name].add_quantity(
                    SUPERNOVA.CLAIMED_TYPE, ct.replace('-norm', '').strip(),
                    sources)
        if phot['DiscDate']:
            catalog.entries[name].add_quantity(
                SUPERNOVA.DISCOVER_DATE, phot['DiscDate'].replace('-', '/'),
                sources)
        if phot['HostName']:
            # Host names arrive URL-encoded, sometimes flagged with '*'.
            host = urllib.parse.unquote(phot['HostName']).replace('*', '')
            catalog.entries[name].add_quantity(SUPERNOVA.HOST, host, sources)
        filename = phot['Filename'] or ''

        if not filename:
            raise ValueError('Filename not found for SNDB phot!')
        if not phot['PhotID']:
            raise ValueError('ID not found for SNDB phot!')

        filepath = os.path.join(
            catalog.get_current_task_repo(), 'SNDB/') + filename
        if (catalog.current_task.load_archive(catalog.args) and
                os.path.isfile(filepath)):
            with open(filepath, 'r') as ff:
                phottxt = ff.read()
        else:
            # Close the session (and its pooled connection) once the file is
            # fetched instead of leaving one open per photometry set.
            with requests.Session() as session:
                response = session.get(
                    'http://heracles.astro.berkeley.edu/sndb/download?id=dp:' +
                    str(phot['PhotID']))
                phottxt = response.text
            with open(filepath, 'w') as ff:
                ff.write(phottxt)

        tsvin = csv.reader(phottxt.splitlines(), delimiter=' ',
                           skipinitialspace=True)

        for row in tsvin:
            # Blank lines parse to an empty list; skip them together with the
            # "#" comment rows instead of failing on ``row[0]``.
            if not row or row[0] == "#":
                continue
            mjd = row[0]
            magnitude = row[1]
            # Magnitudes above 99 are placeholders for "no measurement".
            if magnitude and float(magnitude) > 99.0:
                continue
            e_mag = row[2]
            band = row[4]
            telescope = row[5]
            catalog.entries[name].add_photometry(
                time=mjd, telescope=telescope, band=band,
                magnitude=magnitude, e_magnitude=e_mag, source=sources)

    catalog.journal_entries()
    return
def do_rochester(catalog):
    """Import data from the Latest Supernovae pages and the local data file.

    Two inputs are processed:

    1. The HTML tables served under ``rochesterpaths`` (each mirror is tried
       in turn).  Every table row yields aliases, claimed type, host,
       coordinates, discovery date, one photometry point at the date of
       maximum, redshift and discoverer.  Columns are addressed by fixed
       index.
    2. Unless running in update mode, the local ``latestsne.dat`` file in the
       task repository, whose space-separated rows yield one photometry point
       each.

    ``catalog`` supplies the task repository path, the URL loader, the entry
    store and the ``args.update`` run flag.
    """
    rochestermirrors = ['http://www.rochesterastronomy.org/',
                        'http://www.supernova.thistlethwaites.com/']
    rochesterpaths = ['snimages/snredshiftall.html',
                      'sn2016/snredshift.html',
                      'snimages/snredboneyard.html']
    # Which of the pages above are re-read when running in update mode.
    rochesterupdate = [False, True, True]
    task_str = catalog.get_current_task_str()
    # Placeholder Julian dates used by the page for "unknown".
    baddates = ['2440587', '2440587.292']
    # Defined up front so the local-file section below can use them even when
    # every HTML page was skipped or failed to load.
    sec_ref = 'Latest Supernovae'
    sec_refurl = ('http://www.rochesterastronomy.org/'
                  'snimages/snredshiftall.html')

    for pp, path in enumerate(pbar(rochesterpaths, task_str)):
        if catalog.args.update and not rochesterupdate[pp]:
            continue

        filepath = (os.path.join(
            catalog.get_current_task_repo(), 'rochester/') +
            os.path.basename(path))
        for mirror in rochestermirrors:
            html = catalog.load_cached_url(
                mirror + path, filepath,
                failhard=(mirror != rochestermirrors[-1]))
            if html:
                break

        if not html:
            continue

        soup = BeautifulSoup(html, 'html5lib')
        rows = soup.findAll('tr')
        for rr, row in enumerate(pbar(rows, task_str)):
            if rr == 0:
                continue
            cols = row.findAll('td')
            if not len(cols):
                continue

            # A purely numeric alias is an IAU-style designation; prefer it as
            # the entry name when present.
            name = ''
            if cols[14].contents:
                aka = str(cols[14].contents[0]).strip()
                if is_number(aka.strip('?')):
                    aka = 'SN' + aka.strip('?') + 'A'
                    oldname = aka
                    name = catalog.add_entry(aka)
                elif len(aka) == 4 and is_number(aka[:4]):
                    aka = 'SN' + aka
                    oldname = aka
                    name = catalog.add_entry(aka)

            ra = str(cols[3].contents[0]).strip()
            dec = str(cols[4].contents[0]).strip()

            # Strip any HTML markup from the name cell.
            sn = re.sub('<[^<]+?>', '', str(cols[0].contents[0])).strip()
            if is_number(sn.strip('?')):
                sn = 'SN' + sn.strip('?') + 'A'
            elif len(sn) == 4 and is_number(sn[:4]):
                sn = 'SN' + sn
            if not name:
                if not sn:
                    continue
                if sn[:8] == 'MASTER J':
                    sn = sn.replace('MASTER J', 'MASTER OT J').replace(
                        'SNHunt', 'SNhunt')
                if 'POSSIBLE' in sn.upper() and ra and dec:
                    sn = 'PSN J' + ra.replace(':', '').replace('.', '')
                    sn += dec.replace(':', '').replace('.', '')
                oldname = sn
                name = catalog.add_entry(sn)

            reflink = cols[12].findAll('a')[0]
            reference = reflink.contents[0].strip()
            refurl = reflink['href'].strip()
            source = catalog.entries[name].add_source(
                name=reference, url=refurl)
            sec_source = catalog.entries[name].add_source(
                name=sec_ref, url=sec_refurl, secondary=True)
            sources = uniq_cdl(list(filter(None, [source, sec_source])))
            catalog.entries[name].add_quantity(
                SUPERNOVA.ALIAS, oldname, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, sn, sources)

            if cols[14].contents:
                if aka == 'SNR G1.9+0.3':
                    aka = 'G001.9+00.3'
                if aka[:4] == 'PS1 ':
                    aka = 'PS1-' + aka[4:]
                if aka[:8] == 'MASTER J':
                    aka = aka.replace('MASTER J', 'MASTER OT J').replace(
                        'SNHunt', 'SNhunt')
                if 'POSSIBLE' in aka.upper() and ra and dec:
                    aka = 'PSN J' + ra.replace(':', '').replace('.', '')
                    aka += dec.replace(':', '').replace('.', '')
                catalog.entries[name].add_quantity(
                    SUPERNOVA.ALIAS, aka, sources)

            if str(cols[1].contents[0]).strip() != 'unk':
                # Named ``ctype`` so the builtin ``type`` is not shadowed.
                ctype = str(cols[1].contents[0]).strip(' :,')
                catalog.entries[name].add_quantity(
                    SUPERNOVA.CLAIMED_TYPE, ctype, sources)
            if str(cols[2].contents[0]).strip() != 'anonymous':
                catalog.entries[name].add_quantity(SUPERNOVA.HOST, str(
                    cols[2].contents[0]).strip(), sources)
            catalog.entries[name].add_quantity(SUPERNOVA.RA, ra, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.DEC, dec, sources)

            # Discovery date, given as a Julian date.
            discstr = str(cols[6].contents[0]).strip()
            if discstr not in baddates:
                disctime = astrotime(float(discstr), format='jd').datetime
                ddate = make_date_string(disctime.year, disctime.month,
                                         disctime.day)
                catalog.entries[name].add_quantity(
                    SUPERNOVA.DISCOVER_DATE, ddate, sources)

            # Date of maximum, given as a Julian date.
            maxstr = str(cols[7].contents[0]).strip()
            if maxstr not in baddates:
                maxtime = astrotime(float(maxstr), format='jd')
                # Magnitudes above 90 are placeholders; GRB afterglows are
                # excluded from this photometry.
                if ((float(str(cols[8].contents[0]).strip()) <= 90.0 and
                     not any('GRB' in xx for xx in
                             catalog.entries[name].get_aliases()))):
                    mag = str(cols[8].contents[0]).strip()
                    catalog.entries[name].add_photometry(
                        time=str(maxtime.mjd), magnitude=mag, source=sources)
            if cols[11].contents[0] != 'n/a':
                catalog.entries[name].add_quantity(SUPERNOVA.REDSHIFT, str(
                    cols[11].contents[0]).strip(), sources)
            catalog.entries[name].add_quantity('discoverer', str(
                cols[13].contents[0]).strip(), sources)
            if catalog.args.update:
                catalog.journal_entries()

    if not catalog.args.update:
        vsnetfiles = ['latestsne.dat']
        for vsnetfile in vsnetfiles:
            file_name = os.path.join(
                catalog.get_current_task_repo(), vsnetfile)
            with open(file_name, 'r', encoding='latin1') as csv_file:
                tsvin = csv.reader(csv_file, delimiter=' ',
                                   skipinitialspace=True)
                for row in tsvin:
                    if (not row or row[0][:4] in ['http', 'www.'] or
                            len(row) < 3):
                        continue
                    name = row[0].strip()
                    if name[:4].isdigit():
                        name = 'SN' + name
                    if name.startswith('PSNJ'):
                        name = 'PSN J' + name[4:]
                    if name.startswith('MASTEROTJ'):
                        name = name.replace('MASTEROTJ', 'MASTER OT J')
                    name = catalog.add_entry(name)
                    sec_source = catalog.entries[name].add_source(
                        name=sec_ref, url=sec_refurl, secondary=True)
                    catalog.entries[name].add_quantity(
                        SUPERNOVA.ALIAS, name, sec_source)

                    if not is_number(row[1]):
                        continue
                    # Date is YYYYMMDD with an optional fractional day.
                    year = row[1][:4]
                    month = row[1][4:6]
                    day = row[1][6:]
                    if '.' not in day:
                        day = day[:2] + '.' + day[2:]
                    mjd = astrotime(year + '-' + month + '-' +
                                    str(floor(float(day))).zfill(2)).mjd
                    mjd += float(day) - floor(float(day))
                    magnitude = row[2].rstrip(ascii_letters)
                    if not is_number(magnitude):
                        continue
                    # Integers above 100 are magnitudes written without the
                    # decimal point.
                    if magnitude.isdigit():
                        if int(magnitude) > 100:
                            magnitude = magnitude[:2] + '.' + magnitude[2:]

                    # Skip placeholder magnitudes.  The test is made on this
                    # row's own magnitude, not on a leftover HTML table cell.
                    if float(magnitude) >= 90.0:
                        continue

                    # Rows with only three fields carry no error and no
                    # reference.
                    e_magnitude = ''
                    if len(row) >= 4:
                        if is_number(row[3]):
                            e_magnitude = row[3]
                            refind = 4
                        else:
                            refind = 3

                        if refind >= len(row):
                            sources = sec_source
                        else:
                            reference = ' '.join(row[refind:])
                            source = catalog.entries[
                                name].add_source(name=reference)
                            catalog.entries[name].add_quantity(
                                SUPERNOVA.ALIAS, name, sec_source)
                            sources = uniq_cdl([source, sec_source])
                    else:
                        sources = sec_source

                    band = row[2].lstrip('1234567890.')

                    catalog.entries[name].add_photometry(
                        time=mjd, band=band, magnitude=magnitude,
                        e_magnitude=e_magnitude, source=sources)

    catalog.journal_entries()
    return
def do_cpcs(catalog):
    """Import from the CPCS.

    Loads the JSON alert index of the Cambridge Photometric Calibration
    Server, and for every alert whose (normalised) name already exists in the
    catalog adds the alert's coordinates and its light-curve photometry.

    ``catalog`` supplies the task repository path, the URL loader, the entry
    store, the logger and the ``args.update`` / ``args.travis`` run flags.
    """
    task_str = catalog.get_current_task_str()
    repo = catalog.get_current_task_repo()
    hashtag = '&hashtag=JG_530ad9462a0b8785bfb385614bf178c6'
    index_url = ('http://gsaweb.ast.cam.ac.uk/'
                 'followup/list_of_alerts?format=json&num=100000&'
                 'published=1&observed_only=1' + hashtag)
    index_txt = catalog.load_url(
        index_url, os.path.join(repo, 'CPCS', 'index.json'))
    if not index_txt:
        return

    alertindex = json.loads(index_txt, object_pairs_hook=OrderedDict)
    alert_ids = [record['id'] for record in alertindex]

    # Just use a whitelist for now since naming seems inconsistent.
    known_surveys = ('GAIA', 'OGLE', 'ASASSN', 'MASTER', 'OTJ', 'PS1',
                     'IPTF', 'CSS')

    for ii, ai in enumerate(pbar(alert_ids, task_str)):
        alert = alertindex[ii]
        name = alert['ivorn'].split('/')[-1].strip()

        # Skip a few weird entries.
        if name == 'ASASSNli':
            continue
        upper_name = name.upper()
        if not any(tag in upper_name for tag in known_surveys):
            continue

        # Bring the alert name in line with the catalog's naming conventions.
        name = name.replace('Verif', '').replace('_', ' ')
        if 'ASASSN' in name and name[6] != '-':
            name = 'ASASSN-' + name[6:].lower()
        if 'MASTEROTJ' in name:
            name = name.replace('MASTEROTJ', 'MASTER OT J')
        if 'OTJ' in name:
            name = name.replace('OTJ', 'MASTER OT J')
        if name.upper().startswith('IPTF'):
            name = 'iPTF' + name[4:].lower()
        if name.upper().startswith('PS1'):
            name = 'PS1' + name[3:].lower()

        # Only add events that already exist.
        if not catalog.entry_exists(name):
            continue
        oldname = name
        name = catalog.add_entry(name)
        entry = catalog.entries[name]

        sec_source = entry.add_source(
            name='Cambridge Photometric Calibration Server',
            url='http://gsaweb.ast.cam.ac.uk/followup/',
            secondary=True)
        entry.add_quantity(TIDALDISRUPTION.ALIAS, oldname, sec_source)

        unit_deg = 'floatdegrees'
        for coord_key in (TIDALDISRUPTION.RA, TIDALDISRUPTION.DEC):
            entry.add_quantity(
                coord_key, str(alert[coord_key]), sec_source,
                u_value=unit_deg)

        alerturl = ('http://gsaweb.ast.cam.ac.uk/'
                    'followup/get_alert_lc_data?alert_id=' + str(ai))
        source = entry.add_source(name='CPCS Alert ' + str(ai), url=alerturl)
        fname = (os.path.join(catalog.get_current_task_repo(), 'CPCS/alert-') +
                 str(ai).zfill(2) + '.json')
        jsonstr = catalog.load_url(alerturl + hashtag, fname)

        try:
            cpcsalert = json.loads(jsonstr)
        except Exception:
            catalog.log.warning('Mangled CPCS data for alert {}.'.format(ai))
            continue

        mjds = [round_sig(value, sig=9) for value in cpcsalert['mjd']]
        mags = [round_sig(value, sig=6) for value in cpcsalert['mag']]
        # Non-numeric or non-positive errors are stored as "no error".
        errs = []
        for raw_err in cpcsalert['magerr']:
            if is_number(raw_err) and float(raw_err) > 0.0:
                errs.append(round_sig(raw_err, sig=6))
            else:
                errs.append('')
        bnds = cpcsalert['filter']
        obs = cpcsalert['observatory']

        for mi, mjd in enumerate(mjds):
            entry.add_photometry(
                time=mjd,
                u_time='MJD',
                magnitude=mags[mi],
                e_magnitude=errs[mi],
                band=bnds[mi],
                observatory=obs[mi],
                source=uniq_cdl([source, sec_source]))

        if catalog.args.update:
            catalog.journal_entries()
        if catalog.args.travis and ii >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()
    return
def do_asiago_spectra(catalog):
    """Import event metadata from the Asiago classification table.

    Parses the HTML table row by row; each row with a name button yields an
    entry with aliases, claimed type, coordinates, redshift, discoverer and
    host attributed to the Asiago catalogue and, when the row links one, to
    the quoted reference.

    Parameters
    ----------
    catalog
        Catalog being populated; supplies URL loading, the task repository
        path and entry storage.

    Notes
    -----
    Only the table metadata is imported. Downloading the linked FITS spectra
    and parsing the epoch column (column 9) are not implemented.
    """
    task_str = catalog.get_current_task_str()
    html = catalog.load_url(('http://sngroup.oapd.inaf.it./'
                             'cgi-bin/output_class.cgi?sn=1990'),
                            os.path.join(catalog.get_current_task_repo(),
                                         'Asiago/spectra.html'))
    if not html:
        return

    bs = BeautifulSoup(html, 'html5lib')
    trs = bs.findAll('tr')
    for tr in pbar(trs, task_str):
        tds = tr.findAll('td')
        # Reset every per-row field so that a short or malformed row can
        # neither reuse values parsed from the previous row nor reference a
        # name that was never bound.
        name = ''
        alias = ''
        host = ''
        discoverer = ''
        ra = ''
        dec = ''
        claimedtype = ''
        redshift = ''
        source = ''
        reference = ''
        secondarysource = ''
        sources = ''
        for tdi, td in enumerate(tds):
            if tdi == 0:
                butt = td.find('button')
                # Rows without a name button (e.g. headers) are skipped.
                if not butt:
                    break
                alias = butt.text.strip()
                alias = alias.replace('PSNJ', 'PSN J').replace('GAIA', 'Gaia')
            elif tdi == 1:
                name = (td.text.strip().replace('PSNJ', 'PSN J').replace(
                    'GAIA', 'Gaia'))
                if name.startswith('SN '):
                    name = 'SN' + name[3:]
                if not name:
                    name = alias
                if is_number(name[:4]):
                    name = 'SN' + name
                oldname = name
                name = catalog.add_entry(name)
                reference = 'Asiago Supernova Catalogue'
                refurl = 'http://graspa.oapd.inaf.it/cgi-bin/sncat.php'
                secondarysource = catalog.entries[name].add_source(
                    name=reference, url=refurl, secondary=True)
                catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, oldname,
                                                   secondarysource)
                if alias != name:
                    catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, alias,
                                                       secondarysource)
            elif tdi == 2:
                host = td.text.strip()
                if host == 'anonymous':
                    host = ''
            elif tdi == 3:
                discoverer = td.text.strip()
            elif tdi == 5:
                ra = td.text.strip()
            elif tdi == 6:
                dec = td.text.strip()
            elif tdi == 7:
                claimedtype = td.text.strip()
            elif tdi == 8:
                redshift = td.text.strip()
            elif tdi == 10:
                refs = td.findAll('a')
                source = ''
                reference = ''
                refurl = ''
                # The last link whose text is not 'REF' wins.
                for ref in refs:
                    if ref.text != 'REF':
                        reference = ref.text
                        refurl = ref['href']
                if reference:
                    source = catalog.entries[name].add_source(name=reference,
                                                              url=refurl)
                catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name,
                                                   secondarysource)
                sources = uniq_cdl(
                    list(filter(None, [source, secondarysource])))

        if name:
            # Row ended before the reference column: attribute the data to
            # the Asiago catalogue alone.
            if not sources:
                sources = uniq_cdl([secondarysource])
            # NOTE(review): empty values are passed through as before (the
            # original already did so for `host`); presumably add_quantity
            # ignores them -- confirm.
            catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                               claimedtype, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.RA, ra, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.DEC, dec, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.REDSHIFT, redshift,
                                               sources)
            catalog.entries[name].add_quantity(SUPERNOVA.DISCOVERER,
                                               discoverer, sources)
            catalog.entries[name].add_quantity(SUPERNOVA.HOST, host, sources)

    catalog.journal_entries()
    return
def do_suspect_spectra(catalog):
    """Import spectra from the local copy of the SUSPECT archive.

    Walks ``Suspect/<folder>/<event>/<spectrum file>`` inside the task
    repository. The observation date is taken from the second
    underscore-separated field of each file name, and the file is read as
    space-delimited wavelength / flux (/ error) columns.

    Parameters
    ----------
    catalog
        Catalog being populated; supplies the task repository path, entry
        storage, logging and the command-line ``args``.
    """
    task_str = catalog.get_current_task_str()
    with open(os.path.join(catalog.get_current_task_repo(),
                           'Suspect/sources.json'), 'r') as f:
        sourcedict = json.loads(f.read())

    with open(os.path.join(catalog.get_current_task_repo(),
                           'Suspect/filename-changes.txt'), 'r') as f:
        rows = f.readlines()
    # Each non-comment line holds two space-separated names; the second one
    # is mapped to the first.
    changedict = {}
    for row in rows:
        if not row.strip() or row[0] == "#":
            continue
        items = row.strip().split(' ')
        changedict[items[1]] = items[0]

    suspectcnt = 0
    folders = next(os.walk(os.path.join(
        catalog.get_current_task_repo(), 'Suspect')))[1]
    for folder in pbar(folders, task_str):
        eventfolders = next(os.walk(os.path.join(
            catalog.get_current_task_repo(), 'Suspect/') + folder))[1]
        oldname = ''
        for eventfolder in pbar(eventfolders, task_str):
            name = eventfolder
            if is_number(name[:4]):
                name = 'SN' + name
            name = catalog.get_preferred_name(name)
            # Journal once the walk moves on to a different event.
            if oldname and name != oldname:
                catalog.journal_entries()
            oldname = name
            name = catalog.add_entry(name)
            sec_ref = 'SUSPECT'
            sec_refurl = 'https://www.nhn.ou.edu/~suspect/'
            sec_bibc = '2001AAS...199.8408R'
            sec_source = catalog.entries[name].add_source(
                name=sec_ref, url=sec_refurl, bibcode=sec_bibc,
                secondary=True)
            catalog.entries[name].add_quantity(
                SUPERNOVA.ALIAS, name, sec_source)
            eventpath = os.path.join(catalog.get_current_task_repo(),
                                     'Suspect', folder, eventfolder)
            eventspectra = next(os.walk(eventpath))[2]
            for spectrum in eventspectra:
                sources = [sec_source]
                bibcode = ''
                if spectrum in changedict:
                    specalias = changedict[spectrum]
                else:
                    specalias = spectrum
                # Prefer a per-file bibcode, then fall back to a per-event
                # one.
                if specalias in sourcedict:
                    bibcode = sourcedict[specalias]
                elif name in sourcedict:
                    bibcode = sourcedict[name]
                if bibcode:
                    source = catalog.entries[name].add_source(
                        bibcode=unescape(bibcode))
                    sources += [source]
                sources = uniq_cdl(sources)

                # The date field is YYYYMMDD followed by an optional
                # fractional day, which is added back onto the integer MJD.
                date = spectrum.split('_')[1]
                year = date[:4]
                month = date[4:6]
                day = date[6:]
                sig = get_sig_digits(day) + 5
                day_fmt = str(floor(float(day))).zfill(2)
                time = astrotime(year + '-' + month + '-' + day_fmt).mjd
                time = time + float(day) - floor(float(day))
                time = pretty_num(time, sig=sig)

                specpath = os.path.join(catalog.get_current_task_repo(),
                                        'Suspect', folder, eventfolder,
                                        spectrum)
                with open(specpath, 'r') as f:
                    specdata = list(csv.reader(
                        f, delimiter=' ', skipinitialspace=True))
                    specdata = list(filter(None, specdata))
                    # Drop rows that repeat the previous row's flux value.
                    newspec = []
                    oldval = ''
                    for row in specdata:
                        if row[1] == oldval:
                            continue
                        newspec.append(row)
                        oldval = row[1]
                    specdata = newspec
                # An empty file would otherwise crash on `specdata[0]`.
                if not specdata:
                    catalog.log.warning(
                        'No data found in SUSPECT spectrum "{}", '
                        'skipping.'.format(spectrum))
                    continue
                haserrors = len(specdata[0]) == 3 and specdata[
                    0][2] and specdata[0][2] != 'NaN'
                # Transpose rows into columns.
                specdata = [list(i) for i in zip(*specdata)]

                wavelengths = specdata[0]
                fluxes = specdata[1]
                errors = ''
                if haserrors:
                    errors = specdata[2]

                catalog.entries[name].add_spectrum(
                    u_wavelengths='Angstrom', u_fluxes='Uncalibrated',
                    u_time='MJD',
                    time=time,
                    wavelengths=wavelengths, fluxes=fluxes, errors=errors,
                    u_errors='Uncalibrated',
                    source=sources, filename=spectrum)
                suspectcnt = suspectcnt + 1
                # Only leaves the per-event spectrum loop.
                if (catalog.args.travis and
                        suspectcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                    break

    catalog.journal_entries()
    return
def do_ps_threepi(catalog):
    """Import data from Pan-STARRS' 3pi page.

    Reads the paginated public candidate table (from the web or from cached
    ``3pi/page*.html`` files), creates an entry for each row typed ``sn`` or
    ``orphan``, then reads each candidate's detail page for its light curve
    and host information.
    """
    task_str = catalog.get_current_task_str()
    bad_aliases = ['SN1994J']
    teles = 'Pan-STARRS1'
    fname = os.path.join(catalog.get_current_task_repo(), '3pi/page00.html')
    ps_url = ("http://psweb.mp.qub.ac.uk/"
              "ps1threepi/psdb/public/?page=1&sort=followup_flag_date")
    html = catalog.load_url(ps_url, fname, write=False)
    if not html:
        return

    # Clean some common HTML manglings
    html = html.replace('ahref=', 'a href=')

    bs = BeautifulSoup(html, 'html5lib')
    div = bs.find('div', {'class': 'pagination'})
    # A page without pagination links is treated as the site being offline.
    offline = False
    if not div:
        offline = True
    else:
        links = div.findAll('a')
        if not links:
            offline = True

    if offline:
        # Nothing new can be fetched, so an update run has nothing to do.
        if catalog.args.update:
            return
        warnings.warn('Pan-STARRS 3pi offline, using local files only.')
        with open(fname, 'r') as f:
            html = f.read()
        bs = BeautifulSoup(html, 'html5lib')
        div = bs.find('div', {'class': 'pagination'})
        links = div.findAll('a')
    else:
        # Online: refresh the cached copy of the first page.
        with open(fname, 'w') as f:
            f.write(html)

    # The page count is read from the second-to-last pagination link.
    numpages = int(links[-2].contents[0])
    oldnumpages = len(
        glob(os.path.join(catalog.get_current_task_repo(), '3pi/page*')))
    for page in pbar(range(1, numpages), task_str):
        fname = os.path.join(catalog.get_current_task_repo(), '3pi/page') + \
            str(page).zfill(2) + '.html'
        if offline:
            if not os.path.isfile(fname):
                continue
            with open(fname, 'r') as f:
                html = f.read()
        else:
            # Reuse the cached page only when archives are enabled and the
            # page index is below the number of previously cached pages.
            if (catalog.current_task.load_archive(catalog.args) and
                    page < oldnumpages and os.path.isfile(fname)):
                with open(fname, 'r') as f:
                    html = f.read()
            else:
                response = urllib.request.urlopen(
                    "http://psweb.mp.qub.ac.uk/ps1threepi/psdb/public/?page=" +
                    str(page) + "&sort=followup_flag_date")
                with open(fname, 'w') as f:
                    html = response.read().decode('utf-8')
                    f.write(html)

        bs = BeautifulSoup(html, 'html5lib')
        trs = bs.findAll('tr')
        for tr in pbar(trs, task_str):
            tds = tr.findAll('td')
            if not tds:
                continue
            refs = []
            aliases = []
            ttype = ''
            ctype = ''
            # Pull the fields of interest out of the row by column index.
            for tdi, td in enumerate(tds):
                if tdi == 0:
                    psname = td.contents[0]
                    pslink = psname['href']
                    psname = psname.text
                elif tdi == 1:
                    ra = td.contents[0]
                elif tdi == 2:
                    dec = td.contents[0]
                elif tdi == 3:
                    ttype = td.contents[0]
                    # Stop parsing rows that are neither 'sn' nor 'orphan'.
                    if ttype != 'sn' and ttype != 'orphan':
                        break
                elif tdi == 6:
                    if not td.contents:
                        continue
                    ctype = td.contents[0]
                    if ctype == 'Observed':
                        ctype = ''
                elif tdi == 17:
                    if td.contents:
                        crossrefs = td.findAll('a')
                        # ATel links become references; links whose first
                        # four characters are numeric are dropped; the rest
                        # are kept as aliases.
                        for cref in crossrefs:
                            if 'atel' in cref.contents[0].lower():
                                refs.append([cref.contents[0], cref['href']])
                            elif is_number(cref.contents[0][:4]):
                                continue
                            else:
                                aliases.append(cref.contents[0])

            if ttype != 'sn' and ttype != 'orphan':
                continue

            # Prefer the last non-blacklisted 'SN...' alias as the entry
            # name, falling back to the Pan-STARRS name.
            name = ''
            for alias in aliases:
                if alias in bad_aliases:
                    continue
                if alias[:2] == 'SN':
                    name = alias
            if not name:
                name = psname
            name = catalog.add_entry(name)
            sources = [
                catalog.entries[name].add_source(
                    name='Pan-STARRS 3Pi',
                    url=('http://psweb.mp.qub.ac.uk/'
                         'ps1threepi/psdb/'))
            ]
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name,
                                               sources[0])
            for ref in refs:
                sources.append(catalog.entries[name].add_source(
                    name=ref[0], url=ref[1]))
            source = uniq_cdl(sources)
            for alias in aliases:
                newalias = alias
                # For CSS/SSS/MLS aliases the first '-' becomes ':'.
                if alias[:3] in ['CSS', 'SSS', 'MLS']:
                    newalias = alias.replace('-', ':', 1)
                newalias = newalias.replace('PSNJ', 'PSN J')
                catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, newalias,
                                                   source)
            catalog.entries[name].add_quantity(SUPERNOVA.RA, ra, source)
            catalog.entries[name].add_quantity(SUPERNOVA.DEC, dec, source)
            catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE, ctype,
                                               source)

            # Candidate detail page, cached as 3pi/candidate-<id>.html.
            fname2 = os.path.join(catalog.get_current_task_repo(),
                                  '3pi/candidate-')
            fname2 += pslink.rstrip('/').split('/')[-1] + '.html'
            if offline:
                if not os.path.isfile(fname2):
                    continue
                with open(fname2, 'r') as f:
                    html2 = f.read()
            else:
                if (catalog.current_task.load_archive(catalog.args) and
                        os.path.isfile(fname2)):
                    with open(fname2, 'r') as f:
                        html2 = f.read()
                else:
                    pslink = ('http://psweb.mp.qub.ac.uk/'
                              'ps1threepi/psdb/public/') + pslink
                    try:
                        session2 = requests.Session()
                        response2 = session2.get(pslink)
                    except Exception:
                        # A failed request switches the rest of the run to
                        # offline mode.
                        offline = True
                        if not os.path.isfile(fname2):
                            continue
                        with open(fname2, 'r') as f:
                            html2 = f.read()
                    else:
                        html2 = response2.text
                        with open(fname2, 'w') as f:
                            f.write(html2)

            bs2 = BeautifulSoup(html2, 'html5lib')
            scripts = bs2.findAll('script')
            nslines = []
            nslabels = []
            # The light-curve data is embedded in the page's scripts as
            # `jslcdata.push(...)` / `jslabels.push(...)` calls whose
            # arguments are parsed as JSON.
            for script in scripts:
                if 'jslcdata.push' not in script.text:
                    continue
                slines = script.text.splitlines()
                for line in slines:
                    if 'jslcdata.push' in line:
                        json_fname = (line.strip()
                                      .replace('jslcdata.push(', '')
                                      .replace(');', ''))
                        nslines.append(json.loads(json_fname))
                    if ('jslabels.push' in line and 'blanks' not in line and
                            'non det' not in line):
                        json_fname = (line.strip()
                                      .replace('jslabels.push(', '')
                                      .replace(');', ''))
                        nslabels.append(json.loads(json_fname)['label'])
            # Only the first len(nslabels) data series are used; each is
            # paired with the label at the same index.
            for li, line in enumerate(nslines[:len(nslabels)]):
                if not line:
                    continue
                for obs in line:
                    catalog.entries[name].add_photometry(
                        time=str(obs[0]),
                        u_time='MJD',
                        band=nslabels[li],
                        instrument='GPC',
                        magnitude=str(obs[1]),
                        e_magnitude=str(obs[2]),
                        source=source,
                        telescope=teles)
            # Ignoring upper limits as they are usually spurious chip gaps.
            # for li, line in enumerate(nslines[2 * len(nslabels):]):
            #     if not line:
            #         continue
            #     for obs in line:
            #         catalog.entries[name].add_photometry(
            #             time=str(obs[0]),
            #             u_time='MJD',
            #             band=nslabels[li],
            #             instrument='GPC',
            #             magnitude=str(obs[1]),
            #             upperlimit=True,
            #             source=source,
            #             telescope=teles)
            assoctab = bs2.find('table', {'class': 'generictable'})
            hostname = ''
            redshift = ''
            if assoctab:
                trs = assoctab.findAll('tr')
                # The header check and the values both read the cells of the
                # table's second row.
                headertds = [x.contents[0] for x in trs[1].findAll('td')]
                tds = trs[1].findAll('td')
                for tdi, td in enumerate(tds):
                    if tdi == 1:
                        hostname = td.contents[0].strip()
                    elif tdi == 4:
                        if 'z' in headertds:
                            redshift = td.contents[0].strip()
                # Skip galaxies with just SDSS id
                if is_number(hostname):
                    continue
                catalog.entries[name].add_quantity(SUPERNOVA.HOST, hostname,
                                                   source)
                if redshift:
                    catalog.entries[name].add_quantity(
                        [SUPERNOVA.REDSHIFT, SUPERNOVA.HOST_REDSHIFT],
                        redshift,
                        source,
                        kind='host')
            if catalog.args.update:
                catalog.journal_entries()

        catalog.journal_entries()
        # Only run first page for Travis
        if catalog.args.travis:
            break

    return
def do_cpcs(catalog):
    """Import photometry from the Cambridge Photometric Calibration Server.

    Loads the CPCS alert index, normalizes each alert name and, for alerts
    that match the survey whitelist and already exist in the catalog, adds
    the alert coordinates and its photometry to the existing entry. Alert
    light curves are cached as ``CPCS/alert-<id>.json``.

    Parameters
    ----------
    catalog
        Catalog being populated; supplies URL loading, the task repository
        path, entry storage, logging and the command-line ``args``.
    """
    task_str = catalog.get_current_task_str()
    cpcs_url = ('http://gsaweb.ast.cam.ac.uk/'
                'followup/list_of_alerts?format=json&num=100000&'
                'published=1&observed_only=1&'
                'hashtag=JG_530ad9462a0b8785bfb385614bf178c6')
    jsontxt = catalog.load_cached_url(
        cpcs_url, os.path.join(catalog.get_current_task_repo(),
                               'CPCS/index.json'))
    if not jsontxt:
        return
    alertindex = json.loads(jsontxt, object_pairs_hook=OrderedDict)
    ids = [xx['id'] for xx in alertindex]

    # Just use a whitelist for now since naming seems inconsistent.
    white_list = ('GAIA', 'OGLE', 'ASASSN', 'MASTER', 'OTJ', 'PS1', 'IPTF')

    for ii, ai in enumerate(pbar(ids, task_str)):
        name = alertindex[ii]['ivorn'].split('/')[-1].strip()
        # Skip a few weird entries
        if name == 'ASASSNli':
            continue
        upper_name = name.upper()
        if not any(xx in upper_name for xx in white_list):
            continue

        name = name.replace('Verif', '').replace('_', ' ')
        # The length guard avoids an IndexError when the name is exactly
        # 'ASASSN' (nothing follows the prefix).
        if 'ASASSN' in name and len(name) > 6 and name[6] != '-':
            name = 'ASASSN-' + name[6:]
        if 'MASTEROTJ' in name:
            name = name.replace('MASTEROTJ', 'MASTER OT J')
        if 'OTJ' in name:
            name = name.replace('OTJ', 'MASTER OT J')
        if name.upper().startswith('IPTF'):
            name = 'iPTF' + name[4:]
        # Only add events that are classified as SN, i.e. events that are
        # already present in the catalog. The check used to be inverted,
        # which skipped the known events and created entries for every
        # unknown alert instead.
        if not catalog.entry_exists(name):
            continue
        oldname = name
        name = catalog.add_entry(name)

        sec_source = catalog.entries[name].add_source(
            name='Cambridge Photometric Calibration Server',
            url='http://gsaweb.ast.cam.ac.uk/followup/', secondary=True)
        catalog.entries[name].add_quantity(
            SUPERNOVA.ALIAS, oldname, sec_source)
        unit_deg = 'floatdegrees'
        catalog.entries[name].add_quantity(
            SUPERNOVA.RA, str(alertindex[ii][SUPERNOVA.RA]), sec_source,
            u_value=unit_deg)
        catalog.entries[name].add_quantity(
            SUPERNOVA.DEC, str(alertindex[ii][SUPERNOVA.DEC]), sec_source,
            u_value=unit_deg)

        alerturl = ('http://gsaweb.ast.cam.ac.uk/'
                    'followup/get_alert_lc_data?alert_id=' + str(ai))
        source = catalog.entries[name].add_source(
            name='CPCS Alert ' + str(ai), url=alerturl)
        fname = os.path.join(catalog.get_current_task_repo(),
                             'CPCS/alert-') + str(ai).zfill(2) + '.json'
        if (catalog.current_task.load_archive(catalog.args) and
                os.path.isfile(fname)):
            with open(fname, 'r') as ff:
                jsonstr = ff.read()
        else:
            # Close the session once the request completes instead of
            # leaving one open per alert.
            with requests.Session() as session:
                response = session.get(
                    alerturl + '&hashtag=JG_530ad9462a0b8785bfb385614bf178c6')
            jsonstr = response.text
            with open(fname, 'w') as ff:
                ff.write(jsonstr)

        try:
            cpcsalert = json.loads(jsonstr)
        except Exception:
            # Report the skipped alert instead of swallowing it silently.
            catalog.log.warning('Mangled CPCS data for alert {}.'.format(ai))
            continue

        mjds = [round_sig(xx, sig=9) for xx in cpcsalert['mjd']]
        mags = [round_sig(xx, sig=6) for xx in cpcsalert['mag']]
        # Non-numeric or non-positive uncertainties are stored as empty.
        errs = [round_sig(xx, sig=6)
                if (is_number(xx) and float(xx) > 0.0) else ''
                for xx in cpcsalert['magerr']]
        bnds = cpcsalert['filter']
        obs = cpcsalert['observatory']
        for mi, mjd in enumerate(mjds):
            # Times come from the alert's 'mjd' column, so state the unit
            # explicitly.
            catalog.entries[name].add_photometry(
                time=mjd,
                u_time='MJD',
                magnitude=mags[mi],
                e_magnitude=errs[mi],
                band=bnds[mi],
                observatory=obs[mi],
                source=uniq_cdl([source, sec_source]))
        if catalog.args.update:
            catalog.journal_entries()

    catalog.journal_entries()
    return
def do_snax(catalog):
    """Import from the SNaX X-ray database.

    Downloads (or loads the cached copy of) the SNaX TSV export and, for
    every data row, records the claimed type, explosion date, coordinates,
    luminosity distance, host, redshift and one X-ray flux measurement.

    Parameters
    ----------
    catalog
        Catalog being populated; supplies URL loading, the task repository
        path and entry storage.
    """
    task_str = catalog.get_current_task_str()

    dlurl = 'http://kronos.uchicago.edu/snax/export.php?exportType=TSV&exportFields=standard&objid=&name=&typeid=&type=&galaxyid=&galaxy=&fluxMin=&fluxMax=&fluxEnergyLMin=&fluxEnergyLMax=&fluxEnergyHMin=&fluxEnergyHMax=&lumMin=&lumMax=&instrumentid=&instrument=&ageMin=&ageMax=&dateMin=&dateMax=&sortA=dateExploded'  # noqa: E501

    file_path = os.path.join(catalog.get_current_task_repo(), 'SNaX.TSV')

    tsv = catalog.load_url(dlurl, file_path)
    # Nothing to import when the export could be neither downloaded nor read
    # from the cache.
    if not tsv:
        return

    flux_scale = Decimal('1.0e-13')
    data = [x.split('\t') for x in tsv.split('\n')]

    for r, row in enumerate(pbar(data, task_str)):
        # Skip the header row and blank lines.
        if r == 0 or not row[0]:
            continue
        (name, source) = catalog.new_entry(
            row[0], srcname='SNaX', url='http://kronos.uchicago.edu/snax/',
            secondary=True)
        sources = [source]
        # The bibcode columns are addressed from the end of the row: one
        # each for explosion date, coordinates and distance, two for flux.
        expsrc = uniq_cdl(sources + [
            catalog.entries[name].add_source(bibcode=row[-6].strip())
        ])
        coosrc = uniq_cdl(sources + [
            catalog.entries[name].add_source(bibcode=row[-5].strip())
        ])
        dissrc = uniq_cdl(sources + [
            catalog.entries[name].add_source(bibcode=row[-4].strip())
        ])
        flxsrc = uniq_cdl(sources + [
            catalog.entries[name].add_source(bibcode=row[-3].strip()),
            catalog.entries[name].add_source(bibcode=row[-2].strip())
        ])

        catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE, row[1],
                                           source)
        date = astrotime(float(row[2]), format='jd').datetime
        catalog.entries[name].add_quantity(
            SUPERNOVA.EXPLOSION_DATE,
            make_date_string(date.year, date.month, date.day), expsrc)
        # The coordinate column holds RA then Dec, three tokens for RA.
        coords = row[3].split()
        catalog.entries[name].add_quantity(
            SUPERNOVA.RA, ' '.join(coords[:3]), coosrc)
        catalog.entries[name].add_quantity(
            SUPERNOVA.DEC, ' '.join(coords[3:]), coosrc)
        catalog.entries[name].add_quantity(SUPERNOVA.LUM_DIST, row[4], dissrc)
        catalog.entries[name].add_quantity(SUPERNOVA.HOST, row[5], source)
        # A missing or zero redshift uncertainty is recorded as empty.
        catalog.entries[name].add_quantity(
            SUPERNOVA.REDSHIFT,
            row[6],
            source,
            e_value=row[7] if (row[7] and float(row[7]) != 0.0) else '')
        # Flux columns are scaled by 1e-13 before being stored.
        photodict = {
            PHOTOMETRY.TIME: jd_to_mjd(Decimal(row[8])),
            PHOTOMETRY.U_TIME: 'MJD',
            PHOTOMETRY.ENERGY: row[15:17],
            PHOTOMETRY.U_ENERGY: 'keV',
            PHOTOMETRY.FLUX: str(flux_scale * Decimal(row[11])),
            PHOTOMETRY.U_FLUX: 'ergs/s/cm^2',
            PHOTOMETRY.E_LOWER_FLUX:
            str(flux_scale * Decimal(row[13])),
            PHOTOMETRY.E_UPPER_FLUX:
            str(flux_scale * Decimal(row[14])),
            PHOTOMETRY.INSTRUMENT: row[9],
            PHOTOMETRY.SOURCE: flxsrc
        }
        if row[12] == '1':
            photodict[PHOTOMETRY.UPPER_LIMIT] = True
        catalog.entries[name].add_photometry(**photodict)

    catalog.journal_entries()
    return
def set_first_max_light(self):
    """Fill in derived maximum-light and discovery-date quantities.

    For the all-band and the "visual" maximum alike, the date, apparent
    magnitude and band are derived via `_get_max_light` when the matching
    magnitude key is absent. A discovery date is then derived from
    `_get_first_light` when none is present or none has all three
    '/'-separated components, and finally from the earliest dated spectrum
    when there is still no discovery date.
    """
    # (magnitude key, date key, band key, keyword arguments for the lookup)
    peak_specs = (
        (SUPERNOVA.MAX_APP_MAG, SUPERNOVA.MAX_DATE, SUPERNOVA.MAX_BAND, {}),
        (SUPERNOVA.MAX_VISUAL_APP_MAG, SUPERNOVA.MAX_VISUAL_DATE,
         SUPERNOVA.MAX_VISUAL_BAND, {'visual': True}),
    )
    for mag_key, date_key, band_key, lookup_kwargs in peak_specs:
        if mag_key in self:
            continue
        peak_dt, peak_mag, peak_band, peak_src = self._get_max_light(
            **lookup_kwargs)
        if not (peak_dt or peak_mag or peak_band):
            continue
        self_src = self.add_self_source()
        combined_src = uniq_cdl([self_src] + peak_src.split(','))
        if peak_dt:
            self.add_quantity(
                date_key,
                make_date_string(peak_dt.year, peak_dt.month, peak_dt.day),
                combined_src,
                derived=True)
        if peak_mag:
            self.add_quantity(mag_key, pretty_num(peak_mag), combined_src,
                              derived=True)
        if peak_band:
            self.add_quantity(band_key, peak_band, combined_src,
                              derived=True)

    disc_key = self._KEYS.DISCOVER_DATE

    # A date with fewer than three '/'-separated parts lacks a day or month.
    lacks_full_date = (
        disc_key not in self or
        max(len(item[QUANTITY.VALUE].split('/'))
            for item in self[disc_key]) < 3)
    if lacks_full_date:
        first_dt, first_src = self._get_first_light()
        if first_dt:
            self_src = self.add_self_source()
            date_str = make_date_string(first_dt.year, first_dt.month,
                                        first_dt.day)
            self.add_quantity(
                disc_key, date_str,
                uniq_cdl([self_src] + first_src.split(',')),
                derived=True)

    if disc_key not in self and self._KEYS.SPECTRA in self:
        earliest_mjd = float("+inf")
        for spec in self[self._KEYS.SPECTRA]:
            if not ('time' in spec and 'u_time' in spec):
                continue
            unit = spec['u_time']
            if unit == 'MJD':
                spec_mjd = float(spec['time'])
            elif unit == 'JD':
                spec_mjd = float(jd_to_mjd(Decimal(spec['time'])))
            else:
                continue

            if spec_mjd < earliest_mjd:
                earliest_mjd = spec_mjd
                earliest_src = spec['source']

        if earliest_mjd < float("+inf"):
            first_dt = astrotime(earliest_mjd, format='mjd').datetime
            self_src = self.add_self_source()
            date_str = make_date_string(first_dt.year, first_dt.month,
                                        first_dt.day)
            self.add_quantity(
                disc_key, date_str,
                uniq_cdl([self_src] + earliest_src.split(',')),
                derived=True)
    return
def do_simbad(catalog):
    """Import names, coordinates and claimed types from SIMBAD.

    Queries each mirror in turn for objects whose main type is `SN` or
    `SN?`, filters out rows from banned catalogs or distrusted bibcodes,
    and records aliases, coordinates and the claimed type with SIMBAD as a
    secondary source.
    """
    # Simbad.list_votable_fields()
    # Some coordinates that SIMBAD claims belong to the SNe actually belong to
    # the host.
    task_str = catalog.get_current_task_str()
    mirrors = ['http://simbad.harvard.edu/simbad/sim-script',
               'http://simbad.u-strasbg.fr/simbad/sim-script']
    bad_coord_bibs = ['2013ApJ...770..107C']
    bad_type_bibs = ['2014ApJ...796...87I', '2015MNRAS.448.1206M',
                     '2015ApJ...807L..18N']
    bad_name_bibs = ['2004AJ....127.2809W', '2005MNRAS.364.1419Z',
                     '2015A&A...574A.112D', '2011MNRAS.417..916G',
                     '2002ApJ...566..880G']
    banned_cats = ['[TBV2008]', 'OGLE-MBR']
    banned_names = ['SN']

    # Strips the b'...' wrapper left by str() on bytes values.
    bytes_wrapper = re.compile(r'b\'(.*)\'')
    # Removes a bracketed span from an identifier.
    bracket_span = re.compile(r'\[[^)]*\]')

    simbad = Simbad()
    simbad.ROW_LIMIT = -1
    simbad.TIMEOUT = 120
    simbad.add_votable_fields('otype', 'sptype', 'sp_bibcode', 'id')

    table = []
    print(simbad.SIMBAD_URL)
    # Use the first mirror that returns a non-empty table.
    for mirror in mirrors:
        simbad.SIMBAD_URL = mirror
        try:
            table = simbad.query_criteria('maintype=SN | maintype="SN?"')
        except Exception:
            continue
        if table:
            break

    if not table:
        catalog.log.warning('SIMBAD unable to load, probably offline.')

    # 2000A&AS..143....9W
    for raw_row in pbar(table, task_str):
        row = {}
        for col in raw_row.colnames:
            row[col] = bytes_wrapper.sub(r'\1', str(raw_row[col]))

        coo_bib = row['COO_BIBCODE']
        sp_bib = row['SP_BIBCODE']

        # Skip items with no bibliographic info aside from SIMBAD, too
        # error-prone
        if row['OTYPE'] == 'Candidate_SN*' and not row['SP_TYPE']:
            continue
        if not (coo_bib or sp_bib or row['SP_BIBCODE_2']):
            continue
        if any(cat in row['MAIN_ID'] for cat in banned_cats):
            continue
        if coo_bib and coo_bib in bad_name_bibs:
            continue

        name = single_spaces(bracket_span.sub('', row['MAIN_ID']).strip())
        if name in banned_names or is_number(name.replace(' ', '')):
            continue

        name = catalog.add_entry(name)
        source = catalog.entries[name].add_source(
            name='SIMBAD astronomical database',
            bibcode="2000A&AS..143....9W",
            url="http://simbad.u-strasbg.fr/",
            secondary=True)

        for raw_alias in row['ID'].split(','):
            if any(cat in raw_alias for cat in banned_cats):
                continue
            cleaned = single_spaces(bracket_span.sub('', raw_alias).strip())
            if is_number(cleaned.replace(' ', '')):
                continue
            if cleaned in banned_names:
                continue
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS,
                                               name_clean(cleaned), source)

        if coo_bib and coo_bib not in bad_coord_bibs:
            coord_source = catalog.entries[name].add_source(bibcode=coo_bib)
            csources = ','.join([source, coord_source])
            catalog.entries[name].add_quantity(SUPERNOVA.RA, row['RA'],
                                               csources)
            catalog.entries[name].add_quantity(SUPERNOVA.DEC, row['DEC'],
                                               csources)

        if sp_bib and sp_bib not in bad_type_bibs:
            type_sources = [
                source,
                catalog.entries[name].add_source(bibcode=sp_bib),
            ]
            if row['SP_BIBCODE_2']:
                type_sources.append(catalog.entries[name].add_source(
                    bibcode=row['SP_BIBCODE_2']))
            ssources = uniq_cdl(type_sources)
            claimed_type = row['SP_TYPE'].replace('SN.', '')
            claimed_type = claimed_type.replace('SN', '')
            claimed_type = claimed_type.replace('(~)', '').strip(': ')
            catalog.entries[name].add_quantity(SUPERNOVA.CLAIMED_TYPE,
                                               claimed_type, ssources)

    catalog.journal_entries()
    return
def do_itep(catalog):
    """Import photometry from the ITEP/SAI light-curve catalogue.

    Reads the pipe-delimited catalogue file from the task repository, adds
    one photometric point per data row and writes the references that have
    no known bibcode to ``../itep-needsbib.txt``.

    Parameters
    ----------
    catalog
        Catalog being populated; supplies the task repository path, entry
        storage and the command-line ``args``.
    """
    task_str = catalog.get_current_task_str()
    itepignoresources = ['2004ApJ...602..571B', '2013NewA...20...30M']
    itepignorephot = ['SN2006gy']
    needsbib = []
    with open(os.path.join(catalog.get_current_task_repo(),
                           'itep-refs.txt'), 'r') as refs_file:
        refrep = refs_file.read().splitlines()
    # The file alternates lines: each even-indexed line is the bibcode for
    # the reference string on the line after it.
    refrepf = dict(zip(refrep[1::2], refrep[::2]))
    fname = os.path.join(catalog.get_current_task_repo(),
                         'itep-lc-cat-28dec2015.txt')
    # Read inside a context manager so the file handle is closed.
    with open(fname, 'r') as tsv_file:
        tsvin = list(
            csv.reader(tsv_file, delimiter='|', skipinitialspace=True))
    curname = ''
    for rr, row in enumerate(pbar(tsvin, task_str)):
        # Skip the first two rows and any row with fewer than 7 fields.
        if rr <= 1 or len(row) < 7:
            continue
        oldname = 'SN' + row[0].strip()
        mjd = str(jd_to_mjd(Decimal(row[1].strip())))
        band = row[2].strip()
        magnitude = row[3].strip()
        e_magnitude = row[4].strip()
        reference = row[6].strip().strip(',')

        # Entry-level quantities are added once per run of rows sharing a
        # name.
        if curname != oldname:
            curname = oldname
            name = catalog.add_entry(oldname)

            sec_reference = ('Sternberg Astronomical Institute '
                             'Supernova Light Curve Catalogue')
            sec_refurl = 'http://dau.itep.ru/sn/node/72'
            sec_source = catalog.entries[name].add_source(
                name=sec_reference, url=sec_refurl, secondary=True)
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, oldname,
                                               sec_source)

            # The first run of digits in the name is used as the year.
            year = re.findall(r'\d+', name)[0]
            catalog.entries[name].add_quantity(SUPERNOVA.DISCOVER_DATE, year,
                                               sec_source)
        if reference in refrepf:
            bibcode = unescape(refrepf[reference])
            source = catalog.entries[name].add_source(bibcode=bibcode)
        else:
            # Reset so that the ignore check below cannot see the previous
            # row's bibcode (or an unbound name on the first data row).
            bibcode = ''
            needsbib.append(reference)
            source = catalog.entries[name].add_source(
                name=reference) if reference else ''

        if oldname in itepignorephot or bibcode in itepignoresources:
            continue

        photodict = {
            PHOTOMETRY.TIME: mjd,
            PHOTOMETRY.U_TIME: 'MJD',
            PHOTOMETRY.MAGNITUDE: magnitude,
            PHOTOMETRY.SOURCE: uniq_cdl([sec_source, source])
        }
        if e_magnitude:
            photodict[PHOTOMETRY.E_MAGNITUDE] = e_magnitude
        if band.endswith('_SDSS'):
            photodict[PHOTOMETRY.BAND_SET] = 'SDSS'
            photodict[PHOTOMETRY.SYSTEM] = 'SDSS'
            band = band.replace('_SDSS', "'")
        photodict[PHOTOMETRY.BAND] = band
        catalog.entries[name].add_photometry(**photodict)
        if catalog.args.travis and rr >= catalog.TRAVIS_QUERY_LIMIT:
            break

    # Write out references that could use a bibcode
    needsbib = list(OrderedDict.fromkeys(needsbib))
    with open('../itep-needsbib.txt', 'w') as bib_file:
        # One reference per line; the old '%ss\n' format appended a stray
        # 's' to every reference.
        bib_file.writelines(['%s\n' % ii for ii in needsbib])
    catalog.journal_entries()
    return
def do_suspect_spectra(catalog):
    """Import spectra from the local copy of the SUSPECT archive.

    Walks ``Suspect/<folder>/<event>/<spectrum file>`` inside the task
    repository. The observation date is taken from the second
    underscore-separated field of each file name, and the file is read as
    space-delimited wavelength / flux (/ error) columns.

    Parameters
    ----------
    catalog
        Catalog being populated; supplies the task repository path, entry
        storage, logging and the command-line ``args``.
    """
    task_str = catalog.get_current_task_str()
    with open(
            os.path.join(catalog.get_current_task_repo(),
                         'Suspect/sources.json'), 'r') as f:
        sourcedict = json.loads(f.read())

    with open(
            os.path.join(catalog.get_current_task_repo(),
                         'Suspect/filename-changes.txt'), 'r') as f:
        rows = f.readlines()
    # Each non-comment line holds two space-separated names; the second one
    # is mapped to the first.
    changedict = {}
    for row in rows:
        if not row.strip() or row[0] == "#":
            continue
        items = row.strip().split(' ')
        changedict[items[1]] = items[0]

    suspectcnt = 0
    folders = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'Suspect')))[1]
    for folder in pbar(folders, task_str):
        eventfolders = next(
            os.walk(
                os.path.join(catalog.get_current_task_repo(), 'Suspect/') +
                folder))[1]
        oldname = ''
        for eventfolder in pbar(eventfolders, task_str):
            name = eventfolder
            if is_number(name[:4]):
                name = 'SN' + name
            name = catalog.get_preferred_name(name)
            # Journal once the walk moves on to a different event.
            if oldname and name != oldname:
                catalog.journal_entries()
            oldname = name
            name = catalog.add_entry(name)
            sec_ref = 'SUSPECT'
            sec_refurl = 'https://www.nhn.ou.edu/~suspect/'
            sec_bibc = '2001AAS...199.8408R'
            sec_source = catalog.entries[name].add_source(name=sec_ref,
                                                          url=sec_refurl,
                                                          bibcode=sec_bibc,
                                                          secondary=True)
            catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, name,
                                               sec_source)
            eventpath = os.path.join(catalog.get_current_task_repo(),
                                     'Suspect', folder, eventfolder)
            eventspectra = next(os.walk(eventpath))[2]
            for spectrum in eventspectra:
                sources = [sec_source]
                bibcode = ''
                if spectrum in changedict:
                    specalias = changedict[spectrum]
                else:
                    specalias = spectrum
                # Prefer a per-file bibcode, then fall back to a per-event
                # one.
                if specalias in sourcedict:
                    bibcode = sourcedict[specalias]
                elif name in sourcedict:
                    bibcode = sourcedict[name]
                if bibcode:
                    source = catalog.entries[name].add_source(
                        bibcode=unescape(bibcode))
                    sources += [source]
                sources = uniq_cdl(sources)

                # The date field is YYYYMMDD followed by an optional
                # fractional day, which is added back onto the integer MJD.
                date = spectrum.split('_')[1]
                year = date[:4]
                month = date[4:6]
                day = date[6:]
                sig = get_sig_digits(day) + 5
                day_fmt = str(floor(float(day))).zfill(2)
                time = astrotime(year + '-' + month + '-' + day_fmt).mjd
                time = time + float(day) - floor(float(day))
                time = pretty_num(time, sig=sig)

                specpath = os.path.join(catalog.get_current_task_repo(),
                                        'Suspect', folder, eventfolder,
                                        spectrum)
                with open(specpath, 'r') as f:
                    specdata = list(
                        csv.reader(f, delimiter=' ', skipinitialspace=True))
                    specdata = list(filter(None, specdata))
                    # Drop rows that repeat the previous row's flux value.
                    newspec = []
                    oldval = ''
                    for row in specdata:
                        if row[1] == oldval:
                            continue
                        newspec.append(row)
                        oldval = row[1]
                    specdata = newspec
                # An empty file would otherwise crash on `specdata[0]`.
                if not specdata:
                    catalog.log.warning(
                        'No data found in SUSPECT spectrum "{}", '
                        'skipping.'.format(spectrum))
                    continue
                haserrors = len(
                    specdata[0]
                ) == 3 and specdata[0][2] and specdata[0][2] != 'NaN'
                # Transpose rows into columns.
                specdata = [list(i) for i in zip(*specdata)]

                wavelengths = specdata[0]
                fluxes = specdata[1]
                errors = ''
                if haserrors:
                    errors = specdata[2]

                catalog.entries[name].add_spectrum(u_wavelengths='Angstrom',
                                                   u_fluxes='Uncalibrated',
                                                   u_time='MJD',
                                                   time=time,
                                                   wavelengths=wavelengths,
                                                   fluxes=fluxes,
                                                   errors=errors,
                                                   u_errors='Uncalibrated',
                                                   source=sources,
                                                   filename=spectrum)
                suspectcnt = suspectcnt + 1
                # Only leaves the per-event spectrum loop.
                if (catalog.args.travis and
                        suspectcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                    break

    catalog.journal_entries()
    return
def do_snf_specta(catalog):
    """Import the Nearby Supernova Factory spectra stored in the task repo.

    Every ``SNFactory/<event>/*.dat`` file is read as space-delimited text.
    Lines starting with '#' are parsed as ``KEY = value / comment`` headers
    that supply the time and observing metadata; the remaining lines are the
    wavelength / flux (/ error) columns.

    Raises
    ------
    ValueError
        If no observation time could be determined for a spectrum.
    KeyError
        If an event folder has no entry in the hard-coded bibcode table.
    """
    task_str = catalog.get_current_task_str()
    event_bibcodes = {'SN2005gj': '2006ApJ...650..510A',
                      'SN2006D': '2007ApJ...654L..53T',
                      'SN2007if': '2010ApJ...713.1073S',
                      'SN2011fe': '2013A&A...554A..27P'}
    # Files whose time is pinned by name; checked in this order after the
    # Keck special case.
    pinned_dates = (('Spectrum05_276', '2005-10-03'),
                    ('Spectrum05_329', '2005-11-25'),
                    ('Spectrum05_336', '2005-12-02'))
    # Header field -> add_spectrum keyword it fills.
    header_targets = {'OBSERVER': 'observer',
                      'OBSERVAT': 'observatory',
                      'TELESCOP': 'telescope',
                      'INSTRUME': 'instrument'}

    oldname = ''
    snfcnt = 0
    snf_root = os.path.join(catalog.get_current_task_repo(), 'SNFactory')
    eventfolders = next(os.walk(snf_root))[1]
    for eventfolder in pbar(eventfolders, task_str):
        oname = eventfolder
        name = catalog.get_preferred_name(oname)
        # Journal once the walk moves on to a different event.
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        sec_source = catalog.entries[name].add_source(
            name='Nearby Supernova Factory',
            url='http://snfactory.lbl.gov/',
            bibcode='2002SPIE.4836...61A',
            secondary=True)
        catalog.entries[name].add_quantity(SUPERNOVA.ALIAS, oname, sec_source)
        source = catalog.entries[name].add_source(
            bibcode=event_bibcodes[oname])
        sources = uniq_cdl([source, sec_source])
        pattern = os.path.join(
            catalog.get_current_task_repo(), 'SNFactory', eventfolder,
            '*.dat')
        for spectrum in pbar(glob(pattern), task_str):
            filename = os.path.basename(spectrum)
            with open(spectrum) as spec_file:
                raw_rows = list(csv.reader(
                    spec_file, delimiter=' ', skipinitialspace=True))
            raw_rows = [entry for entry in raw_rows if entry]

            meta = dict.fromkeys(header_targets.values(), '')
            time = ''
            if 'Keck_20060202_R' in spectrum:
                time = '53768.23469'
            else:
                for marker, obs_date in pinned_dates:
                    if marker in spectrum:
                        time = pretty_num(astrotime(obs_date).mjd, sig=5)
                        break

            data_rows = []
            for row in raw_rows:
                if row[0][0] != '#':
                    data_rows.append(row)
                    continue
                pieces = (' '.join(row)).split('=')
                if len(pieces) < 2:
                    continue
                field = pieces[0].strip('# ')
                value = pieces[1].split('/')[0].strip('\' ')
                # Header times only apply while no time is known yet.
                if not time:
                    if field == 'JD':
                        time = str(jd_to_mjd(Decimal(value)))
                    elif field in ('MJD', 'MJD-OBS'):
                        time = value
                if field in header_targets:
                    meta[header_targets[field]] = value.capitalize()

            if not time:
                raise ValueError('Time missing from spectrum.')

            first_row = data_rows[0]
            haserrors = (len(first_row) == 3 and first_row[2] and
                         first_row[2] != 'NaN')
            # Transpose rows into columns.
            columns = [list(col) for col in zip(*data_rows)]
            wavelengths = columns[0]
            fluxes = columns[1]
            errors = columns[2] if haserrors else ''

            unit_flx = 'erg/s/cm^2/Angstrom'
            if oldname == 'SN2011fe':
                unit_err = 'Variance'
            else:
                unit_err = 'erg/s/cm^2/Angstrom'
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes=unit_flx,
                u_time='MJD',
                time=time,
                wavelengths=wavelengths,
                fluxes=fluxes,
                errors=errors,
                observer=meta['observer'],
                observatory=meta['observatory'],
                telescope=meta['telescope'],
                instrument=meta['instrument'],
                u_errors=unit_err,
                source=sources,
                filename=filename)
            snfcnt = snfcnt + 1
            # Only leaves the per-event spectrum loop.
            if (catalog.args.travis and
                    snfcnt % catalog.TRAVIS_QUERY_LIMIT == 0):
                break

    catalog.journal_entries()
    return
def do_cleanup(catalog):
    """Cleanup catalog after importing all data.

    For every entry this sets the preferred name, derives first/max light,
    reduces multiple discoverers to the one tied to the earliest discovery
    date, converts luminosity distances to kpc, derives RA/Dec from
    SDSS-style aliases when coordinates are missing, derives a maximum
    absolute magnitude when possible, then sanitizes and journals the entry.

    Args:
        catalog: The catalog whose ``entries`` are cleaned up in place.

    Raises:
        ValueError: If a luminosity distance has a unit other than ``kpc``,
            ``pc`` or ``Mpc``.
    """
    task_str = catalog.get_current_task_str()

    # Set preferred names, calculate some columns based on imported data,
    # sanitize some fields.  The keys are copied because entries can be
    # renamed or merged while we iterate.
    keys = list(catalog.entries.keys())

    cleanupcnt = 0
    for oname in pbar(keys, task_str):
        # Some events may be merged in cleanup process, skip them if
        # non-existent.
        try:
            name = catalog.add_entry(oname)
        except Exception:
            catalog.log.warning(
                '"{}" was not found, suggests merge occurred in cleanup '
                'process.'.format(oname))
            continue

        # Set the preferred name, switching to that name if name changed.
        name = catalog.entries[name].set_preferred_name()
        entry = catalog.entries[name]

        aliases = entry.get_aliases()
        entry.set_first_max_light()

        # Clean discoverer field
        if FASTSTARS.DISCOVERER in entry:
            discoverers = entry[FASTSTARS.DISCOVERER]
            if len(discoverers) > 1:
                discover_dates = entry[FASTSTARS.DISCOVER_DATE]
                earliest_idx = np.argmin(
                    [int(date['value']) for date in discover_dates])
                earliest_date = discover_dates[earliest_idx]
                # Deal with case where a star was 'discovered' multiple
                # times in one year: only the first source alias is used.
                earliest_source = earliest_date['source'].split(',')[0]

                # Reset for every entry so that a failed search can neither
                # raise on an unbound name nor reuse the discoverer found
                # for a previously processed entry.
                earliest_discoverer = None
                for discoverer in discoverers:
                    if earliest_source in discoverer['source'].split(','):
                        # No break: the last matching discoverer wins.
                        earliest_discoverer = discoverer

                if earliest_discoverer is None:
                    catalog.log.warning(
                        'No discoverer of "{}" shares a source with its '
                        'earliest discovery date, leaving discoverers '
                        'unchanged.'.format(name))
                else:
                    entry[FASTSTARS.DISCOVERER] = [earliest_discoverer]
                entry[FASTSTARS.DISCOVER_DATE] = [earliest_date]

        # Convert all distances to kpc.
        if FASTSTARS.LUM_DIST in entry:
            for ld in entry[FASTSTARS.LUM_DIST]:
                unit = ld.get('u_value')
                if unit == 'kpc':
                    continue
                if unit == 'pc':
                    factor = Decimal('0.001')
                elif unit == 'Mpc':
                    factor = Decimal('1000')
                else:
                    raise ValueError('unknown distance unit')
                ld['value'] = str(Decimal(ld['value']) * factor)
                ld['u_value'] = 'kpc'

        # Derive coordinates from an SDSS-style alias when either is missing.
        if (FASTSTARS.RA not in entry or FASTSTARS.DEC not in entry):
            prefixes = ['SDSS']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:6])):
                        noprefix = alias.split(':')[-1].replace(
                            prefix, '').replace('.', '')
                        decsign = '+' if '+' in noprefix else '-'
                        # Split the digits into RA and Dec at the sign.
                        noprefix = noprefix.replace('+', '|').replace(
                            '-', '|')
                        nops = noprefix.split('|')
                        if len(nops) < 2:
                            continue
                        rastr = nops[0]
                        decstr = nops[1]
                        ra = ':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) + \
                            ('.' + rastr[6:] if len(rastr) > 6 else '')
                        dec = (
                            decsign + ':'.join(
                                [decstr[:2], decstr[2:4], decstr[4:6]]) +
                            ('.' + decstr[6:] if len(decstr) > 6 else ''))
                        if catalog.args.verbose:
                            tprint('Added ra/dec from name: ' + ra + ' ' +
                                   dec)
                        source = entry.add_self_source()
                        entry.add_quantity(
                            FASTSTARS.RA, ra, source, derived=True)
                        entry.add_quantity(
                            FASTSTARS.DEC, dec, source, derived=True)
                        break
                if FASTSTARS.RA in entry:
                    break

        if (FASTSTARS.MAX_ABS_MAG not in entry and
                FASTSTARS.MAX_APP_MAG in entry and
                FASTSTARS.LUM_DIST in entry):
            # Find the "best" distance to use for this: the one quoted with
            # the most significant digits.
            bestsig = 0
            bestld = None
            bestsrc = None
            for ld in entry[FASTSTARS.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld[QUANTITY.SOURCE]
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = entry.add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # NOTE(review): distances were normalised to kpc above, but
                # the next two expressions treat the value as Mpc (un.Mpc and
                # the 1.0e6 pc factor) -- confirm the intended unit before
                # relying on the derived absolute magnitude.
                bestldz = z_at_value(cosmo.luminosity_distance,
                                     float(bestld) * un.Mpc)
                pnum = (
                    float(entry[FASTSTARS.MAX_APP_MAG][0][QUANTITY.VALUE]) -
                    5.0 * (log10(float(bestld) * 1.0e6) - 1.0) +
                    2.5 * log10(1.0 + bestldz))
                pnum = pretty_num(pnum, sig=bestsig + 1)
                entry.add_quantity(
                    FASTSTARS.MAX_ABS_MAG, pnum, sources, derived=True)

        entry.sanitize()
        catalog.journal_entries(bury=True, final=True, gz=True)
        cleanupcnt = cleanupcnt + 1
        if catalog.args.travis and cleanupcnt % 1000 == 0:
            break

    catalog.save_caches()

    return
def do_ogle(catalog):
    """Import transients and I-band photometry from the OGLE-IV pages.

    Each yearly transient listing is loaded through
    ``catalog.load_cached_url`` and parsed with BeautifulSoup.  An object
    starts at the text node that follows a ``<br>`` and contains
    ``Ra,Dec=``; the nodes after it, up to the next such text node, are
    scanned for a photometric class and an Astronomer's Telegram link.  The
    matching ``.dat`` light curve is read from the local ``OGLE/`` folder
    when archived data may be used, and downloaded (then stored) otherwise.

    Args:
        catalog: The catalog being built; entries, sources, quantities and
            photometry are added to ``catalog.entries``.
    """
    task_str = catalog.get_current_task_str()
    basenames = ['transients', 'transients/2014b', 'transients/2014',
                 'transients/2013', 'transients/2012']
    oglenames = []
    # In update mode only the listings flagged True are processed.
    ogleupdate = [True, False, False, False, False]
    for b, bn in enumerate(pbar(basenames, task_str)):
        if catalog.args.update and not ogleupdate[b]:
            continue

        filepath = os.path.join(catalog.get_current_task_repo(), 'OGLE-')
        filepath += bn.replace('/', '-') + '-transients.html'
        htmltxt = catalog.load_cached_url(
            'http://ogle.astrouw.edu.pl/ogle4/' + bn +
            '/transients.html', filepath)
        if not htmltxt:
            continue

        soup = BeautifulSoup(htmltxt, 'html5lib')
        links = soup.find_all('a')
        breaks = soup.find_all('br')
        # URLs and local file names of the light curves, in page order.
        datalinks = []
        datafnames = []
        for a in links:
            if a.has_attr('href') and '.dat' in a['href']:
                datalinks.append(
                    'http://ogle.astrouw.edu.pl/ogle4/' + bn + '/' +
                    a['href'])
                datafnames.append(bn.replace('/', '-') +
                                  '-' + a['href'].replace('/', '-'))

        # Index of the current object into datalinks/datafnames; advanced
        # for every 'Ra,Dec=' node, including objects that are skipped.
        ec = -1
        reference = 'OGLE-IV Transient Detection System'
        refurl = 'http://ogle.astrouw.edu.pl/ogle4/transients/transients.html'
        for br in pbar(breaks, task_str):
            sibling = br.next_sibling
            # A <br> that is the last node of its parent has no sibling.
            if sibling is None or 'Ra,Dec=' not in sibling:
                continue
            line = sibling.replace('\n', '').split('Ra,Dec=')
            name = line[0].strip()
            ec += 1

            if 'NOVA' in name or 'dupl' in name:
                continue

            if name in oglenames:
                continue
            oglenames.append(name)

            name = catalog.add_entry(name)
            entry = catalog.entries[name]

            # Scan the nodes belonging to this object.
            follower = sibling.next_sibling
            atelref = ''
            atelurl = ''
            claimedtype = ''
            while follower is not None and 'Ra,Dec=' not in follower:
                if isinstance(follower, NavigableString):
                    if 'Phot.class=' in str(follower):
                        # Drop parenthesised remarks and the 'SN' prefix.
                        claimedtype = re.sub(
                            r'\([^)]*\)', '',
                            str(follower).split('=')[-1])
                        claimedtype = claimedtype.replace('SN', '').strip()
                if isinstance(follower, Tag):
                    atela = follower
                    if (atela and atela.has_attr('href') and
                            'astronomerstelegram' in atela['href']):
                        atelref = atela.contents[0].strip()
                        atelurl = atela['href']
                follower = follower.next_sibling

            fname = os.path.join(catalog.get_current_task_repo(),
                                 'OGLE/') + datafnames[ec]
            if (catalog.current_task.load_archive(catalog.args) and
                    os.path.isfile(fname)):
                with open(fname, 'r') as f:
                    csvtxt = f.read()
            else:
                # Read the whole body before touching the cache file so a
                # failed download cannot leave a truncated file that a later
                # run would accept as archived data.
                with urllib.request.urlopen(datalinks[ec]) as response:
                    csvtxt = response.read().decode('utf-8')
                with open(fname, 'w') as f:
                    f.write(csvtxt)

            lcdat = csvtxt.splitlines()
            sources = [entry.add_source(name=reference, url=refurl)]
            entry.add_quantity(SUPERNOVA.ALIAS, name, sources[0])
            if atelref and atelref != 'ATel#----':
                sources.append(entry.add_source(name=atelref, url=atelurl))
            sources = uniq_cdl(sources)

            # The discovery year is taken from the name: a four-digit year
            # after 'OGLE-', or two digits directly after 'OGLE'.
            if name.startswith('OGLE'):
                if name[4] == '-':
                    if is_number(name[5:9]):
                        entry.add_quantity(
                            SUPERNOVA.DISCOVER_DATE, name[5:9], sources)
                else:
                    if is_number(name[4:6]):
                        entry.add_quantity(
                            SUPERNOVA.DISCOVER_DATE, '20' + name[4:6],
                            sources)

            # RA and Dec from OGLE pages currently not reliable, so the text
            # after 'Ra,Dec=' is deliberately not imported.
            # entry.add_quantity(SUPERNOVA.RA, ra, sources)
            # entry.add_quantity(SUPERNOVA.DEC, dec, sources)
            if claimedtype and claimedtype != '-':
                entry.add_quantity(
                    SUPERNOVA.CLAIMED_TYPE, claimedtype, sources)
            elif ('SN' not in name and
                  SUPERNOVA.CLAIMED_TYPE not in entry):
                entry.add_quantity(
                    SUPERNOVA.CLAIMED_TYPE, 'Candidate', sources)

            for lcline in lcdat:
                fields = lcline.split()
                if not fields:
                    # Blank line in the light-curve file.
                    continue
                mjd = str(jd_to_mjd(Decimal(fields[0])))
                magnitude = fields[1]
                if float(magnitude) > 90.0:
                    continue
                e_mag = fields[2]
                upperlimit = False
                # An error of -1 or above 10 is recorded as an upper limit
                # without an error value.
                if e_mag == '-1' or float(e_mag) > 10.0:
                    e_mag = ''
                    upperlimit = True
                entry.add_photometry(
                    time=mjd, band='I', magnitude=magnitude,
                    e_magnitude=e_mag,
                    system='Vega', source=sources, upperlimit=upperlimit)
        if catalog.args.update:
            catalog.journal_entries()

    catalog.journal_entries()
    return
def do_simbad(catalog):
    """Import cataclysmic-variable and nova objects from SIMBAD.

    The SIMBAD mirrors are queried in turn for objects whose main type
    matches ``CV*``, ``CV?``, ``No*`` or ``No?``; the first non-empty result
    is used.  For each returned row an entry is created from ``MAIN_ID`` and
    given its aliases, a claimed type and, when a usable coordinate
    reference exists, RA/Dec.

    Args:
        catalog: The catalog being built; entries, sources and quantities
            are added to ``catalog.entries``.
    """
    # Simbad.list_votable_fields()
    # Some coordinates that SIMBAD claims belong to the SNe actually belong to
    # the host.
    task_str = catalog.get_current_task_str()
    simbadmirrors = [
        'http://simbad.harvard.edu/simbad/sim-script',
        'http://simbad.u-strasbg.fr/simbad/sim-script'
    ]
    # Coordinate references whose RA/Dec are not imported.
    simbadbadcoordbib = [
        '2013ApJ...770..107C',
    ]
    # Spectral-type references whose claimed type is not imported.
    simbadbadtypebib = [
        '2014ApJ...796...87I', '2015MNRAS.448.1206M', '2015ApJ...807L..18N'
    ]
    # Coordinate references whose rows are skipped entirely.
    simbadbadnamebib = [
        '2004AJ....127.2809W',
        '2005MNRAS.364.1419Z',
        '2015A&A...574A.112D',
        '2011MNRAS.417..916G',
        '2002ApJ...566..880G',
        'url:CBAT',
        'url:GPSA',
    ]
    # Pseudo-bibcodes that are not real bibcodes and must not be cited as one.
    badurlbibname = ['url:TNS', 'url:ASASSN', 'url:Lasair', 'url:AAVSO']
    simbadbannedcats = ['[TBV2008]', 'OGLE-MBR']
    simbadbannednames = ['SN']
    # Object types that are imported even without any bibliographic info.
    nova_types = ('Nova', 'DwarfNova')
    # Named sources cited in place of a 'url:' pseudo-bibcode.
    url_sources = {
        'url:TNS': dict(
            name='Transient Name Server',
            url='https://wis-tns.weizmann.ac.il/'),
        'url:ASASSN': dict(
            name='ASAS-CV Transients',
            bibcode="2014ApJ...788...48S",
            url='http://www.astronomy.ohio-state.edu/~assassin/'
                'transients.html'),
        'url:Lasair': dict(
            name='lASAIR Transients and Variables',
            bibcode="2019RNAAS...3...26S",
            url='https://lasair.roe.ac.uk/'),
    }

    customSimbad = Simbad()
    customSimbad.ROW_LIMIT = -1
    customSimbad.TIMEOUT = 120
    customSimbad.add_votable_fields('otype', 'sptype', 'sp_bibcode', 'id')
    table = []
    print(customSimbad.SIMBAD_URL)
    # Try each mirror until one returns a non-empty table.
    for mirror in simbadmirrors:
        customSimbad.SIMBAD_URL = mirror
        try:
            table = customSimbad.query_criteria(
                'maintypes=CV* | maintypes="CV?" | maintypes=No* | '
                'maintypes="No?"')
        except Exception:
            continue
        if table:
            break

    if not table:
        catalog.log.warning('SIMBAD unable to load, probably offline.')
        # A mirror may have handed back None; make sure the loop below is a
        # plain no-op instead of iterating over None.
        table = []

    # 2000A&AS..143....9W
    for brow in pbar(table, task_str):
        # Stringify every column, unwrapping the b'...' repr of bytes values.
        row = {
            x: re.sub(r'b\'(.*)\'', r'\1', str(brow[x]))
            for x in brow.colnames
        }
        coo_bib = row['COO_BIBCODE']
        sp_bib = row['SP_BIBCODE']
        sp_bib2 = row['SP_BIBCODE_2']
        otype = row['OTYPE']

        # Skip items with no bibliographic info aside from SIMBAD, too
        # error-prone
        if (not coo_bib and not sp_bib and not sp_bib2 and
                otype not in nova_types):
            continue
        if any(x in row['MAIN_ID'] for x in simbadbannedcats):
            continue
        if coo_bib and coo_bib in simbadbadnamebib:
            continue
        name = single_spaces(
            re.sub(r'\[[^)]*\]', '', row['MAIN_ID']).strip()).replace(
                '*', '_')
        if name in simbadbannednames:
            continue
        if is_number(name.replace(' ', '')):
            continue
        name = catalog.add_entry(name)
        entry = catalog.entries[name]
        source = (entry.add_source(
            name='SIMBAD astronomical database',
            bibcode="2000A&AS..143....9W",
            url="http://simbad.u-strasbg.fr/",
            secondary=True)).replace('*', '_')
        if coo_bib in url_sources:
            source = ','.join(
                [source, entry.add_source(**url_sources[coo_bib])])

        aliases = row['ID'].split(',')
        for alias in aliases:
            if any(x in alias for x in simbadbannedcats):
                continue
            ali = single_spaces(
                re.sub(r'\[[^)]*\]', '', alias).strip()).replace('*', '_')
            if is_number(ali.replace(' ', '')):
                continue
            if ali in simbadbannednames:
                continue
            ali = name_clean(ali)
            entry.add_quantity(CATACLYSMIC.ALIAS, ali, source)

        # The cleaned object type is the claimed type for every source set.
        claimed_type = (
            otype.replace('CV.', 'CV').replace('(~)', '').replace(
                'CV?', 'Candidate').replace('*', '').replace(
                    'No?', 'Candidate Nova').strip(': '))
        entry.add_quantity(CATACLYSMIC.CLAIMED_TYPE, claimed_type, source)

        if coo_bib and coo_bib not in simbadbadcoordbib:
            csources = source
            if coo_bib not in badurlbibname:
                csources = ','.join(
                    [source, entry.add_source(bibcode=coo_bib)])
            entry.add_quantity(CATACLYSMIC.RA, row['RA'], csources)
            entry.add_quantity(CATACLYSMIC.DEC, row['DEC'], csources)
            entry.add_quantity(
                CATACLYSMIC.CLAIMED_TYPE, claimed_type, csources)

        if sp_bib and sp_bib not in simbadbadtypebib:
            ssources = source
            # NOTE(review): this tests SP_BIBCODE_2 against the pseudo-bibcode
            # list although SP_BIBCODE is the one always cited below -- confirm
            # whether SP_BIBCODE was meant to be checked as well.
            if sp_bib2 not in badurlbibname:
                ssources = uniq_cdl(
                    [source, entry.add_source(bibcode=sp_bib)] +
                    ([entry.add_source(bibcode=sp_bib2)] if sp_bib2 else []))
            entry.add_quantity(
                CATACLYSMIC.CLAIMED_TYPE, claimed_type, ssources)

        # Novae and dwarf novae without any reference still get their
        # coordinates and raw object type, credited to SIMBAD alone.
        if otype in nova_types and sp_bib == '' and coo_bib == '':
            entry.add_quantity(CATACLYSMIC.RA, row['RA'], source)
            entry.add_quantity(CATACLYSMIC.DEC, row['DEC'], source)
            entry.add_quantity(CATACLYSMIC.CLAIMED_TYPE, otype, source)

    catalog.journal_entries()
    return