def extra_aliases(self):
    """Return aliases that are used for duplicate merging only.

    The returned names are consulted when merging duplicate entries but
    are not added to the alias list stored with the event itself.

    Returns
    -------
    list of str
        ``['AT' + <name without its 'SN' prefix>]`` when the entry name
        starts with ``'SN'`` and characters 2-5 form a number, otherwise
        an empty list.
    """
    entry_name = self[TEST_ENTRY.NAME]
    if not entry_name.startswith('SN'):
        return []
    if not is_number(entry_name[2:6]):
        return []
    # Same designation, 'AT' prefix instead of 'SN'.
    return ['AT' + entry_name[2:]]
def add_source(self, **kwargs):
    """Sanitize source fields, then delegate to the parent ``add_source``.

    Reference names beginning with ATel/CBET/IAUC are re-spaced and, when
    no bibcode was supplied, the trailing circular number is looked up in
    the matching catalog dictionary to fill in ``SOURCE.BIBCODE``.  Names
    and URLs are then replaced by their canonical forms using the
    catalog's synonym / redirect dictionaries.

    Parameters
    ----------
    **kwargs
        Source fields keyed by ``SOURCE`` keys; forwarded (after
        sanitizing) to the parent implementation.

    Returns
    -------
    Whatever the parent class' ``add_source`` returns.
    """
    # (prefix, ordered textual substitutions, catalog attribute that maps
    # circular number -> bibcode).  Order matters: each rule re-checks that
    # no bibcode has been set yet, exactly like sequential ``if`` blocks.
    circular_rules = (
        ('ATEL',
         (('ATEL', 'ATel'), ('Atel', 'ATel'), ('ATel #', 'ATel '),
          ('ATel#', 'ATel'), ('ATel', 'ATel ')),
         'atels_dict'),
        ('CBET', (('CBET', 'CBET '),), 'cbets_dict'),
        ('IAUC', (('IAUC', 'IAUC '),), 'iaucs_dict'),
    )

    if SOURCE.NAME in kwargs:
        for prefix, substitutions, dict_attr in circular_rules:
            label = kwargs[SOURCE.NAME]
            if (not label.upper().startswith(prefix) or
                    SOURCE.BIBCODE in kwargs):
                continue
            for old, new in substitutions:
                label = label.replace(old, new)
            # Collapse any runs of whitespace introduced above.
            label = ' '.join(label.split())
            kwargs[SOURCE.NAME] = label
            number = label.split()[-1]
            if is_number(number):
                # Looked up lazily so the dictionary is only touched when
                # the name actually ends in a number.
                lookup = getattr(self.catalog, dict_attr)
                if number in lookup:
                    kwargs[SOURCE.BIBCODE] = lookup[number]

        # Replace known synonyms by the canonical reference name.
        for canonical in self.catalog.source_syns:
            if kwargs[SOURCE.NAME] in self.catalog.source_syns[canonical]:
                kwargs[SOURCE.NAME] = canonical
                break

    if SOURCE.URL in kwargs:
        # Replace known redirecting URLs by their canonical target.
        for target in self.catalog.url_redirs:
            if kwargs[SOURCE.URL] in self.catalog.url_redirs[target]:
                kwargs[SOURCE.URL] = target
                break

    return super(Test_Entry, self).add_source(**kwargs)
def do_cleanup(catalog):
    """Cleanup catalog after importing all data.

    For every entry currently in ``catalog.entries`` this sets the
    preferred name and then derives quantities that are still missing,
    in this order:

    1. discovery date parsed from an alias (several prefix families),
    2. RA/Dec parsed from a coordinate-style alias,
    3. E(B-V) looked up through ``IrsaDust`` (cached in
       ``catalog.extinctions_dict``),
    4. host RA/Dec parsed from a coordinate-style host name,
    5. redshift from velocity, or from the median NED-D host distance,
    6. absolute magnitudes from apparent magnitudes + luminosity distance,
    7. velocity / luminosity distance / comoving distance from the best
       redshift (and the same for the host redshift),
    8. host angular and projected offsets.

    Each entry is then sanitized and journaled.  The order matters:
    quantities added by an earlier step are tested for by later steps.

    Parameters
    ----------
    catalog
        The catalog object whose ``entries`` are cleaned in place.

    Returns
    -------
    None
    """
    task_str = catalog.get_current_task_str()

    # Set preferred names, calculate some columns based on imported data,
    # sanitize some fields
    # Snapshot the keys: entries may be renamed/merged while iterating.
    keys = list(catalog.entries.keys())

    cleanupcnt = 0
    for oname in pbar(keys, task_str):
        # Some events may be merged in cleanup process, skip them if
        # non-existent.
        try:
            name = catalog.add_entry(oname)
        except Exception:
            err = '"{}" was not found, suggests merge occurred in cleanup process.'.format(oname)
            catalog.log.warning(err)
            continue

        # Set the preferred name, switching to that name if name changed.
        name = catalog.entries[name].set_preferred_name()

        aliases = catalog.entries[name].get_aliases()
        catalog.entries[name].purge_bandless_photometry()
        catalog.entries[name].set_first_max_light()

        # --- Discovery date from alias: '<prefix>YYMMDD...' -> 20YY/MM/DD.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['MLS', 'SSS', 'CSS', 'GRB ']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2])):
                        temp = [
                            '20' + alias.replace(prefix, '')[:2],
                            alias.replace(prefix, '')[2:4],
                            alias.replace(prefix, '')[4:6]
                        ]
                        discoverdate = '/'.join(temp)
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias + ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE, discoverdate, source, derived=True)
                        break
                # Stop scanning aliases once a date has been added.
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break
        # --- Discovery date from alias: '<prefix>YY...' -> 20YY (year only).
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = [
                'ASASSN-', 'PS1-', 'PS1', 'PS', 'iPTF', 'PTF', 'SCP-', 'SNLS-', 'SPIRITS', 'LSQ', 'DES', 'SNHiTS', 'Gaia', 'GND', 'GNW', 'GSD', 'GSW', 'EGS', 'COS', 'OGLE', 'HST'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2]) and
                            is_number(alias.replace(prefix, '')[:1])):
                        discoverdate = '20' + alias.replace(prefix, '')[:2]
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' +
                                   alias + ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE, discoverdate, source, derived=True)
                        break
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break
        # --- Discovery date from alias: 'SNFYYYYMMDD...' -> YYYY/MM/DD.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['SNF']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:4])):
                        discoverdate = ('/'.join([
                            alias.replace(prefix, '')[:4],
                            alias.replace(prefix, '')[4:6],
                            alias.replace(prefix, '')[6:8]
                        ]))
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias + ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE, discoverdate, source, derived=True)
                        break
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break
        # --- Discovery date from alias: '<prefix>YYMM...' -> 20YY/MM.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['PTFS', 'SNSDF']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2])):
                        discoverdate = ('/'.join([
                            '20' + alias.replace(prefix, '')[:2],
                            alias.replace(prefix, '')[2:4]
                        ]))
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias + ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE, discoverdate, source, derived=True)
                        break
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break
        # --- Discovery date from alias: a single run of <= 4 digits that
        # immediately follows the prefix is taken as the year.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['AT', 'SN', 'OGLE-', 'SM ', 'KSN']
            for alias in aliases:
                for prefix in prefixes:
                    if alias.startswith(prefix):
                        year = re.findall(r'\d+', alias)
                        # Only unambiguous names (exactly one digit run).
                        if len(year) == 1:
                            year = year[0]
                        else:
                            continue
                        # The digits must directly follow the prefix.
                        if alias.replace(prefix, '').index(year) != 0:
                            continue
                        if (year and is_number(year) and '.'
                                not in year and len(year) <= 4):
                            discoverdate = year
                            if catalog.args.verbose:
                                tprint('Added discoverdate from name [' + alias + ']: ' + discoverdate)
                            source = catalog.entries[name].add_self_source()
                            catalog.entries[name].add_quantity(
                                TEST_ENTRY.DISCOVER_DATE, discoverdate, source, derived=True)
                            break
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break

        # --- RA/Dec from a coordinate-style alias
        # ('<prefix>HHMMSS[.s]+DDMMSS[.s]').
        if (TEST_ENTRY.RA not in catalog.entries[name] or
                TEST_ENTRY.DEC not in catalog.entries[name]):
            prefixes = [
                'PSN J', 'MASJ', 'CSS', 'SSS', 'MASTER OT J', 'HST J', 'TCP J', 'MACS J', '2MASS J', 'EQ J', 'CRTS J', 'SMT J'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:6])):
                        # Drop the prefix and any dots so the digits can be
                        # re-sliced at fixed positions below.
                        noprefix = alias.split(':')[-1].replace(
                            prefix, '').replace('.', '')
                        decsign = '+' if '+' in noprefix else '-'
                        noprefix = noprefix.replace('+', '|').replace('-', '|')
                        nops = noprefix.split('|')
                        # No sign separator -> no declination part.
                        if len(nops) < 2:
                            continue
                        rastr = nops[0]
                        decstr = nops[1]
                        ra = ':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) + \
                            ('.' + rastr[6:] if len(rastr) > 6 else '')
                        dec = (
                            decsign + ':'.join(
                                [decstr[:2], decstr[2:4], decstr[4:6]]) + ('.'
                                + decstr[6:] if len(decstr) > 6 else ''))
                        if catalog.args.verbose:
                            tprint('Added ra/dec from name: ' + ra + ' ' + dec)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.RA, ra, source, derived=True)
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DEC, dec, source, derived=True)
                        break
                if TEST_ENTRY.RA in catalog.entries[name]:
                    break

        # --- Extinction: skipped when any host value is 'Milky Way'.
        no_host = (TEST_ENTRY.HOST not in catalog.entries[name] or not any([
            x[QUANTITY.VALUE] == 'Milky Way' for x in catalog.entries[name][TEST_ENTRY.HOST]
        ]))
        if (TEST_ENTRY.RA in catalog.entries[name] and
                TEST_ENTRY.DEC in catalog.entries[name] and no_host):
            # Imported here, so astroquery is only needed when this branch
            # actually runs.
            from astroquery.irsa_dust import IrsaDust
            if name not in catalog.extinctions_dict:
                try:
                    ra_dec = catalog.entries[name][
                        TEST_ENTRY.RA][0][QUANTITY.VALUE] + \
                        " " + \
                        catalog.entries[name][TEST_ENTRY.DEC][0][QUANTITY.VALUE]
                    result = IrsaDust.get_query_table(ra_dec, section='ebv')
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception:
                    # Best effort: a failed lookup only produces a warning.
                    warnings.warn("Coordinate lookup for " + name + " failed in IRSA.")
                else:
                    ebv = result['ext SandF mean'][0]
                    ebverr = result['ext SandF std'][0]
                    # Cache so later runs do not repeat the query.
                    catalog.extinctions_dict[name] = [ebv, ebverr]
            if name in catalog.extinctions_dict:
                sources = uniq_cdl([
                    catalog.entries[name].add_self_source(),
                    catalog.entries[name]
                    .add_source(bibcode='2011ApJ...737..103S')
                ])
                (catalog.entries[name].add_quantity(
                    TEST_ENTRY.EBV, str(catalog.extinctions_dict[name][0]), sources, e_value=str(catalog.extinctions_dict[name][1]), derived=True))

        # --- Host RA/Dec from a coordinate-style host name ('... Jhhmmss+ddmmss').
        if ((TEST_ENTRY.HOST in catalog.entries[name] and
             (TEST_ENTRY.HOST_RA not in catalog.entries[name] or
              TEST_ENTRY.HOST_DEC not in catalog.entries[name]))):
            for host in catalog.entries[name][TEST_ENTRY.HOST]:
                alias = host[QUANTITY.VALUE]
                if ' J' in alias and is_number(alias.split(' J')[-1][:6]):
                    noprefix = alias.split(' J')[-1].split(':')[-1].replace(
                        '.', '')
                    decsign = '+' if '+' in noprefix else '-'
                    noprefix = noprefix.replace('+', '|').replace('-', '|')
                    nops = noprefix.split('|')
                    if len(nops) < 2:
                        continue
                    rastr = nops[0]
                    decstr = nops[1]
                    hostra = (':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) +
                              ('.' + rastr[6:] if len(rastr) > 6 else ''))
                    hostdec = decsign + ':'.join([
                        decstr[:2], decstr[2:4], decstr[4:6]
                    ]) + ('.' + decstr[6:] if len(decstr) > 6 else '')
                    if catalog.args.verbose:
                        tprint('Added hostra/hostdec from name: ' + hostra + ' ' + hostdec)
                    source = catalog.entries[name].add_self_source()
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_RA, hostra, source, derived=True)
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_DEC, hostdec, source, derived=True)
                    break
                if TEST_ENTRY.HOST_RA in catalog.entries[name]:
                    break

        # --- Redshift from velocity.
        if (TEST_ENTRY.REDSHIFT not in catalog.entries[name] and
                TEST_ENTRY.VELOCITY in catalog.entries[name]):
            # Find the "best" velocity to use for this
            # ("best" = the value quoted with the most significant digits).
            bestsig = 0
            for hv in catalog.entries[name][TEST_ENTRY.VELOCITY]:
                sig = get_sig_digits(hv[QUANTITY.VALUE])
                if sig > bestsig:
                    besthv = hv[QUANTITY.VALUE]
                    bestsrc = hv['source']
                    bestsig = sig
            if bestsig > 0 and is_number(besthv):
                # v/c; NOTE(review): the 1.e5 factor presumably converts
                # km/s to cm/s to match CLIGHT's units -- confirm.
                voc = float(besthv) * 1.e5 / CLIGHT
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # z = sqrt((1 + v/c) / (1 - v/c)) - 1
                (catalog.entries[name].add_quantity(
                    TEST_ENTRY.REDSHIFT, pretty_num(
                        sqrt((1. + voc) / (1.
                              - voc)) - 1., sig=bestsig), sources, kind='heliocentric', derived=True))
        # --- Redshift from the median NED-D distance of a known host.
        if (TEST_ENTRY.REDSHIFT not in catalog.entries[name] and
                len(catalog.nedd_dict) > 0 and
                TEST_ENTRY.HOST in catalog.entries[name]):
            reference = "NED-D"
            refurl = "http://ned.ipac.caltech.edu/Library/Distances/"
            refbib = "1991ASSL..171...89H"
            for host in catalog.entries[name][TEST_ENTRY.HOST]:
                if host[QUANTITY.VALUE] in catalog.nedd_dict:
                    source = catalog.entries[name].add_source(
                        bibcode='2016A&A...594A..13P')
                    secondarysource = catalog.entries[name].add_source(
                        name=reference, url=refurl, bibcode=refbib, secondary=True)
                    meddist = statistics.median(catalog.nedd_dict[host[
                        QUANTITY.VALUE]])
                    # Invert the cosmology's comoving distance to get z.
                    redz = z_at_value(cosmo.comoving_distance, float(meddist) * un.Mpc)
                    redshift = pretty_num(
                        redz, sig=get_sig_digits(str(meddist)))
                    catalog.entries[name].add_quantity(
                        [TEST_ENTRY.REDSHIFT, TEST_ENTRY.HOST_REDSHIFT], redshift, uniq_cdl([source, secondarysource]), kind='host', derived=True)
        # --- Absolute magnitude from apparent magnitude + luminosity distance.
        if (TEST_ENTRY.MAX_ABS_MAG not in catalog.entries[name] and
                TEST_ENTRY.MAX_APP_MAG in catalog.entries[name] and
                TEST_ENTRY.LUM_DIST in catalog.entries[name]):
            # Find the "best" distance to use for this
            bestsig = 0
            for ld in catalog.entries[name][TEST_ENTRY.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld[QUANTITY.SOURCE]
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                bestldz = z_at_value(cosmo.luminosity_distance, float(bestld) * un.Mpc)
                # m - 5 (log10(d [pc]) - 1) + 2.5 log10(1 + z); the distance
                # is treated as Mpc (see the un.Mpc factor just above).
                pnum = (
                    float(catalog.entries[name][TEST_ENTRY.MAX_APP_MAG][0][
                        QUANTITY.VALUE]) - 5.0 *
                    (log10(float(bestld) * 1.0e6) - 1.0
                     ) + 2.5 * log10(1.0 + bestldz))
                pnum = pretty_num(pnum, sig=bestsig + 1)
                catalog.entries[name].add_quantity(
                    TEST_ENTRY.MAX_ABS_MAG, pnum, sources, derived=True)
        # --- Same for the visual band, but without the 2.5 log10(1 + z) term.
        if (TEST_ENTRY.MAX_VISUAL_ABS_MAG not in catalog.entries[name] and
                TEST_ENTRY.MAX_VISUAL_APP_MAG in catalog.entries[name]
                and TEST_ENTRY.LUM_DIST in catalog.entries[name]):
            # Find the "best" distance to use for this
            bestsig = 0
            for ld in catalog.entries[name][TEST_ENTRY.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld[QUANTITY.SOURCE]
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # FIX: what's happening here?!
                pnum = (
                    float(catalog.entries[name][
                        TEST_ENTRY.MAX_VISUAL_APP_MAG][0][QUANTITY.VALUE]) - 5.0 *
                    (log10(float(bestld) * 1.0e6) - 1.0))
                pnum = pretty_num(pnum, sig=bestsig + 1)
                catalog.entries[name].add_quantity(
                    TEST_ENTRY.MAX_VISUAL_ABS_MAG, pnum, sources, derived=True)
        # --- Quantities derived from the entry's best redshift.
        if TEST_ENTRY.REDSHIFT in catalog.entries[name]:
            # Find the "best" redshift to use for this
            bestz, bestkind, bestsig, bestsrc = catalog.entries[
                name].get_best_redshift()
            if bestsig > 0:
                try:
                    bestz = float(bestz)
                except Exception:
                    # Dump the offending entry before re-raising.
                    print(catalog.entries[name])
                    raise
                if TEST_ENTRY.VELOCITY not in catalog.entries[name]:
                    source = catalog.entries[name].add_self_source()
                    # FIX: what's happening here?!
                    # (Algebraic inverse of z = sqrt((1+v/c)/(1-v/c)) - 1.)
                    pnum = CLIGHT / KM * \
                        ((bestz + 1.)**2. - 1.) / ((bestz + 1.)**2. + 1.)
                    pnum = pretty_num(pnum, sig=bestsig)
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.VELOCITY, pnum, source, kind=(TEST_ENTRY.VELOCITY.kind_preference[bestkind] if bestkind else ''))
                if bestz > 0.:
                    if TEST_ENTRY.LUM_DIST not in catalog.entries[name]:
                        dl = cosmo.luminosity_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.LUM_DIST, pretty_num(
                                dl.value, sig=bestsig + 1), sources, kind=(TEST_ENTRY.LUM_DIST.kind_preference[bestkind] if bestkind else ''), derived=True)

                        # NOTE(review): the two magnitude blocks below read
                        # `dl` and `sources`, so they are kept inside this
                        # LUM_DIST branch where both are defined -- confirm
                        # against the original indentation.
                        if (TEST_ENTRY.MAX_ABS_MAG not in catalog.entries[name] and
                                TEST_ENTRY.MAX_APP_MAG in catalog.entries[name]):
                            source = catalog.entries[name].add_self_source()
                            pnum = pretty_num(
                                float(catalog.entries[name][
                                    TEST_ENTRY.MAX_APP_MAG][0][QUANTITY.VALUE]) - 5.0 *
                                (log10(dl.to('pc').value) - 1.0
                                 ) + 2.5 * log10(1.0 + bestz), sig=bestsig + 1)
                            catalog.entries[name].add_quantity(
                                TEST_ENTRY.MAX_ABS_MAG, pnum, sources, derived=True)
                        if (TEST_ENTRY.MAX_VISUAL_ABS_MAG not in catalog.entries[name] and
                                TEST_ENTRY.MAX_VISUAL_APP_MAG in catalog.entries[name]):
                            source = catalog.entries[name].add_self_source()
                            pnum = pretty_num(
                                float(catalog.entries[name][
                                    TEST_ENTRY.MAX_VISUAL_APP_MAG][0][
                                        QUANTITY.VALUE]) - 5.0 *
                                (log10(dl.to('pc').value) - 1.0), sig=bestsig + 1)
                            catalog.entries[name].add_quantity(
                                TEST_ENTRY.MAX_VISUAL_ABS_MAG, pnum, sources, derived=True)
                    if TEST_ENTRY.COMOVING_DIST not in catalog.entries[name]:
                        cd = cosmo.comoving_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.COMOVING_DIST, pretty_num(
                                cd.value, sig=bestsig), sources, derived=True)
        # --- Same derivations for the host redshift.
        if TEST_ENTRY.HOST_REDSHIFT in catalog.entries[name]:
            # Find the "best" redshift to use for this
            bestz, bestkind, bestsig, bestsrc = catalog.entries[
                name].get_best_redshift(TEST_ENTRY.HOST_REDSHIFT)
            if bestsig > 0:
                try:
                    bestz = float(bestz)
                except Exception:
                    print(catalog.entries[name])
                    raise
                if TEST_ENTRY.HOST_VELOCITY not in catalog.entries[name]:
                    source = catalog.entries[name].add_self_source()
                    # FIX: what's happening here?!
                    pnum = CLIGHT / KM * \
                        ((bestz + 1.)**2. - 1.) / ((bestz + 1.)**2. + 1.)
                    pnum = pretty_num(pnum, sig=bestsig)
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_VELOCITY, pnum, source, kind=(TEST_ENTRY.HOST_VELOCITY.kind_preference[bestkind] if bestkind else ''))
                if bestz > 0.:
                    if TEST_ENTRY.HOST_LUM_DIST not in catalog.entries[name]:
                        dl = cosmo.luminosity_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.HOST_LUM_DIST, pretty_num(
                                dl.value, sig=bestsig + 1), sources, kind=(TEST_ENTRY.HOST_LUM_DIST.kind_preference[
                                    bestkind] if bestkind else ''), derived=True)
                    if TEST_ENTRY.HOST_COMOVING_DIST not in catalog.entries[
                            name]:
                        cd = cosmo.comoving_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.HOST_COMOVING_DIST, pretty_num(
                                cd.value, sig=bestsig), sources, derived=True)
        # --- Host offsets; needs both the event's and the host's coordinates.
        if all([
                x in catalog.entries[name]
                for x in [
                    TEST_ENTRY.RA, TEST_ENTRY.DEC, TEST_ENTRY.HOST_RA, TEST_ENTRY.HOST_DEC
                ]
        ]):
            # For now just using first coordinates that appear in entry
            try:
                c1 = coord(
                    ra=catalog.entries[name][TEST_ENTRY.RA][0][QUANTITY.VALUE], dec=catalog.entries[name][TEST_ENTRY.DEC][0][
                        QUANTITY.VALUE], unit=(un.hourangle, un.deg))
                c2 = coord(
                    ra=catalog.entries[name][TEST_ENTRY.HOST_RA][0][
                        QUANTITY.VALUE], dec=catalog.entries[name][TEST_ENTRY.HOST_DEC][0][
                            QUANTITY.VALUE], unit=(un.hourangle,
                                                   un.deg))
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                # Unparseable coordinates: silently skip the offsets.
                pass
            else:
                sources = uniq_cdl(
                    [catalog.entries[name].add_self_source()] + catalog.
                    entries[name][TEST_ENTRY.RA][0][QUANTITY.SOURCE].split(',') +
                    catalog.entries[name][TEST_ENTRY.DEC][0][QUANTITY.SOURCE]
                    .split(',') + catalog.entries[name][TEST_ENTRY.HOST_RA][0][
                        QUANTITY.SOURCE].split(',') + catalog.entries[name][
                            TEST_ENTRY.HOST_DEC][0][QUANTITY.SOURCE].split(','))
                if TEST_ENTRY.HOST_OFFSET_ANG not in catalog.entries[name]:
                    hosa = Decimal(c1.separation(c2).arcsecond)
                    hosa = pretty_num(hosa)
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_OFFSET_ANG, hosa, sources, derived=True, u_value='arcseconds')
                if (TEST_ENTRY.COMOVING_DIST in catalog.entries[name] and
                        TEST_ENTRY.REDSHIFT in catalog.entries[name] and
                        TEST_ENTRY.HOST_OFFSET_DIST not in catalog.entries[name]):
                    offsetsig = get_sig_digits(catalog.entries[name][
                        TEST_ENTRY.HOST_OFFSET_ANG][0][QUANTITY.VALUE])
                    sources = uniq_cdl(
                        sources.split(',') + (catalog.entries[name][
                            TEST_ENTRY.COMOVING_DIST][0][QUANTITY.SOURCE]).
                        split(',') + (catalog.entries[name][TEST_ENTRY.REDSHIFT]
                                      [0][QUANTITY.SOURCE]).split(','))
                    # angle [arcsec -> rad] * comoving distance * 1000
                    # / (1 + z).  NOTE(review): presumably Mpc -> kpc for
                    # the factor 1000 -- confirm.
                    (catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_OFFSET_DIST, pretty_num(
                            float(catalog.entries[name][
                                TEST_ENTRY.HOST_OFFSET_ANG][0][QUANTITY.VALUE]) /
                            3600. * (pi / 180.) * float(catalog.entries[name][
                                TEST_ENTRY.COMOVING_DIST][0][QUANTITY.VALUE]) *
                            1000. / (1.0 + float(catalog.entries[name][
                                TEST_ENTRY.REDSHIFT][0][QUANTITY.VALUE])), sig=offsetsig), sources))

        catalog.entries[name].sanitize()
        catalog.journal_entries(bury=True, final=True, gz=True)
        cleanupcnt = cleanupcnt + 1
        # In travis (CI) mode stop once the count reaches a multiple of 1000.
        if catalog.args.travis and cleanupcnt % 1000 == 0:
            break

    catalog.save_caches()

    return
def host_clean(name):
    """Clean host name.

    Normalises the spelling of common galaxy-catalog designations
    (Abell, ARP, CGCG, ESO, IC, MCG, MRK, NGC, PGC, SDSS, UGC, Messier),
    strips leading zeros / pads numeric fields where the catalog has a
    fixed convention, and collapses runs of whitespace.

    Parameters
    ----------
    name : str
        Raw host name.

    Returns
    -------
    str
        The cleaned host name.
    """
    newname = name.strip(' ;,*')

    # Handle some special cases
    hostcases = {'M051a': 'M51A', 'M051b': 'M51B'}
    newname = hostcases.get(newname, newname)

    # Some general cases.  Each rule is applied once, in this order; later
    # rules rely on the spelling produced by earlier ones (e.g. "Mrk" is
    # upper-cased before "MRK" gets its trailing space).
    newname = newname.strip("()").replace(' ', ' ', 1)
    replacements = (
        ("ABELL", "Abell"),
        ("Abell", "Abell "),
        ("APMUKS(BJ)", "APMUKS(BJ) "),
        ("ARP", "ARP "),
        ("CGCG", "CGCG "),
        ("HOLM", "HOLM "),
        ("ESO", "ESO "),
        ("IC", "IC "),
        ("Intergal.", "Intergalactic"),
        ("MCG+", "MCG +"),
        ("MCG-", "MCG -"),
        ("M+", "MCG +"),
        ("M-", "MCG -"),
        ("MGC ", "MCG "),
        ("Mrk", "MRK"),
        ("MRK", "MRK "),
        ("NGC", "NGC "),
        ("PGC", "PGC "),
        ("SDSS", "SDSS "),
        ("UGC", "UGC "),
    )
    for old, new in replacements:
        newname = newname.replace(old, new, 1)

    # Messier objects: 'MESSIER 051' / 'M 051' / 'M051' -> 'M51'.
    if newname.startswith('MESSIER '):
        newname = newname.replace('MESSIER ', 'M', 1)
    if newname.startswith('M ') and is_number(newname[2:]):
        newname = newname.replace('M ', 'M', 1)
    if newname.startswith('M') and is_number(newname[1:]):
        newname = 'M' + newname[1:].lstrip(" 0")

    # PGC / UGC numbers carry no leading zeros.
    if len(newname) > 4 and newname.startswith("PGC "):
        newname = newname[:4] + newname[4:].lstrip(" 0")
    if len(newname) > 4 and newname.startswith("UGC "):
        newname = newname[:4] + newname[4:].lstrip(" 0")

    # MCG fields are zero-padded to 2 digits, CGCG fields to 3.
    if len(newname) > 5 and newname.startswith(("MCG +", "MCG -")):
        newname = newname[:5] + '-'.join(
            [x.zfill(2) for x in newname[5:].strip().split("-")])
    if len(newname) > 5 and newname.startswith("CGCG "):
        newname = newname[:5] + '-'.join(
            [x.zfill(3) for x in newname[5:].strip().split("-")])

    # ESO designations: 'Ennn-[G]mmm' or 'ESO nnn-[G]mmm' -> 'ESO n-Gm'.
    # 'ESO' must be tested before the bare 'E' prefix: every 'ESO...' name
    # also starts with 'E', so testing 'E' first would split off 'SO nnn'
    # and the ESO form would never be recognised.
    if len(newname) > 3 and newname.startswith('ESO'):
        esplit = newname[3:].split("-")
    elif len(newname) > 1 and newname.startswith("E"):
        esplit = newname[1:].split("-")
    else:
        esplit = []
    if len(esplit) == 2 and is_number(esplit[0].strip()):
        # Strip before inspecting the first character so that an empty or
        # space-padded second field cannot raise IndexError.
        parttwo = esplit[1].strip()
        if parttwo.startswith('G'):
            parttwo = parttwo[1:].strip()
        if is_number(parttwo):
            newname = ('ESO ' + esplit[0].strip().lstrip('0') + '-G' +
                       parttwo.lstrip('0'))

    newname = ' '.join(newname.split())
    return newname
def radec_clean(svalue, quantity, unit=''):
    """Clean R.A. and Dec.

    Converts a coordinate string into colon-separated sexagesimal form.

    Parameters
    ----------
    svalue : str
        The raw coordinate value.
    quantity : str
        Name of the quantity; a name containing ``'ra'`` is treated as a
        right ascension, one containing ``'dec'`` as a declination.
    unit : str, optional
        Input format: ``'floatdegrees'`` (decimal degrees), ``'nospace'``
        (digits run together, e.g. ``'123456.7'``) or anything else for an
        already sexagesimal value separated by spaces or colons.

    Returns
    -------
    tuple of (str, str)
        The cleaned value and its unit (``'hours'`` for R.A.,
        ``'degrees'`` for Dec.).  A non-numeric ``'floatdegrees'`` value,
        or a quantity that is neither R.A. nor Dec., is returned with the
        caller's ``unit`` unchanged.

    Raises
    ------
    ValueError
        If a ``'floatdegrees'`` conversion yields more than 60 seconds.
    """
    svalue = svalue.strip()
    if unit == 'floatdegrees':
        if not is_number(svalue):
            return (svalue, unit)
        # Convert at full precision: formatting through '%g' would silently
        # truncate the input to 6 significant digits.
        deg = float(Decimal(svalue))
        sig = get_sig_digits(svalue)
        if 'ra' in quantity:
            flhours = deg / 360.0 * 24.0
            hours = floor(flhours)
            minutes = floor((flhours - hours) * 60.0)
            seconds = (flhours * 60.0 - (hours * 60.0 + minutes)) * 60.0
            # Clamp tiny/negative rounding residue to zero.
            hours = 0 if hours < 1.e-6 else hours
            minutes = 0 if minutes < 1.e-6 else minutes
            seconds = 0.0 if seconds < 1.e-6 else seconds
            if seconds > 60.0:
                raise (ValueError('Invalid seconds value for ' + quantity))
            svalue = str(hours).zfill(2) + ':' + str(minutes).zfill(2) + \
                ':' + zpad(pretty_num(seconds, sig=sig - 1))
        elif 'dec' in quantity:
            fldeg = abs(deg)
            degree = floor(fldeg)
            minutes = floor((fldeg - degree) * 60.0)
            seconds = (fldeg * 60.0 - (degree * 60.0 + minutes)) * 60.0
            minutes = 0 if minutes < 1.e-6 else minutes
            seconds = 0.0 if seconds < 1.e-6 else seconds
            if seconds > 60.0:
                raise (ValueError('Invalid seconds value for ' + quantity))
            svalue = (('+' if deg >= 0.0 else '-') +
                      str(degree).strip('+-').zfill(2) + ':' +
                      str(minutes).zfill(2) + ':' +
                      zpad(pretty_num(seconds, sig=sig - 1)))
    elif unit == 'nospace' and 'ra' in quantity:
        svalue = svalue[:2] + ':' + svalue[2:4] + \
            ((':' + zpad(svalue[4:])) if len(svalue) > 4 else '')
    elif unit == 'nospace' and 'dec' in quantity:
        if svalue.startswith(('+', '-')):
            svalue = svalue[:3] + ':' + svalue[3:5] + \
                ((':' + zpad(svalue[5:])) if len(svalue) > 5 else '')
        else:
            # Unsigned declinations are taken to be positive.
            svalue = '+' + svalue[:2] + ':' + svalue[2:4] + \
                ((':' + zpad(svalue[4:])) if len(svalue) > 4 else '')
    else:
        svalue = svalue.replace(' ', ':')
        if 'dec' in quantity:
            # Force an explicit sign and zero-pad each field.
            valuesplit = svalue.split(':')
            svalue = (
                ('-' if valuesplit[0].startswith('-') else '+') +
                valuesplit[0].strip('+-').zfill(2) +
                (':' + valuesplit[1].zfill(2) if len(valuesplit) > 1 else '') +
                (':' + zpad(valuesplit[2]) if len(valuesplit) > 2 else ''))

    if 'ra' in quantity:
        sunit = 'hours'
    elif 'dec' in quantity:
        sunit = 'degrees'
    else:
        # Neither R.A. nor Dec.: keep the caller's unit instead of failing
        # with an unbound local.
        sunit = unit

    # Correct case of arcseconds = 60.0.
    valuesplit = svalue.split(':')
    if len(valuesplit) == 3 and valuesplit[-1] in ["60.0", "60.", "60"]:
        # Carry the 60 seconds into the minutes, keeping the two-digit
        # zero padding used everywhere else in this function.
        # NOTE(review): a minutes value of 59 becomes 60 here; the carry is
        # not propagated into the hours/degrees field.
        carried = str(Decimal(valuesplit[1]) + Decimal(1)).zfill(2)
        svalue = valuesplit[0] + ':' + carried + ':' + "00.0"

    # Strip trailing dots.
    svalue = svalue.rstrip('.')

    return (svalue, sunit)
def name_clean(name):
    """Apply list of renaming rules for test_entry names.

    The rules are applied sequentially and many depend on the output of
    earlier ones (e.g. ``'SN PTF'`` is reduced to ``'PTF'`` before the
    ``'PTF'`` lower-casing rule runs), so their order must be preserved.

    Every rule that indexes into the name checks the length first, so
    short or malformed names (a bare ``'SNLS'``, ``'SCP'``, ``'ASASSN'``,
    an ``'SNSDF'`` name without a dot, a ``'MASTER OT J'`` name without a
    declination sign, ...) are passed through instead of raising
    ``IndexError``.

    Parameters
    ----------
    name : str
        Raw event name.

    Returns
    -------
    str
        The cleaned name with whitespace runs collapsed.
    """
    newname = name.strip(' ;,*.')
    if newname.startswith('NAME '):
        newname = newname.replace('NAME ', '', 1)
    if newname.endswith(' SN'):
        newname = newname.replace(' SN', '')
    if newname.endswith(':SN'):
        newname = newname.replace(':SN', '')

    # MASTER: normalise every variant to 'MASTER OT J<coords>'.
    if newname.startswith('MASJ'):
        newname = newname.replace('MASJ', 'MASTER OT J', 1)
    if (newname.startswith('MASTER') and len(newname) > 7 and
            is_number(newname[7])):
        newname = newname.replace('MASTER', 'MASTER OT J', 1)
    if (newname.startswith('MASTER OT') and len(newname) > 10 and
            is_number(newname[10])):
        newname = newname.replace('MASTER OT', 'MASTER OT J', 1)
    if newname.startswith('MASTER OT J '):
        newname = newname.replace('MASTER OT J ', 'MASTER OT J', 1)

    if newname.startswith('PTSS '):
        newname = newname.replace('PTSS ', 'PTSS-', 1)
    if newname.startswith('SPIRITS '):
        newname = newname.replace('SPIRITS ', 'SPIRITS', 1)

    # OGLE
    if newname.startswith('OGLE '):
        newname = newname.replace('OGLE ', 'OGLE-', 1)
    if newname.startswith('OGLE-') and len(newname) != 16:
        namesp = newname.split('-')
        if (len(namesp) == 4 and len(namesp[1]) == 4 and
                is_number(namesp[1]) and is_number(namesp[3])):
            newname = 'OGLE-' + namesp[1] + '-SN-' + namesp[3].zfill(3)
        elif (len(namesp) == 2 and is_number(namesp[1][:2]) and
              not is_number(namesp[1][2:])):
            newname = 'OGLE' + namesp[1]

    # SDSS
    if newname.startswith('SN SDSS'):
        newname = newname.replace('SN SDSS ', 'SDSS', 1)
    if newname.startswith('SDSS '):
        newname = newname.replace('SDSS ', 'SDSS', 1)
    if newname.startswith('SDSS'):
        namesp = newname.split('-')
        if (len(namesp) == 3 and is_number(namesp[0][4:]) and
                is_number(namesp[1]) and is_number(namesp[2])):
            newname = namesp[0] + '-' + namesp[1] + '-' + namesp[2].zfill(3)
    if newname.startswith('SDSS-II SN'):
        namesp = newname.split()
        if len(namesp) == 3 and is_number(namesp[2]):
            newname = 'SDSS-II SN ' + namesp[2].lstrip('0')

    if newname.startswith('SN CL'):
        newname = newname.replace('SN CL', 'CL', 1)
    if newname.startswith('SN HiTS'):
        newname = newname.replace('SN HiTS', 'SNHiTS', 1)
    if newname.startswith('SNHiTS '):
        newname = newname.replace('SNHiTS ', 'SNHiTS', 1)
    if newname.startswith('GAIA'):
        newname = newname.replace('GAIA', 'Gaia', 1)
    if newname.startswith('KSN-'):
        newname = newname.replace('KSN-', 'KSN', 1)
    if newname.startswith('KSN'):
        newname = 'KSN' + newname[3:].lower()
    if newname.startswith('Gaia '):
        newname = newname.replace('Gaia ', 'Gaia', 1)
    if newname.startswith('Gaia'):
        newname = 'Gaia' + newname[4:].lower()
    if newname.startswith('GRB'):
        newname = newname.replace('GRB', 'GRB ', 1)
    # if newname.startswith('GRB ') and is_number(newname[4:].strip()):
    #     newname = 'GRB ' + newname[4:].strip() + 'A'
    if newname.startswith('ESSENCE '):
        newname = newname.replace('ESSENCE ', 'ESSENCE', 1)
    if newname.startswith('LSQ '):
        newname = newname.replace('LSQ ', 'LSQ', 1)
    if newname.startswith('LSQ') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:3] + newname[3:].lower()
    if newname.startswith('DES') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:7] + newname[7:].lower()

    # SNSDF
    if newname.startswith('SNSDF '):
        newname = newname.replace(' ', '')
    if newname.startswith('SNSDF'):
        namesp = newname.split('.')
        # Only rewrite when there really is a part after the dot.
        if len(namesp[0]) == 9 and len(namesp) > 1:
            newname = namesp[0] + '-' + namesp[1].zfill(2)

    if newname.startswith('HFF '):
        newname = newname.replace(' ', '')
    if newname.startswith('SN HST'):
        newname = newname.replace('SN HST', 'HST', 1)
    if (newname.startswith('HST ') and len(newname) > 4 and
            newname[4] != 'J'):
        newname = newname.replace('HST ', 'HST J', 1)
    if (newname.startswith('SNLS') and len(newname) > 4 and
            newname[4] != '-'):
        newname = newname.replace('SNLS', 'SNLS-', 1)
    if newname.startswith('SNLS- '):
        newname = newname.replace('SNLS- ', 'SNLS-', 1)

    # CRTS
    if newname.startswith('CRTS CSS'):
        newname = newname.replace('CRTS CSS', 'CSS', 1)
    if newname.startswith('CRTS MLS'):
        newname = newname.replace('CRTS MLS', 'MLS', 1)
    if newname.startswith('CRTS SSS'):
        newname = newname.replace('CRTS SSS', 'SSS', 1)
    if newname.startswith(('CSS', 'MLS', 'SSS')):
        newname = newname.replace(' ', ':').replace('J', '')

    if newname.startswith('SN HFF'):
        newname = newname.replace('SN HFF', 'HFF', 1)
    if newname.startswith('SN GND'):
        newname = newname.replace('SN GND', 'GND', 1)
    if newname.startswith('SN SCP'):
        newname = newname.replace('SN SCP', 'SCP', 1)
    if newname.startswith('SN UDS'):
        newname = newname.replace('SN UDS', 'UDS', 1)

    # SCP
    if (newname.startswith('SCP') and len(newname) > 3 and
            newname[3] != '-'):
        newname = newname.replace('SCP', 'SCP-', 1)
    if newname.startswith('SCP- '):
        newname = newname.replace('SCP- ', 'SCP-', 1)
    if newname.startswith('SCP-') and is_integer(newname[7:]):
        # Drop leading zeros from the trailing integer.
        newname = 'SCP-' + newname[4:7] + str(int(newname[7:]))

    # Pan-STARRS
    if newname.startswith('PS 1'):
        newname = newname.replace('PS 1', 'PS1', 1)
    if newname.startswith('PS1 SN PS'):
        newname = newname.replace('PS1 SN PS', 'PS', 1)
    if newname.startswith('PS1 SN'):
        newname = newname.replace('PS1 SN', 'PS1', 1)
    if newname.startswith('PS1') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:3] + newname[3:].lower()
    elif newname.startswith('PS1-') and len(newname) > 4 and is_number(
            newname[4]):
        newname = newname[:4] + newname[4:].lower()

    if newname.startswith('PSN K'):
        newname = newname.replace('PSN K', 'K', 1)
    if newname.startswith('K') and len(newname) > 5 and is_number(
            newname[1:5]):
        namesp = newname.split('-')
        # A 5-character first part of a >5-character name implies a '-'
        # at index 5, so namesp[1] exists here.
        if len(namesp[0]) == 5:
            newname = namesp[0] + '-' + namesp[1].zfill(3)
    if newname.startswith('Psn'):
        newname = newname.replace('Psn', 'PSN', 1)
    if newname.startswith('PSNJ'):
        newname = newname.replace('PSNJ', 'PSN J', 1)
    if newname.startswith('TCPJ'):
        newname = newname.replace('TCPJ', 'TCP J', 1)
    if newname.startswith('SMTJ'):
        newname = newname.replace('SMTJ', 'SMT J', 1)
    if newname.startswith('PSN20J'):
        newname = newname.replace('PSN20J', 'PSN J', 1)
    if newname.startswith('kait'):
        newname = newname.replace('kait', 'KAIT', 1)

    # ASAS-SN
    if newname.startswith('SN ASASSN'):
        newname = newname.replace('SN ASASSN', 'ASASSN', 1)
    if (newname.startswith('ASASSN-20') and len(newname) > 9 and
            is_number(newname[9])):
        newname = newname.replace('ASASSN-20', 'ASASSN-', 1)
    if newname.startswith('ASASSN '):
        newname = newname.replace('ASASSN ', 'ASASSN-', 1).replace('--', '-')
    if (newname.startswith('ASASSN') and len(newname) > 6 and
            newname[6] != '-'):
        newname = newname.replace('ASASSN', 'ASASSN-', 1)
    if newname.startswith('ASASSN-') and len(newname) > 7 and is_number(
            newname[7]):
        newname = newname[:7] + newname[7:].lower()

    if newname.startswith('ROTSE3J'):
        newname = newname.replace('ROTSE3J', 'ROTSE3 J', 1)
    if newname.startswith('MACSJ'):
        newname = newname.replace('MACSJ', 'MACS J', 1)
    if newname.startswith('MWSNR'):
        newname = newname.replace('MWSNR', 'MWSNR ', 1)

    # SNhunt
    if newname.startswith('SN HUNT'):
        newname = newname.replace('SN HUNT', 'SNhunt', 1)
    if newname.startswith('SN Hunt'):
        newname = newname.replace(' ', '')
    if newname.startswith('SNHunt'):
        newname = newname.replace('SNHunt', 'SNhunt', 1)
    if newname.startswith('SNhunt '):
        newname = newname.replace('SNhunt ', 'SNhunt', 1)

    # PTF / iPTF
    if newname.startswith('ptf'):
        newname = newname.replace('ptf', 'PTF', 1)
    if newname.startswith('SN PTF'):
        newname = newname.replace('SN PTF', 'PTF', 1)
    if newname.startswith('PTF '):
        newname = newname.replace('PTF ', 'PTF', 1)
    if newname.startswith('PTF') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:3] + newname[3:].lower()
    if newname.startswith('IPTF'):
        newname = newname.replace('IPTF', 'iPTF', 1)
    if newname.startswith('iPTF '):
        newname = newname.replace('iPTF ', 'iPTF', 1)
    if newname.startswith('iPTF') and len(newname) > 4 and is_number(
            newname[4]):
        newname = newname[:4] + newname[4:].lower()

    if newname.startswith('PESSTOESO'):
        newname = newname.replace('PESSTOESO', 'PESSTO ESO ', 1)

    # SNfactory: 'SNFYYYYMMDDnnn' -> 'SNFYYYYMMDD-nnn'.
    if newname.startswith('snf'):
        newname = newname.replace('snf', 'SNF', 1)
    if newname.startswith('SNF '):
        newname = newname.replace('SNF ', 'SNF', 1)
    if (newname.startswith('SNF') and is_number(newname[3:]) and
            len(newname) >= 12):
        newname = 'SNF' + newname[3:11] + '-' + newname[11:]

    # Coordinate names: insert the decimal point after the 6th digit of
    # each of the two coordinate fields when neither has one.
    if newname.startswith(('MASTER OT J', 'ROTSE3 J')):
        prefix = newname.split('J')[0]
        coords = newname.split('J')[-1].strip()
        decsign = '+' if '+' in coords else '-'
        coordsplit = coords.replace('+', '-').split('-')
        # len(coordsplit) > 1: names without a sign have no second field.
        if (len(coordsplit) > 1 and
                '.' not in coordsplit[0] and len(coordsplit[0]) > 6 and
                '.' not in coordsplit[1] and len(coordsplit[1]) > 6):
            newname = (prefix + 'J' + coordsplit[0][:6] + '.' +
                       coordsplit[0][6:] + decsign + coordsplit[1][:6] + '.' +
                       coordsplit[1][6:])

    if (newname.startswith('Gaia ') and is_number(newname[3:4]) and
            len(newname) > 5):
        newname = newname.replace('Gaia ', 'Gaia', 1)
    if (newname.startswith('AT ') and len(newname) > 7 and
            is_number(newname[3:7])):
        newname = newname.replace('AT ', 'AT', 1)

    # Bare years / year+letters get an 'SN' prefix.
    if len(newname) <= 4 and is_number(newname):
        newname = 'SN' + newname + 'A'
    if (len(newname) > 4 and is_number(newname[:4]) and
            not is_number(newname[4:])):
        newname = 'SN' + newname
    if (newname.startswith('Sn ') and is_number(newname[3:7]) and
            len(newname) > 7):
        newname = newname.replace('Sn ', 'SN', 1)
    if (newname.startswith('sn') and is_number(newname[2:6]) and
            len(newname) > 6):
        newname = newname.replace('sn', 'SN', 1)
    if (newname.startswith('SN ') and is_number(newname[3:7]) and
            len(newname) > 7):
        newname = newname.replace('SN ', 'SN', 1)

    # Letter case of the suffix: a single letter is upper-cased, a two- or
    # three-letter suffix is lower-cased.
    if (newname.startswith('SN') and is_number(newname[2:6]) and
            len(newname) == 7 and newname[6].islower()):
        newname = 'SN' + newname[2:6] + newname[6].upper()
    elif (newname.startswith('SN') and is_number(newname[2:6]) and
          (len(newname) == 8 or len(newname) == 9) and
          newname[6:].isupper()):
        newname = 'SN' + newname[2:6] + newname[6:].lower()
    if (newname.startswith('AT') and is_number(newname[2:6]) and
            len(newname) == 7 and newname[6].islower()):
        newname = 'AT' + newname[2:6] + newname[6].upper()
    elif (newname.startswith('AT') and is_number(newname[2:6]) and
          (len(newname) == 8 or len(newname) == 9) and
          newname[6:].isupper()):
        newname = 'AT' + newname[2:6] + newname[6:].lower()

    newname = (' '.join(newname.split())).strip()
    return newname
def _read_cfa_columns(fname):
    """Read the space-delimited file `fname` and return its columns.

    The file is closed before returning.  Each column is a list of the raw
    string fields; `zip` stops at the shortest row.
    """
    with open(fname, 'r') as spec_file:
        rows = csv.reader(spec_file, delimiter=' ', skipinitialspace=True)
        return [list(column) for column in zip(*rows)]


def _begin_cfa_entry(catalog, name, oldname):
    """Resolve `name` to a catalog entry and attach the CfA archive source.

    Pending entries are journaled when the resolved name differs from
    `oldname`.  Returns `(entry_name, resolved_name, source)`, where
    `resolved_name` is the value to pass as `oldname` on the next call.
    """
    resolved = catalog.get_name_for_entry_or_alias(name)
    if oldname and resolved != oldname:
        catalog.journal_entries()
    entry_name = catalog.add_entry(resolved)
    source = catalog.entries[entry_name].add_source(
        name='CfA Supernova Archive',
        url='https://www.cfa.harvard.edu/supernova/SNarchive.html',
        secondary=True,
        acknowledgment=ACKN_CFA)
    catalog.entries[entry_name].add_quantity(
        TEST_ENTRY.ALIAS, entry_name, source)
    return entry_name, resolved, source


def do_cfa_spectra(catalog):
    """Import spectra from the CfA archive.

    Walks the `CfA_SNII`, `CfA_SNIa`, `CfA_SNIbc` and `CfA_Extra`
    sub-directories of the current task repository.  Every sub-directory
    found there is treated as one event; every file inside it is read as a
    space-delimited table and added to the event with `add_spectrum`.
    Entries are journaled whenever the event name changes and once more at
    the end of each section.

    Parameters
    ----------
    catalog
        Catalog object providing the task repository, the entries and the
        `args.travis` / `TRAVIS_QUERY_LIMIT` early-exit switch.
    """
    task_str = catalog.get_current_task_str()
    repo = catalog.get_current_task_repo()

    def subdirs(section):
        # Names of the immediate sub-directories of `section`.
        return next(os.walk(os.path.join(repo, section)))[1]

    def spectrum_files(section, dirname):
        # Case-insensitively sorted paths of everything inside the event dir.
        return sorted(glob(os.path.join(repo, section, dirname, '*')),
                      key=lambda s: s.lower())

    def is_sn_style(dirname):
        # True for directory names of the form 'sn' + four-digit year + ...
        return dirname.startswith('sn') and utils.is_number(dirname[2:6])

    def dated_parts(fileparts, sn_style):
        # 'sn'-style names carry the date in the second dash-separated field
        # and the instrument in the third; other names are shifted by one.
        if sn_style:
            datepart, instpart = fileparts[1], fileparts[2]
        else:
            datepart, instpart = fileparts[2], fileparts[3]
        return (datepart[:4], datepart[4:6], datepart[6:],
                instpart.split('.')[0])

    def limit_reached(ni):
        return catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT

    # II spectra
    oldname = ''
    for ni, dirname in enumerate(
            utils.pbar(subdirs('CfA_SNII'), task_str, sort=True)):
        sn_style = is_sn_style(dirname)
        name = 'SN' + dirname[2:] if sn_style else dirname
        name, oldname, source = _begin_cfa_entry(catalog, name, oldname)
        for fname in spectrum_files('CfA_SNII', dirname):
            filename = os.path.basename(fname)
            year, month, day, instrument = dated_parts(
                filename.split('-'), sn_style)
            time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            errors = data[2]
            sources = utils.uniq_cdl([
                source,
                catalog.entries[name].add_source(
                    bibcode='2017arXiv170601030H')
            ])
            # NOTE(review): u_errors says 'ergs' while u_fluxes says 'erg';
            # kept as-is because downstream consumers may match the string.
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                filename=filename,
                wavelengths=wavelengths,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                u_errors='ergs/s/cm^2/Angstrom',
                errors=errors,
                source=sources,
                dereddened=False,
                deredshifted=False)
        if limit_reached(ni):
            break
    catalog.journal_entries()

    # Ia spectra
    oldname = ''
    for ni, dirname in enumerate(
            utils.pbar(subdirs('CfA_SNIa'), task_str, sort=True)):
        sn_style = is_sn_style(dirname)
        name = 'SN' + dirname[2:] if sn_style else dirname
        if name.startswith('snf') and utils.is_number(name[3:7]):
            name = 'SNF' + name[3:]
        name, oldname, source = _begin_cfa_entry(catalog, name, oldname)
        for fname in spectrum_files('CfA_SNIa', dirname):
            filename = os.path.basename(fname)
            year, month, day, instrument = dated_parts(
                filename.split('-'), sn_style)
            time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            errors = data[2]
            sources = utils.uniq_cdl([
                source,
                catalog.entries[name].add_source(
                    bibcode='2012AJ....143..126B'),
                catalog.entries[name].add_source(
                    bibcode='2008AJ....135.1598M')
            ])
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                filename=filename,
                wavelengths=wavelengths,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                u_errors='ergs/s/cm^2/Angstrom',
                errors=errors,
                source=sources,
                dereddened=False,
                deredshifted=False)
        if limit_reached(ni):
            break
    catalog.journal_entries()

    # Ibc spectra (the progress bar is not asked to sort in this section)
    oldname = ''
    for ni, dirname in enumerate(utils.pbar(subdirs('CfA_SNIbc'), task_str)):
        name = 'SN' + dirname[2:] if is_sn_style(dirname) else dirname
        name, oldname, source = _begin_cfa_entry(catalog, name, oldname)
        for fname in spectrum_files('CfA_SNIbc', dirname):
            filename = os.path.basename(fname)
            fileparts = filename.split('-')
            instrument = ''
            year = fileparts[1][:4]
            month = fileparts[1][4:6]
            day = fileparts[1][6:].split('.')[0]
            if len(fileparts) > 2:
                instrument = fileparts[-1].split('.')[0]
            time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            sources = utils.uniq_cdl([
                source,
                catalog.entries[name].add_source(
                    bibcode='2014AJ....147...99M')
            ])
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                wavelengths=wavelengths,
                filename=filename,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                source=sources,
                dereddened=False,
                deredshifted=False)
        if limit_reached(ni):
            break
    catalog.journal_entries()

    # Other spectra
    # Built from a string: Decimal(1.0e-15) would inherit the binary float's
    # representation error and leak noise digits into every flux string.
    flux_scale = Decimal('1e-15')
    skip_markers = ['-interp', '-z', '-dered', '-obj', '-gal']
    oldname = ''
    for ni, dirname in enumerate(
            utils.pbar(subdirs('CfA_Extra'), task_str, sort=True)):
        name = 'SN' + dirname[2:] if is_sn_style(dirname) else dirname
        name, oldname, source = _begin_cfa_entry(catalog, name, oldname)
        for fname in spectrum_files('CfA_Extra', dirname):
            if not os.path.isfile(fname):
                continue
            filename = os.path.basename(fname)
            # Only raw 'sn*...flm' files are imported; derived products
            # (interpolated, dereddened, ...) are skipped.
            if (not filename.startswith('sn') or
                    not filename.endswith('flm') or
                    any(marker in filename for marker in skip_markers)):
                continue
            fileparts = filename.split('.')[0].split('-')
            instrument = ''
            time = ''
            if len(fileparts) > 1:
                year = fileparts[1][:4]
                month = fileparts[1][4:6]
                day = fileparts[1][6:]
                if (utils.is_number(year) and utils.is_number(month) and
                        utils.is_number(day)):
                    if len(fileparts) > 2:
                        instrument = fileparts[-1]
                    time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = [str(Decimal(x) * flux_scale) for x in data[1]]
            catalog.entries[name].add_spectrum(
                u_wavelengths='Angstrom',
                u_fluxes='erg/s/cm^2/Angstrom',
                wavelengths=wavelengths,
                filename=filename,
                fluxes=fluxes,
                u_time='MJD' if time else '',
                time=time,
                instrument=instrument,
                source=source,
                dereddened=False,
                deredshifted=False)
        if limit_reached(ni):
            break
    catalog.journal_entries()
def set_preferred_name(self):
    """Set preferred name of test_entry.

    Order of preference:

    1. the current name, if it already has the form 'SN####AA';
    2. the first alias of the form 'SN####AA';
    3. the first alias matching the survey listed as 'discoverer';
    4. the first alias of the form 'AT####AA';
    5. the shortest alias, preferring aliases that do not start with 'PSN'.

    When the chosen name differs from the current one, the entry is moved
    (or merged into an already existing entry) under the new name in
    `self.catalog.entries` and the old key is deleted.

    Returns
    -------
    str
        The name under which the entry is stored after the call.
    """
    name = self[self._KEYS.NAME]
    aliases = self.get_aliases()

    def is_designation(candidate, prefix):
        # `prefix` followed by a 4- or 3-digit number and a non-numeric rest.
        start = len(prefix)
        if not candidate.startswith(prefix):
            return False
        four = (is_number(candidate[start:start + 4]) and
                not is_number(candidate[start + 4:]))
        three = (is_number(candidate[start:start + 3]) and
                 not is_number(candidate[start + 3:]))
        return four or three

    def first_alias(predicate):
        # First alias satisfying `predicate`, or '' when none does.
        for alias in aliases:
            if predicate(alias):
                return alias
        return ''

    def first_alias_with(tokens):
        # First alias whose upper-cased form contains any of `tokens`.
        return first_alias(
            lambda alias: any(tok in alias.upper() for tok in tokens))

    # if there are no other options to choose from, skip
    if len(aliases) <= 1:
        return name

    # If the name is already in the form 'SN####AA' then keep using that
    if is_designation(name, 'SN'):
        return name

    # If one of the aliases is in the form 'SN####AA' then use that
    newname = first_alias(lambda alias: is_designation(alias, 'SN'))

    # If not, name based on the 'discoverer' survey
    if not newname and TEST_ENTRY.DISCOVERER in self:
        discoverer = ','.join(
            [x['value'].upper() for x in self[TEST_ENTRY.DISCOVERER]])
        # (survey named by the discoverer, alias tokens of that survey),
        # tried in order until one yields an alias.
        survey_rules = (
            ('ASAS' in discoverer, ('ASASSN',)),
            ('OGLE' in discoverer, ('OGLE',)),
            ('CRTS' in discoverer, ('CSS', 'MLS', 'SSS', 'SNHUNT')),
            ('PS1' in discoverer, ('PS1',)),
            ('PTF' in discoverer, ('PTF',)),
            ('la silla-quest' in discoverer.lower(), ('LSQ',)),
            ('GAIA' in discoverer, ('GAIA',)),
        )
        for applies, tokens in survey_rules:
            if applies:
                newname = first_alias_with(tokens)
                if newname:
                    break

    # If one of the aliases is in the form 'AT####AA' then use that
    if not newname:
        newname = first_alias(lambda alias: is_designation(alias, 'AT'))

    # Otherwise, use the shortest name, always preferring another alias over
    # PSN.  (The PSN preference used to sit in a separate branch after the
    # shortest-name fallback, where it could never run.)
    if not newname:
        non_psn = [alias for alias in aliases
                   if not alias.startswith('PSN')]
        newname = min(non_psn or aliases, key=len)

    if not newname or name == newname:
        return name

    entries = self.catalog.entries
    file_entry = None
    # Make sure new name doesn't already exist
    if newname in entries:
        if entries[newname]._stub:
            # A stub is replaced by the entry loaded via `init_from_file`.
            file_entry = self.init_from_file(self.catalog, name=newname)
        else:
            file_entry = entries[newname]

    if file_entry:
        self._log.info(
            "`{}` already exists, copying `{}` to it".format(
                newname, name))
        self.catalog.copy_entry_to_entry(entries[name], file_entry)
        entries[newname] = file_entry
    else:
        self._log.info("Changing entry from name '{}' to preferred"
                       " name '{}'".format(name, newname))
        entries[newname] = entries[name]
        entries[newname][self._KEYS.NAME] = newname
    del entries[name]
    return newname
def _clean_quantity(self, quantity):
    """Clean quantity value before it is added to entry.

    Normalizes the value, error, unit and kind of `quantity` in place,
    applying key-specific rules (aliases, hosts, claimed types, coordinates
    and dates).

    Parameters
    ----------
    quantity
        Quantity object; read through `get`, `_key` and item access.

    Returns
    -------
    bool
        True when the quantity should be kept, False when it is empty or
        rejected by one of the key-specific rules.

    Raises
    ------
    ValueError
        If an error value is given that is not a number or is negative.
    """
    value = quantity.get(QUANTITY.VALUE, '').strip()
    error = quantity.get(QUANTITY.E_VALUE, '').strip()
    unit = quantity.get(QUANTITY.U_VALUE, '').strip()
    kinds = [x.strip() for x in listify(quantity.get(QUANTITY.KIND, []))]
    key = quantity._key

    if not value:
        return False

    if error and (not is_number(error) or float(error) < 0):
        raise ValueError(self[self._KEYS.NAME] + "'s quanta " + key +
                         ' error value must be a number and positive.')

    # Set default units
    if not unit:
        if key == self._KEYS.VELOCITY:
            unit = 'km/s'
        elif key == self._KEYS.RA:
            unit = 'hours'
        elif key == self._KEYS.DEC:
            unit = 'degrees'
        elif key in [self._KEYS.LUM_DIST, self._KEYS.COMOVING_DIST]:
            unit = 'Mpc'

    # Handle certain name
    if key == self._KEYS.ALIAS:
        value = self.catalog.clean_entry_name(value)
        # An alias equal to one of the 'distinct from' values is rejected.
        for df in quantity.get(self._KEYS.DISTINCT_FROM, []):
            if value == df[QUANTITY.VALUE]:
                return False
    elif key == self._KEYS.HOST:
        if is_number(value):
            return False
        if value.lower() in ['anonymous', 'anon.', 'anon', 'intergalactic']:
            return False
        value = host_clean(value)
        lowered = value.lower()
        looks_like_cluster = (
            (lowered.startswith('abell') and is_number(value[5:].strip()))
            or 'cluster' in lowered)
        if not kinds and looks_like_cluster:
            kinds = ['cluster']
    elif key == self._KEYS.HOST_REDSHIFT:
        kinds = [kind for kind in kinds if kind != 'host']
    elif key == self._KEYS.CLAIMED_TYPE:
        if value.startswith('SN '):
            value = value.replace('SN ', '', 1)
        # Strip the whitespace left behind by removing 'young'; otherwise
        # e.g. 'young Ia' becomes ' Ia' and misses the synonym table.
        value = value.replace('young', '').strip()
        uncertain = '?' in value
        if uncertain:
            value = value.strip(' ?')
        for rep in self.catalog.type_syns:
            if value in self.catalog.type_syns[rep]:
                value = rep
                break
        # Reject empty types *before* re-attaching the '?' marker, so that a
        # bare '?' is dropped instead of being stored as a type.
        if not value:
            return False
        if uncertain:
            value = value + '?'
    elif key in [
            self._KEYS.RA, self._KEYS.DEC, self._KEYS.HOST_RA,
            self._KEYS.HOST_DEC
    ]:
        (value, unit) = radec_clean(value, key, unit=unit)
    elif key == self._KEYS.MAX_DATE or key == self._KEYS.DISCOVER_DATE:
        # Make sure month and day have leading zeroes
        sparts = value.split('/')
        # NOTE(review): the threshold is five characters although the
        # message says four digits -- possibly to allow a sign; confirm.
        if len(sparts[0]) > 5:
            # `warning` replaces the deprecated `warn` alias; assumes
            # `self._log` is a standard `logging.Logger` -- TODO confirm.
            self._log.warning("Date year {} greater than four "
                              "digits.".format(sparts[0]))
        if len(sparts) >= 2:
            value = sparts[0] + '/' + sparts[1].zfill(2)
        if len(sparts) == 3:
            value = value + '/' + sparts[2].zfill(2)
        # TODO: only add dates that carry more information (more '/' parts)
        # than the dates already stored under this key.

    # '%g' formats with at most six significant digits.
    if is_number(value):
        value = '%g' % Decimal(value)
    if error:
        error = '%g' % Decimal(error)

    # Write the cleaned fields back onto the quantity.
    if value:
        quantity[QUANTITY.VALUE] = value
    if error:
        quantity[QUANTITY.E_VALUE] = error
    if unit:
        quantity[QUANTITY.U_VALUE] = unit
    if kinds:
        # A single kind is stored as a scalar, several as a list.
        quantity[QUANTITY.KIND] = kinds if len(kinds) > 1 else kinds[0]
    elif QUANTITY.KIND in quantity:
        del quantity[QUANTITY.KIND]

    return True
def sanitize(self):
    """Sanitize the entry after import.

    After calling the parent `sanitize`, this:

    * adds an 'AT' alias to 'SN' events discovered in 2016 or later;
    * prioritizes claimed types, drops '?' / '-' placeholders, drops
      'candidate' when other types exist, and marks untyped 'AT' events as
      'Candidate';
    * normalizes source bibcodes and looks up their author string (cached
      in `self.catalog.bibauthor_dict`), one HTTP request per new bibcode;
    * orders redshifts and velocities by `frame_priority`;
    * sorts sources by `bib_priority`, renumbers their aliases and rewrites
      the source lists of all data items to the new numbering.
    """
    # `import urllib` alone does not guarantee that the `parse` and
    # `request` submodules are loaded, so import them explicitly.
    import urllib.parse
    import urllib.request
    from html import unescape

    super(Test_Entry, self).sanitize()

    # Calculate some columns based on imported data, sanitize some fields
    name = self[self._KEYS.NAME]
    aliases = self.get_aliases()

    # NOTE(review): `'AT' in x` matches 'AT' anywhere in an alias, not only
    # as a prefix -- confirm that this is intended.
    if (name.startswith('SN') and is_number(name[2:6]) and
            self._KEYS.DISCOVER_DATE in self and
            int(self[self._KEYS.DISCOVER_DATE][0][QUANTITY.VALUE].split(
                '/')[0]) >= 2016 and
            not any('AT' in x for x in aliases)):
        source = self.add_self_source()
        self.add_quantity(self._KEYS.ALIAS, 'AT' + name[2:], source)

    if self._KEYS.CLAIMED_TYPE in self:
        # FIX: this is something that should be done completely internally
        #      i.e. add it to `clean` or something??
        self[self._KEYS.CLAIMED_TYPE] = self.ct_list_prioritized()
    if self._KEYS.CLAIMED_TYPE in self:
        self[self._KEYS.CLAIMED_TYPE][:] = [
            ct for ct in self[self._KEYS.CLAIMED_TYPE]
            if ct[QUANTITY.VALUE] not in ['?', '-']
        ]
        # 'candidate' is only kept when it is the sole claimed type.
        if (len(self[self._KEYS.CLAIMED_TYPE]) > 1 and
                any(ct[QUANTITY.VALUE].lower() == 'candidate'
                    for ct in self[self._KEYS.CLAIMED_TYPE])):
            self[self._KEYS.CLAIMED_TYPE][:] = [
                ct for ct in self[self._KEYS.CLAIMED_TYPE]
                if ct[QUANTITY.VALUE].lower() != 'candidate'
            ]
        if not self[self._KEYS.CLAIMED_TYPE]:
            del self[self._KEYS.CLAIMED_TYPE]
    if self._KEYS.CLAIMED_TYPE not in self and name.startswith('AT'):
        source = self.add_self_source()
        self.add_quantity(self._KEYS.CLAIMED_TYPE, 'Candidate', source)

    if self._KEYS.SOURCES in self:
        for source in self[self._KEYS.SOURCES]:
            if SOURCE.BIBCODE not in source:
                continue
            bibcode = source[SOURCE.BIBCODE]
            # First sanitize the bibcode
            if len(bibcode) != 19:
                bibcode = urllib.parse.unquote(unescape(bibcode)).replace(
                    'A.A.', 'A&A')
            if bibcode in self.catalog.biberror_dict:
                bibcode = self.catalog.biberror_dict[bibcode]

            if bibcode not in self.catalog.bibauthor_dict:
                adsquery = (self.catalog.ADS_BIB_URL +
                            urllib.parse.quote(bibcode) +
                            '&data_type=Custom&format=%253m%20%25(y)')
                bibcodeauthor = ''
                try:
                    with urllib.request.urlopen(adsquery) as response:
                        html = response.read().decode('utf-8')
                    hsplit = html.split("\n")
                    # The author string is taken from the sixth line.
                    if len(hsplit) > 5:
                        bibcodeauthor = hsplit[5]
                except Exception:
                    # Best effort: a failed lookup leaves `bibcodeauthor`
                    # empty and is reported by the warning below.
                    pass

                if not bibcodeauthor:
                    warnings.warn(
                        "Bibcode didn't return authors, not converting"
                        " this bibcode.")

                # Cached even when empty, so a bibcode is queried only once.
                self.catalog.bibauthor_dict[bibcode] = unescape(
                    bibcodeauthor).strip()

            source[SOURCE.BIBCODE] = bibcode
            # Only a non-empty author string becomes the reference; a
            # failed lookup must not write an empty reference.
            author = self.catalog.bibauthor_dict.get(bibcode, None)
            if author:
                source[SOURCE.REFERENCE] = author
            if SOURCE.NAME not in source:
                source[SOURCE.NAME] = bibcode

    if self._KEYS.REDSHIFT in self:
        self[self._KEYS.REDSHIFT] = sorted(
            self[self._KEYS.REDSHIFT],
            key=lambda q: frame_priority(q, self._KEYS.REDSHIFT))

    if self._KEYS.VELOCITY in self:
        self[self._KEYS.VELOCITY] = sorted(
            self[self._KEYS.VELOCITY],
            key=lambda q: frame_priority(q, self._KEYS.VELOCITY))

    # Renumber and reorder sources
    if self._KEYS.SOURCES in self:
        # Sort sources by `bib_priority`
        self[self._KEYS.SOURCES] = sorted(self[self._KEYS.SOURCES],
                                          key=bib_priority)

        # Assign new aliases to match new order: old alias -> new alias
        sources_list = self[self._KEYS.SOURCES]
        source_reps = OrderedDict(
            [[src[SOURCE.ALIAS], str(ii + 1)]
             for ii, src in enumerate(sources_list)])
        for ii, source in enumerate(sources_list):
            self[self._KEYS.SOURCES][ii][SOURCE.ALIAS] = source_reps[
                source[SOURCE.ALIAS]]

        # Change sources of data to match new aliases
        for key in self.keys():
            if self._KEYS.get_key_by_name(key).no_source:
                continue
            for item in self[key]:
                try:
                    renumbered = [
                        int(source_reps[x])
                        for x in item[item._KEYS.SOURCE].split(',')
                    ]
                except Exception:
                    # Dump the offending item for debugging, then re-raise.
                    print("Failed")
                    print(
                        "item[item._KEYS.SOURCE].split(',') = '{}'".format(
                            item[item._KEYS.SOURCE].split(',')))
                    print("source_reps = '{}'".format(source_reps))
                    print("key = '{}'".format(key), repr(key))
                    print("item = '{}'".format(item), repr(item))
                    raise
                item[item._KEYS.SOURCE] = ','.join(
                    str(alias) for alias in sorted(renumbered))