Exemple #1
0
 def extra_aliases(self):
     """Return additional aliases used only for duplicate merging.

     The returned names are meant to be consulted when merging duplicate
     events; they are not part of the alias list stored with the event.

     When the entry name starts with ``'SN'`` and characters 2-5 of the
     name form a number, the ``'AT'``-prefixed variant of the same
     designation is returned as a one-element list; otherwise the list is
     empty.
     """
     entry_name = self[TEST_ENTRY.NAME]
     # Guard clauses: anything that is not an "SN<number...>" designation
     # has no extra alias.
     if not entry_name.startswith('SN'):
         return []
     if not is_number(entry_name[2:6]):
         return []
     return ['AT' + entry_name[2:]]
Exemple #2
0
    def add_source(self, **kwargs):
        """Sanitize source fields in ``kwargs`` and delegate to the parent.

        If a source name is given and no bibcode accompanies it, names that
        begin (case-insensitively) with ``ATEL``, ``CBET`` or ``IAUC`` are
        whitespace-normalized, and the trailing token, when numeric and
        present in the matching catalog lookup table, supplies the bibcode.
        The name is then replaced by its canonical form from
        ``self.catalog.source_syns`` and the URL by its canonical form from
        ``self.catalog.url_redirs`` when a match exists.

        Returns whatever the parent class's ``add_source`` returns.
        """
        if SOURCE.NAME in kwargs:
            src_name = kwargs[SOURCE.NAME]
            if (src_name.upper().startswith('ATEL')
                    and SOURCE.BIBCODE not in kwargs):
                # Successive spelling fixes; order matters because each
                # step feeds the next.
                atel_fixes = (
                    ('ATEL', 'ATel'),
                    ('Atel', 'ATel'),
                    ('ATel #', 'ATel '),
                    ('ATel#', 'ATel'),
                    ('ATel', 'ATel '),
                )
                for old, new in atel_fixes:
                    src_name = src_name.replace(old, new)
                # Collapse any run of whitespace into a single space.
                src_name = ' '.join(src_name.split())
                kwargs[SOURCE.NAME] = src_name
                atelnum = src_name.split()[-1]
                if is_number(atelnum) and atelnum in self.catalog.atels_dict:
                    kwargs[SOURCE.BIBCODE] = self.catalog.atels_dict[atelnum]

            # CBET and IAUC circulars follow the same recipe; only the tag
            # and the catalog lookup table differ.
            for tag, table_attr in (('CBET', 'cbets_dict'),
                                    ('IAUC', 'iaucs_dict')):
                src_name = kwargs[SOURCE.NAME]
                if (not src_name.upper().startswith(tag)
                        or SOURCE.BIBCODE in kwargs):
                    continue
                src_name = ' '.join(src_name.replace(tag, tag + ' ').split())
                kwargs[SOURCE.NAME] = src_name
                circular_num = src_name.split()[-1]
                if is_number(circular_num):
                    # Table is fetched only once the number is known to be
                    # valid, mirroring a short-circuit ``and``.
                    table = getattr(self.catalog, table_attr)
                    if circular_num in table:
                        kwargs[SOURCE.BIBCODE] = table[circular_num]

            # Swap a known synonym for its canonical source name.
            for canonical in self.catalog.source_syns:
                if kwargs[SOURCE.NAME] in self.catalog.source_syns[canonical]:
                    kwargs[SOURCE.NAME] = canonical
                    break

        if SOURCE.URL in kwargs:
            # Swap a known redirecting URL for its canonical target.
            for target in self.catalog.url_redirs:
                if kwargs[SOURCE.URL] in self.catalog.url_redirs[target]:
                    kwargs[SOURCE.URL] = target
                    break

        return super(Test_Entry, self).add_source(**kwargs)
Exemple #3
0
def do_cleanup(catalog):
    """Cleanup catalog after importing all data.

    For every entry name present in ``catalog.entries`` when the function
    starts, this:

    * loads the entry via ``catalog.add_entry`` (skipping it with a logged
      warning if that raises) and switches to its preferred name;
    * purges bandless photometry and sets first/max light;
    * derives a discovery date from alias naming patterns when none is set;
    * derives RA/Dec from coordinate-style aliases when either is missing;
    * looks up E(B-V) through ``IrsaDust`` (cached in
      ``catalog.extinctions_dict``) when coordinates are known;
    * derives host RA/Dec from coordinate-style host names;
    * derives redshift from velocity or from ``catalog.nedd_dict`` distances;
    * derives absolute magnitudes, velocity, luminosity and comoving
      distances (for the event and for its host) from the best available
      distance/redshift;
    * derives the angular and projected offset between event and host;
    * sanitizes the entry and journals it.

    Afterwards ``catalog.save_caches()`` is called.

    Parameters
    ----------
    catalog
        Catalog object providing ``entries``, ``args``, ``log``, the lookup
        dictionaries used below and the entry/journal methods called here.

    Returns
    -------
    None
    """
    task_str = catalog.get_current_task_str()

    # Set preferred names, calculate some columns based on imported data,
    # sanitize some fields
    # Snapshot of the keys: the loop body may rename/merge entries.
    keys = list(catalog.entries.keys())

    cleanupcnt = 0
    for oname in pbar(keys, task_str):
        # Some events may be merged in cleanup process, skip them if
        # non-existent.
        try:
            name = catalog.add_entry(oname)
        except Exception:
            err = '"{}" was not found, suggests merge occurred in cleanup process.'.format(oname)
            catalog.log.warning(err)
            continue

        # Set the preferred name, switching to that name if name changed.
        name = catalog.entries[name].set_preferred_name()

        aliases = catalog.entries[name].get_aliases()
        catalog.entries[name].purge_bandless_photometry()
        catalog.entries[name].set_first_max_light()

        # --- Discovery date inferred from alias naming patterns ---------
        # Each stage below only runs if no discovery date exists yet, so
        # the stages are ordered from most to least specific.

        # Stage 1: '<prefix>YYMMDD...' -> '20YY/MM/DD'.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['MLS', 'SSS', 'CSS', 'GRB ']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and is_number(alias.replace(prefix, '')[:2])):
                        temp = [
                            '20' + alias.replace(prefix, '')[:2],
                            alias.replace(prefix, '')[2:4],
                            alias.replace(prefix, '')[4:6]
                        ]
                        discoverdate = '/'.join(temp)
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias + ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE, discoverdate, source, derived=True)
                        break
                # Stop scanning aliases as soon as one produced a date.
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break

        # Stage 2: '<prefix>YY...' -> year only ('20YY').
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = [
                'ASASSN-', 'PS1-', 'PS1', 'PS', 'iPTF', 'PTF', 'SCP-', 'SNLS-',
                'SPIRITS', 'LSQ', 'DES', 'SNHiTS', 'Gaia', 'GND', 'GNW', 'GSD',
                'GSW', 'EGS', 'COS', 'OGLE', 'HST'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    # Both the first two characters together and the first
                    # character alone must pass is_number.
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2]) and
                            is_number(alias.replace(prefix, '')[:1])):
                        discoverdate = '20' + alias.replace(prefix, '')[:2]
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias +
                                   ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE,
                            discoverdate,
                            source,
                            derived=True)
                        break
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break
        # Stage 3: 'SNFYYYYMMDD...' -> 'YYYY/MM/DD'.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['SNF']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:4])):
                        discoverdate = ('/'.join([
                            alias.replace(prefix, '')[:4],
                            alias.replace(prefix, '')[4:6],
                            alias.replace(prefix, '')[6:8]
                        ]))
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias +
                                   ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE,
                            discoverdate,
                            source,
                            derived=True)
                        break
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break
        # Stage 4: '<prefix>YYMM...' -> '20YY/MM'.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['PTFS', 'SNSDF']
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:2])):
                        discoverdate = ('/'.join([
                            '20' + alias.replace(prefix, '')[:2],
                            alias.replace(prefix, '')[2:4]
                        ]))
                        if catalog.args.verbose:
                            tprint('Added discoverdate from name [' + alias +
                                   ']: ' + discoverdate)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DISCOVER_DATE,
                            discoverdate,
                            source,
                            derived=True)
                        break
                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break

        # Stage 5: the alias must contain exactly one run of digits, that
        # run must immediately follow the prefix, and it must be at most
        # four digits long; it is then used verbatim as the year.
        if TEST_ENTRY.DISCOVER_DATE not in catalog.entries[name]:
            prefixes = ['AT', 'SN', 'OGLE-', 'SM ', 'KSN']
            for alias in aliases:
                for prefix in prefixes:
                    if alias.startswith(prefix):
                        year = re.findall(r'\d+', alias)
                        if len(year) == 1:
                            year = year[0]
                        else:
                            continue
                        if alias.replace(prefix, '').index(year) != 0:
                            continue
                        if (year and is_number(year) and '.' not in year and len(year) <= 4):
                            discoverdate = year
                            if catalog.args.verbose:
                                tprint('Added discoverdate from name [' +
                                       alias + ']: ' + discoverdate)
                            source = catalog.entries[name].add_self_source()
                            catalog.entries[name].add_quantity(
                                TEST_ENTRY.DISCOVER_DATE, discoverdate, source, derived=True)
                            break

                if TEST_ENTRY.DISCOVER_DATE in catalog.entries[name]:
                    break

        # --- RA/Dec inferred from coordinate-style aliases --------------
        # The text after the prefix is split at the '+'/'-' sign into an RA
        # part and a Dec part; each is cut into three two-character fields
        # joined by ':' with any remaining characters appended after a '.'.
        if (TEST_ENTRY.RA not in catalog.entries[name] or TEST_ENTRY.DEC not in catalog.entries[name]):
            prefixes = [
                'PSN J', 'MASJ', 'CSS', 'SSS', 'MASTER OT J', 'HST J', 'TCP J',
                'MACS J', '2MASS J', 'EQ J', 'CRTS J', 'SMT J'
            ]
            for alias in aliases:
                for prefix in prefixes:
                    if (alias.startswith(prefix) and
                            is_number(alias.replace(prefix, '')[:6])):
                        noprefix = alias.split(':')[-1].replace(
                            prefix, '').replace('.', '')
                        decsign = '+' if '+' in noprefix else '-'
                        # Normalize both possible sign characters to one
                        # separator so a single split yields RA and Dec.
                        noprefix = noprefix.replace('+', '|').replace('-', '|')
                        nops = noprefix.split('|')
                        if len(nops) < 2:
                            continue
                        rastr = nops[0]
                        decstr = nops[1]
                        ra = ':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) + \
                            ('.' + rastr[6:] if len(rastr) > 6 else '')
                        dec = (
                            decsign + ':'.join(
                                [decstr[:2], decstr[2:4], decstr[4:6]]) +
                            ('.' + decstr[6:] if len(decstr) > 6 else ''))
                        if catalog.args.verbose:
                            tprint('Added ra/dec from name: ' + ra + ' ' + dec)
                        source = catalog.entries[name].add_self_source()
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.RA, ra, source, derived=True)
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.DEC, dec, source, derived=True)
                        break
                if TEST_ENTRY.RA in catalog.entries[name]:
                    break

        # --- Extinction lookup ------------------------------------------
        # NOTE: despite its name, `no_host` is True when the entry has no
        # host at all OR when none of its host values equals 'Milky Way'.
        no_host = (TEST_ENTRY.HOST not in catalog.entries[name] or not any([
            x[QUANTITY.VALUE] == 'Milky Way'
            for x in catalog.entries[name][TEST_ENTRY.HOST]
        ]))
        if (TEST_ENTRY.RA in catalog.entries[name] and
                TEST_ENTRY.DEC in catalog.entries[name] and no_host):
            # Imported here rather than at module level, so astroquery is
            # only needed when this branch actually runs.
            from astroquery.irsa_dust import IrsaDust
            # Query only on a cache miss; results are stored per entry name.
            if name not in catalog.extinctions_dict:
                try:
                    ra_dec = catalog.entries[name][
                        TEST_ENTRY.RA][0][QUANTITY.VALUE] + \
                        " " + \
                        catalog.entries[name][TEST_ENTRY.DEC][0][QUANTITY.VALUE]
                    result = IrsaDust.get_query_table(ra_dec, section='ebv')
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception:
                    # Best effort: a failed lookup only emits a warning.
                    warnings.warn("Coordinate lookup for " + name +
                                  " failed in IRSA.")
                else:
                    ebv = result['ext SandF mean'][0]
                    ebverr = result['ext SandF std'][0]
                    catalog.extinctions_dict[name] = [ebv, ebverr]
            if name in catalog.extinctions_dict:
                sources = uniq_cdl([
                    catalog.entries[name].add_self_source(),
                    catalog.entries[name]
                    .add_source(bibcode='2011ApJ...737..103S')
                ])
                (catalog.entries[name].add_quantity(
                    TEST_ENTRY.EBV,
                    str(catalog.extinctions_dict[name][0]),
                    sources,
                    e_value=str(catalog.extinctions_dict[name][1]),
                    derived=True))
        # --- Host RA/Dec inferred from coordinate-style host names ------
        # Same parsing scheme as for the event aliases above, applied to
        # whatever follows the last ' J' in the host name.
        if ((TEST_ENTRY.HOST in catalog.entries[name] and
             (TEST_ENTRY.HOST_RA not in catalog.entries[name] or
              TEST_ENTRY.HOST_DEC not in catalog.entries[name]))):
            for host in catalog.entries[name][TEST_ENTRY.HOST]:
                alias = host[QUANTITY.VALUE]
                if ' J' in alias and is_number(alias.split(' J')[-1][:6]):
                    noprefix = alias.split(' J')[-1].split(':')[-1].replace(
                        '.', '')
                    decsign = '+' if '+' in noprefix else '-'
                    noprefix = noprefix.replace('+', '|').replace('-', '|')
                    nops = noprefix.split('|')
                    if len(nops) < 2:
                        continue
                    rastr = nops[0]
                    decstr = nops[1]
                    hostra = (':'.join([rastr[:2], rastr[2:4], rastr[4:6]]) +
                              ('.' + rastr[6:] if len(rastr) > 6 else ''))
                    hostdec = decsign + ':'.join([
                        decstr[:2], decstr[2:4], decstr[4:6]
                    ]) + ('.' + decstr[6:] if len(decstr) > 6 else '')
                    if catalog.args.verbose:
                        tprint('Added hostra/hostdec from name: ' + hostra +
                               ' ' + hostdec)
                    source = catalog.entries[name].add_self_source()
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_RA, hostra, source, derived=True)
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_DEC, hostdec, source, derived=True)
                    break
                if TEST_ENTRY.HOST_RA in catalog.entries[name]:
                    break

        # --- Redshift derived from velocity -----------------------------
        if (TEST_ENTRY.REDSHIFT not in catalog.entries[name] and
                TEST_ENTRY.VELOCITY in catalog.entries[name]):
            # Find the "best" velocity to use for this
            # ("best" = the value with the most significant digits).
            bestsig = 0
            for hv in catalog.entries[name][TEST_ENTRY.VELOCITY]:
                sig = get_sig_digits(hv[QUANTITY.VALUE])
                if sig > bestsig:
                    besthv = hv[QUANTITY.VALUE]
                    bestsrc = hv['source']
                    bestsig = sig
            # `bestsig > 0` is tested first, so besthv/bestsrc are only read
            # when the loop above actually assigned them.
            if bestsig > 0 and is_number(besthv):
                # voc is v/c; the 1.e5 factor presumably converts km/s to
                # cm/s to match CLIGHT -- TODO confirm units of CLIGHT.
                voc = float(besthv) * 1.e5 / CLIGHT
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # Relativistic Doppler formula: z = sqrt((1+b)/(1-b)) - 1.
                (catalog.entries[name].add_quantity(
                    TEST_ENTRY.REDSHIFT,
                    pretty_num(
                        sqrt((1. + voc) / (1. - voc)) - 1., sig=bestsig),
                    sources,
                    kind='heliocentric',
                    derived=True))
        # --- Redshift derived from tabulated host distances -------------
        if (TEST_ENTRY.REDSHIFT not in catalog.entries[name] and
                len(catalog.nedd_dict) > 0 and
                TEST_ENTRY.HOST in catalog.entries[name]):
            reference = "NED-D"
            refurl = "http://ned.ipac.caltech.edu/Library/Distances/"
            refbib = "1991ASSL..171...89H"
            for host in catalog.entries[name][TEST_ENTRY.HOST]:
                if host[QUANTITY.VALUE] in catalog.nedd_dict:
                    source = catalog.entries[name].add_source(
                        bibcode='2016A&A...594A..13P')
                    secondarysource = catalog.entries[name].add_source(
                        name=reference, url=refurl, bibcode=refbib,
                        secondary=True)
                    # Median of the distances listed for this host, then
                    # inverted to a redshift via the comoving-distance
                    # relation of `cosmo` (distance taken as Mpc).
                    meddist = statistics.median(catalog.nedd_dict[host[
                        QUANTITY.VALUE]])
                    redz = z_at_value(cosmo.comoving_distance,
                                      float(meddist) * un.Mpc)
                    redshift = pretty_num(
                        redz, sig=get_sig_digits(str(meddist)))
                    catalog.entries[name].add_quantity(
                        [TEST_ENTRY.REDSHIFT, TEST_ENTRY.HOST_REDSHIFT],
                        redshift,
                        uniq_cdl([source, secondarysource]),
                        kind='host',
                        derived=True)
        # --- Absolute magnitude from apparent magnitude + lum. distance -
        if (TEST_ENTRY.MAX_ABS_MAG not in catalog.entries[name] and
                TEST_ENTRY.MAX_APP_MAG in catalog.entries[name] and
                TEST_ENTRY.LUM_DIST in catalog.entries[name]):
            # Find the "best" distance to use for this
            # ("best" = the value with the most significant digits).
            bestsig = 0
            for ld in catalog.entries[name][TEST_ENTRY.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld[QUANTITY.SOURCE]
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                bestldz = z_at_value(cosmo.luminosity_distance,
                                     float(bestld) * un.Mpc)
                # Distance modulus with the distance converted Mpc -> pc
                # (factor 1.0e6), plus a 2.5*log10(1+z) term.
                pnum = (
                    float(catalog.entries[name][TEST_ENTRY.MAX_APP_MAG][0][
                        QUANTITY.VALUE]) - 5.0 *
                    (log10(float(bestld) * 1.0e6) - 1.0
                     ) + 2.5 * log10(1.0 + bestldz))
                pnum = pretty_num(pnum, sig=bestsig + 1)
                catalog.entries[name].add_quantity(
                    TEST_ENTRY.MAX_ABS_MAG, pnum, sources, derived=True)
        if (TEST_ENTRY.MAX_VISUAL_ABS_MAG not in catalog.entries[name] and
                TEST_ENTRY.MAX_VISUAL_APP_MAG in catalog.entries[name] and
                TEST_ENTRY.LUM_DIST in catalog.entries[name]):
            # Find the "best" distance to use for this
            # ("best" = the value with the most significant digits).
            bestsig = 0
            for ld in catalog.entries[name][TEST_ENTRY.LUM_DIST]:
                sig = get_sig_digits(ld[QUANTITY.VALUE])
                if sig > bestsig:
                    bestld = ld[QUANTITY.VALUE]
                    bestsrc = ld[QUANTITY.SOURCE]
                    bestsig = sig
            if bestsig > 0 and is_number(bestld) and float(bestld) > 0.:
                source = catalog.entries[name].add_self_source()
                sources = uniq_cdl([source] + bestsrc.split(','))
                # Plain distance modulus, M = m - 5*(log10(d_pc) - 1), with
                # the distance converted Mpc -> pc by the 1.0e6 factor.
                # Unlike MAX_ABS_MAG above, no 2.5*log10(1+z) term is added.
                pnum = (
                    float(catalog.entries[name][
                        TEST_ENTRY.MAX_VISUAL_APP_MAG][0][QUANTITY.VALUE]) -
                    5.0 * (log10(float(bestld) * 1.0e6) - 1.0))
                pnum = pretty_num(pnum, sig=bestsig + 1)
                catalog.entries[name].add_quantity(
                    TEST_ENTRY.MAX_VISUAL_ABS_MAG, pnum, sources, derived=True)
        # --- Quantities derived from the event's best redshift ----------
        if TEST_ENTRY.REDSHIFT in catalog.entries[name]:
            # Find the "best" redshift to use for this
            bestz, bestkind, bestsig, bestsrc = catalog.entries[
                name].get_best_redshift()
            if bestsig > 0:
                try:
                    bestz = float(bestz)
                except Exception:
                    # Dump the offending entry for debugging, then re-raise.
                    print(catalog.entries[name])
                    raise
                if TEST_ENTRY.VELOCITY not in catalog.entries[name]:
                    source = catalog.entries[name].add_self_source()
                    # Relativistic Doppler relation solved for velocity:
                    # v = c * ((1+z)^2 - 1) / ((1+z)^2 + 1).
                    # CLIGHT / KM presumably expresses c in km/s -- TODO
                    # confirm the units of CLIGHT and KM.
                    pnum = CLIGHT / KM * \
                        ((bestz + 1.)**2. - 1.) / ((bestz + 1.)**2. + 1.)
                    pnum = pretty_num(pnum, sig=bestsig)
                    # NOTE(review): unlike most derived quantities in this
                    # function, this call does not pass derived=True --
                    # confirm whether that is intentional.
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.VELOCITY,
                        pnum,
                        source,
                        kind=(TEST_ENTRY.VELOCITY.kind_preference[bestkind]
                              if bestkind else ''))
                # Distances are only computed for strictly positive z.
                if bestz > 0.:
                    if TEST_ENTRY.LUM_DIST not in catalog.entries[name]:
                        dl = cosmo.luminosity_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.LUM_DIST,
                            pretty_num(
                                dl.value, sig=bestsig + 1),
                            sources,
                            kind=(TEST_ENTRY.LUM_DIST.kind_preference[bestkind]
                                  if bestkind else ''),
                            derived=True)
                        # With a freshly computed luminosity distance, fill
                        # in absolute magnitudes that are still missing.
                        if (TEST_ENTRY.MAX_ABS_MAG not in
                            catalog.entries[name] and TEST_ENTRY.MAX_APP_MAG in
                                catalog.entries[name]):
                            source = catalog.entries[name].add_self_source()
                            pnum = pretty_num(
                                float(catalog.entries[name][
                                    TEST_ENTRY.MAX_APP_MAG][0][QUANTITY.VALUE])
                                - 5.0 * (log10(dl.to('pc').value) - 1.0
                                         ) + 2.5 * log10(1.0 + bestz),
                                sig=bestsig + 1)
                            catalog.entries[name].add_quantity(
                                TEST_ENTRY.MAX_ABS_MAG,
                                pnum,
                                sources,
                                derived=True)
                        if (TEST_ENTRY.MAX_VISUAL_ABS_MAG not in
                                catalog.entries[name] and
                                TEST_ENTRY.MAX_VISUAL_APP_MAG in
                                catalog.entries[name]):
                            source = catalog.entries[name].add_self_source()
                            pnum = pretty_num(
                                float(catalog.entries[name][
                                    TEST_ENTRY.MAX_VISUAL_APP_MAG][0][
                                        QUANTITY.VALUE]) - 5.0 *
                                (log10(dl.to('pc').value) - 1.0),
                                sig=bestsig + 1)
                            catalog.entries[name].add_quantity(
                                TEST_ENTRY.MAX_VISUAL_ABS_MAG,
                                pnum,
                                sources,
                                derived=True)
                    if TEST_ENTRY.COMOVING_DIST not in catalog.entries[name]:
                        cd = cosmo.comoving_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.COMOVING_DIST,
                            pretty_num(
                                cd.value, sig=bestsig),
                            sources,
                            derived=True)
        # --- Same derivations for the host, from its best redshift ------
        if TEST_ENTRY.HOST_REDSHIFT in catalog.entries[name]:
            # Find the "best" redshift to use for this
            bestz, bestkind, bestsig, bestsrc = catalog.entries[
                name].get_best_redshift(TEST_ENTRY.HOST_REDSHIFT)
            if bestsig > 0:
                try:
                    bestz = float(bestz)
                except Exception:
                    # Dump the offending entry for debugging, then re-raise.
                    print(catalog.entries[name])
                    raise
                if TEST_ENTRY.HOST_VELOCITY not in catalog.entries[name]:
                    source = catalog.entries[name].add_self_source()
                    # Relativistic Doppler relation solved for velocity:
                    # v = c * ((1+z)^2 - 1) / ((1+z)^2 + 1).
                    # CLIGHT / KM presumably expresses c in km/s -- TODO
                    # confirm the units of CLIGHT and KM.
                    pnum = CLIGHT / KM * \
                        ((bestz + 1.)**2. - 1.) / ((bestz + 1.)**2. + 1.)
                    pnum = pretty_num(pnum, sig=bestsig)
                    # NOTE(review): no derived=True here either -- confirm.
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_VELOCITY,
                        pnum,
                        source,
                        kind=(TEST_ENTRY.HOST_VELOCITY.kind_preference[bestkind]
                              if bestkind else ''))
                if bestz > 0.:
                    if TEST_ENTRY.HOST_LUM_DIST not in catalog.entries[name]:
                        dl = cosmo.luminosity_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.HOST_LUM_DIST,
                            pretty_num(
                                dl.value, sig=bestsig + 1),
                            sources,
                            kind=(TEST_ENTRY.HOST_LUM_DIST.kind_preference[
                                bestkind] if bestkind else ''),
                            derived=True)
                    if TEST_ENTRY.HOST_COMOVING_DIST not in catalog.entries[
                            name]:
                        cd = cosmo.comoving_distance(bestz)
                        sources = [
                            catalog.entries[name].add_self_source(),
                            catalog.entries[name]
                            .add_source(bibcode='2016A&A...594A..13P')
                        ]
                        sources = uniq_cdl(sources + bestsrc.split(','))
                        catalog.entries[name].add_quantity(
                            TEST_ENTRY.HOST_COMOVING_DIST,
                            pretty_num(
                                cd.value, sig=bestsig),
                            sources,
                            derived=True)
        # --- Offset between event and host ------------------------------
        # Requires all four of RA, Dec, host RA and host Dec.
        if all([
                x in catalog.entries[name]
                for x in [
                    TEST_ENTRY.RA, TEST_ENTRY.DEC, TEST_ENTRY.HOST_RA,
                    TEST_ENTRY.HOST_DEC
                ]
        ]):
            # For now just using first coordinates that appear in entry
            try:
                c1 = coord(
                    ra=catalog.entries[name][TEST_ENTRY.RA][0][QUANTITY.VALUE],
                    dec=catalog.entries[name][TEST_ENTRY.DEC][0][
                        QUANTITY.VALUE],
                    unit=(un.hourangle, un.deg))
                c2 = coord(
                    ra=catalog.entries[name][TEST_ENTRY.HOST_RA][0][
                        QUANTITY.VALUE],
                    dec=catalog.entries[name][TEST_ENTRY.HOST_DEC][0][
                        QUANTITY.VALUE],
                    unit=(un.hourangle, un.deg))
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                # Coordinates that cannot be constructed are silently
                # skipped; no offset is computed for this entry.
                pass
            else:
                # Combine the self source with the sources of all four
                # coordinate values used.
                sources = uniq_cdl(
                    [catalog.entries[name].add_self_source()] + catalog.
                    entries[name][TEST_ENTRY.RA][0][QUANTITY.SOURCE].split(',')
                    + catalog.entries[name][TEST_ENTRY.DEC][0][QUANTITY.SOURCE]
                    .split(',') + catalog.entries[name][TEST_ENTRY.HOST_RA][0][
                        QUANTITY.SOURCE].split(',') + catalog.entries[name][
                            TEST_ENTRY.HOST_DEC][0][QUANTITY.SOURCE].split(','))
                if TEST_ENTRY.HOST_OFFSET_ANG not in catalog.entries[name]:
                    hosa = Decimal(c1.separation(c2).arcsecond)
                    hosa = pretty_num(hosa)
                    catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_OFFSET_ANG,
                        hosa,
                        sources,
                        derived=True,
                        u_value='arcseconds')
                if (TEST_ENTRY.COMOVING_DIST in catalog.entries[name] and
                        TEST_ENTRY.REDSHIFT in catalog.entries[name] and
                        TEST_ENTRY.HOST_OFFSET_DIST not in
                        catalog.entries[name]):
                    offsetsig = get_sig_digits(catalog.entries[name][
                        TEST_ENTRY.HOST_OFFSET_ANG][0][QUANTITY.VALUE])
                    sources = uniq_cdl(
                        sources.split(',') + (catalog.entries[name][
                            TEST_ENTRY.COMOVING_DIST][0][QUANTITY.SOURCE]).
                        split(',') + (catalog.entries[name][TEST_ENTRY.REDSHIFT]
                                      [0][QUANTITY.SOURCE]).split(','))
                    # Projected offset: angle (arcsec -> rad) times
                    # comoving distance / (1 + z), scaled by 1000
                    # (presumably Mpc -> kpc -- TODO confirm units).
                    # NOTE(review): derived=True is not passed here.
                    (catalog.entries[name].add_quantity(
                        TEST_ENTRY.HOST_OFFSET_DIST,
                        pretty_num(
                            float(catalog.entries[name][
                                TEST_ENTRY.HOST_OFFSET_ANG][0][QUANTITY.VALUE])
                            / 3600. * (pi / 180.) *
                            float(catalog.entries[name][
                                TEST_ENTRY.COMOVING_DIST][0][QUANTITY.VALUE]) *
                            1000. / (1.0 + float(catalog.entries[name][
                                TEST_ENTRY.REDSHIFT][0][QUANTITY.VALUE])),
                            sig=offsetsig),
                        sources))

        # --- Finalize this entry ----------------------------------------
        catalog.entries[name].sanitize()
        catalog.journal_entries(bury=True, final=True, gz=True)
        cleanupcnt = cleanupcnt + 1
        # When the `travis` flag is set, stop after the first 1000
        # processed entries.
        if catalog.args.travis and cleanupcnt % 1000 == 0:
            break

    catalog.save_caches()

    return
Exemple #4
0
def host_clean(name):
    """Clean host name.

    Normalizes a host-galaxy designation: strips stray punctuation, puts a
    single space between well-known catalog prefixes and their identifiers
    (``NGC1234`` -> ``NGC 1234``), adjusts zero padding per catalog, and
    collapses runs of whitespace.

    Args:
        name: Raw host name string.

    Returns:
        The cleaned host name string.
    """
    newname = name.strip(' ;,*')

    # Handle some special cases
    hostcases = {'M051a': 'M51A', 'M051b': 'M51B'}
    newname = hostcases.get(newname, newname)

    # Some general cases
    newname = newname.strip("()").replace('  ', ' ', 1)
    # Ordered (old, new) pairs; each is applied to the first occurrence only.
    # Order matters: e.g. "ABELL" is first recased to "Abell", which the next
    # rule then separates from its number.
    replacements = (
        ("ABELL", "Abell"),
        ("Abell", "Abell "),
        ("APMUKS(BJ)", "APMUKS(BJ) "),
        ("ARP", "ARP "),
        ("CGCG", "CGCG "),
        ("HOLM", "HOLM "),
        ("ESO", "ESO "),
        ("IC", "IC "),
        ("Intergal.", "Intergalactic"),
        ("MCG+", "MCG +"),
        ("MCG-", "MCG -"),
        ("M+", "MCG +"),
        ("M-", "MCG -"),
        ("MGC ", "MCG "),
        ("Mrk", "MRK"),
        ("MRK", "MRK "),
        ("NGC", "NGC "),
        ("PGC", "PGC "),
        ("SDSS", "SDSS "),
        ("UGC", "UGC "),
    )
    for old, new in replacements:
        newname = newname.replace(old, new, 1)

    # Messier objects: 'MESSIER 51' / 'M 51' / 'M051' -> 'M51'
    if newname.startswith('MESSIER '):
        newname = newname.replace('MESSIER ', 'M', 1)
    if newname.startswith('M ') and is_number(newname[2:]):
        newname = newname.replace('M ', 'M', 1)
    if newname.startswith('M') and is_number(newname[1:]):
        newname = 'M' + newname[1:].lstrip(" 0")

    # PGC / UGC numbers carry no leading zeros.
    if len(newname) > 4 and newname.startswith("PGC "):
        newname = newname[:4] + newname[4:].lstrip(" 0")
    if len(newname) > 4 and newname.startswith("UGC "):
        newname = newname[:4] + newname[4:].lstrip(" 0")

    # MCG fields are zero-padded to two digits, CGCG fields to three.
    if len(newname) > 5 and newname.startswith(("MCG +", "MCG -")):
        newname = newname[:5] + '-'.join(
            [x.zfill(2) for x in newname[5:].strip().split("-")])
    if len(newname) > 5 and newname.startswith("CGCG "):
        newname = newname[:5] + '-'.join(
            [x.zfill(3) for x in newname[5:].strip().split("-")])

    # ESO designations, written either 'ESO 123-G045' or abbreviated
    # 'E123-45'. The longer prefix must be tested first: every name that
    # starts with 'ESO' also starts with 'E'.
    eso_rest = ''
    if newname.startswith('ESO'):
        eso_rest = newname[3:]
    elif newname.startswith('E'):
        eso_rest = newname[1:]
    if eso_rest:
        esplit = eso_rest.split("-")
        if len(esplit) == 2 and is_number(esplit[0].strip()):
            partone = esplit[0].strip()
            parttwo = esplit[1].strip()
            # startswith() is safe when nothing follows the dash.
            if parttwo.startswith('G'):
                parttwo = parttwo[1:].strip()
            if is_number(parttwo):
                newname = ('ESO ' + partone.lstrip('0') + '-G' +
                           parttwo.lstrip('0'))

    newname = ' '.join(newname.split())
    return newname
Exemple #5
0
def radec_clean(svalue, quantity, unit=''):
    """Clean R.A. and Dec.

    Converts a right ascension or declination string to colon-separated
    sexagesimal form.

    Args:
        svalue: Coordinate value as a string.
        quantity: Name of the quantity being cleaned. It is matched by
            substring: a name containing ``'ra'`` is handled as right
            ascension, one containing ``'dec'`` as declination.
        unit: Input format. ``'floatdegrees'`` is decimal degrees,
            ``'nospace'`` is packed digits without separators; any other
            value means space- or colon-separated fields.

    Returns:
        A ``(value, unit)`` tuple. The unit is ``'hours'`` for right
        ascension and ``'degrees'`` for declination. If ``quantity`` names
        neither, or a ``'floatdegrees'`` value is not numeric, the input
        ``unit`` is returned unchanged.

    Raises:
        ValueError: If a decimal-degree conversion yields more than 60
            seconds.
    """
    svalue = svalue.strip()
    if unit == 'floatdegrees':
        if not is_number(svalue):
            return (svalue, unit)
        deg = float('%g' % Decimal(svalue))
        sig = get_sig_digits(svalue)
        if 'ra' in quantity:
            # Degrees -> hours, then split into h/m/s.
            flhours = deg / 360.0 * 24.0
            hours = floor(flhours)
            minutes = floor((flhours - hours) * 60.0)
            seconds = (flhours * 60.0 - (hours * 60.0 + minutes)) * 60.0
            # Clamp tiny (or negative) float residue to zero.
            hours = 0 if hours < 1.e-6 else hours
            minutes = 0 if minutes < 1.e-6 else minutes
            seconds = 0.0 if seconds < 1.e-6 else seconds
            if seconds > 60.0:
                raise (ValueError('Invalid seconds value for ' + quantity))
            svalue = str(hours).zfill(2) + ':' + str(minutes).zfill(2) + \
                ':' + zpad(pretty_num(seconds, sig=sig - 1))
        elif 'dec' in quantity:
            # Work on the magnitude; the sign is re-attached below.
            fldeg = abs(deg)
            degree = floor(fldeg)
            minutes = floor((fldeg - degree) * 60.0)
            seconds = (fldeg * 60.0 - (degree * 60.0 + minutes)) * 60.0
            minutes = 0 if minutes < 1.e-6 else minutes
            seconds = 0.0 if seconds < 1.e-6 else seconds
            if seconds > 60.0:
                raise (ValueError('Invalid seconds value for ' + quantity))
            svalue = (('+' if deg >= 0.0 else '-') +
                      str(degree).strip('+-').zfill(2) + ':' +
                      str(minutes).zfill(2) + ':' +
                      zpad(pretty_num(seconds, sig=sig - 1)))
    elif unit == 'nospace' and 'ra' in quantity:
        # HHMMSS.s -> HH:MM:SS.s
        svalue = svalue[:2] + ':' + svalue[2:4] + \
            ((':' + zpad(svalue[4:])) if len(svalue) > 4 else '')
    elif unit == 'nospace' and 'dec' in quantity:
        # [+-]DDMMSS.s -> +DD:MM:SS.s; an unsigned value is taken as positive.
        if svalue.startswith(('+', '-')):
            svalue = svalue[:3] + ':' + svalue[3:5] + \
                ((':' + zpad(svalue[5:])) if len(svalue) > 5 else '')
        else:
            svalue = '+' + svalue[:2] + ':' + svalue[2:4] + \
                ((':' + zpad(svalue[4:])) if len(svalue) > 4 else '')
    else:
        svalue = svalue.replace(' ', ':')
        if 'dec' in quantity:
            # Force an explicit sign and zero-pad each field.
            valuesplit = svalue.split(':')
            svalue = (
                ('-' if valuesplit[0].startswith('-') else '+') +
                valuesplit[0].strip('+-').zfill(2) +
                (':' + valuesplit[1].zfill(2) if len(valuesplit) > 1 else '') +
                (':' + zpad(valuesplit[2]) if len(valuesplit) > 2 else ''))

    if 'ra' in quantity:
        sunit = 'hours'
    elif 'dec' in quantity:
        sunit = 'degrees'
    else:
        # Neither R.A. nor Dec.: report the caller's unit instead of failing
        # on an unbound local at the return statement.
        sunit = unit

    # Correct case of arcseconds = 60.0: roll the seconds over into the
    # minutes, and the minutes into the leading field when they reach 60.
    valuesplit = svalue.split(':')
    if len(valuesplit) == 3 and valuesplit[-1] in ["60.0", "60.", "60"]:
        lead, minutes = valuesplit[0], valuesplit[1]
        if minutes.isdecimal():
            carried = int(minutes) + 1
            digits = lead.lstrip('+-')
            if carried == 60 and digits.isdecimal():
                sign = lead[:len(lead) - len(digits)]
                whole = int(digits) + 1
                if sunit == 'hours':
                    # 24h wraps around to 0h.
                    whole %= 24
                lead = sign + str(whole).zfill(2)
                carried = 0
            minutes = str(carried).zfill(2)
        else:
            # Non-integer minute field: keep the plain increment.
            minutes = str(Decimal(minutes) + Decimal(1.0))
        svalue = lead + ':' + minutes + ':' + "00.0"

    # Strip trailing dots.
    svalue = svalue.rstrip('.')

    return (svalue, sunit)
Exemple #6
0
def name_clean(name):
    """Apply list of renaming rules for test_entry names.

    The rules run strictly in order; many later rules rely on the
    normalization done by earlier ones (for example a ``'SN HST'`` prefix is
    first reduced to ``'HST'`` and only then given its ``'J'``). Rules that
    look at a single character past a prefix are length-guarded, so a bare
    prefix such as ``'SNLS'`` or ``'ASASSN'`` is returned unchanged instead
    of raising ``IndexError``.

    Args:
        name: Raw event name string.

    Returns:
        The cleaned name with whitespace collapsed and trimmed.
    """
    newname = name.strip(' ;,*.')
    if newname.startswith('NAME '):
        newname = newname.replace('NAME ', '', 1)
    # Drop a trailing supernova tag. Slice rather than replace() so that only
    # the suffix is removed, not every occurrence inside the name.
    if newname.endswith(' SN'):
        newname = newname[:-3]
    if newname.endswith(':SN'):
        newname = newname[:-3]

    # MASTER
    if newname.startswith('MASJ'):
        newname = newname.replace('MASJ', 'MASTER OT J', 1)
    if (newname.startswith('MASTER') and len(newname) > 7
            and is_number(newname[7])):
        newname = newname.replace('MASTER', 'MASTER OT J', 1)
    if (newname.startswith('MASTER OT') and len(newname) > 10
            and is_number(newname[10])):
        newname = newname.replace('MASTER OT', 'MASTER OT J', 1)
    if newname.startswith('MASTER OT J '):
        newname = newname.replace('MASTER OT J ', 'MASTER OT J', 1)

    if newname.startswith('PTSS '):
        newname = newname.replace('PTSS ', 'PTSS-', 1)
    if newname.startswith('SPIRITS '):
        newname = newname.replace('SPIRITS ', 'SPIRITS', 1)

    # OGLE
    if newname.startswith('OGLE '):
        newname = newname.replace('OGLE ', 'OGLE-', 1)
    if newname.startswith('OGLE-') and len(newname) != 16:
        namesp = newname.split('-')
        if (len(namesp) == 4 and len(namesp[1]) == 4 and is_number(namesp[1])
                and is_number(namesp[3])):
            newname = 'OGLE-' + namesp[1] + '-SN-' + namesp[3].zfill(3)
        elif (len(namesp) == 2 and is_number(namesp[1][:2])
              and not is_number(namesp[1][2:])):
            newname = 'OGLE' + namesp[1]

    # SDSS
    if newname.startswith('SN SDSS'):
        newname = newname.replace('SN SDSS ', 'SDSS', 1)
    if newname.startswith('SDSS '):
        newname = newname.replace('SDSS ', 'SDSS', 1)
    if newname.startswith('SDSS'):
        namesp = newname.split('-')
        if (len(namesp) == 3 and is_number(namesp[0][4:])
                and is_number(namesp[1]) and is_number(namesp[2])):
            newname = namesp[0] + '-' + namesp[1] + '-' + namesp[2].zfill(3)
    if newname.startswith('SDSS-II SN'):
        namesp = newname.split()
        if len(namesp) == 3 and is_number(namesp[2]):
            newname = 'SDSS-II SN ' + namesp[2].lstrip('0')

    if newname.startswith('SN CL'):
        newname = newname.replace('SN CL', 'CL', 1)
    if newname.startswith('SN HiTS'):
        newname = newname.replace('SN HiTS', 'SNHiTS', 1)
    if newname.startswith('SNHiTS '):
        newname = newname.replace('SNHiTS ', 'SNHiTS', 1)
    if newname.startswith('GAIA'):
        newname = newname.replace('GAIA', 'Gaia', 1)
    if newname.startswith('KSN-'):
        newname = newname.replace('KSN-', 'KSN', 1)
    if newname.startswith('KSN'):
        newname = 'KSN' + newname[3:].lower()
    if newname.startswith('Gaia '):
        newname = newname.replace('Gaia ', 'Gaia', 1)
    if newname.startswith('Gaia'):
        newname = 'Gaia' + newname[4:].lower()
    if newname.startswith('GRB'):
        newname = newname.replace('GRB', 'GRB ', 1)
    # if newname.startswith('GRB ') and is_number(newname[4:].strip()):
    #     newname = 'GRB ' + newname[4:].strip() + 'A'
    if newname.startswith('ESSENCE '):
        newname = newname.replace('ESSENCE ', 'ESSENCE', 1)
    if newname.startswith('LSQ '):
        newname = newname.replace('LSQ ', 'LSQ', 1)
    if newname.startswith('LSQ') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:3] + newname[3:].lower()
    if newname.startswith('DES') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:7] + newname[7:].lower()

    # SNSDF
    if newname.startswith('SNSDF '):
        newname = newname.replace(' ', '')
    if newname.startswith('SNSDF'):
        namesp = newname.split('.')
        # Only rewrite when there actually is a part after the dot.
        if len(namesp) > 1 and len(namesp[0]) == 9:
            newname = namesp[0] + '-' + namesp[1].zfill(2)

    if newname.startswith('HFF '):
        newname = newname.replace(' ', '')
    if newname.startswith('SN HST'):
        newname = newname.replace('SN HST', 'HST', 1)
    if (newname.startswith('HST ') and len(newname) > 4
            and newname[4] != 'J'):
        newname = newname.replace('HST ', 'HST J', 1)
    if (newname.startswith('SNLS') and len(newname) > 4
            and newname[4] != '-'):
        newname = newname.replace('SNLS', 'SNLS-', 1)
    if newname.startswith('SNLS- '):
        newname = newname.replace('SNLS- ', 'SNLS-', 1)

    # CRTS sub-surveys
    if newname.startswith('CRTS CSS'):
        newname = newname.replace('CRTS CSS', 'CSS', 1)
    if newname.startswith('CRTS MLS'):
        newname = newname.replace('CRTS MLS', 'MLS', 1)
    if newname.startswith('CRTS SSS'):
        newname = newname.replace('CRTS SSS', 'SSS', 1)
    if newname.startswith(('CSS', 'MLS', 'SSS')):
        newname = newname.replace(' ', ':').replace('J', '')

    if newname.startswith('SN HFF'):
        newname = newname.replace('SN HFF', 'HFF', 1)
    if newname.startswith('SN GND'):
        newname = newname.replace('SN GND', 'GND', 1)
    if newname.startswith('SN SCP'):
        newname = newname.replace('SN SCP', 'SCP', 1)
    if newname.startswith('SN UDS'):
        newname = newname.replace('SN UDS', 'UDS', 1)
    if (newname.startswith('SCP') and len(newname) > 3
            and newname[3] != '-'):
        newname = newname.replace('SCP', 'SCP-', 1)
    if newname.startswith('SCP- '):
        newname = newname.replace('SCP- ', 'SCP-', 1)
    if newname.startswith('SCP-') and is_integer(newname[7:]):
        newname = 'SCP-' + newname[4:7] + str(int(newname[7:]))

    # Pan-STARRS
    if newname.startswith('PS 1'):
        newname = newname.replace('PS 1', 'PS1', 1)
    if newname.startswith('PS1 SN PS'):
        newname = newname.replace('PS1 SN PS', 'PS', 1)
    if newname.startswith('PS1 SN'):
        newname = newname.replace('PS1 SN', 'PS1', 1)
    if newname.startswith('PS1') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:3] + newname[3:].lower()
    elif newname.startswith('PS1-') and len(newname) > 4 and is_number(
            newname[4]):
        newname = newname[:4] + newname[4:].lower()

    if newname.startswith('PSN K'):
        newname = newname.replace('PSN K', 'K', 1)
    if newname.startswith('K') and len(newname) > 5 and is_number(
            newname[1:5]):
        namesp = newname.split('-')
        # A five-character first part of a longer name implies a dash at
        # index 5, so namesp[1] exists here.
        if len(namesp[0]) == 5:
            newname = namesp[0] + '-' + namesp[1].zfill(3)
    if newname.startswith('Psn'):
        newname = newname.replace('Psn', 'PSN', 1)
    if newname.startswith('PSNJ'):
        newname = newname.replace('PSNJ', 'PSN J', 1)
    if newname.startswith('TCPJ'):
        newname = newname.replace('TCPJ', 'TCP J', 1)
    if newname.startswith('SMTJ'):
        newname = newname.replace('SMTJ', 'SMT J', 1)
    if newname.startswith('PSN20J'):
        newname = newname.replace('PSN20J', 'PSN J', 1)
    if newname.startswith('kait'):
        newname = newname.replace('kait', 'KAIT', 1)

    # ASAS-SN
    if newname.startswith('SN ASASSN'):
        newname = newname.replace('SN ASASSN', 'ASASSN', 1)
    if (newname.startswith('ASASSN-20') and len(newname) > 9
            and is_number(newname[9])):
        newname = newname.replace('ASASSN-20', 'ASASSN-', 1)
    if newname.startswith('ASASSN '):
        newname = newname.replace('ASASSN ', 'ASASSN-', 1).replace('--', '-')
    if (newname.startswith('ASASSN') and len(newname) > 6
            and newname[6] != '-'):
        newname = newname.replace('ASASSN', 'ASASSN-', 1)
    if newname.startswith('ASASSN-') and len(newname) > 7 and is_number(
            newname[7]):
        newname = newname[:7] + newname[7:].lower()

    if newname.startswith('ROTSE3J'):
        newname = newname.replace('ROTSE3J', 'ROTSE3 J', 1)
    if newname.startswith('MACSJ'):
        newname = newname.replace('MACSJ', 'MACS J', 1)
    if newname.startswith('MWSNR'):
        newname = newname.replace('MWSNR', 'MWSNR ', 1)
    if newname.startswith('SN HUNT'):
        newname = newname.replace('SN HUNT', 'SNhunt', 1)
    if newname.startswith('SN Hunt'):
        newname = newname.replace(' ', '')
    if newname.startswith('SNHunt'):
        newname = newname.replace('SNHunt', 'SNhunt', 1)
    if newname.startswith('SNhunt '):
        newname = newname.replace('SNhunt ', 'SNhunt', 1)

    # PTF / iPTF
    if newname.startswith('ptf'):
        newname = newname.replace('ptf', 'PTF', 1)
    if newname.startswith('SN PTF'):
        newname = newname.replace('SN PTF', 'PTF', 1)
    if newname.startswith('PTF '):
        newname = newname.replace('PTF ', 'PTF', 1)
    if newname.startswith('PTF') and len(newname) > 3 and is_number(
            newname[3]):
        newname = newname[:3] + newname[3:].lower()
    if newname.startswith('IPTF'):
        newname = newname.replace('IPTF', 'iPTF', 1)
    if newname.startswith('iPTF '):
        newname = newname.replace('iPTF ', 'iPTF', 1)
    if newname.startswith('iPTF') and len(newname) > 4 and is_number(
            newname[4]):
        newname = newname[:4] + newname[4:].lower()

    if newname.startswith('PESSTOESO'):
        newname = newname.replace('PESSTOESO', 'PESSTO ESO ', 1)
    if newname.startswith('snf'):
        newname = newname.replace('snf', 'SNF', 1)
    if newname.startswith('SNF '):
        newname = newname.replace('SNF ', 'SNF', 1)
    if (newname.startswith('SNF') and is_number(newname[3:])
            and len(newname) >= 12):
        newname = 'SNF' + newname[3:11] + '-' + newname[11:]

    # Coordinate-based names: insert the decimal point after six digits in
    # each coordinate half when neither half has one yet.
    if newname.startswith(('MASTER OT J', 'ROTSE3 J')):
        prefix = newname.split('J')[0]
        coords = newname.split('J')[-1].strip()
        decsign = '+' if '+' in coords else '-'
        coordsplit = coords.replace('+', '-').split('-')
        # Without a sign there is no second half to reformat.
        if (len(coordsplit) > 1
                and '.' not in coordsplit[0] and len(coordsplit[0]) > 6
                and '.' not in coordsplit[1] and len(coordsplit[1]) > 6):
            newname = (prefix + 'J' + coordsplit[0][:6] + '.' +
                       coordsplit[0][6:] + decsign + coordsplit[1][:6] + '.' +
                       coordsplit[1][6:])

    if (newname.startswith('AT ') and len(newname) > 7
            and is_number(newname[3:7])):
        newname = newname.replace('AT ', 'AT', 1)

    # Bare numeric / year-based names become 'SN...' designations.
    if len(newname) <= 4 and is_number(newname):
        newname = 'SN' + newname + 'A'
    if (len(newname) > 4 and is_number(newname[:4])
            and not is_number(newname[4:])):
        newname = 'SN' + newname
    if (newname.startswith('Sn ') and is_number(newname[3:7])
            and len(newname) > 7):
        newname = newname.replace('Sn ', 'SN', 1)
    if (newname.startswith('sn') and is_number(newname[2:6])
            and len(newname) > 6):
        newname = newname.replace('sn', 'SN', 1)
    if (newname.startswith('SN ') and is_number(newname[3:7])
            and len(newname) > 7):
        newname = newname.replace('SN ', 'SN', 1)

    # Suffix case: a single letter is upper case, two or three letters are
    # lower case.
    if (newname.startswith('SN') and is_number(newname[2:6])
            and len(newname) == 7 and newname[6].islower()):
        newname = 'SN' + newname[2:6] + newname[6].upper()
    elif (newname.startswith('SN') and is_number(newname[2:6])
          and (len(newname) == 8 or len(newname) == 9)
          and newname[6:].isupper()):
        newname = 'SN' + newname[2:6] + newname[6:].lower()
    if (newname.startswith('AT') and is_number(newname[2:6])
            and len(newname) == 7 and newname[6].islower()):
        newname = 'AT' + newname[2:6] + newname[6].upper()
    elif (newname.startswith('AT') and is_number(newname[2:6])
          and (len(newname) == 8 or len(newname) == 9)
          and newname[6:].isupper()):
        newname = 'AT' + newname[2:6] + newname[6:].lower()

    newname = (' '.join(newname.split())).strip()
    return newname
Exemple #7
0
def _read_cfa_columns(path):
    """Read a space-delimited spectrum file and return its columns as lists.

    The file is closed before returning.
    """
    with open(path, 'r') as handle:
        rows = csv.reader(handle, delimiter=' ', skipinitialspace=True)
        return [list(column) for column in zip(*rows)]


def do_cfa_spectra(catalog):
    """Import spectra from the CfA archive.

    Walks four sub-directories of the current task repository
    (``CfA_SNII``, ``CfA_SNIa``, ``CfA_SNIbc`` and ``CfA_Extra``). Each
    sub-directory inside them names one event; every spectrum file found
    there is parsed and attached to that event with ``add_spectrum``.
    Entries are journaled whenever the event name changes and once more after
    each archive section.

    Args:
        catalog: Catalog object providing the task repository path, the
            entry store and the journaling methods used below.
    """
    task_str = catalog.get_current_task_str()
    # II spectra
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_SNII')))[1]
    for ni, name in enumerate(utils.pbar(file_names, task_str, sort=True)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_SNII/') + name
        origname = name
        if name.startswith('sn') and utils.is_number(name[2:6]):
            name = 'SN' + name[2:]
        name = catalog.get_name_for_entry_or_alias(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        reference = 'CfA Supernova Archive'
        refurl = 'https://www.cfa.harvard.edu/supernova/SNarchive.html'
        source = catalog.entries[name].add_source(name=reference,
                                                  url=refurl,
                                                  secondary=True,
                                                  acknowledgment=ACKN_CFA)
        catalog.entries[name].add_quantity(TEST_ENTRY.ALIAS, name, source)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            filename = os.path.basename(fname)
            fileparts = filename.split('-')
            # For 'snYYYY...' directories the date is the second
            # dash-separated field; otherwise it sits one field later
            # (presumably because the object name itself contains a dash).
            if origname.startswith('sn') and utils.is_number(origname[2:6]):
                year = fileparts[1][:4]
                month = fileparts[1][4:6]
                day = fileparts[1][6:]
                instrument = fileparts[2].split('.')[0]
            else:
                year = fileparts[2][:4]
                month = fileparts[2][4:6]
                day = fileparts[2][6:]
                instrument = fileparts[3].split('.')[0]
            time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            errors = data[2]
            sources = utils.uniq_cdl([
                source,
                (catalog.entries[name].add_source(
                    bibcode='2017arXiv170601030H'))
            ])
            # NOTE(review): u_errors spells the unit 'ergs' while u_fluxes
            # uses 'erg'; confirm whether the difference is intentional.
            catalog.entries[name].add_spectrum(u_wavelengths='Angstrom',
                                               u_fluxes='erg/s/cm^2/Angstrom',
                                               filename=filename,
                                               wavelengths=wavelengths,
                                               fluxes=fluxes,
                                               u_time='MJD' if time else '',
                                               time=time,
                                               instrument=instrument,
                                               u_errors='ergs/s/cm^2/Angstrom',
                                               errors=errors,
                                               source=sources,
                                               dereddened=False,
                                               deredshifted=False)

        # Keep test runs short.
        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()

    # Ia spectra
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_SNIa')))[1]
    for ni, name in enumerate(utils.pbar(file_names, task_str, sort=True)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_SNIa/') + name
        origname = name
        if name.startswith('sn') and utils.is_number(name[2:6]):
            name = 'SN' + name[2:]
        if name.startswith('snf') and utils.is_number(name[3:7]):
            name = 'SNF' + name[3:]
        name = catalog.get_name_for_entry_or_alias(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        reference = 'CfA Supernova Archive'
        refurl = 'https://www.cfa.harvard.edu/supernova/SNarchive.html'
        source = catalog.entries[name].add_source(name=reference,
                                                  url=refurl,
                                                  secondary=True,
                                                  acknowledgment=ACKN_CFA)
        catalog.entries[name].add_quantity(TEST_ENTRY.ALIAS, name, source)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            filename = os.path.basename(fname)
            fileparts = filename.split('-')
            # Same field layout rule as for the II spectra above.
            if origname.startswith('sn') and utils.is_number(origname[2:6]):
                year = fileparts[1][:4]
                month = fileparts[1][4:6]
                day = fileparts[1][6:]
                instrument = fileparts[2].split('.')[0]
            else:
                year = fileparts[2][:4]
                month = fileparts[2][4:6]
                day = fileparts[2][6:]
                instrument = fileparts[3].split('.')[0]
            time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            errors = data[2]
            sources = utils.uniq_cdl([
                source,
                (catalog.entries[name].add_source(
                    bibcode='2012AJ....143..126B')),
                (catalog.entries[name].add_source(
                    bibcode='2008AJ....135.1598M'))
            ])
            catalog.entries[name].add_spectrum(u_wavelengths='Angstrom',
                                               u_fluxes='erg/s/cm^2/Angstrom',
                                               filename=filename,
                                               wavelengths=wavelengths,
                                               fluxes=fluxes,
                                               u_time='MJD' if time else '',
                                               time=time,
                                               instrument=instrument,
                                               u_errors='ergs/s/cm^2/Angstrom',
                                               errors=errors,
                                               source=sources,
                                               dereddened=False,
                                               deredshifted=False)

        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()

    # Ibc spectra
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_SNIbc')))[1]
    for ni, name in enumerate(utils.pbar(file_names, task_str)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_SNIbc/') + name
        if name.startswith('sn') and utils.is_number(name[2:6]):
            name = 'SN' + name[2:]
        name = catalog.get_name_for_entry_or_alias(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        reference = 'CfA Supernova Archive'
        refurl = 'https://www.cfa.harvard.edu/supernova/SNarchive.html'
        source = catalog.entries[name].add_source(name=reference,
                                                  url=refurl,
                                                  secondary=True,
                                                  acknowledgment=ACKN_CFA)
        catalog.entries[name].add_quantity(TEST_ENTRY.ALIAS, name, source)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            filename = os.path.basename(fname)
            fileparts = filename.split('-')
            instrument = ''
            year = fileparts[1][:4]
            month = fileparts[1][4:6]
            day = fileparts[1][6:].split('.')[0]
            # The instrument field is optional in these file names.
            if len(fileparts) > 2:
                instrument = fileparts[-1].split('.')[0]
            time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = data[1]
            sources = utils.uniq_cdl([
                source,
                catalog.entries[name].add_source(bibcode='2014AJ....147...99M')
            ])
            catalog.entries[name].add_spectrum(u_wavelengths='Angstrom',
                                               u_fluxes='erg/s/cm^2/Angstrom',
                                               wavelengths=wavelengths,
                                               filename=filename,
                                               fluxes=fluxes,
                                               u_time='MJD' if time else '',
                                               time=time,
                                               instrument=instrument,
                                               source=sources,
                                               dereddened=False,
                                               deredshifted=False)

        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()

    # Other spectra
    # Build the scale factor from a string: Decimal(1.0e-15) would carry the
    # float's binary rounding error into every flux value.
    flux_scale = Decimal('1e-15')
    oldname = ''
    file_names = next(
        os.walk(os.path.join(catalog.get_current_task_repo(), 'CfA_Extra')))[1]
    for ni, name in enumerate(utils.pbar(file_names, task_str, sort=True)):
        fullpath = os.path.join(catalog.get_current_task_repo(),
                                'CfA_Extra/') + name
        if name.startswith('sn') and utils.is_number(name[2:6]):
            name = 'SN' + name[2:]
        name = catalog.get_name_for_entry_or_alias(name)
        if oldname and name != oldname:
            catalog.journal_entries()
        oldname = name
        name = catalog.add_entry(name)
        reference = 'CfA Supernova Archive'
        refurl = 'https://www.cfa.harvard.edu/supernova/SNarchive.html'
        source = catalog.entries[name].add_source(name=reference,
                                                  url=refurl,
                                                  secondary=True,
                                                  acknowledgment=ACKN_CFA)
        catalog.entries[name].add_quantity(TEST_ENTRY.ALIAS, name, source)
        for fname in sorted(glob(fullpath + '/*'), key=lambda s: s.lower()):
            if not os.path.isfile(fname):
                continue
            filename = os.path.basename(fname)
            # Only 'sn*flm' files are imported; files whose names carry one
            # of the listed tags are skipped.
            if ((not filename.startswith('sn') or not filename.endswith('flm')
                 or any(
                     x in filename
                     for x in ['-interp', '-z', '-dered', '-obj', '-gal']))):
                continue
            fileparts = filename.split('.')[0].split('-')
            instrument = ''
            time = ''
            if len(fileparts) > 1:
                year = fileparts[1][:4]
                month = fileparts[1][4:6]
                day = fileparts[1][6:]
                # Time and instrument are only set when the second field
                # parses as a date.
                if utils.is_number(year) and utils.is_number(
                        month) and utils.is_number(day):
                    if len(fileparts) > 2:
                        instrument = fileparts[-1]
                    time = _get_time_str(year, month, day)
            data = _read_cfa_columns(fname)
            wavelengths = data[0]
            fluxes = [str(Decimal(x) * flux_scale) for x in data[1]]
            catalog.entries[name].add_spectrum(u_wavelengths='Angstrom',
                                               u_fluxes='erg/s/cm^2/Angstrom',
                                               wavelengths=wavelengths,
                                               filename=filename,
                                               fluxes=fluxes,
                                               u_time='MJD' if time else '',
                                               time=time,
                                               instrument=instrument,
                                               source=source,
                                               dereddened=False,
                                               deredshifted=False)

        if catalog.args.travis and ni >= catalog.TRAVIS_QUERY_LIMIT:
            break

    catalog.journal_entries()
    return
Exemple #8
0
    def set_preferred_name(self):
        """Set preferred name of test_entry.

        The candidates are the entry's aliases.  The first rule below that
        yields a name wins:

        1. The current name, if it is already of the form 'SN####AA'.
        2. The first alias of the form 'SN####AA'.
        3. The first alias matching a survey found in the entry's
           'discoverer' values (ASAS, OGLE, CRTS, PS1, PTF, La Silla-QUEST,
           GAIA -- tried in that order).
        4. The first alias of the form 'AT####AA'.
        5. The shortest alias, ignoring aliases that start with 'PSN'
           whenever at least one other alias is available.

        When the selected name differs from the current one, the entry is
        re-keyed in ``self.catalog.entries``: if an entry already exists
        under the new name (loaded from file first when it is a stub), this
        entry is copied into it; otherwise this entry is moved and its name
        field updated.  The old key is deleted in both cases.

        Returns
        -------
        str
            The name the entry is stored under after the call (the
            unchanged current name when no rename took place).
        """
        def is_designation(candidate, prefix):
            """Match `prefix` + 3- or 4-character number + non-numeric rest.

            The slices assume a two-character prefix ('SN' or 'AT').
            """
            return (candidate.startswith(prefix)
                    and ((is_number(candidate[2:6])
                          and not is_number(candidate[6:]))
                         or (is_number(candidate[2:5])
                             and not is_number(candidate[5:]))))

        name = self[self._KEYS.NAME]
        aliases = self.get_aliases()
        # if there are no other options to choose from, skip
        if len(aliases) <= 1:
            return name
        # If the name is already in the form 'SN####AA' then keep using
        # that
        if is_designation(name, 'SN'):
            return name

        # If one of the aliases is in the form 'SN####AA' then use that
        newname = next(
            (alias for alias in aliases if is_designation(alias, 'SN')), '')

        # If not, name based on the 'discoverer' survey
        if not newname and TEST_ENTRY.DISCOVERER in self:
            discoverer = ','.join(
                [x['value'].upper() for x in self[TEST_ENTRY.DISCOVERER]])
            # (survey appears in discoverer?, alias substrings identifying
            # that survey), in order of preference.
            survey_rules = (
                ('ASAS' in discoverer, ('ASASSN',)),
                ('OGLE' in discoverer, ('OGLE',)),
                ('CRTS' in discoverer, ('CSS', 'MLS', 'SSS', 'SNHUNT')),
                ('PS1' in discoverer, ('PS1',)),
                ('PTF' in discoverer, ('PTF',)),
                ('la silla-quest' in discoverer.lower(), ('LSQ',)),
                ('GAIA' in discoverer, ('GAIA',)),
            )
            for discovered_by, tokens in survey_rules:
                if not discovered_by:
                    continue
                newname = next(
                    (alias for alias in aliases
                     if any(token in alias.upper() for token in tokens)), '')
                if newname:
                    break

        # If one of the aliases is in the form 'AT####AA' then use that
        if not newname:
            newname = next(
                (alias for alias in aliases if is_designation(alias, 'AT')),
                '')

        # Otherwise, use the shortest name, always preferring another alias
        # over PSN.  (Previously the PSN rule came after an unconditional
        # shortest-name fallback and therefore could never run.)
        if not newname:
            non_psn = [
                alias for alias in aliases if not alias.startswith('PSN')
            ]
            newname = min(non_psn or aliases, key=len)

        if not newname or name == newname:
            return name

        file_entry = None
        # Make sure new name doesn't already exist
        if newname in self.catalog.entries:
            if self.catalog.entries[newname]._stub:
                # Only a stub is in memory: load the full entry first.
                file_entry = self.init_from_file(self.catalog, name=newname)
            else:
                file_entry = self.catalog.entries[newname]

        if file_entry:
            self._log.info(
                "`{}` already exists, copying `{}` to it".format(
                    newname, name))
            self.catalog.copy_entry_to_entry(self.catalog.entries[name],
                                             file_entry)
            self.catalog.entries[newname] = file_entry
        else:
            self._log.info("Changing entry from name '{}' to preferred"
                           " name '{}'".format(name, newname))
            self.catalog.entries[newname] = self.catalog.entries[name]
            self.catalog.entries[newname][self._KEYS.NAME] = newname
        del self.catalog.entries[name]
        return newname
Exemple #9
0
    def _clean_quantity(self, quantity):
        """Clean quantity value before it is added to entry.

        Normalizes the value, error, unit and kind fields of `quantity`
        according to the quantity's key and writes the cleaned fields back
        into `quantity` in place.

        Parameters
        ----------
        quantity
            Mapping-like quantity object exposing ``get`` and a ``_key``
            attribute; its string fields are read with ``QUANTITY`` keys.

        Returns
        -------
        bool
            ``False`` when the quantity should be discarded (empty value,
            alias listed as distinct-from, numeric or anonymous host, empty
            claimed type); ``True`` once `quantity` has been updated.

        Raises
        ------
        ValueError
            If an error value is given that is not a non-negative number.
        """
        value = quantity.get(QUANTITY.VALUE, '').strip()
        error = quantity.get(QUANTITY.E_VALUE, '').strip()
        unit = quantity.get(QUANTITY.U_VALUE, '').strip()
        kinds = [x.strip() for x in listify(quantity.get(QUANTITY.KIND, []))]
        key = quantity._key

        if not value:
            return False

        if error and (not is_number(error) or float(error) < 0):
            raise ValueError(self[self._KEYS.NAME] + "'s quanta " + key +
                             ' error value must be a number and positive.')

        # Set default units
        if not unit:
            if key == self._KEYS.VELOCITY:
                unit = 'km/s'
            elif key == self._KEYS.RA:
                unit = 'hours'
            elif key == self._KEYS.DEC:
                unit = 'degrees'
            elif key in [self._KEYS.LUM_DIST, self._KEYS.COMOVING_DIST]:
                unit = 'Mpc'

        # Key-specific cleaning
        if key == self._KEYS.ALIAS:
            value = self.catalog.clean_entry_name(value)
            # Reject aliases explicitly marked as distinct from this entry.
            if any(value == df[QUANTITY.VALUE]
                   for df in quantity.get(self._KEYS.DISTINCT_FROM, [])):
                return False
        elif key == self._KEYS.HOST:
            if is_number(value):
                return False
            if value.lower() in [
                    'anonymous', 'anon.', 'anon', 'intergalactic'
            ]:
                return False
            value = host_clean(value)
            # Tag 'Abell <number>' and '...cluster...' hosts as clusters
            # unless a kind was already supplied.
            lowered = value.lower()
            if not kinds and ((lowered.startswith('abell')
                               and is_number(value[5:].strip()))
                              or 'cluster' in lowered):
                kinds = ['cluster']
        elif key == self._KEYS.HOST_REDSHIFT:
            kinds = [kind for kind in kinds if kind != 'host']
        elif key == self._KEYS.CLAIMED_TYPE:
            if value.startswith('SN '):
                value = value.replace('SN ', '', 1)
            value = value.replace('young', '')
            # Remember an uncertainty marker so that it can be re-attached
            # after the synonym lookup.
            isq = '?' in value
            if isq:
                value = value.strip(' ?')
            for rep in self.catalog.type_syns:
                if value in self.catalog.type_syns[rep]:
                    value = rep
                    break
            if isq:
                value = value + '?'
            if not value:
                return False
        elif key in [
                self._KEYS.RA, self._KEYS.DEC, self._KEYS.HOST_RA,
                self._KEYS.HOST_DEC
        ]:
            (value, unit) = radec_clean(value, key, unit=unit)
        elif key in (self._KEYS.MAX_DATE, self._KEYS.DISCOVER_DATE):
            # Make sure month and day have leading zeroes
            sparts = value.split('/')
            # Count digits only, so that a leading minus sign is not
            # mistaken for a fifth digit and five-digit years are caught.
            if len(sparts[0].lstrip('-')) > 4:
                self._log.warning("Date year {} greater than four "
                                  "digits.".format(sparts[0]))
            if len(sparts) >= 2:
                value = sparts[0] + '/' + sparts[1].zfill(2)
            if len(sparts) == 3:
                value = value + '/' + sparts[2].zfill(2)

            # for ii, ct in enumerate(self.parent[key]):
            #     # Only add dates if they have more information
            #     if len(ct[QUANTITY.VALUE].split('/')) >
            #            len(value.split('/')):
            #         return False

        # NOTE(review): '%g' keeps at most six significant digits, so
        # higher-precision numeric values are rounded here -- confirm that
        # this is intended.
        if is_number(value):
            value = '%g' % Decimal(value)
        if error:
            error = '%g' % Decimal(error)

        # Write the cleaned fields back; empty fields are left untouched.
        if value:
            quantity[QUANTITY.VALUE] = value
        if error:
            quantity[QUANTITY.E_VALUE] = error
        if unit:
            quantity[QUANTITY.U_VALUE] = unit
        if kinds:
            # A single kind is stored as a scalar, several as a list.
            quantity[QUANTITY.KIND] = kinds if len(kinds) > 1 else kinds[0]
        elif QUANTITY.KIND in quantity:
            del (quantity[QUANTITY.KIND])

        return True
Exemple #10
0
    def sanitize(self):
        """Sanitize the entry's data after import.

        In addition to the parent class' sanitization this:

        * adds an 'AT...' alias to 'SN...' entries whose first discovery
          date has a year of 2016 or later and that have no alias
          containing 'AT' yet;
        * prioritizes the claimed types, drops the '?' and '-' placeholders,
          drops 'candidate' when other types exist, and adds 'Candidate' to
          untyped 'AT...' entries;
        * cleans each source's bibcode, looks up its author string via an
          ADS query (cached in ``self.catalog.bibauthor_dict``) and stores
          it as the source's reference;
        * sorts redshifts and velocities with ``frame_priority``;
        * sorts the sources with ``bib_priority``, renumbers their aliases
          and rewrites the source lists of all sourced data to match.
        """
        # Import the submodules explicitly: a bare `import urllib` does not
        # guarantee that `urllib.parse` and `urllib.request` are loaded.
        import urllib.parse
        import urllib.request
        from html import unescape

        super(Test_Entry, self).sanitize()

        # Calculate some columns based on imported data, sanitize some fields
        name = self[self._KEYS.NAME]
        aliases = self.get_aliases()

        if ((name.startswith('SN') and is_number(name[2:6])
             and self._KEYS.DISCOVER_DATE in self
             and int(self[self._KEYS.DISCOVER_DATE][0][QUANTITY.VALUE].split(
                 '/')[0]) >= 2016 and not any('AT' in x for x in aliases))):
            source = self.add_self_source()
            self.add_quantity(self._KEYS.ALIAS, 'AT' + name[2:], source)

        if self._KEYS.CLAIMED_TYPE in self:
            # FIX: this is something that should be done completely internally
            #      i.e. add it to `clean` or something??
            self[self._KEYS.CLAIMED_TYPE] = self.ct_list_prioritized()

        if self._KEYS.CLAIMED_TYPE in self:
            # Slice assignment keeps the existing list object in place.
            self[self._KEYS.CLAIMED_TYPE][:] = [
                ct for ct in self[self._KEYS.CLAIMED_TYPE]
                if ct[QUANTITY.VALUE] not in ['?', '-']
            ]
            # 'candidate' carries no information once a real type is known.
            if (len(self[self._KEYS.CLAIMED_TYPE]) > 1 and any(
                    x[QUANTITY.VALUE].lower() == 'candidate'
                    for x in self[self._KEYS.CLAIMED_TYPE])):
                self[self._KEYS.CLAIMED_TYPE][:] = [
                    ct for ct in self[self._KEYS.CLAIMED_TYPE]
                    if ct[QUANTITY.VALUE].lower() != 'candidate'
                ]
            if not self[self._KEYS.CLAIMED_TYPE]:
                del (self[self._KEYS.CLAIMED_TYPE])

        if self._KEYS.CLAIMED_TYPE not in self and name.startswith('AT'):
            source = self.add_self_source()
            self.add_quantity(self._KEYS.CLAIMED_TYPE, 'Candidate', source)

        if self._KEYS.SOURCES in self:
            for source in self[self._KEYS.SOURCES]:
                if SOURCE.BIBCODE not in source:
                    continue

                bibcode = source[SOURCE.BIBCODE]
                # First sanitize the bibcode
                if len(bibcode) != 19:
                    bibcode = urllib.parse.unquote(unescape(bibcode)).replace(
                        'A.A.', 'A&A')

                if bibcode in self.catalog.biberror_dict:
                    bibcode = self.catalog.biberror_dict[bibcode]

                if bibcode not in self.catalog.bibauthor_dict:
                    adsquery = (self.catalog.ADS_BIB_URL +
                                urllib.parse.quote(bibcode) +
                                '&data_type=Custom&format=%253m%20%25(y)')
                    bibcodeauthor = ''
                    try:
                        # The context manager closes the HTTP response.
                        with urllib.request.urlopen(adsquery) as response:
                            page = response.read().decode('utf-8')
                    except Exception as err:
                        # The lookup is best-effort: report the failure and
                        # carry on with an empty author string.
                        warnings.warn(
                            "ADS query for bibcode '{}' failed ({}), not"
                            " converting this bibcode.".format(bibcode, err))
                    else:
                        # The author string is read from the sixth line of
                        # the response.
                        hsplit = page.split("\n")
                        if len(hsplit) > 5:
                            bibcodeauthor = hsplit[5]
                        if not bibcodeauthor:
                            warnings.warn(
                                "Bibcode didn't return authors, not"
                                " converting this bibcode.")

                    self.catalog.bibauthor_dict[bibcode] = unescape(
                        bibcodeauthor).strip()

                # Always store the sanitized bibcode, also when its author
                # string was already cached by an earlier lookup.
                source[SOURCE.BIBCODE] = bibcode

                if (self.catalog.bibauthor_dict.get(bibcode, None)
                        is not None):
                    source[SOURCE.
                           REFERENCE] = self.catalog.bibauthor_dict[bibcode]

                if SOURCE.NAME not in source:
                    source[SOURCE.NAME] = bibcode

        if self._KEYS.REDSHIFT in self:
            self[self._KEYS.REDSHIFT] = sorted(
                self[self._KEYS.REDSHIFT],
                key=lambda q: frame_priority(q, self._KEYS.REDSHIFT))

        if self._KEYS.VELOCITY in self:
            self[self._KEYS.VELOCITY] = sorted(
                self[self._KEYS.VELOCITY],
                key=lambda q: frame_priority(q, self._KEYS.VELOCITY))

        # Renumber and reorder sources
        if self._KEYS.SOURCES in self:
            # Sort sources reverse-chronologically
            self[self._KEYS.SOURCES] = sorted(self[self._KEYS.SOURCES],
                                              key=bib_priority)

            # Assign new aliases to match new order
            sources_list = self[self._KEYS.SOURCES]
            # Maps each old source alias to its new 1-based position.
            source_reps = OrderedDict([[src[SOURCE.ALIAS],
                                        str(ii + 1)]
                                       for ii, src in enumerate(sources_list)])
            for ii, source in enumerate(sources_list):
                self[self._KEYS.SOURCES][ii][SOURCE.ALIAS] = source_reps[
                    source[SOURCE.ALIAS]]

            # Change sources of data to match new aliases
            for key in self.keys():
                if self._KEYS.get_key_by_name(key).no_source:
                    continue
                for item in self[key]:
                    try:
                        temp = [
                            int(source_reps[x])
                            for x in item[item._KEYS.SOURCE].split(',')
                        ]
                    except Exception:
                        # Dump the offending data before re-raising.
                        print("Failed")
                        print(
                            "item[item._KEYS.SOURCE].split(',') = '{}'".format(
                                item[item._KEYS.SOURCE].split(',')))
                        print("source_reps = '{}'".format(source_reps))
                        print("key = '{}'".format(key), repr(key))
                        print("item = '{}'".format(item), repr(item))
                        raise

                    aliases = [str(y) for y in sorted(temp)]
                    item[item._KEYS.SOURCE] = ','.join(aliases)