Example #1
def main():
    # TODO: bad, hard-coded...
    # base_path = '/Volumes/ProjectData/gaia-comoving-followup/'
    base_path = '../data/'
    db_path = path.join(base_path, 'db.sqlite')
    engine = db_connect(db_path)
    session = Session()

    credentials = dict(user='******', password='******')
    Gaia.login(**credentials)

    for obs in session.query(Observation).all():
        q = session.query(Photometry).join(Observation).filter(Observation.id == obs.id).count()
        if q > 0:
            logger.debug('Photometry already exists')
            continue

        if obs.tgas_source is None:
            continue

        tgas_source_id = obs.tgas_source.source_id
        res = get_photometry(tgas_source_id)

        phot_kw = dict()
        for col in result_columns:
            phot_kw[col] = res[col]

        phot = Photometry(**phot_kw)
        phot.observation = obs
        session.add(phot)
        session.commit()
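The loop above uses a query-count-then-skip pattern so the script stays idempotent across re-runs. A minimal, self-contained sketch of that pattern (SQLAlchemy 1.4+; the Photometry model here is a hypothetical stand-in for the project's ORM class):

from sqlalchemy import Column, Float, Integer, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Photometry(Base):  # hypothetical stand-in for the project's model
    __tablename__ = 'photometry'
    id = Column(Integer, primary_key=True)
    observation_id = Column(Integer)
    g_mag = Column(Float)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def add_photometry_once(session, obs_id, **phot_kw):
    # insert only if no photometry row exists for this observation yet
    n = session.query(Photometry).filter(
        Photometry.observation_id == obs_id).count()
    if n == 0:
        session.add(Photometry(observation_id=obs_id, **phot_kw))
        session.commit()

add_photometry_once(session, obs_id=1, g_mag=12.3)
add_photometry_once(session, obs_id=1, g_mag=12.3)  # second call is a no-op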
Example #2
    def get_corrected_rv(self, obs):
        """Compute a corrected radial velocity for the given observation"""

        # Compute the raw offset: difference between Halpha centroid and true
        # wavelength value
        x0 = obs.measurements[0].x0 * u.angstrom
        raw_offset = (x0 - self.Halpha)

        # precision estimate from line centroid error
        precision = (obs.measurements[0].x0_error *
                     u.angstrom) / self.Halpha * c.to(u.km / u.s)

        # For each sky line (that passes certain quality checks), compute the
        # offset between the predicted wavelength and measured centroid
        # TODO: generalize these quality cuts - see also above in
        # _compute_offset_corrections
        sky_offsets = np.full(3, np.nan) * u.angstrom
        for j, meas in enumerate(obs.measurements[1:]):
            sky_offset = meas.x0 * u.angstrom - meas.info.wavelength
            if (meas.amp > 16 and 0.3 < meas.std_G < 2 and
                    np.abs(sky_offset) < 3.3 * u.angstrom):  # MAGIC NUMBER: quality cuts
                sky_offsets[j] = sky_offset

        # final sky offset to apply
        flag = 0
        sky_offset = np.nanmean(sky_offsets)
        if np.isnan(sky_offset.value):
            logger.debug("not correcting with sky line for {0}".format(obs))
            sky_offset = 0 * u.angstrom
            flag = 1

        # apply global sky offset correction - see _compute_offset_corrections()
        sky_offset -= self._night_polys[obs.night](obs.utc_hour) * u.angstrom

        # compute radial velocity and correct for sky line
        rv = (raw_offset - sky_offset) / self.Halpha * c.to(u.km / u.s)

        # correct for offset of median of ∆RV distribution from targets with
        # prior/known RV's
        rv -= self._night_final_offsets[obs.night]

        # rv error
        err = np.sqrt(self._abs_err**2 + precision**2)

        return rv, err, flag
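get_corrected_rv() turns wavelength offsets into velocities via the first-order Doppler relation v = c·Δλ/λ0. A self-contained check of that arithmetic with astropy units (all numbers made up):

import astropy.units as u
from astropy.constants import c

halpha = 6562.8 * u.angstrom     # rest (air) wavelength
x0 = 6563.9 * u.angstrom         # hypothetical measured Halpha centroid
sky_offset = 0.05 * u.angstrom   # hypothetical sky-line correction

rv = ((x0 - halpha) - sky_offset) / halpha * c.to(u.km / u.s)
print(rv)  # ~48 km/s for these numbers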
Example #3
def main(db_path, run_name, overwrite=False, pool=None):

    if pool is None:
        pool = schwimmbad.SerialPool()

    # connect to the database
    engine = db_connect(db_path)
    # engine.echo = True
    logger.debug("Connected to database at '{}'".format(db_path))

    # create a new session for interacting with the database
    session = Session()

    root_path, _ = path.split(db_path)
    plot_path = path.join(root_path, 'plots', run_name)
    if not path.exists(plot_path):
        os.makedirs(plot_path, exist_ok=True)

    # get object to correct the observed RV's
    rv_corr = RVCorrector(session, run_name)

    observations = session.query(Observation).join(Run)\
                          .filter(Run.name == run_name).all()

    for obs in observations:
        q = session.query(RVMeasurement).join(Observation)\
                   .filter(Observation.id == obs.id)

        if q.count() > 0 and not overwrite:
            logger.debug('RV measurement already complete for object '
                         '{0} in file {1}'.format(obs.object,
                                                  obs.filename_raw))
            continue

        elif q.count() > 1:
            raise RuntimeError(
                'Multiple RV measurements found for object {0}'.format(obs))

        elif len(obs.measurements) == 0:
            logger.debug(
                'Observation {0} has no line measurements.'.format(obs))
            continue

        corrected_rv, err, flag = rv_corr.get_corrected_rv(obs)

        # remove previous RV measurements
        if q.count() > 0:
            session.delete(q.one())
            session.commit()

        rv_meas = RVMeasurement(rv=corrected_rv, err=err, flag=flag)
        rv_meas.observation = obs
        session.add(rv_meas)
        session.commit()

    pool.close()
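main() accepts any schwimmbad pool (here it is only closed at the end, but other scripts in this set pass it to emcee), so the same code runs serially or in parallel. A minimal sketch of the interchangeable-pool pattern, using schwimmbad's choose_pool helper:

import schwimmbad

def worker(x):
    return x ** 2

# with mpi=False and processes=1 this returns a SerialPool; MultiPool and
# MPIPool expose the same map()/close() interface
pool = schwimmbad.choose_pool(mpi=False, processes=1)
print(list(pool.map(worker, range(8))))
pool.close()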
Example #4
    def work(self, id):
        engine = db_connect(self.db_path)
        session = Session()
        obs = session.query(Observation).filter(Observation.id == id).one()
        model = obs_to_starmodel(obs)

        # initial conditions for emcee walkers
        p0 = []
        m0, age0, feh0 = model.ic.random_points(self.nwalkers,
                                                minmass=0.01,
                                                maxmass=10.,
                                                minfeh=-1,
                                                maxfeh=1)
        _, max_distance = model.bounds('distance')
        _, max_AV = model.bounds('AV')
        d0 = 10**(np.random.uniform(0,
                                    np.log10(max_distance),
                                    size=self.nwalkers))
        AV0 = np.random.uniform(0, max_AV, size=self.nwalkers)
        p0 += [m0]
        p0 += [age0, feh0, d0, AV0]

        p0 = np.array(p0).T
        npars = p0.shape[1]

        logger.debug('Running emcee - initial sampling...')
        sampler = emcee.EnsembleSampler(self.nwalkers, npars, model.lnpost)

        pos, prob, _ = sampler.run_mcmc(p0, self.ninit)

        # cull the weak walkers
        best_ix = sampler.flatlnprobability.argmax()
        best_p0 = (sampler.flatchain[best_ix][None] +
                   np.random.normal(0, 1E-5, size=(self.nwalkers, npars)))

        sampler.reset()
        logger.debug('burn-in...')
        pos, prob, _ = sampler.run_mcmc(best_p0, self.nburn)

        sampler.reset()
        logger.debug('sampling...')
        _ = sampler.run_mcmc(pos, self.niter)

        model._sampler = sampler
        model._make_samples(0.01)

        return id, model
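The "cull the weak walkers" step restarts every walker in a tiny Gaussian ball around the best sample found so far before burn-in. A toy version on a standard-normal posterior (flatchain/flatlnprobability are the emcee 2 names, still available, though deprecated, in emcee 3):

import numpy as np
import emcee

def log_prob(p):
    return -0.5 * np.sum(p ** 2)  # standard-normal toy posterior

nwalkers, ndim, ninit = 32, 3, 200
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)

p0 = np.random.uniform(-5, 5, size=(nwalkers, ndim))  # deliberately poor start
sampler.run_mcmc(p0, ninit)

# restart all walkers in a small ball around the best sample, then reset the
# stored chain before continuing
best = sampler.flatchain[sampler.flatlnprobability.argmax()]
best_p0 = best[None] + np.random.normal(0, 1e-5, size=(nwalkers, ndim))
sampler.reset()
sampler.run_mcmc(best_p0, ninit)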
Example #5
def main(db_path, run_root_path, drop_all=False, overwrite=False, **kwargs):

    # Make sure the specified paths actually exist
    db_path = path.abspath(db_path)
    run_root_path = path.abspath(run_root_path)
    for path_ in [path.dirname(db_path), run_root_path]:
        if not path.exists(path_):
            raise ValueError("Path '{0}' doesn't exist!".format(path_))

    # --------------------------------------------------------------------------
    # These are relative paths, so the script needs to be run from the
    #   scripts path...

    # ID table for mapping group index to TGAS row
    ID_tbl = Table.read('../data/star_identifier.csv')

    # TGAS table
    logger.debug("Loading TGAS data...")
    tgas = Table.read('../../gaia-comoving-stars/data/stacked_tgas.fits')

    # Catalog of velocities for Bensby's HIP stars:
    bensby = Table.read('../data/bensbyrv_bestunique.csv')

    # --------------------------------------------------------------------------

    # connect to the database
    engine = db_connect(db_path, ensure_db_exists=True)
    # engine.echo = True
    logger.debug("Connected to database at '{}'".format(db_path))

    if drop_all: # remove all tables and replace
        Base.metadata.drop_all()
        Base.metadata.create_all()

    # create a new session for interacting with the database
    session = Session()

    logger.debug("Loading SpectralLineInfo table")

    line_info = OrderedDict()
    # air wavelength of Halpha -- wavelength calibration from comp lamp is done
    #   at air wavelengths, so this is where Halpha should be, right?
    line_info['Halpha'] = 6562.8*u.angstrom

    # [OI] emission lines -- wavelengths from:
    #   http://physics.nist.gov/PhysRefData/ASD/lines_form.html
    line_info['[OI] 5577'] = 5577.3387*u.angstrom
    line_info['[OI] 6300'] = 6300.304*u.angstrom
    line_info['[OI] 6364'] = 6363.776*u.angstrom

    for name, wvln in line_info.items():
        n = session.query(SpectralLineInfo).filter(SpectralLineInfo.name == name).count()
        if n == 0:
            logger.debug('Loading line {0} at {1}'.format(name, wvln))
            line = SpectralLineInfo(name=name, wavelength=wvln)
            session.add(line)
            session.commit()
        else:
            logger.debug('Line {0} already loaded'.format(name))

    # Create an entry for this observing run
    data_path, run_name = path.split(run_root_path)
    logger.info("Path to night paths: {0}".format(data_path))
    n = session.query(Run).filter(Run.name == run_name).count()
    if n == 0:
        logger.debug('Adding run {0} to database'.format(run_name))
        run = Run(name=run_name)
        session.add(run)
        session.commit()

    elif n == 1:
        logger.debug('Loading run {0} from database'.format(run_name))
        run = session.query(Run).filter(Run.name == run_name).limit(1).one()

    else:
        raise RuntimeError("Multiple runs named '{0}' found in the database!"
                           .format(run_name))

    # Now we need to go through each processed night of data and load all of the
    # relevant observations of sources.

    # First we get the column names for the Observation and TGASSource tables
    obs_columns = [str(c).split('.')[1] for c in Observation.__table__.columns]
    tgassource_columns = [str(c).split('.')[1]
                          for c in TGASSource.__table__.columns]

    # Here's where there's a bit of hard-coded bewitchery - the nights (within
    # each run) have to be labeled 'n1', 'n2', etc. Sorry.
    glob_pattr_proc = path.join(data_path, 'processed', run_name, 'n?')
    for proc_night_path in glob.glob(glob_pattr_proc):
        night = path.basename(proc_night_path)
        night_id = int(night[1])
        logger.debug('Loading night {0}...'.format(night_id))

        observations = []
        tgas_sources = []
        prior_rvs = []

        glob_pattr_1d = path.join(proc_night_path, '1d_*.fit')
        for path_1d in ProgressBar(glob.glob(glob_pattr_1d)):
            hdr = fits.getheader(path_1d)

            # skip all except OBJECT observations
            if hdr['IMAGETYP'] != 'OBJECT':
                continue

            basename = path.basename(path_1d)[3:]
            logger.log(1, 'loading row for {0}'.format(basename))

            kw = dict()

            # construct filenames using the hard-coded naming convention
            kw['filename_raw'] = basename
            kw['filename_p'] = 'p_' + basename
            kw['filename_1d'] = '1d_' + basename

            # check if this filename is already in the database, if so, drop it
            base_query = session.query(Observation)\
                                .filter(Observation.filename_raw == kw['filename_raw'])
            already_loaded = base_query.count() > 0

            if already_loaded and overwrite:
                base_query.delete()
                session.commit()

            elif already_loaded:
                logger.debug('Object {0} [{1}] already loaded'
                             .format(hdr['OBJECT'],
                                     path.basename(kw['filename_raw'])))
                continue

            # read in header of 1d file and store keywords that exist as columns
            kw.update(fits_header_to_cols(hdr, obs_columns))

            # HACK: skip empty object name
            if len(str(hdr['OBJECT'])) == 0:
                logger.warning('SKIPPING - empty OBJECT key')
                continue

            # get group id from object name
            if '-' in str(hdr['OBJECT']):
                # Per APW and SMOH's convention

                split_name = hdr['OBJECT'].split('-')
                kw['group_id'] = int(split_name[0])

                # because: reasons
                if kw['group_id'] == 10:
                    tgas_row_idx = int(split_name[1])
                else:
                    smoh_idx = int(split_name[1])
                    tgas_row_idx = ID_tbl[smoh_idx]['tgas_row']
                tgas_row = tgas[tgas_row_idx]

                # query Simbad to get all possible names for this target
                if tgas_row['hip'] > 0:
                    object_name = 'HIP{0}'.format(tgas_row['hip'])
                else:
                    object_name = 'TYC {0}'.format(tgas_row['tycho2_id'])
                logger.log(1, 'common name: {0}'.format(object_name))

                try:
                    all_ids = Simbad.query_objectids(object_name)['ID'].astype(str)
                except Exception as e:
                    logger.warning('Simbad query_objectids failed for "{0}" '
                                   'with error: {1}'
                                   .format(object_name, str(e)))
                    all_ids = []

                logger.log(1, 'this is a group object')

                if len(all_ids) > 0:
                    logger.log(1, 'other names for this object: {0}'
                               .format(', '.join(all_ids)))
                else:
                    logger.log(1, 'simbad names for this object could not be '
                               'retrieved')

            elif (isinstance(hdr['OBJECT'], int) or
                    str(hdr['OBJECT']).startswith('k') or
                    hdr['OBJECT'][0].isdigit()):
                # Assume it's a KIC number - per Ruth and Dan's convention

                if isinstance(hdr['OBJECT'], int):
                    object_name = 'KIC {0:d}'.format(hdr['OBJECT'])

                elif hdr['OBJECT'].startswith('k'):
                    object_name = 'KIC {0}'.format(hdr['OBJECT'][1:])

                else:
                    object_name = 'KIC {0}'.format(hdr['OBJECT'])

                # query Simbad to get all possible names for this target
                logger.log(1, 'common name: {0}'.format(object_name))

                try:
                    all_ids = Simbad.query_objectids(object_name)['ID'].astype(str)
                except Exception as e:
                    logger.warning('Simbad query_objectids failed for "{0}" '
                                   'with error: {1}'
                                   .format(object_name, str(e)))
                    all_ids = []

                logger.log(1, 'this is a KIC object')

                if len(all_ids) > 0:
                    logger.log(1, 'other names for this object: {0}'
                               .format(', '.join(all_ids)))
                else:
                    logger.log(1, 'simbad names for this object could not be '
                               'retrieved')

                # get the HIP or Tycho-2 ID, if it has one
                hip_id = [id_ for id_ in all_ids if 'HIP' in id_]
                tyc_id = [id_ for id_ in all_ids if 'TYC' in id_]
                if hip_id:
                    hip_id = int(hip_id[0].replace('HIP', '').strip())
                    logger.log(1, 'source has HIP id: {0}'.format(hip_id))
                    tgas_row_idx = np.where(tgas['hip'] == hip_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                elif tyc_id:
                    tyc_id = tyc_id[0].replace('TYC', '').strip()
                    logger.log(1, 'source has tycho 2 id: {0}'.format(tyc_id))
                    tgas_row_idx = np.where(tgas['tycho2_id'] == tyc_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                else:
                    logger.log(1, 'source has no HIP or TYC id.')
                    tgas_row_idx = None

                # result_table = Simbad.query_object(object_name)

            else:
                object_name = hdr['OBJECT']
                logger.log(1, 'common name: {0}'.format(object_name))
                logger.log(1, 'this is not a group object')

                # query Simbad to get all possible names for this target
                try:
                    all_ids = Simbad.query_objectids(object_name)['ID'].astype(str)
                except Exception as e:
                    logger.warning('SKIPPING: Simbad query_objectids failed for '
                                   '"{0}" with error: {1}'
                                   .format(object_name, str(e)))
                    continue

                # get the HIP or Tycho-2 ID, if it has one
                hip_id = [id_ for id_ in all_ids if 'HIP' in id_]
                tyc_id = [id_ for id_ in all_ids if 'TYC' in id_]
                if hip_id:
                    hip_id = int(hip_id[0].replace('HIP', '').strip())
                    logger.log(1, 'source has HIP id: {0}'.format(hip_id))
                    tgas_row_idx = np.where(tgas['hip'] == hip_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                elif tyc_id:
                    tyc_id = tyc_id[0].replace('TYC', '').strip()
                    logger.log(1, 'source has tycho 2 id: {0}'.format(tyc_id))
                    tgas_row_idx = np.where(tgas['tycho2_id'] == tyc_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                else:
                    logger.log(1, 'source has no HIP or TYC id.')
                    tgas_row_idx = None

            # store relevant names / IDs
            simbad_info_kw = dict()
            for id_ in all_ids:
                if id_.lower().startswith('hd'):
                    simbad_info_kw['hd_id'] = id_[2:]

                elif id_.lower().startswith('hip'):
                    simbad_info_kw['hip_id'] = id_[3:]

                elif id_.lower().startswith('tyc'):
                    simbad_info_kw['tyc_id'] = id_[3:]

                elif id_.lower().startswith('2mass'):
                    simbad_info_kw['twomass_id'] = id_[5:]

            for k,v in simbad_info_kw.items():
                simbad_info_kw[k] = v.strip()

            simbad_info = SimbadInfo(**simbad_info_kw)

            # Compute barycenter velocity given coordinates of where the
            # telescope was pointing and observation time
            t = Time(hdr['JD'], format='jd', scale='utc')
            sc = coord.SkyCoord(ra=hdr['RA'], dec=hdr['DEC'],
                                unit=(u.hourangle, u.degree))
            kw['v_bary'] = bary_vel_corr(t, sc, location=kitt_peak)

            obs = Observation(night=night_id, **kw)
            obs.run = run

            # Get the TGAS data if the source is in TGAS
            if tgas_row_idx is not None:
                logger.log(1, 'TGAS row: {0}'.format(tgas_row_idx))

                tgas_kw = dict()
                tgas_kw['row_index'] = tgas_row_idx
                for name in tgas.colnames:
                    if name in tgassource_columns:
                        tgas_kw[name] = tgas_row[name]

                job = Gaia.launch_job(gaia_query.format(tgas_kw['source_id'][0]),
                                      dump_to_file=False)
                res = job.get_results()

                if len(res) == 0:
                    logger.warning("No 2MASS data found for: {0}"
                                   .format(tgas_kw['source_id']))

                elif len(res) == 1:
                    tgas_kw['J'] = res['j_m'][0]
                    tgas_kw['J_err'] = res['j_msigcom'][0]
                    tgas_kw['H'] = res['h_m'][0]
                    tgas_kw['H_err'] = res['h_msigcom'][0]
                    tgas_kw['Ks'] = res['ks_m'][0]
                    tgas_kw['Ks_err'] = res['ks_msigcom'][0]

                tgas_source = TGASSource(**tgas_kw)
                tgas_sources.append(tgas_source)

                obs.tgas_source = tgas_source

            else:
                logger.log(1, 'TGAS row could not be found.')

            obs.simbad_info = simbad_info
            observations.append(obs)

            # retrieve a previous measurement from the literature
            result = get_best_rv(obs)
            if result is not None:
                rv, rv_err, rv_qual, rv_bibcode, rv_source = result

                prv = PriorRV(rv=rv*u.km/u.s, err=rv_err*u.km/u.s,
                              qual=rv_qual, bibcode=rv_bibcode,
                              source=rv_source)
                obs.prior_rv = prv
                prior_rvs.append(prv)

            logger.log(1, '-'*68)

        session.add_all(observations)
        session.add_all(tgas_sources)
        session.add_all(prior_rvs)
        session.commit()

    # Last thing to do is cross-match with the Bensby catalog to
    #   replace velocities when they are better
    for sim_info in session.query(SimbadInfo)\
                           .filter(SimbadInfo.hip_id != None).all():
        hip_id = 'HIP' + str(sim_info.hip_id)
        row = bensby[bensby['OBJECT'] == hip_id]
        if len(row) > 0:
            sim_info.rv = row['velValue']
            sim_info.rv_qual = row['quality']
            sim_info.rv_bibcode = row['bibcode']
            session.flush()

    session.close()
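bary_vel_corr() is project code; one plausible implementation is astropy's built-in radial_velocity_correction, sketched here as a hypothetical standalone example (EarthLocation.of_site fetches the observatory registry, so this needs network access on first use):

import astropy.units as u
from astropy.coordinates import EarthLocation, SkyCoord
from astropy.time import Time

t = Time(2457823.5, format='jd', scale='utc')
loc = EarthLocation.of_site('KPNO')
sc = SkyCoord(ra='10:00:00', dec='+20:00:00', unit=(u.hourangle, u.degree))
v_bary = sc.radial_velocity_correction('barycentric', obstime=t, location=loc)
print(v_bary.to(u.km / u.s))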
Example #6
def main(db_path,
         run_name,
         data_root_path=None,
         filename=None,
         overwrite=False,
         pool=None):

    if pool is None:
        pool = schwimmbad.SerialPool()

    # connect to the database
    engine = db_connect(db_path)
    # engine.echo = True
    logger.debug("Connected to database at '{}'".format(db_path))

    # create a new session for interacting with the database
    session = Session()

    root_path, _ = path.split(db_path)
    if data_root_path is None:
        data_root_path = root_path

    plot_path = path.join(root_path, 'plots', run_name)
    if not path.exists(plot_path):
        os.makedirs(plot_path, exist_ok=True)

    # TODO: there might be some bugs here...
    n_lines = session.query(SpectralLineInfo).count()
    Halpha = session.query(SpectralLineInfo)\
                    .filter(SpectralLineInfo.name == 'Halpha').one()
    OI_lines = session.query(SpectralLineInfo)\
                      .filter(SpectralLineInfo.name.contains('[OI]')).all()

    if filename is None:  # grab all unfinished sources
        observations = session.query(Observation).join(Run)\
                              .filter(Run.name == run_name).all()

    else:  # only process the observation corresponding to this filename
        observations = session.query(Observation).join(Run)\
                              .filter(Run.name == run_name)\
                              .filter(Observation.filename_raw == filename).all()

    for obs in observations:
        measurements = session.query(SpectralLineMeasurement)\
                              .join(Observation)\
                              .filter(Observation.id == obs.id).all()

        if len(measurements) == n_lines and not overwrite:
            logger.debug('All line measurements already complete for object '
                         '{0} in file {1}'.format(obs.object,
                                                  obs.filename_raw))
            continue

        # Read the spectrum data and get wavelength solution
        filebase, _ = path.splitext(obs.filename_1d)
        filename_1d = obs.path_1d(data_root_path)
        spec = Table.read(filename_1d)
        logger.debug('Loaded 1D spectrum for object {0} from file {1}'.format(
            obs.object, filename_1d))

        # Extract region around Halpha
        x, (flux, ivar) = extract_region(
            spec['wavelength'],
            center=Halpha.wavelength.value,
            width=100,
            arrs=[spec['source_flux'], spec['source_ivar']])

        # We start by doing maximum likelihood estimation to fit the line, then
        # use the best-fit parameters to initialize an MCMC run.
        # TODO: need to figure out if it's emission or absorption...for now just
        #   assume absorption
        absorp_emiss = -1.
        lf = VoigtLineFitter(x, flux, ivar, absorp_emiss=absorp_emiss)
        lf.fit()
        fit_pars = lf.get_gp_mean_pars()

        if (not lf.success
                or abs(fit_pars['x0'] - Halpha.wavelength.value) > 16.  # 16 Å = ~700 km/s
                or abs(fit_pars['amp']) < 10):  # minimum amplitude - MAGIC NUMBER
            # TODO: should try again with emission line
            logger.error('absorption line has tiny amplitude! did '
                         'auto-determination of absorption/emission fail?')
            # TODO: what now?
            continue

        fig = lf.plot_fit()
        fig.savefig(path.join(plot_path, '{}_maxlike.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)

        # ----------------------------------------------------------------------

        # Run `emcee` instead to sample over GP model parameters:
        if fit_pars['std_G'] < 1E-2:
            lf.gp.freeze_parameter('mean:ln_std_G')

        initial = np.array(lf.gp.get_parameter_vector())
        if initial[4] < -10:  # TODO: ???
            initial[4] = -8.
        if initial[5] < -10:  # TODO: ???
            initial[5] = -8.
        ndim, nwalkers = len(initial), 64

        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        log_probability,
                                        pool=pool,
                                        args=(lf.gp, flux))

        logger.debug("Running burn-in...")
        p0 = initial + 1e-6 * np.random.randn(nwalkers, ndim)
        p0, lp, _ = sampler.run_mcmc(p0, 128)

        logger.debug("Running 2nd burn-in...")
        sampler.reset()
        p0 = p0[lp.argmax()] + 1e-3 * np.random.randn(nwalkers, ndim)
        p0, lp, _ = sampler.run_mcmc(p0, 512)

        logger.debug("Running production...")
        sampler.reset()
        pos, lp, _ = sampler.run_mcmc(p0, 1024)

        fit_kw = dict()
        for i, par_name in enumerate(lf.gp.get_parameter_names()):
            if 'kernel' in par_name: continue

            # remove 'mean:'
            par_name = par_name[5:]

            # skip bg
            if par_name.startswith('bg'): continue

            samples = sampler.flatchain[:, i]

            if par_name.startswith('ln_'):
                par_name = par_name[3:]
                samples = np.exp(samples)

            MAD = np.median(np.abs(samples - np.median(samples)))
            fit_kw[par_name] = np.median(samples)
            fit_kw[par_name + '_error'] = 1.5 * MAD  # convert to ~stddev
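            # (for a Gaussian, sigma ≈ 1.4826 * MAD; the 1.5 above is a
            # rounded version of that factor)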

        # remove all previous line measurements
        q = session.query(SpectralLineMeasurement).join(Observation)\
                   .filter(Observation.id == obs.id)
        if q.count() > 0:
            for meas in q.all():
                session.delete(meas)
            session.commit()

        slm = SpectralLineMeasurement(**fit_kw)
        slm.info = Halpha
        slm.observation = obs
        session.add(slm)
        session.commit()

        # --------------------------------------------------------------------
        # plot MCMC traces
        fig, axes = plt.subplots(2, 4, figsize=(18, 6))
        for i in range(sampler.dim):
            for walker in sampler.chain[..., i]:
                axes.flat[i].plot(walker,
                                  marker='',
                                  drawstyle='steps-mid',
                                  alpha=0.2)
            axes.flat[i].set_title(lf.gp.get_parameter_names()[i], fontsize=12)
        fig.tight_layout()
        fig.savefig(path.join(plot_path, '{}_mcmc_trace.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)
        # --------------------------------------------------------------------

        # --------------------------------------------------------------------
        # plot samples
        fig, axes = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

        samples = sampler.flatchain
        for s in samples[np.random.randint(len(samples), size=32)]:
            lf.gp.set_parameter_vector(s)
            lf.plot_fit(axes=axes, fit_alpha=0.2)

        fig.tight_layout()
        fig.savefig(path.join(plot_path, '{}_mcmc_fits.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)
        # --------------------------------------------------------------------

        # --------------------------------------------------------------------
        # corner plot
        fig = corner.corner(
            sampler.flatchain[::10, :],
            labels=[x.split(':')[1] for x in lf.gp.get_parameter_names()])
        fig.savefig(path.join(plot_path, '{}_corner.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)
        # --------------------------------------------------------------------

        # compute centroids for sky lines
        sky_centroids = []
        for j, sky_line in enumerate(OI_lines):
            wvln = sky_line.wavelength.value
            x, (flux, ivar) = extract_region(
                spec['wavelength'],
                center=wvln,
                width=32.,  # angstroms
                arrs=[spec['background_flux'], spec['background_ivar']])

            lf = GaussianLineFitter(x, flux, ivar,
                                    absorp_emiss=1.)  # all emission lines

            try:
                lf.fit()
                fit_pars = lf.get_gp_mean_pars()

            except Exception as e:
                logger.warning("Failed to fit sky line {0}:\n{1}".format(
                    sky_line, e))
                lf.success = False
                fit_pars = lf.get_init()
                # HACK: fall back to placeholder values so the failed
                # measurement can still be stored
                fit_pars['amp'] = 0.
                fit_pars['bg_coef'] = None
                fit_pars['x0'] = 0.

            # HACK: hackish signal-to-noise
            max_ = fit_pars['amp'] / np.sqrt(2 * np.pi * fit_pars['std']**2)
            SNR = max_ / np.median(1 / np.sqrt(ivar))

            if (not lf.success or abs(fit_pars['x0'] - wvln) > 4
                    or fit_pars['amp'] < 10 or fit_pars['std'] > 4
                    or SNR < 2.5):
                # failed
                x0 = np.nan * u.angstrom
                title = 'failed'
                fit_pars['amp'] = 0.

            else:
                x0 = fit_pars['x0'] * u.angstrom
                title = '{:.2f}'.format(fit_pars['amp'])

            if lf.success:
                fig = lf.plot_fit()
                fig.suptitle(title, y=0.95)
                fig.subplots_adjust(top=0.8)
                fig.savefig(path.join(
                    plot_path,
                    '{}_maxlike_sky_{:.0f}.png'.format(filebase, wvln)),
                            dpi=256)
                plt.close(fig)

            # store the sky line measurements
            fit_pars['std_G'] = fit_pars.pop('std')  # HACK
            fit_pars.pop('bg_coef')  # HACK
            slm = SpectralLineMeasurement(**fit_pars)
            slm.info = sky_line
            slm.observation = obs
            session.add(slm)
            session.commit()

            sky_centroids.append(x0)
        sky_centroids = u.Quantity(sky_centroids)

        logger.info('{} [{}]: x0={x0:.3f} σ={err:.3f}\n--------'.format(
            obs.object, filebase, x0=fit_kw['x0'], err=fit_kw['x0_error']))

        session.commit()

    pool.close()
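extract_region() is project code; judging from the call sites above, it returns the wavelength slice around a line plus matching slices of each input array. A hypothetical sketch under that assumption:

import numpy as np

def extract_region(x, center, width, arrs):
    # keep points within +/- width/2 of center, in x and in each array
    x = np.asarray(x)
    mask = np.abs(x - center) < width / 2.
    return x[mask], [np.asarray(a)[mask] for a in arrs]

wave = np.linspace(6400., 6700., 3000)
flux = np.ones_like(wave)
ivar = np.ones_like(wave)
x, (flux_cut, ivar_cut) = extract_region(wave, center=6563., width=100.,
                                         arrs=[flux, ivar])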
Example #7
    def _compute_offset_corrections(self):
        session = self.session
        run_name = self.run_name

        q = session.query(Observation).join(Run, SpectralLineMeasurement,
                                            PriorRV)
        q = q.filter(Run.name == run_name)
        q = q.filter(SpectralLineMeasurement.x0 != None)
        q = q.filter(PriorRV.rv != None)
        logger.debug('{0} observations with prior RV measurements'.format(
            q.distinct().count()))

        # retrieve all observations with measured centroids and previous RV's
        observations = q.all()

        # Below we compare the RV implied by a naive sky-line correction to
        # the true RV (with the barycentric velocity applied) and examine the
        # residual offsets

        raw_offsets = np.zeros(len(observations)) * u.angstrom
        all_sky_offsets = np.full((len(observations), 3), np.nan) * u.angstrom
        true_rv = np.zeros(len(observations)) * u.km / u.s
        obs_time = np.zeros(len(observations))
        night_id = np.zeros(len(observations), dtype=int)
        corrected_rv = np.zeros(len(observations)) * u.km / u.s

        for i, obs in enumerate(observations):
            # convert obstime into decimal hour
            obs_time[i] = np.sum(
                np.array(list(map(float, obs.time_obs.split(':')))) /
                np.array([1., 60., 3600.]))

            # Compute the raw offset: difference between Halpha centroid and true
            # wavelength value
            x0 = obs.measurements[0].x0 * u.angstrom
            offset = (x0 - self.Halpha)
            raw_offsets[i] = offset

            night_id[i] = obs.night

            # For each sky line (that passes certain quality checks), compute the
            # offset between the predicted wavelength and measured centroid
            # TODO: generalize these quality cuts - see also below in
            # get_corrected_rv
            sky_offsets = []
            for j, meas in enumerate(obs.measurements[1:]):
                sky_offset = meas.x0 * u.angstrom - meas.info.wavelength
                if (meas.amp > 16 and 0.3 < meas.std_G < 2 and
                        np.abs(sky_offset) < 4 * u.angstrom):  # MAGIC NUMBER: quality cuts
                    sky_offsets.append(sky_offset)
                    all_sky_offsets[i, j] = sky_offset

            sky_offsets = u.Quantity(sky_offsets)

            if len(sky_offsets) > 0:
                sky_offset = np.mean(sky_offsets)
            else:
                sky_offset = np.nan * u.angstrom
                logger.debug(
                    "not correcting with sky line for {0}".format(obs))

            true_rv[i] = obs.prior_rv.rv - obs.v_bary

        raw_rv = raw_offsets / self.Halpha * c.to(u.km / u.s)

        # unique night ID's
        unq_night_id = np.unique(night_id)
        unq_night_id.sort()

        # Now we do a totally insane thing. From visualizing the residual
        # differences, there seems to be a trend with the observation time. We
        # fit a line to these residuals and use this to further correct the
        # wavelength solutions using just the (strongest) [OI] 5577 Å line.
        diff = all_sky_offsets[:, 0] - (
            (raw_rv - true_rv) / c * 5577 * u.angstrom).decompose()
        diff[np.abs(diff) > 2 * u.angstrom] = np.nan * u.angstrom  # reject BIG offsets

        self._night_polys = dict()
        self._night_final_offsets = dict()
        for n in unq_night_id:
            mask = (night_id == n) & np.isfinite(diff)
            coef = np.polyfit(obs_time[mask],
                              diff[mask],
                              deg=1,
                              w=np.full(mask.sum(), 1 / 0.1))
            poly = np.poly1d(coef)
            self._night_polys[n] = poly

            sky_offset = np.nanmean(all_sky_offsets[mask, :2], axis=1)
            sky_offset[np.isnan(sky_offset)] = 0. * u.angstrom
            sky_offset -= self._night_polys[n](obs_time[mask]) * u.angstrom

            corrected_rv[mask] = (raw_offsets[mask] -
                                  sky_offset) / self.Halpha * c.to(u.km / u.s)

            # Finally, we align the median of each night's ∆RV distribution with 0
            drv = corrected_rv[mask] - true_rv[mask]
            self._night_final_offsets[n] = np.nanmedian(drv)

        # now estimate the std. dev. uncertainty using the MAD
        all_drv = corrected_rv - true_rv
        self._abs_err = 1.5 * np.nanmedian(
            np.abs(all_drv - np.nanmedian(all_drv)))
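The final line is a robust scatter estimate: the median absolute deviation (MAD) scaled by ~1.5 to approximate a Gaussian standard deviation. A quick numerical sanity check on toy data:

import numpy as np

rng = np.random.default_rng(42)
x = rng.normal(0., 2., size=10000)  # true sigma = 2
mad = np.median(np.abs(x - np.median(x)))
print(1.5 * mad, x.std())           # both close to 2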
Example #8
def main(night_path, skip_list_file, mask_file, overwrite=False, plot=False):
    """
    See argparse block at bottom of script for description of parameters.
    """

    night_path = path.realpath(path.expanduser(night_path))
    if not path.exists(night_path):
        raise IOError("Path '{}' doesn't exist".format(night_path))
    logger.info("Reading data from path: {}".format(night_path))

    base_path, night_name = path.split(night_path)
    data_path, run_name = path.split(base_path)
    output_path = path.realpath(
        path.join(data_path, 'processed', run_name, night_name))
    os.makedirs(output_path, exist_ok=True)
    logger.info("Saving processed files to path: {}".format(output_path))

    if plot:  # if we're making plots
        plot_path = path.realpath(path.join(output_path, 'plots'))
        logger.debug("Will make and save plots to: {}".format(plot_path))
        os.makedirs(plot_path, exist_ok=True)
    else:
        plot_path = None

    # check for files to skip (e.g., saturated or errored exposures)
    if skip_list_file is not None:  # a file containing a list of filenames to skip
        with open(skip_list_file, 'r') as f:
            skip_list = [x.strip() for x in f if x.strip()]
    else:
        skip_list = None

    # look for pixel mask file
    if mask_file is not None:
        # load YAML file specifying pixel masks for nearby sources
        with open(mask_file, 'r') as f:
            pixel_mask_spec = yaml.safe_load(f)
    else:
        pixel_mask_spec = None

    # generate the raw image file collection to process
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)
    logger.info("Frames to process:")
    logger.info("- Bias frames: {}".format(
        len(ic.files_filtered(imagetyp='BIAS'))))
    logger.info("- Flat frames: {}".format(
        len(ic.files_filtered(imagetyp='FLAT'))))
    logger.info("- Comparison lamp frames: {}".format(
        len(ic.files_filtered(imagetyp='COMP'))))
    logger.info("- Object frames: {}".format(
        len(ic.files_filtered(imagetyp='OBJECT'))))

    # HACK:
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    # ============================
    # Create the master bias frame
    # ============================

    # overscan region of the CCD, using FITS index notation
    oscan_fits_section = "[{}:{},:]".format(oscan_idx, oscan_idx + oscan_size)

    master_bias_file = path.join(output_path, 'master_bias.fits')

    if not os.path.exists(master_bias_file) or overwrite:
        # get list of overscan-subtracted bias frames as 2D image arrays
        bias_list = []
        for hdu, fname in ic.hdus(return_fname=True, imagetyp='BIAS'):
            logger.debug('Processing Bias frame: {0}'.format(fname))
            ccd = CCDData.read(path.join(ic.location, fname), unit='adu')
            ccd = ccdproc.gain_correct(ccd, gain=ccd_gain)
            ccd = ccdproc.subtract_overscan(ccd, overscan=ccd[:, oscan_idx:])
            ccd = ccdproc.trim_image(ccd,
                                     fits_section="[1:{},:]".format(oscan_idx))
            bias_list.append(ccd)

        # combine all bias frames into a master bias frame
        logger.info("Creating master bias frame")
        master_bias = ccdproc.combine(bias_list,
                                      method='average',
                                      clip_extrema=True,
                                      nlow=1,
                                      nhigh=1,
                                      error=True)
        master_bias.write(master_bias_file, overwrite=True)

    else:
        logger.info("Master bias frame file already exists: {}".format(
            master_bias_file))
        master_bias = CCDData.read(master_bias_file)

    if plot:
        # TODO: this assumes vertical CCD
        assert master_bias.shape[0] > master_bias.shape[1]
        aspect_ratio = master_bias.shape[1] / master_bias.shape[0]

        fig, ax = plt.subplots(1, 1, figsize=(10, 12 * aspect_ratio))
        vmin, vmax = zscaler.get_limits(master_bias.data)
        cs = ax.imshow(master_bias.data.T,
                       origin='lower',
                       cmap=cmap,
                       vmin=max(0, vmin),
                       vmax=vmax)
        ax.set_title('master bias frame [zscale]')

        fig.colorbar(cs)
        fig.tight_layout()
        fig.savefig(path.join(plot_path, 'master_bias.png'))
        plt.close(fig)

    # ============================
    # Create the master flat field
    # ============================
    # HACK:
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    master_flat_file = path.join(output_path, 'master_flat.fits')

    if not os.path.exists(master_flat_file) or overwrite:
        # create a list of flat frames
        flat_list = []
        for hdu, fname in ic.hdus(return_fname=True, imagetyp='FLAT'):
            logger.debug('Processing Flat frame: {0}'.format(fname))
            ccd = CCDData.read(path.join(ic.location, fname), unit='adu')
            ccd = ccdproc.gain_correct(ccd, gain=ccd_gain)
            ccd = ccdproc.ccd_process(ccd,
                                      oscan=oscan_fits_section,
                                      trim="[1:{},:]".format(oscan_idx),
                                      master_bias=master_bias)
            flat_list.append(ccd)

        # combine into a single master flat - use 3*sigma sigma-clipping
        logger.info("Creating master flat frame")
        master_flat = ccdproc.combine(flat_list,
                                      method='average',
                                      sigma_clip=True,
                                      low_thresh=3,
                                      high_thresh=3)
        master_flat.write(master_flat_file, overwrite=True)

        # TODO: make plot if requested?

    else:
        logger.info("Master flat frame file already exists: {}".format(
            master_flat_file))
        master_flat = CCDData.read(master_flat_file)

    if plot:
        # TODO: this assumes vertical CCD
        assert master_flat.shape[0] > master_flat.shape[1]
        aspect_ratio = master_flat.shape[1] / master_flat.shape[0]

        fig, ax = plt.subplots(1, 1, figsize=(10, 12 * aspect_ratio))
        vmin, vmax = zscaler.get_limits(master_flat.data)
        cs = ax.imshow(master_flat.data.T,
                       origin='lower',
                       cmap=cmap,
                       vmin=max(0, vmin),
                       vmax=vmax)
        ax.set_title('master flat frame [zscale]')

        fig.colorbar(cs)
        fig.tight_layout()
        fig.savefig(path.join(plot_path, 'master_flat.png'))
        plt.close(fig)

    # =====================
    # Process object frames
    # =====================
    # HACK:
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    logger.info("Beginning object frame processing...")
    for hdu, fname in ic.hdus(return_fname=True, imagetyp='OBJECT'):
        new_fname = path.join(output_path, 'p_{}'.format(fname))

        # -------------------------------------------
        # First do the simple processing of the frame
        # -------------------------------------------

        logger.debug("Processing '{}' [{}]".format(hdu.header['OBJECT'],
                                                   fname))
        if path.exists(new_fname) and not overwrite:
            logger.log(1, "\tAlready processed! {}".format(new_fname))
            ext = SourceCCDExtractor(filename=path.join(
                ic.location, new_fname),
                                     plot_path=plot_path,
                                     zscaler=zscaler,
                                     cmap=cmap,
                                     **ccd_props)
            nccd = ext.ccd

            # HACK: strip the 'p_' prefix from the filename base
            ext._filename_base = ext._filename_base[2:]

        else:
            # process the frame!
            ext = SourceCCDExtractor(filename=path.join(ic.location, fname),
                                     plot_path=plot_path,
                                     zscaler=zscaler,
                                     cmap=cmap,
                                     unit='adu',
                                     **ccd_props)

            _pix_mask = pixel_mask_spec.get(
                fname, None) if pixel_mask_spec is not None else None
            nccd = ext.process_raw_frame(pixel_mask_spec=_pix_mask,
                                         master_bias=master_bias,
                                         master_flat=master_flat)
            nccd.write(new_fname, overwrite=overwrite)

        # -------------------------------------------
        # Now do the 1D extraction
        # -------------------------------------------

        fname_1d = path.join(output_path, '1d_{0}'.format(fname))
        if path.exists(fname_1d) and not overwrite:
            logger.log(1, "\tAlready extracted! {}".format(fname_1d))
            continue

        else:
            logger.debug("\tExtracting to 1D")

            # first step is to fit a voigt profile to a middle-ish row to determine LSF
            lsf_p = ext.get_lsf_pars()  # MAGIC NUMBER

            try:
                tbl = ext.extract_1d(lsf_p)
            except Exception as e:
                logger.error('Failed! {}: {}'.format(e.__class__.__name__,
                                                     str(e)))
                continue

            hdu0 = fits.PrimaryHDU(header=nccd.header)
            hdu1 = fits.table_to_hdu(tbl)
            hdulist = fits.HDUList([hdu0, hdu1])

            hdulist.writeto(fname_1d, overwrite=overwrite)

        del ext

    # ==============================
    # Process comparison lamp frames
    # ==============================
    # HACK:
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    logger.info("Beginning comp. lamp frame processing...")
    for hdu, fname in ic.hdus(return_fname=True, imagetyp='COMP'):
        new_fname = path.join(output_path, 'p_{}'.format(fname))

        logger.debug("\tProcessing '{}'".format(hdu.header['OBJECT']))

        if path.exists(new_fname) and not overwrite:
            logger.log(1, "\tAlready processed! {}".format(new_fname))
            ext = CompCCDExtractor(filename=path.join(ic.location, new_fname),
                                   plot_path=plot_path,
                                   zscaler=zscaler,
                                   cmap=cmap,
                                   **ccd_props)
            nccd = ext.ccd

            # HACK: strip the 'p_' prefix from the filename base
            ext._filename_base = ext._filename_base[2:]

        else:
            # process the frame!
            ext = CompCCDExtractor(filename=path.join(ic.location, fname),
                                   plot_path=plot_path,
                                   unit='adu',
                                   **ccd_props)

            _pix_mask = pixel_mask_spec.get(
                fname, None) if pixel_mask_spec is not None else None
            nccd = ext.process_raw_frame(
                pixel_mask_spec=_pix_mask,
                master_bias=master_bias,
                master_flat=master_flat,
            )
            nccd.write(new_fname, overwrite=overwrite)

        # -------------------------------------------
        # Now do the 1D extraction
        # -------------------------------------------

        fname_1d = path.join(output_path, '1d_{0}'.format(fname))
        if path.exists(fname_1d) and not overwrite:
            logger.log(1, "\tAlready extracted! {}".format(fname_1d))
            continue

        else:
            logger.debug("\tExtracting to 1D")

            try:
                tbl = ext.extract_1d()
            except Exception as e:
                logger.error('Failed! {}: {}'.format(e.__class__.__name__,
                                                     str(e)))
                continue

            hdu0 = fits.PrimaryHDU(header=nccd.header)
            hdu1 = fits.table_to_hdu(tbl)
            hdulist = fits.HDUList([hdu0, hdu1])

            hdulist.writeto(fname_1d, overwrite=overwrite)
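Note the two indexing conventions mixed above: ccdproc FITS sections like "[1:N,:]" are 1-based and inclusive with the x (column) axis first, while ccd[:, oscan_idx:] is ordinary 0-based numpy slicing. A small illustration of the correspondence (shapes hypothetical):

import numpy as np
import ccdproc
from astropy.nddata import CCDData

ccd = CCDData(np.ones((100, 40)), unit='adu')  # (rows, columns)

# FITS section "[1:32,:]" keeps x (column) pixels 1..32, 1-based inclusive...
trimmed_fits = ccdproc.trim_image(ccd, fits_section='[1:32,:]')
# ...which matches numpy columns 0..31 (0-based, exclusive stop)
trimmed_np = ccdproc.trim_image(ccd[:, 0:32])
assert trimmed_fits.data.shape == trimmed_np.data.shape == (100, 32)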
Example #9
    def auto_identify(self):
        if self.line_list is None:
            raise ValueError("Can't auto-identify lines without a line list.")

        if len(self._map_dict['wavel']) < 4:
            msg = "Please identify at least 4 lines before trying auto-identify."
            logger.error(msg)
            self._ui['textbox'].setText("ERROR: {}".format(msg))
            return None

        _idx = np.argsort(self._map_dict['wavel'])
        wvln = np.array(self._map_dict['wavel'])[_idx]
        pixl = np.array(self._map_dict['pixel'])[_idx]

        # build an approximate wavelength solution to predict where lines are
        spl = InterpolatedUnivariateSpline(wvln, pixl, k=1) # use linear interp.

        predicted_pixels = spl(self.line_list)

        new_wavels = []
        new_pixels = []

        # from Wikipedia: https://en.wikipedia.org/wiki/Voigt_profile
        fG = 2*self._line_std_G*np.sqrt(2*np.log(2))
        fL = 2*self._line_hwhm_L
        lw = 0.5346*fL + np.sqrt(0.2166*fL**2 + fG**2)
        for pix_ctr,xmin,xmax,wave_idx,wave in zip(predicted_pixels,
                                                   predicted_pixels-5*lw,
                                                   predicted_pixels+5*lw,
                                                   range(len(self.line_list)),
                                                   self.line_list):

            if pix_ctr < 200 or pix_ctr > 1600: # skip if outside good rows
                continue

            elif wave_idx in self._done_wavel_idx: # skip if already fit
                continue

            logger.debug("Fitting line at predicted pix={:.2f}, λ={:.2f}"
                         .format(pix_ctr, wave))
            try:
                lp,gp = self.get_line_props(xmin, xmax,
                                            std_G0=self._line_std_G,
                                            hwhm_L0=self._line_hwhm_L)
            except Exception as e:
                logger.error("Failed to auto-fit line at {} ({msg})"
                             .format(wave, msg=str(e)))
                continue

            if lp is None or lp['amp'] < 100.:  # HACK: amplitude cut
                continue
            logger.debug('fit line: amp={0:.2f}, x0={1:.2f}'
                         .format(lp['amp'], lp['x0']))

            # figure out closest line
            # _all_pix = np.concatenate((self._map_dict['pixel'], new_pixels))
            # _all_wav = np.concatenate((self._map_dict['wavel'], new_wavels))
            # _diff = np.abs(lp['x0'] - np.array(_all_pix))
            # min_diff_idx = np.argmin(_diff)
            # min_diff_pix = _all_pix[min_diff_idx]
            # min_diff_wav = _all_wav[min_diff_idx]

            # if _diff[min_diff_idx] < 3.:
            #     logger.error("Fit line is too close to another at pix={:.2f}, λ={:.2f}"
            #                  .format(min_diff_pix, min_diff_wav))
            #     continue

            self.draw_line_marker(lp, wave, xmin, xmax, gp=gp)
            new_wavels.append(wave)
            new_pixels.append(pix_ctr)
            self._done_wavel_idx.append(wave_idx)

        self.fig.canvas.draw()

        # merge the newly identified lines with the existing ones
        all_wavels = np.concatenate((self._map_dict['wavel'], new_wavels))
        all_pixels = np.concatenate((self._map_dict['pixel'], new_pixels))
        _idx = np.argsort(all_wavels)
        self._map_dict['wavel'] = all_wavels[_idx]
        self._map_dict['pixel'] = all_pixels[_idx]
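The search window above comes from the standard Voigt-profile FWHM approximation cited in the code (accurate to about 0.02%): fV ≈ 0.5346 fL + sqrt(0.2166 fL² + fG²), with fG = 2σ√(2 ln 2). As a standalone calculation (inputs hypothetical):

import numpy as np

std_G = 1.0   # Gaussian sigma, hypothetical
hwhm_L = 0.5  # Lorentzian half-width at half-maximum, hypothetical

fG = 2 * std_G * np.sqrt(2 * np.log(2))  # Gaussian FWHM
fL = 2 * hwhm_L                          # Lorentzian FWHM
fV = 0.5346 * fL + np.sqrt(0.2166 * fL ** 2 + fG ** 2)
print(fV)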
Example #10
def main():
    # TODO: bad, hard-coded...
    # base_path = '/Volumes/ProjectData/gaia-comoving-followup/'
    base_path = '../../data/'
    db_path = path.join(base_path, 'db.sqlite')
    engine = db_connect(db_path)
    session = Session()

    chain_path = path.abspath('./isochrone_chains')
    os.makedirs(chain_path, exist_ok=True)

    # Check out the bottom of "Color-magnitude diagram.ipynb":
    interesting_group_ids = [1500, 1229, 1515]

    all_photometry = OrderedDict([
        ('1500-8455',
         OrderedDict([('J', (6.8379998, 0.021)),
                      ('H', (6.4640002, 0.017000001)),
                      ('K', (6.3369999, 0.017999999)),
                      ('W1', (6.2950001, 0.093000002)),
                      ('W2', (6.2490001, 0.026000001)),
                      ('W3', (6.3330002, 0.015)), ('B', (9.5950003, 0.022)),
                      ('V', (8.5120001, 0.014))])),
        ('1500-1804',
         OrderedDict([('J', (6.9039998, 0.041000001)),
                      ('H', (6.8559999, 0.027000001)),
                      ('K', (6.7989998, 0.017000001)),
                      ('W1', (6.803, 0.064999998)),
                      ('W2', (6.7600002, 0.018999999)),
                      ('W3', (6.8270001, 0.016000001)),
                      ('B', (7.4980001, 0.015)), ('V', (7.289, 0.011))])),
        ('1229-1366',
         OrderedDict([('J', (6.7290001, 0.024)), ('H', (6.2449999, 0.02)),
                      ('K', (6.1529999, 0.023)),
                      ('W1', (6.1799998, 0.096000001)), ('W2', (6.04, 0.035)),
                      ('W3', (6.132, 0.016000001)), ('B', (9.5539999, 0.021)),
                      ('V', (8.4619999, 0.014))])),
        ('1229-7470',
         OrderedDict([
             ('J', (9.1709995, 0.024)), ('H', (8.7959995, 0.026000001)),
             ('K', (8.7299995, 0.022)), ('W1', (8.6669998, 0.023)),
             ('W2', (8.7189999, 0.02)), ('W3', (8.6680002, 0.025)),
             ('B', (11.428, 0.054000001)), ('V', (10.614, 0.039999999))
         ])),
        ('1515-3584',
         OrderedDict([('J', (5.363999843597412, 0.024000000208616257)),
                      ('H', (4.965000152587891, 0.035999998450279236)),
                      ('K', (4.815999984741211, 0.032999999821186066)),
                      ('W1', (4.758, 0.215)), ('W2', (4.565, 0.115)),
                      ('W3', (4.771, 0.015)),
                      ('B', (8.347999572753906, 0.01600000075995922)),
                      ('V', (7.182000160217285, 0.009999999776482582))])),
        ('1515-1834',
         OrderedDict([('J', (8.855999946594238, 0.024000000208616257)),
                      ('H', (8.29699993133545, 0.020999999716877937)),
                      ('K', (8.178999900817871, 0.017999999225139618)),
                      ('W1', (8.117, 0.022)), ('W2', (8.15, 0.019)),
                      ('W3', (8.065, 0.02)),
                      ('B', (12.309000015258789, 0.11999999731779099)),
                      ('V', (11.069999694824219, 0.054999999701976776))]))
    ])

    for k in all_photometry:
        samples_file = path.join(chain_path, '{0}.hdf5'.format(k))

        if path.exists(samples_file):
            logger.info("skipping {0} - samples exist at {1}".format(
                k, samples_file))
            continue

        phot = all_photometry[k]
        obs = session.query(Observation).filter(Observation.object == k).one()
        plx = (obs.tgas_source.parallax, obs.tgas_source.parallax_error)

        # fit an isochrone
        model = StarModel(iso, use_emcee=True, parallax=plx, **phot)
        model.set_bounds(mass=(0.01, 20),
                         feh=(-1, 1),
                         distance=(0, 300),
                         AV=(0, 1))

        # initial conditions for emcee walkers
        nwalkers = 128

        p0 = []
        m0, age0, feh0 = model.ic.random_points(nwalkers,
                                                minmass=0.01,
                                                maxmass=10.,
                                                minfeh=-1,
                                                maxfeh=1)
        _, max_distance = model.bounds('distance')
        _, max_AV = model.bounds('AV')
        d0 = 10**(np.random.uniform(0, np.log10(max_distance), size=nwalkers))
        AV0 = np.random.uniform(0, max_AV, size=nwalkers)
        p0 += [m0]
        p0 += [age0, feh0, d0, AV0]

        p0 = np.array(p0).T
        npars = p0.shape[1]

        # run emcee
        ninit = 256
        nburn = 1024
        niter = 4096

        logger.debug('Running emcee - initial sampling...')
        sampler = emcee.EnsembleSampler(nwalkers, npars, model.lnpost)

        # equivalent to sampler.run_mcmc(p0, ninit), but with a progress bar
        for pos, prob, state in tqdm(sampler.sample(p0, iterations=ninit),
                                     total=ninit):
            pass

        # cull the weak walkers: restart all walkers in a tight Gaussian ball
        # around the best sample found so far
        best_ix = sampler.flatlnprobability.argmax()
        best_p0 = (sampler.flatchain[best_ix][None] +
                   np.random.normal(0, 1E-5, size=(nwalkers, npars)))

        sampler.reset()
        logger.debug('burn-in...')
        for pos, prob, state in tqdm(sampler.sample(best_p0, iterations=nburn),
                                     total=nburn):
            pass

        sampler.reset()
        logger.debug('sampling...')
        for pos, prob, state in tqdm(sampler.sample(pos, iterations=niter),
                                     total=niter):
            pass

        model._sampler = sampler
        model._make_samples(0.08)  # build model.samples from the chains

        model.samples.to_hdf(samples_file, key='samples')
        logger.debug('...done and saved!')
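
# A minimal sketch of reading the saved samples back for post-processing;
# assumes only pandas and the 'samples' key used in to_hdf() above. The
# helper name is illustrative, not part of the original pipeline.
import pandas as pd

def load_isochrone_samples(samples_file):
    """Read the isochrone posterior samples saved by the loop above."""
    return pd.read_hdf(samples_file, key='samples')
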
def solve_radial_velocity(filename,
                          wavelength_coef,
                          done_list=None,
                          plot=False):
    """Fit Voigt profiles to Halpha in the source spectrum and to [OI] sky
    lines in the background spectrum of a 1D extracted spectrum file."""
    hdulist = fits.open(filename)

    # read both HDUs
    hdu0 = hdulist[0]
    hdu1 = hdulist[1]
    name = hdu0.header['OBJECT']
    logger.debug("\tObject: {}".format(name))

    if done_list is not None and name in done_list:
        return

    # extract just the middle part of the CCD (we only really care about Halpha)
    tbl = hdu1.data[200:1600][::-1]

    # compute wavelength array for the pixels
    wvln = np.polynomial.polynomial.polyval(tbl['pix'], wavelength_coef)

    # ==============================
    # Fit a Voigt profile to H-alpha
    # ==============================

    # extract region of SOURCE spectrum around Halpha
    i1 = np.argmin(np.abs(wvln - 6460))
    i2 = np.argmin(np.abs(wvln - 6665))

    wave = wvln[i1:i2 + 1]
    flux = tbl['source_flux'][i1:i2 + 1]
    ivar = tbl['source_ivar'][i1:i2 + 1]
    halpha_fit_p = fit_spec_line(wave,
                                 flux,
                                 ivar,
                                 n_bg_coef=2,
                                 target_x=6563.,
                                 absorp_emiss=-1.)  # Halpha in absorption

    if plot:
        _grid = np.linspace(wave.min(), wave.max(), 512)
        fit_flux = voigt_polynomial(_grid, **halpha_fit_p)

        plt.figure(figsize=(14, 8))
        plt.title("OBJECT: {}, EXPTIME: {}".format(hdu0.header['OBJECT'],
                                                   hdu0.header['EXPTIME']))

        plt.plot(wave, flux, marker='', drawstyle='steps-mid', alpha=0.5)
        plt.errorbar(wave,
                     flux,
                     1 / np.sqrt(ivar),
                     linestyle='none',
                     marker='',
                     ecolor='#666666',
                     alpha=0.75,
                     zorder=-10)
        plt.plot(_grid, fit_flux, marker='', alpha=0.75)

    # =========================================
    # Fit a Voigt profile to [OI] 6300 and 5577
    # =========================================

    # needed for barycenter correction (currently unused; see the commented
    # block below)
    # earth_loc = coord.EarthLocation.of_site('KPNO')

    for target_wave in [5577.3387, 6300.3]:  # [OI] sky lines, air wavelengths
        # extract region of SKY spectrum around line
        i1 = np.argmin(np.abs(wvln - (target_wave - 25)))
        i2 = np.argmin(np.abs(wvln - (target_wave + 25)))

        wave = wvln[i1:i2 + 1]
        flux = tbl['background_flux'][i1:i2 + 1]
        ivar = tbl['background_ivar'][i1:i2 + 1]

        OI_fit_p = fit_spec_line(wave,
                                 flux,
                                 ivar,
                                 std_G0=1.,
                                 n_bg_coef=2,
                                 target_x=target_wave,
                                 absorp_emiss=1.)  # sky lines in emission

        print('[OI] {:.2f}'.format(target_wave))
        print('∆x0: {:.3f}'.format(OI_fit_p['x0'] - target_wave))
        print('amp: {:.3e}'.format(OI_fit_p['amp']))

        chi2 = np.sum((voigt_polynomial(wave, **OI_fit_p) - flux)**2 * ivar)
        print('chi2: {}'.format(chi2))

        if plot:
            _grid = np.linspace(wave.min(), wave.max(), 512)
            fit_flux = voigt_polynomial(_grid, **OI_fit_p)

            plt.figure(figsize=(14, 8))
            plt.title("OBJECT: {}, EXPTIME: {}".format(hdu0.header['OBJECT'],
                                                       hdu0.header['EXPTIME']))

            plt.plot(wave, flux, marker='', drawstyle='steps-mid', alpha=0.5)
            plt.errorbar(wave,
                         flux,
                         1 / np.sqrt(ivar),
                         linestyle='none',
                         marker='',
                         ecolor='#666666',
                         alpha=0.75,
                         zorder=-10)
            plt.plot(_grid, fit_flux, marker='', alpha=0.75)

    # from: http://www.star.ucl.ac.uk/~msw/lines.html
    # Halpha = 6562.80 # air, STP
    # OI_5577 = 5577.3387 # air, STP
    # OI_6300 = 6300.30 # air, STP

    # dOI = OI_fit_p['x0'] - OI_5577
    # dHalpha = halpha_fit_p['x0'] - Halpha
    # dlambda = dHalpha - dOI

    # RV = dlambda / Halpha * c
    # print("Radial velocity: ", RV.to(u.km/u.s))

    # sc = coord.SkyCoord(ra=hdu0.header['RA'], dec=hdu0.header['DEC'],
    #                     unit=(u.hourangle, u.degree))
    # time = hdu0.header['JD']*u.day + hdu0.header['EXPTIME']/2.*u.second
    # time = Time(time.to(u.day), format='jd', scale='utc')
    # v_bary = bary_vel_corr(time, sc, location=earth_loc)
    # RV_corr = (RV + v_bary).to(u.km/u.s)
    # print("Bary. correction: ", v_bary.to(u.km/u.s))
    # print("Radial velocity (bary. corrected): ", RV_corr)
    # print()

    if plot:
        plt.show()
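
# A minimal sketch making the commented-out RV computation above live, using
# the air rest wavelengths quoted above and astropy's speed of light. The
# centroids (x0 values) would come from fit_spec_line() as in the function
# above; the helper name and signature are illustrative.
import astropy.units as u
from astropy.constants import c

def estimate_rv(halpha_x0, sky_x0, sky_rest=5577.3387, halpha_rest=6562.80):
    """Shift-difference RV: the sky-line offset removes the wavelength
    zero-point error, and the remaining Halpha shift gives the velocity."""
    d_sky = (sky_x0 - sky_rest) * u.angstrom  # instrumental offset
    d_halpha = (halpha_x0 - halpha_rest) * u.angstrom
    dlambda = d_halpha - d_sky
    return (dlambda / (halpha_rest * u.angstrom) * c).to(u.km / u.s)
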
Example #12
0
def generate_wavelength_model(comp_lamp_path, night_path, plot_path):
    """
    Fit a line + Gaussian Process model to the pixel vs. wavelength relation for
    identified and centroided comp. lamp spectrum emission lines.

    Parameters
    ----------
    comp_lamp_path : str
    night_path : str
    plot_path : str

    """

    # read 1D comp lamp spectrum
    spec = Table.read(comp_lamp_path)

    # read wavelength guess file
    guess_path = path.abspath(
        path.join(night_path, '..', 'wavelength_guess.csv'))
    pix_wav = np.genfromtxt(guess_path, delimiter=',', names=True)

    # get emission line centroids at the guessed positions of the lines
    pix_x0s = fit_all_lines(spec['pix'], spec['flux'], spec['ivar'],
                            pix_wav['wavelength'], pix_wav['pixel'])

    # only keep successful ones:
    mask = np.isfinite(pix_x0s)
    logger.debug("Successfully fit {}/{} comp. lamp lines".format(
        mask.sum(), len(mask)))
    pix_wav = pix_wav[mask]
    pix_x0s = pix_x0s[mask]

    # --------------------------------------------------------------------------
    # fit a Gaussian process to determine the pixel-to-wavelength transformation
    #
    idx = np.argsort(pix_x0s)
    med_x = np.median(pix_x0s[idx])
    x = pix_x0s[idx] - med_x  # center pixel positions on their median
    y = pix_wav['wavelength'][idx]

    # n_bg_coef is defined at module scope (number of polynomial coefficients
    # in the mean model)
    model = GPModel(x=x, y=y, n_bg_coef=n_bg_coef, x_shift=med_x)

    # Fit for the maximum likelihood parameters; the GPModel instance itself
    # is the objective (calling it returns the negative log-likelihood, per
    # the sign flip in the log message below)
    bounds = model.gp.get_parameter_bounds()
    init_params = model.gp.get_parameter_vector()
    soln = minimize(model, init_params, method="L-BFGS-B", bounds=bounds)
    model.gp.set_parameter_vector(soln.x)
    logger.debug("Success: {}, Final log-likelihood: {}".format(
        soln.success, -soln.fun))

    # ---
    # residuals to the mean model
    x_grid = np.linspace(0, 1600, 1024) - med_x
    mu, var = model.gp.predict(y, x_grid, return_var=True)
    std = np.sqrt(var)

    _y_mean = model.mean_model.get_value(x)
    _mu_mean = model.mean_model.get_value(x_grid)

    # Plot the maximum likelihood model
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))

    # data
    ax.scatter(x + med_x, y - _y_mean, marker='o')

    # full GP model
    gp_color = "#ff7f0e"
    ax.plot(x_grid + med_x, mu - _mu_mean, color=gp_color, marker='')
    ax.fill_between(x_grid + med_x,
                    mu + std - _mu_mean,
                    mu - std - _mu_mean,
                    color=gp_color,
                    alpha=0.3,
                    edgecolor="none")

    ax.set_xlabel('pixel')
    ax.set_ylabel(r'wavelength [$\AA$]')
    ax.set_title(path.basename(comp_lamp_path))

    fig.tight_layout()
    fig.savefig(path.join(plot_path, 'wavelength_mean_subtracted.png'),
                dpi=200)
    # ---

    # ---
    # residuals to the full GP model; mu and std on x_grid are unchanged from
    # above, so only the prediction at the data points is needed here
    y_mu, _ = model.gp.predict(y, x, return_var=True)

    # Plot the maximum likelihood model
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    # data
    ax.scatter(x + med_x, y - y_mu, marker='o')

    gp_color = "#ff7f0e"
    # the GP's residual to its own mean prediction is identically zero
    ax.plot(x_grid + med_x, np.zeros_like(x_grid), color=gp_color, marker='')
    ax.fill_between(x_grid + med_x,
                    std,
                    -std,
                    color=gp_color,
                    alpha=0.3,
                    edgecolor="none")

    ax.set_xlabel('pixel')
    ax.set_ylabel(r'wavelength residual [$\AA$]')
    ax.set_title(path.basename(comp_lamp_path))

    ax.set_ylim(-1, 1)
    ax.axvline(683., zorder=-10, color='#666666', alpha=0.5)  # MAGIC NUMBER

    # twin axis: convert the wavelength residual to a velocity error at
    # Halpha (6563 Angstrom), with c ~ 3e5 km/s
    ax2 = ax.twinx()
    ax2.set_ylim([x / 6563 * 300000 for x in ax.get_ylim()])
    ax2.set_ylabel(r'velocity error at ${{\rm H}}_\alpha$ [{}]'.format(
        (u.km / u.s).to_string(format='latex_inline')))

    fig.tight_layout()
    fig.savefig(path.join(plot_path, 'wavelength_residuals.png'), dpi=200)
    # --------------------------------------------------------------------------

    return model
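
# A minimal sketch of evaluating the returned model at arbitrary pixels,
# following the predict() pattern used in add_wavelength() below; the
# attribute names (gp, y, x_shift) match their use there, while the helper
# name is illustrative.
import numpy as np

def pixels_to_wavelength(model, pix):
    """Predict wavelength and its root-variance at the given pixel values."""
    mu, var = model.gp.predict(model.y, np.atleast_1d(pix) - model.x_shift,
                               return_var=True)
    return mu, np.sqrt(var)
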
Example #13
0
def add_wavelength(filename, model, std_tol, overwrite=False, plot_path=None):
    """
    Given an extracted, 1D spectrum FITS file, add wavelength and
    wavelength_prec columnes to the file.

    Parameters
    ----------
    filename : str
        Path to a 1D extracted spectrum file.
    model : `comoving_rv.longslit.GPModel`
    std_tol : quantity_like
        Set the wavelength grid to NaN when the root-variance of the prediction
        from the Gaussian process is larger than this tolerance.
    overwrite : bool (optional)
        Overwrite any existing wavelength information.
    plot_path : str (optional)
    """
    hdulist = fits.open(filename)

    logger.debug("\tObject: {}".format(hdulist[0].header['OBJECT']))

    # read the 1D spectrum table
    tbl = Table(hdulist[1].data)

    if 'wavelength' in tbl.colnames and not overwrite:
        logger.debug("\tTable already contains wavelength values!")
        return

    # compute wavelength array for the pixels
    wavelength, var = model.gp.predict(model.y,
                                       tbl['pix'] - model.x_shift,
                                       return_var=True)
    bad_idx = np.sqrt(var) > std_tol.to(u.angstrom).value
    wavelength[bad_idx] = np.nan

    tbl['wavelength'] = wavelength
    tbl['wavelength_err'] = np.sqrt(var)

    new_hdu1 = fits.table_to_hdu(tbl)
    new_hdulist = fits.HDUList([hdulist[0], new_hdu1])

    logger.debug("\tWriting out file with wavelength array.")
    new_hdulist.writeto(filename, overwrite=True)

    if plot_path is not None:
        # plot the spectrum vs. wavelength
        fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

        axes[0].plot(tbl['wavelength'],
                     tbl['source_flux'],
                     marker='',
                     drawstyle='steps-mid',
                     linewidth=1.)
        axes[0].errorbar(tbl['wavelength'],
                         tbl['source_flux'],
                         1 / np.sqrt(tbl['source_ivar']),
                         linestyle='none',
                         marker='',
                         ecolor='#666666',
                         alpha=1.,
                         zorder=-10)
        axes[0].set_ylim(tbl['source_flux'][200] / 4,  # MAGIC NUMBER: ref pixel
                         np.nanmax(tbl['source_flux']))
        axes[0].set_yscale('log')

        axes[1].plot(tbl['wavelength'],
                     tbl['background_flux'],
                     marker='',
                     drawstyle='steps-mid',
                     linewidth=1.)
        axes[1].errorbar(tbl['wavelength'],
                         tbl['background_flux'],
                         1 / np.sqrt(tbl['background_ivar']),
                         linestyle='none',
                         marker='',
                         ecolor='#666666',
                         alpha=1.,
                         zorder=-10)
        axes[1].set_ylim(1e-1, np.nanmax(tbl['background_flux']))
        axes[1].set_yscale('log')

        fig.tight_layout()
        _filename_base = path.splitext(path.basename(filename))[0]
        fig.savefig(
            path.join(plot_path, '{0}_1d_wvln.png'.format(_filename_base)))

        plt.close(fig)
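
# A minimal end-to-end sketch tying the two functions above together,
# assuming they live in the same module; all paths and the tolerance value
# are illustrative.
if __name__ == '__main__':
    import astropy.units as u
    model = generate_wavelength_model('night1/comp_lamp_1d.fits',
                                      'night1', 'plots')
    add_wavelength('night1/object42_1d.fits', model,
                   std_tol=0.5 * u.angstrom, plot_path='plots')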