def _correct(self, df, cache=True):
    """
    Combine the measured temperatures of one star into a single corrected temperature.

    This function is called by convert_measured_to_actual and is NOT meant to be called directly!

    Parameters:
    ===========
    - df:       pandas DataFrame
                Measurements that all belong to the same star. The columns
                'Star', 'Tmeas', 'Tmeas_err', 'Instrument' and 'addmode' are read.

    - cache:    boolean
                If True, store the loaded chain/probability arrays in
                self._chainCache and the predicted temperature table in
                self._predictionCache, keyed by (instrument, addmode), so that
                later calls do not have to rebuild them.

    Returns:
    ========
    A pandas DataFrame with the columns 'Star', 'Corrected_Temperature',
    'T_uperr' and 'T_lowerr'. It has a single row, or no rows at all if
    none of the rows in df has a measured temperature.

    Raises:
    =======
    ValueError if rows with a measured temperature exist but none of them
    ends up in an (Instrument, addmode) group.
    """
    output_columns = ['Star', 'Corrected_Temperature', 'T_uperr', 'T_lowerr']

    # Rows without a measured temperature cannot contribute a PDF
    df = df.dropna(subset=['Tmeas'])
    if len(df) == 0:
        # assign() builds a new frame instead of writing into the dropna() result
        empty = df.assign(Corrected_Temperature=np.nan, T_uperr=np.nan, T_lowerr=np.nan)
        return empty[output_columns].copy()

    # Group by instrument and addmode, and get the PDF for the actual temperature for each.
    # The keys must be a list: pandas >= 1.0 interprets a tuple as ONE column label.
    groups = df.groupby(['Instrument', 'addmode'])
    probabilities = []
    temperature = None
    for (inst, addmode), group in groups:
        # Make a fitter instance
        d = {'instrument': inst,
             'directory': self._caldir[inst],
             'addmode': addmode}
        key = (inst, addmode)
        fitter = self._fitters[inst]()

        # get/set the cache
        if key in self._chainCache:
            chain, probs = self._chainCache[key]
            Tpredictions = self._predictionCache[key]
            fitter.spoof_sampler(chain, probs)
        else:
            chain = np.loadtxt(self._flatchain_format.format(**d))
            probs = np.loadtxt(self._flatlnprob_format.format(**d))
            fitter.spoof_sampler(chain, probs)

            # Predict the measured temperature on a grid of actual temperatures
            Ta_arr = np.arange(2000, 12000, 2.0)
            Tmeas_pred = fitter.predict(Ta_arr, N=10000)
            Tpredictions = pd.DataFrame(Tmeas_pred, columns=Ta_arr)

            if cache:
                self._chainCache[key] = (chain, probs)
                self._predictionCache[key] = Tpredictions

        # Get the PDF (probability distribution function) of every measurement
        Tmeas = group['Tmeas'].values
        Tmeas_err = group['Tmeas_err'].values
        for Tm, Tm_err in zip(Tmeas, Tmeas_err):
            temperature, probability = CCF_Systematics.get_actual_temperature(fitter, Tm, Tm_err,
                                                                              cache=Tpredictions,
                                                                              summarize=False)
            probabilities.append(probability / probability.sum())

    if not probabilities:
        # groupby() produced no groups, so there is nothing to combine
        raise ValueError('No (Instrument, addmode) groups found for star {}'.format(df['Star'].values[0]))

    # Multiply all the PDFs
    # NOTE(review): a plain product of many normalized PDFs may underflow - confirm this is
    # acceptable for the number of measurements per star that is used in practice.
    Prob = np.array(probabilities).prod(axis=0)

    # Summarize the PDF (similar interface to np.percentile)
    l, m, h = integral(temperature, Prob, [0.16, 0.5, 0.84], k=0)

    # Save in a Pandas DataFrame and return
    return pd.DataFrame(data={'Star': df['Star'].values[0],
                              'Corrected_Temperature': m,
                              'T_uperr': h - m,
                              'T_lowerr': m - l},
                        index=[0])
def get_measured_temperature(self, starname, date, Tmax, instrument=None, N=7, addmode='simple', feh=None,
                             vsini=None):
    """
    Get the measured temperature by doing a weighted sum over temperatures near the given one
    (which I find by hand)

    Parameters:
    ===========
    - starname:     string
                    The name of the star

    - date:         string
                    The date the observation was taken, formatted as 'YYYY-MM-DD'

    - Tmax:         float
                    The temperature to search near

    - instrument:   string
                    The instrument used (this function automatically finds it if not given)

    - N:            integer
                    The number of temperature points to take

    - addmode:      string
                    The way the individual order CCFs were co-added.

    - feh:          float
                    The metallicity to use. If not given, it finds whatever gives the highest ccf peak.

    - vsini:        float
                    The vsini to use. If not given, it finds whatever gives the highest ccf peak.

    Returns:
    ========
    A pandas DataFrame with the starname, date, instrument, and model parameters for the
    temperatures near the requested one. If no instrument is given and the star/date is
    not found in any interface, a warning is issued and None is returned.

    Raises:
    =======
    KeyError if the star, or the date (including the days directly before and after it),
    is not available for the requested instrument.
    ValueError if no CCF matches Tmax (and feh/vsini, when given).
    """
    if instrument is None:
        # Find this star/date in all of the interfaces
        df_list = []
        for inst, interface in self._interfaces.items():
            if starname in interface.list_stars() and date in interface.list_dates(starname):
                # Forward every setting; otherwise addmode/feh/vsini silently fall back to defaults
                df_list.append(self.get_measured_temperature(starname, date, Tmax, instrument=inst, N=N,
                                                             addmode=addmode, feh=feh, vsini=vsini))
        if not df_list:
            warnings.warn('Star ({}) not found for date ({}) in any CCF interfaces!'.format(starname, date))
            return None
        return pd.concat(df_list, ignore_index=True)

    # Check that the star/date combination are in the requested interface
    interface = self._interfaces[instrument]
    if starname not in interface.list_stars():
        raise KeyError('Star ({}) not in instrument ({})'.format(starname, instrument))
    available_dates = interface.list_dates(starname)
    if date not in available_dates:
        # Try date +/- 1 before failing (in case of civil/UT date mismatch or something)
        from datetime import datetime, timedelta

        year, month, day = [int(s) for s in date.split('-')]
        for inc in [-1, 1]:
            t = datetime(year, month, day) + timedelta(inc)
            test_date = '{}-{:02d}-{:02d}'.format(t.year, t.month, t.day)
            if test_date in available_dates:
                return self.get_measured_temperature(starname, test_date, Tmax, instrument=instrument, N=N,
                                                     addmode=addmode, feh=feh, vsini=vsini)
        raise KeyError(
            'Date ({}) not in CCF interface for star {} and instrument {}'.format(date, starname, instrument))

    # Get CCF information from the requested instrument/star/date combo
    logging.info('{}, {}, {}, {}'.format(starname, date, instrument, addmode))
    df = interface._compile_data(starname=starname, date=date, addmode=addmode, read_ccf=True)

    # Get the parameters and RV of the CCF with the highest peak (which has temperature = Tmax)
    requested = df.loc[df['T'] == Tmax]
    if feh is not None:
        requested = requested.loc[requested['[Fe/H]'] == feh]
    if vsini is not None:
        requested = requested.loc[requested['vsini'] == vsini]
    if len(requested) == 0:
        raise ValueError('No CCFs found with T = {} (feh = {}, vsini = {}) for star/date = {}/{}'.format(
            Tmax, feh, vsini, starname, date))

    # Use a fresh 0..n-1 index so that the label returned by idxmax is valid for .loc
    # (np.argmax on a Series gives a position, which is not a label of the filtered frame)
    requested = requested.reset_index(drop=True)
    best = requested['ccf_max'].idxmax()
    vsini = requested.loc[best, 'vsini'].item()
    metal = requested.loc[best, '[Fe/H]'].item()
    logg = requested.loc[best, 'logg'].item()
    rv = interface.velocities[requested.loc[best, 'ccf'].argmax()]

    # Every CCF is sampled at the velocity closest to rv; this does not change inside the loop
    idx = np.argmin(np.abs(interface.velocities - rv))

    # Now, get the CCF height for the N/2 temperatures on either side of Tmax
    N = roundodd(N)
    d = defaultdict(list)
    for T in np.arange(Tmax - 100 * (N - 1) / 2, Tmax + 100 * (N - 1) / 2 + 1, 100):
        match = df.loc[(df['T'] == T) & (df.vsini == vsini) & (df['[Fe/H]'] == metal) & (df.logg == logg)]
        if len(match) == 0:
            # Keep a placeholder row so that every requested temperature is present
            warnings.warn('No matches for T = {} with star/date = {}/{}!'.format(T, starname, date))
            row_vsini, row_logg, row_metal = vsini, logg, metal
            ccf_height = np.nan
            significance = np.nan
        else:
            if len(match) > 1:
                # Several candidates: keep the one with the highest CCF peak
                match = match.sort_values(by='ccf_max').tail(1)
            row_vsini = match['vsini'].item()
            row_logg = match['logg'].item()
            row_metal = match['[Fe/H]'].item()
            ccf = match['ccf'].item()
            ccf_height = ccf[idx]

            # Measure the detection significance
            std = mad(ccf)
            mean = np.median(ccf)
            significance = (ccf_height - mean) / std

        # Save the best parameters for this temperature
        d['Star'].append(starname)
        d['Date'].append(date)
        d['Instrument'].append(instrument)
        d['Temperature'].append(T)
        d['vsini'].append(row_vsini)
        d['logg'].append(row_logg)
        d['[Fe/H]'].append(row_metal)
        d['rv'].append(rv)
        d['CCF'].append(ccf_height)
        d['significance'].append(significance)

    # Do the weighted sum.
    summary = CCF_Systematics.get_Tmeas(pd.DataFrame(data=d), include_actual=False)

    # Put the star, date, and instrument back in the dataframe before returning
    summary['Star'] = starname
    summary['Date'] = date
    summary['Instrument'] = instrument
    summary['addmode'] = addmode

    return summary