def testFitSine(self):
  """Fits a two-segment sine wave under three penalty coefficients.

  For each coefficient, the RMSE between the input flux and the chosen spline,
  the light curve mask, the selected break point spacing and the likelihood,
  penalty and BIC terms reported in the metadata are compared against recorded
  reference values.
  """
  # High frequency sine wave, stored as two adjacent segments.
  all_time = [np.arange(0, 100, 0.1), np.arange(100, 200, 0.1)]
  all_flux = [np.sin(segment_time) for segment_time in all_time]

  # Logarithmically sample candidate break point spacings.
  bkspaces = np.logspace(np.log10(0.5), np.log10(5), num=20)

  def _rmse(flux_segments, spline_segments):
    # Root-mean-square error over all segments joined end to end.
    residual = np.concatenate(flux_segments) - np.concatenate(spline_segments)
    return np.sqrt(np.mean(residual**2))

  # Columns: penalty_coeff, rmse, bkspace, likelihood_term, penalty_term, bic.
  cases = [
      # Penalty coefficient 1.0.
      (1.0, 0.013013, 1.67990914314, -6685.64217856480, 942.51190498322,
       -5743.13027358158),
      # Decrease penalty coefficient; allow smaller spacing for closer fit.
      (0.1, 0.0066376, 1.48817572082, -6731.59913975551, 1064.12634433589,
       -6625.18650532192),
      # Increase penalty coefficient; require larger spacing at the cost of
      # worse fit.
      (2, 0.026215449, 1.89634509537, -6495.65564287904, 836.099270549629,
       -4823.45710177978),
  ]

  for coeff, rmse, bkspace, likelihood_term, penalty_term, bic in cases:
    spline, metadata = kepler_spline.choose_kepler_spline(
        all_time, all_flux, bkspaces, penalty_coeff=coeff)
    self.assertAlmostEqual(_rmse(all_flux, spline), rmse)
    self.assertTrue(np.all(metadata.light_curve_mask))
    self.assertAlmostEqual(metadata.bkspace, bkspace)
    self.assertEmpty(metadata.bad_bkspaces)
    self.assertAlmostEqual(metadata.likelihood_term, likelihood_term)
    self.assertAlmostEqual(metadata.penalty_term, penalty_term)
    self.assertAlmostEqual(metadata.bic, bic)
def read_and_process_light_curve(kepid, kepler_data_dir, max_gap_width=0.75):
  """Reads a light curve, fits a B-spline and divides the curve by the spline.

  Args:
    kepid: Kepler id of the target star.
    kepler_data_dir: Base directory containing Kepler data. See
      kepler_io.kepler_filenames().
    max_gap_width: Gap size (in days) above which the light curve is split for
      the fitting of B-splines.

  Returns:
    time: 1D NumPy array; the time values of the light curve.
    flux: 1D NumPy array; the normalized flux values of the light curve.

  Raises:
    IOError: If the light curve files for this Kepler ID cannot be found.
    ValueError: If the spline could not be fit.
  """
  # Read the Kepler light curve.
  file_names = kepler_io.kepler_filenames(kepler_data_dir, kepid)
  if not file_names:
    raise IOError("Failed to find .fits files in %s for Kepler ID %s" %
                  (kepler_data_dir, kepid))

  all_time, all_flux = kepler_io.read_kepler_light_curve(file_names)

  # Split on gaps.
  all_time, all_flux = util.split(all_time, all_flux, gap_width=max_gap_width)

  # Logarithmically sample candidate break point spacings between 0.5 and 20
  # days.
  bkspaces = np.logspace(np.log10(0.5), np.log10(20), num=20)

  # Generate spline. Only the first element of the result (the piecewise
  # spline) is needed here.
  spline = kepler_spline.choose_kepler_spline(
      all_time, all_flux, bkspaces, penalty_coeff=1.0, verbose=False)[0]

  if spline is None:
    # Interpolate the ID into the message. Passing it as a second positional
    # argument to ValueError would leave the "%s" placeholder unformatted and
    # render the exception as a tuple.
    raise ValueError("Failed to fit spline with Kepler ID %s" % kepid)

  # Concatenate the piecewise light curve and spline.
  time = np.concatenate(all_time)
  flux = np.concatenate(all_flux)
  spline = np.concatenate(spline)

  # In rare cases the piecewise spline contains NaNs in places the spline could
  # not be fit. We can't normalize those points if the spline isn't defined
  # there. Instead we just remove them.
  finite_i = np.isfinite(spline)
  if not np.all(finite_i):
    tf.logging.warn("Incomplete spline with Kepler ID %s", kepid)
    time = time[finite_i]
    flux = flux[finite_i]
    spline = spline[finite_i]

  # "Flatten" the light curve (remove low-frequency variability) by dividing by
  # the spline.
  flux /= spline

  return time, flux
def testFitSine(self):
  """Checks spline selection on a sine wave for several penalty coefficients.

  The light curve is a sine wave split into two segments. The spline chosen
  for each penalty coefficient is checked against recorded values of the fit
  RMSE, the selected break point spacing and the likelihood, penalty and BIC
  terms. The mask must be all True and no break point spacing may be reported
  as bad.
  """
  # Logarithmically sample candidate break point spacings.
  bkspaces = np.logspace(np.log10(0.5), np.log10(5), num=20)

  # High frequency sine wave.
  all_time = [np.arange(0, 100, 0.1), np.arange(100, 200, 0.1)]
  all_flux = [np.sin(t) for t in all_time]

  def _rmse(flux_pieces, spline_pieces):
    # Root-mean-square difference between the joined flux and joined spline.
    joined_flux = np.concatenate(flux_pieces)
    joined_spline = np.concatenate(spline_pieces)
    squared_error = (joined_flux - joined_spline)**2
    return np.sqrt(np.mean(squared_error))

  def _check_fit(penalty_coeff, rmse, bkspace, likelihood_term, penalty_term,
                 bic):
    # Runs one fit and compares every reported quantity with its reference.
    spline, metadata = kepler_spline.choose_kepler_spline(
        all_time, all_flux, bkspaces, penalty_coeff=penalty_coeff)
    self.assertAlmostEqual(_rmse(all_flux, spline), rmse)
    self.assertTrue(np.all(metadata.light_curve_mask))
    self.assertAlmostEqual(metadata.bkspace, bkspace)
    self.assertEmpty(metadata.bad_bkspaces)
    self.assertAlmostEqual(metadata.likelihood_term, likelihood_term)
    self.assertAlmostEqual(metadata.penalty_term, penalty_term)
    self.assertAlmostEqual(metadata.bic, bic)

  # Penalty coefficient 1.0.
  _check_fit(
      1.0,
      rmse=0.013013,
      bkspace=1.67990914314,
      likelihood_term=-6685.64217856480,
      penalty_term=942.51190498322,
      bic=-5743.13027358158)

  # Decrease penalty coefficient; allow smaller spacing for closer fit.
  _check_fit(
      0.1,
      rmse=0.0066376,
      bkspace=1.48817572082,
      likelihood_term=-6731.59913975551,
      penalty_term=1064.12634433589,
      bic=-6625.18650532192)

  # Increase penalty coefficient; require larger spacing at the cost of worse
  # fit.
  _check_fit(
      2,
      rmse=0.026215449,
      bkspace=1.89634509537,
      likelihood_term=-6495.65564287904,
      penalty_term=836.099270549629,
      bic=-4823.45710177978)
def testTooFewPoints(self):
  """Checks behaviour when segments have too few points to be fit.

  First only segments of 1, 2 and 3 points are supplied: the whole spline is
  expected to be NaN, the mask all False and the fit statistics None. A longer
  segment is then appended: the three short segments must stay NaN and masked
  out, while the new segment must be fit with finite values.
  """
  # Logarithmically sample candidate break point spacings.
  bkspaces = np.logspace(np.log10(0.5), np.log10(5), num=20)

  # Sine wave with segments of 1, 2, 3 points.
  all_time = [
      np.array(points) for points in ([0.1], [0.2, 0.3], [0.4, 0.5, 0.6])
  ]
  all_flux = [np.sin(segment_time) for segment_time in all_time]

  spline, metadata = kepler_spline.choose_kepler_spline(
      all_time, all_flux, bkspaces, penalty_coeff=1.0, verbose=False)

  # All segments are NaN.
  joined_spline = np.concatenate(spline)
  self.assertTrue(np.all(np.isnan(joined_spline)))
  joined_mask = np.concatenate(metadata.light_curve_mask)
  self.assertFalse(np.any(joined_mask))
  self.assertIsNone(metadata.bkspace)
  self.assertEmpty(metadata.bad_bkspaces)
  self.assertIsNone(metadata.likelihood_term)
  self.assertIsNone(metadata.penalty_term)
  self.assertIsNone(metadata.bic)

  # Add a longer segment.
  long_time = np.arange(0.7, 2.0, 0.1)
  all_time.append(long_time)
  all_flux.append(np.sin(long_time))

  spline, metadata = kepler_spline.choose_kepler_spline(
      all_time, all_flux, bkspaces, penalty_coeff=1.0, verbose=False)

  # First 3 segments are NaN.
  for short_index in (0, 1, 2):
    self.assertTrue(np.all(np.isnan(spline[short_index])))
    self.assertFalse(np.any(metadata.light_curve_mask[short_index]))

  # Final segment is a good fit.
  long_index = 3
  self.assertTrue(np.all(np.isfinite(spline[long_index])))
  self.assertTrue(np.all(metadata.light_curve_mask[long_index]))
  self.assertEmpty(metadata.bad_bkspaces)
  self.assertAlmostEqual(metadata.likelihood_term, -58.0794069927957)
  self.assertAlmostEqual(metadata.penalty_term, 7.69484807238461)
  self.assertAlmostEqual(metadata.bic, -50.3845589204111)
def testNoPoints(self):
  """Checks that a single empty segment gives an empty spline and mask."""
  # Logarithmically sample candidate break point spacings.
  bkspaces = np.logspace(np.log10(0.5), np.log10(5), num=20)

  # One segment containing no points at all.
  all_time, all_flux = [np.array([])], [np.array([])]

  result = kepler_spline.choose_kepler_spline(
      all_time, all_flux, bkspaces, penalty_coeff=1.0, verbose=False)
  spline, metadata = result

  expected = [[]]
  np.testing.assert_array_equal(spline, expected)
  np.testing.assert_array_equal(metadata.light_curve_mask, expected)
def testChooseKeplerSpline(self):
  """Checks the spline chosen for a sine wave at three penalty coefficients.

  For each coefficient, the RMSE between the input flux and the returned
  spline and the selected break point spacing are compared against recorded
  values. The returned mask must be all True and no break point spacing may be
  reported as bad.
  """
  # High frequency sine wave.
  time = [np.arange(0, 100, 0.1), np.arange(100, 200, 0.1)]
  flux = [np.sin(segment_time) for segment_time in time]

  # Logarithmically sample candidate break point spacings.
  bkspaces = np.logspace(np.log10(0.5), np.log10(5), num=20)

  def _rmse(flux_segments, spline_segments):
    # Root-mean-square error over all segments joined end to end.
    residual = np.concatenate(flux_segments) - np.concatenate(spline_segments)
    return np.sqrt(np.mean(residual**2))

  # Columns: penalty_coeff, expected rmse, expected bkspace.
  cases = [
      # Penalty coefficient 1.0.
      (1.0, 0.013013, 1.67990914314),
      # Decrease penalty coefficient; allow smaller spacing for closer fit.
      (0.1, 0.0066376, 1.48817572082),
      # Increase penalty coefficient; require larger spacing at the cost of
      # worse fit.
      (2, 0.026215449, 1.89634509537),
  ]

  for coeff, expected_rmse, expected_bkspace in cases:
    spline, mask, bkspace, bad_bkspaces = kepler_spline.choose_kepler_spline(
        time, flux, bkspaces, penalty_coeff=coeff)
    self.assertAlmostEqual(_rmse(flux, spline), expected_rmse)
    self.assertTrue(np.all(mask))
    self.assertAlmostEqual(bkspace, expected_bkspace)
    self.assertEmpty(bad_bkspaces)
def read_and_process_light_curve(kepid,
                                 kepler_data_dir,
                                 campaign,
                                 max_gap_width=0.75):
  """Reads a light curve, fits a B-spline and divides the curve by the spline.

  After flattening, points that lie above the median flux and are rejected by
  robust_mean.robust_mean() (cut=3) are dropped as upward outliers.

  Args:
    kepid: Kepler (EPIC) id of the target star.
    kepler_data_dir: Base directory containing Kepler data. See
      kepler_io.kepler_filenames().
    campaign: K2 campaign where data was taken.
    max_gap_width: Gap size (in days) above which the light curve is split for
      the fitting of B-splines.

  Returns:
    time: 1D NumPy array; the time values of the light curve.
    flux: 1D NumPy array; the normalized flux values of the light curve.

  Raises:
    IOError: If the light curve files for this Kepler ID cannot be found.
    ValueError: If the spline could not be fit.
  """
  # Read the Kepler light curve.
  file_names = kepler_io.kepler_filenames(kepler_data_dir, kepid, campaign)
  if not file_names:
    # The campaign is reported in the exception itself rather than printed to
    # stdout, so it stays attached to the failure wherever it is handled.
    raise IOError(
        "Failed to find .idl file in %s for EPIC ID %s (campaign %s)" %
        (kepler_data_dir, kepid, campaign))

  all_time, all_flux = kepler_io.read_kepler_light_curve(file_names)

  # Split on gaps.
  all_time, all_flux = util.split(all_time, all_flux, gap_width=max_gap_width)

  # Logarithmically sample candidate break point spacings between 0.5 and 20
  # days.
  bkspaces = np.logspace(np.log10(0.5), np.log10(20), num=20)

  # Generate spline. Only the first element of the result (the piecewise
  # spline) is needed here.
  spline = kepler_spline.choose_kepler_spline(
      all_time, all_flux, bkspaces, penalty_coeff=1.0, verbose=False)[0]

  if spline is None:
    # Interpolate the ID into the message. Passing it as a second positional
    # argument to ValueError would leave the "%s" placeholder unformatted and
    # render the exception as a tuple.
    raise ValueError("Failed to fit spline with Kepler ID %s" % kepid)

  # Concatenate the piecewise light curve and spline.
  time = np.concatenate(all_time)
  flux = np.concatenate(all_flux)
  spline = np.concatenate(spline)

  # In rare cases the piecewise spline contains NaNs in places the spline could
  # not be fit. We can't normalize those points if the spline isn't defined
  # there. Instead we just remove them.
  finite_i = np.isfinite(spline)
  if not np.all(finite_i):
    tf.logging.warn("Incomplete spline with Kepler ID %s", kepid)
    time = time[finite_i]
    flux = flux[finite_i]
    spline = spline[finite_i]

  # "Flatten" the light curve (remove low-frequency variability) by dividing by
  # the spline.
  flux /= spline

  # TODO: Remove points where the thrusters are on (using s.data.moving).
  # TODO: Remove points where the xcenter is off (using s.data.xc).
  # TODO: Remove points where the background flux is off (using
  # s.data.medians).

  # Remove upward outliers: points above the median that the robust mean
  # rejects.
  # NOTE(review): element [2] of robust_mean's result is assumed to be a
  # boolean mask of the points it kept -- confirm against robust_mean.
  deviation = flux - np.median(flux)
  kept_by_robust_mean = robust_mean.robust_mean(deviation, cut=3)[2]
  is_upward_outlier = np.logical_and(
      np.logical_not(kept_by_robust_mean), deviation > 0)

  keep = ~is_upward_outlier
  flux = flux[keep]
  time = time[keep]

  return time, flux
def read_and_process_light_curve(kepid, kepler_data_dir, max_gap_width=0.75):
  """Reads a light curve, fits a B-spline and divides the curve by the spline.

  Args:
    kepid: Kepler id of the target star.
    kepler_data_dir: Base directory containing Kepler data. See
      kepler_io.kepler_filenames().
    max_gap_width: Gap size (in days) above which the light curve is split for
      the fitting of B-splines.

  Returns:
    time: 1D NumPy array; the time values of the light curve.
    flux: 1D NumPy array; the normalized flux values of the light curve.

  Raises:
    IOError: If the light curve files for this Kepler ID cannot be found.
    ValueError: If the spline could not be fit.
  """
  # Read the Kepler light curve.
  file_names = kepler_io.kepler_filenames(kepler_data_dir, kepid)
  if not file_names:
    raise IOError("Failed to find .fits files in %s for Kepler ID %s" %
                  (kepler_data_dir, kepid))

  all_time, all_flux = kepler_io.read_kepler_light_curve(file_names)

  # Split on gaps.
  all_time, all_flux = util.split(all_time, all_flux, gap_width=max_gap_width)

  # Logarithmically sample candidate break point spacings between 0.5 and 20
  # days.
  bkspaces = np.logspace(np.log10(0.5), np.log10(20), num=20)

  # Generate spline. Only the first element of the result (the piecewise
  # spline) is needed here.
  spline = kepler_spline.choose_kepler_spline(
      all_time, all_flux, bkspaces, penalty_coeff=1.0, verbose=False)[0]

  if spline is None:
    # Format the ID into the message here: exception constructors do not apply
    # "%s" formatting to extra positional arguments the way logging calls do.
    raise ValueError("Failed to fit spline with Kepler ID %s" % kepid)

  # Concatenate the piecewise light curve and spline.
  time = np.concatenate(all_time)
  flux = np.concatenate(all_flux)
  spline = np.concatenate(spline)

  # In rare cases the piecewise spline contains NaNs in places the spline could
  # not be fit. We can't normalize those points if the spline isn't defined
  # there. Instead we just remove them.
  finite_i = np.isfinite(spline)
  if not np.all(finite_i):
    tf.logging.warn("Incomplete spline with Kepler ID %s", kepid)
    time = time[finite_i]
    flux = flux[finite_i]
    spline = spline[finite_i]

  # "Flatten" the light curve (remove low-frequency variability) by dividing by
  # the spline.
  flux /= spline

  return time, flux