예제 #1
0
    def testRobustMean(self):
        # The input is a fixed, pre-generated vector of 1,000 values so that the
        # test is deterministic. Each value was drawn independently from a normal
        # distribution with mean 2 and standard deviation 1. The largest value
        # is 6.075 (+4.075 sigma from the mean) and the smallest is -1.54
        # (-3.54 sigma from the mean).
        sample = np.array(random_normal.RANDOM_NORMAL)
        self.assertAlmostEqual(np.mean(sample), 2.00336615850485)
        self.assertAlmostEqual(np.std(sample), 1.01690907798)

        # Generous cut: nothing is rejected, so the result is the plain sample
        # mean, and its uncertainty is the sample standard deviation divided
        # by sqrt(1000 - 1).
        center, center_stddev, keep = robust_mean.robust_mean(sample, cut=5)
        self.assertAlmostEqual(center, 2.00336615850485)
        self.assertAlmostEqual(center_stddev, 0.032173579)
        self.assertLen(keep, 1000)
        self.assertEqual(np.sum(keep), 1000)

        # 3-sigma cut. Exactly 3 points of the sample lie outside roughly
        # [-1, 5] (mean +/- 3 sigma); they sit at the indices listed below.
        rejected_indices = [12, 220, 344]
        center, center_stddev, keep = robust_mean.robust_mean(sample, cut=3)
        self.assertAlmostEqual(center, 2.0059050070632178)
        self.assertAlmostEqual(center_stddev, 0.03197075302321066)
        self.assertLen(keep, 1000)
        self.assertEqual(np.sum(keep), 997)
        self.assertFalse(np.any(keep[rejected_indices]))

        # Append ten outliers at +10. They drag the plain sample mean to 2.082,
        # but all of them should be rejected by the robust estimate.
        with_high_outliers = np.concatenate([sample, [10] * 10])
        center, center_stddev, keep = robust_mean.robust_mean(
            y=with_high_outliers, cut=5)
        self.assertAlmostEqual(center, 2.0033661585048681)
        self.assertAlmostEqual(center_stddev, 0.032013749413590531)
        self.assertLen(keep, 1010)
        self.assertEqual(np.sum(keep), 1000)
        self.assertFalse(np.any(keep[1000:1010]))

        # Append a single extreme outlier at -1000. It drags the plain sample
        # mean to 1.002, but should be rejected by the robust estimate.
        with_low_outlier = np.concatenate([sample, [-1000]])
        center, center_stddev, keep = robust_mean.robust_mean(
            y=with_low_outlier, cut=5)
        self.assertAlmostEqual(center, 2.0033661585048681)
        self.assertAlmostEqual(center_stddev, 0.032157488597211903)
        self.assertLen(keep, 1001)
        self.assertEqual(np.sum(keep), 1000)
        self.assertFalse(keep[1000])
예제 #2
0
  def testRobustMean(self):
    # A fixed, pre-generated vector of 1,000 values keeps the test
    # deterministic. Each value was drawn independently from a normal
    # distribution with mean 2 and standard deviation 1. The largest value is
    # 6.075 (+4.075 sigma from the mean) and the smallest is -1.54 (-3.54 sigma
    # from the mean).
    values = np.array(random_normal.RANDOM_NORMAL)
    self.assertAlmostEqual(np.mean(values), 2.00336615850485)
    self.assertAlmostEqual(np.std(values), 1.01690907798)

    # With a high cut no point is rejected: the estimate equals the sample mean
    # and its uncertainty equals the sample standard deviation divided by
    # sqrt(1000 - 1).
    estimate, estimate_stddev, used = robust_mean.robust_mean(values, cut=5)
    self.assertAlmostEqual(estimate, 2.00336615850485)
    self.assertAlmostEqual(estimate_stddev, 0.032173579)
    self.assertLen(used, 1000)
    self.assertEqual(np.sum(used), 1000)

    # With a cut of 3 standard deviations, exactly 3 points are rejected: the
    # ones outside roughly [-1, 5] (mean +/- 3 sigma), at the indices below.
    expected_rejected = [12, 220, 344]
    estimate, estimate_stddev, used = robust_mean.robust_mean(values, cut=3)
    self.assertAlmostEqual(estimate, 2.0059050070632178)
    self.assertAlmostEqual(estimate_stddev, 0.03197075302321066)
    self.assertLen(used, 1000)
    self.assertEqual(np.sum(used), 997)
    self.assertFalse(np.any(used[expected_rejected]))

    # Ten extra points at +10 corrupt the plain sample mean to 2.082; the robust
    # estimate should reject all ten.
    padded_high = np.concatenate([values, [10] * 10])
    estimate, estimate_stddev, used = robust_mean.robust_mean(
        y=padded_high, cut=5)
    self.assertAlmostEqual(estimate, 2.0033661585048681)
    self.assertAlmostEqual(estimate_stddev, 0.032013749413590531)
    self.assertLen(used, 1010)
    self.assertEqual(np.sum(used), 1000)
    self.assertFalse(np.any(used[1000:1010]))

    # One extra point at -1000 corrupts the plain sample mean to 1.002; the
    # robust estimate should reject it.
    padded_low = np.concatenate([values, [-1000]])
    estimate, estimate_stddev, used = robust_mean.robust_mean(
        y=padded_low, cut=5)
    self.assertAlmostEqual(estimate, 2.0033661585048681)
    self.assertAlmostEqual(estimate_stddev, 0.032157488597211903)
    self.assertLen(used, 1001)
    self.assertEqual(np.sum(used), 1000)
    self.assertFalse(used[1000])
예제 #3
0
def kepler_spline(time, flux, bkspace=1.5, maxiter=5, outlier_cut=3):
    """Computes a best-fit spline curve for a light curve segment.

    The spline is fit using an iterative process to remove outliers that may
    cause the spline to be "pulled" by discrepant points. In each iteration the
    spline is fit, and if there are any points where the absolute deviation
    from the median residual is at least outlier_cut*sigma (where sigma is a
    robust estimate of the standard deviation of the residuals), those points
    are removed and the spline is re-fit.

    Args:
      time: Numpy array; the time values of the light curve.
      flux: Numpy array; the flux (brightness) values of the light curve.
      bkspace: Spline break point spacing in time units.
      maxiter: Maximum number of attempts to fit the spline after removing
        badly fit points.
      outlier_cut: The maximum number of standard deviations from the median
        spline residual before a point is considered an outlier.

    Returns:
      spline: The values of the fitted spline corresponding to the input time
          values. None if maxiter is less than 1 (no fit is attempted).
      mask: Boolean mask indicating the points used to fit the final spline.
          None if maxiter is less than 1.

    Raises:
      InsufficientPointsError: If there were insufficient points (after
          removing outliers) for spline fitting.
      SplineError: If the spline could not be fit, for example if the
          breakpoint spacing is too small.
    """
    if len(time) < 4:
        raise InsufficientPointsError(
            "Cannot fit a spline on less than 4 points. Got {} points.".format(
                len(time)))

    # Rescale time into [0, 1]. This rebinds the local name only; the caller's
    # array is not modified.
    t_min = np.min(time)
    t_max = np.max(time)
    time = (time - t_min) / (t_max - t_min)
    bkspace /= (t_max - t_min)  # Rescale bucket spacing.

    # Values of the best fitting spline evaluated at the time points.
    spline = None

    # Mask indicating the points used to fit the spline.
    mask = None

    for _ in range(maxiter):
        if spline is None:
            # Try to fit all points. The builtin `bool` is used as the dtype
            # because the `np.bool` alias was removed from NumPy in 1.24.
            mask = np.ones_like(time, dtype=bool)
        else:
            # Choose points where the absolute deviation from the median
            # residual is less than outlier_cut*sigma, where sigma is a robust
            # estimate of the standard deviation of the residuals from the
            # previous spline.
            residuals = flux - spline
            new_mask = robust_mean.robust_mean(residuals, cut=outlier_cut)[2]

            if np.all(new_mask == mask):
                break  # Spline converged.

            mask = new_mask

        num_used = np.sum(mask)
        if num_used < 4:
            # Fewer than 4 points after removing outliers. We could plausibly
            # return the spline from the previous iteration because it was fit
            # with at least 4 points. However, since the outliers were such a
            # significant fraction of the curve, the spline from the previous
            # iteration is probably junk, and we consider this a fatal error.
            raise InsufficientPointsError(
                "Cannot fit a spline on less than 4 points. After removing "
                "outliers, got {} points.".format(num_used))

        try:
            with warnings.catch_warnings():
                # Suppress warning messages printed by pydlutils.bspline.
                # Instead we catch any exception and raise a more informative
                # error.
                warnings.simplefilter("ignore")

                # Fit the spline on non-outlier points.
                curve = bspline.iterfit(time[mask],
                                        flux[mask],
                                        bkspace=bkspace)[0]

            # Evaluate spline at the time points.
            spline = curve.value(time)[0]
        except (IndexError, TypeError) as e:
            raise SplineError(
                "Fitting spline failed with error: '{}'. This might be caused by the "
                "breakpoint spacing being too small, and/or there being insufficient "
                "points to fit the spline in one of the intervals.".format(e))

    return spline, mask
예제 #4
0
def kepler_spline(time, flux, bkspace=1.5, maxiter=5, outlier_cut=3):
  """Computes a best-fit spline curve for a light curve segment.

  The spline is fit using an iterative process to remove outliers that may cause
  the spline to be "pulled" by discrepant points. In each iteration the spline
  is fit, and if there are any points where the absolute deviation from the
  median residual is at least outlier_cut*sigma (where sigma is a robust
  estimate of the standard deviation of the residuals), those points are
  removed and the spline is re-fit.

  Args:
    time: Numpy array; the time values of the light curve.
    flux: Numpy array; the flux (brightness) values of the light curve.
    bkspace: Spline break point spacing in time units.
    maxiter: Maximum number of attempts to fit the spline after removing badly
        fit points.
    outlier_cut: The maximum number of standard deviations from the median
        spline residual before a point is considered an outlier.

  Returns:
    spline: The values of the fitted spline corresponding to the input time
        values. None if maxiter is less than 1 (no fit is attempted).
    mask: Boolean mask indicating the points used to fit the final spline.
        None if maxiter is less than 1.

  Raises:
    SplineError: If fitting or evaluating the spline raised an IndexError or
        TypeError, for example if the breakpoint spacing is too small.
  """
  # Rescale time into [0, 1]. This rebinds the local name only; the caller's
  # array is not modified.
  t_min = np.min(time)
  t_max = np.max(time)
  time = (time - t_min) / (t_max - t_min)
  bkspace /= (t_max - t_min)  # Rescale bucket spacing.

  # Values of the best fitting spline evaluated at the time points.
  spline = None

  # Mask indicating the points used to fit the spline.
  mask = None

  for _ in range(maxiter):
    if spline is None:
      # Try to fit all points. The builtin `bool` is used as the dtype because
      # the `np.bool` alias was removed from NumPy in 1.24.
      mask = np.ones_like(time, dtype=bool)
    else:
      # Choose points where the absolute deviation from the median residual is
      # less than outlier_cut*sigma, where sigma is a robust estimate of the
      # standard deviation of the residuals from the previous spline.
      residuals = flux - spline
      _, _, new_mask = robust_mean.robust_mean(residuals, cut=outlier_cut)

      if np.all(new_mask == mask):
        break  # Spline converged.

      mask = new_mask

    try:
      with warnings.catch_warnings():
        # Suppress warning messages printed by pydlutils.bspline. Instead we
        # catch any exception and raise a more informative error.
        warnings.simplefilter("ignore")

        # Fit the spline on non-outlier points.
        curve = bspline.iterfit(time[mask], flux[mask], bkspace=bkspace)[0]

      # Evaluate spline at the time points.
      spline = curve.value(time)[0]
    except (IndexError, TypeError) as e:
      raise SplineError(
          "Fitting spline failed with error: '%s'. This might be caused by the "
          "breakpoint spacing being too small, and/or there being insufficient "
          "points to fit the spline in one of the intervals." % e)

  return spline, mask
예제 #5
0
def read_and_process_light_curve(kepid, kepler_data_dir, campaign, max_gap_width=0.75):
  """Reads a light curve, fits a B-spline and divides the curve by the spline.

  After flattening, points flagged as outliers by robust_mean (cut=3) whose
  flux lies above the median flux are dropped from the returned arrays.

  Args:
    kepid: Kepler id of the target star.
    kepler_data_dir: Base directory containing Kepler data. See
        kepler_io.kepler_filenames().
    campaign: K2 campaign where data was taken.
    max_gap_width: Gap size (in days) above which the light curve is split for
        the fitting of B-splines.

  Returns:
    time: 1D NumPy array; the time values of the light curve.
    flux: 1D NumPy array; the normalized flux values of the light curve.

  Raises:
    IOError: If the light curve files for this Kepler ID cannot be found.
    ValueError: If the spline could not be fit.
  """
  # Read the Kepler light curve.
  file_names = kepler_io.kepler_filenames(kepler_data_dir, kepid, campaign)
  if not file_names:
    # NOTE(review): this looks like leftover debug output; it is kept so that
    # the function's observable behavior is unchanged.
    print(campaign)
    raise IOError("Failed to find .idl file in %s for EPIC ID %s" %
                  (kepler_data_dir, kepid))

  all_time, all_flux = kepler_io.read_kepler_light_curve(file_names)

  # Split on gaps.
  all_time, all_flux = util.split(all_time, all_flux, gap_width=max_gap_width)

  # Logarithmically sample candidate break point spacings between 0.5 and 20
  # days.
  bkspaces = np.logspace(np.log10(0.5), np.log10(20), num=20)

  # Generate spline.
  spline = kepler_spline.choose_kepler_spline(
      all_time, all_flux, bkspaces, penalty_coeff=1.0, verbose=False)[0]

  if spline is None:
    # Format the id into the message. Passing it as a second argument to
    # ValueError (logging-style) would leave the "%s" placeholder unformatted.
    raise ValueError("Failed to fit spline with Kepler ID %s" % kepid)

  # Concatenate the piecewise light curve and spline.
  time = np.concatenate(all_time)
  flux = np.concatenate(all_flux)
  spline = np.concatenate(spline)

  # In rare cases the piecewise spline contains NaNs in places the spline could
  # not be fit. We can't normalize those points if the spline isn't defined
  # there. Instead we just remove them.
  finite_i = np.isfinite(spline)
  if not np.all(finite_i):
    tf.logging.warn("Incomplete spline with Kepler ID %s", kepid)
    time = time[finite_i]
    flux = flux[finite_i]
    spline = spline[finite_i]

  # "Flatten" the light curve (remove low-frequency variability) by dividing by
  # the spline.
  flux /= spline

  # TODO: Remove points where the thrusters are on (using s.data.moving).

  # TODO: Remove points where the xcenter is off (using s.data.xc).

  # TODO: Remove points where the background flux is off (using
  # s.data.medians).

  # Remove upward outliers: points rejected by robust_mean whose deviation from
  # the median flux is positive. Rejected points below the median are kept.
  deviation = flux - np.median(flux)
  is_upward_outlier = np.logical_not(robust_mean.robust_mean(deviation, cut=3)[2])
  np.logical_and(is_upward_outlier, deviation > 0, out=is_upward_outlier)

  flux = flux[~is_upward_outlier]
  time = time[~is_upward_outlier]

  return time, flux
예제 #6
0
    def process(self, inputs):
        """Processes a single light curve.

        Reads the raw light curve from `inputs`, optionally removes events,
        splits the segments on gaps, fits a normalization curve, optionally
        removes outliers, and stores the resulting LightCurve proto back into
        `inputs` under `self.output_name`.

        Args:
          inputs: Dict with key "raw_light_curve" (an object whose `segments`
            each expose `time` and `flux`). The optional keys
            "events_to_remove" and "events_to_mask_for_spline" are popped from
            the dict if present.

        Yields:
          The same `inputs` dict with the output light curve added. Nothing is
          yielded if there are no segments to process (for example, when
          event removal eliminates the entire light curve).

        Raises:
          ValueError: If `self.normalize_method` is not "spline".
        """
        raw_lc = inputs["raw_light_curve"]
        all_time = [
            np.array(s.time, dtype=np.float64) for s in raw_lc.segments
        ]
        all_flux = [
            np.array(s.flux, dtype=np.float64) for s in raw_lc.segments
        ]
        length_raw = sum(len(t) for t in all_time)

        # Remove events.
        events_to_remove = inputs.pop("events_to_remove", [])
        if events_to_remove:
            all_time, all_flux = util.remove_events(
                all_time,
                all_flux,
                events_to_remove,
                width_factor=self.remove_events_width_factor,
                include_empty_segments=False)

        if not all_time:
            return  # Removed entire light curve.

        # Split on gaps.
        all_time, all_flux = util.split(all_time,
                                        all_flux,
                                        gap_width=self.gap_width)

        # Mask events. The masked copies are used only for fitting the
        # normalization curve; the unmasked arrays are what gets output.
        events_to_mask = inputs.pop("events_to_mask_for_spline", [])
        if events_to_mask:
            all_masked_time, all_masked_flux = util.remove_events(
                all_time,
                all_flux,
                events=events_to_mask,
                width_factor=self.remove_events_width_factor)
        else:
            all_masked_time = all_time
            all_masked_flux = all_flux

        # Fit normalization curve.
        if self.normalize_method == "spline":
            all_spline, metadata = kepler_spline.fit_kepler_spline(
                all_masked_time, all_masked_flux, **self.normalize_args)
        else:
            raise ValueError("Unrecognized normalize_method: {}".format(
                self.normalize_method))

        # Interpolate the spline between the events removed.
        if events_to_mask:
            all_spline = util.interpolate_masked_spline(
                all_time, all_masked_time, all_spline)

        # Concatenate the results.
        time = np.concatenate(all_time)
        flux = np.concatenate(all_flux)
        norm_curve = np.concatenate(all_spline)

        # Initialize the output.
        light_curve = light_curve_pb2.LightCurve(
            length_raw=length_raw,
            spline_metadata=light_curve_pb2.SplineMetadata(
                bkspace=metadata.bkspace,
                bad_bkspaces=metadata.bad_bkspaces,
                likelihood_term=metadata.likelihood_term,
                penalty_term=metadata.penalty_term,
                bic=metadata.bic,
                masked_events=events_to_mask,
                **self.normalize_args),
            removed_events=events_to_remove)

        # If the normalization curve contains NaNs, we can't normalize those
        # places. Record them as failures and drop them from the output.
        is_finite = np.isfinite(norm_curve)
        is_not_finite = np.logical_not(is_finite)
        if np.any(is_not_finite):
            light_curve.norm_curve_failures.time[:] = time[is_not_finite]
            light_curve.norm_curve_failures.flux[:] = flux[is_not_finite]
            time = time[is_finite]
            flux = flux[is_finite]
            norm_curve = norm_curve[is_finite]

        # Possibly remove outliers.
        if self.upward_outlier_sigma_cut or self.downward_outlier_sigma_cut:
            norm_flux = flux / norm_curve  # We compute outliers on normalized flux.
            deviation = norm_flux - np.median(norm_flux)

            # A point is an upward (downward) outlier if robust_mean rejects it
            # at the corresponding cut AND it lies above (below) the median.
            # The builtin `bool` is used as the dtype below because the
            # `np.bool` alias was removed from NumPy in 1.24.
            if self.upward_outlier_sigma_cut:
                is_upward_outlier = np.logical_not(
                    robust_mean.robust_mean(
                        deviation, cut=self.upward_outlier_sigma_cut)[2])
                np.logical_and(is_upward_outlier,
                               deviation > 0,
                               out=is_upward_outlier)
            else:
                is_upward_outlier = np.zeros_like(deviation, dtype=bool)

            if self.downward_outlier_sigma_cut:
                is_downward_outlier = np.logical_not(
                    robust_mean.robust_mean(
                        deviation, cut=self.downward_outlier_sigma_cut)[2])
                np.logical_and(is_downward_outlier,
                               deviation < 0,
                               out=is_downward_outlier)
            else:
                is_downward_outlier = np.zeros_like(deviation, dtype=bool)

            is_outlier = np.logical_or(is_upward_outlier, is_downward_outlier)
            is_not_outlier = np.logical_not(is_outlier)
            if np.any(is_outlier):
                light_curve.outliers_removed.time[:] = time[is_outlier]
                light_curve.outliers_removed.flux[:] = flux[is_outlier]
                light_curve.outliers_removed.norm_curve[:] = norm_curve[
                    is_outlier]
                time = time[is_not_outlier]
                flux = flux[is_not_outlier]
                norm_curve = norm_curve[is_not_outlier]

        # Fill the output.
        light_curve.light_curve.time[:] = time
        light_curve.light_curve.flux[:] = flux
        light_curve.light_curve.norm_curve[:] = norm_curve
        inputs[self.output_name] = light_curve

        yield inputs