Example #1
def test_memory_leak():
    import resource

    arr = np.arange(1).reshape((1, 1))

    starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    for i in range(1000):
        for axis in [None, 0, 1]:
            bn.nansum(arr, axis=axis)
            bn.nanargmax(arr, axis=axis)
            bn.nanargmin(arr, axis=axis)
            bn.nanmedian(arr, axis=axis)
            bn.nansum(arr, axis=axis)
            bn.nanmean(arr, axis=axis)
            bn.nanmin(arr, axis=axis)
            bn.nanmax(arr, axis=axis)
            bn.nanvar(arr, axis=axis)

    ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    diff = ending - starting
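    # Note: ru_maxrss units are platform-dependent (commonly kilobytes on Linux,
    # bytes on macOS), so the page-size conversion below gives only an indicative
    # byte figure; the assertion relies solely on the difference being exactly zero.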
    diff_bytes = diff * resource.getpagesize()
    print(diff_bytes)
    # For 1.3.0 release, this had value of ~100kB
    assert diff_bytes == 0
Example #2
def test_nanvar_issue60():
    "nanvar regression test (issue #60)"

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        f = bn.nanvar([1.0], ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanvar([1.0], ddof=1)
        assert_equal(f, s, err_msg="bn.nanvar([1.0], ddof=1) wrong")

        f = bn.nanvar([1], ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanvar([1], ddof=1)
        assert_equal(f, s, err_msg="bn.nanvar([1], ddof=1) wrong")

        f = bn.nanvar([1, np.nan], ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanvar([1, np.nan], ddof=1)
        assert_equal(f, s, err_msg="bn.nanvar([1, nan], ddof=1) wrong")

        f = bn.nanvar([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
        with np.errstate(invalid='ignore'):
            s = bn.slow.nanvar([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
        assert_equal(f, s, err_msg="issue #60 regression")
Example #4
def test_memory_leak() -> None:
    import resource

    arr = np.arange(1).reshape((1, 1))

    n_attempts = 3
    results = []

    for _ in range(n_attempts):
        starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        for _ in range(1000):
            for axis in [None, 0, 1]:
                bn.nansum(arr, axis=axis)
                bn.nanargmax(arr, axis=axis)
                bn.nanargmin(arr, axis=axis)
                bn.nanmedian(arr, axis=axis)
                bn.nansum(arr, axis=axis)
                bn.nanmean(arr, axis=axis)
                bn.nanmin(arr, axis=axis)
                bn.nanmax(arr, axis=axis)
                bn.nanvar(arr, axis=axis)

        ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        diff = ending - starting
        diff_bytes = diff * resource.getpagesize()
        # For 1.3.0 release, this had value of ~100kB
        if diff_bytes:
            results.append(diff_bytes)
        else:
            break

    assert len(results) < n_attempts
Example #5
def get_lugsail_batch_means_est(data_in, steps=None):
    m = len(data_in)
    T_iL = []
    s_i = []
    n_i = []

    for data_chain, burnin_chain in data_in:
        data = data_chain[burnin_chain:steps]
        if data.size < 2:
            return np.inf
        # [chapter 2.2 in Vats and Knudson, 2018]
        n_ii = data.size
        b = int(n_ii**(1 / 2))  # Batch size. Alternative: n ** (1/3)
        n_i.append(n_ii)

        chain_mean = bn.nanmean(data)
        T_iL.append(
            2 * get_tau_lugsail(b, data, chain_mean) \
            - get_tau_lugsail(b // 3, data, chain_mean)
        )
        s_i.append(bn.nanvar(data, ddof=1))

    T_L = np.mean(T_iL)
    s = np.mean(s_i)
    n = np.round(np.mean(n_i))

    sigma_L = ((n - 1) * s + T_L) / n

    # [eq. 5 in Vats and Knudson, 2018]
    R_L = np.sqrt(sigma_L / s)

    return R_L
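Restated compactly, the function above returns the lugsail batch-means diagnostic; the bracketed comments attribute the construction to Vats and Knudson (2018), and the formulas below are read off the code itself:

$$\hat\sigma_L^2 = \frac{(n-1)\,s + T_L}{n}, \qquad \hat R_L = \sqrt{\frac{\hat\sigma_L^2}{s}},$$

where $s$ is the mean of the per-chain sample variances (bn.nanvar with ddof=1), $T_L$ is the mean of the per-chain terms $2\,\tau(b) - \tau(\lfloor b/3 \rfloor)$ with batch size $b = \lfloor\sqrt{n_i}\rfloor$, and $n$ is the rounded mean chain length.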
Example #6
def proportionality(x, y):
    num = bottleneck.nanvar(np.log1p(y) - np.log1p(x))

    denom = (bottleneck.nanstd(np.log1p(x)) +
             bottleneck.nanstd(np.log1p(y)))**2
    try:
        return num / denom
    except:
        return np.nan
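A hedged usage sketch for the metric above; the arrays are synthetic and purely illustrative. Values near zero indicate the two series are nearly proportional after the log1p transform, since the numerator is the variance of the log1p-ratio:

import numpy as np
import bottleneck

# proportionality() as defined above; NaNs are tolerated because
# bottleneck's nanvar/nanstd ignore them.
x = np.array([1.0, 2.0, 4.0, 8.0, np.nan])
y = np.array([2.0, 4.0, 8.0, 16.0, 32.0])
print(proportionality(x, y))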
Example #7
def test_nanvar_issue60():
    """nanvar regression test (issue #60)"""

    f = bn.nanvar([1.0], ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanvar([1.0], ddof=1)
    assert_equal(f, s, err_msg="bn.nanvar([1.0], ddof=1) wrong")

    f = bn.nanvar([1], ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanvar([1], ddof=1)
    assert_equal(f, s, err_msg="bn.nanvar([1], ddof=1) wrong")

    f = bn.nanvar([1, np.nan], ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanvar([1, np.nan], ddof=1)
    assert_equal(f, s, err_msg="bn.nanvar([1, nan], ddof=1) wrong")

    f = bn.nanvar([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
    with np.errstate(invalid="ignore"):
        s = bn.slow.nanvar([[1, np.nan], [np.nan, 1]], axis=0, ddof=1)
    assert_equal(f, s, err_msg="issue #60 regression")
Example #8
def welch(xs, ys, meanfcn=bn.nanmedian):
    """
    Welch's statistic for equal means
    http://en.wikipedia.org/wiki/Welch%27s_t_test

    Parameters
    ----------
    xs: np.array
    ys: np.array
    meanfcn: callable, optional
        Center estimator applied to xs and ys (default: bn.nanmedian).

    Returns
    -------
    float
    """
    xbar, ybar = map(meanfcn, (xs, ys))
    sx2, sy2 = map(lambda zs: bn.nanvar(zs) + np.spacing(1), (xs, ys))
    return np.abs(xbar - ybar)/np.sqrt(sx2/len(xs) + sy2/len(ys))
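The value returned above is the familiar Welch statistic, computed with nan-aware estimators and a center given by meanfcn (nanmedian by default):

$$W = \frac{|\bar{x} - \bar{y}|}{\sqrt{s_x^2 / n_x + s_y^2 / n_y}},$$

where $s_x^2$ and $s_y^2$ are bn.nanvar values (default ddof=0) padded by np.spacing(1) to avoid division by zero, and $n_x = \mathrm{len}(xs)$, $n_y = \mathrm{len}(ys)$ (NaNs included in the counts).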
Example #9
    def fit(self, X, y, mask=None):
        """Fit Gaussian Naive Bayes according to X, y

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.
        mask : array-like, shape = [n_samples, n_features]
            Binary, 1 at unobserved features.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_arrays(X, y, sparse_format='dense')

        n_samples, n_features = X.shape

        if n_samples != y.shape[0]:
            raise ValueError("X and y have incompatible shapes")

        if mask is not None:
            mask = array2d(mask)
            X = X.copy()
            X[mask] = np.nan

        self.classes_ = unique_y = np.unique(y)
        n_classes = unique_y.shape[0]

        self.theta_ = np.zeros((n_classes, n_features))
        self.sigma_ = np.zeros((n_classes, n_features))
        self.class_prior_ = np.zeros(n_classes)
        self._n_ij = []
        epsilon = 1e-9
        for i, y_i in enumerate(unique_y):
            self.theta_[i, :] = bn.nanmean(X[y == y_i, :], axis=0)
            self.sigma_[i, :] = bn.nanvar(X[y == y_i, :], axis=0) + epsilon
            self.class_prior_[i] = float(np.sum(y == y_i)) / n_samples
            self._n_ij.append(-0.5 * np.sum(np.log(np.pi * self.sigma_[i, :])))
        self._logprior = np.log(self.class_prior_)
        return self
Example #11
				m = nanmedian(flux)
				flux = 1e6*(flux/m - 1)
				flux_err = 1e6*flux_err/m

				#fig, ax = plt.subplots()
				#ax.plot(data[:,0], data[:,1])
				#fig.savefig(os.path.splitext(fpath_save)[0] + '.png', bbox_inches='tight')
				#plt.close(fig)

				# Save file:
				os.makedirs(os.path.dirname(fpath_save), exist_ok=True)
				np.savetxt(fpath_save, np.column_stack((time, flux, flux_err)),
					delimiter='  ', fmt=('%.8f', '%.16e', '%.16e'))

			# Calculate diagnostics:
			lc = LightCurve(time=time, flux=flux, flux_err=flux_err)
			variance = nanvar(flux, ddof=1)
			rms_hour = rms_timescale(lc, timescale=3600/86400)
			ptp = nanmedian(np.abs(np.diff(flux)))

			# Add target to TODO-list:
			diag.write("{variance:.16e},{rms_hour:.16e},{ptp:.16e}\n".format(
				variance=variance,
				rms_hour=rms_hour,
				ptp=ptp
			))

		diag.write("#-------------------------------------------\n")

	print("DONE")
Example #12
def ndcombine(arr,
              mask=None,
              copy=True,
              blank=np.nan,
              offsets=None,
              thresholds=[-np.inf, np.inf],
              zero=None,
              scale=None,
              weight=None,
              zero_kw={
                  'cenfunc': 'median',
                  'stdfunc': 'std',
                  'std_ddof': 1
              },
              scale_kw={
                  'cenfunc': 'median',
                  'stdfunc': 'std',
                  'std_ddof': 1
              },
              zero_to_0th=True,
              scale_to_0th=True,
              zero_section=None,
              scale_section=None,
              reject=None,
              cenfunc='median',
              sigma=[3., 3.],
              maxiters=3,
              ddof=1,
              nkeep=1,
              maxrej=None,
              n_minmax=[1, 1],
              rdnoise=0.,
              gain=1.,
              snoise=0.,
              pclip=-0.5,
              combine='average',
              dtype='float32',
              memlimit=2.5e+9,
              irafmode=True,
              verbose=False,
              full=False,
              return_variance=False):
    if copy:
        arr = arr.copy()

    if np.array(arr).ndim == 1:
        raise ValueError("1-D array combination is not supported!")

    _mask = _set_mask(arr, mask)  # _mask = propagated through this function.
    sigma_lower, sigma_upper = _set_sigma(sigma)
    nkeep, maxrej = _set_keeprej(arr, nkeep, maxrej, axis=0)
    cenfunc = _set_cenfunc(cenfunc)
    reject_fullname = _set_reject_name(reject)
    maxiters = int(maxiters)
    ddof = int(ddof)

    combfunc = _set_combfunc(combine, nameonly=False, nan=True)

    if verbose and reject is not None:
        print("- Rejection")
        if thresholds != [-np.inf, np.inf]:
            print(f"-- thresholds (low, upp) = {thresholds}")
        print(f"-- {reject=} ({irafmode=})")
        print(f"--       params: {nkeep=}, {maxrej=}, {maxiters=}, {cenfunc=}")
        if reject_fullname == "sigclip":
            print(f"  (for sigclip): {sigma=}, {ddof=}")
        elif reject_fullname == "ccdclip":
            print(f"  (for ccdclip): {gain=}, {rdnoise=}, {snoise=}")
        # elif reject_fullnme == "pclip":
        #   print(f"    (for pclip)  : spclip={pclip}")
        # elif reject_fullname == "minmax":
        # print(f" (for minmaxclip): n_minmax={n_minmax}")

    # == 01 - Thresholding + Initial masking ============================================= #
    # Updating mask: _mask = _mask | mask_thresh
    mask_thresh = _set_thresh_mask(arr=arr,
                                   mask=_mask,
                                   thresholds=thresholds,
                                   update_mask=True)

    # if safemode:
    #     # Backup the pixels which are rejected by thresholding and # initial
    #     mask for future restoration (see below) for debugging # purpose.
    #     backup_thresh = arr[mask_thresh]
    #     backup_thresh_inmask = arr[_mask]

    # TODO: remove this np.nan and instead let `get_zsw` accept a mask.
    arr[_mask] = np.nan
    # ------------------------------------------------------------------------------------ #

    # == 02 - Calculate zero, scale, weights ============================================= #
    # This should be done before rejection but after threshold masking.
    zeros, scales, weights = get_zsw(arr=arr,
                                     zero=zero,
                                     scale=scale,
                                     weight=weight,
                                     zero_kw=zero_kw,
                                     scale_kw=scale_kw,
                                     zero_to_0th=zero_to_0th,
                                     scale_to_0th=scale_to_0th,
                                     zero_section=zero_section,
                                     scale_section=scale_section)
    arr = do_zs(arr, zeros=zeros, scales=scales)
    # ------------------------------------------------------------------------------------ #

    # == 02 - Rejection ================================================================== #
    if isinstance(reject_fullname, str):
        if reject_fullname == 'sigclip':
            _mask_rej = sigclip_mask(arr,
                                     mask=_mask,
                                     sigma_lower=sigma_lower,
                                     sigma_upper=sigma_upper,
                                     maxiters=maxiters,
                                     ddof=ddof,
                                     nkeep=nkeep,
                                     maxrej=maxrej,
                                     cenfunc=cenfunc,
                                     axis=0,
                                     irafmode=irafmode,
                                     full=full)
        elif reject_fullname == 'minmax':
            _mask_rej = minmax_mask(arr,
                                    mask=_mask,
                                    n_minmax=n_minmax,
                                    full=full)
        elif reject_fullname == 'ccdclip':
            _mask_rej = ccdclip_mask(arr,
                                     mask=_mask,
                                     sigma_lower=sigma_lower,
                                     sigma_upper=sigma_upper,
                                     scale_ref=np.mean(scales),
                                     zero_ref=np.mean(zeros),
                                     maxiters=maxiters,
                                     ddof=ddof,
                                     nkeep=nkeep,
                                     maxrej=maxrej,
                                     cenfunc=cenfunc,
                                     axis=0,
                                     gain=gain,
                                     rdnoise=rdnoise,
                                     snoise=snoise,
                                     irafmode=irafmode,
                                     full=True)
        elif reject_fullname == 'pclip':
            pass
        else:
            raise ValueError("reject not understood.")
        if full:
            _mask_rej, low, upp, nit, rejcode = _mask_rej
        # _mask is a subset of _mask_rej, so the pixels that are masked PURELY
        # due to the rejection are:
        mask_rej = _mask_rej ^ _mask
    elif reject_fullname is None:
        mask_rej = _set_mask(arr, None)
        if full:
            low = bn.nanmin(arr, axis=0)
            upp = bn.nanmax(arr, axis=0)
            nit = None
            rejcode = None
    else:
        raise ValueError("reject not understood.")

    if reject is not None and verbose:
        print("Done.")

    _mask |= mask_rej

    # ------------------------------------------------------------------------------------ #

    # TODO: add "grow" rejection here?

    # == 03 - combine ==================================================================== #
    # Replace rejected/masked pixels with NaN (done in place to reduce memory use,
    # instead of doing _arr = arr.copy()); back them up only for debugging.
    # backup_nan = arr[_mask]
    if verbose:
        print("- Combining")
        print(f"-- combine = {combine}")
    arr[_mask] = np.nan

    # Combine and calc sigma
    comb = combfunc(arr, axis=0)
    if verbose:
        print("Done.")

    # Restore NaN-replaced pixels of arr for debugging purpose.
    # arr[_mask] = backup_nan
    # arr[mask_thresh] = backup_thresh_inmask
    if full:
        if verbose:
            print("- Error calculation")
            print("-- to skip this, use `full=False`")
            print(f"-- return_variance={return_variance}, ddof={ddof}")
        if return_variance:
            err = bn.nanvar(arr, ddof=ddof, axis=0)
        else:
            err = bn.nanstd(arr, ddof=ddof, axis=0)
        if verbose:
            print("Done.")
        return comb, err, mask_rej, mask_thresh, low, upp, nit, rejcode
    else:
        return comb
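A hedged usage sketch for the combiner above; the frames are synthetic, the keyword choices are illustrative, and the behaviour ultimately depends on the helper functions (_set_mask, get_zsw, sigclip_mask, ...) that are not shown here:

import numpy as np

# Stack of five synthetic 64x64 frames with one bad pixel, combined by a
# sigma-clipped median along axis 0.
stack = np.random.normal(100.0, 5.0, size=(5, 64, 64)).astype('float32')
stack[0, 10, 10] = np.nan

comb = ndcombine(stack, reject='sigclip', combine='median')

# With full=True the function also returns the rejection/threshold masks,
# the clip bounds, the iteration count and the per-pixel error estimate
# (standard deviation by default, variance if return_variance=True).
comb, err, mask_rej, mask_thresh, low, upp, nit, rejcode = ndcombine(
    stack, reject='sigclip', combine='median', full=True)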
Example #13
	def load_star(self, task, fname):
		"""
		Receive a task from the TaskManager, loads the lightcurve and returns derived features.

		Parameters:
			task (dict): Task dictionary as returned by :func:`TaskManager.get_task`.
			fname (str): Path to lightcurve file associated with task.

		Returns:
			dict: Dictionary with features.

		See Also:
			:py:func:`TaskManager.get_task`

		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""

		logger = logging.getLogger(__name__)

		# Define variables used below:
		features = {}
		save_to_cache = False

		# The Meta-classifier is only using features from the other classifiers,
		# so there is no reason to load lightcurves and calculate/load any other classifiers:
		if self.classifier_key != 'meta':
			# Load features from cache file, or calculate them
			# and put them into cache file for other classifiers
			# to use later on:
			if self.features_cache:
				features_file = os.path.join(self.features_cache, 'features-' + str(task['priority']) + '.pickle')
				if os.path.exists(features_file):
					features = loadPickle(features_file)

			# Load lightcurve file and create a TessLightCurve object:
			if 'lightcurve' in features:
				lightcurve = features['lightcurve']
			else:
				lightcurve = load_lightcurve(fname,
					starid=task['starid'],
					truncate_lightcurve=self.truncate_lightcurves)

			# No features found in cache, so calculate them:
			if not features:
				save_to_cache = True
				features = self.calc_features(lightcurve)

		# Add the fields from the task to the list of features:
		for key in ('tmag', 'variance', 'rms_hour', 'ptp', 'other_classifiers'):
			if key in task.keys():
				features[key] = task[key]
			else:
				logger.warning("Key '%s' not found in task.", key)
				features[key] = np.NaN

		# If these features were not provided with the task, i.e. they
		# have not been pre-computed, we should compute them now:
		if features['variance'] is None or not np.isfinite(features['variance']):
			features['variance'] = nanvar(lightcurve.flux, ddof=1)
		if features['rms_hour'] is None or not np.isfinite(features['rms_hour']):
			features['rms_hour'] = rms_timescale(lightcurve)
		if features['ptp'] is None or not np.isfinite(features['ptp']):
			features['ptp'] = ptp(lightcurve)

		# Save features in cache file for later use:
		if save_to_cache and self.features_cache:
			savePickle(features_file, features)

		# Add the fields from the task to the list of features:
		features['priority'] = task['priority']
		features['starid'] = task['starid']

		logger.debug(features)
		return features
Example #14
    def correct(self, task, output_folder=None):
        """
		Run correction.

		Parameters:
			task (dict): Dictionary defining a task/lightcurve to process.
			output_folder (str, optional): Path to directory where lightcurve should be saved.

		Returns:
			dict: Result dictionary containing information about the processing.

		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""

        logger = logging.getLogger(__name__)

        t1 = default_timer()

        error_msg = []
        details = {}
        save_file = None
        result = task.copy()
        try:
            # Load the lightcurve
            lc = self.load_lightcurve(task)

            # Run the correction on this lightcurve:
            lc_corr, status = self.do_correction(lc)

        except (KeyboardInterrupt, SystemExit):  # pragma: no cover
            status = STATUS.ABORT
            logger.warning("Correction was aborted (priority=%d)",
                           task['priority'])
        except:  # noqa: E722 pragma: no cover
            status = STATUS.ERROR
            logger.exception("Correction failed (priority=%d)",
                             task['priority'])

        # Check that the status has been changed:
        if status == STATUS.UNKNOWN:  # pragma: no cover
            raise ValueError("STATUS was not set by do_correction")

        # Do sanity checks:
        if status in (STATUS.OK, STATUS.WARNING):
            # Make sure all NaN fluxes have corresponding NaN errors:
            lc_corr.flux_err[np.isnan(lc_corr.flux)] = np.NaN

            # Simple check that entire lightcurve is not NaN:
            if allnan(lc_corr.flux):
                logger.error("Final lightcurve is all NaNs")
                status = STATUS.ERROR
            if allnan(lc_corr.flux_err):
                logger.error("Final lightcurve errors are all NaNs")
                status = STATUS.ERROR
            if np.any(np.isinf(lc_corr.flux)):
                logger.error("Final lightcurve contains Inf")
                status = STATUS.ERROR
            if np.any(np.isinf(lc_corr.flux_err)):
                logger.error("Final lightcurve errors contains Inf")
                status = STATUS.ERROR

        # Calculate diagnostics:
        if status in (STATUS.OK, STATUS.WARNING):
            # Calculate diagnostics:
            details['variance'] = nanvar(lc_corr.flux, ddof=1)
            details['rms_hour'] = rms_timescale(lc_corr,
                                                timescale=3600 / 86400)
            details['ptp'] = ptp(lc_corr)

            # Diagnostics specific to the method:
            if self.CorrMethod == 'cbv':
                details['cbv_num'] = lc_corr.meta['additional_headers'][
                    'CBV_NUM']
            elif self.CorrMethod == 'ensemble':
                details['ens_num'] = lc_corr.meta['additional_headers'][
                    'ENS_NUM']
                details['ens_fom'] = lc_corr.meta['FOM']

            # Save the lightcurve to file:
            try:
                save_file = self.save_lightcurve(lc_corr,
                                                 output_folder=output_folder)
            except (KeyboardInterrupt, SystemExit):  # pragma: no cover
                status = STATUS.ABORT
                logger.warning("Correction was aborted (priority=%d)",
                               task['priority'])
            except:  # noqa: E722 pragma: no cover
                status = STATUS.ERROR
                logger.exception(
                    "Could not save lightcurve file (priority=%d)",
                    task['priority'])

            # Plot the final lightcurve:
            if self.plot:
                fig = plt.figure(dpi=200)
                ax = fig.add_subplot(111)
                ax.scatter(lc.time,
                           1e6 * (lc.flux / nanmedian(lc.flux) - 1),
                           s=2,
                           alpha=0.3,
                           marker='o',
                           label="Original")
                ax.scatter(lc_corr.time,
                           lc_corr.flux,
                           s=2,
                           alpha=0.3,
                           marker='o',
                           label="Corrected")
                ax.set_xlabel('Time (TBJD)')
                ax.set_ylabel('Relative flux (ppm)')
                ax.legend()
                save_figure(os.path.join(self.plot_folder(lc),
                                         self.CorrMethod + '_final'),
                            fig=fig)
                plt.close(fig)

        # Unpack any errors or warnings that were sent to the logger during the correction:
        if self.message_queue:
            error_msg += self.message_queue
            self.message_queue.clear()
        if not error_msg:
            error_msg = None

        # Update results:
        t2 = default_timer()
        details['errors'] = error_msg
        result.update({
            'corrector': self.CorrMethod,
            'status_corr': status,
            'elaptime_corr': t2 - t1,
            'lightcurve_corr': save_file,
            'details': details
        })

        return result
Example #15
    def stats_area(self, loc, tol=0, lmean=False, lmed=False, lskew=False,
                   lvar=False, lstd=False, lcoefvar=False, lperc=False,
                   p=0.95, save=False):
        """Calculate some statistics among every realisation, considering a
        circular (only horizontally) area of radius `tol` around the point
        located at `loc`.

        Parameters
        ----------
        loc : array_like
            Location of the vertical line [x, y].
        tol : number, default 0
            Tolerance radius used to search for neighbour nodes.
        lmean : boolean, default False
            Calculate the mean.
        lmed : boolean, default False
            Calculate the median.
        lskew : boolean, default False
            Calculate skewness.
        lvar : boolean, default False
            Calculate the variance.
        lstd : boolean, default False
            Calculate the standard deviation.
        lcoefvar : boolean, default False
            Calculate the coefficient of variation.
        lperc : boolean, default False
            Calculate the percentile `100 * (1 - p)`.
        p : number, default 0.95
            Probability value.
        save : boolean, default False
            Write the points used to calculate the chosen statistics in
            PointSet format to a file named 'sim values at (x, y, line).prn'.

        Returns
        -------
        statspset : PointSet
            PointSet instance containing the calculated statistics.

        .. TODO: check stats variance with geoms

        """
        if lmean:
            meanline = np.zeros(self.dz)
        if lmed:
            medline = np.zeros(self.dz)
        if lskew:
            skewline = np.zeros(self.dz)
        if lvar:
            varline = np.zeros(self.dz)
        if lstd:
            stdline = np.zeros(self.dz)
        if lcoefvar:
            coefvarline = np.zeros(self.dz)
        if lperc:
            percline = np.zeros((self.dz, 2))

        # convert the coordinates of the first point to grid nodes
        loc = coord_to_grid(loc, [self.cellx, self.celly, self.cellz],
                            [self.xi, self.yi, self.zi])[:2]
        # find the nodes coordinates within a circle centred in the first point
        neighbours_nodes = circle(loc[0], loc[1], tol)
        # compute the lines numbers for each point in the neighbourhood, across
        # each grid layer. this yields a N*M matrix, with N equal to the number
        # of neighbour nodes, and M equal to the number of layers in the grid.
        neighbours_lines = [line_zmirror(node, [self.dx, self.dy, self.dz])
                            for node in neighbours_nodes]
        # sort the lines in ascending order
        neighbours_lines = np.sort(neighbours_lines, axis=0)
        # create an array to store the neighbour nodes in each grid file
        nnodes = neighbours_lines.shape[0]
        arr = np.zeros(self.nfiles * nnodes)

        skip = True
        curr_line = np.zeros(self.nfiles)

        for layer in range(neighbours_lines.shape[1]):
            for i, line in enumerate(neighbours_lines[:, layer]):
                for j, grid in enumerate(self.files):
                    # skip header lines only once per grid file
                    if skip and self.header:
                        skip_lines(grid, self.header)

                    # advance to the next line with a neighbour node
                    skip_lines(grid, int(line - curr_line[j] - 1))
                    # read the line and store its value
                    a = grid.readline()
                    arr[i + j * nnodes] = float(a)

                    curr_line[j] = line
                    skip = False

            # replace no data's with NaN
            bn.replace(arr, self.nodata, np.nan)
            # compute the required statistics
            if lmean:
                meanline[layer] = bn.nanmean(arr)
            if lmed:
                medline[layer] = bn.nanmedian(arr)
            if lskew:
                skewline[layer] = pd.Series(arr).skew()
            if lvar:
                varline[layer] = bn.nanvar(arr, ddof=1)
            if lstd:
                stdline[layer] = bn.nanstd(arr, ddof=1)
            if lcoefvar:
                if lstd and lmean:
                    coefvarline[layer] = stdline[layer] / meanline[layer] * 100
                else:
                    std = bn.nanstd(arr, ddof=1)
                    mean = bn.nanmean(arr)
                    coefvarline[layer] = std / mean * 100
            if lperc:
                percline[layer] = pd.Series(arr).quantile([(1 - p) / 2,
                                                           1 - (1 - p) / 2])
            if save and tol == 0:
                # FIXME: not working with the tolerance feature
                # need to adjust the arrpset or cherry-pick arr
                arrpset = PointSet('realisations at location ({0}, {1}, {2})'.
                                   format(loc[0], loc[1], layer * self.cellz +
                                          self.zi), self.nodata, 3,
                                   ['x', 'y', 'value'],
                                   values=np.zeros((self.nfiles, 3)))
                arrout = os.path.join(os.path.dirname(self.files[0].name),
                                      'sim values at ({0}, {1}, {2}).prn'.format(
                                          loc[0], loc[1], layer * self.cellz
                                          + self.zi))
                arrpset.values.iloc[:, 2] = arr
                arrpset.values.iloc[:, :2] = np.repeat(np.array(loc)
                                                       [np.newaxis, :],
                                                       self.nfiles, axis=0)
                arrpset.save(arrout, header=True)

        ncols = sum((lmean, lmed, lvar, lstd, lcoefvar, lskew))
        if lperc:
            ncols += 2
        statspset = PointSet(name='vertical line stats at (x,y) = ({0},{1})'.
                             format(loc[0], loc[1]), nodata=self.nodata,
                             nvars=3 + ncols, varnames=['x', 'y', 'z'],
                             values=np.zeros((self.dz, 3 + ncols)))

        statspset.values.iloc[:, :3] = (np.column_stack
                                        (((np.repeat(np.array(loc)
                                                     [np.newaxis, :], self.dz,
                                                     axis=0)),
                                          np.arange(self.zi, self.zi +
                                                    self.cellz * self.dz))))

        j = 3
        if lmean:
            statspset.varnames.append('mean')
            statspset.values.iloc[:, j] = meanline
            j += 1
        if lmed:
            statspset.varnames.append('median')
            statspset.values.iloc[:, j] = medline
            j += 1
        if lskew:
            statspset.varnames.append('skewness')
            statspset.values.iloc[:, j] = skewline
            j += 1
        if lvar:
            statspset.varnames.append('variance')
            statspset.values.iloc[:, j] = varline
            j += 1
        if lstd:
            statspset.varnames.append('std')
            statspset.values.iloc[:, j] = stdline
            j += 1
        if lcoefvar:
            statspset.varnames.append('coefvar')
            statspset.values.iloc[:, j] = coefvarline
            j += 1
        if lperc:
            statspset.varnames.append('lperc')
            statspset.varnames.append('rperc')
            statspset.values.iloc[:, -2:] = percline

        # reset the reading pointer in each grid file
        self.reset_read()
        # update varnames
        statspset.flush_varnames()
        return statspset
Example #16
    def time_nanvar(self, dtype, shape):
        bn.nanvar(self.arr)
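The one-line benchmark above presumably lives in an asv-style suite; a minimal self-contained sketch of what the surrounding class might look like (the class name, parameter values and random data are illustrative, not from the source):

import numpy as np
import bottleneck as bn

class TimeNanvar:
    # asv passes each combination of params to setup() and to the time_* methods.
    params = [["float64"], [(10**6,), (1000, 1000)]]
    param_names = ["dtype", "shape"]

    def setup(self, dtype, shape):
        self.arr = np.random.rand(*shape).astype(dtype)

    def time_nanvar(self, dtype, shape):
        bn.nanvar(self.arr)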
Example #17
def calculate_beta(returns: pd.DataFrame,
                   in_flag: pd.DataFrame,
                   mkt: pd.DataFrame = None,
                   class_df: pd.DataFrame = None,
                   output_freq="D",
                   window_size=252,
                   k=5,
                   universe=True,
                   target_dates=None,
                   len_beta=None,
                   minimum_coverage=None) -> pd.DataFrame:
    """
    Beta calculation is fast by nature, so there is no need for parallel computation.
    :param returns: daily stock return DataFrame
    :param in_flag: daily in flag DataFrame
    :param mkt: daily market return DataFrame; this return is used as the benchmark (regressor) in a CAPM model;
                if it is a one-row DataFrame, then there is only one benchmark (e.g., the equal-weighted market return);
                if it has the same shape as returns, then different stocks may correspond to different benchmarks (e.g., industry return)
    :param class_df: DataFrame, the class each stock corresponds to (e.g., GICS industry);
                     by default, returns, in_flag and class_df should be of the same shape
                     and have their indexes (ID) & columns (dates) aligned
    :param output_freq: {'M', 'D'}; 'M' for monthly, 'D' for daily
    :param window_size: used in combination with output_freq; usually 252D (252 days) or 12M (12 months)
    :param k: a parameter used to determine outlier returns; only effective when mkt is None
    :param universe: if True, in_flag will be used to tell whether a stock is in the corresponding universe on a specific day
    :param target_dates: list-like, the specific dates that betas are calculated for
    :param len_beta: number of cross-sections that betas are calculated for
    :param minimum_coverage: the minimum percentage of non-missing returns needed to calculate betas
    :return: DataFrame, beta matrix
    """
    assert output_freq.lower() in ('d', 'm')
    if minimum_coverage is None or minimum_coverage > 1 or minimum_coverage < 0:
        minimum_coverage = 0.75
    if class_df is None:
        class_df = returns.values.copy()
        if universe:
            class_df[in_flag.values != 1] = np.nan
        class_df[np.isfinite(class_df)] = 1
        class_df = pd.DataFrame(class_df,
                                index=returns.index,
                                columns=returns.columns)
    diff_class = np.unique(class_df.values[~np.isnan(class_df)])
    period_end = frequency_convert(returns.columns, output_freq)
    period_end_idx = np.array(
        [np.argwhere(returns.columns == x)[0, 0] for x in period_end])
    selected_idx = range(
        window_size + np.argwhere((period_end_idx + 1) > np.argwhere(
            np.sum(np.isfinite(returns.values), axis=0) > 1)[0, 0])[0, 0],
        len(period_end_idx))
    if target_dates is None and len_beta is not None and len_beta > 0:
        selected_idx = range(selected_idx.stop - len_beta, selected_idx.stop)
    if target_dates is not None:
        target_idx = [
            np.argwhere(period_end == x).flatten()[0] for x in target_dates
        ]
        selected_idx_2 = [x for x in target_idx if x in selected_idx]
        selected_idx = target_idx
    output = np.full((returns.shape[0], len(selected_idx)), np.nan)
    for col_id, c_sel_idx in tqdm(enumerate(selected_idx)):
        if target_dates is not None and c_sel_idx not in selected_idx_2:
            continue
        for c_class in diff_class:
            if universe:
                c_mask = (
                    (in_flag.loc[:, period_end[c_sel_idx]] == 1) &
                    (class_df.loc[:, period_end[c_sel_idx]] == c_class)).values
            else:
                c_mask = (
                    class_df.loc[:, period_end[c_sel_idx]] == c_class).values
            if c_mask.any():
                c_idx = in_flag.values[c_mask, (
                    period_end_idx[c_sel_idx - window_size] +
                    1):(period_end_idx[c_sel_idx] + 1)] == 1
                c_rtn = returns.values[c_mask, (
                    period_end_idx[c_sel_idx - window_size] +
                    1):(period_end_idx[c_sel_idx] + 1)]
                if mkt is None:
                    # if mkt is not provided, calculate the ew-market return as the market return benchmark
                    cc_rtn = c_rtn.copy()
                    cc_rtn[~c_idx] = np.nan
                    if k > 1:
                        cc_rtn = remove_outliers(cc_rtn, k=k, set_na=False)
                    else:
                        cc_rtn[cc_rtn > k] = k
                    mkt_rtn = bn.nanmean(cc_rtn, axis=0)
                else:
                    c_rtn_columns = returns.columns[(
                        period_end_idx[c_sel_idx - window_size] +
                        1):(period_end_idx[c_sel_idx] + 1)]
                    dates = np.intersect1d(mkt.columns,
                                           c_rtn_columns,
                                           assume_unique=True)
                    c_rtn = pd.DataFrame(c_rtn, columns=c_rtn_columns)
                    c_rtn = c_rtn.loc[:, dates].values
                    mkt_rtn = mkt.loc[:, returns.columns[period_end_idx[
                        c_sel_idx - window_size]]:dates[-1]]
                    mkt_rtn_cols = mkt_rtn.columns
                    mkt_rtn = mkt_rtn.values.copy()
                    if mkt_rtn.shape[0] > 1:
                        mkt_rtn = mkt_rtn[c_mask, :]
                    mkt_rtn[np.isnan(mkt_rtn)] = 0
                    wealth = np.exp(np.log(1 + mkt_rtn).cumsum(axis=1))
                    mkt_rtn = pd.DataFrame(
                        wealth / shift_2darray(wealth, 1, axis=1) - 1,
                        columns=mkt_rtn_cols)
                    mkt_rtn = mkt_rtn.loc[:, dates].values
                mask_beta = np.sum(np.isfinite(c_rtn), axis=1) >= (
                    period_end_idx[c_sel_idx] -
                    period_end_idx[c_sel_idx - window_size]) * minimum_coverage
                if mask_beta.any():
                    mkt_var = bn.nanvar(mkt_rtn, ddof=1, axis=1)
                    if mkt_rtn.shape[0] == 1:
                        c_beta = pairwise_covariance(c_rtn[mask_beta],
                                                     mkt_rtn) / mkt_var
                    else:
                        c_beta = pairwise_covariance(
                            c_rtn[mask_beta],
                            mkt_rtn[mask_beta]) / mkt_var[mask_beta]
                    output[np.argwhere(c_mask).flatten()[mask_beta],
                           col_id] = c_beta

    return pd.DataFrame(output,
                        index=returns.index,
                        columns=period_end[selected_idx])
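The inner loop above reduces, for each stock i with sufficient coverage, to the usual market-model beta over the trailing window, computed with nan-aware estimators (ddof=1):

$$\hat\beta_i = \frac{\widehat{\operatorname{Cov}}(r_i,\, r_{\mathrm{mkt}})}{\widehat{\operatorname{Var}}(r_{\mathrm{mkt}})},$$

with pairwise_covariance supplying the numerator and bn.nanvar(mkt_rtn, ddof=1) the denominator.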
Example #18
	def correct(self, task, output_folder=None):
		"""
		Run correction.

		Parameters:
			task (dict): Dictionary defining a task/lightcurve to process.
			output_folder (string, optional): Path to directory where lightcurve should be saved.

		Returns:
			dict: Result dictionary containing information about the processing.

		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""

		logger = logging.getLogger(__name__)

		t1 = default_timer()

		error_msg = None
		save_file = None
		result = task.copy()
		try:
			# Load the lightcurve
			lc = self.load_lightcurve(task)

			# Run the correction on this lightcurve:
			lc_corr, status = self.do_correction(lc)

		except (KeyboardInterrupt, SystemExit):
			status = STATUS.ABORT
			logger.warning("Correction was aborted.")

		except:
			status = STATUS.ERROR
			error_msg = traceback.format_exc().strip()
			logger.exception("Correction failed.")

		# Check that the status has been changed:
		if status == STATUS.UNKNOWN:
			raise Exception("STATUS was not set by do_correction")

		# Calculate diagnostics:
		details = {}

		if status in (STATUS.OK, STATUS.WARNING):
			# Calculate diagnostics:
			details['variance'] = nanvar(lc_corr.flux, ddof=1)
			details['rms_hour'] = rms_timescale(lc_corr, timescale=3600/86400)
			details['ptp'] = nanmedian(np.abs(np.diff(lc_corr.flux)))

			# TODO: set outputs; self._details = self.lightcurve, etc.
			save_file = self.save_lightcurve(lc_corr, output_folder=output_folder)

			# Plot the final lightcurve:
			if self.plot:
				fig = plt.figure(dpi=200)
				ax = fig.add_subplot(111)
				ax.scatter(lc.time, 1e6*(lc.flux/nanmedian(lc.flux)-1), s=2, alpha=0.3, marker='o', label="Original")
				ax.scatter(lc_corr.time, lc_corr.flux, s=2, alpha=0.3, marker='o', label="Corrected")
				ax.set_xlabel('Time (TBJD)')
				ax.set_ylabel('Relative flux (ppm)')
				ax.legend()
				save_figure(os.path.join(self.plot_folder(lc), self.CorrMethod + '_final'), fig=fig)
				plt.close(fig)

			# Construct result dictionary from the original task
			result = lc_corr.meta['task'].copy()

		# Update results:
		t2 = default_timer()
		details['errors'] = error_msg
		result.update({
			'status_corr': status,
			'elaptime_corr': t2-t1,
			'lightcurve_corr': save_file,
			'details': details
		})

		return result
Example #19
def find_center_row(data):

    # Create interpolator for the median profile
    interp = scipy.interpolate.interp1d(
        x=data[:,0], y=data[:,1], kind='linear', 
        bounds_error=False, fill_value=numpy.NaN)

    #
    # Optimization routine
    #
    def fold_profile(p, interp, maxy, count):

        dx = numpy.arange(maxy, dtype=float)
        x_left = p[0] - dx
        x_right = p[0] + dx

        profile_left = interp(x_left)
        profile_right = interp(x_right)

        diff = profile_left - profile_right

        count[0] += 1
        # print "iteration %d --> %e" % (count[0], p[0])
        # with open("opt_%d.del" % (count[0]), "w") as f:
        #     numpy.savetxt(f, profile_left)
        #     print >>f, "\n"*5,
        #     numpy.savetxt(f, profile_right)
        #     print >>f, "\n"*5,
        #     numpy.savetxt(f, diff)

        return diff[numpy.isfinite(diff)]

    #
    # Get rid of all points that are too noisy
    #
    w=5
    noise = numpy.array([bottleneck.nanvar(data[i-w:i+w,1]) for i in range(w,data.shape[0]-w+1)])
    # numpy.savetxt("median_noise", noise)
    noise[:w] = numpy.NaN
    noise[-w:] = numpy.NaN

    for iteration in range(3):
        valid = numpy.isfinite(noise)
        _perc = numpy.percentile(noise[valid], [16,50,84])
        _med = _perc[1]
        _sigma = 0.5*(_perc[2]-_perc[0])
        outlier = (noise > _med+3*_sigma) | (noise < _med - 3*_sigma)
        noise[outlier] = numpy.NaN

    #numpy.savetxt("median_noise2", noise)
    valid = numpy.isfinite(noise)
    data[:,1][~valid] = numpy.NaN

    #numpy.savetxt("median_noise3", data)

    count=[0]
    fit_all = scipy.optimize.leastsq(
        func=fold_profile,
        x0=[data.shape[0]/5.],
        args=(interp, data.shape[0]/2,count),
        full_output=True,
        epsfcn=1e-1,
        )

    #print fit_all[0]

    return fit_all[0][0]
Example #20
    def correct(self, task):
        """
		Run correction.

		Parameters:
			task (dict): Dictionary defining a task/lightcurve to process.

		Returns:
			dict: Result dictionary containing information about the processing.

		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""

        logger = logging.getLogger(__name__)

        t1 = default_timer()

        error_msg = None
        save_file = None
        result = task.copy()
        try:
            # Load the lightcurve
            lc = self.load_lightcurve(task)

            # Run the correction on this lightcurve:
            lc, status = self.do_correction(lc)

        except (KeyboardInterrupt, SystemExit):
            status = STATUS.ABORT
            logger.warning("Correction was aborted.")

        except:
            status = STATUS.ERROR
            error_msg = traceback.format_exc().strip()
            logger.exception("Correction failed.")

        # Check that the status has been changed:
        if status == STATUS.UNKNOWN:
            raise Exception("STATUS was not set by do_correction")

        # Calculate diagnostics:
        details = {}

        if status in (STATUS.OK, STATUS.WARNING):
            # Calculate diagnostics:
            details['variance'] = nanvar(lc.flux, ddof=1)
            details['rms_hour'] = rms_timescale(lc, timescale=3600 / 86400)
            details['ptp'] = nanmedian(np.abs(np.diff(lc.flux)))

            # TODO: set outputs; self._details = self.lightcurve, etc.
            save_file = self.save_lightcurve(lc)

            # Construct result dictionary from the original task
            result = lc.meta['task'].copy()

        # Update results:
        t2 = default_timer()
        details['errors'] = error_msg
        result.update({
            'status_corr': status,
            'elaptime_corr': t2 - t1,
            'lightcurve_corr': save_file,
            'details': details
        })

        return result
Example #21
    def time_nanvar(self, dtype, shape, order, axis):
        bn.nanvar(self.arr, axis=axis)
Example #22
    def generate_todolist(self):
        """
		Generate todo.sqlite file in training set directory.

		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""
        logger = logging.getLogger(__name__)

        try:
            with closing(sqlite3.connect(self.todo_file)) as conn:
                conn.row_factory = sqlite3.Row
                cursor = conn.cursor()

                # Create the basic file structure of a TODO-list:
                todolist_structure(conn)

                logger.info(
                    "Step 3: Reading file and extracting information...")
                pri = 0

                diagnostics_file = os.path.join(self.input_folder,
                                                'diagnostics.txt')
                diagnostics = None
                if os.path.isfile(diagnostics_file):
                    diagnostics = np.genfromtxt(diagnostics_file,
                                                delimiter=',',
                                                comments='#',
                                                dtype=None,
                                                encoding='utf-8')

                for k, star in tqdm(enumerate(self.starlist),
                                    total=len(self.starlist)):
                    # Get starid:
                    starname = star[0]
                    starclass = star[1]
                    if starname.startswith('constant_'):
                        starid = -10000 - int(starname[9:])
                    elif starname.startswith('fakerrlyr_'):
                        starid = -20000 - int(starname[10:])
                    else:
                        starid = int(starname)
                        starname = '{0:09d}'.format(starid)

                    # Path to lightcurve:
                    lightcurve = starclass + '/' + starname + '.txt'

                    # Check that the file actually exists:
                    if not os.path.exists(
                            os.path.join(self.input_folder, lightcurve)):
                        raise FileNotFoundError(lightcurve)

                    # Load diagnostics from file, to speed up the process:
                    if diagnostics is not None:
                        variance, rms_hour, ptp = diagnostics[k]
                    else:
                        # Try to load the lightcurve using the BaseClassifier method.
                        # This will ensure that the lightcurve can actually be read by the system.
                        lc = io.load_lightcurve(
                            os.path.join(self.input_folder, lightcurve))

                        variance = nanvar(lc.flux, ddof=1)
                        rms_hour = utilities.rms_timescale(lc)
                        ptp = utilities.ptp(lc)

                        #if datasource is None:
                        #	if (lc.time[1] - lc.time[0])*86400 > 1000:
                        #		datasource = 'ffi'
                        #	else:
                        #		datasource = 'tpf'

                    elaptime = np.random.normal(3.14, 0.5)

                    pri += 1
                    todolist_insert(cursor,
                                    priority=pri,
                                    starid=starid,
                                    lightcurve=lightcurve,
                                    datasource='ffi',
                                    variance=variance,
                                    rms_hour=rms_hour,
                                    ptp=ptp,
                                    elaptime=elaptime)

                conn.commit()
                todolist_cleanup(conn, cursor)
                cursor.close()

        except:  # noqa: E722, pragma: no cover
            if os.path.exists(self.todo_file):
                os.remove(self.todo_file)
            raise

        logger.info("%s training set successfully built.", self.key)
Example #23
def test_known_star(SHARED_INPUT_DIR, corrector, starid, cadence, var_goal, rms_goal, ptp_goal):
	""" Check that the ensemble returns values that are reasonable and within expected bounds """

	# All stars we check here come from the same sector and camera.
	# Define these here for the future where we may test on other combinations of these:
	sector = 1
	camera = 1

	__dir__ = os.path.abspath(os.path.dirname(__file__))
	logger = logging.getLogger(__name__)
	logger.info("-------------------------------------------------------------")
	logger.info("CORRECTOR = %s, SECTOR=%d, CADENCE=%s, STARID=%d", corrector, sector, cadence, starid)

	# All stars are from the same CCD, find the task for it:
	with corrections.TaskManager(SHARED_INPUT_DIR) as tm:
		task = tm.get_task(starid=starid, sector=sector, camera=camera, cadence=cadence)

	# Check that task was actually found:
	assert task is not None, "Task could not be found"

	# Load lightcurve that will also be plotted together with the result:
	# This lightcurve is of the same objects, at a state where it was deemed that the
	# corrections were doing a good job.
	compare_lc_path = os.path.join(__dir__, 'compare', f'compare-{corrector}-s{sector:04d}-c{cadence:04d}-tic{starid:011d}.ecsv.gz')
	compare_lc = None
	if os.path.isfile(compare_lc_path):
		compare_lc = Table.read(compare_lc_path, format='ascii.ecsv')
	else:
		warnings.warn("Comparison data does not exist: " + compare_lc_path)

	# Initiate the class
	CorrClass = corrections.corrclass(corrector)
	with tempfile.TemporaryDirectory() as tmpdir:
		with CorrClass(SHARED_INPUT_DIR, plot=True) as corr:
			# Check basic parameters of object (from BaseCorrector):
			assert corr.input_folder == SHARED_INPUT_DIR, "Incorrect input folder"
			assert corr.plot, "Plot parameter not passed appropriately"
			assert os.path.isdir(corr.data_folder), "DATA_FOLDER doesn't exist"

			# Load the input lightcurve:
			inlc = corr.load_lightcurve(task)

			# Print input lightcurve properties:
			print( inlc.show_properties() )
			assert inlc.sector == sector
			assert inlc.camera == camera

			# Run correction:
			tmplc = inlc.copy()
			outlc, status = corr.do_correction(tmplc)

			# Check status
			assert outlc is not None, "Correction fails"
			assert isinstance(outlc, TessLightCurve), "Should return TessLightCurve object"
			assert isinstance(status, corrections.STATUS), "Should return a STATUS object"
			assert status in (corrections.STATUS.OK, corrections.STATUS.WARNING), "STATUS was not set appropriately"

			# Print output lightcurve properties:
			print( outlc.show_properties() )

			# Save the lightcurve to FITS file to be tested later on:
			save_file = corr.save_lightcurve(outlc, output_folder=tmpdir)

		# Check contents
		assert len(outlc) == len(inlc), "Input and output flux have different lengths"
		assert isinstance(outlc.flux, np.ndarray), "FLUX is not a ndarray"
		assert isinstance(outlc.flux_err, np.ndarray), "FLUX_ERR is not a ndarray"
		assert isinstance(outlc.quality, np.ndarray), "QUALITY is not a ndarray"
		assert outlc.flux.dtype.type is inlc.flux.dtype.type, "FLUX changes dtype"
		assert outlc.flux_err.dtype.type is inlc.flux_err.dtype.type, "FLUX_ERR changes dtype"
		assert outlc.quality.dtype.type is inlc.quality.dtype.type, "QUALITY changes dtype"
		assert outlc.flux.shape == inlc.flux.shape, "FLUX changes shape"
		assert outlc.flux_err.shape == inlc.flux_err.shape, "FLUX_ERR changes shape"
		assert outlc.quality.shape == inlc.quality.shape, "QUALITY changes shape"

		# Plot output lightcurves:
		fig, (ax1, ax2, ax3) = plt.subplots(3, 1, squeeze=True, figsize=[10, 10])
		ax1.plot(inlc.time, inlc.flux, lw=0.5)
		ax1.set_title(f"{corrector} - Sector {sector:d} - {cadence}s - TIC {starid:d}")
		if compare_lc:
			ax2.plot(compare_lc['time'], compare_lc['flux'], label='Compare', lw=0.5)
			ax3.axhline(0, lw=0.5, ls=':', color='0.7')
			ax3.plot(outlc.time, outlc.flux - compare_lc['flux'], lw=0.5)
		ax2.plot(outlc.time, outlc.flux, label='New', lw=0.5)
		ax1.set_ylabel('Flux [e/s]')
		ax1.minorticks_on()
		ax2.set_ylabel('Relative Flux [ppm]')
		ax2.minorticks_on()
		ax2.legend()
		ax3.set_ylabel('New - Compare [ppm]')
		ax3.set_xlabel('Time [TBJD]')
		ax3.minorticks_on()
		fig.savefig(os.path.join(__dir__, f'test-{corrector}-s{sector:04d}-c{cadence:04d}-tic{starid:011d}.png'), bbox_inches='tight')
		plt.close(fig)

		# Check things that are allowed to change:
		assert all(outlc.flux != inlc.flux), "Input and output flux are identical."
		assert not np.any(np.isinf(outlc.flux)), "FLUX contains Infinite"
		assert not np.any(np.isinf(outlc.flux_err)), "FLUX_ERR contains Infinite"
		assert np.sum(np.isnan(outlc.flux)) < 0.5*len(outlc), "More than half the lightcurve is NaN"
		assert allnan(outlc.flux_err[np.isnan(outlc.flux)]), "FLUX_ERR should be NaN where FLUX is"

		# TODO: Check that quality hasn't changed in ways that are not allowed:
		# - Only values defined in CorrectorQualityFlags
		# - No removal of flags already set
		assert all(outlc.quality >= 0)
		assert all(outlc.quality <= 128)
		assert all(outlc.quality >= inlc.quality)

		# Things that shouldn't change due to the corrections:
		assert outlc.targetid == inlc.targetid, "TARGETID has changed"
		assert outlc.label == inlc.label, "LABEL has changed"
		assert outlc.sector == inlc.sector, "SECTOR has changed"
		assert outlc.camera == inlc.camera, "CAMERA has changed"
		assert outlc.ccd == inlc.ccd, "CCD has changed"
		assert outlc.quality_bitmask == inlc.quality_bitmask, "QUALITY_BITMASK has changed"
		assert outlc.ra == inlc.ra, "RA has changed"
		assert outlc.dec == inlc.dec, "DEC has changed"
		assert outlc.mission == 'TESS', "MISSION has changed"
		assert outlc.time_format == 'btjd', "TIME_FORMAT has changed"
		assert outlc.time_scale == 'tdb', "TIME_SCALE has changed"
		assert_array_equal(outlc.time, inlc.time, "TIME has changed")
		assert_array_equal(outlc.timecorr, inlc.timecorr, "TIMECORR has changed")
		assert_array_equal(outlc.cadenceno, inlc.cadenceno, "CADENCENO has changed")
		assert_array_equal(outlc.pixel_quality, inlc.pixel_quality, "PIXEL_QUALITY has changed")
		assert_array_equal(outlc.centroid_col, inlc.centroid_col, "CENTROID_COL has changed")
		assert_array_equal(outlc.centroid_row, inlc.centroid_row, "CENTROID_ROW has changed")

		# Check metadata
		assert tmplc.meta == inlc.meta, "Correction changed METADATA in-place"
		assert outlc.meta['task'] == inlc.meta['task'], "Metadata is incomplete"
		assert isinstance(outlc.meta['additional_headers'], fits.Header)

		# Check performance metrics:
		#logger.warning("VAR: %e", nanvar(outlc.flux))
		if var_goal is not None:
			var_in = nanvar(inlc.flux)
			var_out = nanvar(outlc.flux)
			var_diff = np.abs(var_out - var_goal) / var_goal
			logger.info("VAR: %f - %f - %f", var_in, var_out, var_diff)
			assert_array_less(var_diff, 0.05, "VARIANCE changed outside interval")

		#logger.warning("RMS: %e", rms_timescale(outlc))
		if rms_goal is not None:
			rms_in = rms_timescale(inlc)
			rms_out = rms_timescale(outlc)
			rms_diff = np.abs(rms_out - rms_goal) / rms_goal
			logger.info("RMS: %f - %f - %f", rms_in, rms_out, rms_diff)
			assert_array_less(rms_diff, 0.05, "RMS changed outside interval")

		#logger.warning("PTP: %e", ptp(outlc))
		if ptp_goal is not None:
			ptp_in = ptp(inlc)
			ptp_out = ptp(outlc)
			ptp_diff = np.abs(ptp_out - ptp_goal) / ptp_goal
			logger.info("PTP: %f - %f - %f", ptp_in, ptp_out, ptp_diff)
			assert_array_less(ptp_diff, 0.05, "PTP changed outside interval")

		# Check FITS file:
		with fits.open(os.path.join(tmpdir, save_file), mode='readonly') as hdu:
			# Lightcurve FITS table:
			fitslc = hdu['LIGHTCURVE'].data
			hdr = hdu['LIGHTCURVE'].header

			# Simple checks of header values:
			assert hdu[0].header['TICID'] == starid

			# Checks of things in FITS table that should not have changed at all:
			assert_array_equal(fitslc['TIME'], inlc.time, "FITS: TIME has changed")
			assert_array_equal(fitslc['TIMECORR'], inlc.timecorr, "FITS: TIMECORR has changed")
			assert_array_equal(fitslc['CADENCENO'], inlc.cadenceno, "FITS: CADENCENO has changed")
			assert_array_equal(fitslc['FLUX_RAW'], inlc.flux, "FITS: FLUX_RAW has changed")
			assert_array_equal(fitslc['FLUX_RAW_ERR'], inlc.flux_err, "FITS: FLUX_RAW_ERR has changed")
			assert_array_equal(fitslc['MOM_CENTR1'], inlc.centroid_col, "FITS: CENTROID_COL has changed")
			assert_array_equal(fitslc['MOM_CENTR2'], inlc.centroid_row, "FITS: CENTROID_ROW has changed")

			# Some things are allowed to change, but still within some requirements:
			assert all(fitslc['FLUX_CORR'] != inlc.flux), "FITS: Input and output flux are identical."
			assert np.sum(np.isnan(fitslc['FLUX_CORR'])) < 0.5*len(fitslc['TIME']), "FITS: More than half the lightcurve is NaN"
			assert allnan(fitslc['FLUX_CORR_ERR'][np.isnan(fitslc['FLUX_CORR'])]), "FITS: FLUX_ERR should be NaN where FLUX is"

			if corrector == 'ensemble':
				# Check special headers:
				assert np.isfinite(hdr['ENS_MED']) and hdr['ENS_MED'] > 0
				assert isinstance(hdr['ENS_NUM'], int) and hdr['ENS_NUM'] > 0
				assert hdr['ENS_DLIM'] == 1.0
				assert hdr['ENS_DREL'] == 10.0
				assert hdr['ENS_RLIM'] == 0.4

				# Special extension for ensemble:
				tic = hdu['ENSEMBLE'].data['TIC']
				bzeta = hdu['ENSEMBLE'].data['BZETA']
				assert len(tic) == len(bzeta)
				assert len(np.unique(tic)) == len(tic), "TIC numbers in ENSEMBLE table are not unique"
				assert len(tic) == hdr['ENS_NUM'], "Not the same number of targets in ENSEMBLE table as specified in header"

			elif corrector == 'cbv':
				# Check special headers:
				assert isinstance(hdr['CBV_NUM'], int) and hdr['CBV_NUM'] > 0

				# Check coefficients:
				for k in range(0, hdr['CBV_NUM']+1):
					assert np.isfinite(hdr['CBV_C%d' % k])
				for k in range(1, hdr['CBV_NUM']+1):
					assert np.isfinite(hdr['CBVS_C%d' % k])
				# Check that no other coefficients are present
				assert 'CBV_C%d' % (hdr['CBV_NUM']+1) not in hdr
				assert 'CBVS_C%d' % (hdr['CBV_NUM']+1) not in hdr

			elif corrector == 'kasoc_filter':
				# Check special headers:
				assert hdr['KF_POSS'] == 'None'
				assert np.isfinite(hdr['KF_LONG']) and hdr['KF_LONG'] > 0
				assert np.isfinite(hdr['KF_SHORT']) and hdr['KF_SHORT'] > 0
				assert hdr['KF_SCLIP'] == 4.5
				assert hdr['KF_TCLIP'] == 5.0
				assert hdr['KF_TWDTH'] == 1.0
				assert hdr['KF_PSMTH'] == 200

				assert isinstance(hdr['NUM_PER'], int) and hdr['NUM_PER'] >= 0
				for k in range(1, hdr['NUM_PER']+1):
					assert np.isfinite(hdr['PER_%d' % k]) and hdr['PER_%d' % k] > 0
				# Check that no other periods are present
				assert 'PER_%d' % (hdr['NUM_PER'] + 1) not in hdr

		# Test that the gzipped FITS file stores the correct uncompressed file name by
		# decompressing it and asking gzip to restore the stored original name (-N).
		# This shells out to the system gzip utility, since the Python gzip module does not
		# expose the stored original file name directly:
		fpath = os.path.join(tmpdir, save_file)
		fpath_uncompressed = fpath.replace('.fits.gz', '.fits')
		assert not os.path.exists(fpath_uncompressed), "Uncompressed file already exists"
		gzip_output = subprocess.check_output(['gzip', '-dkNv', os.path.basename(fpath)],
			cwd=os.path.dirname(fpath),
			stderr=subprocess.STDOUT,
			encoding='utf8')
		print("Gzip output:")
		print(gzip_output)
		assert os.path.isfile(fpath_uncompressed), "Incorrect uncompressed file name"

		# Just see if we can in fact also open the uncompressed FITS file and get a simple header:
		with fits.open(fpath_uncompressed, mode='readonly') as hdu:
			assert hdu[0].header['TICID'] == starid
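
A side note on the gzip check above: the test shells out to the system gzip utility because the Python gzip module does not expose the original file name stored in the archive. As an aside not taken from the original test, that name can also be read directly from the FNAME field of the gzip header defined in RFC 1952. A minimal sketch, with the helper name being illustrative:

import struct

def gzip_original_name(path):
    """Return the original file name stored in a gzip member's FNAME field,
    or None if no name was recorded (RFC 1952, section 2.3)."""
    with open(path, 'rb') as fh:
        header = fh.read(10)  # magic, CM, FLG, MTIME, XFL, OS
        if len(header) < 10 or header[:2] != b'\x1f\x8b':
            raise ValueError("not a gzip file: %s" % path)
        flags = header[3]
        if flags & 0x04:  # FEXTRA flag: skip the extra field first
            xlen, = struct.unpack('<H', fh.read(2))
            fh.read(xlen)
        if not flags & 0x08:  # FNAME flag not set
            return None
        # FNAME is a zero-terminated ISO 8859-1 (Latin-1) string
        name = bytearray()
        while True:
            byte = fh.read(1)
            if byte in (b'', b'\x00'):
                break
            name += byte
        return name.decode('latin-1')

With such a helper, the test could compare gzip_original_name(fpath) against os.path.basename(fpath_uncompressed) without spawning a subprocess.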
Example #24
    def stats(self, lmean=False, lmed=False, lskew=False, lvar=False,
              lstd=False, lcoefvar=False, lperc=False, p=0.95):
        """Calculate some statistics among every realisation.

        Each statistic is calculated node-wise along the complete number of
        realisations.

        Parameters
        ----------
        lmean : boolean, default False
            Calculate the mean.
        lmed : boolean, default False
            Calculate the median.
        lskew : boolean, default False
            Calculate skewness.
        lvar : boolean, default False
            Calculate the variance.
        lstd : boolean, default False
            Calculate the standard deviation.
        lcoefvar : boolean, default False
            Calculate the coefficient of variation.
        lperc : boolean, default False
            Calculate the lower and upper quantiles bounding the central
            `100 * p` % probability interval.
        p : number, default 0.95
            Probability value.

        Returns
        -------
        retdict : dict of GridArr
            Dictionary containing one GridArr for each calculated statistic.

        See Also
        --------
        stats_area : same but considering a circular (and horizontal) area
            of a specified radius around a given point.

        """
        # check if the map files are already opened or not
        # (the Python 2 `file` builtin no longer exists in Python 3, so
        # duck-type on a file-like attribute instead)
        opened_files = hasattr(self.files[0], 'read')

        if lmean:
            meanmap = np.zeros(self.cells)
        if lmed:
            medmap = np.zeros(self.cells)
        if lskew:
            skewmap = np.zeros(self.cells)
        if lvar:
            varmap = np.zeros(self.cells)
        if lstd:
            stdmap = np.zeros(self.cells)
        if lcoefvar:
            coefvarmap = np.zeros(self.cells)
        if lperc:
            percmap = np.zeros((self.cells, 2))

        arr = np.zeros(self.nfiles)
        skip = True
        offset = os.SEEK_SET
        for cell in range(self.cells - self.header):
            for i, gridfile in enumerate(self.files):
                # deal with map files not open yet
                if opened_files:
                    grid = gridfile
                else:
                    grid = open(gridfile, 'rb')
                    grid.seek(offset)

                if skip:
                    skip_lines(grid, self.header)
                arr[i] = grid.readline()

            if not opened_files:
                offset = grid.tell()
                grid.close()

            skip = False
            # replace no-data values with NaN (bn.replace operates in-place)
            bn.replace(arr, self.nodata, np.nan)
            if lmean:
                meanmap[cell] = bn.nanmean(arr)
            if lmed:
                medmap[cell] = bn.nanmedian(arr)
            if lskew:
                skewmap[cell] = pd.Series(arr).skew()
            if lvar:
                varmap[cell] = bn.nanvar(arr, ddof=1)
            if lstd:
                stdmap[cell] = bn.nanstd(arr, ddof=1)
            if lcoefvar:
                if lstd and lmean:
                    coefvarmap[cell] = stdmap[cell] / meanmap[cell] * 100
                else:
                    std = bn.nanstd(arr, ddof=1)
                    mean = bn.nanmean(arr)
                    coefvarmap[cell] = std / mean * 100
            if lperc:
                percmap[cell] = pd.Series(arr).quantile([(1 - p) / 2,
                                                         1 - (1 - p) / 2])

        retdict = dict()

        if lmean:
            meangrid = GridArr(name='meanmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=meanmap)
            retdict['meanmap'] = meangrid
        if lmed:
            medgrid = GridArr(name='medianmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=medmap)
            retdict['medianmap'] = medgrid
        if lskew:
            skewgrid = GridArr(name='skewmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=skewmap)
            retdict['skewmap'] = skewgrid
        if lvar:
            vargrid = GridArr(name='varmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=varmap)
            retdict['varmap'] = vargrid
        if lstd:
            stdgrid = GridArr(name='stdmap', dx=self.dx, dy=self.dy,
                              dz=self.dz, nodata=self.nodata, val=stdmap)
            retdict['stdmap'] = stdgrid
        if lcoefvar:
            coefvargrid = GridArr(name='coefvarmap', dx=self.dx, dy=self.dy,
                                  dz=self.dz, nodata=self.nodata,
                                  val=coefvarmap)
            retdict['coefvarmap'] = coefvargrid
        if lperc:
            percgrid = GridArr(name='percmap', dx=self.dx, dy=self.dy,
                               dz=self.dz, nodata=self.nodata, val=percmap)
            retdict['percmap'] = percgrid

        return retdict
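
A minimal usage sketch for the stats() method above. The object name `grids` stands for an already-constructed instance of the surrounding class (a container of simulation map files); the variable names are illustrative only:

# Hypothetical call on an existing instance; only the keyword arguments shown
# here are taken from the method signature above.
results = grids.stats(lmean=True, lstd=True, lcoefvar=True, lperc=True, p=0.90)

mean_map = results['meanmap']      # node-wise mean as a GridArr
std_map = results['stdmap']        # node-wise standard deviation (ddof=1)
cv_map = results['coefvarmap']     # coefficient of variation, in percent
perc_map = results['percmap']      # bounds of the central 90 % probability interval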