def test_aperturephotometry(SHARED_INPUT_DIR, datasource): with TemporaryDirectory() as OUTPUT_DIR: with AperturePhotometry(DUMMY_TARGET, SHARED_INPUT_DIR, OUTPUT_DIR, plot=True, datasource=datasource, **DUMMY_KWARG) as pho: pho.photometry() filepath = pho.save_lightcurve() print(pho.lightcurve) # It should set the status to one of these: assert (pho.status in (STATUS.OK, STATUS.WARNING)) # Check the sumimage: plt.figure() plot_image(pho.sumimage, title=datasource) assert not anynan(pho.sumimage), "There are NaNs in the SUMIMAGE" # They shouldn't be exactly zero: assert not np.all(pho.lightcurve['flux'] == 0) assert not np.all(pho.lightcurve['flux_err'] == 0) assert not np.all(pho.lightcurve['pos_centroid'][:, 0] == 0) assert not np.all(pho.lightcurve['pos_centroid'][:, 1] == 0) # They shouldn't be NaN (in this case!): assert not allnan(pho.lightcurve['flux']) assert not allnan(pho.lightcurve['flux_err']) assert not allnan(pho.lightcurve['pos_centroid'][:, 0]) assert not allnan(pho.lightcurve['pos_centroid'][:, 1]) assert not np.any(~np.isfinite(pho.lightcurve['time'])) assert not np.any(pho.lightcurve['time'] == 0) # Test the outputted FITS file: with fits.open(filepath, mode='readonly') as hdu: # Should be the same vectors in FITS as returned in Table: np.testing.assert_allclose(pho.lightcurve['time'], hdu[1].data['TIME']) np.testing.assert_allclose(pho.lightcurve['timecorr'], hdu[1].data['TIMECORR']) np.testing.assert_allclose(pho.lightcurve['flux'], hdu[1].data['FLUX_RAW']) np.testing.assert_allclose(pho.lightcurve['flux_err'], hdu[1].data['FLUX_RAW_ERR']) np.testing.assert_allclose(pho.lightcurve['cadenceno'], hdu[1].data['CADENCENO']) # Test FITS aperture image: ap = hdu['APERTURE'].data print(ap) assert np.all(pho.aperture == ap), "Aperture image mismatch" assert not anynan(ap), "NaN in aperture image" assert np.all(ap >= 0), "Negative values in aperture image" assert np.any(ap & 2 != 0), "No photometric mask set" assert np.any(ap & 8 != 0), "No position mask set"
def remove_whole_nan_ys(x, ys): """Remove whole NaN columns of ys with corresponding x coordinates.""" whole_nan_columns = bottleneck.allnan(ys, axis=0) if np.any(whole_nan_columns): x = x[~whole_nan_columns] ys = ys[:, ~whole_nan_columns] return x, ys
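A minimal usage sketch of the helper above (the array values are illustrative, not from the source); only numpy and bottleneck are assumed:

import numpy as np
import bottleneck

# The middle column is all-NaN, so it is dropped together with its x coordinate:
x = np.array([10.0, 20.0, 30.0])
ys = np.array([[1.0, np.nan, 3.0],
               [4.0, np.nan, 6.0]])

x2, ys2 = remove_whole_nan_ys(x, ys)
print(x2)   # [10. 30.]
print(ys2)  # [[1. 3.], [4. 6.]]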
def fit(self, X, y): X_y = self._check_params(X, y) self.X = X_y[0] self.y = X_y[1].reshape((-1, 1)) n, p = X.shape S = [] # list of selected features F = list(range(p)) # list of unselected features (a list, so features can be removed as they are selected) if self.n_features != 'auto': feature_mi_matrix = np.zeros((self.n_features, p)) else: feature_mi_matrix = np.zeros((n, p)) feature_mi_matrix[:] = np.nan S_mi = [] # Find the first feature k_min = 3 range_k = 7 xy_MI = np.empty((range_k, p)) for i in range(range_k): xy_MI[i, :] = self._get_first_mi_vector(i + k_min) xy_MI = bn.nanmedian(xy_MI, axis=0) S, F = self._add_remove(S, F, bn.nanargmax(xy_MI)) S_mi.append(bn.nanmax(xy_MI)) if self.verbose > 0: self._info_print(S, S_mi) # Find the next features if self.n_features == 'auto': n_features = np.inf else: n_features = self.n_features while len(S) < n_features: s = len(S) - 1 feature_mi_matrix[s, F] = self._get_mi_vector(F, S[-1]) fmm = feature_mi_matrix[:len(S), F] if bn.allnan(bn.nanmean(fmm, axis=0)): break MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0) if np.isnan(MRMR).all(): break selected = F[bn.nanargmax(MRMR)] S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0))) S, F = self._add_remove(S, F, selected) if self.verbose > 0: self._info_print(S, S_mi) if self.n_features == 'auto' and len(S) > 10: MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1) if np.abs(np.mean(MI_dd[-5:])) < 1e-3: break self.n_features_ = len(S) self.ranking_ = S self.mi_ = S_mi return self
def ptp(lc): """ Compute robust Point-To-Point scatter. Parameters: lc (``lightkurve.TessLightCurve`` object): Lightcurve to calculate PTP for. Returns: float: Robust PTP. .. codeauthor:: Rasmus Handberg <*****@*****.**> """ if len(lc.flux) == 0 or allnan(lc.flux): return np.nan if len(lc.time) == 0 or allnan(lc.time): raise ValueError("Invalid time-vector specified. No valid timestamps.") return nanmedian(np.abs(np.diff(lc.flux)))
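A hedged sketch of how ptp behaves, using a SimpleNamespace stand-in for the lightkurve.TessLightCurve object (an assumption made only to keep the example dependency-free; ptp touches nothing but the time and flux attributes):

import numpy as np
from types import SimpleNamespace

lc = SimpleNamespace(time=np.arange(5, dtype=float),
                     flux=np.array([1.0, 1.2, 0.9, 1.1, np.nan]))

# |diff(flux)| = [0.2, 0.3, 0.2, nan], so the robust PTP is nanmedian(...) = 0.2:
print(ptp(lc))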
def test_halo(SHARED_INPUT_DIR, datasource): with TemporaryDirectory() as OUTPUT_DIR: with HaloPhotometry(267211065, SHARED_INPUT_DIR, OUTPUT_DIR, plot=True, datasource=datasource, sector=1, camera=3, ccd=2) as pho: pho.photometry() filepath = pho.save_lightcurve() print( pho.lightcurve ) # It should set the status to one of these: print(pho.status) assert pho.status in (STATUS.OK, STATUS.WARNING) # They shouldn't be exactly zero: assert not np.all(pho.lightcurve['flux'] == 0) assert not np.all(pho.lightcurve['flux_err'] == 0) assert not np.all(pho.lightcurve['pos_centroid'][:,0] == 0) assert not np.all(pho.lightcurve['pos_centroid'][:,1] == 0) # They shouldn't be NaN (in this case!): assert not allnan(pho.lightcurve['flux']) assert not allnan(pho.lightcurve['flux_err']) assert not allnan(pho.lightcurve['pos_centroid'][:,0]) assert not allnan(pho.lightcurve['pos_centroid'][:,1]) # Test the outputted FITS file: with fits.open(filepath, mode='readonly') as hdu: # Should be the same vectors in FITS as returned in Table: np.testing.assert_allclose(pho.lightcurve['time'], hdu[1].data['TIME']) np.testing.assert_allclose(pho.lightcurve['timecorr'], hdu[1].data['TIMECORR']) np.testing.assert_allclose(pho.lightcurve['flux'], hdu[1].data['FLUX_RAW']) np.testing.assert_allclose(pho.lightcurve['flux_err'], hdu[1].data['FLUX_RAW_ERR']) np.testing.assert_allclose(pho.lightcurve['cadenceno'], hdu[1].data['CADENCENO']) # Test FITS aperture image: ap = hdu['APERTURE'].data print(ap) assert np.all(pho.aperture == ap), "Aperture image mismatch" assert not anynan(ap), "NaN in aperture image" assert np.all(ap >= 0), "Negative values in aperture image" assert np.any(ap & 2 != 0), "No photometric mask set" #assert np.any(ap & 8 != 0), "No position mask set" print("Passed Tests for %s" % datasource)
def rms_timescale(time, flux, timescale=3600 / 86400): """ Compute robust RMS on specified timescale. Using MAD scaled to RMS. Parameters: time (ndarray): Timestamps in days. flux (ndarray): Flux to calculate RMS for. timescale (float, optional): Timescale to bin timeseries before calculating RMS. Default=1 hour. Returns: float: Robust RMS on specified timescale. .. codeauthor:: Rasmus Handberg <*****@*****.**> """ time = np.asarray(time) flux = np.asarray(flux) if len(flux) == 0 or allnan(flux): return np.nan if len(time) == 0 or allnan(time): raise ValueError("Invalid time-vector specified. No valid timestamps.") time_min = np.nanmin(time) time_max = np.nanmax(time) if not np.isfinite(time_min) or not np.isfinite(time_max) or time_max - time_min <= 0: raise ValueError("Invalid time-vector specified") # Construct the bin edges separated by the timescale: bins = np.arange(time_min, time_max, timescale) bins = np.append(bins, time_max) # Bin the timeseries on the specified timescale: indx = np.isfinite(flux) flux_bin, _, _ = binned_statistic(time[indx], flux[indx], nanmean, bins=bins) # Compute robust RMS value (MAD scaled to RMS) return mad_to_sigma * nanmedian(np.abs(flux_bin - nanmedian(flux_bin)))
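A usage sketch with synthetic data (illustrative, not from the source): unit white noise sampled every two minutes, binned on the default one-hour timescale, should give a robust RMS of roughly 1/sqrt(30), since each bin averages about 30 points:

import numpy as np

np.random.seed(0)
time = np.arange(0, 5.0, 120/86400)           # 5 days of 2-minute sampling
flux = np.random.normal(0.0, 1.0, len(time))  # unit white noise

print(rms_timescale(time, flux))  # ~0.18, i.e. ~1/sqrt(30)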
def lc_matrix_calc(Nstars, mat0): logger = logging.getLogger(__name__) logger.info("Calculating correlations...") indx_nancol = allnan(mat0, axis=0) mat1 = mat0[:, ~indx_nancol] mat1[np.isnan(mat1)] = 0 correlations = np.abs(AlmightyCorrcoefEinsumOptimized(mat1.T, mat1.T)) np.fill_diagonal(correlations, np.nan) return correlations
def __call__(self, data): """ Remove columns with constant values from the dataset and return the resulting data table. Parameters ---------- data : an input dataset """ oks = np.logical_and(~bn.allnan(data.X, axis=0), bn.nanmin(data.X, axis=0) != bn.nanmax(data.X, axis=0)) atts = [data.domain.attributes[i] for i, ok in enumerate(oks) if ok] domain = Orange.data.Domain(atts, data.domain.class_vars, data.domain.metas) return data.transform(domain)
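A hedged sketch of applying this preprocessor; the enclosing class is not shown above, so the name RemoveConstant used here is hypothetical, while Orange.data.Table("iris") is a standard Orange example dataset:

import Orange

data = Orange.data.Table("iris")
cleaned = RemoveConstant()(data)  # RemoveConstant is a hypothetical name for the class above

# All-NaN and constant columns are gone; the remaining attributes are unchanged:
print(len(cleaned.domain.attributes), len(data.domain.attributes))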
def lightcurve_correlation_matrix(mat): """ Calculate the correlation matrix between all lightcurves in matrix. Parameters: mat (numpy.array): (NxM) Returns: numpy.array: Correlation matrix (NxN). """ indx_nancol = allnan(mat, axis=0) mat1 = mat[:, ~indx_nancol] mat1[np.isnan(mat1)] = 0 correlations = np.abs(AlmightyCorrcoefEinsumOptimized(mat1.T, mat1.T)) np.fill_diagonal(correlations, np.nan) return correlations
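A small sketch of the correlation matrix on toy data (assuming AlmightyCorrcoefEinsumOptimized is importable from the same module; the numbers are illustrative):

import numpy as np

# Three lightcurves over five timestamps; the middle timestamp is NaN
# for all stars and is removed before the correlation is computed:
mat = np.array([[1.0, 2.0, np.nan, 4.0, 5.0],
                [1.1, 2.1, np.nan, 4.2, 5.1],
                [5.0, 4.0, np.nan, 2.0, 1.0]])

corr = lightcurve_correlation_matrix(mat)
print(corr.shape)                       # (3, 3); correlations are absolute values
print(np.all(np.isnan(np.diag(corr))))  # True: the diagonal is blanked out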
def __call__(self, data): data = self.transform_domain(data) if "edge_jump" in data.domain: edges = data.transform( Orange.data.Domain([data.domain["edge_jump"]])) I_jumps = edges.X[:, 0] else: raise NoEdgejumpProvidedException( 'Invalid meta data: Intensity jump at edge is missing') # order X by wavenumbers: # xs non ordered energies # xsind - indices corresponding to the ordered energies # mon = True # X spectra as corresponding to the ordered energies xs, xsind, mon, X = transform_to_sorted_features(data) # for the missing data X, nans = nan_extend_edges_and_interpolate(xs[xsind], X) # TODO notify the user if some unknown values were interpolated # Replace remaining NaNs (where whole rows were NaN) with # some values so that the function does not crash. # Results are going to be discarded later. nan_rows = bottleneck.allnan(X, axis=1) if np.any(nan_rows): # if there were no nans X is a view, so do not modify X[nan_rows] = 1. # do the transformation X = self.transformed(X, xs[xsind], I_jumps) # discard nan rows X[nan_rows] = np.nan # k scores are always ordered, so do not restore order return X
def test_freqextr_onlynoise(): np.random.seed(42) time = np.arange(0, 27.0, 1800/86400) flux = np.random.normal(0, 2, size=len(time)) lc = lk.TessLightCurve(time=time, flux=flux) tab = freqextr(lc, n_peaks=5, n_harmonics=2) _summary(lc, tab) assert tab.meta['n_peaks'] == 5 assert tab.meta['n_harmonics'] == 2 #print(tab.loc[1]) assert allnan(tab['frequency']) assert allnan(tab['amplitude']) assert allnan(tab['phase']) assert allnan(tab['alpha']) assert allnan(tab['beta']) assert allnan(tab['deviation'])
def test_known_star(SHARED_INPUT_DIR, corrector, starid, cadence, var_goal, rms_goal, ptp_goal): """ Check that the corrector returns values that are reasonable and within expected bounds """ # All stars we check here come from the same sector and camera. # Define these here for the future where we may test on other combinations of these: sector = 1 camera = 1 __dir__ = os.path.abspath(os.path.dirname(__file__)) logger = logging.getLogger(__name__) logger.info("-------------------------------------------------------------") logger.info("CORRECTOR = %s, SECTOR=%d, CADENCE=%s, STARID=%d", corrector, sector, cadence, starid) # All stars are from the same CCD, find the task for it: with corrections.TaskManager(SHARED_INPUT_DIR) as tm: task = tm.get_task(starid=starid, sector=sector, camera=camera, cadence=cadence) # Check that task was actually found: assert task is not None, "Task could not be found" # Load lightcurve that will also be plotted together with the result: # This lightcurve is of the same object, at a state where it was deemed that the # corrections were doing a good job. compare_lc_path = os.path.join(__dir__, 'compare', f'compare-{corrector}-s{sector:04d}-c{cadence:04d}-tic{starid:011d}.ecsv.gz') compare_lc = None if os.path.isfile(compare_lc_path): compare_lc = Table.read(compare_lc_path, format='ascii.ecsv') else: warnings.warn("Comparison data does not exist: " + compare_lc_path) # Instantiate the corrector class CorrClass = corrections.corrclass(corrector) with tempfile.TemporaryDirectory() as tmpdir: with CorrClass(SHARED_INPUT_DIR, plot=True) as corr: # Check basic parameters of object (from BaseCorrector): assert corr.input_folder == SHARED_INPUT_DIR, "Incorrect input folder" assert corr.plot, "Plot parameter not passed appropriately" assert os.path.isdir(corr.data_folder), "DATA_FOLDER doesn't exist" # Load the input lightcurve: inlc = corr.load_lightcurve(task) # Print input lightcurve properties: print( inlc.show_properties() ) assert inlc.sector == sector assert inlc.camera == camera # Run correction: tmplc = inlc.copy() outlc, status = corr.do_correction(tmplc) # Check status assert outlc is not None, "Correction failed" assert isinstance(outlc, TessLightCurve), "Should return TessLightCurve object" assert isinstance(status, corrections.STATUS), "Should return a STATUS object" assert status in (corrections.STATUS.OK, corrections.STATUS.WARNING), "STATUS was not set appropriately" # Print output lightcurve properties: print( outlc.show_properties() ) # Save the lightcurve to FITS file to be tested later on: save_file = corr.save_lightcurve(outlc, output_folder=tmpdir) # Check contents assert len(outlc) == len(inlc), "Input and output flux have different lengths" assert isinstance(outlc.flux, np.ndarray), "FLUX is not a ndarray" assert isinstance(outlc.flux_err, np.ndarray), "FLUX_ERR is not a ndarray" assert isinstance(outlc.quality, np.ndarray), "QUALITY is not a ndarray" assert outlc.flux.dtype.type is inlc.flux.dtype.type, "FLUX changes dtype" assert outlc.flux_err.dtype.type is inlc.flux_err.dtype.type, "FLUX_ERR changes dtype" assert outlc.quality.dtype.type is inlc.quality.dtype.type, "QUALITY changes dtype" assert outlc.flux.shape == inlc.flux.shape, "FLUX changes shape" assert outlc.flux_err.shape == inlc.flux_err.shape, "FLUX_ERR changes shape" assert outlc.quality.shape == inlc.quality.shape, "QUALITY changes shape" # Plot output lightcurves: fig, (ax1, ax2, ax3) = plt.subplots(3, 1, squeeze=True, figsize=[10, 10]) ax1.plot(inlc.time, inlc.flux, lw=0.5) 
ax1.set_title(f"{corrector} - Sector {sector:d} - {cadence}s - TIC {starid:d}") if compare_lc is not None: ax2.plot(compare_lc['time'], compare_lc['flux'], label='Compare', lw=0.5) ax3.axhline(0, lw=0.5, ls=':', color='0.7') ax3.plot(outlc.time, outlc.flux - compare_lc['flux'], lw=0.5) ax2.plot(outlc.time, outlc.flux, label='New', lw=0.5) ax1.set_ylabel('Flux [e/s]') ax1.minorticks_on() ax2.set_ylabel('Relative Flux [ppm]') ax2.minorticks_on() ax2.legend() ax3.set_ylabel('New - Compare [ppm]') ax3.set_xlabel('Time [TBJD]') ax3.minorticks_on() fig.savefig(os.path.join(__dir__, f'test-{corrector}-s{sector:04d}-c{cadence:04d}-tic{starid:011d}.png'), bbox_inches='tight') plt.close(fig) # Check things that are allowed to change: assert all(outlc.flux != inlc.flux), "Input and output flux are identical." assert not np.any(np.isinf(outlc.flux)), "FLUX contains Infinite" assert not np.any(np.isinf(outlc.flux_err)), "FLUX_ERR contains Infinite" assert np.sum(np.isnan(outlc.flux)) < 0.5*len(outlc), "More than half the lightcurve is NaN" assert allnan(outlc.flux_err[np.isnan(outlc.flux)]), "FLUX_ERR should be NaN where FLUX is" # TODO: Check that quality hasn't changed in ways that are not allowed: # - Only values defined in CorrectorQualityFlags # - No removal of flags already set assert all(outlc.quality >= 0) assert all(outlc.quality <= 128) assert all(outlc.quality >= inlc.quality) # Things that shouldn't change due to the corrections: assert outlc.targetid == inlc.targetid, "TARGETID has changed" assert outlc.label == inlc.label, "LABEL has changed" assert outlc.sector == inlc.sector, "SECTOR has changed" assert outlc.camera == inlc.camera, "CAMERA has changed" assert outlc.ccd == inlc.ccd, "CCD has changed" assert outlc.quality_bitmask == inlc.quality_bitmask, "QUALITY_BITMASK has changed" assert outlc.ra == inlc.ra, "RA has changed" assert outlc.dec == inlc.dec, "DEC has changed" assert outlc.mission == 'TESS', "MISSION has changed" assert outlc.time_format == 'btjd', "TIME_FORMAT has changed" assert outlc.time_scale == 'tdb', "TIME_SCALE has changed" assert_array_equal(outlc.time, inlc.time, "TIME has changed") assert_array_equal(outlc.timecorr, inlc.timecorr, "TIMECORR has changed") assert_array_equal(outlc.cadenceno, inlc.cadenceno, "CADENCENO has changed") assert_array_equal(outlc.pixel_quality, inlc.pixel_quality, "PIXEL_QUALITY has changed") assert_array_equal(outlc.centroid_col, inlc.centroid_col, "CENTROID_COL has changed") assert_array_equal(outlc.centroid_row, inlc.centroid_row, "CENTROID_ROW has changed") # Check metadata assert tmplc.meta == inlc.meta, "Correction changed METADATA in-place" assert outlc.meta['task'] == inlc.meta['task'], "Metadata is incomplete" assert isinstance(outlc.meta['additional_headers'], fits.Header) # Check performance metrics: #logger.warning("VAR: %e", nanvar(outlc.flux)) if var_goal is not None: var_in = nanvar(inlc.flux) var_out = nanvar(outlc.flux) var_diff = np.abs(var_out - var_goal) / var_goal logger.info("VAR: %f - %f - %f", var_in, var_out, var_diff) assert_array_less(var_diff, 0.05, "VARIANCE changed outside interval") #logger.warning("RMS: %e", rms_timescale(outlc)) if rms_goal is not None: rms_in = rms_timescale(inlc) rms_out = rms_timescale(outlc) rms_diff = np.abs(rms_out - rms_goal) / rms_goal logger.info("RMS: %f - %f - %f", rms_in, rms_out, rms_diff) assert_array_less(rms_diff, 0.05, "RMS changed outside interval") #logger.warning("PTP: %e", ptp(outlc)) if ptp_goal is not None: ptp_in = ptp(inlc) ptp_out = ptp(outlc) ptp_diff = np.abs(ptp_out - 
ptp_goal) / ptp_goal logger.info("PTP: %f - %f - %f", ptp_in, ptp_out, ptp_diff) assert_array_less(ptp_diff, 0.05, "PTP changed outside interval") # Check FITS file: with fits.open(os.path.join(tmpdir, save_file), mode='readonly') as hdu: # Lightcurve FITS table: fitslc = hdu['LIGHTCURVE'].data hdr = hdu['LIGHTCURVE'].header # Simple checks of header values: assert hdu[0].header['TICID'] == starid # Checks of things in FITS table that should not have changed at all: assert_array_equal(fitslc['TIME'], inlc.time, "FITS: TIME has changed") assert_array_equal(fitslc['TIMECORR'], inlc.timecorr, "FITS: TIMECORR has changed") assert_array_equal(fitslc['CADENCENO'], inlc.cadenceno, "FITS: CADENCENO has changed") assert_array_equal(fitslc['FLUX_RAW'], inlc.flux, "FITS: FLUX_RAW has changed") assert_array_equal(fitslc['FLUX_RAW_ERR'], inlc.flux_err, "FITS: FLUX_RAW_ERR has changed") assert_array_equal(fitslc['MOM_CENTR1'], inlc.centroid_col, "FITS: CENTROID_COL has changed") assert_array_equal(fitslc['MOM_CENTR2'], inlc.centroid_row, "FITS: CENTROID_ROW has changed") # Some things are allowed to change, but still within some requirements: assert all(fitslc['FLUX_CORR'] != inlc.flux), "FITS: Input and output flux are identical." assert np.sum(np.isnan(fitslc['FLUX_CORR'])) < 0.5*len(fitslc['TIME']), "FITS: More than half the lightcurve is NaN" assert allnan(fitslc['FLUX_CORR_ERR'][np.isnan(fitslc['FLUX_CORR'])]), "FITS: FLUX_ERR should be NaN where FLUX is" if corrector == 'ensemble': # Check special headers: assert np.isfinite(hdr['ENS_MED']) and hdr['ENS_MED'] > 0 assert isinstance(hdr['ENS_NUM'], int) and hdr['ENS_NUM'] > 0 assert hdr['ENS_DLIM'] == 1.0 assert hdr['ENS_DREL'] == 10.0 assert hdr['ENS_RLIM'] == 0.4 # Special extension for ensemble: tic = hdu['ENSEMBLE'].data['TIC'] bzeta = hdu['ENSEMBLE'].data['BZETA'] assert len(tic) == len(bzeta) assert len(np.unique(tic)) == len(tic), "TIC numbers in ENSEMBLE table are not unique" assert len(tic) == hdr['ENS_NUM'], "Not the same number of targets in ENSEMBLE table as specified in header" elif corrector == 'cbv': # Check special headers: assert isinstance(hdr['CBV_NUM'], int) and hdr['CBV_NUM'] > 0 # Check coefficients: for k in range(0, hdr['CBV_NUM']+1): assert np.isfinite(hdr['CBV_C%d' % k]) for k in range(1, hdr['CBV_NUM']+1): assert np.isfinite(hdr['CBVS_C%d' % k]) # Check that no other coefficients are present assert 'CBV_C%d' % (hdr['CBV_NUM']+1) not in hdr assert 'CBVS_C%d' % (hdr['CBV_NUM']+1) not in hdr elif corrector == 'kasoc_filter': # Check special headers: assert hdr['KF_POSS'] == 'None' assert np.isfinite(hdr['KF_LONG']) and hdr['KF_LONG'] > 0 assert np.isfinite(hdr['KF_SHORT']) and hdr['KF_SHORT'] > 0 assert hdr['KF_SCLIP'] == 4.5 assert hdr['KF_TCLIP'] == 5.0 assert hdr['KF_TWDTH'] == 1.0 assert hdr['KF_PSMTH'] == 200 assert isinstance(hdr['NUM_PER'], int) and hdr['NUM_PER'] >= 0 for k in range(1, hdr['NUM_PER']+1): assert np.isfinite(hdr['PER_%d' % k]) and hdr['PER_%d' % k] > 0 # Check that no other periods are present assert 'PER_%d' % (hdr['NUM_PER'] + 1) not in hdr # Test that the Gzip FITS file has the correct uncompressed file name, by simply # decompressing the Gzip file, asking to keep the original file name. 
# This uses the system GZIP utility, since there doesn't seem to be a way to do this # through the Python gzip module: fpath = os.path.join(tmpdir, save_file) fpath_uncompressed = fpath.replace('.fits.gz', '.fits') assert not os.path.exists(fpath_uncompressed), "Uncompressed file already exists" gzip_output = subprocess.check_output(['gzip', '-dkNv', os.path.basename(fpath)], cwd=os.path.dirname(fpath), stderr=subprocess.STDOUT, encoding='utf8') print("Gzip output:") print(gzip_output) assert os.path.isfile(fpath_uncompressed), "Incorrect uncompressed file name" # Just see if we can in fact also open the uncompressed FITS file and get a simple header: with fits.open(fpath_uncompressed, mode='readonly') as hdu: assert hdu[0].header['TICID'] == starid
def lightcurve_matrix(self): """ Load matrix filled with light curves. The steps performed are the following: #. Only targets with a variability below a threshold are included. #. Computes correlation matrix for light curves in a given cbv-area and only includes the :meth:`threshold_correlation` most correlated light curves. #. Performs gap-filling of light curves and removes time stamps where all flux values are NaN. Returns: tuple: - :class:`numpy.ndarray`: matrix of light curves to be used in CBV calculation. - :class:`numpy.ndarray`: the indices for the timestamps with nans in all light curves. - `int`: Number of timestamps. .. codeauthor:: Rasmus Handberg <*****@*****.**> .. codeauthor:: Mikkel N. Lund <*****@*****.**> """ logger = logging.getLogger(__name__) tqdm_settings = { 'disable': None if logger.isEnabledFor(logging.INFO) else True } logger.info('Running matrix clean') if logger.isEnabledFor(logging.DEBUG) and 'matrix' in self.hdf: # pragma: no cover logger.info("Loading existing file...") return self.hdf['matrix'], self.hdf['nancols'], self.hdf.attrs['Ntimes'] logger.info("We are running CBV_AREA=%d", self.cbv_area) # Set up search parameters for database: search_params = [ f'status={STATUS.OK.value:d}', # Only including targets with status=OK from photometry "method_used='aperture'", # Only including aperture photometry targets f'cadence={self.cadence:d}', f'cbv_area={self.cbv_area:d}', f'sector={self.sector:d}' ] # Find the median of the variabilities: variability = np.array([ float(row['variability']) for row in self.search_database(search=search_params, select='variability') ], dtype='float64') if len(variability) == 0: raise ValueError( "No lightcurves found for this CBV_AREA that have VARIABILITY defined" ) median_variability = nanmedian(variability) # Plot the distribution of variability for all stars: fig = plt.figure() ax = fig.add_subplot(111) ax.hist(variability / median_variability, bins=np.logspace(np.log10(0.1), np.log10(1000.0), 50)) ax.axvline(self.threshold_variability, color='r') ax.set_xscale('log') ax.set_xlabel('Variability') fig.savefig( os.path.join( self.cbv_plot_folder, f'variability-s{self.sector:04d}-c{self.cadence:04d}-a{self.cbv_area}.png' )) plt.close(fig) # Get the list of stars that we are going to load in the lightcurves for: search_params.append('variability < %f' % (self.threshold_variability * median_variability)) stars = self.search_database( select=['lightcurve', 'mean_flux', 'variance'], search=search_params) # Number of stars returned: Nstars = len(stars) # Load the very first timeseries only to find the number of timestamps. lc = self.load_lightcurve(stars[0]) Ntimes = len(lc.time) # Save aux information about this CBV to a separate file. 
self.hdf.create_dataset('time', data=lc.time - lc.timecorr) self.hdf.create_dataset('cadenceno', data=lc.cadenceno) self.hdf.attrs['camera'] = lc.camera self.hdf.attrs['ccd'] = lc.ccd self.hdf.attrs['data_rel'] = lc.meta['data_rel'] self.hdf.flush() logger.info("Matrix size: %d x %d", Nstars, Ntimes) # Make the matrix that will hold all the lightcurves: logger.info("Loading in lightcurves...") mat = np.full((Nstars, Ntimes), np.nan, dtype='float64') varis = np.empty(Nstars, dtype='float64') # Loop over stars, fill for k, star in tqdm(enumerate(stars), total=Nstars, **tqdm_settings): # Load lightkurve object lc = self.load_lightcurve(star) # Remove bad data based on quality flag_good = TESSQualityFlags.filter( lc.pixel_quality, TESSQualityFlags.CBV_BITMASK) & CorrectorQualityFlags.filter( lc.quality, CorrectorQualityFlags.CBV_BITMASK) lc.flux[~flag_good] = np.nan # Normalize the data and store it in the rows of the matrix: mat[k, :] = lc.flux / star['mean_flux'] - 1.0 # Store the standard deviations of each lightcurve: varis[k] = np.NaN if star['variance'] is None else star['variance'] # Only start calculating correlations if we are actually filtering using them: if self.threshold_correlation < 1.0: # Calculate the correlation matrix between all lightcurves: logger.info("Calculating correlations...") correlations = lightcurve_correlation_matrix(mat) # If running in DEBUG mode, save the correlations matrix to file: if logger.isEnabledFor(logging.DEBUG): # pragma: no cover self.hdf.create_dataset('correlations', data=correlations) # Find the median absolute correlation between each lightcurve and all other lightcurves: c = nanmedian(correlations, axis=0) # Indices that would sort the lightcurves by correlations in descending order: indx = np.argsort(c)[::-1] indx = indx[:int(self.threshold_correlation * Nstars)] #TODO: remove based on threshold value? rather than just % of stars # Only keep the top "threshold_correlation"% of the lightcurves that are most correlated: mat = mat[indx, :] varis = varis[indx] # Clean up a bit: del correlations, c, indx # Print the final shape of the matrix: Nstars = mat.shape[0] Ntimes = mat.shape[1] # Find columns where all stars have NaNs and remove them: indx_nancol = allnan(mat, axis=0) mat = mat[:, ~indx_nancol] logger.info("Matrix size: %d x %d", mat.shape[0], mat.shape[1]) logger.info("Gap-filling lightcurves...") cadenceno = np.arange(mat.shape[1]) count_interp = 0 for k in tqdm(range(Nstars), total=Nstars, **tqdm_settings): # Normalize the lightcurves by their variances: mat[k, :] /= varis[k] # Fill out missing values by interpolating the lightcurve: ibad = ~np.isfinite(mat[k, :]) Ninterp = int(np.sum(ibad)) count_interp += Ninterp if Ninterp > 0: mat[k, ibad] = pchip_interpolate(cadenceno[~ibad], mat[k, ~ibad], cadenceno[ibad]) # Print the average number of interpolated points: avg_interp = count_interp / Nstars logger.info("Average interpolated per star: %f points, %.3f%%", avg_interp, 100 * avg_interp / Ntimes) # Save something for debugging: self.hdf.attrs['Ntimes'] = Ntimes self.hdf.attrs['Nstars'] = Nstars self.hdf.attrs['average_interpolated_points'] = avg_interp if logger.isEnabledFor(logging.DEBUG): # pragma: no cover self.hdf.create_dataset('matrix', data=mat) self.hdf.create_dataset('nancols', data=indx_nancol) return mat, indx_nancol, Ntimes
def fit(self, lc, use_bic=True, use_prior=False, cbvs=None, alpha=1.3, WS_lim=0.5, N_neigh=1000): """ Fit the CBV object to a lightcurve, and return the fitted cotrending-lightcurve and the fitting coefficients. Parameters: lc (:class:`LightCurve`): Lightcurve to be cotrended. use_bic (bool, optional): Use the Bayesian Information Criterion to find the optimal number of CBVs to fit. Default=True. use_prior (bool, optional): cbvs (int, optional): Number of CBVs to fit to lightcurve. If `use_bic=True`, this indicates the maximum number of CBVs to fit. Returns: - `numpy.array`: Fitted lightcurve with the same length as `lc`. - list: Coefficients for each CBV. - dict: Diagnostics information about the fitting. """ logger = logging.getLogger(__name__) # If no uncertainties are provided, fill it with ones: if allnan(lc.flux_err): lc.flux_err[:] = 1 # Remove bad data based on quality if not allnan(lc.quality): flag_good = CorrectorQualityFlags.filter(lc.quality) lc.flux[~flag_good] = np.nan lc.flux_err[~flag_good] = np.nan # Diagnostics to return at the end about what was # actually used in the fitting: diagnostics = { 'method': None, 'use_bic': use_bic, 'use_prior': use_prior } # Fit the CBV to the flux: if use_prior: # Do fits including prior information from the initial fits # allow switching to a simple LSSQ fit depending on # variability measures (not fully implemented yet!) # Position of target in multidimensional prior space: row = lc.meta['task']['pos_row'] col = lc.meta['task']['pos_column'] tmag = np.clip(lc.meta['task']['tmag'], 2, 20) pos = np.array([row, col, tmag]) # Prior curve n_components = self.cbs.shape[1] pc0, opts = self._priorcurve(pos, n_components, N_neigh) pc = pc0 * lc.meta['task']['mean_flux'] # Compute new variability measure idx = np.isfinite(lc.flux) polyfit = np.polyval(np.polyfit(lc.time[idx], lc.flux[idx], 3), lc.time) residual = MAD_model(lc.flux - pc) #residual_ratio = MAD_model(lc.flux-lc.meta['task']['mean_flux'])/residual #WS = np.min([1, residual_ratio]) AA = 2 GRAW = np.std((pc - polyfit) / MAD_model2(lc.flux - polyfit) - 1) GPR = 0 + (1 - (GRAW / AA)**2) * (GRAW < AA) beta1 = 1 beta2 = 1 VAR = np.nanstd(lc.flux - polyfit) WS = np.min([1, (VAR**beta1) * (GPR**beta2)]) if WS > WS_lim: logger.debug('Fitting using LLSQ') flux_filter, res = self._fit( lc, Numcbvs=5, use_bic=use_bic) # use smaller number of CBVs diagnostics['method'] = 'LS' diagnostics['use_prior'] = False diagnostics['use_bic'] = False else: logger.debug('Fitting using Priors') # Define multi-dimensional prior: dist, ind = self.priors.query(pos, k=N_neigh + 1) W = 1 / dist[0][1:]**2 V = self.inifit[ind[1:], :] KDE = gaussian_kde(V, weights=W.flatten(), bw_method='scott') wscale = 1.0 def logprior(coeff): return wscale * KDE.logpdf(coeff) flux_filter, res = self._fit(lc, err=residual, use_bic=use_bic, logprior=logprior, start_guess=opts) diagnostics.update({ 'method': 'MAP', 'residual': residual, 'WS': WS, 'pc': pc }) else: # Do "simple" LSSQ fits using BIC to decide on number of CBVs to include logger.debug('Fitting TIC %d using LLSQ', lc.targetid) flux_filter, res = self._fit(lc, Numcbvs=cbvs, use_bic=use_bic) diagnostics['method'] = 'LS' return flux_filter, res, diagnostics
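A hedged sketch of calling fit; only the signature and return values are taken from the code above, while the cbv instance and the lightcurve lc are assumed to come from the surrounding pipeline:

# `cbv` is an instance of the class defining fit() above, and `lc` is a
# lightcurve with meta['task'] populated as the prior branch expects:
flux_filter, coeffs, diagnostics = cbv.fit(lc, use_bic=True, use_prior=False)
print(diagnostics['method'])  # 'LS' here, since use_prior=False
# flux_filter has the same length as lc and holds the fitted cotrending curve.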
del correlations, c, indx # Save something for debugging: np.savez('mat-sector%02d-%d.npz' % (sector, cbv_area), mat=mat, priorities=priorities, stds=stds) # Print the final shape of the matrix: print("Matrix size: %d x %d" % mat.shape) # Simple low-pass filter of the individual targets: #mat = move_median_central(mat, 48, axis=1) # Find columns where all stars have NaNs and remove them: indx_nancol = allnan(mat, axis=0) Ntimes = mat.shape[1] mat = mat[:, ~indx_nancol] cadenceno = np.arange(mat.shape[1]) # TODO: Is this even needed? Or should it be done earlier? print("Gap-filling lightcurves...") for k in tqdm(range(mat.shape[0]), total=mat.shape[0]): mat[k, :] /= stds[k] # Fill out missing values by interpolating the lightcurve: indx = np.isfinite(mat[k, :]) mat[k, ~indx] = pchip_interpolate(cadenceno[indx], mat[k, indx], cadenceno[~indx])
def time_allnan(self, dtype, shape, order, axis, case): bn.allnan(self.arr, axis=axis)
def filtered_extinction(inv,msl_altitudes,min_alt,window_od, t_window_length ,max_z_window_length,order,adaptive,telescope_pointing=None): """filtered_extinction(inv, msl_altitudes, min_alt, window_od, t_window_length, max_z_window_length, order, adaptive, telescope_pointing=None) Savitzky-Golay fitting which can use a fixed window or an adaptive window which restricts the length of the fit to a user specified optical depth interval estimated from the integrated backscatter cross section divided by a p180/4pi = 0.025 inv = object containing beta_a_backscat_par, beta_a_backscat_perp and beta_r_backscat, the Rayleigh backscatter msl_altitudes = vector of bin altitudes (m) min_alt = smooth altitudes > (min_alt + max_window_length/2.0) (this is ignored in current code) window_od = max estimated od within fit window t_window_length = length of time window (seconds) max_z_window_length= max length of altitude fit window (m) order = order of polynomial to use in fit adaptive = 0, fixed length window = 1, window length can not exceed od limit""" if not hasattr(inv,'Nm'): print 'ERROR: XXXXXXXXXXXXXXXXXX Filtered extinction missing Nm in inv. Nothing to do.' return inv data_len = len(inv.Nm[0,:]) ntimes = len(inv.Nm[:,0]) #distance between altitude bins dz=0 for dzi in range(len(msl_altitudes)-1): ndz = msl_altitudes[dzi+1]-msl_altitudes[dzi] if ndz>0 and abs(dz-ndz)<.01: break dz=ndz z_window_pts = int(max_z_window_length/dz) #must be at least order +1 if z_window_pts < order +1: z_window_pts = order +1 #must be odd if z_window_pts%2==0: z_window_pts = z_window_pts + 1 if ntimes > order: dt = inv.delta_t.copy()#(inv.times[2] - inv.times[1]).seconds dt[dt<1.0] = 1.0 t_window_pts = np.array(t_window_length/dt,dtype='int') #must be at least order +1 t_window_pts[t_window_pts < order +1]=order +1 #must be odd t_window_pts[t_window_pts%2==0]+=1 filtered_Nm = inv.Nm.copy() integrated_backscat = inv.integrated_backscat.copy() #filter in time for k in range(data_len-1): if not allnan(filtered_Nm[:,k]) and ntimes > t_window_pts[k]: filtered_Nm[:,k] = sg.savitzky_golay(filtered_Nm[:,k],t_window_pts[k],order) else: filtered_Nm = inv.Nm.copy() integrated_backscat = inv.integrated_backscat.copy() if telescope_pointing is None : #if not provided assume zenith pointing t_pointing = np.ones_like(inv.Nm[:,0]) else: t_pointing = telescope_pointing.copy() t_pointing[t_pointing < 0.1] = -1.0 extinction = np.zeros(filtered_Nm.shape) #if (data_len-min_alt/dz) < z_window_pts*5.0: if data_len < z_window_pts*5.0: print print 'WARNING---filtered_extinction--filter window length too long' print 'resetting z_window_pts from ',z_window_pts, #z_window_pts = int((data_len-min_alt/dz)/5.0) z_window_pts = int(data_len/5.0) z_window_pts = 2*(z_window_pts/2)+1 print 'to ', z_window_pts if z_window_pts < order +2: print #raise ValueError, 'number of altitude resolution elements too few for filter length' print 'WARNING-----Savitzky_golay---number of altitude resolution elements too few for filter length' print ' inv.extinction returned as NaN' print inv.extinction = np.NaN * inv.Nm return inv print #start_pt = int(np.ceil(z_window_pts/2 + min_alt/dz)) start_pt = int(np.ceil(z_window_pts/2)) end_pt = data_len -z_window_pts/2 -1 if adaptive == 0: slope_Nm = np.zeros_like(filtered_Nm[0,:]) inv.p180 = np.zeros_like(integrated_backscat) dbeta_dr = np.zeros_like(filtered_Nm[0,:]) dbeta_dr[1:] = + 0.5*(1/inv.beta_r_backscat[1:])\ *(inv.beta_r_backscat[1:] -inv.beta_r_backscat[:-1])/dz for i in range(ntimes): #compute extinction from the first derivative of a filtered Nm 
slope_Nm[start_pt:end_pt] = -sg.savitzky_golay( filtered_Nm[i,start_pt:end_pt],z_window_pts,order,deriv = 1) / dz extinction[i,start_pt:end_pt]= \ (-0.5*(1/filtered_Nm[i,start_pt:end_pt]) *slope_Nm[start_pt:end_pt] +dbeta_dr[start_pt:end_pt])*t_pointing[i] if 0: import matplotlib.pyplot as plt bin_vec = np.arange(len(filtered_Nm[0,:])) bin_vec2 = np.arange(len(slope_Nm)) plt.figure(777777) plt.plot(slope_Nm,bin_vec2,'b',extinction[0,:],bin_vec2,'r',filtered_Nm[0,:],bin_vec2,'g' ,-dbeta_dr,bin_vec2,'k',inv.p180[0,:],bin_vec2,'c') ax=plt.gca() ax.set_xscale('log') ax.grid(True) #when adaptive ==1, window length derived from integrated backscat else: #pick an intermediate value of p180/4pi for od estimate p180 = 0.025 #half of the maximum fit window in bins max_half_win =int((max_z_window_length/dz)/2) wind_od = window_od/2.0 #reflect data at end for padding yy=np.zeros((ntimes,data_len+max_half_win+1)) end_range = range(data_len-1,(data_len-max_half_win-1),-1) yy[0:ntimes,0:data_len]=inv.Nm[0:ntimes,0:data_len] yy[0:ntimes,data_len:data_len+max_half_win] = inv.Nm[0:ntimes,end_range] filtered_Nm = inv.Nm.copy() #extinction = np.zeros_like(inv.Nm) extinction = np.zeros(inv.Nm.shape) low_limit = np.zeros(data_len) high_limit = np.zeros_like(low_limit) beta_a=np.zeros(data_len+max_half_win+1) for k in range(ntimes): beta_a[0:data_len]=(inv.beta_a_backscat_par[k,0:data_len]\ +inv.beta_a_backscat_perp[k,0:data_len]) beta_a[range((data_len-max_half_win),data_len)] \ =(inv.beta_a_backscat_par[k,end_range] \ +inv.beta_a_backscat_perp[k,end_range]) #compute integrated backscatter beta_a[np.isnan(beta_a)]=0 int_bs_od=np.cumsum(beta_a)*dz/(2*p180) int_bs_od[data_len:data_len+max_half_win+1]=int_bs_od[data_len] #find end points of fit for each data point #use the optical depth estimated from the integrated backscatter to compute #limits for polynomial fit at each data point derivative_coefs=np.arange(order,0,-1) order_lmt = order/2 +1 for i in range(start_pt,data_len): lo_lmt=np.max([i-max_half_win,start_pt]) while (int_bs_od[i] -int_bs_od[lo_lmt] > wind_od) and i-lo_lmt > order_lmt: lo_lmt = lo_lmt + 1 hi_lmt = i+max_half_win while (int_bs_od[hi_lmt] - int_bs_od[i] > wind_od) and hi_lmt-i > order_lmt: hi_lmt = hi_lmt -1 #make fitting interval symmetric around i if i - lo_lmt < hi_lmt-i : hi_lmt = 2*i -lo_lmt elif i - lo_lmt > hi_lmt -i : lo_lmt = 2*i - hi_lmt ylocal = yy[k,lo_lmt:hi_lmt+1] x=np.arange(len(ylocal)) #print x pc = np.polyfit(x,ylocal,order) filtered_Nm[k,i]= np.polyval(pc,i-lo_lmt) slope_Nm = np.polyval(derivative_coefs*pc[0:order],i-lo_lmt)/dz #print 'ext',i,lo_lmt,hi_lmt,ylocal,slope_Nm,pc,pc[0:order]\ # ,dz,np.polyval(pc,[0,1,2,3]),t_pointing[k] extinction[k,i]=(-0.5*(1/filtered_Nm[k,i])*slope_Nm \ + 0.5*(1/inv.beta_r_backscat[i])\ *(inv.beta_r_backscat[i]-inv.beta_r_backscat[i-1])/dz)\ *t_pointing[k] #print 'ext',i,lo_lmt,hi_lmt,extinction[k,i],slope_Nm\ # ,(inv.beta_r_backscat[i]-inv.beta_r_backscat[i-1])/dz\ # ,inv.beta_r_backscat[i],inv.beta_r_backscat[i-1],t_pointing[k] xx=np.arange(len(x)*10)/10.0 inv.extinction = type(filtered_Nm)(extinction) time_vec = np.ones_like(inv.extinction[:,0]) beta_r_array = 8 * np.pi * (time_vec[:,np.newaxis] * inv.beta_r_backscat[np.newaxis,:])/3.0 inv.extinction_aerosol = inv.extinction - beta_r_array #compute p180 from integrated backscatter and optical depth over window segments inv.p180 = np.zeros_like(inv.extinction_aerosol) start_pt = int(np.ceil(z_window_pts/2)) delta_tau = np.zeros_like(inv.extinction) delta_tau[:,start_pt:] = -0.5 * np.log( 
inv.Nm[:,start_pt:]/inv.Nm[:,:-start_pt]\ *(beta_r_array[:,:-start_pt]/beta_r_array[:,start_pt:])) delta_tau[:,start_pt:] = delta_tau[:,start_pt:] \ - (beta_r_array[:,start_pt:]+beta_r_array[:,:-start_pt]) * msl_altitudes[start_pt]/2.0 inv.p180[:,start_pt:] = (\ integrated_backscat[:,start_pt:] - integrated_backscat[:,:-start_pt])\ /delta_tau[:,start_pt:] return inv
def do_photometry(self): """Perform photometry on the given target. This function needs to set * self.lightcurve """ logger = logging.getLogger(__name__) logger.info("Running aperture photometry...") k2p2_settings = { 'thresh': 0.8, 'min_no_pixels_in_mask': 4, 'min_for_cluster': 4, 'cluster_radius': np.sqrt(2) + np.finfo(np.float64).eps, 'segmentation': True, 'ws_blur': 0.5, 'ws_thres': 0, 'ws_footprint': 3, 'extend_overflow': True } for retries in range(5): # Delete any plots left over in the plots folder from an earlier iteration: self.delete_plots() # Create the sum-image: SumImage = self.sumimage logger.info(self.stamp) logger.info("Target position in stamp: (%f, %f)", self.target_pos_row_stamp, self.target_pos_column_stamp) cat = np.column_stack( (self.catalog['column_stamp'], self.catalog['row_stamp'], self.catalog['tmag'])) logger.info("Creating new masks...") try: masks, background_bandwidth = k2p2.k2p2FixFromSum( SumImage, plot_folder=self.plot_folder, show_plot=False, catalog=cat, **k2p2_settings) masks = np.asarray(masks, dtype='bool') except k2p2.K2P2NoStars: self.report_details(error='No flux above threshold.') masks = np.asarray(0, dtype='bool') using_minimum_mask = False if len(masks.shape) == 0: logger.warning("No masks found") self.report_details( error='No masks found. Using minimum aperture.') mask_main = self._minimum_aperture() using_minimum_mask = True else: # Look at the central pixel where the target should be: indx_main = masks[:, int(round(self.target_pos_row_stamp)), int(round(self.target_pos_column_stamp) )].flatten() if not np.any(indx_main): logger.warning( 'No mask found for main target. Using minimum aperture.' ) self.report_details( error= 'No mask found for main target. Using minimum aperture.' ) mask_main = self._minimum_aperture() using_minimum_mask = True elif np.sum(indx_main) > 1: logger.error('Too many masks') self.report_details(error='Too many masks') return STATUS.ERROR else: # Mask of the main target: mask_main = masks[indx_main, :, :].reshape(SumImage.shape) # Find out if we are touching any of the edges: resize_args = {} if np.any(mask_main[0, :]): resize_args['down'] = 10 if np.any(mask_main[-1, :]): resize_args['up'] = 10 if np.any(mask_main[:, 0]): resize_args['left'] = 10 if np.any(mask_main[:, -1]): resize_args['right'] = 10 if resize_args: logger.warning("Touching the edges! 
Retrying") logger.info(resize_args) if not self.resize_stamp(**resize_args): resize_args = {} logger.warning("Could not resize stamp any further") break else: break # If we reached the last retry but still needed a resize, give up: if resize_args: self.report_details(error='Too many stamp resizes') return STATUS.ERROR # XY of pixels in frame cols, rows = self.get_pixel_grid() members = np.column_stack((cols[mask_main], rows[mask_main])) # Loop through the images and backgrounds together: for k, (img, imgerr, bck) in enumerate( zip(self.images, self.images_err, self.backgrounds)): flux_in_cluster = img[mask_main] # Calculate flux in mask: if allnan(flux_in_cluster) or np.all(flux_in_cluster == 0): self.lightcurve['flux'][k] = np.NaN self.lightcurve['flux_err'][k] = np.NaN self.lightcurve['pos_centroid'][k, :] = np.NaN #self.lightcurve['quality'] else: self.lightcurve['flux'][k] = np.sum(flux_in_cluster) self.lightcurve['flux_err'][k] = np.sqrt( np.sum(imgerr[mask_main]**2)) # Calculate flux centroid: finite_vals = (flux_in_cluster > 0) if np.any(finite_vals): self.lightcurve['pos_centroid'][k, :] = np.average( members[finite_vals], weights=flux_in_cluster[finite_vals], axis=0) else: self.lightcurve['pos_centroid'][k, :] = np.NaN if allnan(bck[mask_main]): self.lightcurve['flux_background'][k] = np.NaN else: self.lightcurve['flux_background'][k] = np.nansum( bck[mask_main]) # Save the mask to be stored in the output file: self.final_mask = mask_main # Add additional headers specific to this method: #self.additional_headers['KP_SUBKG'] = (bool(subtract_background), 'K2P2 subtract background?') self.additional_headers['KP_THRES'] = (k2p2_settings['thresh'], 'K2P2 sum-image threshold') self.additional_headers['KP_MIPIX'] = ( k2p2_settings['min_no_pixels_in_mask'], 'K2P2 min pixels in mask') self.additional_headers['KP_MICLS'] = ( k2p2_settings['min_for_cluster'], 'K2P2 min pix. 
for cluster') self.additional_headers['KP_CLSRA'] = (k2p2_settings['cluster_radius'], 'K2P2 cluster radius') self.additional_headers['KP_WS'] = (bool( k2p2_settings['segmentation']), 'K2P2 watershed segmentation') #self.additional_headers['KP_WSALG'] = (k2p2_settings['ws_alg'], 'K2P2 watershed weighting') self.additional_headers['KP_WSBLR'] = (k2p2_settings['ws_blur'], 'K2P2 watershed blur') self.additional_headers['KP_WSTHR'] = (k2p2_settings['ws_thres'], 'K2P2 watershed threshold') self.additional_headers['KP_WSFOT'] = (k2p2_settings['ws_footprint'], 'K2P2 watershed footprint') self.additional_headers['KP_EX'] = (bool( k2p2_settings['extend_overflow']), 'K2P2 extend overflow') # Targets that are in the mask: target_in_mask = [ k for k, t in enumerate(self.catalog) if np.any(mask_main & (rows == np.round(t['row']) + 1) & (cols == np.round(t['column']) + 1)) ] # Figure out which status to report back: my_status = STATUS.OK # Calculate contamination from the other targets in the mask: if len(target_in_mask) == 0: logger.error("No targets in mask") self.report_details(error='No targets in mask') contamination = np.nan my_status = STATUS.ERROR elif len(target_in_mask) == 1 and self.catalog[target_in_mask][0][ 'starid'] == self.starid: contamination = 0 else: # Calculate contamination metric as defined in Lund & Handberg (2014): mags_in_mask = self.catalog[target_in_mask]['tmag'] mags_total = -2.5 * np.log10(np.nansum(10**(-0.4 * mags_in_mask))) contamination = 1.0 - 10**(0.4 * (mags_total - self.target_tmag)) contamination = np.abs( contamination) # Avoid stupid signs due to round-off errors logger.info("Contamination: %f", contamination) if not np.isnan(contamination): self.additional_headers['AP_CONT'] = (contamination, 'AP contamination') # Check if there are other targets in the mask that could then be skipped from # processing, and report this back to the TaskManager. The TaskManager will decide # if this means that this target or the other targets should be skipped in the end. skip_targets = [ t['starid'] for t in self.catalog[target_in_mask] if t['starid'] != self.starid ] if skip_targets: logger.info("These stars could be skipped: %s", skip_targets) self.report_details(skip_targets=skip_targets) # Figure out which status to report back: if using_minimum_mask: my_status = STATUS.WARNING # Return whether you think it went well: return my_status
def do_photometry(self): """Linear PSF Photometry TODO: add description of method and what A and b are """ logger = logging.getLogger(__name__) # Load catalog to determine what stars to fit: cat = self.catalog staridx = np.squeeze(np.where(cat['starid'] == self.starid)) # Log full catalog for current stamp: logger.debug(cat) # Calculate distance from main target: cat['dist'] = np.sqrt( (cat['row_stamp'][staridx] - cat['row_stamp'])**2 + (cat['column_stamp'][staridx] - cat['column_stamp'])**2) # Find indices of stars in catalog to fit: # (only include stars that are close to the main target and that are # not much fainter) indx = (cat['dist'] < 5) & (cat['tmag'][staridx] - cat['tmag'] > -5) nstars = int(np.sum(indx)) # Get target star index in the reduced catalog of stars to fit: staridx = np.squeeze(np.where(cat[indx]['starid'] == self.starid)) logger.debug('Target star index: %s', str(staridx)) # Preallocate flux sum array for contamination calculation: fluxes_sum = np.zeros(nstars, dtype='float64') # Start looping through the images (time domain): for k, img in enumerate(self.images): # Get catalog at current time in MJD: cat = self.catalog_attime(self.lightcurve['time'][k] - self.lightcurve['timecorr'][k]) # Reduce catalog to only include stars that should be fitted: cat = cat[indx] logger.debug(cat) # Get the number of pixels in the image: good_pixels = np.isfinite(img) npx = int(np.sum(good_pixels)) # Create A, the 2D matrix of vertically reshaped PRF 1D arrays: A = np.empty([npx, nstars], dtype='float64') for col, target in enumerate(cat): # Get star parameters with flux set to 1 and reshape: params0 = np.atleast_2d( [target['row_stamp'], target['column_stamp'], 1.]) # Fill out column of A with reshaped PRF array from one star: A[:, col] = self.psf.integrate_to_image( params0, cutoff_radius=20)[good_pixels].flatten() # Create b, the right-hand side, by reshaping the image to a 1D array: b = img[good_pixels].flatten() # Do linear least squares fit to solve Ax=b: try: # Linear least squares: res = np.linalg.lstsq(A, b, rcond=None) fluxes = res[0] # Non-negative linear least squares: #fluxes, rnorm = scipy.optimize.nnls(A, b) except np.linalg.LinAlgError: logger.debug("Linear PSF Fitting failed") fluxes = None # Pass result if fit did not fail: if fluxes is None: logger.warning("We should flag that this has not gone well.") self.lightcurve['flux'][k] = np.NaN self.lightcurve['quality'][k] = 1 # FIXME: Use the real flag! 
else: # Get flux of target star: result = fluxes[staridx] logger.debug('Fluxes are: %s', fluxes) logger.debug('Result is: %f', result) # Add the result of the main star to the lightcurve: self.lightcurve['flux'][k] = result # Add current fitted fluxes for contamination calculation: fluxes_sum += fluxes # Make plots for debugging: if self.plot and logger.isEnabledFor(logging.DEBUG): fig = plt.figure() result4plot = [] for star, target in enumerate(cat): result4plot.append( np.array([ target['row_stamp'], target['column_stamp'], fluxes[star] ])) # Add subplots with the image, fit and residuals: ax_list = plot_image_fit_residuals( fig=fig, image=img, fit=self.psf.integrate_to_image(result4plot, cutoff_radius=20)) # Add star position to the first plot: ax_list[0].scatter(result4plot[staridx][1], result4plot[staridx][0], c='r', alpha=0.5) # Save figure to file: fig_name = 'tess_{0:011d}_linpsf_{1:05d}'.format( self.starid, k) save_figure(os.path.join(self.plot_folder, fig_name)) plt.close(fig) # Set contamination to NaN if all flux values are NaN: if allnan(self.lightcurve['flux']): self.report_details(error='All target flux values are NaN.') return STATUS.ERROR # Divide by number of added fluxes to get the mean flux: fluxes_mean = fluxes_sum / np.sum(~np.isnan(self.lightcurve['flux'])) logger.debug('Mean fluxes are: %s', fluxes_mean) # Calculate contamination from other stars in target PSF using latest A: not_target_star = np.arange(len(fluxes_mean)) != staridx contamination = np.sum( A[:, not_target_star].dot(fluxes_mean[not_target_star]) * A[:, staridx]) / fluxes_mean[staridx] logger.info("Contamination: %f", contamination) self.additional_headers['PSF_CONT'] = (contamination, 'PSF contamination') # If contamination is high, return a warning: if contamination > 0.1: self.report_details(error='High contamination') return STATUS.WARNING # Return whether you think it went well: return STATUS.OK
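For reference, the core linear step above is an ordinary least-squares solve of Ax=b; a self-contained sketch with made-up dimensions, showing that the per-star fluxes are recovered exactly in the noiseless case:

import numpy as np

npx, nstars = 100, 3
rng = np.random.default_rng(0)
A = rng.random((npx, nstars))          # one unit-flux PRF per column
true_fluxes = np.array([100.0, 20.0, 5.0])
b = A @ true_fluxes                    # noiseless image pixels

fluxes, *_ = np.linalg.lstsq(A, b, rcond=None)
print(np.allclose(fluxes, true_fluxes))  # True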
def plot_image(image, ax=None, scale='log', cmap=None, origin='lower', xlabel=None, ylabel=None, cbar=None, clabel='Flux ($e^{-}s^{-1}$)', cbar_ticks=None, cbar_ticklabels=None, cbar_pad=None, cbar_size='5%', title=None, percentile=95.0, vmin=None, vmax=None, offset_axes=None, color_bad='k', **kwargs): """ Utility function to plot a 2D image. Parameters: image (2d array): Image data. ax (matplotlib.pyplot.axes, optional): Axes in which to plot. Default (None) is to use current active axes. scale (str or :py:class:`astropy.visualization.ImageNormalize` object, optional): Normalization used to stretch the colormap. Options: ``'linear'``, ``'sqrt'``, ``'log'``, ``'asinh'``, ``'histeq'``, ``'sinh'`` and ``'squared'``. Can also be a :py:class:`astropy.visualization.ImageNormalize` object. Default is ``'log'``. origin (str, optional): The origin of the coordinate system. xlabel (str, optional): Label for the x-axis. ylabel (str, optional): Label for the y-axis. cbar (string, optional): Location of color bar. Choices are ``'right'``, ``'left'``, ``'top'``, ``'bottom'``. Default is not to create colorbar. clabel (str, optional): Label for the color bar. cbar_size (str, optional): Size of colorbar relative to the axes. Default='5%'. cbar_pad (float, optional): Padding between axes and colorbar. title (str or None, optional): Title for the plot. percentile (float, optional): The fraction of pixels to keep in color-trim. If single float given, the same fraction of pixels is eliminated from both ends. If tuple of two floats is given, the two are used as the percentiles. Default=95. cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap. vmin (float, optional): Lower limit to use for colormap. vmax (float, optional): Upper limit to use for colormap. color_bad (str, optional): Color to apply to bad pixels (NaN). Default is black. kwargs (dict, optional): Keyword arguments to be passed to :py:func:`matplotlib.pyplot.imshow`. Returns: :py:class:`matplotlib.image.AxesImage`: Image returned by :py:func:`matplotlib.pyplot.imshow`. .. codeauthor:: Rasmus Handberg <*****@*****.**> """ logger = logging.getLogger(__name__) # Backward compatible settings: make_cbar = kwargs.pop('make_cbar', None) if make_cbar: raise FutureWarning("'make_cbar' is deprecated. Use 'cbar' instead.") # Special treatment for boolean arrays: if isinstance(image, np.ndarray) and image.dtype == 'bool': if vmin is None: vmin = 0 if vmax is None: vmax = 1 if cbar_ticks is None: cbar_ticks = [0, 1] if cbar_ticklabels is None: cbar_ticklabels = ['False', 'True'] # Calculate limits of color scaling: interval = None if vmin is None or vmax is None: if allnan(image): logger.warning("Image is all NaN") vmin = 0 vmax = 1 if cbar_ticks is None: cbar_ticks = [] if cbar_ticklabels is None: cbar_ticklabels = [] elif isinstance(percentile, (list, tuple, np.ndarray)): interval = viz.AsymmetricPercentileInterval(percentile[0], percentile[1]) else: interval = viz.PercentileInterval(percentile) # Create ImageNormalize object with extracted limits: if scale in ('log', 'linear', 'sqrt', 'asinh', 'histeq', 'sinh', 'squared'): if scale == 'log': stretch = viz.LogStretch() elif scale == 'linear': stretch = viz.LinearStretch() elif scale == 'sqrt': stretch = viz.SqrtStretch() elif scale == 'asinh': stretch = viz.AsinhStretch() elif scale == 'histeq': stretch = viz.HistEqStretch(image[np.isfinite(image)]) elif scale == 'sinh': stretch = viz.SinhStretch() elif scale == 'squared': stretch = viz.SquaredStretch() # Create ImageNormalize object. Very important to use clip=False if the image contains # NaNs, otherwise NaN points will not be plotted correctly. norm = viz.ImageNormalize(data=image[np.isfinite(image)], interval=interval, vmin=vmin, vmax=vmax, stretch=stretch, clip=not anynan(image)) elif isinstance(scale, (viz.ImageNormalize, matplotlib.colors.Normalize)): norm = scale else: raise ValueError("scale {} is not available.".format(scale)) if offset_axes: extent = (offset_axes[0] - 0.5, offset_axes[0] + image.shape[1] - 0.5, offset_axes[1] - 0.5, offset_axes[1] + image.shape[0] - 0.5) else: extent = (-0.5, image.shape[1] - 0.5, -0.5, image.shape[0] - 0.5) if ax is None: ax = plt.gca() # Set up the colormap to use. If a bad color is defined, # add it to the colormap: if cmap is None: cmap = copy.copy(plt.get_cmap('Blues')) elif isinstance(cmap, str): cmap = copy.copy(plt.get_cmap(cmap)) if color_bad: cmap.set_bad(color_bad, 1.0) # Plotting the image using all the settings set above: im = ax.imshow(image, cmap=cmap, norm=norm, origin=origin, extent=extent, interpolation='nearest', **kwargs) if xlabel is not None: ax.set_xlabel(xlabel) if ylabel is not None: ax.set_ylabel(ylabel) if title is not None: ax.set_title(title) ax.set_xlim([extent[0], extent[1]]) ax.set_ylim([extent[2], extent[3]]) if cbar: colorbar(im, ax=ax, loc=cbar, size=cbar_size, pad=cbar_pad, label=clabel, ticks=cbar_ticks, ticklabels=cbar_ticklabels) # Settings for ticks (a fresh locator instance for each call, since locator objects cannot be shared between axes): ax.xaxis.set_major_locator(MaxNLocator(nbins=10, integer=True)) ax.xaxis.set_minor_locator(MaxNLocator(nbins=10, integer=True)) ax.yaxis.set_major_locator(MaxNLocator(nbins=10, integer=True)) ax.yaxis.set_minor_locator(MaxNLocator(nbins=10, integer=True)) ax.tick_params(which='both', direction='out', pad=5) ax.xaxis.tick_bottom() ax.yaxis.tick_left() return im
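A minimal sketch of calling the utility above (the data and file name are illustrative):

import numpy as np
import matplotlib.pyplot as plt

img = np.random.rand(32, 32)
img[10:12, 10:12] = np.nan  # bad pixels are drawn in the color_bad color

fig, ax = plt.subplots()
plot_image(img, ax=ax, scale='sqrt', cbar='right', title='Example stamp')
fig.savefig('example_stamp.png', bbox_inches='tight')
plt.close(fig)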
def plot_image(image, scale='log', origin='lower', xlabel='Pixel Column Number', ylabel='Pixel Row Number', make_cbar=False, clabel='Flux ($e^{-}s^{-1}$)', title=None, percentile=95.0, ax=None, cmap=plt.cm.Blues, offset_axes=None, **kwargs): """ Utility function to plot a 2D image. Parameters: image (2d array): Image data. scale (str or astropy.visualization.ImageNormalize object, optional): Normalization used to stretch the colormap. Options: ``'linear'``, ``'sqrt'``, or ``'log'``. Can also be a `astropy.visualization.ImageNormalize` object. Default is ``'log'``. origin (str, optional): The origin of the coordinate system. xlabel (str, optional): Label for the x-axis. ylabel (str, optional): Label for the y-axis. make_cbar (boolean, optional): Create colorbar? Default is ``False``. clabel (str, optional): Label for the color bar. title (str or None, optional): Title for the plot. percentile (float, optional): The fraction of pixels to keep in color-trim. The same fraction of pixels is eliminated from both ends. Default=95. ax (matplotlib.pyplot.axes, optional): Axes in which to plot. Default (None) is to use current active axes. cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap. kwargs (dict, optional): Keyword arguments to be passed to `matplotlib.pyplot.imshow`. """ # Negative values will throw warnings, so add offset so we are above zero: # TODO: Something weird is going on, and this doesn't work, so for now we ignore warnings?! (see above) if scale == 'log' or scale == 'sqrt': img_min = np.nanmin(image) if img_min <= 0: image = image.copy() image += np.abs(img_min) + 1.0 #print(scale, np.all(np.isfinite(image)), np.all(image > 0), np.min(image), np.max(image)) if allnan(image): logger = logging.getLogger(__name__) logger.error("Image is all NaN") return None # Calculate limits of color scaling: vmin, vmax = PercentileInterval(percentile).get_limits(image) # Create ImageNormalize object with extracted limits: if scale == 'log': norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LogStretch()) elif scale == 'linear': norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LinearStretch()) elif scale == 'sqrt': norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=SqrtStretch()) elif isinstance(scale, (matplotlib.colors.Normalize, ImageNormalize)): norm = scale else: raise ValueError("scale {} is not available.".format(scale)) if offset_axes: extent = (offset_axes[0] - 0.5, offset_axes[0] + image.shape[1] - 0.5, offset_axes[1] - 0.5, offset_axes[1] + image.shape[0] - 0.5) else: extent = (-0.5, image.shape[1] - 0.5, -0.5, image.shape[0] - 0.5) if ax is None: ax = plt.gca() if isinstance(cmap, six.string_types): cmap = plt.get_cmap(cmap) im = ax.imshow(image, origin=origin, norm=norm, extent=extent, cmap=cmap, interpolation='nearest', **kwargs) if xlabel is not None: ax.set_xlabel(xlabel) if ylabel is not None: ax.set_ylabel(ylabel) if title is not None: ax.set_title(title) ax.set_xlim([extent[0], extent[1]]) ax.set_ylim([extent[2], extent[3]]) if make_cbar: # TODO: In cases where image was rescaled, should we change something here? 
cbar = plt.colorbar(im, norm=norm) cbar.set_label(clabel) # Settings for ticks (to make Mikkel happy): ax.xaxis.set_major_locator(MaxNLocator(integer=True)) ax.xaxis.set_minor_locator(MaxNLocator(integer=True)) ax.yaxis.set_major_locator(MaxNLocator(integer=True)) ax.yaxis.set_minor_locator(MaxNLocator(integer=True)) ax.tick_params(direction='out', which='both', pad=5) ax.xaxis.tick_bottom() #ax.set_aspect(aspect) return im
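# A small worked check (not from the source) of the non-positive-pixel
# workaround at the top of the older plot_image above: with scale='log' or
# 'sqrt', the image is shifted so its minimum becomes exactly +1 before the
# stretch is applied.
import numpy as np

img = np.array([[-5.0, 0.0], [3.0, 10.0]])
img_min = np.nanmin(img)
if img_min <= 0:
    img = img + np.abs(img_min) + 1.0  # same offset the function applies
assert np.nanmin(img) == 1.0 and np.all(img > 0)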
def _fit(self, X, y):
    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    S = []              # list of selected features
    F = list(range(p))  # list of all (unselected) features

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ---------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ---------------------------------------------------------------------
    # Check a range of ks (3-10), and choose the one with the max median MI:
    k_min = 3
    k_max = 11
    xy_MI = np.zeros((k_max - k_min, p))
    xy_MI[:] = np.nan
    for i, k in enumerate(range(k_min, k_max)):
        xy_MI[i, :] = mi.get_first_mi_vector(self, k)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    # Choose the best feature, add it to S and remove it from F:
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # Notify the user:
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ---------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ---------------------------------------------------------------------
    if self.n_features == 'auto':
        n_features = np.inf
    else:
        n_features = self.n_features

    while len(S) < n_features:
        # Loop through the remaining unselected features and calculate MI:
        s = len(S) - 1
        feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, S[-1])

        # Make the decision based on the chosen FS algorithm:
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
        elif self.method == 'JMIM':
            if bn.allnan(bn.nanmin(fmm, axis=0)):
                break
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
        elif self.method == 'MRMR':
            if bn.allnan(bn.nanmean(fmm, axis=0)):
                break
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]

        # Record the JMIM of the newly selected feature and add it to S:
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # Notify the user:
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # If n_features == 'auto', check S_mi to decide if we should stop:
        if self.n_features == 'auto' and len(S) > 10:
            # Smooth the first derivative of the MI values of the previously selected features:
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # Does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ---------------------------------------------------------------------
    # SAVE RESULTS
    # ---------------------------------------------------------------------
    self.n_features_ = len(S)
    self.support_ = np.zeros(p, dtype=bool)  # np.bool is deprecated in newer NumPy
    self.support_[S] = True
    self.ranking_ = S
    self.mi_ = S_mi

    return self
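# Toy illustration (synthetic numbers, not from the source) of the selection
# rules in _fit above. Given the candidate MI matrix fmm (rows: already
# selected features, columns: remaining candidates), JMIM picks the column
# whose worst-case MI with the selected set is largest, while MRMR trades
# relevance to y against average redundancy, so the two can disagree.
import numpy as np
import bottleneck as bn

fmm = np.array([[0.2, 0.8, np.nan],
                [0.5, 0.1, 0.3]])
xy_MI_F = np.array([0.9, 0.7, 0.4])  # relevance of each candidate to y

jmim_pick = bn.nanargmax(bn.nanmin(fmm, axis=0))             # column mins [0.2, 0.1, 0.3] -> 2
mrmr_pick = bn.nanargmax(xy_MI_F - bn.nanmean(fmm, axis=0))  # scores [0.55, 0.25, 0.10] -> 0
print(jmim_pick, mrmr_pick)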
def remove_jumps(t, x, jumps, width=3, return_flags=False):
    """
    Remove jumps from timeseries.

    Parameters:
        t (ndarray): Time vector (days). Must be sorted in time.
        x (ndarray): Flux vector. Can contain invalid points (NaN).
        jumps (list): Vector of timestamps where jumps are to be corrected.
        width (float): Width of the region on each side of a jump to compare (default=3 days).
        return_flags (boolean): Return two additional arrays with locations of corrected jumps.
    """

    # Get the logger to use for printing messages:
    logger = logging.getLogger(__name__)

    # Number of points:
    N = len(t)
    dt = nanmedian(diff(t))

    # Convert a simple list of times to a list of jump-dictionaries.
    # Note: use a plain list here; assigning dicts into a float ndarray would fail.
    jumps = list(np.atleast_1d(jumps))
    for k, jump in enumerate(jumps):
        if np.isscalar(jump):
            jumps[k] = {'time': jump}
        elif not isinstance(jump, dict):
            raise Exception("Invalid input in JUMPS")

    # Important that we correct the jumps in the right order:
    jumps = sorted(jumps, key=lambda k: k['time'])

    # Arrays needed for the following:
    correction = empty(2, dtype='float64')
    if return_flags:
        flag_jumps = [False]*len(jumps)
        flag_jumps2 = zeros(N, dtype='int64')

    # Correct jumps one after the other:
    kj = 0
    for k, jump in enumerate(jumps):
        logger.debug(jump)

        # Extract information about the jump:
        tjump = jump.get('time')
        jumptype = jump.get('type', 'multiplicative')
        jumpforce = jump.get('force', False)

        # Make maps to the central region and the region after the jump:
        kj_pre = kj
        kj = searchsorted(t, tjump)
        if kj == 0 or kj == N or kj == kj_pre:
            continue  # Skip if first, last or same point as previous
        central1 = searchsorted(t, t[kj-1] - width)
        central2 = searchsorted(t, t[kj] + width)
        gapsize = t[kj] - t[kj-1]  # The length of the jump

        # Make a small timeseries around the gap:
        tcen = t[central1:central2]
        xcen = x[central1:central2]
        xmdl = np.empty_like(xcen)
        indx = searchsorted(tcen, tjump)

        # Simple check to see if all datapoints on either side are NaN:
        if allnan(x[central1:kj]) or allnan(x[kj:central2]):
            continue

        # Run a LOWESS filter on the two halves to eliminate the effects of e.g. transits.
        # If a half contains too few points, fall back to the raw points;
        # column_stack gives the same (n, 2) shape as the lowess output:
        if kj - central1 < 0.5*int(width/dt):
            w1 = np.column_stack((t[central1:kj], x[central1:kj]))
        else:
            w1 = lowess(x[central1:kj], t[central1:kj], frac=1./3, is_sorted=True)
        if central2 - kj < 0.5*int(width/dt):
            w2 = np.column_stack((t[kj:central2], x[kj:central2]))
        else:
            w2 = lowess(x[kj:central2], t[kj:central2], frac=1./3, is_sorted=True)

        # Calculate the median levels before and after the jump,
        # which will be used to make the two sides match up:
        level1_const = nanmedian(w1[:, 1])
        level2_const = nanmedian(w2[:, 1])

        # Do not attempt a linear relation on very long gaps;
        # in many cases it will not work.
        if gapsize < 2*width:
            # Do a robust linear fit of the parts before and after the jump:
            res1 = theil_sen(w1[:, 0], w1[:, 1], n_samples=1e5)
            res2 = theil_sen(w2[:, 0], w2[:, 1], n_samples=1e5)

            # Evaluate the fitted lines at the midpoint of the gap:
            tmid = (t[kj] + t[kj-1])/2
            level1_linear = np.polyval(res1, tmid)
            level2_linear = np.polyval(res2, tmid)
        else:
            level1_linear = NaN
            level2_linear = NaN

        # Calculate the Bayesian Information Criterion (BIC) for the different
        # models of the jump to decide which one should be applied to the data:
        if jumptype == 'additive':
            # Constant model:
            correction[0] = level1_const - level2_const
            if isfinite(correction[0]):
                # Calculate model:
                xmdl[:indx] = level1_const
                xmdl[indx:] = level2_const
                # Calculate BIC:
                s1 = BIC(xcen, xmdl, 2)
            else:
                s1 = Inf

            # Linear model:
            correction[1] = level1_linear - level2_linear
            if isfinite(correction[1]):
                # Calculate model:
                xmdl[:indx] = np.polyval(res1, tcen[:indx])
                xmdl[indx:] = np.polyval(res2, tcen[indx:])
                # Calculate BIC:
                s2 = BIC(xcen, xmdl, 4)
            else:
                s2 = Inf

        elif jumptype == 'multiplicative':
            # Constant model:
            correction[0] = level1_const / level2_const
            if isfinite(correction[0]) and correction[0] > 0:
                # Correct data:
                xcen2 = dc(xcen)  # Take a deep copy, so the correction doesn't affect xcen
                xcen2[indx:] *= correction[0]
                # Calculate model:
                xmdl[:] = level1_const
                # Calculate BIC:
                s1 = BIC(xcen2, xmdl, 2)
            else:
                s1 = Inf

            # Linear model:
            correction[1] = level1_linear / level2_linear
            if isfinite(correction[1]) and correction[1] > 0:
                # Correct data:
                xcen2 = dc(xcen)  # Take a deep copy, so the correction doesn't affect xcen
                xcen2[indx:] *= correction[1]
                # Calculate model:
                xmdl[:indx] = np.polyval(res1, tcen[:indx])
                xmdl[indx:] = np.polyval(res2, tcen[indx:]) * correction[1]
                # Calculate BIC:
                s2 = BIC(xcen2, xmdl, 4)
            else:
                s2 = Inf
        else:
            raise Exception('Unknown jump type')

        # Apply the correction to the entire timeseries if it improves the BIC:
        if jumpforce:
            i = np.argmin([s1, s2]) + 1
        else:
            # Calculate BIC of the uncorrected central part:
            s0 = BIC(xcen, nanmedian(xcen), 1)
            i = np.argmin([s0, s1, s2])
        logger.debug(i)
        if i != 0:  # Do not correct if the unaltered data gives the best BIC
            # Apply the best correction to everything to the right of the jump:
            if jumptype == 'additive':
                x[kj:] += correction[i-1]
            else:
                x[kj:] *= correction[i-1]

            # Set the flags, if required:
            if return_flags:
                flag_jumps[k] = True
                flag_jumps2[kj] = 2**i  # 2 (constant) or 4 (linear) when a correction was made, zero otherwise

    if return_flags:
        return x, flag_jumps, flag_jumps2

    return x
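# Hedged usage sketch for remove_jumps above, on synthetic data: a 2 percent
# multiplicative jump is injected at t=5 days and then handed to the function.
# This assumes remove_jumps and its helpers (lowess, theil_sen, BIC) are
# importable from the surrounding module.
import numpy as np

rng = np.random.default_rng(42)
t = np.linspace(0.0, 10.0, 500)
x = 1.0 + 0.001*rng.standard_normal(len(t))
x[t >= 5.0] *= 1.02  # inject a multiplicative jump

x_corr, flag_jumps, flag_jumps2 = remove_jumps(t, x.copy(), [5.0],
    width=3, return_flags=True)
print(flag_jumps, np.nonzero(flag_jumps2)[0])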
def correct(self, task, output_folder=None):
    """
    Run correction.

    Parameters:
        task (dict): Dictionary defining a task/lightcurve to process.
        output_folder (str, optional): Path to directory where lightcurve should be saved.

    Returns:
        dict: Result dictionary containing information about the processing.

    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """

    logger = logging.getLogger(__name__)

    t1 = default_timer()

    error_msg = []
    details = {}
    save_file = None
    result = task.copy()
    try:
        # Load the lightcurve:
        lc = self.load_lightcurve(task)

        # Run the correction on this lightcurve:
        lc_corr, status = self.do_correction(lc)

    except (KeyboardInterrupt, SystemExit): # pragma: no cover
        status = STATUS.ABORT
        logger.warning("Correction was aborted (priority=%d)", task['priority'])

    except: # noqa: E722 pragma: no cover
        status = STATUS.ERROR
        logger.exception("Correction failed (priority=%d)", task['priority'])

    # Check that the status has been changed:
    if status == STATUS.UNKNOWN: # pragma: no cover
        raise ValueError("STATUS was not set by do_correction")

    # Do sanity checks:
    if status in (STATUS.OK, STATUS.WARNING):
        # Make sure all NaN fluxes have corresponding NaN errors:
        lc_corr.flux_err[np.isnan(lc_corr.flux)] = np.nan

        # Simple checks that the entire lightcurve is not NaN or Inf:
        if allnan(lc_corr.flux):
            logger.error("Final lightcurve is all NaNs")
            status = STATUS.ERROR
        if allnan(lc_corr.flux_err):
            logger.error("Final lightcurve errors are all NaNs")
            status = STATUS.ERROR
        if np.any(np.isinf(lc_corr.flux)):
            logger.error("Final lightcurve contains Inf")
            status = STATUS.ERROR
        if np.any(np.isinf(lc_corr.flux_err)):
            logger.error("Final lightcurve errors contain Inf")
            status = STATUS.ERROR

    # Calculate diagnostics:
    if status in (STATUS.OK, STATUS.WARNING):
        details['variance'] = nanvar(lc_corr.flux, ddof=1)
        details['rms_hour'] = rms_timescale(lc_corr, timescale=3600/86400)
        details['ptp'] = ptp(lc_corr)

        # Diagnostics specific to the method:
        if self.CorrMethod == 'cbv':
            details['cbv_num'] = lc_corr.meta['additional_headers']['CBV_NUM']
        elif self.CorrMethod == 'ensemble':
            details['ens_num'] = lc_corr.meta['additional_headers']['ENS_NUM']
            details['ens_fom'] = lc_corr.meta['FOM']

        # Save the lightcurve to file:
        try:
            save_file = self.save_lightcurve(lc_corr, output_folder=output_folder)
        except (KeyboardInterrupt, SystemExit): # pragma: no cover
            status = STATUS.ABORT
            logger.warning("Correction was aborted (priority=%d)", task['priority'])
        except: # noqa: E722 pragma: no cover
            status = STATUS.ERROR
            logger.exception("Could not save lightcurve file (priority=%d)", task['priority'])

        # Plot the final lightcurve:
        if self.plot:
            fig = plt.figure(dpi=200)
            ax = fig.add_subplot(111)
            ax.scatter(lc.time, 1e6*(lc.flux/nanmedian(lc.flux)-1), s=2, alpha=0.3,
                marker='o', label="Original")
            ax.scatter(lc_corr.time, lc_corr.flux, s=2, alpha=0.3, marker='o', label="Corrected")
            ax.set_xlabel('Time (TBJD)')
            ax.set_ylabel('Relative flux (ppm)')
            ax.legend()
            save_figure(os.path.join(self.plot_folder(lc), self.CorrMethod + '_final'), fig=fig)
            plt.close(fig)

    # Unpack any errors or warnings that were sent to the logger during the correction:
    if self.message_queue:
        error_msg += self.message_queue
        self.message_queue.clear()
    if not error_msg:
        error_msg = None

    # Update the results:
    t2 = default_timer()
    details['errors'] = error_msg
    result.update({
        'corrector': self.CorrMethod,
        'status_corr': status,
        'elaptime_corr': t2 - t1,
        'lightcurve_corr': save_file,
        'details': details
    })

    return result
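# Self-contained illustration (synthetic arrays, not from the source) of the
# sanity rules applied in correct() above: NaN fluxes are forced to carry NaN
# errors, and a lightcurve that is entirely NaN would be flagged as an error.
import numpy as np
from bottleneck import allnan

flux = np.array([1.00, np.nan, 0.98])
flux_err = np.array([0.01, 0.02, 0.01])
flux_err[np.isnan(flux)] = np.nan  # make the NaN patterns consistent
assert np.isnan(flux_err[1])
assert not allnan(flux)  # an all-NaN result would set STATUS.ERROR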
def plot_image(image, scale='log', origin='lower', xlabel='Pixel Column Number',
        ylabel='Pixel Row Number', make_cbar=False, clabel='Flux ($e^{-}s^{-1}$)',
        cbar_ticks=None, cbar_ticklabels=None, title=None, percentile=95.0,
        vmin=None, vmax=None, ax=None, cmap=plt.cm.Blues, offset_axes=None, **kwargs):
    """
    Utility function to plot a 2D image.

    Parameters:
        image (2d array): Image data.
        scale (str or astropy.visualization.ImageNormalize object, optional):
            Normalization used to stretch the colormap.
            Options: ``'linear'``, ``'sqrt'`` or ``'log'``.
            Can also be a `astropy.visualization.ImageNormalize` object.
            Default is ``'log'``.
        origin (str, optional): The origin of the coordinate system.
        xlabel (str, optional): Label for the x-axis.
        ylabel (str, optional): Label for the y-axis.
        make_cbar (boolean, optional): Create colorbar? Default is ``False``.
        clabel (str, optional): Label for the color bar.
        cbar_ticks (list, optional): Ticks to use on the color bar.
        cbar_ticklabels (list, optional): Tick labels to use on the color bar.
        title (str or None, optional): Title for the plot.
        percentile (float, optional): The fraction of pixels to keep in the color-trim.
            The same fraction of pixels is eliminated from both ends. Default=95.
        vmin (float, optional): Lower limit of the color scaling. Overrides the percentile interval.
        vmax (float, optional): Upper limit of the color scaling. Overrides the percentile interval.
        ax (matplotlib.pyplot.axes, optional): Axes in which to plot.
            Default (None) is to use the current active axes.
        cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap.
        kwargs (dict, optional): Keyword arguments to be passed to `matplotlib.pyplot.imshow`.
    """

    if allnan(image):
        logger = logging.getLogger(__name__)
        logger.error("Image is all NaN")
        return None

    # Calculate limits of color scaling:
    if vmin is None or vmax is None:
        vmin1, vmax1 = PercentileInterval(percentile).get_limits(image)
        if vmin is None:
            vmin = vmin1
        if vmax is None:
            vmax = vmax1

    # Create ImageNormalize object with extracted limits:
    if scale == 'log':
        norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LogStretch())
    elif scale == 'linear':
        norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LinearStretch())
    elif scale == 'sqrt':
        norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=SqrtStretch())
    elif isinstance(scale, (matplotlib.colors.Normalize, ImageNormalize)):
        norm = scale
    else:
        raise ValueError("scale {} is not available.".format(scale))

    if offset_axes:
        extent = (offset_axes[0] - 0.5, offset_axes[0] + image.shape[1] - 0.5,
            offset_axes[1] - 0.5, offset_axes[1] + image.shape[0] - 0.5)
    else:
        extent = (-0.5, image.shape[1] - 0.5, -0.5, image.shape[0] - 0.5)

    if ax is None:
        ax = plt.gca()

    if isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)

    im = ax.imshow(image, origin=origin, norm=norm, extent=extent, cmap=cmap,
        interpolation='nearest', **kwargs)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if title is not None:
        ax.set_title(title)
    ax.set_xlim([extent[0], extent[1]])
    ax.set_ylim([extent[2], extent[3]])

    if make_cbar:
        cbar = plt.colorbar(im, norm=norm, ax=ax, orientation='horizontal', pad=0.02)
        cbar.set_label(clabel)
        if cbar_ticks is not None:
            cbar.set_ticks(cbar_ticks)
        if cbar_ticklabels is not None:
            cbar.set_ticklabels(cbar_ticklabels)

    # Settings for ticks (to make Mikkel happy):
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.xaxis.set_minor_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_minor_locator(MaxNLocator(integer=True))
    ax.tick_params(direction='out', which='both', pad=5)
    ax.xaxis.tick_bottom()
    #ax.set_aspect(aspect)

    return im
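# Quick demonstration (synthetic data, not from the source) of the vmin/vmax
# handling in the plot_image variant above: a user-supplied limit wins, and
# only the missing limit falls back to the percentile interval.
import numpy as np
from astropy.visualization import PercentileInterval

img = np.random.rand(16, 16)
vmin, vmax = None, 0.8  # user fixes the upper limit only
vmin1, vmax1 = PercentileInterval(95.0).get_limits(img)
if vmin is None:
    vmin = vmin1
if vmax is None:
    vmax = vmax1
print(vmin, vmax)  # vmin from the percentile interval, vmax stays 0.8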
def lc_matrix_clean(self, cbv_area):
    """
    Perform gap-filling of the light curves returned by :py:func:`CBVCorrector.lc_matrix`,
    and remove timestamps where the flux values of all light curves are NaN.

    Parameters:
        cbv_area: The CBV area to calculate the light curve matrix for.

    Returns:
        mat: Matrix from :py:func:`CBVCorrector.lc_matrix` that has been gap-filled
            and had NaNs removed, to be used in the CBV calculation.
        varis: Variances of the light curves in ``mat``.
        indx_nancol: Indices of the timestamps with NaNs in all light curves.
        Ntimes: Number of timestamps in the light curves contained in ``mat``
            before removing NaNs.

    .. codeauthor:: Mikkel N. Lund <*****@*****.**>
    """

    logger = logging.getLogger(__name__)
    logger.info('Running matrix clean')

    tmpfile = os.path.join(self.data_folder, 'mat-%s-%d_clean.npz' % (self.datasource, cbv_area))
    if logger.isEnabledFor(logging.DEBUG) and os.path.exists(tmpfile):
        logger.info("Loading existing file...")
        data = np.load(tmpfile)
        mat = data['mat']
        varis = data['varis']
        Ntimes = data['Ntimes']
        indx_nancol = data['indx_nancol']
    else:
        # Compute the light curve correlation matrix:
        mat0, varis = self.lc_matrix(cbv_area)

        # Print the final shape of the matrix:
        logger.info("Matrix size: %d x %d" % mat0.shape)

        # Find the columns where all stars have NaNs and remove them:
        indx_nancol = allnan(mat0, axis=0)
        Ntimes = mat0.shape[1]
        mat = mat0[:, ~indx_nancol]
        cadenceno = np.arange(mat.shape[1])

        logger.info("Gap-filling lightcurves...")
        for k in tqdm(range(mat.shape[0]), total=mat.shape[0], disable=not logger.isEnabledFor(logging.INFO)):
            mat[k, :] /= varis[k]
            # Fill out missing values by interpolating the lightcurve:
            indx = np.isfinite(mat[k, :])
            mat[k, ~indx] = pchip_interpolate(cadenceno[indx], mat[k, indx], cadenceno[~indx])

        # Save something for debugging:
        if logger.isEnabledFor(logging.DEBUG):
            np.savez(tmpfile, mat=mat, varis=varis, indx_nancol=indx_nancol, Ntimes=Ntimes)

    return mat, varis, indx_nancol, Ntimes
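# Stand-alone sketch (synthetic row, not from the source) of the gap-filling
# step inside lc_matrix_clean above: missing cadences in a single lightcurve
# are filled by PCHIP interpolation over the finite samples.
import numpy as np
from scipy.interpolate import pchip_interpolate

cadenceno = np.arange(10)
row = np.sin(cadenceno/3.0)
row[[2, 3, 7]] = np.nan  # simulate gaps
indx = np.isfinite(row)
row[~indx] = pchip_interpolate(cadenceno[indx], row[indx], cadenceno[~indx])
assert np.all(np.isfinite(row))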
def fit(self, X, y):
    """
    Fit the MI_FS feature selection with the chosen MI_FS method.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples]
        The target values.
    """

    # If n_jobs is negative, translate it to a positive number of cores:
    if self.n_jobs < 0:
        self.n_jobs = NUM_CORES - self.n_jobs

    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    S = []              # list of selected features
    F = list(range(p))  # list of all (unselected) features

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ---------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ---------------------------------------------------------------------
    xy_MI = np.array(mimy.get_first_mi_vector(self, self.k))

    # Choose the best feature, add it to S and remove it from F:
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # Notify the user:
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ---------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ---------------------------------------------------------------------
    if self.n_features == 'auto':
        n_features = np.inf
    else:
        n_features = self.n_features

    while len(S) < n_features:
        # Loop through the remaining unselected features and calculate MI.
        # The s-th row of feature_mi_matrix contains the JMI score of the last
        # element in S with all remaining features in F:
        s = len(S) - 1
        feature_mi_matrix[s, F] = mimy.get_mi_vector(self, F, S[-1])

        # Make the decision based on the chosen FS algorithm:
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            # Which feature in F has the largest sum of JMI scores over all s in S?
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
            # Find out which pair of features the JMIM belongs to:
            if self.verbose > 0:
                jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                jmi_idx = np.where(jmi_vals == jmim)[0]
                print(jmim, S[jmi_idx[0]], selected)
        elif self.method == 'JMIM':
            if bn.allnan(bn.nanmin(fmm, axis=0)):
                break
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
            # Find out which pair of features the JMIM belongs to:
            if self.verbose > 0:
                jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                jmi_idx = np.where(jmi_vals == jmim)[0]
                print(jmim, S[jmi_idx[0]], selected)
        elif self.method == 'MRMR':
            if bn.allnan(bn.nanmean(fmm, axis=0)):
                break
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]
            S_mi.append(bn.nanmax(MRMR))

        # Record the JMIM of the newly selected feature and add it to S:
        if self.method != 'MRMR':
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # Notify the user:
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # If n_features == 'auto', check S_mi to decide if we should stop:
        if self.n_features == 'auto' and len(S) > 10:
            # Smooth the first derivative of the MI values of the previously selected features:
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # Does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ---------------------------------------------------------------------
    # SAVE RESULTS
    # ---------------------------------------------------------------------
    self.n_features_ = len(S)
    self._support_mask = np.zeros(p, dtype=bool)  # np.bool is deprecated in newer NumPy
    self._support_mask[S] = True
    self.ranking_ = S
    self.mi_ = S_mi

    return self
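# Toy check (synthetic MI values, not from the source) of the 'auto' stopping
# rule in fit above: the first derivative of the selected-feature MI curve is
# estimated with a Savitzky-Golay filter, and selection stops once the mean of
# the last five derivative values is close to zero.
import numpy as np
from scipy import signal

S_mi = list(1.0/np.arange(1.0, 20.0)**2)  # quickly flattening MI values
MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
print(np.abs(np.mean(MI_dd[-5:])) < 1e-3)  # should print True for this curve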
def train(self, tset, savecl=True, overwrite=False):
    """
    Train the meta-classifier.

    Parameters:
        tset (:class:`TrainingSet`): Training set to train the classifier on.
        savecl (bool, optional): Save the classifier to file?
        overwrite (bool, optional): Overwrite existing classifier save file.

    .. codeauthor:: James S. Kuszlewicz <*****@*****.**>
    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """
    # Start a logger that should be used to output e.g. debug information:
    logger = logging.getLogger(__name__)

    # Check for pre-calculated features:
    fitlabels = self.parse_labels(tset.labels())

    # First create a list of all possible classifiers:
    all_classifiers = list(classifier_list)
    all_classifiers.remove('meta')

    # Create a list of all features.
    # Save this to the object; we are using it to keep track of which features
    # were used to train the classifier:
    self.features_used = list(itertools.product(all_classifiers, self.StellarClasses))
    self.features_names = [f'{classifier:s}_{stcl.name:s}' for classifier, stcl in self.features_used]

    # Create the table of features.
    # Create it as float32, since that is what RandomForestClassifier converts it to anyway:
    logger.info("Importing features...")
    features = self.build_features_table(tset.features(), total=len(tset))

    # Remove columns that are all NaN.
    # These can be classifiers that never return a given class, or classifiers
    # that have not been run at all:
    keepcols = ~allnan(features, axis=0)
    features = features[:, keepcols]
    self.features_used = [x for i, x in enumerate(self.features_used) if keepcols[i]]
    self.features_names = [x for i, x in enumerate(self.features_names) if keepcols[i]]

    # Throw an error if a classifier was not run at all:
    run_classifiers = set(fu[0] for fu in self.features_used)
    if run_classifiers != set(all_classifiers):
        raise RuntimeError("Classifier did not contribute at all: %s" % set(all_classifiers).difference(run_classifiers))

    # Raise an exception if there are NaNs left in the features:
    if anynan(features):
        raise ValueError("Features contain NaNs")

    logger.info("Features imported. Shape = %s", features.shape)

    # Run the actual training:
    self.classifier.oob_score = True
    logger.info("Fitting model.")
    self.classifier.fit(features, fitlabels)
    logger.info('Trained. OOB Score = %s', self.classifier.oob_score_)
    self.classifier.trained = True

    if savecl and self.classifier.trained and self.clfile is not None:
        if overwrite or not os.path.exists(self.clfile):
            logger.info("Saving pickled classifier instance to '%s'", self.clfile)
            self.save(self.clfile)
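# Minimal sketch (synthetic feature table, not from the source) of the pruning
# step in train() above: feature columns that are entirely NaN, e.g. a class
# that a classifier never predicts, are dropped before fitting.
import numpy as np
from bottleneck import allnan

features = np.array([[0.1, np.nan, 0.7],
                     [0.3, np.nan, 0.2]], dtype=np.float32)
keepcols = ~allnan(features, axis=0)
features = features[:, keepcols]
print(features.shape)  # (2, 2): the all-NaN column is gone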