def setupClass(cls): cls.decimal_density = 2 # low accuracy because binning is different res1 = KDE(Xi) res1.fit(kernel="gau", fft=True, bw="silverman") cls.res1 = res1 rfname2 = os.path.join(curdir, 'results', 'results_kde_fft.csv') cls.res_density = np.genfromtxt(open(rfname2, 'rb'))
def calctime(dval, maxtime=50.0): logger = logging.getLogger('dataval') logger.info('Plotting calculation times for photometry...') for cadence in dval.cadences: star_vals = dval.search_database( select=['diagnostics.stamp_resizes', 'diagnostics.elaptime'], search=[ f'cadence={cadence:d}', f'diagnostics.elaptime <= {maxtime:f}' ]) if not star_vals: continue et = np.array([star['elaptime'] for star in star_vals], dtype='float64') resize = np.array([star['stamp_resizes'] for star in star_vals], dtype='int32') maxresize = int(np.max(resize)) fig, ax = plt.subplots(figsize=plt.figaspect(0.5)) norm = Normalize(vmin=-0.5, vmax=maxresize + 0.5) scalarMap = ScalarMappable(norm=norm, cmap=plt.get_cmap('tab10')) # Calculate KDE of full dataset: kde1 = KDE(et) kde1.fit(kernel='gau', gridsize=1024) # Calculate KDEs for different number of stamp resizes: for jj in range(maxresize + 1): kde_data = et[resize == jj] if len(kde_data): kde2 = KDE(kde_data) kde2.fit(kernel='gau', gridsize=1024) rgba_color = scalarMap.to_rgba(jj) ax.fill_between(kde2.support, 0, kde2.density, color=rgba_color, alpha=0.5, label=f'{jj:d} resizes') ax.plot(kde1.support, kde1.density, color='k', lw=2, label='All') ax.set_xlim([0, maxtime]) ax.set_ylim(bottom=0) ax.xaxis.set_major_locator(MultipleLocator(5)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.set_xlabel('Calculation time (sec)') ax.legend(loc='upper right') fig.savefig(os.path.join(dval.outfolder, f'calctime_c{cadence:04d}')) if not dval.show: plt.close(fig)
def setupClass(cls): res1 = KDE(Xi) weights = np.linspace(1, 100, 200) res1.fit(kernel="gau", gridsize=50, weights=weights, fft=False, bw="silverman") cls.res1 = res1 rfname = os.path.join(curdir, 'results', 'results_kde_weights.csv') cls.res_density = np.genfromtxt(open(rfname, 'rb'), skip_header=1)
class TestKdeRefit(): np.random.seed(12345) data1 = np.random.randn(100) * 100 pdf = KDE(data1) pdf.fit() data2 = np.random.randn(100) * 100 pdf2 = KDE(data2) pdf2.fit() for attr in ['icdf', 'cdf', 'sf']: npt.assert_( not np.allclose(getattr(pdf, attr)[:10], getattr(pdf2, attr)[:10]))
def MagnitudeDistribution(catalog, mag=18., bins=16, timeStamp=False): """ A simple plot to compare input and extracted magnitudes for stars of a fixed magnitude. """ txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) kde1 = KDE(catalog.mag_aper - mag) kde1.fit() kde2 = KDE(catalog.mag_auto - mag) kde2.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(catalog.mag_aper - mag, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.axvline(x=np.mean(catalog.mag_aper - mag), c='b', ls='--', label='Mean') ax1.hist(catalog.mag_auto - mag, bins=bins, label='Auto', alpha=0.3, density=True, color='r') ax1.axvline(x=np.mean(catalog.mag_auto - mag), c='r', ls='--', label='Mean') ax1.plot(kde1.support, kde1.density, 'b-', label='Gaussian KDE') ax1.plot(kde2.support, kde2.density, 'r-', label='Gaussian KDE') # print(np.std(catalog.mag_aper), np.std(catalog.mag_auto)) ax1.set_xlabel('SExtractor Magnitude - Input Catalog') ax1.set_ylabel('PDF') if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0) plt.savefig('MagDistributionSExtractor.pdf') plt.close()
def compute_entropy(U): HGauss0 = 0.5 + 0.5 * np.log(2 * np.pi) nSingVals = U.shape[1] H = np.empty(nSingVals, dtype='float64') for iBasisVector in range(nSingVals): kde = KDE(np.abs(U[:, iBasisVector])) kde.fit(gridsize=1000) pdf = kde.density x = kde.support dx = x[1] - x[0] # Calculate the Gaussian entropy pdfMean = nansum(x * pdf) * dx with np.errstate(invalid='ignore'): sigma = np.sqrt(nansum(((x - pdfMean)**2) * pdf) * dx) HGauss = HGauss0 + np.log(sigma) # Calculate vMatrix entropy pdf_pos = (pdf > 0) HVMatrix = -np.sum(xlogy(pdf[pdf_pos], pdf[pdf_pos])) * dx # The returned entropy is the difference between the V-matrix entropy and the entropy of a Gaussian of similar width (sigma) H[iBasisVector] = HVMatrix - HGauss return H
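# Hedged usage sketch (added; not from the original source): exercise compute_entropy on the
# left-singular vectors of a random matrix. It assumes the bare `nansum`/`xlogy`/`KDE` names
# used above resolve to numpy's nansum, scipy.special.xlogy and statsmodels' KDEUnivariate.
import numpy as np
from numpy import nansum
from scipy.special import xlogy
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

rng = np.random.default_rng(0)
U, _, _ = np.linalg.svd(rng.standard_normal((1000, 5)), full_matrices=False)
H = compute_entropy(U)  # one entropy difference per basis vector
print(H.shape)  # -> (5,)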
def reduce_mode(x): kde = KDE(x) kde.fit(gridsize=2000) pdf = kde.density x = kde.support return x[np.argmax(pdf)]
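# Hedged usage sketch (added; not from the original source): reduce_mode on a synthetic
# sample, assuming KDE is statsmodels' KDEUnivariate.
import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

rng = np.random.default_rng(42)
sample = rng.normal(loc=3.0, scale=0.5, size=2000)
print(reduce_mode(sample))  # should land close to the true mode, 3.0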
def _kde_plot( values: ndarray, grid: ndarray, axes: Axes, bw: Union[float, str] = "scott" ) -> None: """Calculate KDE for observed spacings. Parameters ---------- values: ndarray the values used to compute (fit) the kernel density estimate grid: ndarray the grid of values over which to evaluate the computed KDE curve axes: pyplot.Axes the current axes object to be modified bw: bandwidth The `bw` argument for statsmodels' KDEUnivariate `.fit` Notes ----- We are doing this manually because we want to ensure consistency of the KDE calculation and remove Seaborn control over the process, while also avoiding inconsistent behaviours like https://github.com/mwaskom/seaborn/issues/938 and https://github.com/mwaskom/seaborn/issues/796 """ values = values[values > 0] # prevent floating-point bad behaviour kde = KDE(values) # kde.fit(kernel="gau", bw="scott", cut=0) kde.fit(kernel="gau", bw=bw, cut=0) evaluated = np.empty_like(grid) for i, _ in enumerate(evaluated): evaluated[i] = kde.evaluate(grid[i]) kde_curve = axes.plot(grid, evaluated, label="Kernel Density Estimate") plt.setp(kde_curve, color="black")
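# Hedged usage sketch (added; not from the original source): calling _kde_plot on synthetic
# spacings. Assumes numpy, matplotlib.pyplot and the statsmodels KDE are importable as in
# the module above.
import numpy as np
import matplotlib.pyplot as plt

spacings = np.random.exponential(scale=1.0, size=500)
grid = np.linspace(0.01, 5.0, 200)
fig, ax = plt.subplots()
_kde_plot(spacings, grid, ax, bw="scott")
ax.legend()
plt.show()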
def _mode(data): modes = np.zeros([data.shape[0]]) for i in range(data.shape[0]): kde = KDE(data[i, :]) kde.fit(gridsize=2000) modes[i] = kde.support[np.argmax(kde.density)] return modes
def _reduce_mode(x): if len(x) == 0: return np.nan x = np.asarray(x, dtype=np.float64) kde = KDE(x) kde.fit(gridsize=2000) return kde.support[np.argmax(kde.density)]
def setup_class(cls): cls.x = x = KDEWResults['x'] weights = KDEWResults['weights'] res1 = KDE(x) # the default bandwidth was "scott" when the reference values were computed res1.fit(kernel=cls.kernel_name, weights=weights, fft=False, bw="scott") cls.res1 = res1 cls.res_density = KDEWResults[cls.res_kernel_name]
def SExtractorSNR(catalog, bins=16, timeStamp=False): """ A simple plot showing the SNR SExtractor finds. """ txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) snr = 1. / catalog.magerr_aper kde1 = KDE(snr) kde1.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(snr, bins=bins, label='r=0.65 Aperture', density=True, color='r', alpha=0.5) ax1.axvline(x=np.mean(snr), c='g', ls='--', label='Mean', lw=1.6) ax1.plot(kde1.support, kde1.density, 'b-', label='Gaussian KDE', lw=1.6) print('SExtractor:', np.mean(1. / catalog.magerr_aper), np.mean(1. / catalog.magerr_auto)) ax1.set_xlabel('SExtractor Signal-to-Noise Ratio') ax1.set_ylabel('PDF') if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0) plt.savefig('SExtractorSNR.pdf') plt.close()
def test_kde_bw_positive(): # GH 6679 x = np.array([ 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 5.67332327, 6.19847872, 7.43189192 ]) kde = KDE(x) kde.fit() assert kde.bw > 0
def mag_dist(dval): """ Function to plot magnitude distribution for targets .. codeauthor:: Mikkel N. Lund <*****@*****.**> .. codeauthor:: Rasmus Handberg <*****@*****.**> """ logger = logging.getLogger('dataval') logger.info('Plotting Magnitude distribution...') fig, ax = plt.subplots(figsize=plt.figaspect(0.5)) fig.subplots_adjust(left=0.14, wspace=0.3, top=0.94, bottom=0.155, right=0.96) colors = ['r', 'b', 'g'] # TODO: What if there are more than three? for k, cadence in enumerate(dval.cadences): star_vals = dval.search_database(select='todolist.tmag', search=f'cadence={cadence:d}') if star_vals: tmags = np.array([star['tmag'] for star in star_vals]) kde = KDE(tmags) kde.fit(gridsize=1000) ax.fill_between(kde.support, 0, kde.density / np.max(kde.density), color=colors[k], alpha=0.3, label=f'{cadence:d}s cadence') # kde_all = KDE(tmags) # kde_all.fit(gridsize=1000) # ax.plot(kde_all.support, kde_all.density/np.max(kde_all.density), 'k-', lw=1.5, label='All') ax.set_ylim(bottom=0) ax.set_xlabel('TESS magnitude') ax.set_ylabel('Normalised Density') ax.xaxis.set_major_locator(MultipleLocator(2)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.legend(frameon=False, loc='upper left', borderaxespad=0, handlelength=2.5, handletextpad=0.4) fig.savefig(os.path.join(dval.outfolder, 'mag_dist')) if not dval.show: plt.close(fig)
def __get_kde_values( self, spacings_range: Tuple[float, float], nnnsd: bool = False, kde_gridsize: int = 1000, ) -> np.ndarray: """Fit / derive the KDE using the entire set of unfolded values, but evaluate it only over the given `spacings_range`. """ spacings = np.sort(self.vals[2:] - self.vals[:-2]) if nnnsd else self.spacings kde = KDE(spacings) kde.fit(kernel="gau", bw="scott", cut=0, fft=False, gridsize=10000) s = np.linspace(spacings_range[0], spacings_range[1], kde_gridsize) # evaluated = np.empty_like(s) # for i, _ in enumerate(evaluated): # evaluated[i] = kde.evaluate(s[i]) evaluated = kde.evaluate(s) return evaluated
def calctime_corrections(dval, maxtime=50.0): logger = logging.getLogger('dataval') if not dval.corrections_done: logger.debug("Skipping since corrections not done") return logger.info('Plotting calculation times for corrections...') for cadence in dval.cadences: star_vals = dval.search_database( select='diagnostics_corr.elaptime', search=[ f'cadence={cadence:d}', f'diagnostics_corr.elaptime <= {maxtime:f}' ]) if not star_vals: continue et = np.array([star['elaptime'] for star in star_vals], dtype='float64') kde = KDE(et) kde.fit(gridsize=1024) fig, ax = plt.subplots(figsize=plt.figaspect(0.5)) ax.plot(kde.support, kde.density, color='k', lw=2) ax.set_xlim([0, maxtime]) ax.set_ylim(bottom=0) ax.xaxis.set_major_locator(MultipleLocator(5)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.set_xlabel('Calculation time (sec)') fig.savefig( os.path.join(dval.outfolder, f'calctime_corr_c{cadence:04d}')) if not dval.show: plt.close(fig)
def test_fit_self(reset_randomstate): x = np.random.standard_normal(100) kde = KDE(x) assert isinstance(kde, KDE) assert isinstance(kde.fit(), KDE)
def setupClass(cls): res1 = KDE(Xi) res1.fit(kernel="biw", fft=False, bw="silverman") cls.res1 = res1 cls.res_density = KDEResults["biw_d"]
def setup_class(cls): res1 = KDE(Xi) res1.fit(kernel="gau", fft=False, bw="silverman") cls.res1 = res1 cls.res_density = KDEResults["gau_d"]
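# Hedged sketch (added; not part of the test suite above): fit a Gaussian and a biweight
# kernel on the same sample. As the tests themselves suggest, statsmodels' FFT path is
# only available for the Gaussian kernel, so fft=False is used for "biw".
import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

x = np.random.standard_normal(200)
kde_gau = KDE(x)
kde_gau.fit(kernel="gau", fft=True, bw="silverman")
kde_biw = KDE(x)
kde_biw.fit(kernel="biw", fft=False, bw="silverman")
print(kde_gau.density[:3], kde_biw.density[:3])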
def setup_class(cls): cls.kde = KDE(Xi) cls.weights_200 = np.linspace(1, 100, 200) cls.weights_100 = np.linspace(1, 100, 100)
def plotSourceFinderResults(file='objects.phot', mag=18., bins=14, apcorr=0.923, timeStamp=False): """ Plot magnitude, count, SNR, and ellipticity distributions for SourceFinder output. """ data = sextutils.sextractor(file) offs = data.magnitude - mag xpos = np.mean(offs) kde = KDE(offs) kde.fit() txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(offs, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE') ax1.axvline(x=xpos, c='b', ls='-', label='Mean') ax1.set_xlabel('Aperture Corrected Magnitude - Input Catalogue') ax1.set_ylabel('PDF') ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0, loc='upper left') plt.savefig('MagDistributionSourceFinder.pdf') plt.close() if np.abs(mag - 18.) < 0.1: counts = data.counts / apcorr - 608137.825681 #for 18mag else: counts = data.counts / apcorr - 1359.57331621 #for 24.5mag xpos = np.mean(counts) std = np.std(data.counts / apcorr) snr = np.mean(data.counts / apcorr) / std print('SourceFinder:', snr, np.mean(data.counts) / std, 1359.57331621 / std, np.mean(data.counts / apcorr), std) kde = KDE(counts) kde.fit() txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(counts, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE', lw=2) ax1.axvline(x=xpos, c='g', ls='-', label='Mean', lw=2) ax1.set_xlabel('Aperture Corrected Counts - Input Catalogue') ax1.set_ylabel('PDF') ax1.text(ax1.get_xlim()[0] * 0.95, ax1.get_ylim()[1] * 0.75, r'$SNR = \frac{\left < counts \right >}{\sigma} \sim %.2f$' % snr) if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0, loc='upper left') plt.savefig('CountDistributionSourceFinder.pdf') plt.close() kde = KDE(data.snr) kde.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(data.snr, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.axvline(x=np.mean(data.snr), c='g', ls='-', label='Mean', lw=2) ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE', lw=2) ax1.set_xlabel('Derived Signal-to-Noise Ratio') ax1.set_ylabel('PDF') #ax1.text(ax1.get_xlim()[0]*1.02, ax1.get_ylim()[1]*0.9, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr)) ax1.text(4.5, 0.33, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr)) if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0, loc='upper left') plt.savefig('SNRsSourceFinder.pdf') plt.close() #pick the ones with well recovered flux msk = data.counts > 1223.6 #90% of 1369.57 kde = KDE(data.snr[msk]) kde.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(data.snr[msk], bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.axvline(x=np.mean(data.snr[msk]), c='g', ls='-', label='Mean', lw=2) ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE', lw=2) ax1.set_xlabel('Derived Signal-to-Noise Ratio') ax1.set_ylabel('PDF') #ax1.text(ax1.get_xlim()[0]*1.02, ax1.get_ylim()[1]*0.9, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr)) ax1.text(11., 0.5, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr[msk])) if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0) plt.savefig('SNRsSourceFinder2.pdf') plt.close() avg = np.mean(data.ellipticity) std = np.std(data.ellipticity) fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(data.ellipticity, bins=bins, alpha=0.2, density=True, color='b') ax1.axvline(x=avg, c='b', ls='-') ax1.text(ax1.get_xlim()[0] * 1.02, ax1.get_ylim()[1] * 0.95, r'$\bar{e} = %f$' % avg) ax1.text(ax1.get_xlim()[0] * 1.02, ax1.get_ylim()[1] * 0.9, r'$\sigma = %f$' % std) ax1.set_xlabel('Derived Ellipticity') ax1.set_ylabel('PDF') ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) plt.savefig('EllipticityDistributionSourceFinder.pdf') plt.close()
def cotrend_ini(self, cbv_area, do_ini_plots=False): """ Function for running the initial co-trending to obtain CBV coefficients for the construction of priors. The steps taken in the function are: 1: for each CBV area, load the calculated CBVs 2: co-trend all light curves in the area by fitting all CBVs with linear least squares 3: save the CBV coefficients Parameters: *self*: all parameters defined in class init Returns: Saves CBV coefficients per CBV area in ".npz" files and adds the loaded CBVs to *self* .. codeauthor:: Mikkel N. Lund <*****@*****.**> """ logger = logging.getLogger(__name__) #------------------------------------------------------------------ # CORRECTING STARS #------------------------------------------------------------------ logger.info( "--------------------------------------------------------------") if os.path.exists( os.path.join( self.data_folder, 'mat-%s-%d_free_weights.npz' % (self.datasource, cbv_area))): logger.info( "Initial co-trending for light curves in %s CBV area%d already done" % (self.datasource, cbv_area)) return else: logger.info( "Initial co-trending for light curves in %s CBV area%d" % (self.datasource, cbv_area)) # Load stars from data base stars = self.search_database(search=[ 'datasource="%s"' % self.datasource, 'cbv_area=%i' % cbv_area ]) # Load the cbv from file: cbv = CBV(self.data_folder, cbv_area, self.datasource, self.threshold_snrtest) # # Signal-to-Noise test (without actually removing any CBVs): # indx_lowsnr = cbv_snr_test(cbv.cbv, self.threshold_snrtest) # cbv.remove_cols(indx_lowsnr) # Update maximum number of components n_components0 = cbv.cbv.shape[1] logger.info('New max number of components: %i' % int(n_components0)) if self.Numcbvs == 'all': n_components = n_components0 else: n_components = np.min([self.Numcbvs, n_components0]) logger.info('Fitting using number of components: %i' % int(n_components)) # initialize results array, including TIC, CBV components, and a residual offset Nres = int(2 * n_components + 2) results = np.zeros([len(stars), Nres]) # Loop through stars for kk, star in tqdm(enumerate(stars), total=len(stars), disable=not logger.isEnabledFor(logging.INFO)): lc = self.load_lightcurve(star) logger.debug("Correcting star %d", lc.targetid) flux_filter, res = cbv.cotrend_single(lc, n_components, self.data_folder, ini=True) lc_corr = (lc.flux / flux_filter - 1) * 1e6 # TODO: compute diagnostics requiring the light curve # # SAVE TO DIAGNOSTICS FILE: # wn_ratio = GOC_wn(flux, flux-flux_filter) res = np.array([ res, ]).flatten() results[kk, 0] = lc.targetid results[kk, 1:len(res) + 1] = res if do_ini_plots: fig = plt.figure() ax1 = fig.add_subplot(211) ax1.plot(lc.time, lc.flux) ax1.plot(lc.time, flux_filter) ax1.set_xlabel('Time (BJD)') ax1.set_ylabel('Flux (counts)') ax1.set_xticks([]) ax2 = fig.add_subplot(212) ax2.plot(lc.time, lc_corr) ax2.set_xlabel('Time (BJD)') ax2.set_ylabel('Relative flux (ppm)') filename = 'lc_corr_ini_TIC%d.png' % lc.targetid if not os.path.exists(os.path.join(self.plot_folder(lc))): os.makedirs(os.path.join(self.plot_folder(lc))) fig.savefig(os.path.join(self.plot_folder(lc), filename)) plt.close(fig) # Save weights for priors if it is an initial run np.savez(os.path.join( self.data_folder, 'mat-%s-%d_free_weights.npz' % (self.datasource, cbv_area)), res=results) # Plot CBV weights fig = plt.figure(figsize=(15, 15)) ax = fig.add_subplot(221) ax2 = fig.add_subplot(222) ax3 = fig.add_subplot(223) ax4 = fig.add_subplot(224) for kk in range(1, int(2 * n_components + 1)): if kk > n_components: LS = '--' else:
LS = '-' idx = np.nonzero(results[:, kk]) r = results[idx, kk] idx2 = (r > np.percentile(r, 10)) & (r < np.percentile(r, 90)) kde = KDE(r[idx2]) kde.fit(gridsize=5000) err = nanmedian(np.abs(r[idx2] - nanmedian(r[idx2]))) * 1e5 if kk > n_components: ax3.plot(kde.support * 1e5, kde.density / np.max(kde.density), label='CBV ' + str(kk), ls=LS) ax4.errorbar(kk, kde.support[np.argmax(kde.density)] * 1e5, yerr=err, marker='o', color='k') else: ax.plot(kde.support * 1e5, kde.density / np.max(kde.density), label='CBV ' + str(kk), ls=LS) ax2.errorbar(kk, kde.support[np.argmax(kde.density)] * 1e5, yerr=err, marker='o', color='k') ax.set_xlabel('CBV weight') ax2.set_ylabel('CBV weight') ax2.set_xlabel('CBV') ax.legend() fig.savefig( os.path.join( self.data_folder, 'weights-sector-%s-%d.png' % (self.datasource, cbv_area))) plt.close(fig)
def plotSNR(deg=60, kdes=True, log=False): CCDs = 1000 fudge = 47.0 #cumulative distribution of stars for different galactic latitudes if deg == 30: tmp = 1 sfudge = 0.79 elif deg == 60: tmp = 2 sfudge = 0.79 else: tmp = 3 sfudge = 0.78 #stars d = np.loadtxt('data/stars.dat', usecols=(0, tmp)) stmags = d[:, 0] stcounts = d[:, 1] #fit a function and generate finer sample z = np.polyfit(stmags, np.log10(stcounts), 4) p = np.poly1d(z) starmags = np.arange(1, 30.2, 0.2) starcounts = 10**p(starmags) cpdf = (starcounts - np.min(starcounts)) / (np.max(starcounts) - np.min(starcounts)) starcounts /= 3600. #convert to square arcseconds nstars = int(np.max(starcounts) * fudge * sfudge) * CCDs magStars = cr.drawFromCumulativeDistributionFunction( cpdf, starmags, nstars) SNRsStars = ETC.SNR(ETC.VISinformation(), magnitude=magStars, exposures=1, galaxy=False) print('Assuming Galactic Latitude = %i deg' % deg) print('Number of stars within a pointing (36CCDs) with 70 < SNR < 700 (single 565s exposure):', int((SNRsStars[(SNRsStars > 70) & (SNRsStars < 700)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 60 < SNR < 80 (single 565s exposure):', int((SNRsStars[(SNRsStars > 60) & (SNRsStars < 80)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 690 < SNR < 710 (single 565s exposure):', int((SNRsStars[(SNRsStars > 690) & (SNRsStars < 710)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 18 < mag < 22 (single 565s exposure):', int((SNRsStars[(magStars > 18) & (magStars < 22)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 18 < mag < 23 (single 565s exposure):', int((SNRsStars[(magStars > 18) & (magStars < 23)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 17.9 < mag < 18.1 (single 565s exposure):', int((SNRsStars[(magStars > 17.9) & (magStars < 18.1)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 21 < mag < 23 (single 565s exposure):', int((SNRsStars[(magStars > 21) & (magStars < 23)]).size * 36. / CCDs)) #calculate Gaussian KDE with statsmodels package (for speed) if kdes: kn = SNRsStars[SNRsStars < 1000] kdeStars = KDE(kn) kdeStars.fit(adjust=2) nst = kn.size / 10. / 1.38 #galaxies #cumulative distribution of galaxies d = np.loadtxt('data/cdf_galaxies.dat', usecols=(0, 1)) gmags = d[:, 0] gcounts = d[:, 1] nums = int(np.max(gcounts) / 3600. * fudge * CCDs) z = np.polyfit(gmags, np.log10(gcounts), 4) p = np.poly1d(z) galaxymags = np.arange(10.0, 30.2, 0.2) galaxycounts = 10**p(galaxymags) cumulative = (galaxycounts - np.min(galaxycounts)) / ( np.max(galaxycounts) - np.min(galaxycounts)) magGalaxies = cr.drawFromCumulativeDistributionFunction( cumulative, galaxymags, nums) SNRsGalaxies = ETC.SNR(VISinformation(), magnitude=magGalaxies, exposures=1) #calculate Gaussian KDE with statsmodels as well (a scipy.stats.gaussian_kde variant is kept commented out) if kdes: kn = SNRsGalaxies[SNRsGalaxies < 1000] #pos = np.linspace(1, 810, num=70) #kdegal = gaussian_kde(kn) #gals = kdegal(pos) #ngl = kn.size #/ df kdeGalaxy = KDE(kn) kdeGalaxy.fit(adjust=10) ngl = kn.size / 10. / 1.38 #histogram binning and weighting bins = np.linspace(0., 1000., 101) df = bins[1] - bins[0] weight = 1.
/ (2048 * 2 * 2066 * 2 * 0.1 * 0.1 * 7.71604938e-8 * CCDs) / df weightsS = np.ones(magStars.size) * weight weightsG = np.ones(magGalaxies.size) * weight #simple magnitude distribution plot for stars stars = np.loadtxt('data/stars.dat') fig = plt.figure() ax = fig.add_subplot(111) ax.hist(magStars, bins=30, cumulative=True, log=True, alpha=0.3, weights=weightsS * df, label='Random Draws') ax.semilogy(stars[:, 0], stars[:, 1], label='Stars (30deg)') ax.semilogy(stars[:, 0], stars[:, 2], label='Stars (60deg)') ax.semilogy(stars[:, 0], stars[:, 3], label='Stars (90deg)') ax.set_xlabel(r'$M_{AB}$') ax.set_ylabel(r'Cumulative Number of Objects [deg$^{-2}$]') plt.legend(shadow=True, fancybox=True, loc='upper left') plt.savefig('stars%ideg.pdf' % deg) plt.close() #make a plot txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) ax = host_subplot(111, axes_class=AA.Axes) hist1 = ax.hist(SNRsStars, bins=bins, alpha=0.2, log=True, weights=weightsS, label='Stars [%i deg]' % deg, color='r') hist2 = ax.hist(SNRsGalaxies, bins=bins, alpha=0.2, log=True, weights=weightsG, label='Galaxies', color='blue') if kdes: ax.plot(kdeStars.support, kdeStars.density * nst, 'r-', label='Gaussian KDE (stars)') #ax.plot(pos, gals*ngl, 'b-', label='Gaussian KDE (galaxies)') ax.plot(kdeGalaxy.support, kdeGalaxy.density * ngl, 'b-', label='Gaussian KDE (galaxies)') #calculate magnitude scale, top-axis if log: mags = np.asarray([17, 18, 19, 20, 21, 22, 23, 24]) SNRs = ETC.SNR(VISinformation(), magnitude=mags, exposures=1, galaxy=False) else: mags = np.asarray([17, 17.5, 18, 18.5, 19, 20, 21, 22.5]) SNRs = ETC.SNR(VISinformation(), magnitude=mags, exposures=1, galaxy=False) ax2 = ax.twin() # ax2 is responsible for "top" axis and "right" axis ax2.set_xticks(SNRs) ax2.set_xticklabels([str(tmp) for tmp in mags]) ax2.set_xlabel('$M(R+I)_{AB}$ [mag]') ax2.axis['right'].major_ticklabels.set_visible(False) ax.set_ylim(1e-1, 1e5) ax.set_ylabel('Number of Objects [deg$^{-2}$ dex$^{-1}$]') ax.set_xlabel('Signal-to-Noise Ratio [assuming a single 565s exposure]') plt.text(0.8, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax.transAxes, alpha=0.2) plt.legend(shadow=True, fancybox=True) if log: ax.set_xscale('log') plt.savefig('SNRtheoretical%ideglog.pdf' % deg) else: ax.set_xlim(1, 1e3) plt.savefig('SNRtheoretical%ideglin.pdf' % deg) plt.close() #write output if not log: mid = df / 2. #output to file fh = open('SNRsSTARS%ideg.txt' % deg, 'w') fh.write('#These values are for stars at %ideg (%s)\n' % (deg, txt)) fh.write('#SNR number_of_stars N\n') fh.write('#bin_centre per_square_degree per_pointing\n') for a, b in zip(hist1[0], hist1[1]): fh.write('%i %f %f\n' % (b + mid, a * df, a * df * 0.496)) fh.close() fh = open('SNRsGALAXIES.txt', 'w') fh.write('#These values are for galaxies (%s)\n' % txt) fh.write('#SNR number_of_galaxies N\n') fh.write('#bin_centre per_square_degree per_pointing\n') for a, b in zip(hist2[0], hist2[1]): fh.write('%i %f %f\n' % (b + mid, a * df, a * df * 0.496)) fh.close()
heights.hist(bins=bins1, fc='steelblue') plt.savefig('height_hist_bins1.png') # 5-inch bins bins5 = np.arange(heights.min(), heights.max(), 5.) heights.hist(bins=bins5, fc='steelblue') plt.savefig('height_hist_bins5.png') # 0.001-inch bins bins001 = np.arange(heights.min(), heights.max(), .001) heights.hist(bins=bins001, fc='steelblue') plt.savefig('height_hist_bins001.png') # Kernel density estimators, from statsmodels (KDEUnivariate). # Create a KDE object heights_kde = KDE(heights) # Use fit() to estimate the densities. Default is gaussian kernel # using fft. This will provide a "density" attribute. heights_kde.fit() # Plot the density of the heights # Sort inside the plotting so the lines connect nicely. fig = plt.figure() plt.plot(heights_kde.support, heights_kde.density) plt.savefig('heights_density.png') # Pull out male and female heights as arrays over which to compute densities heights_m = heights[heights_weights['Gender'] == 'Male'].values heights_f = heights[heights_weights['Gender'] == 'Female'].values heights_m_kde = KDE(heights_m) heights_f_kde = KDE(heights_f)
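# A possible continuation (added; not in the original): fit the male/female KDE objects
# created above and overlay their densities, mirroring the heights_kde plot.
heights_m_kde.fit()
heights_f_kde.fit()
fig = plt.figure()
plt.plot(heights_m_kde.support, heights_m_kde.density, label='Male')
plt.plot(heights_f_kde.support, heights_f_kde.density, label='Female')
plt.legend()
plt.savefig('heights_density_by_gender.png')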
def plot_mag_dist(data_path, sector, version=1, savetex=False): # Add data values files = np.array([]) for root, dirs, fil in os.walk(data_path): for file in fil: file_path = root + os.sep + file if ('corr' in file_path) and ('.fits' in file_path): print(file_path) files = np.append(files, file_path) tmag_vals_sc = np.array([]) tmag_vals_lc = np.array([]) for f in files: with fits.open(f) as hdu: tmag = hdu[0].header['TESSMAG'] dt = hdu[1].header['TIMEDEL'] * 86400 print(tmag, dt) if dt < 1000: tmag_vals_sc = np.append(tmag_vals_sc, tmag) else: tmag_vals_lc = np.append(tmag_vals_lc, tmag) fig = plt.figure() ax = fig.add_subplot(111) if len(tmag_vals_lc) > 0: kde_lc = KDE(tmag_vals_lc) kde_lc.fit(gridsize=1000) ax.fill_between(kde_lc.support, 0, kde_lc.density * len(tmag_vals_lc), color='b', alpha=0.3, label='1800s') ax.scatter(tmag_vals_lc, np.zeros_like(tmag_vals_lc), lw=1, marker='|', c='k', s=80) if len(tmag_vals_sc) > 0: kde_sc = KDE(tmag_vals_sc) kde_sc.fit(gridsize=1000) ax.fill_between(kde_sc.support, 0, kde_sc.density * len(tmag_vals_sc), color='r', alpha=0.3, label='120s') ax.scatter(tmag_vals_sc, np.zeros_like(tmag_vals_sc), lw=1, marker='|', c='k', s=80) tmag_all = np.append(tmag_vals_lc, tmag_vals_sc) kde_all = KDE(tmag_all) kde_all.fit(gridsize=1000) ax.plot(kde_all.support, kde_all.density * len(tmag_all), 'k-', lw=1.5, label='All') # try: # kde_sc = KDE(tmag_vals_sc) # kde_sc.fit(gridsize=1000) # ax.plot(kde_sc.support, kde_sc.density*len(tmag_vals_sc), label='SC') # ax.scatter(tmag_vals_sc, np.zeros_like(tmag_vals_sc), lw=1, marker='+', s=80) # except: # pass # ax.set_xlim([3.5, 16.5]) ax.set_ylim(bottom=0) ax.set_xlabel('TESS magnitude', fontsize=16, labelpad=10) ax.set_ylabel('Number of stars', fontsize=16, labelpad=10) ax.xaxis.set_major_locator(MultipleLocator(2)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.tick_params(direction='out', which='both', pad=5, length=3) ax.tick_params(which='major', pad=6, length=5, labelsize='15') ax.yaxis.set_ticks_position('both') plt.tight_layout() ax.legend(frameon=False, prop={'size': 12}, loc='upper right', borderaxespad=0, handlelength=2.5, handletextpad=0.4) ax.yaxis.set_ticks_position('both') if version != 1: save_path = 'plots/sector%02d/v%1d/' % (sector, version) else: save_path = 'plots/sector%02d/' % sector if not os.path.exists(save_path): os.makedirs(save_path) fig.savefig(os.path.join(save_path, 'magnitudes.pdf'), bbox_inches='tight') fig.savefig(os.path.join(save_path, 'magnitudes.png'), bbox_inches='tight') if savetex: save_path2 = '../releasenote_tex/Release_note%1d/' % sector fig.savefig(os.path.join(save_path2, 'magnitudes.pdf'), bbox_inches='tight') plt.show()
def k2p2FixFromSum(SumImage, thresh=1, output_folder=None, plot_folder=None, show_plot=True, min_no_pixels_in_mask=8, min_for_cluster=4, cluster_radius=np.sqrt(2), segmentation=True, ws_alg='flux', ws_blur=0.5, ws_thres=0.05, ws_footprint=3, extend_overflow=True, catalog=None): """ Create pixel masks from Sum-image. Parameters: SumImage (ndarray): Sum-image. thresh (float, optional): Threshold for significant flux. The threshold is calculated as MODE+thresh*MAD. Default=1. output_folder (string, optional): Path to directory where output should be saved. Default=None. plot_folder (string, optional): Path to directory where plots should be saved. Default=None. show_plot (boolean, optional): Should plots be shown to the user? Default=True. min_no_pixels_in_mask (integer, optional): Minimum number of pixels to constitute a mask. min_for_cluster (integer, optional): Minimum number of pixels to be considered a cluster in DBSCAN clustering. cluster_radius (float, optional): Radius around points to consider cluster in DBSCAN clustering. segmentation (boolean, optional): Perform segmentation of clusters using Watershed segmentation. ws_alg (string, optional): Watershed method to use. Default='flux'. ws_blur (float, optional): Amount of blurring applied to the image before watershed segmentation. ws_thres (float, optional): Threshold for watershed segmentation. ws_footprint (integer, optional): Footprint to use in watershed segmentation. extend_overflow (boolean, optional): Enable extension of overflow columns for bright stars. catalog (ndarray, optional): Catalog of stars as an array with three columns (column, row and magnitude). If this is provided the results will only allow masks to be returned for stars in the catalog and the information is also used in the extension of overflow columns. Returns: tuple: Tuple with two elements: A 3D boolean ndarray of masks and a float indicating the bandwidth used for estimating the background level. .. codeauthor:: Rasmus Handberg <*****@*****.**> .. codeauthor:: Mikkel Lund <*****@*****.**> """ # Get logger for printing messages: logger = logging.getLogger(__name__) logger.info("Creating masks from sum-image...") NY, NX = np.shape(SumImage) ori_mask = ~np.isnan(SumImage) X, Y = np.meshgrid(np.arange(NX), np.arange(NY)) # Cut out pixels from the sum image which were collected and contain flux, # and flatten the 2D image to a 1D array: Flux = SumImage[ori_mask].flatten() Flux = Flux[Flux > 0] # Check if there was actually any flux measured: if len(Flux) == 0: raise K2P2NoFlux("No measured flux in sum-image") # Cut away the top 15% of the fluxes: flux_cut = stats.trim1(np.sort(Flux), 0.15) # Also do a cut on the absolute pixel values - This helps in cases where # the image is dominated by saturated pixels.
The exact value is of course # in principle dependent on the CCD, but we have found this value to be # reasonable in TESS simulated data: flux_cut = flux_cut[flux_cut < 70000] # Estimate the bandwidth we are going to use for the background: background_bandwidth = select_bandwidth(flux_cut, bw='scott', kernel='gau') logger.debug(" Sum-image KDE bandwidth: %f", background_bandwidth) # Make the Kernel Density Estimation of the fluxes: kernel = KDE(flux_cut) kernel.fit(kernel='gau', bw=background_bandwidth, fft=True, gridsize=100) # MODE def kernel_opt(x): return -1*kernel.evaluate(x) max_guess = kernel.support[np.argmax(kernel.density)] MODE = minimize(kernel_opt, max_guess, method='Powell').x # MAD (around mode) MAD1 = mad_to_sigma * nanmedian( np.abs( Flux[(Flux < MODE)] - MODE ) ) # Define the cutoff above which pixels are regarded significant: CUT = MODE + thresh * MAD1 logger.debug(" Threshold used: %f", thresh) logger.debug(" Flux cut is: %f", CUT) if logger.isEnabledFor(logging.DEBUG) and plot_folder is not None: fig = plt.figure() ax = fig.add_subplot(111) ax.fill_between(kernel.support, kernel.density, alpha=0.3) ax.axvline(MODE, color='k') ax.axvline(CUT, color='r') ax.set_xlabel('Flux') ax.set_ylabel('Distribution') save_figure(os.path.join(plot_folder, 'flux_distribution')) plt.close(fig) #========================================================================== # Find and separate clusters of pixels #========================================================================== # Cut out pixels of sum image with flux above the cut-off: idx = (SumImage > CUT) X2 = X[idx] Y2 = Y[idx] if np.all(~idx): raise K2P2NoStars("No flux above threshold") logger.debug(" Min for cluster is: %f", min_for_cluster) logger.debug(" Cluster radius is: %f", cluster_radius) # Run clustering algorithm XX, labels_ini, core_samples_mask = run_DBSCAN(X2, Y2, cluster_radius, min_for_cluster) # Run watershed segmentation algorithm: # Require that at least one non-noise cluster was found.
if segmentation and any(labels_ini != -1): # Create a set of dummy-masks that are made up of the clusters # that were found by DBSCAN, meaning that there could be masks # with several stars in them: DUMMY_MASKS = np.zeros((0, NY, NX), dtype='bool') DUMMY_MASKS_LABELS = [] m = np.zeros_like(SumImage, dtype='bool') for lab in set(labels_ini): if lab == -1: continue # Create "image" of this mask: m[:,:] = False for x,y in XX[labels_ini == lab]: m[y, x] = True # Append them to lists: DUMMY_MASKS = np.append(DUMMY_MASKS, [m], axis=0) DUMMY_MASKS_LABELS.append(lab) # Run the dummy masks through the detection of saturated columns: logger.debug("Detecting saturated columns in non-segmented masks...") smask, _ = k2p2_saturated(SumImage, DUMMY_MASKS, idx) # Create dictionary that will map a label to the mask of saturated pixels: if np.any(smask): saturated_masks = {} for u,sm in enumerate(smask): saturated_masks[DUMMY_MASKS_LABELS[u]] = sm else: saturated_masks = None # Run the mask segmentation algorithm on the found clusters: labels, unique_labels, NoCluster = k2p2WS(X, Y, X2, Y2, SumImage, XX, labels_ini, core_samples_mask, saturated_masks=saturated_masks, ws_thres=ws_thres, ws_footprint=ws_footprint, ws_blur=ws_blur, ws_alg=ws_alg, output_folder=plot_folder, catalog=catalog) else: labels = labels_ini unique_labels = set(labels) #NoCluster = len(unique_labels) - (1 if -1 in labels else 0) # Make sure it is a tuple and not a set - much easier to work with: unique_labels = tuple(unique_labels) # Create list of clusters and their number of pixels: No_pix_sort = np.zeros([len(unique_labels), 2]) for u,lab in enumerate(unique_labels): No_pix_sort[u, 0] = np.sum(labels == lab) No_pix_sort[u, 1] = lab # Only select the clusters that have enough pixels and are not noise: cluster_select = (No_pix_sort[:, 0] >= min_no_pixels_in_mask) & (No_pix_sort[:, 1] != -1) no_masks = sum(cluster_select) No_pix_sort = No_pix_sort[cluster_select, :] # No masks were found, so return None: if no_masks == 0: MASKS = None else: # Sort the clusters by the number of pixels: cluster_sort = np.argsort(No_pix_sort[:, 0]) No_pix_sort = No_pix_sort[cluster_sort[::-1], :] # Create 3D array that will hold masks for each target: MASKS = np.zeros((no_masks, NY, NX)) for u in range(no_masks): lab = No_pix_sort[u, 1] class_member_mask = (labels == lab) xy = XX[class_member_mask, :] MASKS[u, xy[:,1], xy[:,0]] = 1 #========================================================================== # Fill holes in masks #========================================================================== pattern = np.array([[[0, 0.25, 0],[0.25, 0, 0.25],[0, 0.25, 0]]]) # 3D array - shape=(1, 3, 3) mask_holes_indx = ndimage.convolve(MASKS, pattern, mode='constant', cval=0.0) mask_holes_indx = (mask_holes_indx > 0.95) & (MASKS == 0) # Should be exactly 1.0, but let's assume some round-off errors if np.any(mask_holes_indx): logger.info("Filling %d holes in the masks", np.sum(mask_holes_indx)) MASKS[mask_holes_indx] = 1 if plot_folder is not None: # Create image showing all masks at different levels: img = np.zeros((NY,NX)) for r in np.transpose(np.where(MASKS > 0)): img[r[1], r[2]] = r[0]+1 # Plot everything together: fig = plt.figure() ax = fig.add_subplot(111) plot_image(img, ax=ax, scale='linear', percentile=100, cmap='nipy_spectral', title='Holes in mask filled') # Create outline of filled holes: for hole in np.transpose(np.where(mask_holes_indx)): cen = (hole[2]-0.5, hole[1]-0.5) ax.add_patch(mpl.patches.Rectangle(cen, 1, 1, color='k', lw=2, fill=False,
hatch='//')) #fig.savefig(os.path.join(plot_folder, 'mask_filled_holes.png'), format='png', bbox_inches='tight') save_figure(os.path.join(plot_folder, 'mask_filled_holes')) plt.close(fig) #========================================================================== # Extend overflow lanes #========================================================================== if extend_overflow: logger.debug("Detecting saturated columns in masks...") # Find pixels that are saturated in each mask and find out if they should # be added to the mask: saturated_mask, pixels_added = k2p2_saturated(SumImage, MASKS, idx) logger.info("Overflow will add %d pixels in total to the masks.", pixels_added) # If we have a catalog of stars, we will only allow stars above the saturation # limit to get their masks extended: if catalog is not None: # Filter that catalog, only keeping stars actually inside current image: c = np.asarray(np.round(catalog[:, 0]), dtype='int32') r = np.asarray(np.round(catalog[:, 1]), dtype='int32') tmag = catalog[:, 2] indx = (c >= 0) & (c < SumImage.shape[1]) & (r >= 0) & (r < SumImage.shape[0]) c = c[indx] r = r[indx] tmag = tmag[indx] # Loop through the masks: for u in range(no_masks): if np.any(saturated_mask[u, :, :]): # Find out which stars fall inside this mask: which_stars = np.asarray(MASKS[u, :, :][r, c], dtype='bool') if np.any(which_stars): # Only allow extension of columns if the combined light of # the targets in the mask exceeds the saturation limit: mags_in_mask = tmag[which_stars] mags_total = -2.5*np.log10(np.nansum(10**(-0.4*mags_in_mask))) if mags_total > saturation_limit: # The combined magnitude of the targets is fainter than the # saturation limit, so do not extend the mask: saturated_mask[u, :, :] = False else: # Do not add saturation columns if no stars were found: saturated_mask[u, :, :] = False # If we are going to plot later on, make a note # of how the outline of the masks looked before # changing anything: if plot_folder is not None and logger.isEnabledFor(logging.DEBUG): outline_before = [] for u in range(no_masks): outline_before.append( k2p2maks(MASKS[u,:,:], 1, 0.5) ) # Add the saturated pixels to the masks: MASKS[saturated_mask] = 1 # If we are running as DEBUG, output some plots as well: if plot_folder is not None and logger.isEnabledFor(logging.DEBUG): logger.debug("Plotting overflow figures...") Ypixel = np.arange(NY) for u in range(no_masks): mask = np.asarray(MASKS[u, :, :], dtype='bool') mask_rows, mask_columns = np.where(mask) mask_max = np.nanmax(SumImage[mask]) # The outline of the mask after saturated columns have been # corrected for: outline = k2p2maks(mask, 1, 0.5) with PdfPages(os.path.join(plot_folder, 'overflow_mask' + str(u) + '.pdf')) as pdf: for c in sorted(set(mask_columns)): column_rows = mask_rows[mask_columns == c] title = "Mask %d - Column %d" % (u, c) if np.any(saturated_mask[u,:,c]): title += " - Saturated" fig = plt.figure(figsize=(14,6)) ax1 = fig.add_subplot(121) ax1.axvspan(np.min(column_rows)-0.5, np.max(column_rows)+0.5, color='0.7') ax1.plot(Ypixel, SumImage[:, c], 'ro-', drawstyle='steps-mid') ax1.set_title(title) ax1.set_xlabel('Y pixels') ax1.set_ylabel('Sum-image counts') ax1.set_ylim(0, mask_max) ax1.set_xlim(-0.5, NY-0.5) ax2 = fig.add_subplot(122) plot_image(SumImage, ax=ax2, scale='log') ax2.plot(outline_before[u][:,0], outline_before[u][:,1], 'r:') ax2.plot(outline[:,0], outline[:,1], 'r-') ax2.axvline(c, color='r', ls='--') pdf.savefig(fig) plt.close(fig) #============================================================================== # Create plots
#============================================================================== if plot_folder is not None: # Colors to use for each cluster label: colors = plt.cm.gist_rainbow(np.linspace(0, 1, len(unique_labels))) # Colormap to use for clusters: # https://stackoverflow.com/questions/9707676/defining-a-discrete-colormap-for-imshow-in-matplotlib/9708079#9708079 #cmap = mpl.colors.ListedColormap(np.append([[1, 1, 1, 1]], colors, axis=0)) #cmap_norm = mpl.colors.BoundaryNorm(np.arange(-1, len(unique_labels)-1)+0.5, cmap.N) # Set up figure to hold subplots: if NY/NX > 5: aspect = 0.5 else: aspect = 0.2 fig0 = plt.figure(figsize=(2*plt.figaspect(aspect))) fig0.subplots_adjust(wspace=0.12) # --------------- # PLOT 1 ax0 = fig0.add_subplot(151) plot_image(SumImage, ax=ax0, scale='log', title='Sum-image', xlabel=None, ylabel=None) # --------------- # PLOT 2 Flux_mat2 = np.zeros_like(SumImage) Flux_mat2[SumImage < CUT] = 1 Flux_mat2[SumImage > CUT] = 2 Flux_mat2[ori_mask == 0] = 0 ax2 = fig0.add_subplot(152) plot_image(Flux_mat2, ax=ax2, scale='linear', percentile=100, cmap='nipy_spectral', title='Significant flux', xlabel=None, ylabel=None) # --------------- # PLOT 3 ax2 = fig0.add_subplot(153) Flux_mat4 = np.zeros_like(SumImage) for u,lab in enumerate(unique_labels): class_member_mask = (labels == lab) xy = XX[class_member_mask,:] if lab == -1: # Black used for noise. ax2.plot(xy[:, 0], xy[:, 1], '+', markerfacecolor='k', markeredgecolor='k', markersize=5) else: Flux_mat4[xy[:,1], xy[:,0]] = u+1 ax2.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(colors[u]), markeredgecolor='k', markersize=5) ax2.set_title("Clustering + Watershed") ax2.set_xlim([-0.5, SumImage.shape[1]-0.5]) ax2.set_ylim([-0.5, SumImage.shape[0]-0.5]) ax2.set_aspect('equal') # --------------- # PLOT 4 ax4 = fig0.add_subplot(154) plot_image(Flux_mat4, ax=ax4, scale='linear', percentile=100, cmap='nipy_spectral', title='Extracted clusters', xlabel=None, ylabel=None) # --------------- # PLOT 5 ax5 = fig0.add_subplot(155) plot_image(SumImage, ax=ax5, scale='log', title='Final masks', xlabel=None, ylabel=None) # Plot outlines of selected masks: for u in range(no_masks): # Get the color associated with this label: col = colors[ int(np.where(unique_labels == No_pix_sort[u, 1])[0]) ] # Make mask outline: outline = k2p2maks(MASKS[u, :, :], 1, threshold=0.5) # Plot outlines: ax5.plot(outline[:, 0], outline[:, 1], color=col, zorder=10, lw=2.5) ax4.plot(outline[:, 0], outline[:, 1], color='k', zorder=10, lw=1.5) # Save the figure and close it: save_figure(os.path.join(plot_folder, 'masks_'+ws_alg)) if show_plot: plt.show() else: plt.close('all') return MASKS, background_bandwidth
def remove_stars(tpf): sumimage = np.nansum(tpf, axis=0, dtype='float64') ny, nx = np.shape(sumimage) ori_mask = ~np.isnan(sumimage) X, Y = np.meshgrid(np.arange(nx), np.arange(ny)) Flux = sumimage[ori_mask].flatten() Flux = Flux[Flux > 0] flux_cut = stats.trim1(np.sort(Flux), 0.15) background_bandwidth = select_bandwidth(flux_cut, bw='scott', kernel='gau') kernel = KDE(flux_cut) kernel.fit(kernel='gau', bw=background_bandwidth, fft=True, gridsize=100) def kernel_opt(x): return -1 * kernel.evaluate(x) max_guess = kernel.support[np.argmax(kernel.density)] MODE = optimize.fmin_powell(kernel_opt, max_guess, disp=0) mad_to_sigma = 1.482602218505602 MAD1 = mad_to_sigma * nanmedian(np.abs(Flux[(Flux < MODE)] - MODE)) thresh = 2. CUT = MODE + thresh * MAD1 idx = (sumimage > CUT) X2 = X[idx] Y2 = Y[idx] cluster_radius = np.sqrt(2) min_for_cluster = 4 XX, labels_ini, core_samples_mask = run_DBSCAN(X2, Y2, cluster_radius, min_for_cluster) DUMMY_MASKS = np.zeros((0, ny, nx), dtype='bool') DUMMY_MASKS_LABELS = [] m = np.zeros_like(sumimage, dtype='bool') for lab in set(labels_ini): if lab == -1: continue # Create "image" of this mask: m[:, :] = False for x, y in XX[labels_ini == lab]: m[y, x] = True # Append them to lists: DUMMY_MASKS = np.append(DUMMY_MASKS, [m], axis=0) DUMMY_MASKS_LABELS.append(lab) smask, _ = k2p2_saturated(sumimage, DUMMY_MASKS, idx) if np.any(smask): saturated_masks = {} for u, sm in enumerate(smask): saturated_masks[DUMMY_MASKS_LABELS[u]] = sm else: saturated_masks = None ws_thres = 0.02 ws_footprint = 3 ws_blur = 0.2 ws_alg = 'flux' plot_folder = None catalog = None labels, unique_labels, NoCluster = k2p2WS( X, Y, X2, Y2, sumimage, XX, labels_ini, core_samples_mask, saturated_masks=saturated_masks, ws_thres=ws_thres, ws_footprint=ws_footprint, ws_blur=ws_blur, ws_alg=ws_alg, output_folder=plot_folder, catalog=catalog) # Make sure it is a tuple and not a set - much easier to work with: unique_labels = tuple(unique_labels) # Create list of clusters and their number of pixels: No_pix_sort = np.zeros([len(unique_labels), 2]) for u, lab in enumerate(unique_labels): No_pix_sort[u, 0] = np.sum(labels == lab) No_pix_sort[u, 1] = lab # Only select the clusters that are not the largest or noise: cluster_select = (No_pix_sort[:, 0] < np.max( No_pix_sort.T[0])) & (No_pix_sort[:, 1] != -1) # cluster_select = (No_pix_sort[:, 0] < np.max(No_pix_sort.T[0])) no_masks = sum(cluster_select) No_pix_sort = No_pix_sort[cluster_select, :] MASKS = np.zeros((no_masks, ny, nx)) for u in range(no_masks): lab = No_pix_sort[u, 1] class_member_mask = (labels == lab) xy = XX[class_member_mask, :] MASKS[u, xy[:, 1], xy[:, 0]] = 1 maskimg = np.sum(MASKS, axis=0) invmaskimg = np.abs(maskimg - 1) return invmaskimg * tpf
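# Hedged standalone sketch (added; not from the original source): the MODE + thresh*MAD
# background cut used by remove_stars/k2p2FixFromSum, applied to synthetic fluxes. Assumes
# statsmodels' KDEUnivariate as KDE and numpy's nanmedian; the 15% trim and the 70000-count
# cut of the full pipeline are omitted here.
import numpy as np
from numpy import nanmedian
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

rng = np.random.default_rng(1)
flux = np.concatenate([rng.normal(100.0, 5.0, 5000),   # background pixels
                       rng.normal(500.0, 50.0, 50)])   # a few bright pixels
kernel = KDE(flux)
kernel.fit(kernel='gau', fft=True, gridsize=100)
mode = kernel.support[np.argmax(kernel.density)]       # background level estimate
mad_to_sigma = 1.482602218505602                       # MAD -> sigma conversion
mad1 = mad_to_sigma * nanmedian(np.abs(flux[flux < mode] - mode))
cut = mode + 2.0 * mad1                                # significance threshold
print(mode, cut)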