def setupClass(cls): cls.decimal_density = 2 # low accuracy because binning is different res1 = KDE(Xi) res1.fit(kernel="gau", fft=True, bw="silverman") cls.res1 = res1 rfname2 = os.path.join(curdir, 'results', 'results_kde_fft.csv') cls.res_density = np.genfromtxt(open(rfname2, 'rb'))
def calctime(dval, maxtime=50.0): logger = logging.getLogger('dataval') logger.info('Plotting calculation times for photometry...') for cadence in dval.cadences: star_vals = dval.search_database( select=['diagnostics.stamp_resizes', 'diagnostics.elaptime'], search=[ f'cadence={cadence:d}', f'diagnostics.elaptime <= {maxtime:f}' ]) if not star_vals: continue et = np.array([star['elaptime'] for star in star_vals], dtype='float64') resize = np.array([star['stamp_resizes'] for star in star_vals], dtype='int32') maxresize = int(np.max(resize)) fig, ax = plt.subplots(figsize=plt.figaspect(0.5)) norm = Normalize(vmin=-0.5, vmax=maxresize + 0.5) scalarMap = ScalarMappable(norm=norm, cmap=plt.get_cmap('tab10')) # Calculate KDE of full dataset: kde1 = KDE(et) kde1.fit(kernel='gau', gridsize=1024) # Calculate KDEs for different number of stamp resizes: for jj in range(maxresize + 1): kde_data = et[resize == jj] if len(kde_data): kde2 = KDE(kde_data) kde2.fit(kernel='gau', gridsize=1024) rgba_color = scalarMap.to_rgba(jj) ax.fill_between(kde2.support, 0, kde2.density, color=rgba_color, alpha=0.5, label=f'{jj:d} resizes') ax.plot(kde1.support, kde1.density, color='k', lw=2, label='All') ax.set_xlim([0, maxtime]) ax.set_ylim(bottom=0) ax.xaxis.set_major_locator(MultipleLocator(5)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.set_xlabel('Calculation time (sec)') ax.legend(loc='upper right') fig.savefig(os.path.join(dval.outfolder, f'calctime_c{cadence:04d}')) if not dval.show: plt.close(fig)
def setupClass(cls): res1 = KDE(Xi) weights = np.linspace(1, 100, 200) res1.fit(kernel="gau", gridsize=50, weights=weights, fft=False, bw="silverman") cls.res1 = res1 rfname = os.path.join(curdir, 'results', 'results_kde_weights.csv') cls.res_density = np.genfromtxt(open(rfname, 'rb'), skip_header=1)
class TestKdeRefit(): np.random.seed(12345) data1 = np.random.randn(100) * 100 pdf = KDE(data1) pdf.fit() data2 = np.random.randn(100) * 100 pdf2 = KDE(data2) pdf2.fit() for attr in ['icdf', 'cdf', 'sf']: npt.assert_( not np.allclose(getattr(pdf, attr)[:10], getattr(pdf2, attr)[:10]))
def MagnitudeDistribution(catalog, mag=18., bins=16, timeStamp=False): """ A simple plot to compare input and extracted magnitudes for stars of a fixed magnitude. """ txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) kde1 = KDE(catalog.mag_aper - mag) kde1.fit() kde2 = KDE(catalog.mag_auto - mag) kde2.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(catalog.mag_aper - mag, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.axvline(x=np.mean(catalog.mag_aper - mag), c='b', ls='--', label='Mean') ax1.hist(catalog.mag_auto - mag, bins=bins, label='Auto', alpha=0.3, density=True, color='r') ax1.axvline(x=np.mean(catalog.mag_auto - mag), c='r', ls='--', label='Mean') ax1.plot(kde1.support, kde1.density, 'b-', label='Gaussian KDE') ax1.plot(kde2.support, kde2.density, 'r-', label='Gaussian KDE') # print(np.std(catalog.mag_aper), np.std(catalog.mag_auto)) ax1.set_xlabel('SExtractor Magnitude - Input Catalog') ax1.set_ylabel('PDF') if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0) plt.savefig('MagDistributionSExtractor.pdf') plt.close()
def compute_entropy(U): HGauss0 = 0.5 + 0.5 * np.log(2 * np.pi) nSingVals = U.shape[1] H = np.empty(nSingVals, dtype='float64') for iBasisVector in range(nSingVals): kde = KDE(np.abs(U[:, iBasisVector])) kde.fit(gridsize=1000) pdf = kde.density x = kde.support dx = x[1] - x[0] # Calculate the Gaussian entropy pdfMean = nansum(x * pdf) * dx with np.errstate(invalid='ignore'): sigma = np.sqrt(nansum(((x - pdfMean)**2) * pdf) * dx) HGauss = HGauss0 + np.log(sigma) # Calculate vMatrix entropy pdf_pos = (pdf > 0) HVMatrix = -np.sum(xlogy(pdf[pdf_pos], pdf[pdf_pos])) * dx # The returned entropy is the difference between the V-matrix entropy and the entropy of a Gaussian of similar width (sigma) H[iBasisVector] = HVMatrix - HGauss return H
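# Hedged usage sketch (added; not from the original source): exercise compute_entropy on the
# left-singular vectors of a random matrix. It assumes the bare `nansum`/`xlogy`/`KDE` names
# used above resolve to numpy's nansum, scipy.special.xlogy and statsmodels' KDEUnivariate.
import numpy as np
from numpy import nansum
from scipy.special import xlogy
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

rng = np.random.default_rng(0)
U, _, _ = np.linalg.svd(rng.standard_normal((1000, 5)), full_matrices=False)
H = compute_entropy(U)  # one entropy difference per basis vector
print(H.shape)  # -> (5,)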
def reduce_mode(x): kde = KDE(x) kde.fit(gridsize=2000) pdf = kde.density x = kde.support return x[np.argmax(pdf)]
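# Hedged usage sketch (added; not from the original source): reduce_mode on a synthetic
# sample, assuming KDE is statsmodels' KDEUnivariate.
import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

rng = np.random.default_rng(42)
sample = rng.normal(loc=3.0, scale=0.5, size=2000)
print(reduce_mode(sample))  # should land close to the true mode, 3.0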
def _kde_plot( values: ndarray, grid: ndarray, axes: Axes, bw: Union[float, str] = "scott" ) -> None: """Calculate KDE for observed spacings. Parameters ---------- values: ndarray the values used to compute (fit) the kernel density estimate grid: ndarray the grid of values over which to evaluate the computed KDE curve axes: pyplot.Axes the current axes object to be modified bw: bandwidth The `bw` argument for statsmodels' KDEUnivariate `.fit` Notes ----- We are doing this manually because we want to ensure consistency of the KDE calculation and remove Seaborn control over the process, while also avoiding inconsistent behaviours like https://github.com/mwaskom/seaborn/issues/938 and https://github.com/mwaskom/seaborn/issues/796 """ values = values[values > 0] # prevent floating-point bad behaviour kde = KDE(values) # kde.fit(kernel="gau", bw="scott", cut=0) kde.fit(kernel="gau", bw=bw, cut=0) evaluated = np.empty_like(grid) for i, _ in enumerate(evaluated): evaluated[i] = kde.evaluate(grid[i]) kde_curve = axes.plot(grid, evaluated, label="Kernel Density Estimate") plt.setp(kde_curve, color="black")
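# Hedged usage sketch (added; not from the original source): calling _kde_plot on synthetic
# spacings. Assumes numpy, matplotlib.pyplot and the statsmodels KDE are importable as in
# the module above.
import numpy as np
import matplotlib.pyplot as plt

spacings = np.random.exponential(scale=1.0, size=500)
grid = np.linspace(0.01, 5.0, 200)
fig, ax = plt.subplots()
_kde_plot(spacings, grid, ax, bw="scott")
ax.legend()
plt.show()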
def _mode(data): modes = np.zeros([data.shape[0]]) for i in range(data.shape[0]): kde = KDE(data[i, :]) kde.fit(gridsize=2000) modes[i] = kde.support[np.argmax(kde.density)] return modes
def _reduce_mode(x): if len(x) == 0: return np.nan x = np.asarray(x, dtype=np.float64) kde = KDE(x) kde.fit(gridsize=2000) return kde.support[np.argmax(kde.density)]
def setup_class(cls): cls.x = x = KDEWResults['x'] weights = KDEWResults['weights'] res1 = KDE(x) # the default bandwidth was "scott" when the reference values were computed res1.fit(kernel=cls.kernel_name, weights=weights, fft=False, bw="scott") cls.res1 = res1 cls.res_density = KDEWResults[cls.res_kernel_name]
def SExtractorSNR(catalog, bins=16, timeStamp=False): """ A simple plot showing the SNR SExtractor finds. """ txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) snr = 1. / catalog.magerr_aper kde1 = KDE(snr) kde1.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(snr, bins=bins, label='r=0.65 Aperture', density=True, color='r', alpha=0.5) ax1.axvline(x=np.mean(snr), c='g', ls='--', label='Mean', lw=1.6) ax1.plot(kde1.support, kde1.density, 'b-', label='Gaussian KDE', lw=1.6) print('SExtractor:', np.mean(1. / catalog.magerr_aper), np.mean(1. / catalog.magerr_auto)) ax1.set_xlabel('SExtractor Signal-to-Noise Ratio') ax1.set_ylabel('PDF') if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0) plt.savefig('SExtractorSNR.pdf') plt.close()
def test_kde_bw_positive(): # GH 6679 x = np.array([ 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985, 5.67332327, 6.19847872, 7.43189192 ]) kde = KDE(x) kde.fit() assert kde.bw > 0
def mag_dist(dval): """ Function to plot magnitude distribution for targets .. codeauthor:: Mikkel N. Lund <*****@*****.**> .. codeauthor:: Rasmus Handberg <*****@*****.**> """ logger = logging.getLogger('dataval') logger.info('Plotting Magnitude distribution...') fig, ax = plt.subplots(figsize=plt.figaspect(0.5)) fig.subplots_adjust(left=0.14, wspace=0.3, top=0.94, bottom=0.155, right=0.96) colors = ['r', 'b', 'g'] # TODO: What if there are more than three? for k, cadence in enumerate(dval.cadences): star_vals = dval.search_database(select='todolist.tmag', search=f'cadence={cadence:d}') if star_vals: tmags = np.array([star['tmag'] for star in star_vals]) kde = KDE(tmags) kde.fit(gridsize=1000) ax.fill_between(kde.support, 0, kde.density / np.max(kde.density), color=colors[k], alpha=0.3, label=f'{cadence:d}s cadence') # kde_all = KDE(tmags) # kde_all.fit(gridsize=1000) # ax.plot(kde_all.support, kde_all.density/np.max(kde_all.density), 'k-', lw=1.5, label='All') ax.set_ylim(bottom=0) ax.set_xlabel('TESS magnitude') ax.set_ylabel('Normalised Density') ax.xaxis.set_major_locator(MultipleLocator(2)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.legend(frameon=False, loc='upper left', borderaxespad=0, handlelength=2.5, handletextpad=0.4) fig.savefig(os.path.join(dval.outfolder, 'mag_dist')) if not dval.show: plt.close(fig)
def __get_kde_values( self, spacings_range: Tuple[float, float], nnnsd: bool = False, kde_gridsize: int = 1000, ) -> np.ndarray: """Fit / derive the KDE using the entire set of unfolded values, but evaluate it only over the given `spacings_range`. """ spacings = np.sort(self.vals[2:] - self.vals[:-2]) if nnnsd else self.spacings kde = KDE(spacings) kde.fit(kernel="gau", bw="scott", cut=0, fft=False, gridsize=10000) s = np.linspace(spacings_range[0], spacings_range[1], kde_gridsize) # evaluated = np.empty_like(s) # for i, _ in enumerate(evaluated): # evaluated[i] = kde.evaluate(s[i]) evaluated = kde.evaluate(s) return evaluated
def calctime_corrections(dval, maxtime=50.0): logger = logging.getLogger('dataval') if not dval.corrections_done: logger.debug("Skipping since corrections not done") return logger.info('Plotting calculation times for corrections...') for cadence in dval.cadences: star_vals = dval.search_database( select='diagnostics_corr.elaptime', search=[ f'cadence={cadence:d}', f'diagnostics_corr.elaptime <= {maxtime:f}' ]) if not star_vals: continue et = np.array([star['elaptime'] for star in star_vals], dtype='float64') kde = KDE(et) kde.fit(gridsize=1024) fig, ax = plt.subplots(figsize=plt.figaspect(0.5)) ax.plot(kde.support, kde.density, color='k', lw=2) ax.set_xlim([0, maxtime]) ax.set_ylim(bottom=0) ax.xaxis.set_major_locator(MultipleLocator(5)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.set_xlabel('Calculation time (sec)') fig.savefig( os.path.join(dval.outfolder, f'calctime_corr_c{cadence:04d}')) if not dval.show: plt.close(fig)
def test_fit_self(reset_randomstate): x = np.random.standard_normal(100) kde = KDE(x) assert isinstance(kde, KDE) assert isinstance(kde.fit(), KDE)
def setupClass(cls): res1 = KDE(Xi) res1.fit(kernel="biw", fft=False, bw="silverman") cls.res1 = res1 cls.res_density = KDEResults["biw_d"]
def setup_class(cls): res1 = KDE(Xi) res1.fit(kernel="gau", fft=False, bw="silverman") cls.res1 = res1 cls.res_density = KDEResults["gau_d"]
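# Hedged sketch (added; not part of the test suite above): fit a Gaussian and a biweight
# kernel on the same sample. As the tests themselves suggest, statsmodels' FFT path is
# only available for the Gaussian kernel, so fft=False is used for "biw".
import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

x = np.random.standard_normal(200)
kde_gau = KDE(x)
kde_gau.fit(kernel="gau", fft=True, bw="silverman")
kde_biw = KDE(x)
kde_biw.fit(kernel="biw", fft=False, bw="silverman")
print(kde_gau.density[:3], kde_biw.density[:3])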
def setup_class(cls): cls.kde = KDE(Xi) cls.weights_200 = np.linspace(1, 100, 200) cls.weights_100 = np.linspace(1, 100, 100)
def plotSourceFinderResults(file='objects.phot', mag=18., bins=14, apcorr=0.923, timeStamp=False): """ Plot magnitude, count, SNR, and ellipticity distributions for SourceFinder output. """ data = sextutils.sextractor(file) offs = data.magnitude - mag xpos = np.mean(offs) kde = KDE(offs) kde.fit() txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(offs, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE') ax1.axvline(x=xpos, c='b', ls='-', label='Mean') ax1.set_xlabel('Aperture Corrected Magnitude - Input Catalogue') ax1.set_ylabel('PDF') ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0, loc='upper left') plt.savefig('MagDistributionSourceFinder.pdf') plt.close() if np.abs(mag - 18.) < 0.1: counts = data.counts / apcorr - 608137.825681 #for 18mag else: counts = data.counts / apcorr - 1359.57331621 #for 24.5mag xpos = np.mean(counts) std = np.std(data.counts / apcorr) snr = np.mean(data.counts / apcorr) / std print('SourceFinder:', snr, np.mean(data.counts) / std, 1359.57331621 / std, np.mean(data.counts / apcorr), std) kde = KDE(counts) kde.fit() txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(counts, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE', lw=2) ax1.axvline(x=xpos, c='g', ls='-', label='Mean', lw=2) ax1.set_xlabel('Aperture Corrected Counts - Input Catalogue') ax1.set_ylabel('PDF') ax1.text(ax1.get_xlim()[0] * 0.95, ax1.get_ylim()[1] * 0.75, r'$SNR = \frac{\left < counts \right >}{\sigma} \sim %.2f$' % snr) if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0, loc='upper left') plt.savefig('CountDistributionSourceFinder.pdf') plt.close() kde = KDE(data.snr) kde.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(data.snr, bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.axvline(x=np.mean(data.snr), c='g', ls='-', label='Mean', lw=2) ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE', lw=2) ax1.set_xlabel('Derived Signal-to-Noise Ratio') ax1.set_ylabel('PDF') #ax1.text(ax1.get_xlim()[0]*1.02, ax1.get_ylim()[1]*0.9, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr)) ax1.text(4.5, 0.33, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr)) if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0, loc='upper left') plt.savefig('SNRsSourceFinder.pdf') plt.close() #pick the ones with well recovered flux msk = data.counts > 1223.6 #90% of 1369.57 kde = KDE(data.snr[msk]) kde.fit() fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(data.snr[msk], bins=bins, label='r=0.65 Aperture', alpha=0.2, density=True, color='b') ax1.axvline(x=np.mean(data.snr[msk]), c='g', ls='-', label='Mean', lw=2) ax1.plot(kde.support, kde.density, 'r-', label='Gaussian KDE', lw=2) ax1.set_xlabel('Derived Signal-to-Noise Ratio') ax1.set_ylabel('PDF') #ax1.text(ax1.get_xlim()[0]*1.02, ax1.get_ylim()[1]*0.9, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr)) ax1.text(11., 0.5, r'$\left < SNR \right > = %.2f$' % np.mean(data.snr[msk])) if timeStamp: ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) ax1.legend(shadow=True, fancybox=True, numpoints=1, scatterpoints=1, markerscale=1.0) plt.savefig('SNRsSourceFinder2.pdf') plt.close() avg = np.mean(data.ellipticity) std = np.std(data.ellipticity) fig = plt.figure() ax1 = fig.add_subplot(111) ax1.hist(data.ellipticity, bins=bins, alpha=0.2, density=True, color='b') ax1.axvline(x=avg, c='b', ls='-') ax1.text(ax1.get_xlim()[0] * 1.02, ax1.get_ylim()[1] * 0.95, r'$\bar{e} = %f$' % avg) ax1.text(ax1.get_xlim()[0] * 1.02, ax1.get_ylim()[1] * 0.9, r'$\sigma = %f$' % std) ax1.set_xlabel('Derived Ellipticity') ax1.set_ylabel('PDF') ax1.text(0.83, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax1.transAxes, alpha=0.2) plt.savefig('EllipticityDistributionSourceFinder.pdf') plt.close()
def cotrend_ini(self, cbv_area, do_ini_plots=False): """ Function for running the initial co-trending to obtain CBV coefficients for the construction of priors. The steps taken in the function are: 1: for each CBV area, load the calculated CBVs 2: co-trend all light curves in the area by fitting all CBVs with linear least squares 3: save the CBV coefficients Parameters: *self*: all parameters defined in class init Returns: Saves CBV coefficients per CBV area in ".npz" files and adds the loaded CBVs to *self* .. codeauthor:: Mikkel N. Lund <*****@*****.**> """ logger = logging.getLogger(__name__) #------------------------------------------------------------------ # CORRECTING STARS #------------------------------------------------------------------ logger.info( "--------------------------------------------------------------") if os.path.exists( os.path.join( self.data_folder, 'mat-%s-%d_free_weights.npz' % (self.datasource, cbv_area))): logger.info( "Initial co-trending for light curves in %s CBV area%d already done" % (self.datasource, cbv_area)) return else: logger.info( "Initial co-trending for light curves in %s CBV area%d" % (self.datasource, cbv_area)) # Load stars from data base stars = self.search_database(search=[ 'datasource="%s"' % self.datasource, 'cbv_area=%i' % cbv_area ]) # Load the cbv from file: cbv = CBV(self.data_folder, cbv_area, self.datasource, self.threshold_snrtest) # # Signal-to-Noise test (without actually removing any CBVs): # indx_lowsnr = cbv_snr_test(cbv.cbv, self.threshold_snrtest) # cbv.remove_cols(indx_lowsnr) # Update maximum number of components n_components0 = cbv.cbv.shape[1] logger.info('New max number of components: %i' % int(n_components0)) if self.Numcbvs == 'all': n_components = n_components0 else: n_components = np.min([self.Numcbvs, n_components0]) logger.info('Fitting using number of components: %i' % int(n_components)) # initialize results array, including TIC, CBV components, and a residual offset Nres = int(2 * n_components + 2) results = np.zeros([len(stars), Nres]) # Loop through stars for kk, star in tqdm(enumerate(stars), total=len(stars), disable=not logger.isEnabledFor(logging.INFO)): lc = self.load_lightcurve(star) logger.debug("Correcting star %d", lc.targetid) flux_filter, res = cbv.cotrend_single(lc, n_components, self.data_folder, ini=True) lc_corr = (lc.flux / flux_filter - 1) * 1e6 # TODO: compute diagnostics requiring the light curve # # SAVE TO DIAGNOSTICS FILE: # wn_ratio = GOC_wn(flux, flux-flux_filter) res = np.array([ res, ]).flatten() results[kk, 0] = lc.targetid results[kk, 1:len(res) + 1] = res if do_ini_plots: fig = plt.figure() ax1 = fig.add_subplot(211) ax1.plot(lc.time, lc.flux) ax1.plot(lc.time, flux_filter) ax1.set_xlabel('Time (BJD)') ax1.set_ylabel('Flux (counts)') ax1.set_xticks([]) ax2 = fig.add_subplot(212) ax2.plot(lc.time, lc_corr) ax2.set_xlabel('Time (BJD)') ax2.set_ylabel('Relative flux (ppm)') filename = 'lc_corr_ini_TIC%d.png' % lc.targetid if not os.path.exists(os.path.join(self.plot_folder(lc))): os.makedirs(os.path.join(self.plot_folder(lc))) fig.savefig(os.path.join(self.plot_folder(lc), filename)) plt.close(fig) # Save weights for priors if it is an initial run np.savez(os.path.join( self.data_folder, 'mat-%s-%d_free_weights.npz' % (self.datasource, cbv_area)), res=results) # Plot CBV weights fig = plt.figure(figsize=(15, 15)) ax = fig.add_subplot(221) ax2 = fig.add_subplot(222) ax3 = fig.add_subplot(223) ax4 = fig.add_subplot(224) for kk in range(1, int(2 * n_components + 1)): if kk > n_components: LS = '--' else:
LS = '-' idx = np.nonzero(results[:, kk]) r = results[idx, kk] idx2 = (r > np.percentile(r, 10)) & (r < np.percentile(r, 90)) kde = KDE(r[idx2]) kde.fit(gridsize=5000) err = nanmedian(np.abs(r[idx2] - nanmedian(r[idx2]))) * 1e5 if kk > n_components: ax3.plot(kde.support * 1e5, kde.density / np.max(kde.density), label='CBV ' + str(kk), ls=LS) ax4.errorbar(kk, kde.support[np.argmax(kde.density)] * 1e5, yerr=err, marker='o', color='k') else: ax.plot(kde.support * 1e5, kde.density / np.max(kde.density), label='CBV ' + str(kk), ls=LS) ax2.errorbar(kk, kde.support[np.argmax(kde.density)] * 1e5, yerr=err, marker='o', color='k') ax.set_xlabel('CBV weight') ax2.set_ylabel('CBV weight') ax2.set_xlabel('CBV') ax.legend() fig.savefig( os.path.join( self.data_folder, 'weights-sector-%s-%d.png' % (self.datasource, cbv_area))) plt.close(fig)
def plotSNR(deg=60, kdes=True, log=False): CCDs = 1000 fudge = 47.0 #cumulative distribution of stars for different galactic latitudes if deg == 30: tmp = 1 sfudge = 0.79 elif deg == 60: tmp = 2 sfudge = 0.79 else: tmp = 3 sfudge = 0.78 #stars d = np.loadtxt('data/stars.dat', usecols=(0, tmp)) stmags = d[:, 0] stcounts = d[:, 1] #fit a function and generate finer sample z = np.polyfit(stmags, np.log10(stcounts), 4) p = np.poly1d(z) starmags = np.arange(1, 30.2, 0.2) starcounts = 10**p(starmags) cpdf = (starcounts - np.min(starcounts)) / (np.max(starcounts) - np.min(starcounts)) starcounts /= 3600. #convert to square arcseconds nstars = int(np.max(starcounts) * fudge * sfudge) * CCDs magStars = cr.drawFromCumulativeDistributionFunction( cpdf, starmags, nstars) SNRsStars = ETC.SNR(ETC.VISinformation(), magnitude=magStars, exposures=1, galaxy=False) print('Assuming Galactic Latitude = %i deg' % deg) print('Number of stars within a pointing (36CCDs) with 70 < SNR < 700 (single 565s exposure):', int((SNRsStars[(SNRsStars > 70) & (SNRsStars < 700)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 60 < SNR < 80 (single 565s exposure):', int((SNRsStars[(SNRsStars > 60) & (SNRsStars < 80)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 690 < SNR < 710 (single 565s exposure):', int((SNRsStars[(SNRsStars > 690) & (SNRsStars < 710)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 18 < mag < 22 (single 565s exposure):', int((SNRsStars[(magStars > 18) & (magStars < 22)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 18 < mag < 23 (single 565s exposure):', int((SNRsStars[(magStars > 18) & (magStars < 23)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 17.9 < mag < 18.1 (single 565s exposure):', int((SNRsStars[(magStars > 17.9) & (magStars < 18.1)]).size * 36. / CCDs)) print('Number of stars within a pointing (36CCDs) with 21 < mag < 23 (single 565s exposure):', int((SNRsStars[(magStars > 21) & (magStars < 23)]).size * 36. / CCDs)) #calculate Gaussian KDE with statsmodels package (for speed) if kdes: kn = SNRsStars[SNRsStars < 1000] kdeStars = KDE(kn) kdeStars.fit(adjust=2) nst = kn.size / 10. / 1.38 #galaxies #cumulative distribution of galaxies d = np.loadtxt('data/cdf_galaxies.dat', usecols=(0, 1)) gmags = d[:, 0] gcounts = d[:, 1] nums = int(np.max(gcounts) / 3600. * fudge * CCDs) z = np.polyfit(gmags, np.log10(gcounts), 4) p = np.poly1d(z) galaxymags = np.arange(10.0, 30.2, 0.2) galaxycounts = 10**p(galaxymags) cumulative = (galaxycounts - np.min(galaxycounts)) / ( np.max(galaxycounts) - np.min(galaxycounts)) magGalaxies = cr.drawFromCumulativeDistributionFunction( cumulative, galaxymags, nums) SNRsGalaxies = ETC.SNR(VISinformation(), magnitude=magGalaxies, exposures=1) #calculate Gaussian KDE with statsmodels as well (a scipy.stats.gaussian_kde variant is kept commented out) if kdes: kn = SNRsGalaxies[SNRsGalaxies < 1000] #pos = np.linspace(1, 810, num=70) #kdegal = gaussian_kde(kn) #gals = kdegal(pos) #ngl = kn.size #/ df kdeGalaxy = KDE(kn) kdeGalaxy.fit(adjust=10) ngl = kn.size / 10. / 1.38 #histogram binning and weighting bins = np.linspace(0., 1000., 101) df = bins[1] - bins[0] weight = 1.
/ (2048 * 2 * 2066 * 2 * 0.1 * 0.1 * 7.71604938e-8 * CCDs) / df weightsS = np.ones(magStars.size) * weight weightsG = np.ones(magGalaxies.size) * weight #simple magnitude distribution plot for stars stars = np.loadtxt('data/stars.dat') fig = plt.figure() ax = fig.add_subplot(111) ax.hist(magStars, bins=30, cumulative=True, log=True, alpha=0.3, weights=weightsS * df, label='Random Draws') ax.semilogy(stars[:, 0], stars[:, 1], label='Stars (30deg)') ax.semilogy(stars[:, 0], stars[:, 2], label='Stars (60deg)') ax.semilogy(stars[:, 0], stars[:, 3], label='Stars (90deg)') ax.set_xlabel(r'$M_{AB}$') ax.set_ylabel(r'Cumulative Number of Objects [deg$^{-2}$]') plt.legend(shadow=True, fancybox=True, loc='upper left') plt.savefig('stars%ideg.pdf' % deg) plt.close() #make a plot txt = '%s' % datetime.datetime.isoformat(datetime.datetime.now()) ax = host_subplot(111, axes_class=AA.Axes) hist1 = ax.hist(SNRsStars, bins=bins, alpha=0.2, log=True, weights=weightsS, label='Stars [%i deg]' % deg, color='r') hist2 = ax.hist(SNRsGalaxies, bins=bins, alpha=0.2, log=True, weights=weightsG, label='Galaxies', color='blue') if kdes: ax.plot(kdeStars.support, kdeStars.density * nst, 'r-', label='Gaussian KDE (stars)') #ax.plot(pos, gals*ngl, 'b-', label='Gaussian KDE (galaxies)') ax.plot(kdeGalaxy.support, kdeGalaxy.density * ngl, 'b-', label='Gaussian KDE (galaxies)') #calculate magnitude scale, top-axis if log: mags = np.asarray([17, 18, 19, 20, 21, 22, 23, 24]) SNRs = ETC.SNR(VISinformation(), magnitude=mags, exposures=1, galaxy=False) else: mags = np.asarray([17, 17.5, 18, 18.5, 19, 20, 21, 22.5]) SNRs = ETC.SNR(VISinformation(), magnitude=mags, exposures=1, galaxy=False) ax2 = ax.twin() # ax2 is responsible for "top" axis and "right" axis ax2.set_xticks(SNRs) ax2.set_xticklabels([str(tmp) for tmp in mags]) ax2.set_xlabel('$M(R+I)_{AB}$ [mag]') ax2.axis['right'].major_ticklabels.set_visible(False) ax.set_ylim(1e-1, 1e5) ax.set_ylabel('Number of Objects [deg$^{-2}$ dex$^{-1}$]') ax.set_xlabel('Signal-to-Noise Ratio [assuming a single 565s exposure]') plt.text(0.8, 1.12, txt, ha='left', va='top', fontsize=9, transform=ax.transAxes, alpha=0.2) plt.legend(shadow=True, fancybox=True) if log: ax.set_xscale('log') plt.savefig('SNRtheoretical%ideglog.pdf' % deg) else: ax.set_xlim(1, 1e3) plt.savefig('SNRtheoretical%ideglin.pdf' % deg) plt.close() #write output if not log: mid = df / 2. #output to file fh = open('SNRsSTARS%ideg.txt' % deg, 'w') fh.write('#These values are for stars at %ideg (%s)\n' % (deg, txt)) fh.write('#SNR number_of_stars N\n') fh.write('#bin_centre per_square_degree per_pointing\n') for a, b in zip(hist1[0], hist1[1]): fh.write('%i %f %f\n' % (b + mid, a * df, a * df * 0.496)) fh.close() fh = open('SNRsGALAXIES.txt', 'w') fh.write('#These values are for galaxies (%s)\n' % txt) fh.write('#SNR number_of_galaxies N\n') fh.write('#bin_centre per_square_degree per_pointing\n') for a, b in zip(hist2[0], hist2[1]): fh.write('%i %f %f\n' % (b + mid, a * df, a * df * 0.496)) fh.close()
heights.hist(bins=bins1, fc='steelblue') plt.savefig('height_hist_bins1.png') # 5-inch bins bins5 = np.arange(heights.min(), heights.max(), 5.) heights.hist(bins=bins5, fc='steelblue') plt.savefig('height_hist_bins5.png') # 0.001-inch bins bins001 = np.arange(heights.min(), heights.max(), .001) heights.hist(bins=bins001, fc='steelblue') plt.savefig('height_hist_bins001.png') # Kernel density estimators, from statsmodels (KDEUnivariate). # Create a KDE object heights_kde = KDE(heights) # Use fit() to estimate the densities. Default is gaussian kernel # using fft. This will provide a "density" attribute. heights_kde.fit() # Plot the density of the heights # Sort inside the plotting so the lines connect nicely. fig = plt.figure() plt.plot(heights_kde.support, heights_kde.density) plt.savefig('heights_density.png') # Pull out male and female heights as arrays over which to compute densities heights_m = heights[heights_weights['Gender'] == 'Male'].values heights_f = heights[heights_weights['Gender'] == 'Female'].values heights_m_kde = KDE(heights_m) heights_f_kde = KDE(heights_f)
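# A possible continuation (added; not in the original): fit the male/female KDE objects
# created above and overlay their densities, mirroring the heights_kde plot.
heights_m_kde.fit()
heights_f_kde.fit()
fig = plt.figure()
plt.plot(heights_m_kde.support, heights_m_kde.density, label='Male')
plt.plot(heights_f_kde.support, heights_f_kde.density, label='Female')
plt.legend()
plt.savefig('heights_density_by_gender.png')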
def plot_mag_dist(data_path, sector, version=1, savetex=False): # Add data values files = np.array([]) for root, dirs, fil in os.walk(data_path): for file in fil: file_path = root + os.sep + file if ('corr' in file_path) and ('.fits' in file_path): print(file_path) files = np.append(files, file_path) tmag_vals_sc = np.array([]) tmag_vals_lc = np.array([]) for f in files: with fits.open(f) as hdu: tmag = hdu[0].header['TESSMAG'] dt = hdu[1].header['TIMEDEL'] * 86400 print(tmag, dt) if dt < 1000: tmag_vals_sc = np.append(tmag_vals_sc, tmag) else: tmag_vals_lc = np.append(tmag_vals_lc, tmag) fig = plt.figure() ax = fig.add_subplot(111) if len(tmag_vals_lc) > 0: kde_lc = KDE(tmag_vals_lc) kde_lc.fit(gridsize=1000) ax.fill_between(kde_lc.support, 0, kde_lc.density * len(tmag_vals_lc), color='b', alpha=0.3, label='1800s') ax.scatter(tmag_vals_lc, np.zeros_like(tmag_vals_lc), lw=1, marker='|', c='k', s=80) if len(tmag_vals_sc) > 0: kde_sc = KDE(tmag_vals_sc) kde_sc.fit(gridsize=1000) ax.fill_between(kde_sc.support, 0, kde_sc.density * len(tmag_vals_sc), color='r', alpha=0.3, label='120s') ax.scatter(tmag_vals_sc, np.zeros_like(tmag_vals_sc), lw=1, marker='|', c='k', s=80) tmag_all = np.append(tmag_vals_lc, tmag_vals_sc) kde_all = KDE(tmag_all) kde_all.fit(gridsize=1000) ax.plot(kde_all.support, kde_all.density * len(tmag_all), 'k-', lw=1.5, label='All') # try: # kde_sc = KDE(tmag_vals_sc) # kde_sc.fit(gridsize=1000) # ax.plot(kde_sc.support, kde_sc.density*len(tmag_vals_sc), label='SC') # ax.scatter(tmag_vals_sc, np.zeros_like(tmag_vals_sc), lw=1, marker='+', s=80) # except: # pass # ax.set_xlim([3.5, 16.5]) ax.set_ylim(bottom=0) ax.set_xlabel('TESS magnitude', fontsize=16, labelpad=10) ax.set_ylabel('Number of stars', fontsize=16, labelpad=10) ax.xaxis.set_major_locator(MultipleLocator(2)) ax.xaxis.set_minor_locator(MultipleLocator(1)) ax.tick_params(direction='out', which='both', pad=5, length=3) ax.tick_params(which='major', pad=6, length=5, labelsize='15') ax.yaxis.set_ticks_position('both') plt.tight_layout() ax.legend(frameon=False, prop={'size': 12}, loc='upper right', borderaxespad=0, handlelength=2.5, handletextpad=0.4) ax.yaxis.set_ticks_position('both') if version != 1: save_path = 'plots/sector%02d/v%1d/' % (sector, version) else: save_path = 'plots/sector%02d/' % sector if not os.path.exists(save_path): os.makedirs(save_path) fig.savefig(os.path.join(save_path, 'magnitudes.pdf'), bbox_inches='tight') fig.savefig(os.path.join(save_path, 'magnitudes.png'), bbox_inches='tight') if savetex: save_path2 = '../releasenote_tex/Release_note%1d/' % sector fig.savefig(os.path.join(save_path2, 'magnitudes.pdf'), bbox_inches='tight') plt.show()
def k2p2FixFromSum(SumImage, thresh=1, output_folder=None, plot_folder=None, show_plot=True, min_no_pixels_in_mask=8, min_for_cluster=4, cluster_radius=np.sqrt(2), segmentation=True, ws_alg='flux', ws_blur=0.5, ws_thres=0.05, ws_footprint=3, extend_overflow=True, catalog=None): """ Create pixel masks from Sum-image. Parameters: SumImage (ndarray): Sum-image. thresh (float, optional): Threshold for significant flux. The threshold is calculated as MODE+thresh*MAD. Default=1. output_folder (string, optional): Path to directory where output should be saved. Default=None. plot_folder (string, optional): Path to directory where plots should be saved. Default=None. show_plot (boolean, optional): Should plots be shown to the user? Default=True. min_no_pixels_in_mask (integer, optional): Minimum number of pixels to constitute a mask. min_for_cluster (integer, optional): Minimum number of pixels to be considered a cluster in DBSCAN clustering. cluster_radius (float, optional): Radius around points to consider cluster in DBSCAN clustering. segmentation (boolean, optional): Perform segmentation of clusters using Watershed segmentation. ws_alg (string, optional): Watershed method to use. Default='flux'. ws_blur (float, optional): Amount of blurring applied to the image before watershed segmentation. ws_thres (float, optional): Threshold for watershed segmentation. ws_footprint (integer, optional): Footprint to use in watershed segmentation. extend_overflow (boolean, optional): Enable extension of overflow columns for bright stars. catalog (ndarray, optional): Catalog of stars as an array with three columns (column, row and magnitude). If this is provided the results will only allow masks to be returned for stars in the catalog and the information is also used in the extension of overflow columns. Returns: tuple: Tuple with two elements: A 3D boolean ndarray of masks and a float indicating the bandwidth used for estimating the background level. .. codeauthor:: Rasmus Handberg <*****@*****.**> .. codeauthor:: Mikkel Lund <*****@*****.**> """ # Get logger for printing messages: logger = logging.getLogger(__name__) logger.info("Creating masks from sum-image...") NY, NX = np.shape(SumImage) ori_mask = ~np.isnan(SumImage) X, Y = np.meshgrid(np.arange(NX), np.arange(NY)) # Cut out pixels from the sum image which were collected and contain flux, # and flatten the 2D image to a 1D array: Flux = SumImage[ori_mask].flatten() Flux = Flux[Flux > 0] # Check if there was actually any flux measured: if len(Flux) == 0: raise K2P2NoFlux("No measured flux in sum-image") # Cut away the top 15% of the fluxes: flux_cut = stats.trim1(np.sort(Flux), 0.15) # Also do a cut on the absolute pixel values - This helps in cases where # the image is dominated by saturated pixels.
The exact value is of course # in principle dependent on the CCD, but we have found this value to be # reasonable in TESS simulated data: flux_cut = flux_cut[flux_cut < 70000] # Estimate the bandwidth we are going to use for the background: background_bandwidth = select_bandwidth(flux_cut, bw='scott', kernel='gau') logger.debug(" Sum-image KDE bandwidth: %f", background_bandwidth) # Make the Kernel Density Estimation of the fluxes: kernel = KDE(flux_cut) kernel.fit(kernel='gau', bw=background_bandwidth, fft=True, gridsize=100) # MODE def kernel_opt(x): return -1*kernel.evaluate(x) max_guess = kernel.support[np.argmax(kernel.density)] MODE = minimize(kernel_opt, max_guess, method='Powell').x # MAD (around mode) MAD1 = mad_to_sigma * nanmedian( np.abs( Flux[(Flux < MODE)] - MODE ) ) # Define the cutoff above which pixels are regarded significant: CUT = MODE + thresh * MAD1 logger.debug(" Threshold used: %f", thresh) logger.debug(" Flux cut is: %f", CUT) if logger.isEnabledFor(logging.DEBUG) and plot_folder is not None: fig = plt.figure() ax = fig.add_subplot(111) ax.fill_between(kernel.support, kernel.density, alpha=0.3) ax.axvline(MODE, color='k') ax.axvline(CUT, color='r') ax.set_xlabel('Flux') ax.set_ylabel('Distribution') save_figure(os.path.join(plot_folder, 'flux_distribution')) plt.close(fig) #========================================================================== # Find and separate clusters of pixels #========================================================================== # Cut out pixels of sum image with flux above the cut-off: idx = (SumImage > CUT) X2 = X[idx] Y2 = Y[idx] if np.all(~idx): raise K2P2NoStars("No flux above threshold") logger.debug(" Min for cluster is: %f", min_for_cluster) logger.debug(" Cluster radius is: %f", cluster_radius) # Run clustering algorithm XX, labels_ini, core_samples_mask = run_DBSCAN(X2, Y2, cluster_radius, min_for_cluster) # Run watershed segmentation algorithm: # Require that at least one non-noise cluster was found.
if segmentation and any(labels_ini != -1): # Create a set of dummy-masks that are made up of the clusters # that were found by DBSCAN, meaning that there could be masks # with several stars in them: DUMMY_MASKS = np.zeros((0, NY, NX), dtype='bool') DUMMY_MASKS_LABELS = [] m = np.zeros_like(SumImage, dtype='bool') for lab in set(labels_ini): if lab == -1: continue # Create "image" of this mask: m[:,:] = False for x,y in XX[labels_ini == lab]: m[y, x] = True # Append them to lists: DUMMY_MASKS = np.append(DUMMY_MASKS, [m], axis=0) DUMMY_MASKS_LABELS.append(lab) # Run the dummy masks through the detection of saturated columns: logger.debug("Detecting saturated columns in non-segmented masks...") smask, _ = k2p2_saturated(SumImage, DUMMY_MASKS, idx) # Create dictionary that will map a label to the mask of saturated pixels: if np.any(smask): saturated_masks = {} for u,sm in enumerate(smask): saturated_masks[DUMMY_MASKS_LABELS[u]] = sm else: saturated_masks = None # Run the mask segmentation algorithm on the found clusters: labels, unique_labels, NoCluster = k2p2WS(X, Y, X2, Y2, SumImage, XX, labels_ini, core_samples_mask, saturated_masks=saturated_masks, ws_thres=ws_thres, ws_footprint=ws_footprint, ws_blur=ws_blur, ws_alg=ws_alg, output_folder=plot_folder, catalog=catalog) else: labels = labels_ini unique_labels = set(labels) #NoCluster = len(unique_labels) - (1 if -1 in labels else 0) # Make sure it is a tuple and not a set - much easier to work with: unique_labels = tuple(unique_labels) # Create list of clusters and their number of pixels: No_pix_sort = np.zeros([len(unique_labels), 2]) for u,lab in enumerate(unique_labels): No_pix_sort[u, 0] = np.sum(labels == lab) No_pix_sort[u, 1] = lab # Only select the clusters that have enough pixels and are not noise: cluster_select = (No_pix_sort[:, 0] >= min_no_pixels_in_mask) & (No_pix_sort[:, 1] != -1) no_masks = sum(cluster_select) No_pix_sort = No_pix_sort[cluster_select, :] # No masks were found, so return None: if no_masks == 0: MASKS = None else: # Sort the clusters by the number of pixels: cluster_sort = np.argsort(No_pix_sort[:, 0]) No_pix_sort = No_pix_sort[cluster_sort[::-1], :] # Create 3D array that will hold masks for each target: MASKS = np.zeros((no_masks, NY, NX)) for u in range(no_masks): lab = No_pix_sort[u, 1] class_member_mask = (labels == lab) xy = XX[class_member_mask, :] MASKS[u, xy[:,1], xy[:,0]] = 1 #========================================================================== # Fill holes in masks #========================================================================== pattern = np.array([[[0, 0.25, 0],[0.25, 0, 0.25],[0, 0.25, 0]]]) # 3D array - shape=(1, 3, 3) mask_holes_indx = ndimage.convolve(MASKS, pattern, mode='constant', cval=0.0) mask_holes_indx = (mask_holes_indx > 0.95) & (MASKS == 0) # Should be exactly 1.0, but let's assume some round-off errors if np.any(mask_holes_indx): logger.info("Filling %d holes in the masks", np.sum(mask_holes_indx)) MASKS[mask_holes_indx] = 1 if plot_folder is not None: # Create image showing all masks at different levels: img = np.zeros((NY,NX)) for r in np.transpose(np.where(MASKS > 0)): img[r[1], r[2]] = r[0]+1 # Plot everything together: fig = plt.figure() ax = fig.add_subplot(111) plot_image(img, ax=ax, scale='linear', percentile=100, cmap='nipy_spectral', title='Holes in mask filled') # Create outline of filled holes: for hole in np.transpose(np.where(mask_holes_indx)): cen = (hole[2]-0.5, hole[1]-0.5) ax.add_patch(mpl.patches.Rectangle(cen, 1, 1, color='k', lw=2, fill=False,
hatch='//')) #fig.savefig(os.path.join(plot_folder, 'mask_filled_holes.png'), format='png', bbox_inches='tight') save_figure(os.path.join(plot_folder, 'mask_filled_holes')) plt.close(fig) #========================================================================== # Extend overflow lanes #========================================================================== if extend_overflow: logger.debug("Detecting saturated columns in masks...") # Find pixels that are saturated in each mask and find out if they should # be added to the mask: saturated_mask, pixels_added = k2p2_saturated(SumImage, MASKS, idx) logger.info("Overflow will add %d pixels in total to the masks.", pixels_added) # If we have a catalog of stars, we will only allow stars above the saturation # limit to get their masks extended: if catalog is not None: # Filter that catalog, only keeping stars actually inside current image: c = np.asarray(np.round(catalog[:, 0]), dtype='int32') r = np.asarray(np.round(catalog[:, 1]), dtype='int32') tmag = catalog[:, 2] indx = (c >= 0) & (c < SumImage.shape[1]) & (r >= 0) & (r < SumImage.shape[0]) c = c[indx] r = r[indx] tmag = tmag[indx] # Loop through the masks: for u in range(no_masks): if np.any(saturated_mask[u, :, :]): # Find out which stars fall inside this mask: which_stars = np.asarray(MASKS[u, :, :][r, c], dtype='bool') if np.any(which_stars): # Only allow extension of columns if the combined light of # the targets in the mask exceeds the saturation limit: mags_in_mask = tmag[which_stars] mags_total = -2.5*np.log10(np.nansum(10**(-0.4*mags_in_mask))) if mags_total > saturation_limit: # The combined magnitude of the targets is fainter than the # saturation limit, so do not extend the mask: saturated_mask[u, :, :] = False else: # Do not add saturation columns if no stars were found: saturated_mask[u, :, :] = False # If we are going to plot later on, make a note # of how the outline of the masks looked before # changing anything: if plot_folder is not None and logger.isEnabledFor(logging.DEBUG): outline_before = [] for u in range(no_masks): outline_before.append( k2p2maks(MASKS[u,:,:], 1, 0.5) ) # Add the saturated pixels to the masks: MASKS[saturated_mask] = 1 # If we are running as DEBUG, output some plots as well: if plot_folder is not None and logger.isEnabledFor(logging.DEBUG): logger.debug("Plotting overflow figures...") Ypixel = np.arange(NY) for u in range(no_masks): mask = np.asarray(MASKS[u, :, :], dtype='bool') mask_rows, mask_columns = np.where(mask) mask_max = np.nanmax(SumImage[mask]) # The outline of the mask after saturated columns have been # corrected for: outline = k2p2maks(mask, 1, 0.5) with PdfPages(os.path.join(plot_folder, 'overflow_mask' + str(u) + '.pdf')) as pdf: for c in sorted(set(mask_columns)): column_rows = mask_rows[mask_columns == c] title = "Mask %d - Column %d" % (u, c) if np.any(saturated_mask[u,:,c]): title += " - Saturated" fig = plt.figure(figsize=(14,6)) ax1 = fig.add_subplot(121) ax1.axvspan(np.min(column_rows)-0.5, np.max(column_rows)+0.5, color='0.7') ax1.plot(Ypixel, SumImage[:, c], 'ro-', drawstyle='steps-mid') ax1.set_title(title) ax1.set_xlabel('Y pixels') ax1.set_ylabel('Sum-image counts') ax1.set_ylim(0, mask_max) ax1.set_xlim(-0.5, NY-0.5) ax2 = fig.add_subplot(122) plot_image(SumImage, ax=ax2, scale='log') ax2.plot(outline_before[u][:,0], outline_before[u][:,1], 'r:') ax2.plot(outline[:,0], outline[:,1], 'r-') ax2.axvline(c, color='r', ls='--') pdf.savefig(fig) plt.close(fig) #============================================================================== # Create plots
#============================================================================== if plot_folder is not None: # Colors to use for each cluster label: colors = plt.cm.gist_rainbow(np.linspace(0, 1, len(unique_labels))) # Colormap to use for clusters: # https://stackoverflow.com/questions/9707676/defining-a-discrete-colormap-for-imshow-in-matplotlib/9708079#9708079 #cmap = mpl.colors.ListedColormap(np.append([[1, 1, 1, 1]], colors, axis=0)) #cmap_norm = mpl.colors.BoundaryNorm(np.arange(-1, len(unique_labels)-1)+0.5, cmap.N) # Set up figure to hold subplots: if NY/NX > 5: aspect = 0.5 else: aspect = 0.2 fig0 = plt.figure(figsize=(2*plt.figaspect(aspect))) fig0.subplots_adjust(wspace=0.12) # --------------- # PLOT 1 ax0 = fig0.add_subplot(151) plot_image(SumImage, ax=ax0, scale='log', title='Sum-image', xlabel=None, ylabel=None) # --------------- # PLOT 2 Flux_mat2 = np.zeros_like(SumImage) Flux_mat2[SumImage < CUT] = 1 Flux_mat2[SumImage > CUT] = 2 Flux_mat2[ori_mask == 0] = 0 ax2 = fig0.add_subplot(152) plot_image(Flux_mat2, ax=ax2, scale='linear', percentile=100, cmap='nipy_spectral', title='Significant flux', xlabel=None, ylabel=None) # --------------- # PLOT 3 ax2 = fig0.add_subplot(153) Flux_mat4 = np.zeros_like(SumImage) for u,lab in enumerate(unique_labels): class_member_mask = (labels == lab) xy = XX[class_member_mask,:] if lab == -1: # Black used for noise. ax2.plot(xy[:, 0], xy[:, 1], '+', markerfacecolor='k', markeredgecolor='k', markersize=5) else: Flux_mat4[xy[:,1], xy[:,0]] = u+1 ax2.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(colors[u]), markeredgecolor='k', markersize=5) ax2.set_title("Clustering + Watershed") ax2.set_xlim([-0.5, SumImage.shape[1]-0.5]) ax2.set_ylim([-0.5, SumImage.shape[0]-0.5]) ax2.set_aspect('equal') # --------------- # PLOT 4 ax4 = fig0.add_subplot(154) plot_image(Flux_mat4, ax=ax4, scale='linear', percentile=100, cmap='nipy_spectral', title='Extracted clusters', xlabel=None, ylabel=None) # --------------- # PLOT 5 ax5 = fig0.add_subplot(155) plot_image(SumImage, ax=ax5, scale='log', title='Final masks', xlabel=None, ylabel=None) # Plot outlines of selected masks: for u in range(no_masks): # Get the color associated with this label: col = colors[ int(np.where(unique_labels == No_pix_sort[u, 1])[0]) ] # Make mask outline: outline = k2p2maks(MASKS[u, :, :], 1, threshold=0.5) # Plot outlines: ax5.plot(outline[:, 0], outline[:, 1], color=col, zorder=10, lw=2.5) ax4.plot(outline[:, 0], outline[:, 1], color='k', zorder=10, lw=1.5) # Save the figure and close it: save_figure(os.path.join(plot_folder, 'masks_'+ws_alg)) if show_plot: plt.show() else: plt.close('all') return MASKS, background_bandwidth
def remove_stars(tpf): sumimage = np.nansum(tpf, axis=0, dtype='float64') ny, nx = np.shape(sumimage) ori_mask = ~np.isnan(sumimage) X, Y = np.meshgrid(np.arange(nx), np.arange(ny)) Flux = sumimage[ori_mask].flatten() Flux = Flux[Flux > 0] flux_cut = stats.trim1(np.sort(Flux), 0.15) background_bandwidth = select_bandwidth(flux_cut, bw='scott', kernel='gau') kernel = KDE(flux_cut) kernel.fit(kernel='gau', bw=background_bandwidth, fft=True, gridsize=100) def kernel_opt(x): return -1 * kernel.evaluate(x) max_guess = kernel.support[np.argmax(kernel.density)] MODE = optimize.fmin_powell(kernel_opt, max_guess, disp=0) mad_to_sigma = 1.482602218505602 MAD1 = mad_to_sigma * nanmedian(np.abs(Flux[(Flux < MODE)] - MODE)) thresh = 2. CUT = MODE + thresh * MAD1 idx = (sumimage > CUT) X2 = X[idx] Y2 = Y[idx] cluster_radius = np.sqrt(2) min_for_cluster = 4 XX, labels_ini, core_samples_mask = run_DBSCAN(X2, Y2, cluster_radius, min_for_cluster) DUMMY_MASKS = np.zeros((0, ny, nx), dtype='bool') DUMMY_MASKS_LABELS = [] m = np.zeros_like(sumimage, dtype='bool') for lab in set(labels_ini): if lab == -1: continue # Create "image" of this mask: m[:, :] = False for x, y in XX[labels_ini == lab]: m[y, x] = True # Append them to lists: DUMMY_MASKS = np.append(DUMMY_MASKS, [m], axis=0) DUMMY_MASKS_LABELS.append(lab) smask, _ = k2p2_saturated(sumimage, DUMMY_MASKS, idx) if np.any(smask): saturated_masks = {} for u, sm in enumerate(smask): saturated_masks[DUMMY_MASKS_LABELS[u]] = sm else: saturated_masks = None ws_thres = 0.02 ws_footprint = 3 ws_blur = 0.2 ws_alg = 'flux' plot_folder = None catalog = None labels, unique_labels, NoCluster = k2p2WS( X, Y, X2, Y2, sumimage, XX, labels_ini, core_samples_mask, saturated_masks=saturated_masks, ws_thres=ws_thres, ws_footprint=ws_footprint, ws_blur=ws_blur, ws_alg=ws_alg, output_folder=plot_folder, catalog=catalog) # Make sure it is a tuple and not a set - much easier to work with: unique_labels = tuple(unique_labels) # Create list of clusters and their number of pixels: No_pix_sort = np.zeros([len(unique_labels), 2]) for u, lab in enumerate(unique_labels): No_pix_sort[u, 0] = np.sum(labels == lab) No_pix_sort[u, 1] = lab # Only select the clusters that are not the largest or noise: cluster_select = (No_pix_sort[:, 0] < np.max( No_pix_sort.T[0])) & (No_pix_sort[:, 1] != -1) # cluster_select = (No_pix_sort[:, 0] < np.max(No_pix_sort.T[0])) no_masks = sum(cluster_select) No_pix_sort = No_pix_sort[cluster_select, :] MASKS = np.zeros((no_masks, ny, nx)) for u in range(no_masks): lab = No_pix_sort[u, 1] class_member_mask = (labels == lab) xy = XX[class_member_mask, :] MASKS[u, xy[:, 1], xy[:, 0]] = 1 maskimg = np.sum(MASKS, axis=0) invmaskimg = np.abs(maskimg - 1) return invmaskimg * tpf
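# Hedged standalone sketch (added; not from the original source): the MODE + thresh*MAD
# background cut used by remove_stars/k2p2FixFromSum, applied to synthetic fluxes. Assumes
# statsmodels' KDEUnivariate as KDE and numpy's nanmedian; the 15% trim and the 70000-count
# cut of the full pipeline are omitted here.
import numpy as np
from numpy import nanmedian
from statsmodels.nonparametric.kde import KDEUnivariate as KDE

rng = np.random.default_rng(1)
flux = np.concatenate([rng.normal(100.0, 5.0, 5000),   # background pixels
                       rng.normal(500.0, 50.0, 50)])   # a few bright pixels
kernel = KDE(flux)
kernel.fit(kernel='gau', fft=True, gridsize=100)
mode = kernel.support[np.argmax(kernel.density)]       # background level estimate
mad_to_sigma = 1.482602218505602                       # MAD -> sigma conversion
mad1 = mad_to_sigma * nanmedian(np.abs(flux[flux < mode] - mode))
cut = mode + 2.0 * mad1                                # significance threshold
print(mode, cut)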