def stratified_bayesian_blocks(x, p0=0.01, min_bin_width=0.01):
    """
    Creates smart histogram bins for mixed continuous/categorical data

    The unique values of `x` are grouped into strata according to how often
    they occur, Bayesian Blocks is run on the data of every stratum, and the
    edges found for all strata are merged into a single sorted array.

    Parameters
    ----------
    x : array_like
        Data to be binned.
    p0 : float (optional)
        Forwarded as ``p0`` to every `bayesian_blocks` call.
    min_bin_width : float (optional)
        An edge lying less than this distance above its predecessor in the
        merged, sorted edge list is removed.

    Returns
    -------
    data_bins : ndarray
        Sorted bin edges.
    """

    # Create strata: bin the occurrence counts of the unique values
    values, counts = np.unique(x, return_counts=True)
    strata_edges = bayesian_blocks(counts, p0=p0)

    # Iterate over each strata
    data_bins = []
    for lower, upper in zip(strata_edges[:-1], strata_edges[1:]):
        # Select the data pertaining only to a given strata
        # NOTE(review): the upper bound is exclusive for every stratum, so
        # values whose count equals the last strata edge are in no stratum
        # -- TODO confirm this is intended.
        in_stratum = (lower <= counts) & (counts < upper)
        strata_data = np.repeat(values[in_stratum], counts[in_stratum])

        # Normalize and regularize the data within the strata
        strata_data = _regularize(_normalize(strata_data))

        # Perform Bayesian Blocks and append the bins
        data_bins.append(bayesian_blocks(strata_data, p0=p0))

    # Collect the bins from all strata together
    data_bins = np.sort(np.concatenate(data_bins))

    # Clean up with the min_bin_width heuristic: flag every edge that is
    # closer than min_bin_width to the edge before it (the first is kept)
    too_close = (data_bins[1:] - min_bin_width) < data_bins[:-1]
    too_close = np.r_[False, too_close]

    return data_bins[~too_close]
def stratified_bayesian_blocks(x, p0=0.01, min_bin_width=0.01):
    """
    Creates smart histogram bins for mixed continuous/categorical data

    The unique values of `x` are grouped into strata according to how often
    they occur, Bayesian Blocks is run on the data of every stratum, and the
    edges found for all strata are merged into a single sorted array.

    Parameters
    ----------
    x : array_like
        Data to be binned.
    p0 : float (optional)
        Forwarded as ``p0`` to every `bayesian_blocks` call.
    min_bin_width : float (optional)
        An edge lying less than this distance above its predecessor in the
        merged, sorted edge list is removed.

    Returns
    -------
    data_bins : ndarray
        Sorted bin edges.
    """

    # Create strata: bin the occurrence counts of the unique values
    values, counts = np.unique(x, return_counts=True)
    strata_edges = bayesian_blocks(counts, p0=p0)

    # Iterate over each strata
    data_bins = []
    for lower, upper in zip(strata_edges[:-1], strata_edges[1:]):
        # Select the data pertaining only to a given strata
        # NOTE(review): the upper bound is exclusive for every stratum, so
        # values whose count equals the last strata edge are in no stratum
        # -- TODO confirm this is intended.
        in_stratum = (lower <= counts) & (counts < upper)
        strata_data = np.repeat(values[in_stratum], counts[in_stratum])

        # Normalize and regularize the data within the strata
        strata_data = _regularize(_normalize(strata_data))

        # Perform Bayesian Blocks and append the bins
        data_bins.append(bayesian_blocks(strata_data, p0=p0))

    # Collect the bins from all strata together
    data_bins = np.sort(np.concatenate(data_bins))

    # Clean up with the min_bin_width heuristic: flag every edge that is
    # closer than min_bin_width to the edge before it (the first is kept)
    too_close = (data_bins[1:] - min_bin_width) < data_bins[:-1]
    too_close = np.r_[False, too_close]

    return data_bins[~too_close]
def test_duplicate_events():
    """Repeated event times must bin like unique times carrying counts."""
    # 100 event times whose last 20 entries repeat the first 20
    times = np.random.random(100)
    times[80:] = times[:20]

    # Per-event counts: 2 for the repeated times, 1 elsewhere
    counts = np.ones_like(times)
    counts[:20] += 1

    edges_repeated = bayesian_blocks(times)
    edges_counted = bayesian_blocks(times[:80], counts[:80])

    assert_allclose(edges_repeated, edges_counted)
Beispiel #4
0
def test_duplicate_events():
    """Repeated event times must bin like unique times carrying counts."""
    # 100 event times whose last 20 entries repeat the first 20
    times = np.random.random(100)
    times[80:] = times[:20]

    # Per-event counts: 2 for the repeated times, 1 elsewhere
    counts = np.ones_like(times)
    counts[:20] += 1

    with catch_warnings(AstroMLDeprecationWarning):
        edges_repeated = bayesian_blocks(times)
        edges_counted = bayesian_blocks(times[:80], counts[:80])

    assert_allclose(edges_repeated, edges_counted)
def test_duplicate_events():
    """Repeated event times must bin like unique times carrying counts."""
    # 100 event times whose last 20 entries repeat the first 20
    times = np.random.random(100)
    times[80:] = times[:20]

    # Per-event counts: 2 for the repeated times, 1 elsewhere
    counts = np.ones_like(times)
    counts[:20] += 1

    with catch_warnings(AstroMLDeprecationWarning):
        edges_repeated = bayesian_blocks(times)
        edges_counted = bayesian_blocks(times[:80], counts[:80])

    assert_allclose(edges_repeated, edges_counted)
Beispiel #6
0
def ProbRemoval(data, h, rMags):
    """Randomly thin each object's fitted values with a prior, then locate the peak.

    For every object, column 1 of ``data[i]`` is weighted by the row of ``h``
    selected from ``rMags[i]``, values are randomly discarded according to
    that weight, and the centre of the most populated Bayesian block of the
    surviving values is recorded.

    Parameters
    ----------
    data : sequence of 2-D arrays
        Output from DCDT; column 1 of every entry is used.
    h : sequence
        Output from makePrior, indexed by the position found in ``rMids``.
    rMags : sequence
        rMags from catalog, same length as ``data``.

    Returns
    -------
    z : list
        One location per object.

    Notes
    -----
    Reads the module-level names ``rMids`` and ``zMids``.
    """
    z = []
    for i in range(len(data)):
        zFits = data[i][:, 1]

        # index of nearest rMag with defined zDist
        rN = np.searchsorted(rMids, rMags[i])
        if rN > 239:
            # NOTE(review): 239 is presumably the last valid row of h --
            # TODO confirm against makePrior.
            print(rMags[i])
            rN = 239

        ws = np.interp(zFits, zMids, h[rN])  # values of zDist at zFits
        ws = ws / np.max(ws)  # Normalize to make probabilistic removal possible
        ran = np.random.rand(len(ws))
        msk = ran > ws  # True where ran exceeds prior prob
        zs = np.ma.masked_array(zFits, mask=msk).compressed()

        bins = bayesian_blocks(zs, fitness='events', p0=0.25)
        counts, edges = np.histogram(zs, bins)
        try:
            nMax = np.argmax(counts)
            loc = (edges[nMax] + edges[nMax + 1]) / 2.
        except (ValueError, IndexError):
            # No usable block (e.g. empty histogram): fall back to the median
            loc = np.percentile(zs, 50)
        z.append(loc)
    return z
Beispiel #7
0
def gaussfit_sky(values, p_thresh=0.65, plot=False, **extras):
    """Fit a gaussian to the lower part of a histogram of the sky values.

    The histogram bins are estimated using Bayesian blocks.  Only bins whose
    centres lie below the ``p_thresh`` percentile of ``values`` enter the fit.

    Parameters
    ----------
    values : ndarray
        Sky values; ``mean()`` and ``std()`` seed the fit.
    p_thresh : float
        Passed to ``np.percentile`` to obtain the value threshold.
        NOTE(review): ``np.percentile`` works on a 0-100 scale, so the
        default 0.65 is the 0.65th percentile -- confirm whether 65 was meant.
    plot : bool
        If true, draw the histogram, the fitted curve and the threshold.
    **extras
        Accepted and ignored.

    Returns
    -------
    mu, sigma : float
        Fitted central value and standard deviation.
    """

    bins = bayesian_blocks(values)
    print(len(bins), bins)
    cbin = (bins[1:] + bins[:-1]) / 2
    hist = np.histogram(values,
                        bins=bins,
                        range=(bins.min(), bins.max()),
                        density=True)

    # Select bins by the percentile *value*, not by the percentile number
    val_thresh = np.percentile(values, p_thresh)
    lower = cbin < val_thresh

    def gauss(x, *p):
        A, mu, sigma = p
        return A * np.exp(-(x - mu)**2 / (2. * sigma**2))

    # p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
    p0 = [np.max(hist[0]), values.mean(), values.std()]
    coeff, var_matrix = curve_fit(gauss, cbin[lower], hist[0][lower], p0=p0)
    if plot:
        print(len(hist[1]), len(hist[0]), type(coeff))
        pl.figure()
        pl.plot(cbin, hist[0], color='b')
        # gauss takes its coefficients as separate positional arguments
        pl.plot(cbin, gauss(cbin, *coeff), color='r')
        pl.axvline(val_thresh)
    return coeff[1], coeff[2]
Beispiel #8
0
def ProbRemoval(data,h,rMags):
    """Randomly thin each object's fitted values with a prior, then locate the peak.

    For every object, column 1 of ``data[i]`` is weighted by the row of ``h``
    selected from ``rMags[i]``, values are randomly discarded according to
    that weight, and the centre of the most populated Bayesian block of the
    surviving values is recorded.

    Parameters
    ----------
    data : sequence of 2-D arrays
        Output from DCDT; column 1 of every entry is used.
    h : sequence
        Output from makePrior, indexed by the position found in ``rMids``.
    rMags : sequence
        rMags from catalog, same length as ``data``.

    Returns
    -------
    z : list
        One location per object.

    Notes
    -----
    Reads the module-level names ``rMids`` and ``zMids``.
    """
    z = []
    for i in range(len(data)):
        zFits = data[i][:, 1]

        # index of nearest rMag with defined zDist
        rN = np.searchsorted(rMids, rMags[i])
        if rN > 239:
            # NOTE(review): 239 is presumably the last valid row of h --
            # TODO confirm against makePrior.
            print(rMags[i])
            rN = 239

        ws = np.interp(zFits, zMids, h[rN])  # values of zDist at zFits
        ws = ws / np.max(ws)  # Normalize to make probabilistic removal possible
        ran = np.random.rand(len(ws))
        msk = ran > ws  # True where ran exceeds prior prob
        zs = np.ma.masked_array(zFits, mask=msk).compressed()

        bins = bayesian_blocks(zs, fitness='events', p0=0.25)
        counts, edges = np.histogram(zs, bins)
        try:
            nMax = np.argmax(counts)
            loc = (edges[nMax] + edges[nMax + 1]) / 2.
        except (ValueError, IndexError):
            # No usable block (e.g. empty histogram): fall back to the median
            loc = np.percentile(zs, 50)
        z.append(loc)
    return z
Beispiel #9
0
def gaussfit_sky(values, p_thresh=0.65, plot=False, **extras):
    """Fit a gaussian to the lower part of a histogram of the sky values.

    The histogram bins are estimated using Bayesian blocks.  Only bins whose
    centres lie below the ``p_thresh`` percentile of ``values`` enter the fit.

    Parameters
    ----------
    values : ndarray
        Sky values; ``mean()`` and ``std()`` seed the fit.
    p_thresh : float
        Passed to ``np.percentile`` to obtain the value threshold.
        NOTE(review): ``np.percentile`` works on a 0-100 scale, so the
        default 0.65 is the 0.65th percentile -- confirm whether 65 was meant.
    plot : bool
        If true, draw the histogram, the fitted curve and the threshold.
    **extras
        Accepted and ignored.

    Returns
    -------
    mu, sigma : float
        Fitted central value and standard deviation.
    """

    bins = bayesian_blocks(values)
    print(len(bins), bins)
    cbin = (bins[1:] + bins[:-1]) / 2
    hist = np.histogram(values,
                        bins=bins,
                        range=(bins.min(), bins.max()),
                        density=True)

    # Select bins by the percentile *value*, not by the percentile number
    val_thresh = np.percentile(values, p_thresh)
    lower = cbin < val_thresh

    def gauss(x, *p):
        A, mu, sigma = p
        return A * np.exp(-(x - mu)**2 / (2. * sigma**2))

    # p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
    p0 = [np.max(hist[0]), values.mean(), values.std()]
    coeff, var_matrix = curve_fit(gauss, cbin[lower], hist[0][lower], p0=p0)
    if plot:
        print(len(hist[1]), len(hist[0]), type(coeff))
        pl.figure()
        pl.plot(cbin, hist[0], color='b')
        # gauss takes its coefficients as separate positional arguments
        pl.plot(cbin, gauss(cbin, *coeff), color='r')
        pl.axvline(val_thresh)
    return coeff[1], coeff[2]
Beispiel #10
0
def set_plx_kde(t, bandwidth=0.3, method='sklearn_kde'):
    """ Set the plx_kde

    Builds a density estimate of the positive parallaxes in ``t['plx']`` and
    stores it in module-level globals: ``plx_kde`` for the two KDE methods
    (only when it is still ``None``), or ``plx_bins_blocks`` and
    ``plx_hist_blocks`` for the Bayesian Blocks method.

    Parameters
    ----------
    t : ndarray float
        Catalog of parallax measures (units: mas), indexed by the 'plx' field
    bandwidth : float
        Bandwidth for gaussian_kde / KernelDensity (optional, 0.01
        recommended); ``None`` uses the estimator's own default
    method : string
        Method for density determination (options: scipy_kde, sklearn_kde, blocks)
    """

    global plx_kde
    global plx_bins_blocks
    global plx_hist_blocks

    # Strings are compared by value; identity comparison only worked by
    # accident of interning
    if method == 'scipy_kde':

        if plx_kde is None:
            # We are only going to allow parallaxes above some minimum value
            plx_positive = t['plx'][t['plx'] > 0.0]
            if bandwidth is None:
                plx_kde = gaussian_kde(plx_positive)
            else:
                plx_kde = gaussian_kde(plx_positive, bw_method=bandwidth)

    elif method == 'sklearn_kde':
        if plx_kde is None:
            kwargs = {'kernel': 'tophat'}
            if bandwidth is None:
                plx_kde = KernelDensity(**kwargs)
            else:
                plx_kde = KernelDensity(bandwidth=bandwidth, **kwargs)

            plx_positive = t['plx'][t['plx'] > 0.0]
            if c.kde_subset:
                # Fit on a random subset of at most 5000 parallaxes
                plx_ran = np.copy(plx_positive)
                np.random.shuffle(plx_ran)
                plx_kde.fit(plx_ran[0:5000, np.newaxis])
            else:
                plx_kde.fit(plx_positive[:, np.newaxis])

    elif method == 'blocks':
        # Set up Bayesian Blocks
        print("Calculating Bayesian Blocks...")
        nbins = min(len(t), 40000)
        plx_sample = t['plx'][t['plx'] > 0.0][0:nbins]
        bins = bayesian_blocks(plx_sample)
        # density=True normalises correctly for unequal bin widths; the old
        # `normed` keyword is no longer accepted by NumPy
        hist, bins = np.histogram(plx_sample, bins=bins, density=True)

        # Pad with zeros
        plx_bins_blocks = np.append(-1.0e100, bins)
        hist_pad = np.append(0.0, hist)
        plx_hist_blocks = np.append(hist_pad, 0.0)
        print("Bayesian Blocks set.")

    else:
        print("You must include a valid method")
        print("Options: scipy_kde, sklearn_kde or blocks")
        return
Beispiel #11
0
def histogram(a, bins=10, range=None, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as numpy.histogram().

    Parameters
    ----------
    a : array_like
        array of data to be histogrammed

    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scotts' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    other keyword arguments are described in numpy.histogram().

    Returns
    -------
    hist : array
        The values of the histogram. See numpy.histogram for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.

    Raises
    ------
    ValueError
        If `bins` is a string that is not one of the codes listed above.

    See Also
    --------
    numpy.histogram
    astroML.plotting.hist
    """
    a = np.asarray(a)

    # Only a string can be a bin code.  Testing the type first keeps
    # array-valued `bins` from being compared with strings, which yields an
    # element-wise result with an ambiguous truth value.
    if isinstance(bins, str):
        if bins not in ('blocks', 'knuth', 'scotts', 'freedman'):
            raise ValueError("unrecognized bin code: '%s'" % bins)

        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if range is not None:
            a = a[(a >= range[0]) & (a <= range[1])]

        if bins == 'blocks':
            bins = bayesian_blocks(a)
        elif bins == 'knuth':
            da, bins = knuth_bin_width(a, True)
        elif bins == 'scotts':
            da, bins = scotts_bin_width(a, True)
        else:
            da, bins = freedman_bin_width(a, True)

    return np.histogram(a, bins, range, **kwargs)
Beispiel #12
0
def hist(x, bins=10, range=None, *args, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as pylab.hist().

    Parameters
    ----------
    x : array_like
        array of data to be histogrammed

    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
        ('knuths', 'scotts' and 'freedmans' are accepted as aliases)

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.

    **kwargs :
        other keyword arguments are described in pylab.hist().

    Returns
    -------
    Whatever ``ax.hist`` returns.

    Raises
    ------
    ValueError
        If `bins` is a string that is not a recognized bin code.

    Notes
    -----
    Extra positional arguments (``*args``) are accepted but not forwarded.
    """
    x = np.asarray(x)

    # Fall back to the current axes when none (or None) is supplied
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()

    # Only a string can be a bin code.  Testing the type first keeps
    # array-valued `bins` from being compared with strings, which yields an
    # element-wise result with an ambiguous truth value.
    if isinstance(bins, str):
        if bins not in ('blocks', 'knuth', 'knuths', 'scott', 'scotts',
                        'freedman', 'freedmans'):
            raise ValueError("unrecognized bin code: '%s'" % bins)

        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if range is not None:
            x = x[(x >= range[0]) & (x <= range[1])]

        if bins == 'blocks':
            bins = bayesian_blocks(x)
        elif bins in ('knuth', 'knuths'):
            dx, bins = knuth_bin_width(x, True)
        elif bins in ('scott', 'scotts'):
            dx, bins = scotts_bin_width(x, True)
        else:
            dx, bins = freedman_bin_width(x, True)

    return ax.hist(x, bins, range, **kwargs)
Beispiel #13
0
def hist(x, bins=10, range=None, *args, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as pylab.hist().

    Parameters
    ----------
    x : array_like
        array of data to be histogrammed

    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
        ('knuths', 'scotts' and 'freedmans' are accepted as aliases)

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.

    **kwargs :
        other keyword arguments are described in pylab.hist().

    Returns
    -------
    Whatever ``ax.hist`` returns.

    Raises
    ------
    ValueError
        If `bins` is a string that is not a recognized bin code.

    Notes
    -----
    Extra positional arguments (``*args``) are accepted but not forwarded.
    """
    x = np.asarray(x)

    # Fall back to the current axes when none (or None) is supplied
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()

    # Only a string can be a bin code.  Testing the type first keeps
    # array-valued `bins` from being compared with strings, which yields an
    # element-wise result with an ambiguous truth value.
    if isinstance(bins, str):
        if bins not in ('blocks', 'knuth', 'knuths', 'scott', 'scotts',
                        'freedman', 'freedmans'):
            raise ValueError("unrecognized bin code: '%s'" % bins)

        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if range is not None:
            x = x[(x >= range[0]) & (x <= range[1])]

        if bins == 'blocks':
            bins = bayesian_blocks(x)
        elif bins in ('knuth', 'knuths'):
            dx, bins = knuth_bin_width(x, True)
        elif bins in ('scott', 'scotts'):
            dx, bins = scotts_bin_width(x, True)
        else:
            dx, bins = freedman_bin_width(x, True)

    return ax.hist(x, bins, range, **kwargs)
Beispiel #14
0
def histogram(a, bins=10, range=None, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as numpy.histogram().

    Parameters
    ----------
    a : array_like
        array of data to be histogrammed

    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scotts' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    other keyword arguments are described in numpy.histogram().

    Returns
    -------
    hist : array
        The values of the histogram. See numpy.histogram for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.

    Raises
    ------
    ValueError
        If `bins` is a string that is not one of the codes listed above.

    See Also
    --------
    numpy.histogram
    astroML.plotting.hist
    """
    a = np.asarray(a)

    # Only a string can be a bin code.  Testing the type first keeps
    # array-valued `bins` from being compared with strings, which yields an
    # element-wise result with an ambiguous truth value.
    if isinstance(bins, str):
        if bins not in ('blocks', 'knuth', 'scotts', 'freedman'):
            raise ValueError("unrecognized bin code: '%s'" % bins)

        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if range is not None:
            a = a[(a >= range[0]) & (a <= range[1])]

        if bins == 'blocks':
            bins = bayesian_blocks(a)
        elif bins == 'knuth':
            da, bins = knuth_bin_width(a, True)
        elif bins == 'scotts':
            da, bins = scotts_bin_width(a, True)
        else:
            da, bins = freedman_bin_width(a, True)

    return np.histogram(a, bins, range, **kwargs)
Beispiel #15
0
def test_single_change_point():
    """Two uniform populations on [0, 1) and [1, 2) must split near 1."""
    np.random.seed(0)
    lower_part = np.random.random(100)
    upper_part = 1 + np.random.random(200)
    samples = np.concatenate([lower_part, upper_part])

    with catch_warnings(AstroMLDeprecationWarning):
        edges = bayesian_blocks(samples)

    # Exactly one interior edge is expected, close to the change point
    assert_(len(edges) == 3)
    assert_allclose(edges[1], 1, rtol=0.02)
Beispiel #16
0
def test_single_change_point():
    """Two uniform populations on [0, 1) and [1, 2) must split near 1."""
    np.random.seed(0)
    lower_part = np.random.random(100)
    upper_part = 1 + np.random.random(200)
    samples = np.concatenate([lower_part, upper_part])

    edges = bayesian_blocks(samples)

    # Exactly one interior edge is expected, close to the change point
    assert_(len(edges) == 3)
    assert_allclose(edges[1], 1, rtol=0.02)
Beispiel #17
0
def test_measures_fitness_heteroscedastic():
    """A narrow peak measured with per-point errors yields three blocks."""
    np.random.seed(1)
    times = np.linspace(0, 1, 11)
    # Noise-free signal: a gaussian bump of width 0.01 centred on t = 0.5
    signal = np.exp(-0.5 * (times - 0.5) ** 2 / 0.01 ** 2)
    # Per-point errors drawn between 0.02 and 0.04
    errors = 0.02 + 0.02 * np.random.random(len(signal))
    measured = np.random.normal(signal, errors)

    edges = bayesian_blocks(times, measured, errors, fitness='measures')

    assert_allclose(edges, [0, 0.45, 0.55, 1])
Beispiel #18
0
def test_single_change_point():
    """Two uniform populations on [0, 1) and [1, 2) must split near 1."""
    np.random.seed(0)
    lower_part = np.random.random(100)
    upper_part = 1 + np.random.random(200)
    samples = np.concatenate([lower_part, upper_part])

    with catch_warnings(AstroMLDeprecationWarning):
        edges = bayesian_blocks(samples)

    # Exactly one interior edge is expected, close to the change point
    assert_(len(edges) == 3)
    assert_allclose(edges[1], 1, rtol=0.02)
Beispiel #19
0
def test_regular_events():
    """Regularly sampled events with a rate change must split near t = 5."""
    np.random.seed(0)
    dt = 0.01
    # Unique tick indices: 100 draws from the first half of the ticks,
    # 200 draws from the second half
    early_ticks = np.unique(np.random.randint(0, 500, 100))
    late_ticks = np.unique(np.random.randint(500, 1000, 200))
    ticks = np.concatenate([early_ticks, late_ticks])
    times = dt * ticks

    edges = bayesian_blocks(times, fitness='regular_events', dt=dt)

    assert_(len(edges) == 3)
    assert_allclose(edges[1], 5, rtol=0.05)
Beispiel #20
0
def test_measures_fitness_heteroscedastic():
    """A narrow peak measured with per-point errors yields three blocks."""
    np.random.seed(1)
    times = np.linspace(0, 1, 11)
    # Noise-free signal: a gaussian bump of width 0.01 centred on t = 0.5
    signal = np.exp(-0.5 * (times - 0.5)**2 / 0.01**2)
    # Per-point errors drawn between 0.02 and 0.04
    errors = 0.02 + 0.02 * np.random.random(len(signal))
    measured = np.random.normal(signal, errors)

    with catch_warnings(AstroMLDeprecationWarning):
        edges = bayesian_blocks(times, measured, errors, fitness='measures')

    assert_allclose(edges, [0, 0.45, 0.55, 1])
Beispiel #21
0
def test_measures_fitness_homoscedastic():
    """A narrow peak measured with one shared error yields three blocks."""
    np.random.seed(0)
    times = np.linspace(0, 1, 11)
    # Noise-free signal: a gaussian bump of width 0.01 centred on t = 0.5
    signal = np.exp(-0.5 * (times - 0.5) ** 2 / 0.01 ** 2)
    error = 0.05
    measured = np.random.normal(signal, error)

    with catch_warnings(AstroMLDeprecationWarning):
        edges = bayesian_blocks(times, measured, error, fitness='measures')

    assert_allclose(edges, [0, 0.45, 0.55, 1])
Beispiel #22
0
def DualTreePeakProbs(data,flagMultimodal=False,saveBBlocks=False): 
    '''
    Creates a Bayesian blocks histogram of the set of values found for each parameter
    for each object. The peak probability value is taken to be the centre of the block
    with highest value.
    Inputs:
        data: DualTree Output array of size (#objects,mcIts,#params)
        flagMultimodal: also report, per object and parameter, whether more than
            one interior local maximum was found in the histogram
        saveBBlocks: also return the [bins, histogram] pair of every parameter
    Output:
        With both flags off: peak probability parameter values for each object,
        a NumPy array of size(#objects,#parameters).
        Otherwise a list: [peaks, multimodal], [peaks, blocks] or
        [peaks, multimodal, blocks], where peaks is a nested list.
        If the peak of a histogram cannot be located, (i, j) is printed and that
        histogram is returned immediately instead.
    '''
    allPeakLocs = [] #for all objects
    multimo = []
    bBlocks = []
    for i in range(len(data)):
        peakLocs = [] #for this object, each parameter
        myMultimo = []
        myBBlocks = []
        for j in range(len(data[0][0])):
            samples = data[i][:,j]
            bins = bayesian_blocks(samples,fitness='events',p0=0.05)
            histo = np.histogram(samples,bins)
            counts, edges = histo
            # Optional Bayesian Block Histogram storage
            if saveBBlocks:
                myBBlocks.append([bins,histo])
            try:
                nMax = np.argmax(counts)
            except ValueError:
                # argmax of an empty histogram: report which object/parameter
                print(i,j)
                return histo
            peakLocs.append((edges[nMax]+edges[nMax+1])/2.)
            # Optional check for possible multimodality. Not remotely rigorous, but I haven't seen it fail yet
            if flagMultimodal:
                # Interior bins strictly higher than both neighbours
                interior = counts[1:-1]
                nPeaks = np.sum((interior > counts[:-2]) & (interior > counts[2:]))
                myMultimo.append(bool(nPeaks > 1))
        if flagMultimodal:
            multimo.append(myMultimo)
        if saveBBlocks:
            bBlocks.append(myBBlocks)
        allPeakLocs.append(peakLocs)
    if flagMultimodal and saveBBlocks:
        return [allPeakLocs,multimo,bBlocks]
    if flagMultimodal:
        return [allPeakLocs,multimo]
    if saveBBlocks:
        return [allPeakLocs,bBlocks]
    return np.array(allPeakLocs)
Beispiel #23
0
def run_bayesianblocks(times, counts, p0):
    """Run Bayesian Blocks on counted events and sum the counts per block.

    Parameters
    ----------
    times : ndarray
        Event times.
    counts : ndarray
        Counts belonging to each entry of ``times``.
    p0 : float
        Forwarded as ``p0`` to ``bayesian_blocks``.

    Returns
    -------
    edges : array
        Block edges returned by ``bayesian_blocks``.
    binned_counts : list
        Sum of ``counts`` inside each block, one entry per block.
    """
    edges = bayesian_blocks(times, counts, fitness="events", p0=p0)
    n_blocks = len(edges) - 1
    binned_counts = []
    for i in range(n_blocks):
        lo, hi = edges[i], edges[i + 1]
        # Blocks are half-open [lo, hi) so an event sitting exactly on an
        # interior edge is counted once; only the last block includes its
        # right edge.
        if i == n_blocks - 1:
            in_block = (lo <= times) & (times <= hi)
        else:
            in_block = (lo <= times) & (times < hi)
        binned_counts.append(np.sum(counts[in_block]))
    return edges, binned_counts
Beispiel #24
0
def DualTreePeakProbs(data, flagMultimodal=False, saveBBlocks=False):
    '''
    Creates a Bayesian blocks histogram of the set of values found for each parameter
    for each object. The peak probability value is taken to be the centre of the block
    with highest value.
    Inputs:
        data: DualTree Output array of size (#objects,mcIts,#params)
        flagMultimodal: also report, per object and parameter, whether more than
            one interior local maximum was found in the histogram
        saveBBlocks: also return the [bins, histogram] pair of every parameter
    Output:
        With both flags off: peak probability parameter values for each object,
        a NumPy array of size(#objects,#parameters).
        Otherwise a list: [peaks, multimodal], [peaks, blocks] or
        [peaks, multimodal, blocks], where peaks is a nested list.
        If the peak of a histogram cannot be located, (i, j) is printed and that
        histogram is returned immediately instead.
    '''
    allPeakLocs = []  #for all objects
    multimo = []
    bBlocks = []
    for i in range(len(data)):
        peakLocs = []  #for this object, each parameter
        myMultimo = []
        myBBlocks = []
        for j in range(len(data[0][0])):
            samples = data[i][:, j]
            bins = bayesian_blocks(samples, fitness='events', p0=0.05)
            histo = np.histogram(samples, bins)
            counts, edges = histo
            # Optional Bayesian Block Histogram storage
            if saveBBlocks:
                myBBlocks.append([bins, histo])
            try:
                nMax = np.argmax(counts)
            except ValueError:
                # argmax of an empty histogram: report which object/parameter
                print(i, j)
                return histo
            peakLocs.append((edges[nMax] + edges[nMax + 1]) / 2.)
            # Optional check for possible multimodality. Not remotely rigorous, but I haven't seen it fail yet
            if flagMultimodal:
                # Interior bins strictly higher than both neighbours
                interior = counts[1:-1]
                nPeaks = np.sum((interior > counts[:-2]) & (interior > counts[2:]))
                myMultimo.append(bool(nPeaks > 1))
        if flagMultimodal:
            multimo.append(myMultimo)
        if saveBBlocks:
            bBlocks.append(myBBlocks)
        allPeakLocs.append(peakLocs)
    if flagMultimodal and saveBBlocks:
        return [allPeakLocs, multimo, bBlocks]
    if flagMultimodal:
        return [allPeakLocs, multimo]
    if saveBBlocks:
        return [allPeakLocs, bBlocks]
    return np.array(allPeakLocs)
Beispiel #25
0
def test_regular_events():
    """Regularly sampled events with a rate change must split near t = 5."""
    np.random.seed(0)
    dt = 0.01
    # Unique tick indices: 100 draws from the first half of the ticks,
    # 200 draws from the second half
    early_ticks = np.unique(np.random.randint(0, 500, 100))
    late_ticks = np.unique(np.random.randint(500, 1000, 200))
    ticks = np.concatenate([early_ticks, late_ticks])
    times = dt * ticks

    with catch_warnings(AstroMLDeprecationWarning):
        edges = bayesian_blocks(times, fitness='regular_events', dt=dt)

    assert_(len(edges) == 3)
    assert_allclose(edges[1], 5, rtol=0.05)
Beispiel #26
0
	def chckfile( fle, func ):
		"""Load cached bin edges from `fle`, or compute and cache them.

		If `fle` exists it is unpickled and stored in ``ab.bins`` as a float64
		array.  Otherwise Bayesian Blocks is run on ``ab.wts.flat``, the edges
		are stored in ``ab.bins`` and written out with ``ab.pckle``.

		`func` is accepted but not used.  ``ab`` comes from the enclosing scope.
		"""
		if os.path.isfile( fle ):
			# NOTE(review): pickle.load can execute arbitrary code; only use
			# cache files from a trusted location.
			# The with-block closes the file even if unpickling fails.
			with open( fle, 'rb' ) as bayesfile:
				bypkl = pickle.load( bayesfile )
			ab.bins = np.asarray( bypkl, dtype=np.float64 )
		else:
			ww = ab.wts.flat
			ab.bins = bayesian_blocks( ww ) #array of optimal bin_edges
			ab.pckle( ab.bins, fle )
			# Single pre-formatted string prints identically on Python 2 and 3
			print( 'bins** %s' % (ab.bins,) )
    def _size_bins(self,hist,bin_tool,**kwargs):
        """Wrapper for astroML routines to choose optimal bin widths.

        Parameters
        ----------
        hist : array_like
            Data handed to the selected bin-width routine.
        bin_tool : str or int
            'freedman', 'scotts', 'knuth' or 'blocks' to pick a routine, or an
            integer (Python or NumPy) that is returned unchanged as the bins.
        **kwargs
            Forwarded to ``bayesian_blocks`` when ``bin_tool`` is 'blocks'.

        Returns
        -------
        bins
            Bin edges from the chosen routine, or ``bin_tool`` itself when it
            is an integer.  Any other value logs a warning and falls back to
            the Freedman-Diaconis rule.
        """

        if bin_tool == 'freedman':
            _,bins = density_estimation.freedman_bin_width(hist,return_bins=True)
        elif bin_tool == 'scotts':
            _,bins = density_estimation.scotts_bin_width(hist,return_bins=True)
        elif bin_tool == 'knuth':
            _,bins = density_estimation.knuth_bin_width(hist,return_bins=True, disp=False)
        elif bin_tool == 'blocks':
            bins = density_estimation.bayesian_blocks(hist,**kwargs)
        elif isinstance(bin_tool, (int, np.integer)) and not isinstance(bin_tool, bool):
            # Any integer type is a bin count; bool is excluded so that
            # True/False still take the fallback below
            bins = bin_tool
        else:
            self.logger.warning("Unrecognized bin_tool option. Using Freedman-Diaconis rule.")
            _,bins = density_estimation.freedman_bin_width(hist,return_bins=True)

        return bins
Beispiel #28
0
    def histogram(self, data, bin_width='knuth', weights=None, density=None,
                  norm=None, ax=None, **kwargs):
        """
        Plots a histogram.

        Parameters
        ----------
        data : list or array
            Data to plot.

        bin_width : {'knuth', 'bayesian'} or float, optional
            Automatically determine the bin width using Knuth's rule
            (2006physics...5197K), with Bayesian blocks (2013ApJ...764..167S),
            or manually, choosing a floating point value.

        weights : array, optional
            An array of weights, of the same shape as `data`.  Each value in
            `data` only contributes its associated weight towards the bin
            count (instead of 1).  If `density` is True, the weights are
            normalized, so that the integral of the density over the range
            remains 1.

        density : bool, optional
            If False, the result will contain the number of samples
            in each bin.  If True, the result is the value of the
            probability *density* function at the bin, normalised such that
            the *integral* over the range is 1. Note that the sum of the
            histogram values will not be equal to 1 unless bins of unity
            width are chosen; it is not a probability *mass* function.

        norm : int or float
            Custom normalisation: the histogram values are divided by it.

        ax : `matplotlib.axes.Axes`, optional
            Axes instance.  Defaults to ``self.axes[0]``.

        **kwargs
            Forwarded to ``ax.plot``.

        Raises
        ------
        TypeError
            If `ax` is given and is not an `Axes` instance.
        ValueError
            If `bin_width` is neither a number nor a recognised rule name.

        """

        # Axes instance:
        if ax is None:
            ax = self.axes[0]

        elif not isinstance(ax, Axes):
            raise TypeError('ax must be of type `matplotlib.axes.Axes`')

        # Convert any non-array input (list, tuple, ...) to an array; arrays
        # and their subclasses are passed through untouched:
        if not isinstance(data, np.ndarray):
            data = np.asarray(data)

        if bin_width == 'knuth':
            _, bins = knuth_bin_width(data, return_bins=True)

        elif bin_width == 'bayesian':
            bins = bayesian_blocks(data)

        elif isinstance(bin_width, (int, float)):
            bins = np.arange(data.min(), data.max(), bin_width)

        else:
            raise ValueError('bin_width must be a number, or one of'
                             '(`knuth`, `bayesian`)')

        # Ensure padding with empty bins: one extra edge below the first
        # edge and two above the last, spaced by the narrowest bin width
        dx = np.diff(bins).min()
        bins = np.pad(bins, (1, 2), mode='linear_ramp',
                      end_values=(bins[0] - dx, bins[-1] + 2 * dx))

        # Calculate histogram:
        histogram, bins = np.histogram(
            data, bins, weights=weights, density=density)
        if norm:
            # Out-of-place division: integer counts cannot hold the float
            # result of an in-place `/=`
            histogram = histogram / norm

        # Plot data at the bin centres:
        ax.plot(bins[:-1] + np.diff(bins) / 2, histogram,
                drawstyle='steps-mid', **kwargs)
Beispiel #29
0
def set_plx_kde(t, bandwidth=0.3, method='sklearn_kde'):
    """ Set the plx_kde

    Builds a density estimate of the positive parallaxes in ``t['plx']`` and
    stores it in module-level globals: ``plx_kde`` for the two KDE methods
    (only when it is still ``None``), or ``plx_bins_blocks`` and
    ``plx_hist_blocks`` for the Bayesian Blocks method.

    Parameters
    ----------
    t : ndarray float
        Catalog of parallax measures (units: mas), indexed by the 'plx' field
    bandwidth : float
        Bandwidth for gaussian_kde / KernelDensity (optional, 0.01
        recommended); ``None`` uses the estimator's own default
    method : string
        Method for density determination (options: scipy_kde, sklearn_kde, blocks)
    """

    global plx_kde
    global plx_bins_blocks
    global plx_hist_blocks

    # Strings are compared by value; identity comparison only worked by
    # accident of interning
    if method == 'scipy_kde':

        if plx_kde is None:
            # We are only going to allow parallaxes above some minimum value
            plx_positive = t['plx'][t['plx'] > 0.0]
            if bandwidth is None:
                plx_kde = gaussian_kde(plx_positive)
            else:
                plx_kde = gaussian_kde(plx_positive, bw_method=bandwidth)

    elif method == 'sklearn_kde':
        if plx_kde is None:
            kwargs = {'kernel': 'tophat'}
            if bandwidth is None:
                plx_kde = KernelDensity(**kwargs)
            else:
                plx_kde = KernelDensity(bandwidth=bandwidth, **kwargs)

            plx_positive = t['plx'][t['plx'] > 0.0]
            if c.kde_subset:
                # Fit on a random subset of at most 5000 parallaxes
                plx_ran = np.copy(plx_positive)
                np.random.shuffle(plx_ran)
                plx_kde.fit(plx_ran[0:5000, np.newaxis])
            else:
                plx_kde.fit(plx_positive[:, np.newaxis])

    elif method == 'blocks':
        # Set up Bayesian Blocks
        print("Calculating Bayesian Blocks...")
        nbins = min(len(t), 40000)
        plx_sample = t['plx'][t['plx'] > 0.0][0:nbins]
        bins = bayesian_blocks(plx_sample)
        # density=True normalises correctly for unequal bin widths; the old
        # `normed` keyword is no longer accepted by NumPy
        hist, bins = np.histogram(plx_sample, bins=bins, density=True)

        # Pad with zeros
        plx_bins_blocks = np.append(-1.0e100, bins)
        hist_pad = np.append(0.0, hist)
        plx_hist_blocks = np.append(hist_pad, 0.0)
        print("Bayesian Blocks set.")

    else:
        print("You must include a valid method")
        print("Options: scipy_kde, sklearn_kde or blocks")
        return
Beispiel #30
0
def plot_stacked_timeseries_histogram(total_counts,
                                      itemized_counts=None,
                                      ax=None):
    r"""Create a time series histogram with stacked counts labeled by category.

    Convenience function for methods from
    `astroML.density_estimation.bayesian_blocks`. The bin edges are computed
    from `total_counts` with ``fitness='events'``; the same edges are used
    for every category when the histogram is stacked.

    Args:
        total_counts (collections.Counter): Total counts by time.
            Example: total_counts.items() = [(1, 5), (2, 4), ...]
                where day 1 had 5 total counts, day 2 had 4 total counts...
            Must not be empty.
        itemized_counts (optional, dict): `dict` of `collections.Counter`.
            If `None` (default), histogram is not stacked.
            Keys: `hashable` label for each type of event. To preserve
                key order, use `collections.OrderedDict`.
            Values: `collections.Counter` counts by time.
            Example: itemized_counts = dict(a=counter_a, b=counter_b)
                where counter_a.items() = [(1, 1), (2, 1), ...]
                and   counter_b.items() = [(1, 4), (2, 3), ...]
            Required: The `total_counts` must equal the sum of all
                `itemized_counts`
        ax (optional, matplotlib.Axes): Axes instance on which to add the plot.
            If `None` (default), an axes instance is created.

    Returns:
        ax (matplotlib.axes): Axes instance for the plot.

    Raises:
        AssertionError:
            If `total_counts` does not equal the sum of all `itemized_counts`.
        ValueError:
            If `total_counts` is empty.

    See Also:
        astroML.density_estimation.bayesian_blocks

    Notes:
        * This simple implementation assumes that the times are not regularly
            spaced and that the data are counts of events.
        * Example call with ax=`None`:
            ax = plot_stacked_timeseries_histogram(
                total_counts=total_counts,
                itemized_counts=itemized_counts,
                ax=None)
            ax.legend(loc='upper left')
            plt.show()
        * Example call with ax defined:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax = plot_stacked_timeseries_histogram(
                total_counts=total_counts,
                itemized_counts=itemized_counts,
                ax=ax)
            ax.legend(loc='upper left')
            plt.show()

    """
    # Check input.
    if itemized_counts is not None:
        summed_itemized_counts = collections.Counter()
        for counter in itemized_counts.values():
            summed_itemized_counts.update(counter)
        if not total_counts == summed_itemized_counts:
            raise AssertionError(
                "`total_counts` must equal the sum of all `itemized_counts`.")
    if not total_counts:
        # Without this guard the tuple unpacking below fails with an opaque
        # "not enough values to unpack" ValueError.
        raise ValueError("`total_counts` must contain at least one time.")
    # Calculate histogram bins.
    (times, counts) = zip(*total_counts.items())
    bin_edges = astroML_dens.bayesian_blocks(t=times,
                                             x=counts,
                                             fitness='events')
    # Create plot.
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    if itemized_counts is None:
        ax.hist(list(total_counts.elements()),
                bins=bin_edges,
                stacked=False,
                rwidth=1.0,
                label=None,
                color=sns.color_palette()[0])
    else:
        # Materialise the keys: a `dict_keys` view is not a sequence, so
        # passing it as `label` does not yield one label per dataset.
        keys = list(itemized_counts)
        ax.hist([list(itemized_counts[key].elements()) for key in keys],
                bins=bin_edges,
                stacked=True,
                rwidth=1.0,
                label=keys,
                color=sns.husl_palette(n_colors=len(keys)))
    return ax
Beispiel #31
0
    def histogram(self,
                  data,
                  bin_width='knuth',
                  weights=None,
                  density=None,
                  norm=None,
                  ax=None,
                  **kwargs):
        """
        Plot a histogram of `data` as a stepped line.

        The bin edges are padded with one empty bin below and two above the
        computed range before the histogram is evaluated, and the result is
        drawn at the bin centres with ``drawstyle='steps-mid'``.

        Parameters
        ----------
        data : list, tuple or array
            Data to plot.

        bin_width : {'knuth', 'bayesian'} or float, optional
            Automatically determine the bin width using Knuth's rule
            (2006physics...5197K), with Bayesian blocks (2013ApJ...764..167S),
            or manually, choosing a floating point value.

        weights : array, optional
            An array of weights, of the same shape as `data`.  Each value in
            `data` only contributes its associated weight towards the bin
            count (instead of 1).  If `density` is True, the weights are
            normalized, so that the integral of the density over the range
            remains 1.

        density : bool, optional
            If False, the result will contain the number of samples
            in each bin.  If True, the result is the value of the
            probability *density* function at the bin, normalised such that
            the *integral* over the range is 1. Note that the sum of the
            histogram values will not be equal to 1 unless bins of unity
            width are chosen; it is not a probability *mass* function.

        norm : int or float
            Custom normalisation: the histogram values are divided by this
            number.  Ignored when falsy (``None`` or ``0``).

        ax : `matplotlib.axes.Axes`, optional
            Axes instance.  Defaults to ``self.axes[0]``.

        **kwargs
            Forwarded to ``ax.plot``.

        Raises
        ------
        TypeError
            If `ax` is given but is not a `matplotlib.axes.Axes`.
        ValueError
            If `bin_width` is neither a number nor a recognised rule name.

        """

        # Axes instance:
        if ax is None:
            ax = self.axes[0]

        elif not isinstance(ax, Axes):
            raise TypeError('ax must be of type `matplotlib.axes.Axes`')

        # Convert plain Python sequences to an array (``.min()``/``.max()``
        # are needed below):
        if isinstance(data, (list, tuple)):
            data = np.array(data)

        if bin_width == 'knuth':
            _, bins = knuth_bin_width(data, return_bins=True)

        elif bin_width == 'bayesian':
            bins = bayesian_blocks(data)

        elif isinstance(bin_width, (int, float)):
            bins = np.arange(data.min(), data.max(), bin_width)

        else:
            raise ValueError('bin_width must be a number, or one of'
                             '(`knuth`, `bayesian`)')

        # Ensure padding with empty bins:
        dx = np.diff(bins).min()
        bins = np.pad(bins, (1, 2),
                      mode='linear_ramp',
                      end_values=(bins[0] - dx, bins[-1] + 2 * dx))

        # Calculate histogram:
        histogram, bins = np.histogram(data,
                                       bins,
                                       weights=weights,
                                       density=density)
        if norm:
            # Out-of-place division: the counts are integers when `density`
            # is falsy, and an in-place true division cannot be cast back
            # into an integer array.
            histogram = histogram / norm

        # Plot data:
        ax.plot(bins[:-1] + np.diff(bins) / 2,
                histogram,
                drawstyle='steps-mid',
                **kwargs)
Beispiel #32
0
def create_scatter_hist(data, sigcutx, sigcuty, paramx, paramy, range_x,
                        range_y, dataset_id, frequencies):
    """Plot the eta/V scatter diagram with marginal histograms.

    The figure is saved as ``ds<dataset_id>_scatter_hist.png`` in the
    current working directory and then closed.

    Parameters
    ----------
    data : sequence of rows
        Each row is indexed as ``row[0]`` (identifier), ``row[1]`` (x value),
        ``row[2]`` (y value) and ``row[3]`` (category label matched against
        `frequencies`).
        NOTE(review): the tick labels are rendered as ``10^{n}``, so the x/y
        values are presumably log10 quantities -- confirm with the caller.
    sigcutx, sigcuty : number
        Thresholds on the x and y values. Dashed threshold lines are drawn
        unless both are 0.
    paramx, paramy : sequence
        ``[0]`` is passed as ``loc`` and ``[1]`` as ``scale`` to ``norm.pdf``
        for the curves overplotted on the x and y histograms.
    range_x, range_y : array-like
        Abscissae at which the two fitted curves are evaluated.
    dataset_id : object
        Used (via ``str``) in the output file name.
    frequencies : list of str
        Category labels. Sorted **in place**; if ``"TP"`` is present the
        labels ``["TN", "TP", "FN", "FP"]`` are used for plotting instead.

    Returns
    -------
    numpy.ndarray
        Sorted unique ``row[0]`` values of the rows with
        ``row[1] > sigcutx`` and ``row[2] > sigcuty``.
    """
    print('plotting figure: scatter histogram plot')

    frequencies.sort()
    if "TP" in frequencies:
        # if the data is classified, we ensure that the "frequencies" are correct
        frequencies = ["TN", "TP", "FN", "FP"]
    if "stable" in frequencies:
        freq_labels = [name.replace("_", " ") for name in frequencies]
    else:
        freq_labels = frequencies

    # Setting up the plot
    nullfmt = NullFormatter()  # no labels
    fontP = FontProperties()
    fontP.set_size('large')
    col = make_colours(frequencies)
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left + width + 0.02
    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom_h, width, 0.2]
    rect_histy = [left_h, bottom, 0.2, height]
    fig = plt.figure(1, figsize=(12, 12))
    axScatter = fig.add_subplot(223, position=rect_scatter)
    plt.xlabel(r'$\eta_{\nu}$', fontsize=28)
    plt.ylabel(r'$V_{\nu}$', fontsize=28)
    axHistx = fig.add_subplot(221, position=rect_histx)
    axHisty = fig.add_subplot(224, position=rect_histy)

    # Plotting data - scatter plot (one call per category so that the legend
    # entries line up with freq_labels)
    for i, label in enumerate(frequencies):
        xdata_var = [row[1] for row in data if row[3] == label]
        ydata_var = [row[2] for row in data if row[3] == label]
        if label == 'stable':
            axScatter.scatter(xdata_var, ydata_var, color='0.75', s=10.,
                              zorder=1)
        else:
            axScatter.scatter(xdata_var, ydata_var, color=col[i], s=10.,
                              zorder=5)

    # The histograms use only the stable / FP / TN rows when such labels
    # exist, otherwise every row.
    if 'stable' in frequencies or 'TN' in frequencies:
        background = ('stable', 'FP', 'TN')
        x = [row[1] for row in data if row[3] in background]
        y = [row[2] for row in data if row[3] in background]
    else:
        x = [row[1] for row in data]
        y = [row[2] for row in data]

    # Plotting histograms with bayesian blocks binning
    binsx = _drop_narrow_bins(density_estimation.bayesian_blocks(x))
    binsy = _drop_narrow_bins(density_estimation.bayesian_blocks(y))
    # `density=True` replaces the `normed` keyword, which Matplotlib removed.
    axHistx.hist(x, bins=binsx, density=True, histtype='stepfilled',
                 color='b')
    axHisty.hist(y, bins=binsy, density=True, histtype='stepfilled',
                 orientation='horizontal', color='b')
    axScatter.legend(freq_labels, loc=4, prop=fontP)

    # Plotting lines representing thresholds (unless no thresholds)
    if sigcutx != 0 or sigcuty != 0:
        axHistx.axvline(x=sigcutx, linewidth=2, color='k', linestyle='--')
        axHisty.axhline(y=sigcuty, linewidth=2, color='k', linestyle='--')
        axScatter.axhline(y=sigcuty, linewidth=2, color='k', linestyle='--')
        axScatter.axvline(x=sigcutx, linewidth=2, color='k', linestyle='--')

    # Plotting the Gaussian fits
    fit = norm.pdf(range_x, loc=paramx[0], scale=paramx[1])
    axHistx.plot(range_x, fit, 'k:', linewidth=2)
    fit2 = norm.pdf(range_y, loc=paramy[0], scale=paramy[1])
    axHisty.plot(fit2, range_y, 'k:', linewidth=2)

    # Final plot settings
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)
    axHistx.axes.yaxis.set_ticklabels([])
    axHisty.axes.xaxis.set_ticklabels([])
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())
    all_x = [row[1] for row in data]
    all_y = [row[2] for row in data]
    xmin = int(min(all_x) - 1)
    xmax = int(max(all_x)) + 1
    ymin = int(min(all_y) - 1)
    ymax = int(max(all_y)) + 1
    xvals = list(range(xmin, xmax))
    xtxts = [r'$10^{' + str(a) + '}$' for a in xvals]
    yvals = list(range(ymin, ymax))
    ytxts = [r'$10^{' + str(a) + '}$' for a in yvals]
    axScatter.set_xlim([xmin, xmax])
    axScatter.set_ylim([ymin, ymax])
    axScatter.set_xticks(xvals)
    axScatter.set_xticklabels(xtxts, fontsize=20)
    axScatter.set_yticks(yvals)
    axScatter.set_yticklabels(ytxts, fontsize=20)
    # Re-sync the histogram axes now that the scatter limits have changed.
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())
    plt.savefig('ds' + str(dataset_id) + '_scatter_hist.png')

    # find all the variable candidates: ids of rows above both thresholds,
    # de-duplicated in linear time (the final order comes from the sort)
    candidate_ids = list(dict.fromkeys(
        row[0] for row in data if row[1] > sigcutx and row[2] > sigcuty))
    IdTrans = np.sort(candidate_ids, axis=0)

    plt.close()

    return IdTrans


def _drop_narrow_bins(edges, min_rel_width=0.05):
    """Drop left edges of bins whose width relative to the left edge is small.

    A left edge is kept when ``abs((right - left) / left) > min_rel_width``;
    the final edge is always kept.
    """
    kept = [lo for lo, hi in zip(edges[:-1], edges[1:])
            if abs((hi - lo) / lo) > min_rel_width]
    return kept + [edges[-1]]
Beispiel #33
0
    def MakeBlocks(self, p0):
        """Bin ``self.evts`` with ``bayesian_blocks`` and record the result.

        ``p0`` is forwarded unchanged as the ``p0`` keyword of
        ``bayesian_blocks``. Afterwards ``self.bins`` holds the returned
        edges, ``self.bType`` is ``"bb"`` and ``self.binWidth`` holds
        ``diff(self.bins)``.
        """
        edges = bayesian_blocks(self.evts, p0=p0)
        self.bins = edges
        self.bType = "bb"
        # presumably numpy.diff, i.e. the width of each block -- confirm
        # against the file's imports
        self.binWidth = diff(self.bins)
Beispiel #34
0
    def MakeBlocks(self, p0):
        """Bin ``self.evts`` with ``bayesian_blocks`` and record the result.

        ``p0`` is forwarded unchanged as the ``p0`` keyword of
        ``bayesian_blocks``. Afterwards ``self.bins`` holds the returned
        edges, ``self.bType`` is ``"bb"`` and ``self.binWidth`` holds
        ``diff(self.bins)``.
        """
        edges = bayesian_blocks(self.evts, p0=p0)
        self.bins = edges
        self.bType = "bb"
        # presumably numpy.diff, i.e. the width of each block -- confirm
        # against the file's imports
        self.binWidth = diff(self.bins)
Beispiel #35
0
def create_scatter_hist(data, sigcutx, sigcuty, paramx, paramy, range_x,
                        range_y, dataset_id, frequencies):
    """Plot the eta/V scatter diagram with marginal histograms.

    The figure is saved as ``ds<dataset_id>_scatter_hist.png`` in the
    current working directory and then closed.

    Parameters
    ----------
    data : sequence of rows
        Each row is indexed as ``row[0]`` (identifier), ``row[1]`` (x value),
        ``row[2]`` (y value) and ``row[3]`` (category label matched against
        `frequencies`).
        NOTE(review): the tick labels are rendered as ``10^{n}``, so the x/y
        values are presumably log10 quantities -- confirm with the caller.
    sigcutx, sigcuty : number
        Thresholds on the x and y values. Dashed threshold lines are drawn
        unless both are 0.
    paramx, paramy : sequence
        ``[0]`` is passed as ``loc`` and ``[1]`` as ``scale`` to ``norm.pdf``
        for the curves overplotted on the x and y histograms.
    range_x, range_y : array-like
        Abscissae at which the two fitted curves are evaluated.
    dataset_id : object
        Used (via ``str``) in the output file name.
    frequencies : list of str
        Category labels. Sorted **in place**; if ``"TP"`` is present the
        labels ``["TN", "TP", "FN", "FP"]`` are used for plotting instead.

    Returns
    -------
    numpy.ndarray
        Sorted unique ``row[0]`` values of the rows with
        ``row[1] > sigcutx`` and ``row[2] > sigcuty``.
    """
    print('plotting figure: scatter histogram plot')

    frequencies.sort()
    if "TP" in frequencies:
        # if the data is classified, we ensure that the "frequencies" are correct
        frequencies = ["TN", "TP", "FN", "FP"]
    if "stable" in frequencies:
        freq_labels = [name.replace("_", " ") for name in frequencies]
    else:
        freq_labels = frequencies

    # Setting up the plot
    nullfmt = NullFormatter()  # no labels
    fontP = FontProperties()  # legend font, left at its default size
    col = make_colours(frequencies)
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left + width + 0.02
    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom_h, width, 0.2]
    rect_histy = [left_h, bottom, 0.2, height]
    fig = plt.figure(1, figsize=(12, 12))
    axScatter = fig.add_subplot(223, position=rect_scatter)
    plt.xlabel(r'$\eta_{\nu}$', fontsize=28)
    plt.ylabel(r'$V_{\nu}$', fontsize=28)
    axHistx = fig.add_subplot(221, position=rect_histx)
    axHisty = fig.add_subplot(224, position=rect_histy)

    # Plotting data - scatter plot (one call per category so that the legend
    # entries line up with freq_labels)
    for i, label in enumerate(frequencies):
        xdata_var = [row[1] for row in data if row[3] == label]
        ydata_var = [row[2] for row in data if row[3] == label]
        if label == 'stable':
            axScatter.scatter(xdata_var,
                              ydata_var,
                              color='0.75',
                              s=10.,
                              zorder=1)
        else:
            axScatter.scatter(xdata_var,
                              ydata_var,
                              color=col[i],
                              s=10.,
                              zorder=5)

    # The histograms use only the stable / FP / TN rows when such labels
    # exist, otherwise every row.
    if 'stable' in frequencies or 'TN' in frequencies:
        background = ('stable', 'FP', 'TN')
        x = [row[1] for row in data if row[3] in background]
        y = [row[2] for row in data if row[3] in background]
    else:
        x = [row[1] for row in data]
        y = [row[2] for row in data]

    # Plotting histograms with bayesian blocks binning
    binsx = _drop_narrow_bins(density_estimation.bayesian_blocks(x))
    binsy = _drop_narrow_bins(density_estimation.bayesian_blocks(y))
    # `density=True` replaces the `normed` keyword, which Matplotlib removed.
    axHistx.hist(x, bins=binsx, density=True, histtype='stepfilled',
                 color='b')
    axHisty.hist(y,
                 bins=binsy,
                 density=True,
                 histtype='stepfilled',
                 orientation='horizontal',
                 color='b')
    axScatter.legend(freq_labels, loc=4, prop=fontP)

    # Plotting lines representing thresholds (unless no thresholds)
    if sigcutx != 0 or sigcuty != 0:
        axHistx.axvline(x=sigcutx, linewidth=2, color='k', linestyle='--')
        axHisty.axhline(y=sigcuty, linewidth=2, color='k', linestyle='--')
        axScatter.axhline(y=sigcuty, linewidth=2, color='k', linestyle='--')
        axScatter.axvline(x=sigcutx, linewidth=2, color='k', linestyle='--')

    # Plotting the Gaussian fits
    fit = norm.pdf(range_x, loc=paramx[0], scale=paramx[1])
    axHistx.plot(range_x, fit, 'k:', linewidth=2)
    fit2 = norm.pdf(range_y, loc=paramy[0], scale=paramy[1])
    axHisty.plot(fit2, range_y, 'k:', linewidth=2)

    # Final plot settings
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)
    axHistx.axes.yaxis.set_ticklabels([])
    axHisty.axes.xaxis.set_ticklabels([])
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())
    all_x = [row[1] for row in data]
    all_y = [row[2] for row in data]
    xmin = int(min(all_x) - 1)
    xmax = int(max(all_x) + 1)
    ymin = int(min(all_y) - 1)
    ymax = int(max(all_y) + 1)
    xvals = list(range(xmin, xmax))
    xtxts = [r'$10^{' + str(a) + '}$' for a in xvals]
    yvals = list(range(ymin, ymax))
    ytxts = [r'$10^{' + str(a) + '}$' for a in yvals]
    axScatter.set_xlim([xmin, xmax])
    axScatter.set_ylim([ymin, ymax])
    axScatter.set_xticks(xvals)
    axScatter.set_xticklabels(xtxts, fontsize=20)
    axScatter.set_yticks(yvals)
    axScatter.set_yticklabels(ytxts, fontsize=20)
    # Re-sync the histogram axes now that the scatter limits have changed.
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())
    plt.savefig('ds' + str(dataset_id) + '_scatter_hist.png')

    # find all the variable candidates: ids of rows above both thresholds,
    # de-duplicated in linear time (the final order comes from the sort)
    candidate_ids = list(dict.fromkeys(
        row[0] for row in data if row[1] > sigcutx and row[2] > sigcuty))
    IdTrans = np.sort(candidate_ids, axis=0)

    plt.close()

    return IdTrans


def _drop_narrow_bins(edges, min_rel_width=0.05):
    """Drop left edges of bins whose width relative to the left edge is small.

    A left edge is kept when ``abs((right - left) / left) > min_rel_width``;
    the final edge is always kept.
    """
    kept = [lo for lo, hi in zip(edges[:-1], edges[1:])
            if abs((hi - lo) / lo) > min_rel_width]
    return kept + [edges[-1]]