def integrate_2dfield(egrid, f, mask):
    '''
    Integrates a 2d field over a certain region given a mask and grid.
    
    
    Parameters
    ----------
    egrid : tuple or list of two numpy arrays
        edge-based grid such that:  xeg, yeg = egrid

    f : numpy array, 2dim
        input field that is integrated

    mask : numpy array, 2dim, bool
        mask of a certain region that is used in integration (same shape as f)
    

    Returns
    -------
    fint : float value
        integral of f for the region where mask is True
    '''

    xeg, yeg = egrid

    dx = gi.lmean(np.diff(xeg, axis=0), axis=1)
    dy = gi.lmean(np.diff(yeg, axis=1), axis=0)

    fint = (f[mask] * dx[mask] * dy[mask]).sum()

    return fint
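
# A minimal usage sketch for integrate_2dfield, assuming the module-level
# imports (numpy as np, the grid helper gi) are in place. The grid, field
# and mask below are made up for illustration.
def _demo_integrate_2dfield():
    # edge-based grid with 1 x 1 cells
    xeg, yeg = np.meshgrid(np.linspace(0., 10., 11),
                           np.linspace(0., 5., 6), indexing='ij')
    f = np.ones((10, 5))                 # constant center-based field
    mask = np.zeros((10, 5), dtype=bool)
    mask[2:4, 1:3] = True                # four cells selected
    # integrating f = 1 over the mask returns the masked area, here 4.0
    return integrate_2dfield((xeg, yeg), f, mask)
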
def radial_integration_2d_field(egrid, f, rbins, p0):
    '''
    Integrates a 2d field over a certain region given a base point 
    and the ring edges.
    
    
    Parameters
    ----------
    egrid :  tuple or list of two numpy arrays
        edge-based grid such that:  xeg, yeg = egrid

    f : numpy array, 2dim
        2d field

    rbins : numpy array
       array of range bins

    p0 : tuple of 2 float values
        base point, i.e. x0, y0 = p0

    
    Returns
    --------
    I : numpy array
        radial integral of f for each ring between consecutive rbins entries
    '''

    # get the equi-distant grids
    xeg, yeg = egrid

    xe = xeg.mean(axis=1)
    ye = yeg.mean(axis=0)

    xm = gi.lmean(xe)
    ym = gi.lmean(ye)

    # transformed function: now in cylinder coordinates
    ftrans = transform2cylinder_coords(xm, ym, f, p0=p0)
    kernel = lambda r, phi: r * ftrans(r, phi)

    nbins = rbins.shape[0]
    I = np.zeros(nbins - 1)
    for i in range(nbins - 1):
        r1 = rbins[i]
        r2 = rbins[i + 1]
        I[i] = scipy.integrate.nquad(kernel, [[r1, r2], [0, 2 * np.pi]])[0]
    return I
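
# A hedged usage sketch: for a constant field f = 1, each ring integral
# should approach the ring area pi * (r2**2 - r1**2). This assumes the
# helper transform2cylinder_coords and scipy.integrate behave as used above;
# all numbers are illustrative.
def _demo_radial_integration():
    xeg, yeg = np.meshgrid(np.linspace(-50., 50., 101),
                           np.linspace(-50., 50., 101), indexing='ij')
    f = np.ones((100, 100))
    rbins = np.array([0., 10., 20.])
    # expected roughly [pi * 100, pi * 300]
    return radial_integration_2d_field((xeg, yeg), f, rbins, p0=(0., 0.))
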
def radial_ring_integration_2d_field(egrid, f, r1, r2, p0):
    '''
    Integrates a 2d field over a certain region given a base point 
    and the ring edges.
    
    
    Parameters
    ----------
    egrid : tuple or list of two numpy arrays
        edge-based grid such that:  xeg, yeg = egrid

    f : numpy array, 2dim
        2d field

    r1 : float value
        inner ring radius

    r2 : float value
        outer ring radius

    p0 : tuple of 2 float values
        base point, i.e. x0, y0 = p0


    Returns
    --------
    fint : float value
        integral of f over the ring between r1 and r2
    '''

    # get the equi-distant grids
    xeg, yeg = egrid

    xe = xeg.mean(axis=1)
    ye = yeg.mean(axis=0)

    xm = gi.lmean(xe)
    ym = gi.lmean(ye)

    # transformed function: now in cylinder coordinates
    ftrans = transform2cylinder_coords(xm, ym, f, p0=p0)
    kernel = lambda r, phi: r * ftrans(r, phi)

    I = scipy.integrate.nquad(kernel, [[r1, r2], [0, 2 * np.pi]])
    return I[0]
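
# A one-ring counterpart to the sketch above: for f = 1 the result should
# approach pi * (r2**2 - r1**2), again assuming transform2cylinder_coords
# works as used above.
def _demo_radial_ring_integration():
    xeg, yeg = np.meshgrid(np.linspace(-50., 50., 101),
                           np.linspace(-50., 50., 101), indexing='ij')
    f = np.ones((100, 100))
    return radial_ring_integration_2d_field((xeg, yeg), f, 10., 20.,
                                            p0=(0., 0.))
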
def hist3d(v1, v2, v3, bins):
    '''
    A wrapper for 3d histogram binning. 

    It additionally generates the average bin values with the same 3d shape 
    as histogram itself.

    
    Parameters
    ----------
    v1 : np.array
        1st data vector

    v2 : np.array
        2nd data vector

    v3 : np.array
        3rd data vector
    
    bins : list of 3 np.arrays
        bins argument passed to the np.histogramdd function


    Returns
    -------
    bins3d : list of 3dim np.array
        mesh of average bin values (bin mid-points)
    
    h : np.array
        absolute histogram counts

    '''

    h, bs = np.histogramdd([v1, v2, v3], bins)

    # generate average bin value
    bave = []
    for b in bs:
        bave.append(gi.lmean(b))

    bins3d = np.meshgrid(*bave, indexing='ij')

    return bins3d, h
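
# A short usage sketch for hist3d with random data; the bin edges are
# arbitrary illustration values.
def _demo_hist3d():
    v1, v2, v3 = np.random.randn(3, 1000)
    bins = [np.linspace(-3., 3., 7)] * 3
    bins3d, h = hist3d(v1, v2, v3, bins)
    # h.shape == (6, 6, 6); bins3d[0] holds the x bin centers on the same mesh
    return bins3d, h
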
def calculate_pcf4clust( c, egrid, nd_ref, 
                         weight = 1., 
                         rMax = 500., 
                         pcf_sensitivity = True,
                         dr = 20.,
                         verbose = False):


    '''
    Calculates Pair-correlation function for a cluster set.


    Parameters
    -----------
    c : dict
        set of cluster properties

    egrid : tuple or list of two numpy arrays
        edge-based grid of reference number density

    nd_ref : numpy array, 2dim
        reference number density

    weight : float, optional, default = 1.
        weight between equi-distant and equal-area range binning
        0 = equal-area
        1 = equi-distant

    rMax : float, optional, default = 500.
        largest range bin

    pcf_sensitivity : bool, optional, default = True
        switch if pcf is additionally calculated for alternative reference
        number densities (homogeneous and/or time-constant)

    dr : float, optional, default = 20.
        range ring interval
    
    
    verbose : bool, optional, default = False
        switch if more standard output is provided


    Returns
    --------
    c : dict
        set of cluster props with pcf field updated
    '''


    # read input data ------------------------------------------------
    ncells = len( c['abs_time'] )
    # ================================================================

    
    # get norm of cell density function ------------------------------
    dx = gi.lmean(np.diff(egrid[0], axis = 0), axis = 1)
    dy = gi.lmean(np.diff(egrid[1], axis = 1), axis = 0)

    ncells_per_slot = (dx * dy * nd_ref).sum()
    nd_normed = nd_ref / ncells_per_slot
    # ================================================================

    
    # and also define homogeneous cell density function --------------
    nd_const = 1. / (dx * dy * np.size(nd_ref))
    # ================================================================
    
    
    # get cell coordinates and cutout region -------------------------
    xt, yt = c['x_mean'], c['y_mean']
    # ================================================================


    # loop over all time ids -----------------------------------------
    if 'time_id' in c.keys():
        tname = 'time_id'
    else:
        tname = 'rel_time'

    time_ids = sorted( set(c[tname]) )
    
    rbins = radius_ring_edges(rMax, dr, weight = weight)
    nbins = len(rbins)

   
    c['pcf'] = np.nan * np.ma.ones( (ncells, nbins - 1) )
    
    if pcf_sensitivity:
        for pcf_name in ['pcf_hom', 'pcf_hom_tconst', 'pcf_tconst']:
            c[pcf_name] = np.nan * np.ma.ones( (ncells, nbins - 1) )
            

    for tid in time_ids:

        # masking 
        m = (c[tname] == tid) 
        ccount = m.sum()
        

        pos = np.array([xt[m], yt[m]]).T

        if verbose:
            print(tid, m.sum())

        # correct for temporal variation of cell number
        nd_corr = ccount * nd_normed
        
        # pair-correlation analysis with variable BG density
        gfull, g, rbins, indices = pairCorrelationFunction_2D(pos, egrid, nd_corr, rMax, dr, weight = weight)

        gfull_with_edge = np.nan *  np.ma.ones( (ccount, nbins - 1) )
        gfull_with_edge[indices] = gfull
        c['pcf'][m] = gfull_with_edge
        c['pcf'] = np.ma.masked_invalid( c['pcf'] )
        
        if pcf_sensitivity:
            
            nd = {}
            nd['pcf_hom'] = ccount * nd_const
            nd['pcf_hom_tconst'] = ncells_per_slot * nd_const
            nd['pcf_tconst'] = ncells_per_slot * nd_normed
            
            for pcf_name in nd.keys():
                
                # pair-correlation analysis with variable BG density
                gfull, g, rbins, indices = pairCorrelationFunction_2D(pos, 
                                                                      egrid, 
                                                                      nd[pcf_name],
                                                                      rMax, dr, weight = weight)

                gfull_with_edge = np.nan *  np.ma.ones( (ccount, nbins - 1) )
                gfull_with_edge[indices] = gfull
                
                c[pcf_name][m] = gfull_with_edge
                c[pcf_name] = np.ma.masked_invalid( c[pcf_name] )


    c['rbins_pcf'] = rbins
    # ================================================================



    return c
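
# A hedged usage sketch for calculate_pcf4clust: the cluster dict needs the
# keys read above ('abs_time', 'x_mean', 'y_mean' and a time label); the
# uniform reference density and domain size are made up for illustration.
def _demo_calculate_pcf4clust():
    n = 50
    c = dict(abs_time=np.arange(n), rel_time=np.zeros(n),
             x_mean=500. * np.random.rand(n),
             y_mean=500. * np.random.rand(n))
    xeg, yeg = np.meshgrid(np.arange(0., 550., 50.),
                           np.arange(0., 550., 50.), indexing='ij')
    nd_ref = np.full((10, 10), n / 500.**2)   # uniform number density
    c = calculate_pcf4clust(c, (xeg, yeg), nd_ref, rMax=100., dr=20.)
    return c['pcf']                           # one masked g(r) row per cell
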
def calculate_average_numberdensity(d,
                                    smooth_sig = 2.,
                                    filter_in_logspace = False,
                                    var_names = None,
                                    bins = None,
                                    itime_range = None,
                                    dx = 20. ,
                                    dy = 20.,):

    
    '''
    Calculates average number density.
    

    Parameters
    ----------
    d : dict of arrays
       set of cell properties
    
    smooth_sig : float, optional, default = 2.
       sigma of Gaussian filter for smoothing the results

    filter_in_logspace : bool, optional, default = False
       switch if filter is applied in log-space

    var_names : list of two strings, optional, default = None
       list of the two variables used for binning
       if None: 'x_mean' and 'y_mean' are used
       
    bins : list or tuple of two numpy arrays, optional, default = None
       sets the bins of histogram analysis
       if None: min and max is estimated from data and (dx, dy) is used
       
    itime_range : list or tuple of two int, optional, default = None
       selects a range of relative time for the analysis
       if None: all times are used
    
    dx : float, optional, default = 20. 
       interval of x-binning if bins == None

    dy : float, optional, default = 20.
       interval of y-binning if bins == None


    Returns
    --------
    egrid : tuple of two numpy arrays
       edge-based output grid

    nd : numpy array, 2dim
       number density field.


    Notes
    -----
    There might be some problems with the (x,y) versus the transformed (x,y)
    coordinates.

    ToDo:
    * implement super sampling
    
    '''
    
    # get the right variable names -----------------------------------
    if var_names is None:
        var_name1 = 'x_mean'
        var_name2 = 'y_mean'
    else: 
        var_name1, var_name2 = var_names

    x, y, tid, trel = d[var_name1], d[var_name2], d['time_id'], d['rel_time']
    # ================================================================

    
    # prepare time masking -------------------------------------------
    if itime_range is None:
        mask = np.ones_like( trel ).astype( bool )
    else:
        itime_min, itime_max = itime_range
        mask = (trel >= itime_min) & (trel <= itime_max)
        
    # ================================================================

                

    # transformed coordinate -> equator at zero
    # clon, clat = gi.xy2ll(1.*x, 1.*y)
    # xt, yt  =  gi.ll2xy(clon, clat, lon0 = 0, lat0 = 0)
    
    # mask out the spinup time
    # mask = trel > -12

    
    # get binning ----------------------------------------------------
    if bins is None:
        # take the values from the cell props
        xbins = np.arange(x.min(), x.max(), dx)
        ybins = np.arange(y.min(), y.max(), dy)

        bins = (xbins, ybins)
    else:
        xbins, ybins = bins
    # ================================================================


    # check for region -----------------------------------------------
    reg_xmask = ( x > xbins.min() ) & (x < xbins.max() )
    reg_ymask = ( y > ybins.min() ) & (y < ybins.max() )
    reg_mask = reg_xmask & reg_ymask
    mask = mask & reg_mask     
    # ================================================================


    # histogram analysis ---------------------------------------------
    h, xe, ye = np.histogram2d(x[mask], y[mask], bins)

    if filter_in_logspace:
        # smoothing ???
        logh = np.ma.log(h)
        logh.data[ logh.mask ] = -5.

        loghs = scipy.ndimage.gaussian_filter(logh, smooth_sig)
        h = np.exp(loghs)
    else:
        h = scipy.ndimage.gaussian_filter(h, smooth_sig)
    # ================================================================
    


    # grid center points ---------------------------------------------
    xm = gi.lmean(xe)
    ym = gi.lmean(ye)
    
    egrid = np.meshgrid(xe, ye, indexing = 'ij')
    # ================================================================


    
    # get the number of cells per time slot --------------------------
    target_times = sorted( set(tid[mask]) )
    ntimes = 1.* len(target_times)

    ncells = 1. * len(tid[mask])
    ncells_per_slot = ncells /  ntimes
    # ================================================================


    # normalization --------------------------------------------------
    N0 = (h * dx * dy).sum()
    nd0 = ncells_per_slot * h / N0
    # ================================================================
    
    return egrid, nd0
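
# A minimal usage sketch with synthetic cell positions; the dict keys follow
# the defaults documented above ('x_mean', 'y_mean', 'time_id', 'rel_time').
def _demo_average_numberdensity():
    d = dict(x_mean=100. * np.random.randn(2000),
             y_mean=100. * np.random.randn(2000),
             time_id=np.repeat(np.arange(20), 100),
             rel_time=np.repeat(np.arange(20), 100))
    egrid, nd = calculate_average_numberdensity(d, dx=20., dy=20.)
    # nd integrates to the average number of cells per time slot
    return egrid, nd
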
def init_reference_numbers(egrid,
                           numberDensity,
                           rbins,
                           nsub=4,
                           radial_integration=True):
    '''
    Calculates the number of expected cells / particle as function of distance.
    

    Parameters
    -----------
    egrid : tuple or list of two 2dim numpy arrays
        edge-based grid at which number density field is given
        
    numberDensity : numpy array
        number density field, as number of particles per area
        
    rbins : numpy array
        ring edge array

    nsub : int, optional, default = 4
        value that determines subsampling of the analyzed fields

    radial_integration : bool, optional, default = True
        switch if radial integration of an interpolated representation of the 
        number field is used.

        
    Returns
    --------
    xout : numpy array, 2dim with shape = (nrows, ncols)
       output x-grid
 
    yout : numpy array, 2dim with shape = (nrows, ncols)
       output y-grid
    
    n0 : numpy array, 3dim with shape = (nrows, ncols, nrbins)
       expected number of cells on grid for each ring interval
    '''

    # get grids
    # ==========
    xg, yg = egrid  # edge based grid
    xc = gi.lmean(gi.lmean(xg, axis=0), axis=1)  # center point based grid
    yc = gi.lmean(gi.lmean(yg, axis=0), axis=1)  # center point based grid

    dx = gi.lmean(np.diff(xg, axis=0), axis=1)
    dy = gi.lmean(np.diff(yg, axis=1), axis=0)

    n_dx_dy = numberDensity * dx * dy

    # set output grid
    # =================
    xout = xc[::nsub, ::nsub]
    yout = yc[::nsub, ::nsub]

    pc = np.array([xc.flatten(), yc.flatten()]).T
    grid_tree = scipy.spatial.KDTree(pc)

    nrow_out, ncol_out = xout.shape

    # count particles per distance ring
    # ==================================
    edges = rbins
    num_increments = len(edges) - 1

    n0 = np.zeros((nrow_out, ncol_out, num_increments))
    ncum = np.zeros((nrow_out, ncol_out, num_increments))

    # calculated expected number of reference cells
    # ==============================================
    for irow in range(nrow_out):
        for icol in range(ncol_out):

            p0 = (xout[irow, icol], yout[irow, icol])

            if radial_integration:
                n0[irow, icol, :] = radial_integration_2d_field(
                    egrid, numberDensity, rbins, p0)
            else:

                # cumulative counts within radius r
                for ibin, r in enumerate(edges[1:]):
                    m = grid_tree.query_ball_point(p0, r)
                    ncum[irow, icol, ibin] = n_dx_dy.flatten()[m].sum()

                # ring counts are differences of the cumulative counts
                n0[irow, icol, 0] = ncum[irow, icol, 0]
                n0[irow, icol, 1:] = np.diff(ncum[irow, icol, :])
    return xout, yout, n0
def pcf(pos, rbins, domain, refgrid, ref_number, tree_as_grid_input=False):
    """
    Compute the two-dimensional pair correlation function, also known
    as the radial distribution function, for a set of circular particles
    contained in a square region of a plane.  

    Parameters
    ----------
   
    pos : numpy array
        position vector such that x, y = pos.T and pos.shape = (Nparticles, 2)
        
    rbins : numpy array
        ring edge vector
    
    domain : tuple or list in the form ((xmin, xmax), (ymin, ymax))
        containing min & max coordinates for masking the edge
        
    refgrid : tuple of two 2dim numpy arrays OR scipy tree class instance
        grid on which reference number of cells is given
        
    ref_number : numpy array
        reference number of cells per ring

    tree_as_grid_input : bool, optional, default = False
        switch if a scipy KDTree instance (for nearest-neighbor matching) is
        supplied instead of a reference grid


    Returns 
    -------
    
    g(r) : numpy array
        correlation function g(r) for each particle

    g_ave(r) : numpy array
        average correlation function

    radii : numpy array
        radii of the annuli used to compute g(r)

    interior_indices : numpy array
        indices of reference particles
    """

    # Number of particles in ring/area of ring/number of reference particles/number density
    # area of ring = pi*(r_outer**2 - r_inner**2)

    # Find particles which are close enough to the box center that a circle of radius
    # rMax will not cross any edge of the box

    # get particle coordinates
    # =========================
    x, y = pos.T

    # check boundaries
    # ================
    xmin, xmax = domain[0]
    ymin, ymax = domain[1]

    rMax = rbins.max()

    xmask = (x > rMax + xmin) & (x < xmax - rMax)
    ymask = (y > rMax + ymin) & (y < ymax - rMax)

    interior_indices, = np.where(xmask & ymask)
    num_interior_particles = len(interior_indices)

    pos_inner = pos[interior_indices]

    if num_interior_particles < 1:
        raise RuntimeError(
            "No particles found for which a circle of radius rMax\
                will lie entirely within a square of side length S.  Decrease rMax\
                or increase the size of the square.")

    # set distance rings
    # ==================
    edges = rbins
    num_increments = len(edges) - 1

    g = np.zeros([num_interior_particles, num_increments])
    npart = np.zeros([num_interior_particles, num_increments])

    # Compute number of observed particles
    # ====================================
    for p in range(num_interior_particles):
        index = interior_indices[p]
        d = np.sqrt((x[index] - x)**2 + (y[index] - y)**2)
        d[index] = 2 * rMax

        (npart[p, :], bins) = np.histogram(d, bins=edges, density=False)

    # Get number of reference particles
    # =================================
    if tree_as_grid_input:
        grid_tree = refgrid
    else:
        xout, yout = refgrid
        pout = np.array([xout.flatten(), yout.flatten()]).T
        grid_tree = scipy.spatial.KDTree(pout)

    dist, index = grid_tree.query(pos_inner)

    nref = ref_number.reshape(-1, num_increments)[index]

    g = np.ma.divide(npart, nref)
    g = np.ma.masked_invalid(g)

    g_average = g.mean(axis=0)
    radii = gi.lmean(edges)

    return (g, g_average, radii, interior_indices)
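
# A hedged end-to-end sketch: pcf consumes the reference numbers produced by
# init_reference_numbers; for a Poisson point pattern the average g(r)
# scatters around one. All domain sizes and counts are illustrative.
def _demo_pcf():
    xeg, yeg = np.meshgrid(np.arange(0., 210., 10.),
                           np.arange(0., 210., 10.), indexing='ij')
    pos = 200. * np.random.rand(300, 2)
    nd = np.full((20, 20), 300. / 200.**2)    # matching uniform density
    rbins = np.array([0., 10., 20., 30.])
    xout, yout, n0 = init_reference_numbers((xeg, yeg), nd, rbins,
                                            radial_integration=False)
    g, g_ave, radii, idx = pcf(pos, rbins, ((0., 200.), (0., 200.)),
                               (xout, yout), n0)
    return radii, g_ave
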
def pairCorrelationFunction_2D(pos,
                               egrid,
                               numberDensity,
                               rMax,
                               dr,
                               rbins=None,
                               equal_area=True,
                               weight=None,
                               constant_analytic=False,
                               mask_edge_particle=True,
                               use_radial_interpolation=False,
                               finite_size_correction=False):
    """Compute the two-dimensional pair correlation function, also known
    as the radial distribution function, for a set of circular particles
    contained in a square region of a plane.  

    This simple function finds reference particles such that a circle of radius rMax drawn around the
    particle will fit entirely within the square, eliminating the need to
    compensate for edge effects.  If no such particles exist, an error is
    returned. Try a smaller rMax...or write some code to handle edge effects! ;)


    Parameters
    ----------
        
    pos : numpy array
        position vector such that x, y = pos.T and pos.shape = (Nparticles, 2)
        
    egrid : numpy array
        edge-based grid at which number density field is given
    
    numberDensity : numpy array
        number density field, as number of particles per area
        
    rMax : float value
        maximum radius
 
    dr : float value
        increment for increasing radius of annulus
        
    rbins : numpy array, optional, default = None
        array which contains range bin values

    equal_area : bool, optional, default = True
        switch that determines whether equi-distant radius rings or rings of
        equal area are chosen

    weight : float, optional, default = None
        if weight (a number between 0 and 1) is set, then radius rings are
        calculated from a weighted version between equi-distant (weight = 0)
        and equal area (weight = 1)


    constant_analytic : bool, optional default = False
        use analytic form of expected reference number assuming a constant number density
 
    mask_edge_particle : bool, optional, default = True
        switch if particles close to the edge are ignored
 
    use_radial_interpolation : bool, optional, default = False
        if the number field is represented by an interpolating function and
        integration is performed on that field

    finite_size_correction : bool, optional, default = False
        if a finite size correction N / (N-1) is applied


    Returns 
    -------
    
    g(r) : numpy array
        correlation function g(r) for each particle

    g_ave(r) : numpy array
        average correlation function

    radii : numpy array
        radii of the annuli used to compute g(r)

    interior_indices : numpy array
        indices of reference particles
        
    Note
    ------
    Possibly outdated! Try the faster variant pcf that uses precalculated reference number fields!
    
    """
    # Number of particles in ring/area of ring/number of reference particles/number density
    # area of ring = pi*(r_outer**2 - r_inner**2)

    # Find particles which are close enough to the box center that a circle of radius
    # rMax will not cross any edge of the box

    # get particle coordinates
    # =========================
    x, y = pos.T

    # get grids
    # ==========
    xg, yg = egrid  # edge based grid
    xc = gi.lmean(gi.lmean(xg, axis=0), axis=1)  # center point based grid
    yc = gi.lmean(gi.lmean(yg, axis=0), axis=1)  # center point based grid

    # check boundaries
    # ================

    # overwrite rMax if rbins is given
    if rbins is not None:
        rMax = rbins.max()

    xmin, xmax = xg.min(), xg.max()
    ymin, ymax = yg.min(), yg.max()

    if mask_edge_particle:
        xmask = (x > rMax + xmin) & (x < xmax - rMax)
        ymask = (y > rMax + ymin) & (y < ymax - rMax)
    else:
        xmask = (x > xmin) & (x < xmax)
        ymask = (y > ymin) & (y < ymax)

    interior_indices, = np.where(xmask & ymask)
    num_interior_particles = len(interior_indices)

    if num_interior_particles < 1:
        raise RuntimeError(
            "No particles found for which a circle of radius rMax\
                will lie entirely within a square of side length S.  Decrease rMax\
                or increase the size of the square.")

    # count particles per distance ring
    # ==================================
    if rbins is None:
        edges = radius_ring_edges(rMax,
                                  dr,
                                  equal_area=equal_area,
                                  weight=weight)
    else:
        edges = rbins

    num_increments = len(edges) - 1
    g = np.zeros([num_interior_particles, num_increments])

    # finite size correction (N - 1) / N
    # =================================
    ncells = 1. * pos.shape[0]
    if finite_size_correction:
        corr_factor = (ncells - 1.) / ncells
    else:
        corr_factor = 1.

    # Compute pairwise correlation for each interior particle
    for p in range(num_interior_particles):
        index = interior_indices[p]
        d = np.sqrt((x[index] - x)**2 + (y[index] - y)**2)
        d[index] = 2 * rMax

        (npart, bins) = np.histogram(d, bins=edges, density=False)

        dgrid = np.sqrt((x[index] - xc)**2 + (y[index] - yc)**2)

        # here, we calculate reference number of particles
        for i in range(num_increments):
            r1 = edges[i]
            r2 = edges[i + 1]

            if constant_analytic:
                n0 = np.pi * numberDensity.mean() * (r2**2 - r1**2)
            else:
                if use_radial_interpolation:
                    p0 = (x[index], y[index])
                    n0 = radial_ring_integration_2d_field(egrid,
                                                          numberDensity,
                                                          r1,
                                                          r2,
                                                          p0=p0)
                else:
                    dmask = (dgrid >= edges[i]) & (dgrid <= edges[i + 1])
                    n0 = integrate_2dfield(egrid, numberDensity, dmask)

            g[p, i] = np.ma.divide(npart[i], n0)

    # Average g(r) for all interior particles and compute radii
    #g_average = zeros(num_increments)
    #for i in range(num_increments):
    #    radii[i] = (edges[i] + edges[i+1]) / 2.
    #    rOuter = edges[i + 1]
    #    rInner = edges[i]
    #    g_average[i] = mean(g[:, i]) / (pi * (rOuter**2 - rInner**2))

    g = corr_factor * np.ma.masked_invalid(g)

    g_average = g.mean(axis=0)
    radii = gi.lmean(edges)

    return (g, g_average, radii, interior_indices)
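
# A hedged usage sketch of the classic variant with the analytic constant
# reference; for uniformly random particles g_ave(r) fluctuates around one.
# This assumes the module's radius_ring_edges helper is available, as the
# function above requires.
def _demo_paircorrelation_2d():
    pos = 500. * np.random.rand(400, 2)
    xeg, yeg = np.meshgrid(np.arange(0., 520., 20.),
                           np.arange(0., 520., 20.), indexing='ij')
    nd = np.full((25, 25), 400. / 500.**2)
    g, g_ave, radii, idx = pairCorrelationFunction_2D(
        pos, (xeg, yeg), nd, rMax=100., dr=20., constant_analytic=True)
    return radii, g_ave
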
def percentiles_from_hist(xe, ye, h, p=[25, 50, 75], axis=0, sig=0):
    '''
    Use output from the numpy 2d histogram routine 
    to calculate percentiles of the y variable
    based on relative occurence rates.


    Parameters
    ----------
    xe : np.array
        x-part of histogram grid (edge values)

    ye : np.array
        y-part of histogram grid (edge values)

    h : np.array
        frequency of occurence

    p : list, optional, default = [25, 50, 75]
        list of percentile values to be calculated (in 100th)

    axis : {0, 1}, optional
        axis along which the calculations are performed

    sig : float, optional, default = 0
        sigma of a Gaussian filter applied to the histogram in advance
        
        Should be non-negative.


    Returns
    -------
    zperc : list of np.array
        list of arrays containing the percentiles
    '''

    # normalization
    hn = conditioned_hist(1. * h, axis=axis)

    # get the right coordinate
    if axis == 0:
        z = xe[1:]
        hn = hn.transpose()

    elif axis == 1:
        z = ye[1:]

    # what follows assumes axis = 1

    # add edge
    z0 = z[0] - (z[1] - z[0]) / 2.
    zN = z[-1] + (z[-1] - z[-2]) / 2.

    z = np.hstack([z0, z, zN])

    h0 = np.zeros_like(hn[:, 0])
    hn = np.column_stack([h0, hn, h0])

    sigvec = (0, sig)
    hn = scipy.ndimage.gaussian_filter(hn, sigvec)

    hcum = hn.cumsum(axis=1) * 100.

    zperc = []

    for pval in p:

        # percentile crossing
        hr = hcum - pval

        # this is the point closest to crossing
        imax = np.abs(hr).argmin(axis=1)

        # neighboring indices
        iminus = np.where(imax - 1 < 0, 0, imax - 1)
        iplus = np.where(imax + 1 < hn.shape[1], imax + 1, imax)

        # indexing stuff!!!!

        # for axis = 1
        i0 = list(range(imax.shape[0]))

        hmask = hr[i0, imax] < 0

        # lower bound
        dh0 = np.where(hmask, hr[i0, imax], hr[i0, iminus])
        z0 = np.where(hmask, z[imax], z[iminus])

        # upper bound
        dh1 = np.where(hmask, hr[i0, iplus], hr[i0, imax])
        z1 = np.where(hmask, z[iplus], z[imax])

        zp = (dh1 * z0 - dh0 * z1) / (dh1 - dh0)

        zperc.append(zp)

    return zperc
def demo_percentiles_from_hist():
    '''
    Demo: draws a noisy quadratic relation, bins it into a conditioned 2d
    histogram and overlays the 25th / 50th / 75th percentile curves.
    '''
    x = 2 * np.random.randn(1000)
    y = x**2 + np.random.randn(1000)

    pl.figure()
    pl.plot(y, x, 'o')

    h, xe, ye = np.histogram2d(y, x, (18, 20), density=True)

    hn = conditioned_hist(h, axis=0)

    pl.pcolormesh(xe, ye, hn.transpose(), alpha=0.5)

    x25, x50, x75 = percentiles_from_hist(xe, ye, h)
    pl.plot(x50, gi.lmean(ye), 'r-', lw=3)
    pl.plot(x25, gi.lmean(ye), 'g-', lw=3)
    pl.plot(x75, gi.lmean(ye), 'g-', lw=3)

    pl.figure()
    pl.plot(gi.lmean(ye), x50)
    pl.plot(x, x**2, 'o')
def main(collection_file,
         do_output=True,
         output_file=None,
         smooth_sig=2.,
         filter_in_logspace=True,
         var_names=None,
         bins=None,
         itime_range=None,
         dx=20.,
         dy=20.,
         do_interpolation=False,
         variable_filename=None,
         variable_name=None,
         input_options={}):
    '''
    Calculates average number density.
    

    Parameters
    ----------
    collection_file : str
        filename of cluster collection

    do_output : bool, optional, default = True
        switch if output is written

    output_file : str, optional, default = None
        name of output file
        if None: name is generated based on collection filename

    smooth_sig : float, optional, default = 2.
       sigma of Gaussian filter for smoothing the results

    filter_in_logspace : bool, optional, default = True,
       switch if filter is applied in log-space

    var_names : list of two strings, optional, default = None
       list of the two variables used for binning
       if None: 'x_mean' and 'y_mean' are used
       
    bins : list or tuple of two numpy arrays, optional, default = None
       sets the bins of histogram analysis
       if None: min and max is estimated from data and (dx, dy) is used
       
    itime_range : list or tuple of two int, optional, default = None
       selects a range of relative time for the analysis
       if None: all times are used
    
    dx : float, optional, default = 20. 
       interval of x-binning if bins == None

    dy : float, optional, default = 20.
       interval of y-binning if bins == None

    do_interpolation : bool, optional, default = False
        if number density field is interpolated onto cluster grid

    variable_filename : str, optional, default = None
        one of the variable files to input cluster grid georef
                
    variable_name : str, optional, default = None
        variable name in variable file

    input_options : dict, optional, default = {}
        all possible input options used in the data_reader


    Returns
    --------
    egrid : tuple of two numpy arrays
       edge-based output grid

    nd : numpy array, 2dim
       number density field.

   
    '''
    # ================================================================

    # input collection data ------------------------------------------
    cset = hio.read_dict_from_hdf(collection_file)
    # ================================================================

    # do number density calculation ----------------------------------
    egrid, nd_ref = calculate_average_numberdensity(
        cset,
        smooth_sig=smooth_sig,
        filter_in_logspace=filter_in_logspace,
        var_names=var_names,
        bins=bins,
        itime_range=itime_range,
        dx=dx,
        dy=dy)

    xgrid = egrid[0]
    ygrid = egrid[1]
    # ================================================================

    # do interpolation of reference number density on cluster grid ---
    if do_interpolation:
        if variable_filename is not None and variable_name is not None:
            varset = data_reader.input(variable_filename, variable_name,
                                       **input_options)

            # centered base grid
            xgridc = gi.lmean(gi.lmean(xgrid, axis=0), axis=1)
            ygridc = gi.lmean(gi.lmean(ygrid, axis=0), axis=1)

            # get target grid
            cx = varset['x']
            cy = varset['y']

            # interpolation index
            ind = gi.create_interpolation_index(xgridc,
                                                ygridc,
                                                cx,
                                                cy,
                                                xy=True)

            # get total number of cells
            ncells = (nd_ref * dx * dy).sum()

            # normalization of new nd field
            da = gi.simple_pixel_area(cx, cy, xy=True)
            norm = (nd_ref[ind] * da).sum()
            nd_int = ncells * nd_ref[ind] / norm
    # ================================================================

    # save stuff into hdf --------------------------------------------
    if do_output:
        out = {}
        out['xgrid'] = xgrid
        out['ygrid'] = ygrid
        out['number_density'] = nd_ref

        if (do_interpolation and variable_filename is not None
                and variable_name is not None):
            out['x_int'] = cx
            out['y_int'] = cy
            out['nd_int'] = nd_int

        if output_file is None:
            oname = collection_file.replace('collected_cluster_props',
                                            'average_number_density')
        else:
            oname = output_file

        print('... save data to %s' % oname)
        hio.save_dict2hdf(oname, out)
    # ================================================================

    return egrid, nd_ref
def histxr(dset, varname, bins, loop_dim='time', do_log=False):
    '''
    Calculates a histogram using the xarray data structure, looping over one
    dimension (e.g. time).
    
    
    Parameters
    ----------
    dset : xarray Dataset
        input data container
        
    varname : str
        variable name for which binning is done
        
    bins : numpy array or list
        set of bin edges for histogram computations
        
    loop_dim : str, optional
        dimension name for which looping is done
        default = 'time'
        
    do_log : bool, optional
        switch for bin center calculations
        if True: bins are assumed to be equi-distant in log-space
        
        
    Returns
    --------
    hset : xarray Dataset
        histogram data, incl. pdf, cdf and cumulative moments
    '''

    # ----------------------------
    # (1) Preparation Part
    # ----------------------------

    # nbin center points
    if do_log:
        logbins = np.log(bins)
        clogbins = gi.lmean(logbins)
        cbins = np.exp(clogbins)
    else:
        cbins = gi.lmean(bins)

    # ----------------------------
    # (2) Loop Part
    # ----------------------------
    nloop = len(dset.coords[loop_dim])
    hist_list = []

    for i in range(nloop):
        d = dset.isel({loop_dim: i})[varname].data

        h, xe = np.histogram(d.flatten(), bins)

        hist_list += [
            h.copy(),
        ]

    harray = np.vstack(hist_list)

    # ----------------------------
    # (3) Xarray Part
    # ----------------------------
    # get variable attributes
    attrs = dict(dset[varname].attrs)
    attrs['varname'] = varname
    attrs['var_longname'] = attrs.pop('longname', 'None')

    # store absolute counts
    hset = xr.Dataset(
        {
            'histogram': ([loop_dim, 'cbin'], harray, {
                'longname': 'absolute histogram counts'
            })
        },
        coords={
            loop_dim: (loop_dim, dset[loop_dim]),
            'ebin': ('ebin', bins, dict(longname='bin edges', **attrs)),
            'cbin': ('cbin', cbins, dict(longname='bin mid-points', **attrs)),
        })

    hset['delta_bin'] = xr.DataArray(hset.ebin.diff('ebin').data,
                                     dims='cbin',
                                     attrs=dict(longname='bin widths',
                                                **attrs))

    # pdf
    h_relative = hset.histogram / (1. * hset.histogram.sum(dim='cbin'))
    pdf = h_relative / hset.delta_bin
    hset['pdf'] = xr.DataArray(
        pdf, attrs={'longname': 'probability density function'})

    # cdf
    cdf = (pdf * hset.delta_bin).cumsum(dim='cbin')
    hset['cdf'] = xr.DataArray(
        cdf, attrs={'longname': 'cumulative distribution function'})

    # cumulative moments
    for i in range(1, 3):
        mom = hset.cbin**i
        cum_mom = (mom * pdf * hset.delta_bin).cumsum(dim='cbin')
        cum_mom_name = 'cum_mom%d' % i
        hset[cum_mom_name] = xr.DataArray(
            cum_mom, attrs={'longname': 'cumulative %d. moment' % i})

    return hset
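
# A minimal usage sketch for histxr with a random xarray Dataset; assumes
# xarray is imported as xr at module level, as used above.
def _demo_histxr():
    dset = xr.Dataset(
        {'rain': (('time', 'x'), np.random.exponential(1., (5, 1000)))},
        coords={'time': np.arange(5)})
    bins = np.linspace(0., 10., 21)
    hset = histxr(dset, 'rain', bins)
    # hset['pdf'] integrates to one over 'cbin' for each time step
    return hset
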
def timevar_hist(d,
                 vname,
                 x=None,
                 atot=1.,
                 full_time_info=False,
                 nbins=20,
                 logbinning=False,
                 edge_values=False,
                 density=True,
                 do_cnsd=False,
                 do_ccsd=False):
    '''
    Makes time-average 1d histograms for a certain variable.

    
    Parameters
    ----------
    d : dict
        set of cell properties

    vname : str
        variable name used for analysis

    x : numpy array, optional, default = None
        bins for histogram
        if None: either predefined bins or bins from percentiles are used
    
    atot : float, optional, default = 1.
        total area of analysis domain (for cnsd)

    full_time_info : bool, optional, default = False
        switch if full time information is returned (rather than ave and standard error)

    nbins : int, optional, default = 20
        number of bins

    logbinning : bool, optional, default = False
         switch if autogenerated bins use logarithmic bin distances

    edge_values : bool, optional, default = False
        if edge rather than mid-point values are returned
    
    density : bool, optional, default = True
        switch if PDF rather than absolute counts are returned

    do_cnsd : bool, optional, default = False
        if cloud number size distribution is calculated

    do_ccsd : bool, optional, default = False
        if cloud cover size distribution is calculated


    Returns
    --------
    xm : numpy array, 1d
        mid-points of bins
    
    pdf : numpy array, 2d 
        time-array of histograms per time slots

    hm : numpy array, 1d
        average histogram


    dhm : numpy array, 1d
        standard error of average histogram
    
    '''

    # set bins -------------------------------------------------------
    x_was_given = x is not None
    if x is None:
        if vname in ['diameter', 'nN_diameter']:
            lx = np.linspace(np.log10(10), np.log10(1000), nbins)
            x = 10**lx

        elif vname == 'nN_distance':
            lx = np.linspace(np.log10(20), np.log10(1000), nbins)
            x = 10**lx

        elif vname == 'ml':
            lx = np.linspace(np.log10(1), np.log10(10000), nbins)
            x = 10**lx

        elif vname in ['shape_bt108', 'aspect']:
            x = np.linspace(0, 1, nbins)

        elif vname in ['bt108_min']:
            x = np.linspace(180, 230, nbins)
        else:
            # use percentile bins
            v1, v99 = np.percentile(d[vname], [1, 99])

            if logbinning:
                lx = np.linspace(np.log10(v1), np.log10(v99), nbins)
                x = 10**lx
            else:
                x = np.linspace(v1, v99, nbins)

    # use log-mean mid-points for the auto-generated log-spaced bins
    if vname in ['diameter', 'nN_diameter', 'nN_distance'
                 ] and not x_was_given:
        lx = np.log10(x)
        lxm = gi.lmean(lx)
        xm = 10**lxm
    else:
        xm = gi.lmean(x)
    # ================================================================

    # collect histograms ---------------------------------------------
    tlist = sorted(set(d['time_id']))

    ntime = len(tlist)
    nbins = len(xm)

    pdf = np.zeros((ntime, nbins))
    cnsd = np.zeros((ntime, nbins))
    ccsd = np.zeros((ntime, nbins))

    range_mask = (d[vname] >= x.min()) & (d[vname] <= x.max())

    for itime, t in enumerate(tlist):

        m = (d['time_id'] == t) & range_mask

        v = d[vname][m]

        Ntot = len(v)

        pdf[itime], x = np.histogram(v, x, density=density)
        # pdf[itime] = scipy.ndimage.gaussian_filter(pdf[itime], 0) #.5)

        cnsd[itime] = Ntot * pdf[itime] / atot
        ccsd[itime] = 100. * np.pi / (4.) * (cnsd[itime] * xm**3)
    # ================================================================

    pdf = np.ma.masked_invalid(pdf)
    cnsd = np.ma.masked_invalid(cnsd)
    ccsd = np.ma.masked_invalid(ccsd)

    if do_cnsd:
        pdf = cnsd
    elif do_ccsd:
        pdf = ccsd

    hm = pdf.mean(axis=0)
    hsig = pdf.std(axis=0)

    # standard error of the time mean (two sigma)
    ntimes, nbins = pdf.shape
    dhm = 2 * hsig / np.sqrt(ntimes)

    if edge_values:
        xout = x
    else:
        xout = xm

    if full_time_info:
        return xout, pdf
    else:
        return xout, hm, dhm
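
# A minimal usage sketch: time-averaged size distribution of synthetic cell
# diameters; the dict keys follow the conventions used throughout ('time_id'
# plus the analyzed variable), and all numbers are illustrative.
def _demo_timevar_hist():
    d = dict(time_id=np.repeat(np.arange(10), 200),
             diameter=10**np.random.uniform(np.log10(20.), np.log10(500.),
                                            2000))
    xm, hm, dhm = timevar_hist(d, 'diameter')
    # hm is the time-mean PDF, dhm twice its standard error
    return xm, hm, dhm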