Example #1
def read_hdcp2_data(fname):
    '''
    Reads BT10.8 data from files generated by the HDCP2 O module.


    Parameters
    ----------
    fname : str
       file name


    Returns
    --------
    dset : dict
        dictionary of datasets
    '''

    # data fields ----------------------------------------------------
    vlist = ['tb108', 'lon', 'lat', 'time']
    dset = ncio.read_icon_4d_data(fname, vlist, itime=None)

    b3d = dset.pop('tb108')
    b3d = np.ma.masked_less(b3d, 100.)
    # ================================================================

    # geo ref --------------------------------------------------------
    lon, lat = dset['lon'], dset['lat']

    x, y = gi.ll2xyc(lon, lat, lon0=10, lat0=50)
    area = np.abs(gi.simple_pixel_area(lon, lat))
    # ================================================================

    # time conversions -----------------------------------------------
    abs_time = dset['time'] / (3600. * 24)   # time in seconds -> days
    rel_time = np.mod(abs_time, 1) * 24.     # fractional day -> hour of day

    ntime = len(rel_time)
    index_time = np.arange(ntime)
    # ================================================================

    # prepare output .................................................
    vnames = ['x', 'y', 'area', 'rel_time', 'abs_time', 'index_time']
    vvec = [x, y, area, rel_time, abs_time, index_time]
    for i, vname in enumerate(vnames):
        dset[vname] = vvec[i]

    dset['bt108'] = b3d
    dset['lsm'] = np.ones_like(x)
    dset['input_dir'] = os.path.dirname(fname)
    # ================================================================

    return dset
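A minimal usage sketch (the file name is hypothetical; ncio and gi are the module's own I/O and geo-reference helpers):

dset = read_hdcp2_data('hdcp2_obs_bt108_20130502.nc')  # hypothetical file
print(dset['bt108'].shape)   # (ntime, nrow, ncol) masked brightness temperatures
print(dset['rel_time'][:3])  # hours of day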
Example #2
def get_area_rate(
        lon,
        lat,
        f1,
        f2,
        thresh,
        cluster_method='connect',  # segmentation method 
        nsub=4,  # subsampling factor
        vmin=None,
        vmax=None,
        nedge=20,
        output_percentile_change=True,
        percentiles=[50, 75, 90, 95],
        return_vector=False,
        dt=3600.,
        **kwargs):
    '''
    Calculates the area rate between two sets of time-connected objects.

    Method:

    (i) The two fields f1 and f2 are segmented using the threshold thresh
    and the condition f1, f2 > thresh (foreground).

    (ii) An optical-flow transformation is calculated between the two fields
    and an average shift is applied to field f1.

    (iii) The shifted f1 and f2 are stacked and segmented again to obtain
    the time connection.


    
    Parameters
    ----------
    lon : numpy array, 2dim
       longitude field

    lat : numpy array, 2dim
       latitude field

    f1 : numpy array, 2dim
       field at present time (to be shifted)

    f2 : numpy array, 2dim
       field at future time (used to calculate the optical flow)

    thresh : float
       selected threshold for clustering

    cluster_method : str, optional, default = 'connect'
       method used for clustering

    nsub : int, optional, default = 4
       subsampling factor

    nedge : int, optional, default = 20
       width (in pixels, before subsampling) of the border zone in which
       labeled objects are removed

    vmin : float, optional, default = None
       lower minimum value for field clipping 
       if None, minimum from input fields is taken

    vmax : float, optional, default = None
       upper maximum value for field clipping 
       if None, maximum from input fields is taken

    output_percentile_change : bool, optional, default = True
       switch if the change in percentile values is also returned

    percentiles :  list,  optional, default = [50, 75, 90, 95]
       percentiles for which change is monitored

    return_vector : bool, optional, default = False
       switch if the change is returned as a vector (instead of a 2d field)

    dt : float, optional, default = 3600.
       time interval between f1 and f2 (in seconds)

    **kwargs : dict
       keyword arguments passed to the clustering routine


    Returns
    -------
    da : numpy array, 2dim, subsampled by nsub
       area rate field (units: km * m/s)

    davec : numpy array, 1dim, returned if return_vector = True
       area rate vector, sorted per cell

    dp : numpy array, 2dim, returned if output_percentile_change = True
       percentile change (not divided by dt)

    dpvec : numpy array, 1dim, returned if return_vector = True AND
       output_percentile_change = True
       percentile change vector, sorted per cell
    '''

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 1: prepare input fields
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # subsampling ----------------------------------------------------
    f1c = f1[::nsub, ::nsub]
    f2c = f2[::nsub, ::nsub]

    clon = lon[::nsub, ::nsub]
    clat = lat[::nsub, ::nsub]
    # ================================================================

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 2: segmentation of individual fields
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # perform single field segmentation ------------------------------
    s1 = seg.clustering(f1c, thresh, cluster_method=cluster_method, **kwargs)
    s2 = seg.clustering(f2c, thresh, cluster_method=cluster_method, **kwargs)

    # s1 = mahotas.labeled.remove_bordering(s1, (nedge / nsub, nedge / nsub))
    # s2 = mahotas.labeled.remove_bordering(s2, (nedge / nsub, nedge / nsub))
    # ================================================================

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 3: optical flow
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
    t1, flow = object_nowcast(f1c, f2c, s1=s1, output_symmetric_flow=True)

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 4: 3d segmentation (time-connected clusters)
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # stack the shifted previous field and the current field
    s3d = np.array([t1, s2])

    s = seg.clustering(s3d, 0, cluster_method=cluster_method, **kwargs)
    s = mahotas.labeled.remove_bordering(s, (0, nedge // nsub, nedge // nsub))

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 5: calculate output fields
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # area rate
    a = gi.simple_pixel_area(clon, clat)

    a1 = scipy.ndimage.sum(a, labels=s[0], index=range(0, s.max() + 1))
    a2 = scipy.ndimage.sum(a, labels=s[1], index=range(0, s.max() + 1))

    davec = a2 - a1
    davec[0] = 0.  # label 0 is background

    da = davec[s[1]] * 1000. / dt  # unit conversion: km**2 / s -> km * m/s

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 6: optionally calculate percentiles of field
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    if output_percentile_change:

        # transform f1c with the optical flow so that the percentiles are
        # consistent with the time-connected labels in s[0]
        ft1 = oflow.morph_trans_opt_flow(f1c, flow)
        p1 = percentiles_from_cluster(ft1,
                                      s[0],
                                      p=percentiles,
                                      index=range(0, s[1].max() + 1))

        p2 = percentiles_from_cluster(f2c,
                                      s[1],
                                      p=percentiles,
                                      index=range(0, s[1].max() + 1))

        dpvec = np.vstack(p2) - np.vstack(p1)

        dp = dpvec[s[1]].transpose(2, 0, 1)

        if return_vector:
            return davec, dpvec
        else:
            return da, dp
    else:
        if return_vector:
            return davec
        else:
            return da
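A usage sketch with synthetic input fields (threshold and grid are illustrative; the module-level seg, oflow and gi helpers must be importable):

ny, nx = 200, 200
lon, lat = np.meshgrid(np.linspace(5., 15., nx), np.linspace(45., 55., ny))
f1 = np.random.rand(ny, nx)
f2 = np.roll(f1, 2, axis=1)  # mimic advection by two pixels
da, dp = get_area_rate(lon, lat, f1, f2, thresh=0.9, nsub=2, dt=3600.)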
Example #3
def cluster_analysis(lon, lat, c, dmin=20., do_fast=False, xya=None):
    '''
    Simple version of cluster analysis used for aggregation metrics.
    
    Parameters
    -----------
    lon : numpy array, 2dim with shape = (nrows, ncols) 
        longitude

    lat :  numpy array, 2dim with shape = (nrows, ncols) 
        latitude

    c : numpy array, 2dim with shape = (nrows, ncols), type = int
        categorical cluster field
    
    dmin : int or float, optional, default = 20
        minimum diameter kept in the cell set data

    do_fast : bool, optional, default = False
        switch to use faster calculation of cell properties
    
    xya : tuple of three numpy arrays, optional, default = None
        pre-calculated fields of the x- and y-coordinates and the gridbox area,
        (x, y, a) = xya


    Returns
    --------
    ca : dict
        cluster analysis dict
    '''

    # get geo and area fields ----------------------------------------
    if xya is None:
        x, y = gi.ll2xy(lon, lat)
        a = np.abs(gi.simple_pixel_area(lon, lat))
    else:
        x, y, a = xya
    # ================================================================

    # init output dict -----------------------------------------------
    ca = {}

    if do_fast:
        index = range(1, c.max() + 1)
        ca['xc'] = scipy.ndimage.mean(x, c, index=index)
        ca['yc'] = scipy.ndimage.mean(y, c, index=index)
        ca['area'] = scipy.ndimage.sum(a, c, index=index)
    else:
        vnames = ['area', 'xc', 'yc']
        for vname in vnames:
            ca[vname] = []

        # loop over clusters---------------------------------------------

        for n in range(1, c.max() + 1):

            # make cluster mask
            m = (c == n)

            ca['area'].append(a[m].sum())
            ca['xc'].append(x[m].mean())
            ca['yc'].append(y[m].mean())

        for vname in ca.keys():
            ca[vname] = np.array(ca[vname])
        # ================================================================

    # calculate diameter ---------------------------------------------
    ca['dia'] = 2 * np.sqrt(ca['area'] / np.pi)
    # ================================================================

    # do masking -----------------------------------------------------
    m = ca['dia'] > dmin
    for vname in ca.keys():
        ca[vname] = ca[vname][m]
    # ================================================================

    # get number of clusters -----------------------------------------
    ca['number'] = len(ca['dia'])
    # ================================================================

    # calculate distances field --------------------------------------

    # make a mesh for fast matrix-based distance calculation
    xx, yy = np.meshgrid(ca['xc'], ca['yc'])

    # get squared direction deviations
    dxq = (xx - xx.transpose())**2
    dyq = (yy - yy.transpose())**2

    dist_matrix = np.sqrt(dxq + dyq)

    # take upper triangular matrix
    dist_matrix = np.triu(dist_matrix)
    mask_matrix = (dist_matrix != 0)

    ca['dist'] = dist_matrix[mask_matrix]
    # ================================================================

    # calculate mean pairwise distances (aggregation indices) ---------
    ca['D0'] = scipy.stats.gmean(ca['dist'])  # geometric mean distance
    ca['D1'] = np.mean(ca['dist'])            # arithmetic mean distance
    # ================================================================

    return ca
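Illustrative call, assuming c is an integer-labeled cluster field from a prior segmentation on the same (lon, lat) grid:

ca = cluster_analysis(lon, lat, c, dmin=20.)
print(ca['number'])        # number of clusters with diameter > dmin
print(ca['D0'], ca['D1'])  # geometric and arithmetic mean pairwise distance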
Example #4
def read_icon_lem_data(fname):
    '''
    Reads BT10.8 data from files generated for ICON-LEM runs.


    Parameters
    ----------
    fname : str
       file name


    Returns
    --------
    dset : dict
        dictionary of datasets
    '''

    # data fields ----------------------------------------------------
    vlist = ['bt108', 'lon', 'lat', 'time']
    dset = ncio.read_icon_4d_data(fname, vlist, itime=None)

    b3d = dset.pop('bt108')
    b3d = np.ma.masked_less(b3d, 100.)
    # ================================================================

    # geo ref --------------------------------------------------------
    lon, lat = dset['lon'], dset['lat']

    x, y = gi.ll2xyc(lon, lat, lon0=10, lat0=50)
    area = np.abs(gi.simple_pixel_area(lon, lat))
    # ================================================================

    # time conversions -----------------------------------------------
    rel_time = 24 * (dset['time'] - dset['time'][0])  # fraction-of-day differences -> hours

    ntime = len(rel_time)
    index_time = np.arange(ntime)

    t0 = datetime.datetime(1970, 1, 1)
    abs_time = []
    for t in dset['time']:
        day = str(int(t))
        subday = np.mod(t, 1)

        tobj = datetime.datetime.strptime(day, '%Y%m%d')
        tobj += datetime.timedelta(days=subday)

        dt = (tobj - t0).total_seconds()
        abs_time.append(dt / (24. * 3600.))

    abs_time = np.array(abs_time)

    # ================================================================

    # prepare output .................................................
    vnames = ['x', 'y', 'area', 'rel_time', 'abs_time', 'index_time']
    vvec = [x, y, area, rel_time, abs_time, index_time]
    for i, vname in enumerate(vnames):
        dset[vname] = vvec[i]

    dset['bt108'] = b3d
    dset['lsm'] = np.ones_like(x)
    dset['input_dir'] = os.path.dirname(fname)
    # ================================================================

    return dset
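A worked example of the time convention decoded above: the raw ICON time stamp stores the date in its integer part and the fraction of the day in the remainder (the value below is illustrative):

t = 20130502.75  # hypothetical raw time value
tobj = datetime.datetime.strptime(str(int(t)), '%Y%m%d') \
       + datetime.timedelta(days=np.mod(t, 1))
# tobj -> datetime.datetime(2013, 5, 2, 18, 0), i.e. 18:00 UTC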
Example #5
def read_narval_addvars(fname, vname, domain_center=None, region_slice=None):
    '''
    Reads the time stack of Narval data, either Meteosat or synsat.


    Parameters
    ----------
    fname : str 
        filename of data file
    
    vname : str
        variable name 
        (variable should be contained in file)

    domain_center : tuple of floats, optional, default = None
        setting the projection center to (clon, clat)
        if None: not used

    region_slice : tuple of two index pairs, optional, default = None
        cutout of fields of the form ((irow1, irow2), (icol1, icol2))
        if None: not used


    Returns
    --------
    dset : dict
        dataset dictionary containing georef and bt108 data.
    '''

    # read land sea data ---------------------------------------------
    narval_dir = '%s/icon/narval' % local_data_path
    lsm_name = '%s/aux/narval_landsea_coast_mask.h5' % narval_dir

    print('... read land-sea-mask from %s' % lsm_name)
    dset = hio.read_dict_from_hdf(lsm_name)

    lsm = dset['mask50']
    # ================================================================

    # read bt108 -----------------------------------------------------
    print('... read %s from %s' % (vname, fname))
    basename, file_ext = os.path.splitext(os.path.basename(fname))

    date = basename.split('_')[-1]
    t0 = datetime.datetime.strptime(date, '%Y%m%d')

    b3d = ncio.read_icon_4d_data(fname, [vname], itime=None)[vname]
    b3d = np.ma.masked_invalid(b3d)

    ntime, nrow, ncol = b3d.shape
    # ================================================================

    # prepare time vector --------------------------------------------
    rel_time = np.arange(1, ntime + 1)
    index_time = np.arange(ntime)

    day_shift = t0 - datetime.datetime(1970, 1, 1)
    day_shift = day_shift.total_seconds() / (24. * 3600)

    abs_time = day_shift + rel_time / 24.
    # ================================================================

    # get georef .....................................................
    gfile = '%s/aux/target_grid_geo_reference_narval.h5' % narval_dir
    geo = hio.read_dict_from_hdf(gfile)
    lon, lat = geo['lon'], geo['lat']

    if domain_center is not None:
        mlon, mlat = domain_center
    else:
        mlon, mlat = None, None

    x, y = gi.ll2xyc(lon, lat, mlon=mlon, mlat=mlat)

    area = np.abs(gi.simple_pixel_area(x, y, xy=True))
    # ================================================================

    # prepare output .................................................
    dset = {}
    dset[vname] = b3d

    addnames = [
        'x', 'y', 'lon', 'lat', 'lsm', 'area', 'rel_time', 'abs_time',
        'index_time'
    ]
    vvec = [x, y, lon, lat, lsm, area, rel_time, abs_time, index_time]
    for i, aname in enumerate(addnames):
        dset[aname] = vvec[i]

    dset['input_dir'] = os.path.dirname(fname)
    # ================================================================

    # do cutout if wanted --------------------------------------------
    field_names = ['x', 'y', 'lon', 'lat', 'lsm', 'area', vname]

    if region_slice is not None:
        for name in field_names:
            dset[name] = gi.cutout_fields(dset[name], region_slice, vaxis=0)
    # ================================================================

    return dset
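Usage sketch (file name, projection center and slice are hypothetical):

dset = read_narval_addvars('narval_synsat_20160801.nc', 'bt108',
                           domain_center=(-50., 10.),
                           region_slice=((100, 400), (200, 600)))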
Example #6
def read_narval_data(fname):
    '''
    Reads the time stack of Narval data, either Meteosat or synsat.

    
    Parameters
    ----------
    fname : str
       filename of data file
    
    
    Returns
    --------
    dset : dict
       dataset dictionary containing georef and bt108 data.
    '''

    # read land sea data ---------------------------------------------
    narval_dir = '%s/icon/narval' % local_data_path
    lsm_name = '%s/aux/narval_landsea_coast_mask.h5' % narval_dir

    print('... read land-sea-mask from %s' % lsm_name)
    dset = hio.read_dict_from_hdf(lsm_name)

    lsm = dset['mask50']
    # ================================================================

    # read bt108 -----------------------------------------------------
    print('... read BT10.8 from %s' % fname)
    basename, file_ext = os.path.splitext(os.path.basename(fname))

    date = basename.split('_')[-1]
    t0 = datetime.datetime.strptime(date, '%Y%m%d')

    # check whether the file contains observations or simulations
    ftype = basename.split('_')[0]
    if ftype in ['msevi', 'trans']:
        subpath = None
    elif ftype == 'synsat':
        subpath = 'synsat_oper'
    else:
        subpath = None  # fallback for unknown file types

    # read bt108 from hdf
    if file_ext == '.h5':
        b3d = hio.read_var_from_hdf(fname, 'IR_108', subpath=subpath) / 100.
    elif file_ext == '.nc':
        vname = 'bt108'
        b3d = ncio.read_icon_4d_data(fname, [vname], itime=None)[vname]

    b3d = np.ma.masked_invalid(b3d)
    b3d = np.ma.masked_less(b3d, 100.)

    ntime, nrow, ncol = b3d.shape
    # ================================================================

    # prepare time vector --------------------------------------------
    rel_time = np.arange(1, ntime + 1)
    index_time = np.arange(ntime)

    day_shift = t0 - datetime.datetime(1970, 1, 1)
    day_shift = day_shift.total_seconds() / (24. * 3600)

    abs_time = day_shift + rel_time / 24.
    # ================================================================

    # get georef .....................................................
    gfile = '%s/aux/target_grid_geo_reference_narval.h5' % narval_dir
    geo = hio.read_dict_from_hdf(gfile)
    lon, lat = geo['lon'], geo['lat']

    # centered sinusoidal
    x, y = gi.ll2xyc(lon, lat)
    area = np.abs(gi.simple_pixel_area(lon, lat))
    # ================================================================

    # prepare output .................................................
    dset = {}
    vnames = [
        'x', 'y', 'lon', 'lat', 'lsm', 'area', 'rel_time', 'abs_time',
        'index_time'
    ]
    vvec = [x, y, lon, lat, lsm, area, rel_time, abs_time, index_time]
    for i, vname in enumerate(vnames):
        dset[vname] = vvec[i]

    dset['bt108'] = b3d
    dset['input_dir'] = narval_dir
    # ================================================================

    return dset
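The absolute time prepared above counts days since 1970-01-01; converting it back to a datetime is a one-liner (the file name is hypothetical):

dset = read_narval_data('msevi_narval_20160801.nc')
t0 = datetime.datetime(1970, 1, 1)
tobj = t0 + datetime.timedelta(days=float(dset['abs_time'][0]))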
Example #7
def collect_data4cre_sim(radname, itime):
    '''
    Collects a set of simulated data fields for cloud-radiative effect analysis.  

    
    Parameters
    ----------
    radname : str
       name of toa allsky radiation file

    itime : int
       time index of data fields ('swf_net' and 'lwf') in radname


    Returns
    --------
    dset : dict
       dataset dict containing swf, lwf and ct fields
    '''

    # set filenames
    clearname = radname.replace('toa_', 'toa_clear_')
    ctname = radname2ctname(radname, datatype='sim')

    # read radiation data for allsky
    dset = {}
    for vname in ['lwf', 'swf_net', 'swf_up']:
        radset = read_data_field(radname, itime, vname, region='atlantic')
        dset[vname] = radset[vname]

    dset['swf_down'] = dset['swf_net'] - dset['swf_up']

    # read radiation data for clearsky
    for vname in ['lwf', 'swf_net']:
        clearset = read_data_field(clearname, itime, vname, region='atlantic')
        dset['%s_clear' % vname] = clearset[vname]

    # derive clear-sky SWF up from the all-sky downwelling flux
    # (the downwelling flux is the same in the all-sky and clear-sky cases)
    dset['swf_up_clear'] = dset['swf_net_clear'] - dset['swf_down']

    ctset = read_data_field(ctname,
                            radset['time_obj'],
                            'CT',
                            region='atlantic')
    dset.update(ctset)
    # select region mask
    region_mask = dset['mask']

    # shrink the region mask to move away from the coast
    nedge = 11
    region_mask = scipy.ndimage.minimum_filter(region_mask, nedge)

    mlon = dset['lon'][region_mask].mean()
    mlat = dset['lat'][region_mask].mean()

    x, y = gi.ll2xyc(dset['lon'], dset['lat'], mlon=mlon, mlat=mlat)
    a = gi.simple_pixel_area(x, y, xy=True)

    # update mask and area
    dset['mask'] = region_mask
    dset['area'] = a

    return dset
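Sketch of a follow-up cloud-radiative effect (CRE) calculation from the collected fields; all keys are set by the function above, while the file name and time index are hypothetical:

dset = collect_data4cre_sim('toa_radflux_20160801.nc', itime=12)
lwcre = dset['lwf'] - dset['lwf_clear']          # long-wave CRE
swcre = dset['swf_net'] - dset['swf_net_clear']  # short-wave CRE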
Example #8
def collect_data4cre_obs(radname,
                         itime,
                         filepart='-scaled',
                         lwf_clear_offset=-2.):
    '''
    Collects a set of observed data fields for cloud-radiative effect analysis.  
    

    Parameters
    ----------
    radname : str
       name of toa allsky radiation file

    itime : int
       time index of data fields ('swf_net' and 'lwf') in radname
    
    filepart : str, optional, default = '-scaled'
       part of the file name that indicates whether the clear-sky fields
       are scaled; either '-scaled' or '-not_scaled'

    lwf_clear_offset : float, optional, default = -2.
       due to a bias in the simulated LWF, a predefined offset may be used
       as a correction, i.e. LWF_clear_simulated += lwf_clear_offset


    Returns
    --------
    dset : dict
       dataset dict containing swf, lwf and ct fields
    '''

    # set filenames
    # ==============
    clearname = radname.replace('toa_', 'toa_clear_')
    ctname = radname2ctname(radname, datatype='obs')

    # read allsky data
    # =================
    dset = {}
    for vname in ['lwf', 'swf_net', 'swf_up']:
        radset = read_data_field(radname, itime, vname, region='atlantic')
        dset[vname] = radset[vname]

    dset['swf_down'] = dset['swf_net'] - dset['swf_up']

    # find the right short-wave clear file
    # ===================================
    tobj = radset['time_obj']
    filemap = selector.make_filetime_index(
        'swf_net',
        tobj,
        filepart=filepart,
        subdirs=['retrieved_clearsky_netswf'])

    # input swf clear
    # ===============
    clearname = filemap[tobj][0]
    clearset = read_data_field(clearname, tobj, 'swf_net', region='atlantic')
    dset['swf_net_clear'] = clearset['swf_net']
    dset['swf_up_clear'] = dset['swf_net_clear'] - dset['swf_down']

    # long-wave filename
    # ====================
    lwfclearname = clearname.replace(
        'retrieved_clearsky_netswf/clearsky_netswf-',
        'sim-toarad/toa_clear_radflux-')
    lwfclearname = lwfclearname.replace(filepart, '')

    print((radname, clearname, lwfclearname))

    # input lwf clear data
    # ====================
    if filepart == '-not_scaled':
        lwf_clear_offset = 0

    lwfclearset = read_data_field(lwfclearname, tobj, 'lwf', region='atlantic')
    dset['lwf_clear'] = lwfclearset['lwf'] + lwf_clear_offset

    # input cloud type
    # ====================
    ctset = read_data_field(ctname, tobj, 'CT', region='atlantic')
    dset.update(ctset)

    # select and modify region mask
    # ==============================
    region_mask = dset['mask']

    # shrink the region mask to move away from the coast
    nedge = 11
    region_mask = scipy.ndimage.minimum_filter(region_mask, nedge)

    # finally prepare georef
    # =======================
    mlon = dset['lon'][region_mask].mean()
    mlat = dset['lat'][region_mask].mean()

    x, y = gi.ll2xyc(dset['lon'], dset['lat'], mlon=mlon, mlat=mlat)
    a = gi.simple_pixel_area(x, y, xy=True)

    # update mask and area
    dset['mask'] = region_mask
    dset['area'] = a

    return dset
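With the eroded region mask and the pixel areas prepared above, an area-weighted domain mean of, e.g., the long-wave CRE can be sketched as (file name and time index hypothetical):

dset = collect_data4cre_obs('toa_radflux_20160801.nc', itime=12)
m = dset['mask']
w = dset['area'][m]
mean_lwcre = ((dset['lwf'] - dset['lwf_clear'])[m] * w).sum() / w.sum()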
Example #9
def main(collection_file,
         do_output=True,
         output_file=None,
         smooth_sig=2.,
         filter_in_logspace=True,
         var_names=None,
         bins=None,
         itime_range=None,
         dx=20.,
         dy=20.,
         do_interpolation=False,
         variable_filename=None,
         variable_name=None,
         input_options={}):
    '''
    Calculates average number density.
    

    Parameters
    ----------
    collection_file : str
        filename of cluster collection

    do_output : bool, optional, default = True
        switch if output is written

    output_file : str, optional, default = None
        name of output file
        if None: name is generated based on collection filename

    smooth_sig : float, optional, default = 2.
       sigma of Gaussian filter for smoothing the results

    filter_in_logspace : bool, optional, default = True
       switch if the filter is applied in log-space

    var_names : list of two strings, optional, default = None
       list of the two variables used for binning
       if None: 'x_mean' and 'y_mean' are used
       
    bins : list or tuple of two numpy arrays, optional, default = None
       sets the bins of the histogram analysis
       if None: min and max are estimated from the data and (dx, dy) is used
       
    itime_range : list or tuple of two int, optional, default = None
       selects a range of relative time for the analysis
       if None: all times are used
    
    dx : float, optional, default = 20.
       interval of x-binning if bins is None

    dy : float, optional, default = 20.
       interval of y-binning if bins is None

    do_interpolation : bool, optional, default = False
        switch if the number density field is interpolated onto the cluster grid

    variable_filename : str, optional, default = None
        one of the variable files from which the cluster-grid georeference is read
                
    variable_name : str, optional, default = None
        variable name in variable file

    input_options : dict, optional, default = {}
        all possible input options used in the data_reader


    Returns
    --------
    egrid : tuple of two numpy arrays
       edge-based output grid

    nd : numpy array, 2dim
       number density field.
    '''
    # ================================================================

    # input collection data ------------------------------------------
    cset = hio.read_dict_from_hdf(collection_file)
    # ================================================================

    # do number density calculation ----------------------------------
    egrid, nd_ref = calculate_average_numberdensity(
        cset,
        smooth_sig=smooth_sig,
        filter_in_logspace=filter_in_logspace,
        var_names=var_names,
        bins=bins,
        itime_range=itime_range,
        dx=dx,
        dy=dy)

    xgrid = egrid[0]
    ygrid = egrid[1]
    # ================================================================

    # do interpolation of reference number density on cluster grid ---
    if do_interpolation:
        if variable_filename is not None and variable_name is not None:
            varset = data_reader.input(variable_filename, variable_name,
                                       **input_options)

            # centered base grid
            xgridc = gi.lmean(gi.lmean(xgrid, axis=0), axis=1)
            ygridc = gi.lmean(gi.lmean(ygrid, axis=0), axis=1)

            # get target grid
            cx = varset['x']
            cy = varset['y']

            # interpolation index
            ind = gi.create_interpolation_index(xgridc,
                                                ygridc,
                                                cx,
                                                cy,
                                                xy=True)

            # get total number of cells
            ncells = (nd_ref * dx * dy).sum()

            # normalization of new nd field
            da = gi.simple_pixel_area(cx, cy, xy=True)
            norm = (nd_ref[ind] * da).sum()
            nd_int = ncells * nd_ref[ind] / norm
    # ================================================================

    # save stuff into hdf --------------------------------------------
    if do_output:
        out = {}
        out['xgrid'] = xgrid
        out['ygrid'] = ygrid
        out['number_density'] = nd_ref

        # only available if the interpolation branch above was executed
        if do_interpolation and variable_filename is not None \
           and variable_name is not None:
            out['x_int'] = cx
            out['y_int'] = cy
            out['nd_int'] = nd_int

        if output_file is None:
            oname = collection_file.replace('collected_cluster_props',
                                            'average_number_density')
        else:
            oname = output_file

        print('... save data to %s' % oname)
        hio.save_dict2hdf(oname, out)
    # ================================================================

    return egrid, nd_ref
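Illustrative driver call; the collection file name follows the naming scheme assumed in the output branch above:

egrid, nd = main('collected_cluster_props_narval.h5',
                 var_names=['x_mean', 'y_mean'],
                 dx=20., dy=20., do_output=False)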
Example #10
def calculate_bootstrap4clust(cx, cy, segmented, cset, nd_ref,
                              nfixed=None,
                              use_poisson=False,
                              dmin=1.5,
                              addvarnames=[]):

    '''
    Calculates a randomization of cluster field c based on a background
    number density field.


    Parameters
    ----------
    cx : numpy array, 2dim
        x-coordinate of cluster field c

    cy : numpy array, 2dim
        y-coordinate of cluster field c

    segmented : numpy array, 3dim, (ntimes, nrows, ncols)
        cluster field

    cset : dict
        set of cell properties

    nd_ref : numpy array, 2dim, shape = (nrows, ncols)
        reference number density same grid as cluster field

    nfixed : int, optional, default = None
        if set, nfixed specifies a constant number of cells used in bootstrapping

    use_poisson : bool, optional, default = False
       if True, the number of cells is drawn from a Poisson distribution,
       with repeated use of the same cells possible

    dmin : float, optional, default = 1.5
       minimum distance between two cells randomly placed in the domain

    addvarnames : list, optional, default = []
        list of additional variables added to the bootstrap set


    Returns
    --------
    cset_ran : dict
        cell data for randomized field

    segmented_ran : numpy array, 3dim, (ntimes, nrows, ncols)
        randomized cluster field
    '''
    
    # prepare aux fields
    carea = gi.simple_pixel_area(cx, cy, xy=True)

    # initialize random field
    ntimes, nrows, ncols = segmented.shape
    segmented_ran = np.zeros_like(segmented)

    cset_ran = {}

    for addvar in addvarnames:
        cset_ran[addvar] = []
    noffset = 0

    # random shifting within time loop
    for n in range(ntimes):

        c = segmented[n]

        # random bootstrap
        try:
            cell_mapping, cran = random_field_generator_nonuniform_dist(
                c,
                nd_ref,
                use_poisson=use_poisson,
                nfixed=nfixed,
                dmin=dmin,
                output_cell_mapping=True)

        except Exception:
            # fall back to an empty field; no cells are mapped in this case
            cran = np.zeros_like(c)
            cell_mapping = []

        segmented_ran[n] = cran[:, :]

        # cluster analysis for bootstrap set -----------------------------
        dset = {'clust': cran, 'x': cx, 'y': cy, 'area': carea}

        dset['rel_time'] = get_variable4cellids(cset, 'rel_time', n, [1])[0]
        dset['abs_time'] = get_variable4cellids(cset, 'abs_time', n, [1])[0]
        dset['index_time'] = n

        # get cluster properties
        cluster_analysis.cellset_analysis(dset, cset_ran,
                                          noffset=noffset,
                                          var_names=[],
                                          weight_names=[],
                                          do_landsea_fraction=False)

        for addvar in addvarnames:
            cset_ran[addvar] += [get_variable4cellids(cset,
                                                      addvar,
                                                      n,
                                                      cell_mapping)]

        noffset += cran.max()
    
    for addvar in addvarnames:
        cset_ran[addvar] = np.hstack(cset_ran[addvar])

    return cset_ran, segmented_ran
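Usage sketch, assuming segmented, cset and nd_ref come from a prior cluster analysis on the grid (cx, cy):

cset_ran, segmented_ran = calculate_bootstrap4clust(cx, cy, segmented,
                                                    cset, nd_ref,
                                                    use_poisson=True)
print(segmented_ran.shape == segmented.shape)  # randomized field, same shape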