def read_hdcp2_data(fname):
    '''
    Reads BT10.8 data from files generated by the HDCP2 O module.

    Parameters
    ----------
    fname : str
        file name

    Returns
    -------
    dset : dict
        dictionary of datasets
    '''

    # data fields ----------------------------------------------------
    vlist = ['tb108', 'lon', 'lat', 'time']
    dset = ncio.read_icon_4d_data(fname, vlist, itime=None)

    b3d = dset.pop('tb108')
    b3d = np.ma.masked_less(b3d, 100.)
    # ================================================================

    # geo ref --------------------------------------------------------
    lon, lat = dset['lon'], dset['lat']
    x, y = gi.ll2xyc(lon, lat, lon0=10, lat0=50)
    area = np.abs(gi.simple_pixel_area(lon, lat))
    # ================================================================

    # time conversions -----------------------------------------------
    abs_time = dset['time'] / (3600. * 24)
    rel_time = np.mod(abs_time, 1) * 24.

    ntime = len(rel_time)
    index_time = np.arange(ntime)
    # ================================================================

    # prepare output .................................................
    vnames = ['x', 'y', 'area', 'rel_time', 'abs_time', 'index_time']
    vvec = [x, y, area, rel_time, abs_time, index_time]

    for i, vname in enumerate(vnames):
        dset[vname] = vvec[i]

    dset['bt108'] = b3d
    dset['lsm'] = np.ones_like(x)
    dset['input_dir'] = os.path.dirname(fname)
    # ================================================================

    return dset
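# --------------------------------------------------------------------
# A minimal usage sketch for read_hdcp2_data (the file path below is
# hypothetical; ncio and gi are the module's own helpers used above):
def _example_read_hdcp2():

    dset = read_hdcp2_data('/data/hdcp2/hdcp2_bt108_20150704.nc')

    # masked BT10.8 stack plus georef and the three time vectors
    print(dset['bt108'].shape, dset['abs_time'][0], dset['rel_time'][0])
# --------------------------------------------------------------------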
def get_area_rate(lon, lat, f1, f2,
                  thresh,
                  cluster_method='connect',   # segmentation method
                  nsub=4,                     # subsampling factor
                  vmin=None,
                  vmax=None,
                  nedge=20,
                  output_percentile_change=True,
                  percentiles=[50, 75, 90, 95],
                  return_vector=False,
                  dt=3600.,
                  **kwargs):
    '''
    Calculates area rate between two sets of time-connected objects.

    Method:
    (i)   The two fields f1 and f2 are segmented using threshold thresh
          and the condition that f1, f2 > thresh (foreground).
    (ii)  An optical flow transformation is calculated between the two
          fields and an average shift is applied to field f1.
    (iii) The shifted f1 and f2 are stacked and segmented again to get
          the time connection.

    Parameters
    ----------
    lon : numpy array, 2dim
        longitude field

    lat : numpy array, 2dim
        latitude field

    f1 : numpy array, 2dim
        field at present time (to be shifted)

    f2 : numpy array, 2dim
        field at future time (taken to calculate opt. flow)

    thresh : float
        selected threshold for clustering

    cluster_method : str, optional, default = 'connect'
        which method is used for clustering

    nsub : int, optional, default = 4
        subsampling factor

    vmin : float, optional, default = None
        lower minimum value for field clipping
        if None, minimum from input fields is taken

    vmax : float, optional, default = None
        upper maximum value for field clipping
        if None, maximum from input fields is taken

    nedge : int, optional, default = 20
        width (in full-resolution pixels) of the edge zone in which
        clusters are removed

    output_percentile_change : bool, optional, default = True
        switch if change in percentile values is also returned

    percentiles : list, optional, default = [50, 75, 90, 95]
        percentiles for which change is monitored

    return_vector : bool, optional, default = False
        switch if change is returned as vector (and not 2d field)

    dt : float, optional, default = 3600.
        time interval

    **kwargs : dict
        keyword arguments used in clustering routine

    Returns
    -------
    da : numpy array, 2dim, shape subsampled with nsub
        area rate field (units km * m/s)

    davec : numpy array, 1dim, optional if return_vector = True
        area rate vector, sorted per cell

    dp : numpy array, 2dim, optional if output_percentile_change = True
        percentile change (not divided by dt)

    dpvec : numpy array, 1dim, optional if return_vector = True
        AND output_percentile_change = True
        percentile change vector, sorted per cell
    '''

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 1: prepare input fields
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # subsampling ----------------------------------------------------
    f1c = f1[::nsub, ::nsub]
    f2c = f2[::nsub, ::nsub]

    clon = lon[::nsub, ::nsub]
    clat = lat[::nsub, ::nsub]
    # ================================================================

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 2: segmentation of individual fields
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # perform single field segmentation ------------------------------
    s1 = seg.clustering(f1c, thresh, cluster_method=cluster_method, **kwargs)
    s2 = seg.clustering(f2c, thresh, cluster_method=cluster_method, **kwargs)

    # s1 = mahotas.labeled.remove_bordering(s1, (nedge // nsub, nedge // nsub))
    # s2 = mahotas.labeled.remove_bordering(s2, (nedge // nsub, nedge // nsub))
    # ================================================================

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 3: optical flow
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    t1, flow = object_nowcast(f1c, f2c, s1=s1, output_symmetric_flow=True)

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 4: 3d segmentation (time-connected clusters)
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # stack shifted, previous and actual field
    s3d = np.array([t1, s2])

    s = seg.clustering(s3d, 0, cluster_method=cluster_method, **kwargs)
    s = mahotas.labeled.remove_bordering(s, (0, nedge // nsub, nedge // nsub))

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 5: calculate output fields
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    # area rate
    a = gi.simple_pixel_area(clon, clat)
    a1 = scipy.ndimage.measurements.sum(a, labels=s[0],
                                        index=range(0, s.max() + 1))
    a2 = scipy.ndimage.measurements.sum(a, labels=s[1],
                                        index=range(0, s.max() + 1))

    davec = a2 - a1
    davec[0] = 0.

    da = davec[s[1]] * 1000. / dt  # unit conversion: km**2 per dt -> km * m/s

    # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
    # Section 6: optionally calculate percentiles of field
    # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

    if output_percentile_change:

        # f1c is transformed with the optical flow to stay consistent
        # with the shifted label field s[0]
        ft1 = oflow.morph_trans_opt_flow(f1c, flow)

        p1 = percentiles_from_cluster(ft1, s[0], p=percentiles,
                                      index=range(0, s[1].max() + 1))
        p2 = percentiles_from_cluster(f2c, s[1], p=percentiles,
                                      index=range(0, s[1].max() + 1))

        dpvec = np.row_stack(p2) - np.row_stack(p1)
        dp = dpvec[s[1]].transpose(2, 0, 1)

        if return_vector:
            return davec, dpvec
        else:
            return da, dp

    else:
        if return_vector:
            return davec
        else:
            return da
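# --------------------------------------------------------------------
# A minimal synthetic sketch of the three-step method implemented above
# (segment -> optical-flow shift -> stacked segmentation). All numbers
# are made up; thresh and dt are illustrative only.
def _example_area_rate():

    # regular lon-lat grid over a small domain
    lon, lat = np.meshgrid(np.linspace(5., 15., 200),
                           np.linspace(45., 55., 200))

    # a growing, slightly shifted Gaussian "cell" as a fake field pair
    f1 = 10. * np.exp(-((lon - 10.)**2 + (lat - 50.)**2) / 2.)
    f2 = 12. * np.exp(-((lon - 10.2)**2 + (lat - 50.)**2) / 2.)

    da = get_area_rate(lon, lat, f1, f2, thresh=5.,
                       output_percentile_change=False, dt=3600.)

    # positive values mark growing time-connected objects
    print(da.shape, da.max())
# --------------------------------------------------------------------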
def cluster_analysis(lon, lat, c, dmin=20., do_fast=False, xya=None):
    '''
    Simple version of cluster analysis used for aggregation metrics.

    Parameters
    ----------
    lon : numpy array, 2dim with shape = (nrows, ncols)
        longitude

    lat : numpy array, 2dim with shape = (nrows, ncols)
        latitude

    c : numpy array, 2dim with shape = (nrows, ncols), type = int
        categorical cluster field

    dmin : int or float, optional, default = 20
        minimum diameter kept in the cell set data

    do_fast : bool, optional, default = False
        switch to use faster calculation of cell properties

    xya : tuple of 3 numpy arrays, optional, default = None
        precalculated fields of x- and y-coordinate and gridbox area
        (x, y, a) = xya

    Returns
    -------
    ca : dict
        cluster analysis dict
    '''

    # get geo and area fields ----------------------------------------
    if xya is None:
        x, y = gi.ll2xy(lon, lat)
        a = np.abs(gi.simple_pixel_area(lon, lat))
    else:
        x, y, a = xya
    # ================================================================

    # init output dict -----------------------------------------------
    ca = {}

    if do_fast:
        index = range(1, c.max() + 1)
        ca['xc'] = scipy.ndimage.measurements.mean(x, c, index=index)
        ca['yc'] = scipy.ndimage.measurements.mean(y, c, index=index)
        ca['area'] = scipy.ndimage.measurements.sum(a, c, index=index)

    else:
        vnames = ['area', 'xc', 'yc']
        for vname in vnames:
            ca[vname] = []

        # loop over clusters ------------------------------------------
        for n in range(1, c.max() + 1):

            # make cluster mask
            m = (c == n)

            ca['area'].append(a[m].sum())
            ca['xc'].append(x[m].mean())
            ca['yc'].append(y[m].mean())

        for vname in ca.keys():
            ca[vname] = np.array(ca[vname])
    # ================================================================

    # calculate diameter ---------------------------------------------
    ca['dia'] = 2 * np.sqrt(ca['area'] / np.pi)
    # ================================================================

    # do masking -----------------------------------------------------
    m = ca['dia'] > dmin

    for vname in ca.keys():
        ca[vname] = ca[vname][m]
    # ================================================================

    # get number of clusters -----------------------------------------
    ca['number'] = len(ca['dia'])
    # ================================================================

    # calculate distances field --------------------------------------
    # make a mesh for fast matrix-based distance calculation
    xx, yy = np.meshgrid(ca['xc'], ca['yc'])

    # get squared coordinate deviations
    dxq = (xx - xx.transpose())**2
    dyq = (yy - yy.transpose())**2

    dist_matrix = np.sqrt(dxq + dyq)

    # take upper triangular matrix
    dist_matrix = np.triu(dist_matrix)
    mask_matrix = (dist_matrix != 0)

    ca['dist'] = dist_matrix[mask_matrix]
    # ================================================================

    # calculate diameters --------------------------------------------
    ca['D0'] = scipy.stats.gmean(ca['dist'])
    ca['D1'] = np.mean(ca['dist'])
    # ================================================================

    return ca
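# --------------------------------------------------------------------
# A tiny self-contained check of the pairwise-distance bookkeeping used
# in cluster_analysis (illustrative centroids; np.triu_indices replaces
# the triu + nonzero masking with an explicit upper-triangle selection):
def _example_pairwise_distances():

    # centroids of three hypothetical cells (km)
    xc = np.array([0., 10., 20.])
    yc = np.array([0., 0., 10.])

    # matrix of all pairwise distances via broadcasting
    dist = np.sqrt((xc[:, None] - xc[None, :])**2 +
                   (yc[:, None] - yc[None, :])**2)

    # each pair counted once (upper triangle, without diagonal)
    pair_dist = dist[np.triu_indices(len(xc), k=1)]

    print('D0 =', scipy.stats.gmean(pair_dist))  # geometric mean
    print('D1 =', np.mean(pair_dist))            # arithmetic mean
# --------------------------------------------------------------------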
def read_icon_lem_data(fname):
    '''
    Reads BT10.8 data from files generated for ICON-LEM runs.

    Parameters
    ----------
    fname : str
        file name

    Returns
    -------
    dset : dict
        dictionary of datasets
    '''

    # data fields ----------------------------------------------------
    vlist = ['bt108', 'lon', 'lat', 'time']
    dset = ncio.read_icon_4d_data(fname, vlist, itime=None)

    b3d = dset.pop('bt108')
    b3d = np.ma.masked_less(b3d, 100.)
    # ================================================================

    # geo ref --------------------------------------------------------
    lon, lat = dset['lon'], dset['lat']
    x, y = gi.ll2xyc(lon, lat, lon0=10, lat0=50)
    area = np.abs(gi.simple_pixel_area(lon, lat))
    # ================================================================

    # time conversions -----------------------------------------------
    rel_time = 24 * (dset['time'] - dset['time'][0])

    ntime = len(rel_time)
    index_time = np.arange(ntime)

    t0 = datetime.datetime(1970, 1, 1)

    abs_time = []
    for t in dset['time']:
        day = str(int(t))
        subday = np.mod(t, 1)

        tobj = datetime.datetime.strptime(day, '%Y%m%d')
        tobj += datetime.timedelta(days=subday)

        dt = (tobj - t0).total_seconds()
        abs_time.append(dt / (24. * 3600.))

    abs_time = np.array(abs_time)
    # ================================================================

    # prepare output .................................................
    vnames = ['x', 'y', 'area', 'rel_time', 'abs_time', 'index_time']
    vvec = [x, y, area, rel_time, abs_time, index_time]

    for i, vname in enumerate(vnames):
        dset[vname] = vvec[i]

    dset['bt108'] = b3d
    dset['lsm'] = np.ones_like(x)
    dset['input_dir'] = os.path.dirname(fname)
    # ================================================================

    return dset
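# --------------------------------------------------------------------
# A short sketch of the time decoding done in read_icon_lem_data: one
# YYYYMMDD.frac-of-day stamp is converted into days since 1970-01-01
# (the stamp below is hypothetical):
def _example_decode_icon_time():

    t = 20150704.5  # noon on 4 July 2015

    tobj = datetime.datetime.strptime(str(int(t)), '%Y%m%d')
    tobj += datetime.timedelta(days=float(np.mod(t, 1)))

    t0 = datetime.datetime(1970, 1, 1)
    print((tobj - t0).total_seconds() / (24. * 3600.))  # -> 16620.5
# --------------------------------------------------------------------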
def read_narval_addvars(fname, vname, domain_center=None, region_slice=None):
    '''
    Reads the time stack of NARVAL data, either Meteosat or synsat.

    Parameters
    ----------
    fname : str
        filename of data file

    vname : str
        variable name (variable should be contained in file)

    domain_center : tuple of floats, optional, default = None
        sets the projection center to (clon, clat)
        if None: not used

    region_slice : tuple of int tuples, optional, default = None
        cutout of fields of the form ((irow1, irow2), (icol1, icol2))
        if None: not used

    Returns
    -------
    dset : dict
        dataset dictionary containing georef and bt108 data.
    '''

    # read land sea data ---------------------------------------------
    narval_dir = '%s/icon/narval' % local_data_path
    lsm_name = '%s/aux/narval_landsea_coast_mask.h5' % narval_dir

    print('... read land-sea-mask from %s' % lsm_name)
    dset = hio.read_dict_from_hdf(lsm_name)
    lsm = dset['mask50']
    # ================================================================

    # read bt108 -----------------------------------------------------
    print('... read %s from %s' % (vname, fname))

    basename, file_ext = os.path.splitext(os.path.basename(fname))
    date = basename.split('_')[-1]
    t0 = datetime.datetime.strptime(date, '%Y%m%d')

    b3d = ncio.read_icon_4d_data(fname, [vname, ], itime=None)[vname]
    b3d = np.ma.masked_invalid(b3d)

    ntime, nrow, ncol = b3d.shape
    # ================================================================

    # prepare time vector --------------------------------------------
    rel_time = np.arange(1, ntime + 1)
    index_time = np.arange(ntime)

    day_shift = t0 - datetime.datetime(1970, 1, 1)
    day_shift = day_shift.total_seconds() / (24. * 3600)

    abs_time = day_shift + rel_time / 24.
    # ================================================================

    # get georef .....................................................
    gfile = '%s/aux/target_grid_geo_reference_narval.h5' % narval_dir
    geo = hio.read_dict_from_hdf(gfile)
    lon, lat = geo['lon'], geo['lat']

    if domain_center is not None:
        mlon, mlat = domain_center
    else:
        mlon, mlat = None, None

    x, y = gi.ll2xyc(lon, lat, mlon=mlon, mlat=mlat)
    area = np.abs(gi.simple_pixel_area(x, y, xy=True))
    # ================================================================

    # prepare output .................................................
    dset = {}
    dset[vname] = b3d

    addnames = ['x', 'y', 'lon', 'lat', 'lsm', 'area',
                'rel_time', 'abs_time', 'index_time']
    vvec = [x, y, lon, lat, lsm, area, rel_time, abs_time, index_time]

    for i, aname in enumerate(addnames):
        dset[aname] = vvec[i]

    dset['input_dir'] = os.path.dirname(fname)
    # ================================================================

    # do cutout if wanted --------------------------------------------
    field_names = ['x', 'y', 'lon', 'lat', 'lsm', 'area', vname]

    if region_slice is not None:
        for name in field_names:
            dset[name] = gi.cutout_fields(dset[name], region_slice, vaxis=0)
    # ================================================================

    return dset
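# --------------------------------------------------------------------
# A synthetic stand-in for the region_slice cutout convention (this is
# not the gi.cutout_fields implementation, just the index logic applied
# to a (time, row, col) stack):
def _example_region_slice():

    b3d = np.zeros((24, 400, 500))
    (ir1, ir2), (ic1, ic2) = (100, 300), (50, 450)

    cut = b3d[:, ir1:ir2, ic1:ic2]
    print(cut.shape)  # (24, 200, 400)
# --------------------------------------------------------------------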
def read_narval_data(fname):
    '''
    Reads the time stack of NARVAL data, either Meteosat or synsat.

    Parameters
    ----------
    fname : str
        filename of data file

    Returns
    -------
    dset : dict
        dataset dictionary containing georef and bt108 data.
    '''

    # read land sea data ---------------------------------------------
    narval_dir = '%s/icon/narval' % local_data_path
    lsm_name = '%s/aux/narval_landsea_coast_mask.h5' % narval_dir

    print('... read land-sea-mask from %s' % lsm_name)
    dset = hio.read_dict_from_hdf(lsm_name)
    lsm = dset['mask50']
    # ================================================================

    # read bt108 -----------------------------------------------------
    print('... read BT10.8 from %s' % fname)

    basename, file_ext = os.path.splitext(os.path.basename(fname))
    date = basename.split('_')[-1]
    t0 = datetime.datetime.strptime(date, '%Y%m%d')

    # check whether it is obs or sim
    ftype = basename.split('_')[0]
    if ftype in ['msevi', 'trans']:
        subpath = None
    elif ftype == 'synsat':
        subpath = 'synsat_oper'

    # read bt108 from hdf or netcdf
    if file_ext == '.h5':
        b3d = hio.read_var_from_hdf(fname, 'IR_108', subpath=subpath) / 100.
    elif file_ext == '.nc':
        vname = 'bt108'
        b3d = ncio.read_icon_4d_data(fname, [vname, ], itime=None)[vname]
    else:
        raise ValueError('unknown file extension: %s' % file_ext)

    b3d = np.ma.masked_invalid(b3d)
    b3d = np.ma.masked_less(b3d, 100.)

    ntime, nrow, ncol = b3d.shape
    # ================================================================

    # prepare time vector --------------------------------------------
    rel_time = np.arange(1, ntime + 1)
    index_time = np.arange(ntime)

    day_shift = t0 - datetime.datetime(1970, 1, 1)
    day_shift = day_shift.total_seconds() / (24. * 3600)

    abs_time = day_shift + rel_time / 24.
    # ================================================================

    # get georef .....................................................
    gfile = '%s/aux/target_grid_geo_reference_narval.h5' % narval_dir
    geo = hio.read_dict_from_hdf(gfile)
    lon, lat = geo['lon'], geo['lat']

    # centered sinusoidal projection
    x, y = gi.ll2xyc(lon, lat)
    area = np.abs(gi.simple_pixel_area(lon, lat))
    # ================================================================

    # prepare output .................................................
    dset = {}
    vnames = ['x', 'y', 'lon', 'lat', 'lsm', 'area',
              'rel_time', 'abs_time', 'index_time']
    vvec = [x, y, lon, lat, lsm, area, rel_time, abs_time, index_time]

    for i, vname in enumerate(vnames):
        dset[vname] = vvec[i]

    dset['bt108'] = b3d
    dset['input_dir'] = narval_dir
    # ================================================================

    return dset
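# --------------------------------------------------------------------
# A small sketch of the filename convention read_narval_data relies on
# (hypothetical file name): the date is the last underscore-separated
# token of the basename, the file type the first one.
def _example_parse_narval_name():

    fname = '/data/narval/msevi_narval_DOM01_20160801.nc'

    basename, file_ext = os.path.splitext(os.path.basename(fname))
    date = basename.split('_')[-1]
    ftype = basename.split('_')[0]

    t0 = datetime.datetime.strptime(date, '%Y%m%d')
    print(ftype, file_ext, t0.isoformat())  # msevi .nc 2016-08-01T00:00:00
# --------------------------------------------------------------------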
def collect_data4cre_sim(radname, itime):
    '''
    Collects a set of simulated data fields for cloud-radiative effect
    analysis.

    Parameters
    ----------
    radname : str
        name of toa allsky radiation file

    itime : int
        time index of data fields ('swf_net' and 'lwf') in radname

    Returns
    -------
    dset : dict
        dataset dict containing swf, lwf and ct fields
    '''

    # set filenames
    clearname = radname.replace('toa_', 'toa_clear_')
    ctname = radname2ctname(radname, datatype='sim')

    # read radiation data for allsky
    dset = {}
    for vname in ['lwf', 'swf_net', 'swf_up']:
        radset = read_data_field(radname, itime, vname, region='atlantic')
        dset[vname] = radset[vname]

    dset['swf_down'] = dset['swf_net'] - dset['swf_up']

    # read radiation data for clearsky
    for vname in ['lwf', 'swf_net']:
        clearset = read_data_field(clearname, itime, vname, region='atlantic')
        dset['%s_clear' % vname] = clearset[vname]

    # calculate clear-sky SWF up from the all-sky downwelling flux
    # (clear-sky and all-sky downwelling are taken to be the same)
    dset['swf_up_clear'] = dset['swf_net_clear'] - dset['swf_down']

    # read cloud type
    ctset = read_data_field(ctname, radset['time_obj'], 'CT',
                            region='atlantic')
    dset.update(ctset)

    # select region mask
    region_mask = dset['mask']

    # possible extension (get away from coast)
    nedge = 11
    region_mask = scipy.ndimage.minimum_filter(region_mask, nedge)

    # prepare georef
    mlon = dset['lon'][region_mask].mean()
    mlat = dset['lat'][region_mask].mean()

    x, y = gi.ll2xyc(dset['lon'], dset['lat'], mlon=mlon, mlat=mlat)
    a = gi.simple_pixel_area(x, y, xy=True)

    # update mask and area
    dset['mask'] = region_mask
    dset['area'] = a

    return dset
def collect_data4cre_obs(radname, itime, filepart='-scaled',
                         lwf_clear_offset=-2.):
    '''
    Collects a set of observed data fields for cloud-radiative effect
    analysis.

    Parameters
    ----------
    radname : str
        name of toa allsky radiation file

    itime : int
        time index of data fields ('swf_net' and 'lwf') in radname

    filepart : str, optional, default = '-scaled'
        part in the file name that gives information about scaling of
        clear-sky fields, either '-scaled' or '-not_scaled'

    lwf_clear_offset : float, optional, default = -2.
        due to the bias in the simulated LWF, a predefined offset may be
        used to correct this issue,
        i.e. LWF_clear_simulated += lwf_clear_offset

    Returns
    -------
    dset : dict
        dataset dict containing swf, lwf and ct fields
    '''

    # set filenames
    # ==============
    clearname = radname.replace('toa_', 'toa_clear_')
    ctname = radname2ctname(radname, datatype='obs')

    # read allsky data
    # =================
    dset = {}
    for vname in ['lwf', 'swf_net', 'swf_up']:
        radset = read_data_field(radname, itime, vname, region='atlantic')
        dset[vname] = radset[vname]

    dset['swf_down'] = dset['swf_net'] - dset['swf_up']

    # find the right short-wave clear file
    # ====================================
    tobj = radset['time_obj']
    filemap = selector.make_filetime_index(
        'swf_net', tobj,
        filepart=filepart,
        subdirs=['retrieved_clearsky_netswf'])

    # input swf clear
    # ===============
    clearname = filemap[tobj][0]
    clearset = read_data_field(clearname, tobj, 'swf_net', region='atlantic')
    dset['swf_net_clear'] = clearset['swf_net']
    dset['swf_up_clear'] = dset['swf_net_clear'] - dset['swf_down']

    # long-wave filename
    # ==================
    lwfclearname = clearname.replace(
        'retrieved_clearsky_netswf/clearsky_netswf-',
        'sim-toarad/toa_clear_radflux-')
    lwfclearname = lwfclearname.replace(filepart, '')

    print((radname, clearname, lwfclearname))

    # input lwf clear data
    # ====================
    if filepart == '-not_scaled':
        lwf_clear_offset = 0

    lwfclearset = read_data_field(lwfclearname, tobj, 'lwf',
                                  region='atlantic')
    dset['lwf_clear'] = lwfclearset['lwf'] + lwf_clear_offset

    # input cloud type
    # ================
    ctset = read_data_field(ctname, tobj, 'CT', region='atlantic')
    dset.update(ctset)

    # select and modify region mask
    # ==============================
    region_mask = dset['mask']

    # possible extension (get away from coast)
    nedge = 11
    region_mask = scipy.ndimage.minimum_filter(region_mask, nedge)

    # finally prepare georef
    # =======================
    mlon = dset['lon'][region_mask].mean()
    mlat = dset['lat'][region_mask].mean()

    x, y = gi.ll2xyc(dset['lon'], dset['lat'], mlon=mlon, mlat=mlat)
    a = gi.simple_pixel_area(x, y, xy=True)

    # update mask and area
    dset['mask'] = region_mask
    dset['area'] = a

    return dset
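# --------------------------------------------------------------------
# A sketch of how the collected fields might be combined into
# cloud-radiative effects, e.g. with dset = collect_data4cre_obs(...).
# The sign convention (all-sky minus clear-sky) is an assumption here
# and must be checked against this module's flux definitions:
def _example_cre(dset):

    m = dset['mask']

    cre_sw = dset['swf_net'] - dset['swf_net_clear']
    cre_lw = dset['lwf'] - dset['lwf_clear']

    print('mean SW CRE:', cre_sw[m].mean())
    print('mean LW CRE:', cre_lw[m].mean())
# --------------------------------------------------------------------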
def main(collection_file,
         do_output=True,
         output_file=None,
         smooth_sig=2.,
         filter_in_logspace=True,
         var_names=None,
         bins=None,
         itime_range=None,
         dx=20.,
         dy=20.,
         do_interpolation=False,
         variable_filename=None,
         variable_name=None,
         input_options={}):
    '''
    Calculates average number density.

    Parameters
    ----------
    collection_file : str
        filename of cluster collection

    do_output : bool, optional, default = True
        switch if output is written

    output_file : str, optional, default = None
        name of output file
        if None: name is generated based on collection filename

    smooth_sig : float, optional, default = 2.
        sigma of Gaussian filter for smoothing the results

    filter_in_logspace : bool, optional, default = True
        switch if filter is applied in log-space

    var_names : list of two strings, optional, default = None
        list of the two variables used for binning
        if None: 'x_mean' and 'y_mean' are used

    bins : list or tuple of two numpy arrays, optional, default = None
        sets the bins of histogram analysis
        if None: min and max are estimated from data and (dx, dy) is used

    itime_range : list or tuple of two int, optional, default = None
        selects a range of relative time for the analysis
        if None: all times are used

    dx : float, optional, default = 20.
        interval of x-binning if bins is None

    dy : float, optional, default = 20.
        interval of y-binning if bins is None

    do_interpolation : bool, optional, default = False
        switch if number density field is interpolated onto cluster grid

    variable_filename : str, optional, default = None
        one of the variable files to input cluster grid georef

    variable_name : str, optional, default = None
        variable name in variable file

    input_options : dict, optional, default = {}
        all possible input options used in the data_reader

    Returns
    -------
    egrid : tuple of two numpy arrays
        edge-based output grid

    nd : numpy array, 2dim
        number density field
    '''

    # input collection data ------------------------------------------
    cset = hio.read_dict_from_hdf(collection_file)
    # ================================================================

    # do number density calculation ----------------------------------
    egrid, nd_ref = calculate_average_numberdensity(
        cset,
        smooth_sig=smooth_sig,
        filter_in_logspace=filter_in_logspace,
        var_names=var_names,
        bins=bins,
        itime_range=itime_range,
        dx=dx,
        dy=dy)

    xgrid = egrid[0]
    ygrid = egrid[1]
    # ================================================================

    # do interpolation of reference number density on cluster grid ---
    # (needs both variable_filename and variable_name, otherwise
    # interpolation is skipped)
    if do_interpolation and (variable_filename is None
                             or variable_name is None):
        do_interpolation = False

    if do_interpolation:
        varset = data_reader.input(variable_filename, variable_name,
                                   **input_options)

        # centered base grid
        xgridc = gi.lmean(gi.lmean(xgrid, axis=0), axis=1)
        ygridc = gi.lmean(gi.lmean(ygrid, axis=0), axis=1)

        # get target grid
        cx = varset['x']
        cy = varset['y']

        # interpolation index
        ind = gi.create_interpolation_index(xgridc, ygridc, cx, cy, xy=True)

        # get total number of cells
        ncells = (nd_ref * dx * dy).sum()

        # normalization of new nd field
        da = gi.simple_pixel_area(cx, cy, xy=True)
        norm = (nd_ref[ind] * da).sum()
        nd_int = ncells * nd_ref[ind] / norm
    # ================================================================

    # save stuff into hdf --------------------------------------------
    if do_output:
        out = {}
        out['xgrid'] = xgrid
        out['ygrid'] = ygrid
        out['number_density'] = nd_ref

        if do_interpolation:
            out['x_int'] = cx
            out['y_int'] = cy
            out['nd_int'] = nd_int

        if output_file is None:
            oname = collection_file.replace('collected_cluster_props',
                                            'average_number_density')
        else:
            oname = output_file

        print('... save data to %s' % oname)
        hio.save_dict2hdf(oname, out)
    # ================================================================

    return egrid, nd_ref
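# --------------------------------------------------------------------
# A self-contained sketch of the binning-plus-smoothing idea wrapped by
# main() (synthetic cell positions; smoothing in linear space for
# simplicity, whereas main() can optionally filter in log-space):
def _example_number_density():

    # fake cell center positions (km)
    x = np.random.randn(1000) * 100.
    y = np.random.randn(1000) * 100.

    dx = dy = 20.
    xbins = np.arange(-300., 301., dx)
    ybins = np.arange(-300., 301., dy)

    # counts per bin -> density per km**2 -> Gaussian smoothing
    h, _, _ = np.histogram2d(x, y, bins=(xbins, ybins))
    nd = scipy.ndimage.gaussian_filter(h / (dx * dy), 2.)

    print(nd.sum() * dx * dy)  # approx. number of cells inside the bins
# --------------------------------------------------------------------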
def calculate_bootstrap4clust(cx, cy, segmented, cset, nd_ref,
                              nfixed=None,
                              use_poisson=False,
                              dmin=1.5,
                              addvarnames=[]):
    '''
    Calculates a randomization of cluster field c based on a background
    number density field.

    Parameters
    ----------
    cx : numpy array, 2dim
        x-coordinate of cluster field c

    cy : numpy array, 2dim
        y-coordinate of cluster field c

    segmented : numpy array, 3dim, (ntimes, nrows, ncols)
        cluster field

    cset : dict
        set of cell properties

    nd_ref : numpy array, 2dim, shape = (nrows, ncols)
        reference number density, same grid as cluster field

    nfixed : int, optional, default = None
        if set, nfixed specifies a constant number of cells used in
        bootstrapping

    use_poisson : bool, optional, default = False
        if True, the number of cells is drawn from a Poisson
        distribution, with repeated use of the same cells possible

    dmin : float, optional, default = 1.5
        minimum distance between two cells randomly placed in the domain

    addvarnames : list, optional, default = []
        list of additional variables added to the bootstrap set

    Returns
    -------
    cset_ran : dict
        cell data for randomized field

    segmented_ran : numpy array, 3dim, (ntimes, nrows, ncols)
        randomized cluster field
    '''

    # prepare aux fields
    carea = gi.simple_pixel_area(cx, cy, xy=True)

    # initialize random field
    ntimes, nrows, ncols = segmented.shape
    segmented_ran = np.zeros_like(segmented)

    cset_ran = {}
    for addvar in addvarnames:
        cset_ran[addvar] = []

    noffset = 0

    # random shifting within time loop
    for n in range(ntimes):

        c = segmented[n]

        # random bootstrap
        try:
            cell_mapping, cran = random_field_generator_nonuniform_dist(
                c, nd_ref,
                use_poisson=use_poisson,
                nfixed=nfixed,
                dmin=dmin,
                output_cell_mapping=True)
        except Exception:
            # fall back to an empty field if randomization fails
            cell_mapping, cran = [], np.zeros_like(c)

        segmented_ran[n] = cran[:, :]

        # cluster analysis for bootstrap set --------------------------
        dset = {'clust': cran, 'x': cx, 'y': cy, 'area': carea}
        dset['rel_time'] = get_variable4cellids(cset, 'rel_time', n, [1])[0]
        dset['abs_time'] = get_variable4cellids(cset, 'abs_time', n, [1])[0]
        dset['index_time'] = n

        # get cluster properties
        cluster_analysis.cellset_analysis(dset, cset_ran,
                                          noffset=noffset,
                                          var_names=[],
                                          weight_names=[],
                                          do_landsea_fraction=False)

        for addvar in addvarnames:
            cset_ran[addvar] += [get_variable4cellids(cset, addvar, n,
                                                      cell_mapping)]

        noffset += cran.max()

    for addvar in addvarnames:
        cset_ran[addvar] = np.hstack(cset_ran[addvar])

    return cset_ran, segmented_ran
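# --------------------------------------------------------------------
# A standalone illustration of the dmin constraint used in the
# bootstrapping above: candidate cell positions are rejected if they
# fall closer than dmin to an already accepted one. This is NOT the
# random_field_generator_nonuniform_dist implementation, just the
# rejection idea on a uniform toy domain.
def _example_min_distance_placement():

    dmin = 1.5
    pts = []

    while len(pts) < 50:
        p = np.random.uniform(0., 20., size=2)
        dists = [np.hypot(*(p - q)) for q in pts]

        if not dists or min(dists) >= dmin:
            pts.append(p)

    print(len(pts), 'cells placed with pairwise distance >=', dmin)
# --------------------------------------------------------------------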