import collections.abc

import netCDF4 as nc
import numpy as np
import numpy.ma as ma
import pandas as pd

# Project-local modules, with names assumed from the aliases used below:
# "rv" supplies ReduceVar and monthly_aggregator, "agg" supplies
# NetCDFTemplate, and "trend" supplies CompressedAxes. REDUCE_MONTHLY is
# assumed to be this module's sentinel selecting calendar-month aggregation.
import reduce_var as rv
import aggregator as agg
import trend


def aggregate(infile, outfile, reduction, variables=None,
              agg_methods=rv.ReduceVar.REDUCE_MEAN, agg_dim='days'):
    """Copy named variables and aggregate them in the specified manner.

    Copies infile to outfile, aggregating the named variables along
    agg_dim by the specified reduction factor, using agg_methods to
    produce the representative values written to outfile."""
    in_ds = nc.Dataset(infile)

    # If the user did not specify which variables to reduce, guess that
    # they want everything except coordinate and navigation variables.
    if variables is None:
        variables = list(in_ds.variables.keys())
        for d in in_ds.dimensions.keys():
            # Guard the removal: not every dimension has a coordinate
            # variable of the same name.
            if d in variables:
                variables.remove(d)
        if 'nav_lat' in variables:
            variables.remove('nav_lat')
        if 'nav_lon' in variables:
            variables.remove('nav_lon')

    # Set up the "ReduceVar" aggregator, assuming that all variables to
    # be reduced have the same dimensions.
    v = in_ds.variables[variables[0]]
    variable_shape = v.shape
    variable_dims = v.dimensions
    i_agg = variable_dims.index(agg_dim)
    if reduction == REDUCE_MONTHLY:
        aggregator = rv.monthly_aggregator(variable_shape, i_agg)
    else:
        aggregator = rv.ReduceVar(variable_shape, i_agg, reduction)

    # Figure out the shape of the output array.
    output_shape = list(variable_shape)
    output_shape[i_agg] = aggregator.reduced

    # Create the output file. Don't let the template copy the aggregated
    # dimension to the new file; create it at its reduced size instead.
    out_agg = agg.NetCDFTemplate(infile, outfile)
    out_agg.createDimension(agg_dim, aggregator.reduced)

    # Copy the "navigation" variables.
    out_agg.copyVariable('nav_lat')
    out_agg.copyVariable('nav_lon')

    # Expand agg_methods if necessary. Strings are Sequences, so treat a
    # single (possibly string-valued) method as a scalar to broadcast.
    if (isinstance(agg_methods, str)
            or not isinstance(agg_methods, collections.abc.Sequence)):
        agg_methods = [agg_methods] * len(variables)

    # Prepare an index to write the output.
    out_slice = [slice(None, None, None)] * len(variable_shape)

    # Loop over the variables.
    for varname, agg_method in zip(variables, agg_methods):
        v = in_ds.variables[varname]
        fill_value = getattr(v, '_FillValue', None)
        out_v = out_agg.create_variable(varname, v.dimensions, v.dtype,
                                        fill=fill_value)

        # Loop over each reduced index, writing one aggregated slab at a
        # time. Index with a tuple: recent NumPy no longer accepts a list
        # of slices as an index.
        for reduced_i in range(aggregator.reduced):
            out_slice[i_agg] = reduced_i
            out_v[tuple(out_slice)] = aggregator.reduce(
                agg_method, reduced_i, v)

    out_agg.close()
    in_ds.close()
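# A minimal usage sketch for aggregate(). The file names below are
# hypothetical, and the integer case assumes ReduceVar accepts a plain
# reduction factor (e.g., 10 samples per representative value), as the
# signature above suggests:
#
#     aggregate('indices_daily.nc', 'indices_monthly.nc', REDUCE_MONTHLY)
#     aggregate('indices_daily.nc', 'indices_10day.nc', 10,
#               variables=['t_max', 'fm1000'])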
def ba_compare_year(indicesfile, bafile, outfile=None, indicesvarnames=None,
                    support=None, reduction=None):
    """Collate the various indices with burned-area counts.

    Returns a DataFrame of nonzero burned-area counts and the matching
    index values, hierarchically indexed by time sample and landcover
    code. If outfile is given, the frame is also written there as CSV."""
    # Interesting variables from the indicesfile. The dry-day sequence
    # "dd" is reduced by taking the last value rather than the mean.
    last_val_reduce = []
    if indicesvarnames is None:
        indicesvarnames = ['gsi_avg', 'fm1000', 'fm100', 'fm10', 'fm1',
                           'dd', 't_max']
    if 'dd' in indicesvarnames:
        last_val_reduce = ['dd']

    indices = nc.Dataset(indicesfile)
    indicesvars = [indices.variables[v] for v in indicesvarnames]
    ba = nc.Dataset(bafile)
    count = ba.variables['count']

    if support is not None:
        s = nc.Dataset(support)
        supportvars = list(s.variables.keys())
        supportvars.remove('land')
        indicesvarnames.extend(supportvars)
        indicesvars.extend([s.variables[v] for v in supportvars])
        last_val_reduce.extend(supportvars)

    # Workaround: a bug in the index calculator means the last day is
    # never calculated, so exclude it.
    time_samples = range(1, len(ba.dimensions['days']) - 1)
    if reduction is not None:
        if reduction == REDUCE_MONTHLY:
            grid_reducer = rv.monthly_aggregator(count.shape, 3)
            cmp_reducer = rv.monthly_aggregator(indicesvars[0].shape, 0)
            grid_reducer.cutpoints[0] = 1
            cmp_reducer.cutpoints[1] = 1
        else:
            grid_reducer = rv.ReduceVar(count.shape, 3, reduction)
            cmp_reducer = rv.ReduceVar(indicesvars[0].shape, 0, reduction)
        time_samples = range(grid_reducer.reduced)

    ca = trend.CompressedAxes(indices, 'land')

    alldata = []
    days = []

    for i_time in time_samples:
        day_data = []
        active_lc = []
        if reduction is None:
            count_slice = count[..., i_time]
        else:
            count_slice = np.array(grid_reducer.sum(i_time, count))
        for lc in range(len(ba.dimensions['landcover'])):
            # Compress the count onto the "land" axis.
            lc_count = ca.compress(count_slice[:, :, lc])

            # Find nonzero counts.
            i_nonzero = ma.nonzero(lc_count)

            if len(i_nonzero[0]) > 0:
                # Construct a dataframe for this landcover code.
                lc_data = {"BA Count": lc_count[i_nonzero]}

                for n, v in zip(indicesvarnames, indicesvars):
                    # Reduce the variable if necessary.
                    if reduction is None:
                        day_v = v[i_time, :]
                    else:
                        # The last value of the dry-day sequence is
                        # representative of the reduced time period.
                        if n in last_val_reduce:
                            day_v = cmp_reducer.last_val(i_time, v)
                        else:
                            day_v = cmp_reducer.mean(i_time, v)

                    # Add a column for the current index.
                    lc_data[n] = day_v[i_nonzero]

                day_data.append(pd.DataFrame(lc_data))
                active_lc.append(ba.variables['landcover'][lc])

        if len(day_data) > 0:
            alldata.append(pd.concat(day_data, keys=active_lc))
            days.append(i_time)

    all_data_frame = pd.concat(alldata, keys=days)
    if outfile is not None:
        all_data_frame.to_csv(outfile)

    return all_data_frame
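# A minimal usage sketch for ba_compare_year(). The file names are
# hypothetical; the returned DataFrame is hierarchically indexed by time
# sample and landcover code, with one column per index plus "BA Count":
#
#     frame = ba_compare_year('indices_2010.nc', 'ba_2010.nc',
#                             outfile='ba_vs_indices_2010.csv',
#                             reduction=REDUCE_MONTHLY)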