def get_values_arr_and_dt_arr(ncVar_temporal, ncVar_values, fill_val=None, time_range=None, N_lev=None,
                              ignore_Feb29th=False,
                              i1_row_current_tile=None, i2_row_current_tile=None,
                              i1_col_current_tile=None, i2_col_current_tile=None,
                              add_offset=0.0, scale_factor=1.0):
    '''
    Reads a (time, row, col) block of values together with the matching dates.

    :param ncVar_temporal: time variable; must expose ``units``, may expose ``calendar``
                           ('gregorian' is used when it is missing) and must support ``[:]``
    :param ncVar_values: 3D variable indexed as [time, row, col], or 4D variable indexed as
                         [time, level, row, col] when ``N_lev`` is given
    :param fill_val: if not None, set as fill value of the result with ``numpy.ma.set_fill_value``
    :param time_range: None to keep every time step, otherwise the range handed to
                       ``util_dt.adjust_time_range`` / ``util_dt.get_indices_subset``
    :param N_lev: index along the level axis (second axis) of a 4D variable, or None for a 3D variable
    :param ignore_Feb29th: if true, time steps falling on February 29th are dropped
                           (never applied to the '360_day' calendar)
    :param i1_row_current_tile: start of the row slice
    :param i2_row_current_tile: end of the row slice
    :param i1_col_current_tile: start of the column slice
    :param i2_col_current_tile: end of the column slice
    :param add_offset: offset added to the values after scaling
    :param scale_factor: factor the raw values are multiplied by

    :rtype: tuple (dt_arr, values_arr): 1D array of dates and 3D array of values
    '''

    # a missing "calendar" attribute means the default calendar
    calend = getattr(ncVar_temporal, 'calendar', 'gregorian')
    units = ncVar_temporal.units

    time_arr = ncVar_temporal[:]
    dt_arr = numpy.array([util_dt.num2date(dt, calend=calend, units=units) for dt in time_arr])

    # The daily-step check is informative only: it needs at least two time steps and
    # date objects whose difference provides total_seconds(); otherwise it is skipped.
    if len(dt_arr) > 1:
        try:
            deltat = (dt_arr[1] - dt_arr[0]).total_seconds()
        except (AttributeError, TypeError):
            deltat = None
        if deltat is not None and deltat != 86400.0:
            print("WARNING: Time interval of the input file is not daily!! Delta time is: " + str(deltat))

    if N_lev is None:
        assert(ncVar_values.ndim == 3)
    else:
        assert(ncVar_values.ndim == 4)

    if time_range is None:
        time_sel = slice(None)
    else:
        # we adjust datetime.datetime objects from time_range
        # (dt_arr[0] is the first date of the file, no need to read the time variable again)
        time_range = util_dt.adjust_time_range(time_range, dt_arr[0])
        time_sel = util_dt.get_indices_subset(dt_arr, time_range)
        dt_arr = dt_arr[time_sel]

    rows = slice(i1_row_current_tile, i2_row_current_tile)
    cols = slice(i1_col_current_tile, i2_col_current_tile)

    if N_lev is None:
        raw_values = ncVar_values[time_sel, rows, cols]
    else:
        raw_values = ncVar_values[time_sel, N_lev, rows, cols]

    values_arr = (raw_values * scale_factor) + add_offset

    if fill_val is not None:
        numpy.ma.set_fill_value(values_arr, fill_val)

    assert(dt_arr.ndim == 1)
    assert(values_arr.ndim == 3)

    # In a 360-day calendar February 29th is an ordinary day, so it is never removed there.
    if ignore_Feb29th and calend != '360_day':
        # dtype=bool keeps the mask usable for indexing even when dt_arr is empty
        keep = numpy.array([not (dt.month == 2 and dt.day == 29) for dt in dt_arr], dtype=bool)
        dt_arr = dt_arr[keep]
        values_arr = values_arr[keep, :, :]

    return (dt_arr, values_arr)
def get_values_arr_and_dt_arr(ncVar_temporal, ncVar_values, fill_val=None, time_range=None, N_lev=None, lev_dim_pos=1,
                              ignore_Feb29th=False,
                              i1_row_current_tile=None, i2_row_current_tile=None,
                              i1_col_current_tile=None, i2_col_current_tile=None,
                              add_offset=0.0, scale_factor=1.0):
    '''
    Reads a (time, row, col) block of values together with the matching dates.

    :param ncVar_temporal: time variable; must expose ``units``, may expose ``calendar``
                           ('gregorian' is used when it is missing) and must support ``[:]``
    :param ncVar_values: 3D variable indexed as [time, row, col], or 4D variable when ``N_lev`` is given
    :param fill_val: if not None, set as fill value of the result with ``numpy.ma.set_fill_value``
    :param time_range: time range passed as is to ``util_dt.get_indices_subset``
    :param N_lev: index along the level axis of a 4D variable, or None for a 3D variable
    :param lev_dim_pos: 0 if the level axis comes before the time axis ([level, time, row, col]),
                        any other value for [time, level, row, col]
    :param ignore_Feb29th: if true, time steps falling on February 29th are dropped
                           (never applied to the '360_day' calendar)
    :param i1_row_current_tile: start of the row slice
    :param i2_row_current_tile: end of the row slice
    :param i1_col_current_tile: start of the column slice
    :param i2_col_current_tile: end of the column slice
    :param add_offset: offset added to the values after scaling
    :param scale_factor: factor the raw values are multiplied by

    :rtype: tuple (dt_arr, values_arr): 1D array of dates and 3D array of values
    '''

    # a missing "calendar" attribute means the default calendar
    calend = getattr(ncVar_temporal, 'calendar', 'gregorian')
    units = ncVar_temporal.units

    time_arr = ncVar_temporal[:]
    dt_arr = numpy.array([util_dt.num2date(dt, calend=calend, units=units) for dt in time_arr])

    # NOTE: the time step is deliberately not checked for being daily:
    # netcdftime.datetime objects have no method total_seconds()

    if N_lev is None:
        assert(ncVar_values.ndim == 3)
    else:
        assert(ncVar_values.ndim == 4)

    indices_subset = util_dt.get_indices_subset(dt_arr, time_range)
    dt_arr = dt_arr[indices_subset]

    rows = slice(i1_row_current_tile, i2_row_current_tile)
    cols = slice(i1_col_current_tile, i2_col_current_tile)

    if N_lev is None:
        raw_values = ncVar_values[indices_subset, rows, cols]
    elif lev_dim_pos == 0:
        raw_values = ncVar_values[N_lev, indices_subset, rows, cols]
    else:
        raw_values = ncVar_values[indices_subset, N_lev, rows, cols]

    values_arr = (raw_values * scale_factor) + add_offset

    if fill_val is not None:
        numpy.ma.set_fill_value(values_arr, fill_val)

    assert(dt_arr.ndim == 1)
    assert(values_arr.ndim == 3)

    # In a 360-day calendar February 29th is an ordinary day, so it is never removed there.
    if ignore_Feb29th and calend != '360_day':
        # dtype=bool keeps the mask usable for indexing even when dt_arr is empty
        keep = numpy.array([not (dt.month == 2 and dt.day == 29) for dt in dt_arr], dtype=bool)
        dt_arr = dt_arr[keep]
        values_arr = values_arr[keep, :, :]

    return (dt_arr, values_arr)
def get_values_arr_and_dt_arr(ncVar_temporal, ncVar_values, fill_val=None, time_range=None, N_lev=None, lev_dim_pos=1,
                              ignore_Feb29th=False,
                              i1_row_current_tile=None, i2_row_current_tile=None,
                              i1_col_current_tile=None, i2_col_current_tile=None,
                              add_offset=0.0, scale_factor=1.0):
    '''
    Reads a (time, row, col) block of values together with the matching dates.

    :param ncVar_temporal: time variable; must expose ``units``, may expose ``calendar``
                           ('gregorian' is used when it is missing) and must support ``[:]``
    :param ncVar_values: 3D variable indexed as [time, row, col], or 4D variable when ``N_lev`` is given
    :param fill_val: if not None, set as fill value of the result with ``numpy.ma.set_fill_value``
    :param time_range: time range passed as is to ``util_dt.get_indices_subset``
    :param N_lev: index along the level axis of a 4D variable, or None for a 3D variable
    :param lev_dim_pos: 0 if the level axis comes before the time axis ([level, time, row, col]),
                        any other value for [time, level, row, col]
    :param ignore_Feb29th: if true, time steps falling on February 29th are dropped
                           (never applied to the '360_day' calendar)
    :param i1_row_current_tile: start of the row slice
    :param i2_row_current_tile: end of the row slice
    :param i1_col_current_tile: start of the column slice
    :param i2_col_current_tile: end of the column slice
    :param add_offset: offset added to the values after scaling
    :param scale_factor: factor the raw values are multiplied by

    :rtype: tuple (dt_arr, values_arr): 1D array of dates and 3D array of values
    '''

    # a missing "calendar" attribute means the default calendar
    calend = getattr(ncVar_temporal, 'calendar', 'gregorian')
    units = ncVar_temporal.units

    time_arr = ncVar_temporal[:]
    dt_arr = numpy.array([util_dt.num2date(dt, calend=calend, units=units) for dt in time_arr])

    # NOTE: the time step is deliberately not checked for being daily:
    # netcdftime.datetime objects have no method total_seconds()

    if N_lev is None:
        assert(ncVar_values.ndim == 3)
    else:
        assert(ncVar_values.ndim == 4)

    indices_subset = util_dt.get_indices_subset(dt_arr, time_range)
    dt_arr = dt_arr[indices_subset]

    rows = slice(i1_row_current_tile, i2_row_current_tile)
    cols = slice(i1_col_current_tile, i2_col_current_tile)

    if N_lev is None:
        raw_values = ncVar_values[indices_subset, rows, cols]
    elif lev_dim_pos == 0:
        raw_values = ncVar_values[N_lev, indices_subset, rows, cols]
    else:
        raw_values = ncVar_values[indices_subset, N_lev, rows, cols]

    values_arr = (raw_values * scale_factor) + add_offset

    if fill_val is not None:
        numpy.ma.set_fill_value(values_arr, fill_val)

    assert(dt_arr.ndim == 1)
    assert(values_arr.ndim == 3)

    # In a 360-day calendar February 29th is an ordinary day, so it is never removed there.
    if ignore_Feb29th and calend != '360_day':
        # dtype=bool keeps the mask usable for indexing even when dt_arr is empty
        keep = numpy.array([not (dt.month == 2 and dt.day == 29) for dt in dt_arr], dtype=bool)
        dt_arr = dt_arr[keep]
        values_arr = values_arr[keep, :, :]

    return (dt_arr, values_arr)
def get_tile_dimension(in_files, var_name, transfer_limit_Mbytes=None, time_range=None):
    '''
    Returns the optimal tile dimension for spatial chunking of a 3D variable.

    The result is the side length ``d`` (in grid points) of a square tile such that
    ``d * d * number_of_time_steps * item_size_in_bytes`` fits in the transfer limit.

    :param in_files: absolute path(s) to NetCDF dataset(s) (including OPeNDAP URLs);
                     the list is sorted in place when a transfer limit is given
    :type in_files: list

    :param var_name: variable name to process
    :type var_name: str

    :param transfer_limit_Mbytes: maximum OPeNDAP/THREDDS transfer limit in Mbytes (default: None)
    :type transfer_limit_Mbytes: float

    :param time_range: time range; when given, only the time steps selected by
                       ``util_dt.get_indices_subset`` are counted
    :type time_range: list of 2 datetime objects: [dt1, dt2]

    :rtype: int (0 when no transfer limit is given; the datasets are not opened in that case)

    .. warning:: only for 3D variables (an error message is printed for other variables,
                 but the computation still goes on)
    '''

    if transfer_limit_Mbytes is None:
        return 0

    transfer_limit_bytes = transfer_limit_Mbytes * 1024 * 1024  # Mbytes --> bytes

    in_files.sort()
    mfnc = MFDataset(in_files, 'r', aggdim='time')

    # try/finally: the datasets are closed even if something below fails
    try:
        v = mfnc.variables[var_name]

        if v.ndim != 3:
            print("ERROR: The variable to process must be 3D")

        v_nb_bytes = v.dtype.itemsize

        if time_range is None:
            nb_time_steps = v.shape[0]
        else:
            var_time = mfnc.variables['time']
            # a missing "calendar" attribute means the default calendar
            time_calend = getattr(var_time, 'calendar', 'gregorian')
            time_units = var_time.units
            dt_arr = numpy.array([util_dt.num2date(dt, calend=time_calend, units=time_units)
                                  for dt in var_time[:]])
            nb_time_steps = len(util_dt.get_indices_subset(dt_arr, time_range))

        # bytes available per grid point column --> side of the largest square tile
        optimal_tile_dimension = int(numpy.sqrt(transfer_limit_bytes / (nb_time_steps * v_nb_bytes)))
    finally:
        mfnc.close()

    return optimal_tile_dimension