def apply_clustering(data, timesteps, clustering_func, how, normalize=True, **kwargs):
    """
    Apply the given clustering function to the given data.

    Parameters
    ----------
    data : xarray.Dataset
    timesteps : pandas.DatetimeIndex or list of timesteps or None
    clustering_func : str
        Name of clustering function.
    how : str
        How to map clusters to data. 'mean' or 'closest'.
    normalize : bool, optional
        If True (default), data is normalized before clustering is applied,
        using :func:`~calliope.core.time.funcs.normalized_copy`.
    **kwargs : optional
        Arguments passed to clustering_func.

    Returns
    -------
    data_new_scaled : xarray.Dataset

    """
    # Stash all coordinates except 'timesteps' so they can be restored after
    # clustering has rebuilt the time dimension.
    saved_coords = data.copy().coords
    del saved_coords['timesteps']

    # Cluster either the full dataset or only the masked subset of timesteps.
    subset = data if timesteps is None else data.loc[{'timesteps': timesteps}]

    # Drop every variable that is not indexed over time, plus the
    # timestep_* bookkeeping variables.
    untimed_vars = [
        name for name in data.variables
        if 'timesteps' not in data[name].dims or 'timestep_' in name
    ]
    subset = subset.drop(untimed_vars)

    # Re-attach the full dimension coordinates from the original data.
    for dim in subset.dims:
        subset[dim] = data[dim]

    clustering_input = normalized_copy(subset) if normalize else subset

    # Resolve the clustering function by name and run it; only the first
    # element of its result (the cluster assignment) is used.
    func = plugin_load(
        clustering_func, builtin_module='calliope.core.time.clustering')
    clusters = func(clustering_input, **kwargs)[0]

    data_new = clustering.map_clusters_to_data(subset, clusters, how=how)

    data_new = _copy_non_t_vars(data, data_new)
    if timesteps is not None:
        # Merge the clustered subset back with the timesteps the masks kept
        # at full resolution.
        data_new = _combine_datasets(
            data.drop(timesteps, dim='timesteps'), data_new)
        data_new = _copy_non_t_vars(data, data_new)

    # It's now safe to add the original coordinates back in (preserving all
    # the loc_tech sets that aren't used to index a variable in the DataArray)
    data_new.update(saved_coords)

    # Scale the new/combined data so that the per-variable mean over time
    # matches that of the original data.
    data_new_scaled = data_new.copy(deep=True)
    timed_vars = [
        v for v in data_new.data_vars
        if 'timesteps' in data_new[v].dims and 'timestep_' not in v
    ]
    for var in timed_vars:
        scale = (
            data[var].mean(dim='timesteps')
            / data_new[var].mean(dim='timesteps')
        ).fillna(0)
        data_new_scaled[var] = data_new[var] * scale

    return data_new_scaled
def apply_time_clustering(model_data, model_run):
    """
    Take a Calliope model_data post time dimension addition, prior to any time
    clustering, and apply relevant time clustering/masking techniques.
    See doi: 10.1016/j.apenergy.2017.03.051 for applications.

    Techniques include:
    - Clustering timeseries into a selected number of 'representative' days.
        Days with similar profiles and daily magnitude are grouped together and
        represented by one 'representative' day with a greater weight per time
        step.
    - Masking timeseries, leading to variable timestep length
        Only certain parts of the input are shown at full resolution, with other
        periods being clustered together into a single timestep.
        E.g. Keep high resolution in the week with greatest wind power variability,
        smooth all other timesteps to 12H
    - Timestep resampling
        Used to reduce problem size by reducing resolution of all timeseries data.
        E.g. resample from 1H to 6H timesteps

    Parameters
    ----------
    model_data : xarray Dataset
        Preprocessed Calliope model_data, as produced using
        `calliope.preprocess.build_model_data`
        and found in model._model_data_original
    model_run : bool
        preprocessed model_run dictionary, as produced by
        Calliope.preprocess_model

    Returns
    -------
    data : xarray Dataset
        Dataset with optimisation parameters as variables, optimisation sets
        as coordinates, and other information in attributes. Time dimension
        has been updated as per user-defined clustering techniques (from
        model_run)

    """
    time_config = model_run.model["time"]
    data = model_data.copy(deep=True)

    ##
    # Masking: work out which timesteps are kept at high resolution
    ##
    if "masks" in time_config:
        masks = {}
        # time.masks is a list of {'function': .., 'options': ..} dicts
        for mask_entry in time_config.masks:
            mask_entry = AttrDict(mask_entry)
            mask_func = plugin_load(
                mask_entry.function, builtin_module="calliope.time.masks")
            mask_kwargs = mask_entry.get_key("options", default=AttrDict()).as_dict()
            masks[mask_entry.to_yaml()] = mask_func(data, **mask_kwargs)
        data.attrs["masks"] = masks
        # Concatenate the DatetimeIndexes by using dummy Series
        masked_index = pd.concat(
            [pd.Series(0, index=m) for m in masks.values()]).index
        # unmasked_timesteps: the timesteps NOT picked by any mask
        unmasked_timesteps = pd.Index(
            data.timesteps.values).difference(masked_index)
    else:
        unmasked_timesteps = None

    ##
    # Resolution adjustment: run the configured time function, if any
    ##
    if "function" in time_config:
        time_func = plugin_load(
            time_config.function, builtin_module="calliope.time.funcs")
        time_func_kwargs = time_config.get("function_options", AttrDict()).as_dict()
        # File-based clustering needs access to the full model_run
        if "file=" in time_func_kwargs.get("clustering_func", ""):
            time_func_kwargs["model_run"] = model_run
        data = time_func(data=data, timesteps=unmasked_timesteps, **time_func_kwargs)

    return data
def apply_clustering(data, timesteps, clustering_func, how, normalize=True,
                     scale_clusters='mean', **kwargs):
    """
    Apply the given clustering function to the given data.

    Parameters
    ----------
    data : xarray.Dataset
    timesteps : pandas.DatetimeIndex or list of timesteps or None
    clustering_func : str
        Name of clustering function.
    how : str
        How to map clusters to data. 'mean' or 'closest'.
    normalize : bool, optional
        If True (default), data is normalized before clustering is applied,
        using :func:`~calliope.core.time.funcs.normalized_copy`.
    scale_clusters : str or None, default = 'mean'
        Scale the results of clustering such that the clusters match the metric
        given by scale_clusters. For example, 'mean' scales along each loc_tech
        and variable to match inputs and outputs. Other options for matching
        include 'sum', 'max', and 'min'. If None, no scaling occurs.
    **kwargs : optional
        Arguments passed to clustering_func.

    Returns
    -------
    data_new_scaled : xarray.Dataset

    """
    assert how in ['mean', 'closest']

    # Clustering requires a uniform number of timesteps per day
    # (check_uniformity=True raises otherwise).
    daily_timesteps = get_daily_timesteps(data, check_uniformity=True)
    timesteps_per_day = len(daily_timesteps)

    # Save all coordinates, to ensure they can be added back in after clustering
    data_coords = data.copy().coords
    del data_coords['timesteps']

    # Only apply clustering function on subset of masked timesteps
    if timesteps is None:
        data_to_cluster = data
    else:
        data_to_cluster = data.loc[{'timesteps': timesteps}]

    # remove all variables that are not indexed over time
    # (also drops timestep_* bookkeeping variables)
    data_to_cluster = data_to_cluster.drop([
        i for i in data.variables
        if 'timesteps' not in data[i].dims or 'timestep_' in i
    ])

    # Re-attach the full dimension coordinates from the original data
    for dim in data_to_cluster.dims:
        data_to_cluster[dim] = data[dim]

    if normalize:
        data_normalized = normalized_copy(data_to_cluster)
    else:
        data_normalized = data_to_cluster

    # Get function from `clustering_func` string
    func = plugin_load(
        clustering_func, builtin_module='calliope.core.time.clustering')

    result = func(data_normalized, timesteps_per_day=timesteps_per_day, **kwargs)

    clusters = result[0]  # Ignore other stuff returned

    # Map the cluster assignment back onto the (un-normalized) data
    data_new = clustering.map_clusters_to_data(
        data_to_cluster, clusters, how=how, daily_timesteps=daily_timesteps)

    if timesteps is None:
        data_new = _copy_non_t_vars(data, data_new)
    else:
        # Drop timesteps from old data
        data_new = _copy_non_t_vars(data, data_new)
        data_new = _combine_datasets(data.drop(timesteps, dim='timesteps'), data_new)
        data_new = _copy_non_t_vars(data, data_new)

    # It's now safe to add the original coordinates back in (preserving all the
    # loc_tech sets that aren't used to index a variable in the DataArray)
    data_new.update(data_coords)

    # Scale the new/combined data so that the mean for each (loc_tech, variable)
    # combination matches that from the original data
    data_new_scaled = data_new.copy(deep=True)
    if scale_clusters:
        # Scale only time-indexed data variables; 'clusters' itself and
        # timestep_* variables are left untouched
        data_vars_in_t = [
            v for v in data_new.data_vars
            if 'timesteps' in data_new[v].dims
            and 'timestep_' not in v
            and v != 'clusters'
        ]
        for var in data_vars_in_t:
            # e.g. scale_clusters='mean' -> data[var].mean(dim='timesteps')
            scale = (getattr(data[var], scale_clusters)(dim='timesteps') /
                     getattr(data_new[var], scale_clusters)(dim='timesteps'))
            # fillna(0) zeroes out ratios where the divisor produced NaN
            data_new_scaled[var] = data_new[var] * scale.fillna(0)

    return data_new_scaled
def apply_time_clustering(model_data, model_run):
    """
    Take a Calliope model_data post time dimension addition, prior to any time
    clustering, and apply relevant time clustering/masking techniques.
    See doi: 10.1016/j.apenergy.2017.03.051 for applications.

    Techniques include:
    - Clustering timeseries into a selected number of 'representative' days.
        Days with similar profiles and daily magnitude are grouped together and
        represented by one 'representative' day with a greater weight per time
        step.
    - Masking timeseries, leading to variable timestep length
        Only certain parts of the input are shown at full resolution, with other
        periods being clustered together into a single timestep.
        E.g. Keep high resolution in the week with greatest wind power variability,
        smooth all other timesteps to 12H
    - Timestep resampling
        Used to reduce problem size by reducing resolution of all timeseries data.
        E.g. resample from 1H to 6H timesteps

    Parameters
    ----------
    model_data : xarray Dataset
        Preprocessed Calliope model_data, as produced using
        `calliope.core.preprocess_data.build_model_data`
        and found in model._model_data_original
    model_run : bool
        preprocessed model_run dictionary, as produced by
        Calliope.core.preprocess_model

    Returns
    -------
    data : xarray Dataset
        Dataset with optimisation parameters as variables, optimisation sets
        as coordinates, and other information in attributes. Time dimension
        has been updated as per user-defined clustering techniques (from
        model_run)

    """
    time_config = model_run.model['time']

    # Work on a copy so the caller's model_data is never mutated
    data = model_data.copy(deep=True)

    # Add temporary 'timesteps per day' attribute
    # One resolution array per calendar day present in the data
    daily_timesteps = [
        data.timestep_resolution.loc[i].values
        for i in np.unique(data.timesteps.to_index().strftime('%Y-%m-%d'))
    ]
    # All days must share the same per-day resolution pattern for clustering
    if not np.all(daily_timesteps == daily_timesteps[0]):
        raise exceptions.ModelError(
            'For clustering, timestep resolution must be uniform.')
    data.attrs['_daily_timesteps'] = daily_timesteps[0]

    ##
    # Process masking and get list of timesteps to keep at high res
    ##
    if 'masks' in time_config:
        masks = {}
        # time.masks is a list of {'function': .., 'options': ..} dicts
        for entry in time_config.masks:
            entry = AttrDict(entry)
            mask_func = plugin_load(
                entry.function, builtin_module='calliope.core.time.masks')
            mask_kwargs = entry.get_key('options', default={})
            # Keyed by the YAML dump of the mask config, so identical mask
            # configs collapse to one entry
            masks[entry.to_yaml()] = mask_func(data, **mask_kwargs)
        data.attrs['masks'] = masks
        # Concatenate the DatetimeIndexes by using dummy Series
        chosen_timesteps = pd.concat(
            [pd.Series(0, index=m) for m in masks.values()]).index
        # timesteps: a list of timesteps NOT picked by masks
        timesteps = pd.Index(
            data.timesteps.values).difference(chosen_timesteps)
    else:
        timesteps = None

    ##
    # Process function, apply resolution adjustments
    ##
    if 'function' in time_config:
        func = plugin_load(
            time_config.function, builtin_module='calliope.core.time.funcs')
        func_kwargs = time_config.get('function_options', {})
        data = func(data=data, timesteps=timesteps, **func_kwargs)

    # Temporary timesteps per day attribute is no longer needed
    # (the time function may already have removed it, hence the KeyError guard)
    try:
        del data.attrs['_daily_timesteps']
    except KeyError:
        pass

    return data