def solve_model(backend_model, solver, solver_io=None, solver_options=None,
                save_logs=False, **solve_kwargs):
    """
    Run the requested solver on a Pyomo model and return the solver output.

    Parameters
    ----------
    backend_model : model object handed straight to ``opt.solve``
    solver : str
        Solver name given to ``SolverFactory``.
    solver_io : optional
        Forwarded to ``SolverFactory`` as ``solver_io``.
    solver_options : dict, optional
        Each key/value pair is copied onto ``opt.options``.
    save_logs : optional
        If truthy, it is used as a directory path: the directory is created
        when missing, assigned to ``TempfileManager.tempdir``, and the solve
        is asked to keep its files and use symbolic solver labels.
    **solve_kwargs
        Extra keyword arguments forwarded to ``opt.solve``.

    Returns
    -------
    Whatever ``opt.solve`` returns.
    """
    opt = SolverFactory(solver, solver_io=solver_io)

    if solver_options:
        for option_name, option_value in solver_options.items():
            opt.options[option_name] = option_value

    if save_logs:
        solve_kwargs['symbolic_solver_labels'] = True
        solve_kwargs['keepfiles'] = True
        os.makedirs(save_logs, exist_ok=True)
        TempfileManager.tempdir = save_logs  # Sets log output dir

    if solver == 'glpk' and 'warmstart' in solve_kwargs:
        # NOTE(review): the warning object is only constructed here, not
        # raised or passed to `warnings.warn` -- confirm that
        # `exceptions.ModelWarning` emits on construction.
        exceptions.ModelWarning(
            'The chosen solver, GLPK, does not suport warmstart, which may '
            'impact performance.'
        )
        # The keyword is dropped whatever its value was
        solve_kwargs.pop('warmstart')

    # Everything written to stdout/stderr during the solve goes to LogWriter
    with redirect_stdout(LogWriter('info', strip=True)), \
            redirect_stderr(LogWriter('error', strip=True)):
        return opt.solve(backend_model, tee=True, **solve_kwargs)
def solve_model(backend_model, solver, solver_io=None, solver_options=None,
                save_logs=False, **solve_kwargs):
    """
    Solve a Pyomo model using the chosen solver and all necessary solver options

    Parameters
    ----------
    backend_model : model object handed straight to ``opt.solve``
    solver : str
        Solver name given to ``SolverFactory``.
    solver_io : optional
        Forwarded to ``SolverFactory`` as ``solver_io``.
    solver_options : dict, optional
        Each key/value pair is copied onto ``opt.options``.
    save_logs : optional
        If truthy, it is used as a directory path: the directory is created
        when missing, assigned to ``TempfileManager.tempdir``, and the solve
        is asked to keep its files and use symbolic solver labels.
    **solve_kwargs
        Extra keyword arguments forwarded to ``opt.solve``.

    Returns a Pyomo results object
    """
    opt = SolverFactory(solver, solver_io=solver_io)

    if solver_options:
        for option_name, option_value in solver_options.items():
            opt.options[option_name] = option_value

    if save_logs:
        solve_kwargs['symbolic_solver_labels'] = True
        solve_kwargs['keepfiles'] = True
        os.makedirs(save_logs, exist_ok=True)
        TempfileManager.tempdir = save_logs  # Sets log output dir

    if solver in ['glpk', 'cbc'] and 'warmstart' in solve_kwargs:
        # NOTE(review): the warning object is only constructed here, not
        # raised or passed to `warnings.warn` -- confirm that
        # `exceptions.ModelWarning` emits on construction.
        exceptions.ModelWarning(
            'The chosen solver, {}, does not suport warmstart, which may '
            'impact performance.'.format(solver))
        # The keyword is dropped whatever its value was
        solve_kwargs.pop('warmstart')

    # Solver stdout is logged at 'debug', stderr at 'error'
    with redirect_stdout(LogWriter(logger, 'debug', strip=True)), \
            redirect_stderr(LogWriter(logger, 'error', strip=True)):
        return opt.solve(backend_model, tee=True, **solve_kwargs)
def rerun_pyomo_model(model_data, backend_model):
    """
    Rerun the Pyomo backend, perhaps after updating a parameter value,
    (de)activating a constraint/objective or updating run options in the
    model_data object (e.g. `run.solver`).

    The run configuration is re-read from ``model_data.attrs['run_config']``
    and stored on the backend model before the run. Only `plan` mode can be
    rerun.

    Returns
    -------
    run_data : xarray.Dataset
        Raw data from this rerun, including both inputs and results.
        to filter inputs/results, use `run_data.filter_by_attrs(is_result=...)`
        with 0 for inputs and 1 for results.

    Raises
    ------
    exceptions.ModelError
        If the configured run mode is anything other than `plan`.
    """
    backend_model.__calliope_run_config = AttrDict.from_yaml_string(
        model_data.attrs['run_config']
    )
    mode = backend_model.__calliope_run_config['mode']

    if mode != 'plan':
        raise exceptions.ModelError(
            'Cannot rerun the backend in {} run mode. Only `plan` mode is '
            'possible.'.format(mode)
        )

    timings = {}
    log_time(logger, timings, 'model_creation')

    results, backend_model = backend_run.run_plan(
        model_data, timings, run_pyomo,
        build_only=False, backend_rerun=backend_model
    )

    # Expose every recorded timing as a flat `timings.<name>` attribute
    for timing_name, timing_value in timings.items():
        results.attrs['timings.' + timing_name] = timing_value

    # NOTE(review): the warning object is only constructed here, not raised
    # or passed to `warnings.warn` -- confirm that `exceptions.ModelWarning`
    # emits on construction.
    exceptions.ModelWarning(
        'model.results will only be updated on running the model from '
        '`model.run()`. We provide results of this rerun as a standalone xarray '
        'Dataset'
    )

    results.attrs.update(model_data.attrs)

    # Tag result variables with 1 and input variables with 0 so that callers
    # can separate them with `filter_by_attrs`
    for result_var in results.data_vars.values():
        result_var.attrs['is_result'] = 1

    inputs = access_pyomo_model_inputs(backend_model)
    for input_var in inputs.data_vars.values():
        input_var.attrs['is_result'] = 0

    # In-place merge of the inputs into the results dataset
    results.update(inputs)

    return results
def run_operate(model_data, timings, backend, build_only):
    """
    For use when mode is 'operate', to allow the model to be built, edited, and
    iteratively run within Pyomo.

    The timeseries is cut into optimisation windows. For every window a model
    covering the (longer) horizon is built or updated, solved, and only the
    window-length part of its result is kept. Stored energy and operated
    units at the end of each window are carried into the next iteration.

    Parameters
    ----------
    model_data : xarray.Dataset
        Model inputs (and possibly plan-mode results). It is updated in place:
        capacity variables are tagged as operate parameters and
        `storage_initial` / `operated_units` are added and updated.
    timings : object passed through to `log_time`
    backend : object providing `generate_model`, `solve_model`,
        `load_results` and `get_result_array`
    build_only : bool
        If True, only the model of the first window is built; nothing is
        solved and an empty Dataset is returned as results.

    Returns
    -------
    results : xarray.Dataset
        Window results concatenated over `timesteps`, with a
        `termination_condition` attribute ('optimal' if every window was
        optimal, otherwise the comma-joined per-window conditions).
    backend_model : the backend model of the last iteration

    Raises
    ------
    exceptions.ModelError
        If no window start could be found for the configured window.
    """
    log_time(timings, 'run_start',
             comment='Backend: starting model run in operational mode')

    # NOTE(review): the full `Loader` can construct arbitrary Python objects.
    # `defaults` is read from model_data attrs -- confirm it can never come
    # from untrusted input.
    defaults = ruamel.yaml.load(model_data.attrs['defaults'], Loader=ruamel.yaml.Loader)

    operate_params = ['purchased'] + [
        i.replace('_max', '') for i in defaults if i[-4:] == '_max'
    ]

    # Capacity results (from plan mode) can be used as the input to operate mode
    if (any(model_data.filter_by_attrs(is_result=1).data_vars) and
            model_data.attrs.get('run.operation.use_cap_results', False)):
        # Anything with is_result = 1 will be ignored in the Pyomo model
        for varname, varvals in model_data.data_vars.items():
            if varname in operate_params:
                varvals.attrs['is_result'] = 1
                varvals.attrs['operate_param'] = 1

    else:
        cap_max = xr.merge([
            v.rename(k.replace('_max', ''))
            for k, v in model_data.data_vars.items() if '_max' in k
        ])
        cap_equals = xr.merge([
            v.rename(k.replace('_equals', ''))
            for k, v in model_data.data_vars.items() if '_equals' in k
        ])
        # `update` works in place; do not rely on its return value
        cap_max.update(cap_equals)
        caps = cap_max
        for cap in caps.data_vars.values():
            cap.attrs['is_result'] = 1
            cap.attrs['operate_param'] = 1
        model_data.update(caps)

    # Storage initial is carried over between iterations, so must be defined along with storage
    if ('loc_techs_store' in model_data.dims and
            'storage_initial' not in model_data.data_vars):
        model_data['storage_initial'] = (
            xr.DataArray([0 for loc_tech in model_data.loc_techs_store.values],
                         dims='loc_techs_store')
        )
        model_data['storage_initial'].attrs['is_result'] = 0
        # NOTE(review): the warning object is only constructed, not raised or
        # passed to `warnings.warn` -- confirm that `exceptions.ModelWarning`
        # emits on construction.
        exceptions.ModelWarning(
            'Initial stored energy not defined, set to zero for all '
            'loc::techs in loc_techs_store, for use in iterative optimisation'
        )

    # Operated units is carried over between iterations, so must be defined in a milp model
    if ('loc_techs_milp' in model_data.dims and
            'operated_units' not in model_data.data_vars):
        model_data['operated_units'] = (
            xr.DataArray([0 for loc_tech in model_data.loc_techs_milp.values],
                         dims='loc_techs_milp')
        )
        model_data['operated_units'].attrs['is_result'] = 1
        model_data['operated_units'].attrs['operate_param'] = 1
        # NOTE(review): see the note on `exceptions.ModelWarning` above.
        exceptions.ModelWarning(
            'daily operated units not defined, set to zero for all '
            'loc::techs in loc_techs_milp, for use in iterative optimisation'
        )

    comments, warnings, errors = checks.check_operate_params(model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warnings, errors=errors)

    # Initialize our variables
    solver = model_data.attrs['run.solver']
    solver_io = model_data.attrs.get('run.solver_io', None)
    solver_options = model_data.attrs.get('run.solver_options', None)
    save_logs = model_data.attrs.get('run.save_logs', None)
    window = model_data.attrs['run.operation.window']
    horizon = model_data.attrs['run.operation.horizon']
    window_to_horizon = horizon - window

    # get the cumulative sum of timestep resolution, to find where we hit our window and horizon
    timestep_cumsum = model_data.timestep_resolution.cumsum('timesteps').to_pandas()

    # get the timesteps at which we start and end our windows
    # (`.iloc` makes the first/last lookups explicitly positional)
    window_ends = timestep_cumsum.where(
        (timestep_cumsum % window == 0) |
        (timestep_cumsum == timestep_cumsum.iloc[-1])
    )
    window_starts = timestep_cumsum.where(
        (~np.isnan(window_ends.shift(1))) |
        (timestep_cumsum == timestep_cumsum.iloc[0])
    ).dropna()

    window_ends = window_ends.dropna()
    horizon_ends = timestep_cumsum[
        timestep_cumsum.isin(window_ends.values + window_to_horizon)
    ]

    if not any(window_starts):
        raise exceptions.ModelError(
            'Not enough timesteps or incorrect timestep resolution to run in '
            'operational mode with an optimisation window of {}'.format(window)
        )

    # We will only update timseries parameters
    timeseries_data_vars = [
        k for k, v in model_data.data_vars.items()
        if 'timesteps' in v.dims and v.attrs['is_result'] == 0
    ]

    # Loop through each window, solve over the horizon length, and add result to
    # result_array we only go as far as the end of the last horizon, which may
    # clip the last bit of data
    result_array = []
    # track whether each iteration finds an optimal solution or not
    terminations = []

    if build_only:
        iterations = [0]
    else:
        iterations = range(len(window_starts))

    for i in iterations:
        start_timestep = window_starts.index[i]

        # Build full model in first instance
        if i == 0:
            warmstart = False
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                timings, 'model_gen_1',
                comment='Backend: generating initial model'
            )

            backend_model = backend.generate_model(window_model_data)

        # Build the full model in the last instance(s),
        # where number of timesteps is less than the horizon length
        elif i > len(horizon_ends) - 1:
            warmstart = False
            end_timestep = window_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                timings, 'model_gen_{}'.format(i + 1),
                comment=(
                    'Backend: iteration {}: generating new model for '
                    'end of timeseries, with horizon = {} timesteps'.format(
                        i + 1, window_ends.iloc[i] - window_starts.iloc[i])
                )
            )

            backend_model = backend.generate_model(window_model_data)

        # Update relevent Pyomo Params in intermediate instances
        else:
            warmstart = True
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                timings, 'model_gen_{}'.format(i + 1),
                comment='Backend: iteration {}: updating model parameters'.format(i + 1)
            )

            # Pyomo model sees the same timestamps each time, we just change the
            # values associated with those timestamps
            for var in timeseries_data_vars:
                # New values
                var_series = (
                    window_model_data[var].to_series().dropna().replace('inf', np.inf)
                )
                # Same timestamps
                var_series.index = backend_model.__calliope_model_data__['data'][var].keys()
                var_dict = var_series.to_dict()
                # Update pyomo Param with new dictionary
                for k, v in getattr(backend_model, var).items():
                    if k in var_dict:
                        v.set_value(var_dict[k])

        if not build_only:
            log_time(
                timings, 'model_run_{}'.format(i + 1), time_since_start=True,
                comment='Backend: iteration {}: sending model to solver'.format(i + 1)
            )

            # After iteration 1, warmstart = True, which should speed up the process
            # Note: Warmstart isn't possible with GLPK (dealt with later on)
            _results = backend.solve_model(
                backend_model, solver=solver, solver_io=solver_io,
                solver_options=solver_options, save_logs=save_logs,
                warmstart=warmstart
            )

            log_time(
                timings, 'run_solver_exit_{}'.format(i + 1), time_since_start=True,
                comment='Backend: iteration {}: solver finished running'.format(i + 1)
            )

            # xarray dataset is built for each iteration
            _termination = backend.load_results(backend_model, _results)
            terminations.append(_termination)

            _results = backend.get_result_array(backend_model, model_data)

            # We give back the actual timesteps for this iteration and take a slice
            # equal to the window length
            _results['timesteps'] = window_model_data.timesteps.copy()

            # We always save the window data. Until the last window(s) this will crop
            # the window_to_horizon timesteps. In the last window(s), optimistion will
            # only be occurring over a window length anyway
            _results = _results.loc[dict(timesteps=slice(None, window_ends.index[i]))]
            result_array.append(_results)

            # Set up initial storage for the next iteration
            if 'loc_techs_store' in model_data.dims:
                storage_initial = _results.storage.loc[
                    dict(timesteps=window_ends.index[i])
                ]
                model_data['storage_initial'].loc[{}] = storage_initial.values
                # Build the lookup once rather than once per Param
                storage_dict = storage_initial.to_series().dropna().to_dict()
                for k, v in backend_model.storage_initial.items():
                    v.set_value(storage_dict[k])

            # Set up total operated units for the next iteration
            if 'loc_techs_milp' in model_data.dims:
                # builtin `int`: the `np.int` alias no longer exists in NumPy >= 1.24
                operated_units = _results.operating_units.sum('timesteps').astype(int)
                model_data['operated_units'].loc[{}] += operated_units.values
                operated_dict = operated_units.to_series().dropna().to_dict()
                for k, v in backend_model.operated_units.items():
                    v.set_value(operated_dict[k])

            # NOTE(review): this reuses the 'run_solver_exit_{n}' identifier
            # already logged right after the solve -- confirm whether a
            # distinct timing key was intended.
            log_time(
                timings, 'run_solver_exit_{}'.format(i + 1), time_since_start=True,
                comment='Backend: iteration {}: generated solution array'.format(i + 1)
            )

    if build_only:
        results = xr.Dataset()
    else:
        # Concatenate results over the timestep dimension to get a single
        # xarray Dataset of interest
        results = xr.concat(result_array, dim='timesteps')
        if all(i == 'optimal' for i in terminations):
            results.attrs['termination_condition'] = 'optimal'
        else:
            results.attrs['termination_condition'] = ','.join(terminations)

        log_time(
            timings, 'run_solution_returned', time_since_start=True,
            comment='Backend: generated full solution array'
        )

    return results, backend_model
def map_clusters_to_data(data, clusters, how, daily_timesteps, storage_inter_cluster=True):
    """
    Returns a copy of data that has been clustered.

    Parameters
    ----------
    data : dataset to cluster (presumably an xarray.Dataset -- it is passed
        on to `get_mean_from_clusters` and its `attrs` are reused)
    clusters : pandas.Series
        Cluster assigned to each day; the index is expanded to all timesteps
        of each day using `daily_timesteps`.
    how : str
        How to select data from clusters. Can be mean (centroid) or closest real
        day to the mean (by root mean square error).
    daily_timesteps : sequence
        One entry per timestep in a day; its length is the number of timesteps
        per day and it is tiled to build `timestep_resolution`.
    storage_inter_cluster : bool, default=True
        If True, add `datesteps` to model_data, for use in the backend to build
        inter_cluster storage decision variables and constraints

    Raises
    ------
    ValueError
        If `how` is neither 'mean' nor 'closest'.
    """
    # Fail early: any other value would leave `timestamps` and `value_counts`
    # undefined further down.
    if how not in ('mean', 'closest'):
        raise ValueError(
            "`how` must be 'mean' or 'closest', got {!r}".format(how)
        )

    # FIXME hardcoded time intervals ('1H', '1D')

    # Get all timesteps, not just the first per day
    timesteps_per_day = len(daily_timesteps)
    idx = clusters.index
    new_idx = _timesteps_from_daily_index(idx, daily_timesteps)
    # `.ffill()` replaces the deprecated `fillna(method='ffill')`
    clusters_timeseries = clusters.reindex(new_idx).ffill().astype(int)

    new_data = get_mean_from_clusters(data, clusters_timeseries, timesteps_per_day)
    new_data.attrs = data.attrs

    if how == 'mean':
        # Add timestep names by taking the median timestamp from daily clusters...
        # (a random way of doing it, but we want some label to apply)
        timestamps = clusters.groupby(clusters).apply(
            lambda x: x.index[len(x.index) // 2]
        )
        new_data.coords['timesteps'] = _timesteps_from_daily_index(
            pd.Index(timestamps.values), daily_timesteps
        )
        # Generate weights
        # weight of each timestep = number of timesteps in this timestep's cluster
        # divided by timesteps per day (since we're grouping days together and
        # a cluster consisting of 1 day = 24 hours should have weight of 1)
        value_counts = clusters_timeseries.value_counts() / timesteps_per_day
        # And turn the index into dates (days)
        value_counts = pd.DataFrame({
            'dates': timestamps, 'counts': value_counts
        }).set_index('dates')['counts']

    else:  # how == 'closest'
        new_data, chosen_ts = get_closest_days_from_clusters(
            data, new_data, clusters, daily_timesteps
        )
        # Deal with the case where more than one cluster has the same closest day
        # An easy way is to rename the original clusters with the chosen days
        # So at this point, clusterdays_timeseries maps all timesteps to the day
        # of year of the cluster the timestep belongs to
        clusterdays_timeseries = clusters_timeseries.map(lambda x: chosen_ts[x])
        value_counts = clusterdays_timeseries.value_counts() / timesteps_per_day
        timestamps = pd.DataFrame.from_dict(chosen_ts, orient='index')[0]
        cluster_diff = len(clusters.unique()) - len(timestamps.unique())
        if cluster_diff > 0:
            # NOTE(review): the warning object is only constructed, not raised
            # or passed to `warnings.warn` -- confirm that
            # `exceptions.ModelWarning` emits on construction.
            exceptions.ModelWarning(
                'Creating {} fewer clusters as some clusters share the same '
                'closest day'.format(cluster_diff)
            )
            timestamps = timestamps.drop_duplicates()
            for cluster, date in timestamps.items():
                clusterdays_timeseries.loc[clusterdays_timeseries == date] = cluster
            clusters = clusterdays_timeseries.resample('1D').mean()

    # Start from NaN everywhere, then label each chosen day with its cluster
    _clusters = xr.DataArray(
        data=np.full(len(new_data.timesteps.values), np.nan),
        dims='timesteps',
        coords={'timesteps': new_data.timesteps.values}
    )

    for cluster, date in timestamps.items():
        _clusters.loc[date.strftime('%Y-%m-%d')] = cluster

    new_data['timestep_cluster'] = _clusters.astype(int)

    # Spread the per-day weight over every timestep of that day
    weights = value_counts.reindex(
        _timesteps_from_daily_index(value_counts.index, daily_timesteps)
    ).ffill()
    new_data['timestep_weights'] = xr.DataArray(weights, dims=['timesteps'])

    days = np.unique(new_data.timesteps.to_index().date)
    new_data['timestep_resolution'] = xr.DataArray(
        np.tile(daily_timesteps, len(days)),
        dims=['timesteps'],
        coords={'timesteps': new_data['timesteps']}
    )

    if storage_inter_cluster:
        # Unless `clusters` was rebuilt above, this renames the index of the
        # Series passed in by the caller
        clusters.index.name = 'datesteps'
        new_data['lookup_datestep_cluster'] = xr.DataArray.from_series(clusters)

    timestamps.index.name = 'clusters'
    new_data.coords['clusters'] = timestamps.index

    return new_data
def apply_clustering(data, timesteps, clustering_func, how, normalize=True,
                     scale_clusters='mean', storage_inter_cluster=True,
                     model_run=None, **kwargs):
    """
    Apply the given clustering function to the given data.

    Parameters
    ----------
    data : xarray.Dataset
    timesteps : pandas.DatetimeIndex or list of timesteps or None
    clustering_func : str
        Name of clustering function. Can be `file=....csv:column_name`
        if loading custom clustering. Custom clustering index = timeseries days.
        If no column_name, the CSV file must have only one column of data.
    how : str
        How to map clusters to data. 'mean' or 'closest'.
    normalize : bool, optional
        If True (default), data is normalized before clustering is applied,
        using :func:`~calliope.core.time.funcs.normalized_copy`.
    scale_clusters : str or None, default = 'mean'
        Scale the results of clustering such that the clusters match the metric
        given by scale_clusters. For example, 'mean' scales along each loc_tech
        and variable to match inputs and outputs. Other options for matching
        include 'sum', 'max', and 'min'. If None, no scaling occurs.
    storage_inter_cluster : bool, default=True
        Passed through to `clustering.map_clusters_to_data`.
    model_run : optional
        Only used for `file=` clustering, where the clusters are read from
        `model_run.timeseries_data[<file>]`.
    **kwargs : optional
        Arguments passed to clustering_func.

    Returns
    -------
    data_new_scaled : xarray.Dataset

    Raises
    ------
    exceptions.ModelError
        For `file=` clustering: multiple columns but no column name given,
        days without a cluster, days with more than one cluster, or loaded
        data that is neither a pandas Series nor a DataFrame.
    KeyError
        For `file=` clustering: the given column is not in the file.
    """
    # Local import so that this function does not rely on a module-level
    # `import warnings`
    import warnings

    assert how in ['mean', 'closest']

    daily_timesteps = get_daily_timesteps(data, check_uniformity=True)
    timesteps_per_day = len(daily_timesteps)

    # get a copy of the dataset with only timeseries variables,
    # and get all coordinates of the original dataset, to reinstate later
    data_to_cluster, data_coords = _drop_timestep_vars(data, timesteps)
    data_to_cluster = data_to_cluster.drop(['timestep_weights', 'timestep_resolution'])

    for dim in data_to_cluster.dims:
        data_to_cluster[dim] = data[dim]

    if normalize:
        data_normalized = normalized_copy(data_to_cluster)
    else:
        data_normalized = data_to_cluster

    if 'file=' in clustering_func:
        # Split on the first '=' only, so file names containing '=' survive
        filename = clustering_func.split('=', 1)[1]
        if ':' in filename:
            filename, column = filename.rsplit(':', 1)
        else:
            column = None

        df = model_run.timeseries_data[filename]

        if isinstance(df, pd.Series):
            # Single column of data: a column name is redundant but harmless,
            # so warn and carry on instead of aborting.
            if column is not None:
                warnings.warn(
                    '{} given as time clustering column, but only one column to '
                    'choose from in {}.'.format(column, filename),
                    exceptions.ModelWarning
                )
            clusters = df.resample('1D').mean()

        elif isinstance(df, pd.DataFrame) and column is None:
            raise exceptions.ModelError(
                'No time clustering column given, but multiple columns found in '
                '{0}. Choose one column and add it to {1} as {1}:name_of_column.'
                .format(filename, clustering_func)
            )

        elif isinstance(df, pd.DataFrame) and column not in df.columns:
            raise KeyError(
                'time clustering column {} not found in {}.'.format(column, filename)
            )

        elif isinstance(df, pd.DataFrame):
            clusters = df.loc[:, column].groupby(pd.Grouper(freq='1D')).unique()

            # Check there weren't instances of more than one cluster assigned to a day
            # or days with no information assigned
            if any(len(i) == 0 for i in clusters.values):
                raise exceptions.ModelError(
                    'Missing cluster days in `{}:{}`.'.format(filename, column)
                )
            elif any(len(i) > 1 for i in clusters.values):
                raise exceptions.ModelError(
                    'More than one cluster value assigned to a day in `{}:{}`. '
                    'Unique clusters per day: {}'.format(filename, column, clusters)
                )
            else:
                # Exactly one value per day: unwrap the one-element arrays
                clusters.loc[:] = [i[0] for i in clusters.values]

        else:
            # Without this branch `clusters` would be left undefined
            raise exceptions.ModelError(
                'Time clustering data loaded from {} must be a pandas Series '
                'or DataFrame, found {}.'.format(filename, type(df).__name__)
            )

    else:
        result = clustering.get_clusters(
            data_normalized, clustering_func,
            timesteps_per_day=timesteps_per_day, **kwargs
        )
        clusters = result[0]  # Ignore other stuff returned

    data_new = clustering.map_clusters_to_data(
        data_to_cluster, clusters, how=how, daily_timesteps=daily_timesteps,
        storage_inter_cluster=storage_inter_cluster
    )

    # It's now safe to add the original coordinates back in (preserving all the
    # loc_tech sets that aren't used to index a variable in the DataArray)
    data_new.update(data_coords)

    data_new = _copy_non_t_vars(data, data_new)

    if timesteps is not None:
        data_new = _copy_non_t_vars(data, data_new)
        data_new = _combine_datasets(data.drop(timesteps, dim='timesteps'), data_new)
        data_new = _copy_non_t_vars(data, data_new)

    # Scale the new/combined data so that the mean for each (loc_tech, variable)
    # combination matches that from the original data
    data_new_scaled = data_new.copy(deep=True)
    if scale_clusters:
        data_vars_in_t = [
            v for v in data_new.data_vars
            if 'timesteps' in data_new[v].dims and
            'timestep_' not in v and v != 'clusters'
        ]
        for var in data_vars_in_t:
            scale = (
                getattr(data[var], scale_clusters)(dim='timesteps') /
                getattr(data_new[var], scale_clusters)(dim='timesteps')
            )
            data_new_scaled[var] = data_new[var] * scale.fillna(0)

    lookup_clusters(data_new_scaled)

    return data_new_scaled