Example 1
def solve_model(backend_model, solver,
                solver_io=None, solver_options=None, save_logs=False,
                **solve_kwargs):

    opt = SolverFactory(solver, solver_io=solver_io)

    if solver_options:
        for k, v in solver_options.items():
            opt.options[k] = v

    if save_logs:
        solve_kwargs.update({
            'symbolic_solver_labels': True,
            'keepfiles': True
        })
        os.makedirs(save_logs, exist_ok=True)
        TempfileManager.tempdir = save_logs  # Sets log output dir
    if 'warmstart' in solve_kwargs.keys() and solver == 'glpk':
        exceptions.warn(
            'The chosen solver, GLPK, does not support warmstart, which may '
            'impact performance.'
        )
        del solve_kwargs['warmstart']

    with redirect_stdout(LogWriter('info', strip=True)):
        with redirect_stderr(LogWriter('error', strip=True)):
            results = opt.solve(backend_model, tee=True, **solve_kwargs)

    return results
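
The save_logs branch above works by pointing Pyomo's temporary-file manager at a user-chosen directory, so the LP files and solver logs survive the run instead of landing in the system temp dir. A minimal standalone sketch of just that mechanism (the directory name is illustrative; recent Pyomo releases expose TempfileManager as shown, while older setups import it from pyutilib.services):

import os

from pyomo.common.tempfiles import TempfileManager  # older Pyomo: pyutilib.services

log_dir = 'solver_logs'  # illustrative path
os.makedirs(log_dir, exist_ok=True)

# From here on, Pyomo writes its temporary files (LP files, and solver logs when
# keepfiles=True is passed to solve) into log_dir instead of the system tempdir
TempfileManager.tempdir = log_dir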
Example 2
def solve_model(backend_model,
                solver,
                solver_io=None,
                solver_options=None,
                save_logs=False,
                **solve_kwargs):
    """
    Solve a Pyomo model using the chosen solver and all necessary solver options

    Returns a Pyomo results object
    """
    opt = SolverFactory(solver, solver_io=solver_io)

    if solver_options:
        for k, v in solver_options.items():
            opt.options[k] = v

    if save_logs:
        solve_kwargs.update({
            'symbolic_solver_labels': True,
            'keepfiles': True
        })
        os.makedirs(save_logs, exist_ok=True)
        TempfileManager.tempdir = save_logs  # Sets log output dir
    if 'warmstart' in solve_kwargs.keys() and solver in ['glpk', 'cbc']:
        exceptions.warn(
            'The chosen solver, {}, does not support warmstart, which may '
            'impact performance.'.format(solver))
        del solve_kwargs['warmstart']

    with redirect_stdout(LogWriter(logger, 'debug', strip=True)):
        with redirect_stderr(LogWriter(logger, 'error', strip=True)):
            results = opt.solve(backend_model, tee=True, **solve_kwargs)

    return results
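
Both variants follow the same pattern: build a SolverFactory, copy user options onto it one key at a time, then call solve(..., tee=True) with stdout/stderr redirected into the logging system. A minimal self-contained sketch of that pattern with a toy model (the solver choice and the tmlim option are illustrative, and plain logging stands in for Calliope's LogWriter):

import io
import logging
from contextlib import redirect_stdout

import pyomo.environ as pyo

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('solver')

# Toy model: maximise x subject to 0 <= x <= 10
model = pyo.ConcreteModel()
model.x = pyo.Var(bounds=(0, 10))
model.obj = pyo.Objective(expr=model.x, sense=pyo.maximize)

opt = pyo.SolverFactory('glpk')
for k, v in {'tmlim': 60}.items():  # user-supplied solver options
    opt.options[k] = v

# tee=True makes the solver log visible; capture it and forward it to the logger
buf = io.StringIO()
with redirect_stdout(buf):
    results = opt.solve(model, tee=True)
for line in buf.getvalue().splitlines():
    logger.info(line)

print(pyo.value(model.x), results.solver.termination_condition)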
Example 3
def rerun_pyomo_model(model_data, backend_model):
    """
    Rerun the Pyomo backend, perhaps after updating a parameter value,
    (de)activating a constraint/objective, or updating run options in the model's
    model_data object (e.g. `run.solver`).

    Returns
    -------
    run_data : xarray.Dataset
        Raw data from this rerun, including both inputs and results.
        To filter inputs/results, use `run_data.filter_by_attrs(is_result=...)`
        with 0 for inputs and 1 for results.
    """
    backend_model.__calliope_run_config = AttrDict.from_yaml_string(model_data.attrs['run_config'])

    if backend_model.__calliope_run_config['mode'] != 'plan':
        raise exceptions.ModelError(
            'Cannot rerun the backend in {} run mode. Only `plan` mode is '
            'possible.'.format(backend_model.__calliope_run_config['mode'])
        )

    timings = {}
    log_time(logger, timings, 'model_creation')

    results, backend_model = backend_run.run_plan(
        model_data, timings, run_pyomo,
        build_only=False, backend_rerun=backend_model
    )
    for k, v in timings.items():
        results.attrs['timings.' + k] = v

    exceptions.warn(
        'model.results will only be updated on running the model from '
        '`model.run()`. We provide results of this rerun as a standalone xarray '
        'Dataset'
    )

    results.attrs.update(model_data.attrs)
    for key, var in results.data_vars.items():
        var.attrs['is_result'] = 1

    inputs = access_pyomo_model_inputs(backend_model)
    for key, var in inputs.data_vars.items():
        var.attrs['is_result'] = 0

    results.update(inputs)
    run_data = results

    return run_data
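
The docstring's hint about run_data.filter_by_attrs(is_result=...) relies on every variable in the returned Dataset carrying an is_result attribute (0 for inputs, 1 for results), exactly as set in the loops above. A small sketch with a toy Dataset (variable names are illustrative):

import numpy as np
import xarray as xr

run_data = xr.Dataset({
    'resource': ('timesteps', np.array([1.0, 2.0, 3.0])),      # an input
    'carrier_prod': ('timesteps', np.array([0.9, 1.8, 2.7])),  # a result
})
run_data['resource'].attrs['is_result'] = 0
run_data['carrier_prod'].attrs['is_result'] = 1

inputs = run_data.filter_by_attrs(is_result=0)
results = run_data.filter_by_attrs(is_result=1)
print(list(inputs.data_vars), list(results.data_vars))
# ['resource'] ['carrier_prod']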
Example 4
def run_operate(model_data, timings, backend, build_only):
    """
    For use when mode is 'operate', to allow the model to be built, edited, and
    iteratively run within Pyomo.

    """
    log_time(timings,
             'run_start',
             comment='Backend: starting model run in operational mode')

    defaults = ruamel.yaml.load(model_data.attrs['defaults'],
                                Loader=ruamel.yaml.Loader)
    operate_params = ['purchased'] + [
        i.replace('_max', '') for i in defaults if i[-4:] == '_max'
    ]

    # Capacity results (from plan mode) can be used as the input to operate mode
    if (any(model_data.filter_by_attrs(is_result=1).data_vars)
            and model_data.attrs.get('run.operation.use_cap_results', False)):
        # Anything with is_result = 1 will be ignored in the Pyomo model
        for varname, varvals in model_data.data_vars.items():
            if varname in operate_params:
                varvals.attrs['is_result'] = 1
                varvals.attrs['operate_param'] = 1

    else:
        cap_max = xr.merge([
            v.rename(k.replace('_max', ''))
            for k, v in model_data.data_vars.items() if '_max' in k
        ])
        cap_equals = xr.merge([
            v.rename(k.replace('_equals', ''))
            for k, v in model_data.data_vars.items() if '_equals' in k
        ])
        caps = cap_max.update(cap_equals)
        for cap in caps.data_vars.values():
            cap.attrs['is_result'] = 1
            cap.attrs['operate_param'] = 1
        model_data.update(caps)

    # Storage initial is carried over between iterations, so must be defined along with storage
    if ('loc_techs_store' in model_data.dims.keys()
            and 'storage_initial' not in model_data.data_vars.keys()):
        model_data['storage_initial'] = (xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_store.values],
            dims='loc_techs_store'))
        model_data['storage_initial'].attrs['is_result'] = 0
        exceptions.warn(
            'Initial stored energy not defined, set to zero for all '
            'loc::techs in loc_techs_store, for use in iterative optimisation')
    # Operated units are carried over between iterations, so must be defined in a MILP model
    if ('loc_techs_milp' in model_data.dims.keys()
            and 'operated_units' not in model_data.data_vars.keys()):
        model_data['operated_units'] = (xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_milp.values],
            dims='loc_techs_milp'))
        model_data['operated_units'].attrs['is_result'] = 1
        model_data['operated_units'].attrs['operate_param'] = 1
        exceptions.warn(
            'daily operated units not defined, set to zero for all '
            'loc::techs in loc_techs_milp, for use in iterative optimisation')

    comments, warnings, errors = checks.check_operate_params(model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warnings,
                                               errors=errors)

    # Initialize our variables
    solver = model_data.attrs['run.solver']
    solver_io = model_data.attrs.get('run.solver_io', None)
    solver_options = model_data.attrs.get('run.solver_options', None)
    save_logs = model_data.attrs.get('run.save_logs', None)
    window = model_data.attrs['run.operation.window']
    horizon = model_data.attrs['run.operation.horizon']
    window_to_horizon = horizon - window

    # get the cumulative sum of timestep resolution, to find where we hit our window and horizon
    timestep_cumsum = model_data.timestep_resolution.cumsum(
        'timesteps').to_pandas()
    # get the timesteps at which we start and end our windows
    window_ends = timestep_cumsum.where((timestep_cumsum % window == 0) | (
        timestep_cumsum == timestep_cumsum[-1]))
    window_starts = timestep_cumsum.where((~np.isnan(window_ends.shift(1))) | (
        timestep_cumsum == timestep_cumsum[0])).dropna()

    window_ends = window_ends.dropna()
    horizon_ends = timestep_cumsum[timestep_cumsum.isin(window_ends.values +
                                                        window_to_horizon)]

    if not any(window_starts):
        raise exceptions.ModelError(
            'Not enough timesteps or incorrect timestep resolution to run in '
            'operational mode with an optimisation window of {}'.format(
                window))

    # We will only update timeseries parameters
    timeseries_data_vars = [
        k for k, v in model_data.data_vars.items()
        if 'timesteps' in v.dims and v.attrs['is_result'] == 0
    ]

    # Loop through each window, solve over the horizon length, and add the result
    # to result_array. We only go as far as the end of the last horizon, which may
    # clip the last bit of data
    result_array = []
    # track whether each iteration finds an optimal solution or not
    terminations = []

    if build_only:
        iterations = [0]
    else:
        iterations = range(len(window_starts))

    for i in iterations:
        start_timestep = window_starts.index[i]

        # Build full model in first instance
        if i == 0:
            warmstart = False
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(timings,
                     'model_gen_1',
                     comment='Backend: generating initial model')

            backend_model = backend.generate_model(window_model_data)

        # Build the full model in the last instance(s),
        # where number of timesteps is less than the horizon length
        elif i > len(horizon_ends) - 1:
            warmstart = False
            end_timestep = window_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                timings,
                'model_gen_{}'.format(i + 1),
                comment=(
                    'Backend: iteration {}: generating new model for '
                    'end of timeseries, with horizon = {} timesteps'.format(
                        i + 1, window_ends[i] - window_starts[i])))

            backend_model = backend.generate_model(window_model_data)

        # Update relevant Pyomo Params in intermediate instances
        else:
            warmstart = True
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                timings,
                'model_gen_{}'.format(i + 1),
                comment='Backend: iteration {}: updating model parameters'.
                format(i + 1))
            # Pyomo model sees the same timestamps each time, we just change the
            # values associated with those timestamps
            for var in timeseries_data_vars:
                # New values
                var_series = window_model_data[var].to_series().dropna(
                ).replace('inf', np.inf)
                # Same timestamps
                var_series.index = backend_model.__calliope_model_data__[
                    'data'][var].keys()
                var_dict = var_series.to_dict()
                # Update pyomo Param with new dictionary
                for k, v in getattr(backend_model, var).items():
                    if k in var_dict:
                        v.set_value(var_dict[k])

        if not build_only:
            log_time(timings,
                     'model_run_{}'.format(i + 1),
                     time_since_start=True,
                     comment='Backend: iteration {}: sending model to solver'.
                     format(i + 1))
            # After iteration 1, warmstart = True, which should speed up the process
            # Note: Warmstart isn't possible with GLPK (dealt with later on)
            _results = backend.solve_model(backend_model,
                                           solver=solver,
                                           solver_io=solver_io,
                                           solver_options=solver_options,
                                           save_logs=save_logs,
                                           warmstart=warmstart)

            log_time(timings,
                     'run_solver_exit_{}'.format(i + 1),
                     time_since_start=True,
                     comment='Backend: iteration {}: solver finished running'.
                     format(i + 1))
            # xarray dataset is built for each iteration
            _termination = backend.load_results(backend_model, _results)
            terminations.append(_termination)

            _results = backend.get_result_array(backend_model, model_data)

            # We give back the actual timesteps for this iteration and take a slice
            # equal to the window length
            _results['timesteps'] = window_model_data.timesteps.copy()

            # We always save the window data. Until the last window(s) this will crop
            # the window_to_horizon timesteps. In the last window(s), optimisation will
            # only be occurring over a window length anyway
            _results = _results.loc[dict(
                timesteps=slice(None, window_ends.index[i]))]
            result_array.append(_results)

            # Set up initial storage for the next iteration
            if 'loc_techs_store' in model_data.dims.keys():
                storage_initial = _results.storage.loc[dict(
                    timesteps=window_ends.index[i])]
                model_data['storage_initial'].loc[{}] = storage_initial.values
                for k, v in backend_model.storage_initial.items():
                    v.set_value(
                        storage_initial.to_series().dropna().to_dict()[k])

            # Set up total operated units for the next iteration
            if 'loc_techs_milp' in model_data.dims.keys():
                operated_units = _results.operating_units.sum(
                    'timesteps').astype(int)
                model_data['operated_units'].loc[{}] += operated_units.values
                for k, v in backend_model.operated_units.items():
                    v.set_value(
                        operated_units.to_series().dropna().to_dict()[k])

            log_time(timings,
                     'run_solver_exit_{}'.format(i + 1),
                     time_since_start=True,
                     comment='Backend: iteration {}: generated solution array'.
                     format(i + 1))

    if build_only:
        results = xr.Dataset()
    else:
        # Concatenate results over the timestep dimension to get a single
        # xarray Dataset of interest
        results = xr.concat(result_array, dim='timesteps')
        if all(i == 'optimal' for i in terminations):
            results.attrs['termination_condition'] = 'optimal'
        else:
            results.attrs['termination_condition'] = ','.join(terminations)

        log_time(timings,
                 'run_solution_returned',
                 time_since_start=True,
                 comment='Backend: generated full solution array')

    return results, backend_model
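
Two pieces of the loop above are easiest to see in isolation. First, the window/horizon bookkeeping: a cumulative sum of timestep resolutions marks a window end wherever the total hits a multiple of window, and the matching horizon end sits horizon - window hours further on. A minimal sketch with uniform hourly resolution (all values illustrative):

import numpy as np
import pandas as pd

window, horizon = 24, 48  # hours
resolution = pd.Series(1.0, index=pd.date_range('2005-01-01', periods=96, freq='h'))

cumsum = resolution.cumsum()
# A window ends where the cumulative hours hit a multiple of `window` (or at the series end)
window_ends = cumsum.where((cumsum % window == 0) | (cumsum == cumsum.iloc[-1]))
# A window starts one step after the previous window end (or at the very start)
window_starts = cumsum.where(
    ~np.isnan(window_ends.shift(1)) | (cumsum == cumsum.iloc[0])
).dropna()
window_ends = window_ends.dropna()
# Each horizon end is `horizon - window` hours beyond its window end
horizon_ends = cumsum[cumsum.isin(window_ends.values + (horizon - window))]

print(len(window_starts), len(window_ends), len(horizon_ends))  # 4 4 3

Second, the intermediate iterations never rebuild the model; they overwrite mutable Pyomo Param values in place, index by index, which is what makes warmstarting worthwhile. A sketch of that update pattern (the Param name and values are illustrative):

import pyomo.environ as pyo

m = pyo.ConcreteModel()
m.timesteps = pyo.Set(initialize=[1, 2, 3])
m.resource = pyo.Param(m.timesteps, initialize={1: 5.0, 2: 6.0, 3: 7.0}, mutable=True)

new_values = {1: 4.0, 2: 8.0, 3: 6.5}  # next window's data, same index labels
for k, v in m.resource.items():
    if k in new_values:
        v.set_value(new_values[k])

print({k: pyo.value(v) for k, v in m.resource.items()})  # {1: 4.0, 2: 8.0, 3: 6.5}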
Example 5
def map_clusters_to_data(data,
                         clusters,
                         how,
                         daily_timesteps,
                         storage_inter_cluster=True):
    """
    Returns a copy of data that has been clustered.

    Parameters
    ----------
    how : str
        How to select data from clusters. Can be mean (centroid) or closest real
        day to the mean (by root mean square error).
    storage_inter_cluster : bool, default=True
        If True, add `datesteps` to model_data, for use in the backend to build
        inter_cluster storage decision variables and constraints
    """
    # FIXME hardcoded time intervals ('1H', '1D')

    # Get all timesteps, not just the first per day
    timesteps_per_day = len(daily_timesteps)
    idx = clusters.index
    new_idx = _timesteps_from_daily_index(idx, daily_timesteps)
    clusters_timeseries = (clusters.reindex(new_idx).fillna(
        method='ffill').astype(int))

    new_data = get_mean_from_clusters(data, clusters_timeseries,
                                      timesteps_per_day)
    new_data.attrs = data.attrs

    if how == 'mean':
        # Add timestep names by taking the median timestamp from daily clusters...
        # (a random way of doing it, but we want some label to apply)
        timestamps = clusters.groupby(clusters).apply(
            lambda x: x.index[int(len(x.index) / 2)])
        new_data.coords['timesteps'] = _timesteps_from_daily_index(
            pd.Index(timestamps.values), daily_timesteps)
        # Generate weights
        # weight of each timestep = number of timesteps in this timestep's cluster
        # divided by timesteps per day (since we're grouping days together and
        # a cluster consisting of 1 day = 24 hours should have weight of 1)
        value_counts = clusters_timeseries.value_counts() / timesteps_per_day
        # And turn the index into dates (days)
        value_counts = pd.DataFrame({
            'dates': timestamps,
            'counts': value_counts
        }).set_index('dates')['counts']

    elif how == 'closest':
        new_data, chosen_ts = get_closest_days_from_clusters(
            data, new_data, clusters, daily_timesteps)
        # Deal with the case where more than one cluster has the same closest day
        # An easy way is to rename the original clusters with the chosen days
        # So at this point, clusterdays_timeseries maps all timesteps to the day
        # of year of the cluster the timestep belongs to
        clusterdays_timeseries = clusters_timeseries.map(
            lambda x: chosen_ts[x])
        value_counts = clusterdays_timeseries.value_counts(
        ) / timesteps_per_day
        timestamps = pd.DataFrame.from_dict(chosen_ts, orient='index')[0]
        cluster_diff = len(clusters.unique()) - len(timestamps.unique())
        if cluster_diff > 0:
            exceptions.warn(
                'Creating {} fewer clusters as some clusters share the same '
                'closest day'.format(cluster_diff))
            timestamps = timestamps.drop_duplicates()
            for cluster, date in timestamps.items():
                clusterdays_timeseries.loc[clusterdays_timeseries ==
                                           date] = cluster
            clusters = clusterdays_timeseries.resample('1D').mean()

    _clusters = xr.DataArray(data=np.full(len(new_data.timesteps.values),
                                          np.nan),
                             dims='timesteps',
                             coords={'timesteps': new_data.timesteps.values})

    for cluster, date in timestamps.items():
        _clusters.loc[date.strftime('%Y-%m-%d')] = cluster

    new_data['timestep_cluster'] = _clusters.astype(int)
    weights = (value_counts.reindex(
        _timesteps_from_daily_index(value_counts.index,
                                    daily_timesteps)).fillna(method='ffill'))
    new_data['timestep_weights'] = xr.DataArray(weights, dims=['timesteps'])
    days = np.unique(new_data.timesteps.to_index().date)
    new_data['timestep_resolution'] = (xr.DataArray(
        np.tile(daily_timesteps, len(days)),
        dims=['timesteps'],
        coords={'timesteps': new_data['timesteps']}))

    if storage_inter_cluster:
        clusters.index.name = 'datesteps'
        new_data['lookup_datestep_cluster'] = xr.DataArray.from_series(
            clusters)
    timestamps.index.name = 'clusters'
    new_data.coords['clusters'] = timestamps.index

    return new_data
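
The heart of the mapping is the reindex-and-forward-fill at the top of the function: a series holding one cluster id per day is expanded so that every timestep of a day carries its day's cluster id, and the weight of a representative day is just the number of real days in its cluster. A toy sketch of that expansion (dates and cluster ids are illustrative):

import pandas as pd

# One cluster id per day: days 1 and 3 fall into cluster 0, day 2 into cluster 1
clusters = pd.Series(
    [0, 1, 0], index=pd.to_datetime(['2005-01-01', '2005-01-02', '2005-01-03'])
)

# Expand the daily index to hourly timesteps and forward-fill the cluster id
hourly_idx = pd.date_range('2005-01-01', periods=72, freq='h')
clusters_timeseries = clusters.reindex(hourly_idx).ffill().astype(int)

# Weight of each cluster = number of real days it represents
timesteps_per_day = 24
weights = clusters_timeseries.value_counts() / timesteps_per_day
print(weights.to_dict())  # {0: 2.0, 1: 1.0}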
Example 6
def apply_clustering(data, timesteps, clustering_func, how, normalize=True,
                     scale_clusters='mean', storage_inter_cluster=True,
                     model_run=None, **kwargs):
    """
    Apply the given clustering function to the given data.

    Parameters
    ----------
    data : xarray.Dataset
    timesteps : pandas.DatetimeIndex or list of timesteps or None
    clustering_func : str
        Name of clustering function. Can be `file=....csv:column_name`
        if loading custom clustering. Custom clustering index = timeseries days.
        If no column_name, the CSV file must have only one column of data.
    how : str
        How to map clusters to data. 'mean' or 'closest'.
    normalize : bool, optional
        If True (default), data is normalized before clustering is applied,
        using :func:`~calliope.core.time.funcs.normalized_copy`.
    scale_clusters : str or None, default = 'mean'
        Scale the results of clustering such that the clusters match the metric
        given by scale_clusters. For example, 'mean' scales along each loc_tech
        and variable to match inputs and outputs. Other options for matching
        include 'sum', 'max', and 'min'. If None, no scaling occurs.
    **kwargs : optional
        Arguments passed to clustering_func.

    Returns
    -------
    data_new_scaled : xarray.Dataset

    """

    assert how in ['mean', 'closest']

    daily_timesteps = get_daily_timesteps(data, check_uniformity=True)
    timesteps_per_day = len(daily_timesteps)

    # get a copy of the dataset with only timeseries variables,
    # and get all coordinates of the original dataset, to reinstate later
    data_to_cluster, data_coords = _drop_timestep_vars(data, timesteps)

    data_to_cluster = data_to_cluster.drop(['timestep_weights', 'timestep_resolution'])

    for dim in data_to_cluster.dims:
        data_to_cluster[dim] = data[dim]

    if normalize:
        data_normalized = normalized_copy(data_to_cluster)
    else:
        data_normalized = data_to_cluster

    if 'file=' in clustering_func:
        file = clustering_func.split('=')[1]
        if ':' in file:
            file, column = file.rsplit(':', 1)
        else:
            column = None

        df = model_run.timeseries_data[file]
        if isinstance(df, pd.Series) and column is not None:
            exceptions.warn(
                '{} given as time clustering column, but only one column to '
                'choose from in {}.'.format(column, file)
            )
            clusters = df.resample('1D').mean()
        elif isinstance(df, pd.DataFrame) and column is None:
            raise exceptions.ModelError(
                'No time clustering column given, but multiple columns found in '
                '{0}. Choose one column and add it to {1} as {1}:name_of_column.'
                .format(file, clustering_func)
            )
        elif isinstance(df, pd.DataFrame) and column not in df.columns:
            raise KeyError(
                'time clustering column {} not found in {}.'.format(column, file)
            )
        elif isinstance(df, pd.DataFrame):
            clusters = df.loc[:, column].groupby(pd.Grouper(freq='1D')).unique()

        # Check there weren't instances of more than one cluster assigned to a day
        # or days with no information assigned
        if any([len(i) == 0 for i in clusters.values]):
            raise exceptions.ModelError(
                'Missing cluster days in `{}:{}`.'.format(file, column)
            )
        elif any([len(i) > 1 for i in clusters.values]):
            raise exceptions.ModelError(
                'More than one cluster value assigned to a day in `{}:{}`. '
                'Unique clusters per day: {}'.format(file, column, clusters)
            )
        else:
            clusters.loc[:] = [i[0] for i in clusters.values]

    else:
        result = clustering.get_clusters(
            data_normalized, clustering_func, timesteps_per_day=timesteps_per_day,
            **kwargs
        )
        clusters = result[0]  # Ignore other stuff returned

    data_new = clustering.map_clusters_to_data(
        data_to_cluster, clusters,
        how=how, daily_timesteps=daily_timesteps,
        storage_inter_cluster=storage_inter_cluster
    )

    # It's now safe to add the original coordinates back in (preserving all the
    # loc_tech sets that aren't used to index a variable in the DataArray)
    data_new.update(data_coords)

    data_new = _copy_non_t_vars(data, data_new)

    if timesteps is not None:
        data_new = _copy_non_t_vars(data, data_new)
        data_new = _combine_datasets(data.drop(timesteps, dim='timesteps'), data_new)
        data_new = _copy_non_t_vars(data, data_new)


    # Scale the new/combined data so that the mean for each (loc_tech, variable)
    # combination matches that from the original data
    data_new_scaled = data_new.copy(deep=True)
    if scale_clusters:
        data_vars_in_t = [
            v for v in data_new.data_vars
            if 'timesteps' in data_new[v].dims and
            'timestep_' not in v and v != 'clusters'
        ]
        for var in data_vars_in_t:
            scale = (
                getattr(data[var], scale_clusters)(dim='timesteps') /
                getattr(data_new[var], scale_clusters)(dim='timesteps')
            )
            data_new_scaled[var] = data_new[var] * scale.fillna(0)

    lookup_clusters(data_new_scaled)

    return data_new_scaled
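
The closing scale_clusters block simply rescales each clustered timeseries variable so that its chosen statistic (the mean by default) matches the original data's. The same idea on a single toy DataArray (names and numbers are illustrative):

import numpy as np
import xarray as xr

original = xr.DataArray(np.arange(8.0), dims='timesteps')         # mean 3.5
clustered = xr.DataArray(np.array([1.0, 5.0]), dims='timesteps')  # mean 3.0

# Scale the clustered series so its mean matches the original mean
scale = original.mean(dim='timesteps') / clustered.mean(dim='timesteps')
clustered_scaled = clustered * scale.fillna(0)

print(float(clustered_scaled.mean()), float(original.mean()))  # 3.5 3.5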