Example #1
def load_timeseries_from_dataframe(timeseries_dataframes, tskey):

    # If `df=` is called, timeseries_dataframes must be entered
    if timeseries_dataframes is None:
        raise exceptions.ModelError(
            "Error in loading timeseries. Model config specifies df={} but "
            "no timeseries passed as arguments in calliope.Model(...). "
            "Note that, if running from a command line, it is not possible "
            "to read dataframes via `df=...` and you should specify "
            "`file=...` with a CSV file.".format(tskey)
        )

    try:
        df = timeseries_dataframes[tskey]
    except KeyError:
        raise exceptions.ModelError(
            "Error in loading data from dataframe. "
            "Model attempted to load dataframe with key `{}`, "
            "but time series passed as arguments are {}".format(
                tskey, set(timeseries_dataframes.keys())
            )
        )
    if not isinstance(df, pd.DataFrame):
        raise exceptions.ModelError(
            "Error in loading data. Object passed in time series "
            "dictionary under key {} is a {}, not a DataFrame.".format(tskey, type(df))
        )
    return df
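
A minimal usage sketch (not part of the source): `timeseries_dataframes` is the dict of pandas DataFrames that calliope.Model(...) would receive, keyed by the names used in `df=...` entries of the model config. The key and column names here are illustrative.

import pandas as pd

# Hypothetical dict of timeseries, keyed by the identifiers used in `df=...` config entries
timeseries_dataframes = {
    "demand": pd.DataFrame(
        {"X1": [10, 12, 9]},
        index=pd.date_range("2005-01-01", periods=3, freq="H"),
    ),
}

df = load_timeseries_from_dataframe(timeseries_dataframes, "demand")  # returns the "demand" DataFrame
# load_timeseries_from_dataframe(timeseries_dataframes, "wind") -> ModelError (key not in the dict)
# load_timeseries_from_dataframe(None, "demand") -> ModelError (df= used but no dataframes passed)
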
Example #2
def update_pyomo_param(backend_model, param, update_dict):
    """
    A Pyomo Param value can be updated without the user directly accessing the
    backend model.

    Parameters
    ----------
    param : str
        Name of the parameter to update
    update_dict : dict
        keys are parameter indices (either strings or tuples of strings,
        depending on whether there is one or more than one dimension). Values
        are the new values being assigned to the parameter at the given indices.

    Returns
    -------
    Value(s) will be updated in-place, requiring the user to run the model again to
    see the effect on results.

    """
    if not hasattr(backend_model, param):
        raise exceptions.ModelError(
            "Parameter `{}` not in the Pyomo Backend. Check that the string "
            "matches the corresponding constraint/cost in the model.inputs "
            "xarray Dataset".format(param))
    elif not isinstance(getattr(backend_model, param),
                        po.base.param.IndexedParam):
        raise exceptions.ModelError(
            "`{}` not a Parameter in the Pyomo Backend. Sets and decision variables "
            "cannot be updated by the user".format(param))
    elif not isinstance(update_dict, dict):
        raise TypeError("`update_dict` must be a dictionary")

    else:
        getattr(backend_model, param).store_values(update_dict)
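
A hedged usage sketch: `backend_model` is an already-generated Pyomo backend and `energy_cap_max` stands in for any indexed parameter name; both are illustrative rather than taken from the source.

# Hypothetical: update several entries of an indexed Pyomo Param in one call.
# Keys are single index strings here; for multi-dimensional parameters they would be tuples.
new_values = {"X1::ccgt": 500, "X2::ccgt": 250}
update_pyomo_param(backend_model, "energy_cap_max", new_values)
# A name that is not a Param (or not on the backend at all) raises ModelError,
# and a non-dict `update_dict` raises TypeError.
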
Example #3
def activate_pyomo_constraint(backend_model, constraint, active=True):
    """
    Takes a constraint or objective name, finds it in the backend model and sets
    its status to either active or inactive.

    Parameters
    ----------
    constraint : str
        Name of the constraint/objective to activate/deactivate
        Built-in constraints include '_constraint'
    active: bool, default=True
        status to set the constraint/objective
    """
    if not hasattr(backend_model, constraint):
        raise exceptions.ModelError(
            "constraint/objective `{}` not in the Pyomo Backend.".format(
                constraint))
    elif not isinstance(getattr(backend_model, constraint),
                        po.base.Constraint):
        raise exceptions.ModelError(
            "`{}` not a constraint in the Pyomo Backend.".format(constraint))
    elif active is True:
        getattr(backend_model, constraint).activate()
    elif active is False:
        getattr(backend_model, constraint).deactivate()
    else:
        raise ValueError("Argument `active` must be True or False")
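
A short sketch of the call pattern, assuming a generated backend and a constraint name that exists on it (both illustrative):

# Hypothetical: switch a constraint off before a rerun, then back on afterwards
activate_pyomo_constraint(backend_model, "energy_capacity_constraint", active=False)
# ... rerun the backend here ...
activate_pyomo_constraint(backend_model, "energy_capacity_constraint", active=True)
# Any non-boolean `active` value raises ValueError
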
Example #4
 def _error_on_malformed_input():
     if backend_rerun:
         try:
             backend_rerun.obj()
         except ValueError:  # model has not yet been run
             pass
         else:
             raise exceptions.ModelError(
                 "Cannot run SPORES if the backend model already has a solution. "
                 "Consider using the `build_only` optional `run()` argument to avoid this."
             )
     if "spores" in model_data.filter_by_attrs(is_result=0).squeeze().dims:
         raise exceptions.ModelError(
             "Cannot run SPORES with a SPORES dimension in any input (e.g. `cost_energy_cap`)."
         )
Example #5
    def run(self, force_rerun=False, **kwargs):
        """
        Run the model. If ``force_rerun`` is True, any existing results
        will be overwritten.

        Additional kwargs are passed to the backend.

        """

        # Check that results exist and are non-empty
        if hasattr(self,
                   'results') and self.results.data_vars and not force_rerun:
            raise exceptions.ModelError(
                'This model object already has results. '
                'Use model.run(force_rerun=True) to force '
                'the results to be overwritten with a new run.')

        if (self._model_data.attrs['run.mode'] == 'operate'
                and not self._model_data.attrs['allow_operate_mode']):
            raise exceptions.ModelError(
                'Unable to run this model in operational mode, probably because '
                'there exist non-uniform timesteps (e.g. from time masking)')

        results, self._backend_model, interface = run_backend(
            self._model_data, self._timings, **kwargs)

        # Add additional post-processed result variables to results
        if results.attrs.get('termination_condition', None) == 'optimal':
            results = postprocess.postprocess_model_results(
                results, self._model_data, self._timings)

        for var in results.data_vars:
            results[var].attrs['is_result'] = 1

        self._model_data.update(results)
        self._model_data.attrs.update(results.attrs)

        if 'run_solution_returned' in self._timings.keys():
            self._model_data.attrs['solution_time'] = (
                self._timings['run_solution_returned'] -
                self._timings['run_start']).total_seconds()
            self._model_data.attrs['time_finished'] = (
                self._timings['run_solution_returned'].strftime(
                    '%Y-%m-%d %H:%M:%S'))

        self.results = self._model_data.filter_by_attrs(is_result=1)

        self.backend = interface(self)
Example #6
def drop(data, timesteps):
    """
    Drop timesteps from data, adjusting the timestep weight of remaining
    timesteps accordingly. Returns updated dataset.

    Parameters
    ----------
    data : xarray.Dataset
        Calliope model data.
    timesteps : str or list or other iterable
        Pandas-compatible timestep strings.

    """
    # Turn timesteps into a pandas datetime index for subsetting, which also
    # checks whether they are actually valid
    try:
        timesteps_pd = pd.to_datetime(timesteps)
    except Exception as e:
        raise exceptions.ModelError('Invalid timesteps: {}'.format(timesteps))

    # 'Distribute weight' of the dropped timesteps onto the remaining ones
    dropped_weight = data.timestep_weights.loc[{
        'timesteps': timesteps_pd
    }].sum()

    data = data.drop(timesteps_pd, dim='timesteps')

    data['timestep_weights'] = data['timestep_weights'] + (
        dropped_weight / len(data['timestep_weights']))

    return data
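
To make the weight redistribution concrete, a standalone sketch (assuming an xarray version contemporary with this code, since `Dataset.drop(..., dim=...)` is deprecated in newer releases): dropping 2 of 10 unit-weight timesteps spreads their combined weight of 2.0 over the remaining 8, giving each a weight of 1.25 and preserving the total of 10.

import numpy as np
import pandas as pd
import xarray as xr

steps = pd.date_range("2005-01-01", periods=10, freq="H")
data = xr.Dataset(
    {"timestep_weights": ("timesteps", np.ones(10))},
    coords={"timesteps": steps},
)

dropped = drop(data, ["2005-01-01 00:00", "2005-01-01 01:00"])
print(dropped.timestep_weights.values)  # expected: eight values of 1.25
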
Example #7
def load_overrides_from_scenario(config_model, scenario):
    def _get_overrides(scenario_name):
        _overrides = config_model.get_key(f"scenarios.{scenario_name}",
                                          [scenario_name])
        if isinstance(_overrides, list):
            return _overrides
        else:
            return [_overrides]

    if scenario in config_model.get("scenarios", {}).keys():
        if "," in scenario:
            exceptions.warn(
                f"Scenario name `{scenario}` includes commas that won't be parsed as a list of overrides."
            )
        logger.info("Loading overrides from scenario: {} ".format(scenario))
        scenario_list = _get_overrides(scenario)
    else:
        scenario_list = scenario.split(",")
    scenario_overrides = set()
    for override in scenario_list:
        if isinstance(override, dict):
            raise exceptions.ModelError(
                "Scenario definition must be a list of override or other scenario names."
            )
        if override in config_model.get("scenarios", {}).keys():
            scenario_overrides.update(
                load_overrides_from_scenario(config_model, override))
        else:
            scenario_overrides.add(override)

    return list(scenario_overrides)
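
A commented walkthrough of the expected behaviour (the scenario and override names are made up; `config_model` would be the AttrDict loaded from the model YAML):

# scenarios:
#   winter_peak: ["cold_snap", "high_demand"]   # a scenario grouping two names
#   high_demand: ["demand_plus_20"]             # itself a scenario, resolved recursively
#
# load_overrides_from_scenario(config_model, "winter_peak")
#   -> ["cold_snap", "demand_plus_20"]   (a set is used internally, so order is not guaranteed)
# load_overrides_from_scenario(config_model, "cold_snap,demand_plus_20")
#   -> the comma-separated names are split and returned as overrides
# A scenario entry that is a dict rather than a list of names raises ModelError.
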
Example #8
def lookup_loc_techs_non_conversion(model_run):
    """
    loc_techs are linked to their loc_tech_carriers, based on their carrier_in or
    carrier_out attribute. E.g. `X1::ccgt` will be linked to `X1::ccgt::power`
    as the carrier_out for ccgt is `power`.
    """
    lookup_loc_techs_dict = dict(dims=['loc_techs_non_conversion'])

    data = []
    for loc_tech in model_run.sets['loc_techs_non_conversion']:
        # For any non-conversion technology, there is only one carrier (either
        # produced or consumed)
        loc_tech_carrier = list(
            set(i for i in model_run.sets['loc_tech_carriers_prod'] +
                model_run.sets['loc_tech_carriers_con']
                if loc_tech == i.rsplit("::", 1)[0]))
        if len(loc_tech_carrier) > 1:
            raise exceptions.ModelError(
                'More than one carrier associated with '
                'non-conversion location:technology `{}`'.format(loc_tech))
        else:
            data.append(loc_tech_carrier[0])
    lookup_loc_techs_dict['data'] = data

    return lookup_loc_techs_dict
Example #9
def combine_overrides(config_model, overrides):
    override_dict = AttrDict()
    for override in overrides:
        try:
            yaml_string = config_model.overrides[override].to_yaml()
            override_with_imports = AttrDict.from_yaml_string(yaml_string)
        except KeyError:
            raise exceptions.ModelError(
                "Override `{}` is not defined.".format(override))
        try:
            override_dict.union(override_with_imports, allow_override=False)
        except KeyError as e:
            raise exceptions.ModelError(
                str(e)[1:-1] + ". Already specified but defined again in "
                "override `{}`.".format(override))

    return override_dict
Example #10
def update_pyomo_param(backend_model, param, index, value):
    """
    A Pyomo Param value can be updated without the user directly accessing the
    backend model.

    Parameters
    ----------
    param : str
        Name of the parameter to update
    index : tuple of strings
        Tuple of dimension indices, in the order given in model.inputs for the
        reciprocal parameter
    value : int, float, bool, or str
        Value to assign to the Pyomo Param at the given index

    Returns
    -------
    Value will be updated in-place, requiring the user to run the model again to
    see the effect on results.

    """
    if not hasattr(backend_model, param):
        raise exceptions.ModelError(
            'Parameter {} not in the Pyomo Backend. Check that the string '
            'matches the corresponding constraint/cost in the model.inputs '
            'xarray Dataset'.format(param)
        )
    elif not isinstance(getattr(backend_model, param), po.base.param.IndexedParam):
        raise exceptions.ModelError(
            '{} not a Parameter in the Pyomo Backend. Sets and decision variables '
            'cannot be updated by the user'.format(param)
        )
    elif index not in getattr(backend_model, param):
        raise exceptions.ModelError(
            'index {} not in the Pyomo Parameter {}. Call '
            'model.access_backend_model_inputs to see the indices of the Pyomo '
            'Parameters'.format(index, param)
        )
    else:
        print(
            'Warning: we currently do not check that the updated value is the '
            'correct data type for this Pyomo Parameter, this is your '
            'responsibility to check!'
        )
        getattr(backend_model, param)[index] = value
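
A hedged single-value sketch; the parameter name, index order and value are illustrative, and the index tuple must follow the dimension order shown in model.inputs for that parameter:

# Hypothetical: overwrite one entry of a two-dimensional cost parameter
update_pyomo_param(backend_model, "cost_energy_cap", ("monetary", "X1::ccgt"), 1000)
# Unknown parameter names, non-Param objects, or indices not present in the Param raise ModelError;
# note the printed warning that the value's data type is not checked for you.
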
Example #11
 def _check_data(self):
     if self.node_dict or self.tech_dict:
         raise exceptions.ModelError(
             "Some data not extracted from inputs into model dataset:\n"
             f"{self.node_dict}")
     self.model_data, final_check_comments, warns, errors = checks.check_model_data(
         self.model_data)
     exceptions.print_warnings_and_raise_errors(warnings=warns,
                                                errors=errors)
Example #12
def check_timeseries_dataframes(timeseries_dataframes):
    """
    Timeseries dataframes should be dict of pandas DataFrames.
    """
    if not isinstance(timeseries_dataframes, dict) or not all([
            isinstance(timeseries_dataframes[i], pd.DataFrame)
            for i in timeseries_dataframes
    ]):
        raise exceptions.ModelError(
            "Error in loading timeseries data from dataframes. "
            "`timeseries_dataframes` must be dict of pandas DataFrames.")
Example #13
    def run(self, force_rerun=False, **kwargs):
        """
        Run the model. If ``force_rerun`` is True, any existing results
        will be overwritten.

        Additional kwargs are passed to the backend.

        """
        # Check that results exist and are non-empty
        if hasattr(self, "results") and self.results.data_vars and not force_rerun:
            raise exceptions.ModelError(
                "This model object already has results. "
                "Use model.run(force_rerun=True) to force"
                "the results to be overwritten with a new run."
            )

        if (
            self.run_config["mode"] == "operate"
            and not self._model_data.attrs["allow_operate_mode"]
        ):
            raise exceptions.ModelError(
                "Unable to run this model in operational mode, probably because "
                "there exist non-uniform timesteps (e.g. from time masking)"
            )

        results, self._backend_model, self._backend_model_opt, interface = run_backend(
            self._model_data, self._timings, **kwargs
        )

        # Add additional post-processed result variables to results
        if results.attrs.get("termination_condition", None) in ["optimal", "feasible"]:
            results = postprocess_results.postprocess_model_results(
                results, self._model_data, self._timings
            )
        self._model_data.attrs.update(results.attrs)
        self._model_data = xr.merge(
            [results, self._model_data], compat="override", combine_attrs="no_conflicts"
        )
        self._add_model_data_methods()

        self.backend = interface(self)
Example #14
def energy_capacity_systemwide_constraint_rule(backend_model, tech):
    """
    Set constraints to limit the capacity of a single technology type across all locations in the model.

    The first valid case is applied:

    .. container:: scrolling-wrapper

        .. math::

            \\sum_{loc}\\boldsymbol{energy_{cap}}(loc::tech)
            \\begin{cases}
                = energy_{cap, equals, systemwide}(loc::tech),&
                    \\text{if } energy_{cap, equals, systemwide}(loc::tech)\\\\
                \\leq energy_{cap, max, systemwide}(loc::tech),&
                    \\text{if } energy_{cap, max, systemwide}(loc::tech)\\\\
                \\text{unconstrained},& \\text{otherwise}
            \\end{cases}
            \\forall tech \\in techs

    """

    if tech in backend_model.techs_transmission_names:
        all_loc_techs = [
            i for i in backend_model.loc_techs_transmission
            if i.split('::')[1].split(':')[0] == tech
        ]
        multiplier = 2  # there are always two technologies associated with one link
    else:
        all_loc_techs = [
            i for i in backend_model.loc_techs if i.split('::')[1] == tech
        ]
        multiplier = 1

    max_systemwide = get_param(backend_model, 'energy_cap_max_systemwide',
                               tech)
    equals_systemwide = get_param(backend_model,
                                  'energy_cap_equals_systemwide', tech)

    if np.isinf(po.value(max_systemwide)) and not equals_systemwide:
        return po.Constraint.NoConstraint
    elif equals_systemwide and np.isinf(po.value(equals_systemwide)):
        raise exceptions.ModelError(
            'Cannot use inf for energy_cap_equals_systemwide for tech `{}`'.
            format(tech))

    sum_expr = sum(backend_model.energy_cap[loc_tech]
                   for loc_tech in all_loc_techs)

    if equals_systemwide:
        return sum_expr == equals_systemwide * multiplier
    else:
        return sum_expr <= max_systemwide * multiplier
Example #15
def get_daily_timesteps(data, check_uniformity=False):
    daily_timesteps = [
        data.timestep_resolution.loc[i].values
        for i in np.unique(data.timesteps.to_index().strftime('%Y-%m-%d'))
    ]

    if check_uniformity:
        if not np.all(daily_timesteps == daily_timesteps[0]):
            raise exceptions.ModelError(
                'For clustering, timestep resolution must be uniform.')

    return daily_timesteps[0]
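
A small self-contained sketch (two days of hourly data, uniform resolution), showing that the function returns the per-timestep resolution of a single representative day:

import numpy as np
import pandas as pd
import xarray as xr

steps = pd.date_range("2005-01-01", periods=48, freq="H")
data = xr.Dataset(
    {"timestep_resolution": ("timesteps", np.ones(48))},
    coords={"timesteps": steps},
)

daily = get_daily_timesteps(data, check_uniformity=True)
print(len(daily), daily[0])  # expected: 24 1.0 -- one resolution value per timestep of a day
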
Example #16
def load_timeseries_from_dataframe(timeseries_dataframes, tskey):

    # If `df=` is called, timeseries_dataframes must be entered
    if timeseries_dataframes is None:
        raise exceptions.ModelError(
            "Error in loading timeseries. Model config specifies df={} but "
            "no timeseries passed as arguments in calliope.Model(...). "
            "Note that, if running from a command line, it is not possible "
            "to read dataframes via `df=...` and you should specify "
            "`file=...` with a CSV file.".format(tskey))

    try:
        df = timeseries_dataframes[tskey]
    except KeyError:
        raise exceptions.ModelError(
            "Error in loading data from dataframe. "
            "Model attempted to load dataframe with key `{}`, "
            "but available dataframes are {}".format(
                tskey, set(timeseries_dataframes.keys())))
    df.columns = pd.MultiIndex.from_product([[tskey], df.columns],
                                            names=["source", "column"])
    return df
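
The difference from Example #1 is the MultiIndex column labelling; a small sketch of the resulting columns (key and column names illustrative):

import pandas as pd

timeseries_dataframes = {
    "demand": pd.DataFrame(
        {"X1": [10, 12], "X2": [7, 8]},
        index=pd.date_range("2005-01-01", periods=2, freq="H"),
    ),
}

df = load_timeseries_from_dataframe(timeseries_dataframes, "demand")
print(list(df.columns))  # expected: [('demand', 'X1'), ('demand', 'X2')] with level names ('source', 'column')
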
Example #17
def lookup_loc_techs_conversion(dataset, model_run):
    """
    Conversion technologies are separated from other non-conversion technologies
    as there is more than one carrier associated with a single loc_tech. Here,
    the link is made per carrier tier (`out` and `in` are the two primary carrier
    tiers)
    """
    # Get the string name for a loc_tech which includes the carriers in and out
    # associated with that technology (for conversion technologies)
    carrier_tiers = model_run.sets["carrier_tiers"]

    loc_techs_conversion_array = xr.DataArray(
        data=np.empty(
            (len(model_run.sets["loc_techs_conversion"]), len(carrier_tiers)),
            dtype=np.object,
        ),
        dims=["loc_techs_conversion", "carrier_tiers"],
        coords={
            "loc_techs_conversion": list(model_run.sets["loc_techs_conversion"]),
            "carrier_tiers": list(carrier_tiers),
        },
    )
    for loc_tech in model_run.sets["loc_techs_conversion"]:
        # For any conversion technology, there are only two carriers
        # (one produced and one consumed)
        loc_tech_carrier_in = [
            i
            for i in model_run.sets["loc_tech_carriers_con"]
            if loc_tech == i.rsplit("::", 1)[0]
        ]

        loc_tech_carrier_out = [
            i
            for i in model_run.sets["loc_tech_carriers_prod"]
            if loc_tech == i.rsplit("::", 1)[0]
        ]
        if len(loc_tech_carrier_in) > 1 or len(loc_tech_carrier_out) > 1:
            raise exceptions.ModelError(
                "More than one carrier in or out associated with "
                "conversion location:technology `{}`".format(loc_tech)
            )
        else:
            loc_techs_conversion_array.loc[
                dict(loc_techs_conversion=loc_tech, carrier_tiers=["in", "out"])
            ] = [loc_tech_carrier_in[0], loc_tech_carrier_out[0]]

    dataset = dataset.merge(
        loc_techs_conversion_array.to_dataset(name="lookup_loc_techs_conversion")
    )

    return dataset
Example #18
def combine_overrides(override_file_path, override_groups):
    if ',' in override_groups:
        overrides = override_groups.split(',')
    else:
        overrides = [override_groups]

    override = AttrDict()
    for group in overrides:
        try:
            override_group_from_file = AttrDict.from_yaml(
                override_file_path)[group]
        except KeyError:
            raise exceptions.ModelError(
                'Override group `{}` does not exist in file `{}`.'.format(
                    group, override_file_path))
        try:
            override.union(override_group_from_file, allow_override=False)
        except KeyError as e:
            raise exceptions.ModelError(
                str(e)[1:-1] + '. Already specified but defined again in '
                'override group `{}`.'.format(group))

    return override
Example #19
def rerun_pyomo_model(model_data, backend_model):
    """
    Rerun the Pyomo backend, perhaps after updating a parameter value,
    (de)activating a constraint/objective or updating run options in the
    model_data object (e.g. `run.solver`).

    Returns
    -------
    run_data : xarray.Dataset
        Raw data from this rerun, including both inputs and results.
        To filter inputs/results, use `run_data.filter_by_attrs(is_result=...)`
        with 0 for inputs and 1 for results.
    """
    backend_model.__calliope_run_config = AttrDict.from_yaml_string(model_data.attrs['run_config'])

    if backend_model.__calliope_run_config['mode'] != 'plan':
        raise exceptions.ModelError(
            'Cannot rerun the backend in {} run mode. Only `plan` mode is '
            'possible.'.format(backend_model.__calliope_run_config['mode'])
        )

    timings = {}
    log_time(logger, timings, 'model_creation')

    results, backend_model = backend_run.run_plan(
        model_data, timings, run_pyomo,
        build_only=False, backend_rerun=backend_model
    )
    for k, v in timings.items():
        results.attrs['timings.' + k] = v

    exceptions.warn(
        'model.results will only be updated on running the model from '
        '`model.run()`. We provide results of this rerun as a standalone xarray '
        'Dataset'
    )

    results.attrs.update(model_data.attrs)
    for key, var in results.data_vars.items():
        var.attrs['is_result'] = 1

    inputs = access_pyomo_model_inputs(backend_model)
    for key, var in inputs.data_vars.items():
        var.attrs['is_result'] = 0

    results.update(inputs)
    run_data = results

    return run_data
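
A sketch of the intended call pattern, assuming a Calliope model that has already been built and solved once in `plan` mode; the private attribute names follow the other examples in this list and are illustrative:

# Hypothetical: rerun the already-built backend after editing it, without touching model.results
run_data = rerun_pyomo_model(model._model_data, model._backend_model)
new_results = run_data.filter_by_attrs(is_result=1)   # results of this rerun only
new_inputs = run_data.filter_by_attrs(is_result=0)    # inputs that produced them
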
Example #20
def _get_array(data, var, tech, **kwargs):
    subset = {"techs": tech}
    if kwargs is not None:
        subset.update({k: v for k, v in kwargs.items()})

    unusable_dims = (set(subset.keys()).difference(
        ["techs", "nodes"]).difference(data[var].dims))
    if unusable_dims:
        raise exceptions.ModelError(
            "Attempting to mask time based on  technology {}, "
            "but dimension(s) {} do not exist for parameter {}".format(
                tech, unusable_dims, var.name))

    arr = data[var].loc[subset].groupby("timesteps").mean(...).to_pandas()
    return arr
Example #21
def _stack_data(data, dates, times):
    """
    Stack all non-time dimensions of an xarray DataArray
    """
    data_to_stack = data.assign_coords(timesteps=pd.MultiIndex.from_product(
        [dates, times], names=["dates", "times"])).unstack("timesteps")
    non_date_dims = list(
        set(data_to_stack.dims).difference(["dates", "times"])) + ["times"]
    if len(non_date_dims) >= 2:
        stacked_var = data_to_stack.stack(stacked=non_date_dims)
    else:
        raise exceptions.ModelError(
            "Cannot conduct time clustering with variable {} as it has no "
            "non-time dimensions.".format(data.name))
    return stacked_var
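
A small illustrative reshape (dimension names and values made up; note that assigning a pandas MultiIndex via `assign_coords` is deprecated in recent xarray, so this assumes a version contemporary with the code above):

import numpy as np
import pandas as pd
import xarray as xr

dates = pd.Index(["2005-01-01", "2005-01-02"], name="dates")
times = pd.Index(["00:00", "12:00"], name="times")
steps = pd.date_range("2005-01-01", periods=4, freq="12H")

da = xr.DataArray(
    np.arange(8).reshape(4, 2),
    dims=("timesteps", "techs"),
    coords={"timesteps": steps, "techs": ["pv", "wind"]},
    name="resource",
)

stacked = _stack_data(da, dates, times)
print(stacked.dims)  # expected: ('dates', 'stacked') -- one row per date, columns are (techs, times) pairs
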
Example #22
def _get_array(data, var, tech, **kwargs):
    subset = {'techs': tech}
    if kwargs is not None:
        subset.update({k: v for k, v in kwargs.items()})
    unusable_dims = (set(subset.keys())
                        .difference(["techs", "locs"])
                        .difference(data[var].dims)
                    )
    if unusable_dims:
        raise exceptions.ModelError("attempting to mask time based on "
                                    "technology {}, but dimension(s) "
                                    "{} don't exist for parameter {}".format(
                                        tech, unusable_dims, var.name))
    arr = split_loc_techs(data[var].copy()).loc[subset]
    arr = arr.mean(dim=[i for i in arr.dims if i != 'timesteps']).to_pandas()
    return arr
Example #23
def _get_carrier_group_constraint_loc_techs(
    loc_techs, locs, config, constraint_name, sets, constraint_sets
):
    flow = "con" if "_con_" in constraint_name else "prod"
    if len(config.keys()) > 1:
        raise exceptions.ModelError(
            "Can only handle one carrier per group constraint that is carrier-based"
        )
    carrier = list(config.keys())[0]
    if "net_import" in constraint_name:
        _loc_tech_carriers = _get_net_import_loc_tech_carrier_subset(
            loc_techs,
            sets["loc_tech_carriers_con"] + sets["loc_tech_carriers_prod"],
            carrier,
            locs,
        )
    else:
        _loc_tech_carriers = _get_loc_tech_carrier_subset(
            loc_techs, sets[f"loc_tech_carriers_{flow}"], carrier, locs
        )

    if "share" in constraint_name:
        lhs_loc_tech_carriers = _loc_tech_carriers
        if "demand" in constraint_name or "import" in constraint_name:
            rhs_loc_tech_carriers = _get_loc_tech_carrier_subset(
                sets["loc_techs_demand"], sets["loc_tech_carriers_con"], carrier, locs
            )
        elif flow == "con":
            rhs_loc_tech_carriers = _get_loc_tech_carrier_subset(
                sets["loc_techs_demand"] + sets["loc_techs_conversion_all"],
                sets["loc_tech_carriers_con"],
                carrier,
                locs,
            )
        elif flow == "prod":
            rhs_loc_tech_carriers = _get_loc_tech_carrier_subset(
                sets["loc_techs_supply_conversion_all"],
                sets["loc_tech_carriers_prod"],
                carrier,
                locs,
            )
        return {
            "group_constraint_loc_tech_carriers_{}_lhs": list(lhs_loc_tech_carriers),
            "group_constraint_loc_tech_carriers_{}_rhs": list(rhs_loc_tech_carriers),
        }
    else:
        return {"group_constraint_loc_tech_carriers_{}": list(_loc_tech_carriers)}
Example #24
def _get_relevant_vars(model, dataset, array):

    carriers = list(dataset.carriers.values)

    allowed_input_vars = [
        k for k, v in model.inputs.data_vars.items()
        if 'timesteps' in v.dims and len(v.dims) > 1
    ]
    allowed_result_vars = ([
        'results', 'inputs', 'all', 'storage', 'resource_con', 'cost_var'
    ])

    if ((isinstance(array, list)
         and not set(array).intersection(allowed_input_vars +
                                         allowed_result_vars + carriers))
            or (isinstance(array, str) and array
                not in allowed_input_vars + allowed_result_vars + carriers)):
        raise exceptions.ModelError(
            'Cannot plot array={}. If you want carrier flow (_prod, _con, _export) '
            'then specify the name of the energy carrier as array'.format(
                array))

    # relevant_vars are all variables relevant to this plotting instance
    relevant_vars = []

    # Ensure carriers are at the top of the list
    if array == 'results':
        relevant_vars += sorted(carriers) + sorted(allowed_result_vars)
    elif array == 'inputs':
        relevant_vars += sorted(allowed_input_vars)
    elif array == 'all':
        relevant_vars += sorted(carriers) + sorted(allowed_result_vars +
                                                   allowed_input_vars)
    elif isinstance(array, list):
        relevant_vars = array
    elif isinstance(array, str):
        relevant_vars = [array]

    relevant_vars = [i for i in relevant_vars if i in dataset or i in carriers]
    return relevant_vars
Example #25
def _get_relevant_vars(dataset, array):
    allowed_input_vars = [
        i + j for i, j in product([
            'resource_area', 'energy_cap', 'resource_cap', 'storage_cap',
            'units'
        ], ['_max', '_min', '_equals'])
    ]
    allowed_result_vars = [
        'results', 'inputs', 'all', 'resource_area', 'energy_cap',
        'resource_cap', 'storage_cap', 'units', 'systemwide_levelised_cost',
        'systemwide_capacity_factor'
    ]

    if ((isinstance(array, list)
         and set(array).difference(allowed_input_vars + allowed_result_vars))
            or (isinstance(array, str)
                and array not in allowed_input_vars + allowed_result_vars)):
        raise exceptions.ModelError(
            'Cannot plot array={}, as one or more of the elements is not considered '
            'to be a capacity'.format(array))

    # relevant_vars are all variables relevant to this plotting instance
    if array == 'results':
        relevant_vars = sorted(allowed_result_vars)
    elif array == 'inputs':
        relevant_vars = sorted(allowed_input_vars)
    elif array == 'all':
        relevant_vars = sorted(allowed_result_vars + allowed_input_vars)
    elif isinstance(array, list):
        relevant_vars = array
    elif isinstance(array, str):
        relevant_vars = [array]

    relevant_vars = [i for i in relevant_vars if i in dataset]

    # Remove all vars that don't actually turn up in the dataset, which is relevant
    # really only for results vars
    return sorted(
        list(set(relevant_vars).intersection(dataset.data_vars.keys())))
Example #26
def run_operate(model_data, timings, backend, build_only):
    """
    For use when mode is 'operate', to allow the model to be built, edited, and
    iteratively run within Pyomo.

    """
    log_time(
        logger,
        timings,
        "run_start",
        comment="Backend: starting model run in operational mode",
    )

    defaults = UpdateObserverDict(
        initial_yaml_string=model_data.attrs["defaults"],
        name="defaults",
        observer=model_data,
    )
    run_config = UpdateObserverDict(
        initial_yaml_string=model_data.attrs["run_config"],
        name="run_config",
        observer=model_data,
    )

    # New param defaults = old maximum param defaults (e.g. energy_cap gets default from energy_cap_max)
    operate_params = {
        k.replace("_max", ""): v
        for k, v in defaults.items() if k.endswith("_max")
    }
    operate_params[
        "purchased"] = 0  # no _max to work from here, so we hardcode a default

    defaults.update(operate_params)

    # Capacity results (from plan mode) can be used as the input to operate mode
    if any(model_data.filter_by_attrs(
            is_result=1).data_vars) and run_config.get(
                "operation.use_cap_results", False):
        # Anything with is_result = 1 will be ignored in the Pyomo model
        for varname, varvals in model_data.data_vars.items():
            if varname in operate_params.keys():
                varvals.attrs["is_result"] = 1
                varvals.attrs["operate_param"] = 1

    else:
        cap_max = xr.merge([
            v.rename(k.replace("_max", ""))
            for k, v in model_data.data_vars.items() if "_max" in k
        ])
        cap_equals = xr.merge([
            v.rename(k.replace("_equals", ""))
            for k, v in model_data.data_vars.items() if "_equals" in k
        ])
        caps = cap_max.update(cap_equals)
        for cap in caps.data_vars.values():
            cap.attrs["is_result"] = 1
            cap.attrs["operate_param"] = 1
        model_data.update(caps)

    comments, warnings, errors = checks.check_operate_params(model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warnings,
                                               errors=errors)

    # Initialize our variables
    solver = run_config["solver"]
    solver_io = run_config.get("solver_io", None)
    solver_options = run_config.get("solver_options", None)
    save_logs = run_config.get("save_logs", None)
    window = run_config["operation"]["window"]
    horizon = run_config["operation"]["horizon"]
    window_to_horizon = horizon - window

    # get the cumulative sum of timestep resolution, to find where we hit our window and horizon
    timestep_cumsum = model_data.timestep_resolution.cumsum(
        "timesteps").to_pandas()
    # get the timesteps at which we start and end our windows
    window_ends = timestep_cumsum.where((timestep_cumsum % window == 0) | (
        timestep_cumsum == timestep_cumsum[-1]))
    window_starts = timestep_cumsum.where((~np.isnan(window_ends.shift(1))) | (
        timestep_cumsum == timestep_cumsum[0])).dropna()

    window_ends = window_ends.dropna()
    horizon_ends = timestep_cumsum[timestep_cumsum.isin(window_ends.values +
                                                        window_to_horizon)]

    if not any(window_starts):
        raise exceptions.ModelError(
            "Not enough timesteps or incorrect timestep resolution to run in "
            "operational mode with an optimisation window of {}".format(
                window))

    # We will only update timeseries parameters
    timeseries_data_vars = [
        k for k, v in model_data.data_vars.items()
        if "timesteps" in v.dims and v.attrs["is_result"] == 0
    ]

    # Loop through each window, solve over the horizon length, and add the result to
    # result_array. We only go as far as the end of the last horizon, which may
    # clip the last bit of data
    result_array = []
    # track whether each iteration finds an optimal solution or not
    terminations = []

    if build_only:
        iterations = [0]
    else:
        iterations = range(len(window_starts))

    for i in iterations:
        start_timestep = window_starts.index[i]

        # Build full model in first instance
        if i == 0:
            warmstart = False
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                "model_gen_1",
                comment="Backend: generating initial model",
            )

            backend_model = backend.generate_model(window_model_data)

        # Build the full model in the last instance(s),
        # where number of timesteps is less than the horizon length
        elif i > len(horizon_ends) - 1:
            warmstart = False
            end_timestep = window_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                "model_gen_{}".format(i + 1),
                comment=(
                    "Backend: iteration {}: generating new model for "
                    "end of timeseries, with horizon = {} timesteps".format(
                        i + 1, window_ends[i] - window_starts[i])),
            )

            backend_model = backend.generate_model(window_model_data)

        # Update relevant Pyomo Params in intermediate instances
        else:
            warmstart = True
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                "model_gen_{}".format(i + 1),
                comment="Backend: iteration {}: updating model parameters".
                format(i + 1),
            )
            # Pyomo model sees the same timestamps each time, we just change the
            # values associated with those timestamps
            for var in timeseries_data_vars:
                # New values
                var_series = (
                    window_model_data[var].to_series().dropna().replace(
                        "inf", np.inf))
                # Same timestamps
                var_series.index = backend_model.__calliope_model_data["data"][
                    var].keys()
                var_dict = var_series.to_dict()
                # Update pyomo Param with new dictionary

                getattr(backend_model, var).store_values(var_dict)

        if not build_only:
            log_time(
                logger,
                timings,
                "model_run_{}".format(i + 1),
                time_since_run_start=True,
                comment="Backend: iteration {}: sending model to solver".
                format(i + 1),
            )
            # After iteration 1, warmstart = True, which should speed up the process
            # Note: Warmstart isn't possible with GLPK (dealt with later on)
            _results = backend.solve_model(
                backend_model,
                solver=solver,
                solver_io=solver_io,
                solver_options=solver_options,
                save_logs=save_logs,
                warmstart=warmstart,
            )

            log_time(
                logger,
                timings,
                "run_solver_exit_{}".format(i + 1),
                time_since_run_start=True,
                comment="Backend: iteration {}: solver finished running".
                format(i + 1),
            )
            # xarray dataset is built for each iteration
            _termination = backend.load_results(backend_model, _results)
            terminations.append(_termination)

            _results = backend.get_result_array(backend_model, model_data)

            # We give back the actual timesteps for this iteration and take a slice
            # equal to the window length
            _results["timesteps"] = window_model_data.timesteps.copy()

            # We always save the window data. Until the last window(s) this will crop
            # the window_to_horizon timesteps. In the last window(s), optimisation will
            # only be occurring over a window length anyway
            _results = _results.loc[dict(
                timesteps=slice(None, window_ends.index[i]))]
            result_array.append(_results)

            # Set up initial storage for the next iteration
            if "loc_techs_store" in model_data.dims.keys():
                storage_initial = _results.storage.loc[{
                    "timesteps":
                    window_ends.index[i]
                }].drop("timesteps")
                model_data["storage_initial"].loc[
                    storage_initial.coords] = storage_initial.values
                backend_model.storage_initial.store_values(
                    storage_initial.to_series().dropna().to_dict())

            # Set up total operated units for the next iteration
            if "loc_techs_milp" in model_data.dims.keys():
                operated_units = _results.operating_units.sum(
                    "timesteps").astype(np.int)
                model_data["operated_units"].loc[{}] += operated_units.values
                backend_model.operated_units.store_values(
                    operated_units.to_series().dropna().to_dict())

            log_time(
                logger,
                timings,
                "run_solver_exit_{}".format(i + 1),
                time_since_run_start=True,
                comment="Backend: iteration {}: generated solution array".
                format(i + 1),
            )

    if build_only:
        results = xr.Dataset()
    else:
        # Concatenate results over the timestep dimension to get a single
        # xarray Dataset of interest
        results = xr.concat(result_array, dim="timesteps")
        if all(i == "optimal" for i in terminations):
            results.attrs["termination_condition"] = "optimal"
        elif all(i in ["optimal", "feasible"] for i in terminations):
            results.attrs["termination_condition"] = "feasible"
        else:
            results.attrs["termination_condition"] = ",".join(terminations)

        log_time(
            logger,
            timings,
            "run_solution_returned",
            time_since_run_start=True,
            comment="Backend: generated full solution array",
        )

    return results, backend_model
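
The window/horizon bookkeeping above is easiest to see on a toy series. A standalone sketch with illustrative numbers (1-hour resolution, 24 timesteps, window=12, horizon=18), mirroring the cumulative-sum logic outside any model:

import numpy as np
import pandas as pd

window, horizon = 12, 18
window_to_horizon = horizon - window

resolution = pd.Series(1.0, index=pd.date_range("2005-01-01", periods=24, freq="H"))
timestep_cumsum = resolution.cumsum()

window_ends = timestep_cumsum.where(
    (timestep_cumsum % window == 0) | (timestep_cumsum == timestep_cumsum.iloc[-1]))
window_starts = timestep_cumsum.where(
    (~np.isnan(window_ends.shift(1))) | (timestep_cumsum == timestep_cumsum.iloc[0])).dropna()
window_ends = window_ends.dropna()
horizon_ends = timestep_cumsum[timestep_cumsum.isin(window_ends.values + window_to_horizon)]

print(list(window_starts.index))  # hours 00:00 and 12:00 -- where each optimisation window begins
print(list(window_ends.index))    # hours 11:00 and 23:00 -- end of the results kept from each window
print(list(horizon_ends.index))   # hour 17:00 -- end of the first (non-final) optimisation horizon
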
Example #27
def run_operate(model_data, timings, backend, build_only):
    """
    For use when mode is 'operate', to allow the model to be built, edited, and
    iteratively run within Pyomo.

    """
    log_time(logger,
             timings,
             'run_start',
             comment='Backend: starting model run in operational mode')

    defaults = AttrDict.from_yaml_string(model_data.attrs['defaults'])
    run_config = AttrDict.from_yaml_string(model_data.attrs['run_config'])

    operate_params = ['purchased'] + [
        i.replace('_max', '') for i in defaults if i[-4:] == '_max'
    ]

    # Capacity results (from plan mode) can be used as the input to operate mode
    if (any(model_data.filter_by_attrs(is_result=1).data_vars)
            and run_config.get('operation.use_cap_results', False)):
        # Anything with is_result = 1 will be ignored in the Pyomo model
        for varname, varvals in model_data.data_vars.items():
            if varname in operate_params:
                varvals.attrs['is_result'] = 1
                varvals.attrs['operate_param'] = 1

    else:
        cap_max = xr.merge([
            v.rename(k.replace('_max', ''))
            for k, v in model_data.data_vars.items() if '_max' in k
        ])
        cap_equals = xr.merge([
            v.rename(k.replace('_equals', ''))
            for k, v in model_data.data_vars.items() if '_equals' in k
        ])
        caps = cap_max.update(cap_equals)
        for cap in caps.data_vars.values():
            cap.attrs['is_result'] = 1
            cap.attrs['operate_param'] = 1
        model_data.update(caps)

    # Storage initial is carried over between iterations, so must be defined along with storage
    if ('loc_techs_store' in model_data.dims.keys()
            and 'storage_initial' not in model_data.data_vars.keys()):
        model_data['storage_initial'] = (xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_store.values],
            dims='loc_techs_store'))
        model_data['storage_initial'].attrs['is_result'] = 0
        exceptions.warn(
            'Initial stored energy not defined, set to zero for all '
            'loc::techs in loc_techs_store, for use in iterative optimisation')
    # Operated units is carried over between iterations, so must be defined in a milp model
    if ('loc_techs_milp' in model_data.dims.keys()
            and 'operated_units' not in model_data.data_vars.keys()):
        model_data['operated_units'] = (xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_milp.values],
            dims='loc_techs_milp'))
        model_data['operated_units'].attrs['is_result'] = 1
        model_data['operated_units'].attrs['operate_param'] = 1
        exceptions.warn(
            'daily operated units not defined, set to zero for all '
            'loc::techs in loc_techs_milp, for use in iterative optimisation')

    comments, warnings, errors = checks.check_operate_params(model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warnings,
                                               errors=errors)

    # Initialize our variables
    solver = run_config['solver']
    solver_io = run_config.get('solver_io', None)
    solver_options = run_config.get('solver_options', None)
    save_logs = run_config.get('save_logs', None)
    window = run_config['operation']['window']
    horizon = run_config['operation']['horizon']
    window_to_horizon = horizon - window

    # get the cumulative sum of timestep resolution, to find where we hit our window and horizon
    timestep_cumsum = model_data.timestep_resolution.cumsum(
        'timesteps').to_pandas()
    # get the timesteps at which we start and end our windows
    window_ends = timestep_cumsum.where((timestep_cumsum % window == 0) | (
        timestep_cumsum == timestep_cumsum[-1]))
    window_starts = timestep_cumsum.where((~np.isnan(window_ends.shift(1))) | (
        timestep_cumsum == timestep_cumsum[0])).dropna()

    window_ends = window_ends.dropna()
    horizon_ends = timestep_cumsum[timestep_cumsum.isin(window_ends.values +
                                                        window_to_horizon)]

    if not any(window_starts):
        raise exceptions.ModelError(
            'Not enough timesteps or incorrect timestep resolution to run in '
            'operational mode with an optimisation window of {}'.format(
                window))

    # We will only update timeseries parameters
    timeseries_data_vars = [
        k for k, v in model_data.data_vars.items()
        if 'timesteps' in v.dims and v.attrs['is_result'] == 0
    ]

    # Loop through each window, solve over the horizon length, and add the result to
    # result_array. We only go as far as the end of the last horizon, which may
    # clip the last bit of data
    result_array = []
    # track whether each iteration finds an optimal solution or not
    terminations = []

    if build_only:
        iterations = [0]
    else:
        iterations = range(len(window_starts))

    for i in iterations:
        start_timestep = window_starts.index[i]

        # Build full model in first instance
        if i == 0:
            warmstart = False
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(logger,
                     timings,
                     'model_gen_1',
                     comment='Backend: generating initial model')

            backend_model = backend.generate_model(window_model_data)

        # Build the full model in the last instance(s),
        # where number of timesteps is less than the horizon length
        elif i > len(horizon_ends) - 1:
            warmstart = False
            end_timestep = window_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                'model_gen_{}'.format(i + 1),
                comment=(
                    'Backend: iteration {}: generating new model for '
                    'end of timeseries, with horizon = {} timesteps'.format(
                        i + 1, window_ends[i] - window_starts[i])))

            backend_model = backend.generate_model(window_model_data)

        # Update relevant Pyomo Params in intermediate instances
        else:
            warmstart = True
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                'model_gen_{}'.format(i + 1),
                comment='Backend: iteration {}: updating model parameters'.
                format(i + 1))
            # Pyomo model sees the same timestamps each time, we just change the
            # values associated with those timestamps
            for var in timeseries_data_vars:
                # New values
                var_series = window_model_data[var].to_series().dropna(
                ).replace('inf', np.inf)
                # Same timestamps
                var_series.index = backend_model.__calliope_model_data['data'][
                    var].keys()
                var_dict = var_series.to_dict()
                # Update pyomo Param with new dictionary

                getattr(backend_model, var).store_values(var_dict)

        if not build_only:
            log_time(logger,
                     timings,
                     'model_run_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: sending model to solver'.
                     format(i + 1))
            # After iteration 1, warmstart = True, which should speed up the process
            # Note: Warmstart isn't possible with GLPK (dealt with later on)
            _results = backend.solve_model(
                backend_model,
                solver=solver,
                solver_io=solver_io,
                solver_options=solver_options,
                save_logs=save_logs,
                warmstart=warmstart,
            )

            log_time(logger,
                     timings,
                     'run_solver_exit_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: solver finished running'.
                     format(i + 1))
            # xarray dataset is built for each iteration
            _termination = backend.load_results(backend_model, _results)
            terminations.append(_termination)

            _results = backend.get_result_array(backend_model, model_data)

            # We give back the actual timesteps for this iteration and take a slice
            # equal to the window length
            _results['timesteps'] = window_model_data.timesteps.copy()

            # We always save the window data. Until the last window(s) this will crop
            # the window_to_horizon timesteps. In the last window(s), optimisation will
            # only be occurring over a window length anyway
            _results = _results.loc[dict(
                timesteps=slice(None, window_ends.index[i]))]
            result_array.append(_results)

            # Set up initial storage for the next iteration
            if 'loc_techs_store' in model_data.dims.keys():
                storage_initial = _results.storage.loc[{
                    'timesteps':
                    window_ends.index[i]
                }].drop('timesteps')
                model_data['storage_initial'].loc[
                    storage_initial.coords] = storage_initial.values
                backend_model.storage_initial.store_values(
                    storage_initial.to_series().dropna().to_dict())

            # Set up total operated units for the next iteration
            if 'loc_techs_milp' in model_data.dims.keys():
                operated_units = _results.operating_units.sum(
                    'timesteps').astype(np.int)
                model_data['operated_units'].loc[{}] += operated_units.values
                backend_model.operated_units.store_values(
                    operated_units.to_series().dropna().to_dict())

            log_time(logger,
                     timings,
                     'run_solver_exit_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: generated solution array'.
                     format(i + 1))

    if build_only:
        results = xr.Dataset()
    else:
        # Concatenate results over the timestep dimension to get a single
        # xarray Dataset of interest
        results = xr.concat(result_array, dim='timesteps')
        if all(i == 'optimal' for i in terminations):
            results.attrs['termination_condition'] = 'optimal'
        elif all(i in ['optimal', 'feasible'] for i in terminations):
            results.attrs['termination_condition'] = 'feasible'
        else:
            results.attrs['termination_condition'] = ','.join(terminations)

        log_time(logger,
                 timings,
                 'run_solution_returned',
                 time_since_run_start=True,
                 comment='Backend: generated full solution array')

    return results, backend_model
Example #28
def process_timeseries_data(config_model, model_run):

    if config_model.model.timeseries_data is None:
        timeseries_data = AttrDict()
    else:
        timeseries_data = config_model.model.timeseries_data

    def _parser(x, dtformat):
        return pd.to_datetime(x, format=dtformat, exact=False)

    if 'timeseries_data_path' in config_model.model:
        dtformat = config_model.model['timeseries_dateformat']

        # Generate the set of all files we want to read from file
        location_config = model_run.locations.as_dict_flat()
        model_config = config_model.model.as_dict_flat()

        get_filenames = lambda config: set([
            v.split('=')[1].rsplit(':', 1)[0]
            for v in config.values() if 'file=' in str(v)
        ])
        constraint_filenames = get_filenames(location_config)
        cluster_filenames = get_filenames(model_config)

        datetime_min = []
        datetime_max = []

        for file in constraint_filenames | cluster_filenames:
            file_path = os.path.join(config_model.model.timeseries_data_path, file)
            # load the data, without parsing the dates, to catch errors in the data
            df = pd.read_csv(file_path, index_col=0)
            try:
                df.apply(pd.to_numeric)
            except ValueError as e:
                raise exceptions.ModelError(
                    'Error in loading data from {}. Ensure all entries are '
                    'numeric. Full error: {}'.format(file, e)
                )
            # Now parse the dates, checking for errors specific to this
            try:
                df.index = _parser(df.index, dtformat)
            except ValueError as e:
                raise exceptions.ModelError(
                    'Error in parsing dates in timeseries data from {}, '
                    'using datetime format `{}`: {}'.format(file, dtformat, e)
                )
            timeseries_data[file] = df

            datetime_min.append(df.index[0].date())
            datetime_max.append(df.index[-1].date())

    # Apply time subsetting, if supplied in model_run
    subset_time_config = config_model.model.subset_time
    if subset_time_config is not None:
        # Test parsing dates first, to make sure they fit our required subset format

        try:
            subset_time = _parser(subset_time_config, '%Y-%m-%d %H:%M:%S')
        except ValueError as e:
            raise exceptions.ModelError(
                'Timeseries subset must be in ISO format (anything up to the '
                'detail of `%Y-%m-%d %H:%M:%S`).\n User time subset: {}\n '
                'Error caused: {}'.format(subset_time_config, e)
            )
        if isinstance(subset_time_config, list) and len(subset_time_config) == 2:
            time_slice = slice(subset_time_config[0], subset_time_config[1])

            # Don't allow slicing outside the range of input data
            if (subset_time[0].date() < max(datetime_min) or
                    subset_time[1].date() > min(datetime_max)):

                raise exceptions.ModelError(
                    'subset time range {} is outside the input data time range '
                    '[{}, {}]'.format(subset_time_config,
                                      max(datetime_min).strftime('%Y-%m-%d'),
                                      min(datetime_max).strftime('%Y-%m-%d'))
                )
        elif isinstance(subset_time_config, list):
            raise exceptions.ModelError(
                'Invalid subset_time value: {}'.format(subset_time_config)
            )
        else:
            time_slice = str(subset_time_config)

        for k in timeseries_data.keys():
            timeseries_data[k] = timeseries_data[k].loc[time_slice, :]
            if timeseries_data[k].empty:
                raise exceptions.ModelError(
                    'The time slice {} creates an empty timeseries array for {}'
                    .format(time_slice, k)
                )

    # Ensure all timeseries have the same index
    indices = [
        (file, df.index) for file, df in timeseries_data.items()
        if file not in cluster_filenames
    ]
    first_file, first_index = indices[0]
    for file, idx in indices[1:]:
        if not first_index.equals(idx):
            raise exceptions.ModelError(
                'Time series indices do not match '
                'between {} and {}'.format(first_file, file)
            )

    return timeseries_data, first_index
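
The subsetting step above turns a two-element `subset_time` list into a slice over each file's datetime index and raises a ModelError if the slice leaves an empty frame. A minimal sketch of that behaviour with a synthetic DataFrame follows; the index range, column name and slice bounds are purely illustrative:

import pandas as pd

# Synthetic hourly data standing in for one timeseries file (illustrative values)
index = pd.date_range("2005-01-01", "2005-01-07", freq="1h")
df = pd.DataFrame({"region1": range(len(index))}, index=index)

# A two-element subset_time list becomes a slice over the datetime index
time_slice = slice("2005-01-02", "2005-01-03")
subset = df.loc[time_slice, :]

# The loader above raises a ModelError when the slice is empty;
# here we simply check the same condition
assert not subset.empty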
Example #29
0
def apply_overrides(config, scenario=None, override_dict=None):
    """
    Generate processed Model configuration, applying any scenarios overrides.

    Parameters
    ----------
    config : AttrDict
        a model configuration AttrDict
    scenario : str, optional
    override_dict : str or dict or AttrDict, optional
        If a YAML string, converted to AttrDict

    """
    debug_comments = AttrDict()

    base_model_config_file = os.path.join(
        os.path.dirname(calliope.__file__),
        'config', 'model.yaml'
    )
    config_model = AttrDict.from_yaml(base_model_config_file)

    # Interpret timeseries_data_path as relative
    config.model.timeseries_data_path = relative_path(
        config.config_path, config.model.timeseries_data_path
    )

    # The input files are allowed to override other model defaults
    config_model.union(config, allow_override=True)

    # First pass of applying override dict before applying scenarios,
    # so that scenario definitions can be overridden by override_dict
    if override_dict:
        if isinstance(override_dict, str):
            override_dict = AttrDict.from_yaml_string(override_dict)
        elif not isinstance(override_dict, AttrDict):
            override_dict = AttrDict(override_dict)

        warnings = checks.check_overrides(config_model, override_dict)
        exceptions.print_warnings_and_raise_errors(warnings=warnings)

        config_model.union(
            override_dict, allow_override=True, allow_replacement=True
        )

    if scenario:
        scenarios = config_model.get('scenarios', {})

        if scenario in scenarios:
            # Manually defined scenario names cannot be the same as single
            # overrides or any combination of comma-delimited overrides
            if all([i in config_model.get('overrides', {})
                    for i in scenario.split(',')]):
                raise exceptions.ModelError(
                    'Manually defined scenario cannot be a combination of override names.'
                )
            if not isinstance(scenarios[scenario], str):
                raise exceptions.ModelError(
                    'Scenario definition must be a string of comma-separated overrides.'
                )
            overrides = scenarios[scenario].split(',')
            logger.info(
                'Using scenario `{}` leading to the application of '
                'overrides `{}`.'.format(scenario, scenarios[scenario])
            )
        else:
            overrides = str(scenario).split(',')
            logger.info(
                'Applying overrides `{}` without a '
                'specific scenario name.'.format(scenario)
            )

        overrides_from_scenario = combine_overrides(config_model, overrides)

        warnings = checks.check_overrides(config_model, overrides_from_scenario)
        exceptions.print_warnings_and_raise_errors(warnings=warnings)

        config_model.union(
            overrides_from_scenario, allow_override=True, allow_replacement=True
        )
        for k, v in overrides_from_scenario.as_dict_flat().items():
            debug_comments.set_key(
                '{}'.format(k),
                'Applied from override')
    else:
        overrides = []

    # Second pass of applying override dict after applying scenarios,
    # so that scenario-based overrides are overridden by override_dict!
    if override_dict:
        config_model.union(
            override_dict, allow_override=True, allow_replacement=True
        )
        for k, v in override_dict.as_dict_flat().items():
            debug_comments.set_key(
                '{}'.format(k),
                'Overridden via override dictionary.')

    return config_model, debug_comments, overrides, scenario
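
A hedged usage sketch for apply_overrides. It assumes `config` is the AttrDict of an already parsed model YAML file, with `config_path` and `model.timeseries_data_path` set as they would be during a normal calliope model load; the file path, scenario name and override key below are illustrative, and the AttrDict import path reflects calliope 0.6.x:

from calliope.core.attrdict import AttrDict

config = AttrDict.from_yaml("model.yaml")           # assumed model definition file
config.set_key("config_path", "model.yaml")         # normally set by the model loader

config_model, debug_comments, overrides, scenario = apply_overrides(
    config,
    scenario="high_demand",            # assumed scenario defined under `scenarios:`
    override_dict="run.solver: cbc",   # YAML string override, applied last
)

# debug_comments records which configuration keys were touched by overrides
for key, comment in debug_comments.as_dict_flat().items():
    print(key, "->", comment)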
Example #30
0
def get_clusters(data,
                 func,
                 timesteps_per_day,
                 tech=None,
                 timesteps=None,
                 k=None,
                 variables=None,
                 **kwargs):
    """
    Run a clustering algorithm on the timeseries data supplied. All timeseries
    data is reshaped into one row per day before clustering into similar days.

    Parameters
    ----------
    data : xarray.Dataset
        Should be normalized
    func : str
        'kmeans' or 'hierarchical' for KMeans or Agglomerative clustering, respectively
    timesteps_per_day : int
        Total number of timesteps in a day
    tech : list, optional
        list of strings referring to technologies by which clustering is undertaken.
        If none (default), all technologies within timeseries variables will be used.
    timesteps : list or str, optional
        Subset of the time domain within which to apply clustering.
    k : int, optional
        Number of clusters to create. If none (default), will use Hartigan's rule
        to infer a reasonable number of clusters.
    variables : list, optional
        data variables (e.g. `resource`, `energy_eff`) by whose values the data
        will be clustered. If none (default), all timeseries variables will be used.
    kwargs : dict
        Additional keyword arguments available depend on the `func`.
        For available KMeans kwargs see:
        http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
        For available hierarchical kwargs see:
        http://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html

    Returns
    -------
    clusters : pandas.Series
        Indexed by the first timestep of each day, giving the cluster to
        which each day is assigned.
    clustered_data : sklearn.cluster object
        Result of clustering using sklearn.cluster.KMeans(k).fit(X) or
        sklearn.cluster.AgglomerativeClustering(k).fit(X). Allows user to
        access specific attributes, for detailed statistical analysis.

    """

    if timesteps is not None:
        data = data.loc[{"timesteps": timesteps}]
    else:
        timesteps = data.timesteps.values

    X = reshape_for_clustering(data, tech, variables)

    if func == "kmeans":
        if not k:
            k = hartigan_n_clusters(X)
            exceptions.warn("Used Hartigan's rule to determine that"
                            "a good number of clusters is {}.".format(k))
        clustered_data = sk_cluster.KMeans(k).fit(X)

    elif func == "hierarchical":
        if not k:
            raise exceptions.ModelError(
                "Cannot undertake hierarchical clustering without a predefined "
                "number of clusters (k)")
        clustered_data = sk_cluster.AgglomerativeClustering(k).fit(X)

    # Determine the cluster membership of each day
    day_clusters = clustered_data.labels_

    # Create mapping of timesteps to clusters
    clusters = pd.Series(day_clusters, index=timesteps[::timesteps_per_day])

    return clusters, clustered_data
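
A hedged usage sketch for get_clusters. `model_data` is assumed to be a normalised calliope timeseries xarray.Dataset with hourly resolution (hence 24 timesteps per day); the technology names and k are illustrative choices, not defaults:

# `model_data` is assumed to exist from a prior model-loading/normalisation step
clusters, fit = get_clusters(
    model_data,
    func="kmeans",
    timesteps_per_day=24,
    tech=["pv", "wind"],   # assumed technology names present in the model
    k=5,
)

# `clusters` maps the first timestep of each day to a cluster label
print(clusters.value_counts())

# `fit` is the fitted sklearn estimator, e.g. cluster centres for KMeans
print(fit.cluster_centers_.shape)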