Example 1
    def load_data(self) -> None:
        """Constructs the random number generators for all of the matrices that
        can be altered by uncertainty.

        If any of these uncertain calculations are not included, the initial
        'amount' values of the 'params' arrays are used instead of generating
        a new vector.
        """
        self.lca.load_lci_data()

        self.tech_rng = MCRandomNumberGenerator(self.lca.tech_params, seed=self.seed) \
            if self.include_technosphere else self.lca.tech_params["amount"].copy()
        self.bio_rng = MCRandomNumberGenerator(self.lca.bio_params, seed=self.seed) \
            if self.include_biosphere else self.lca.bio_params["amount"].copy()

        if self.lca.lcia:
            # One RNG per impact category, because the cf arrays differ in size.
            self.cf_rngs = {}
            for m in self.methods:
                self.lca.switch_method(m)
                self.lca.load_lcia_data()
                self.cf_rngs[m] = MCRandomNumberGenerator(self.lca.cf_params, seed=self.seed) \
                    if self.include_cfs else self.lca.cf_params["amount"].copy()
        # Construct the MC parameter manager
        if self.include_parameters:
            pass  # not yet developed as stand-alone code (outside of the Activity Browser)
            # self.param_rng = MonteCarloParameterManager(seed=self.seed)

        self.lca.activity_dict_rev, self.lca.product_dict_rev, self.lca.biosphere_dict_rev = \
            self.lca.reverse_dict()
Example 2
class MonteCarloParameterManager(ParameterManager, Iterator):
    """Use to sample the uncertainty of parameter values, mostly for use in
    Monte Carlo calculations.

    Each iteration will sample the parameter uncertainty, after which
    all parameters and parameterized exchanges are recalculated. These
    recalculated values are then returned as a simplified `params` array,
    which is similar to the `tech_params` and `bio_params` arrays in the
    LCA classes.

    Makes use of the `MCRandomNumberGenerator` to sample from all of the
    distributions in the same way.

    """
    def __init__(self, seed: Optional[int] = None):
        super().__init__()
        parameters = itertools.chain(ProjectParameter.select(),
                                     DatabaseParameter.select(),
                                     ActivityParameter.select())
        self.uncertainties = UncertaintyBase.from_dicts(
            *[getattr(p, "data", {}) for p in parameters])
        self.mc_generator = MCRandomNumberGenerator(self.uncertainties,
                                                    seed=seed)

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def recalculate(self, iterations: int = 10) -> np.ndarray:
        assert iterations > 0, "Must have a positive number of iterations"
        if iterations == 1:
            return self.next()
        # Construct indices, prepare a correctly sized array, and sample the
        # parameter uncertainty distributions `iterations` times.
        all_data = np.empty((iterations, len(self.indices)),
                            dtype=Indices.array_dtype)
        random_bounded_values = self.mc_generator.generate(iterations)

        # Now, repeatedly replace parameter amounts with sampled data and
        # recalculate. Every processed row is added to the sized array.
        for i in range(iterations):
            values = random_bounded_values.take(i, axis=1)
            self.parameters.update(values)
            data = self.calculate()
            all_data[i] = self.indices.mock_params(data)

        return all_data

    def next(self) -> np.ndarray:
        """Similar to `recalculate` but only performs a single sampling and
        recalculation.
        """
        values = self.mc_generator.next()
        self.parameters.update(values)
        data = self.calculate()
        return self.indices.mock_params(data)
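
Because the manager implements the iterator protocol, sampling can be driven with plain next() calls. A hedged usage sketch (assuming an active Brightway2 project in which project, database, and activity parameters are already defined, so the select() calls above return rows):

manager = MonteCarloParameterManager(seed=42)
single = next(manager)             # one sample + recalculation; one entry per parameterized exchange
batch = manager.recalculate(100)   # shape: (100, number of parameterized exchanges)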
Example 3
 def __init__(self, seed: Optional[int] = None):
     super().__init__()
     parameters = itertools.chain(ProjectParameter.select(),
                                  DatabaseParameter.select(),
                                  ActivityParameter.select())
     self.uncertainties = UncertaintyBase.from_dicts(
         *[getattr(p, "data", {}) for p in parameters])
     self.mc_generator = MCRandomNumberGenerator(self.uncertainties,
                                                 seed=seed)
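
`UncertaintyBase.from_dicts` converts plain dictionaries into the structured array that `MCRandomNumberGenerator` consumes. A self-contained sketch using the stats_arrays distribution ids (2 = lognormal, 5 = triangular), with made-up parameter values:

from stats_arrays import MCRandomNumberGenerator, UncertaintyBase

# One lognormal and one triangular parameter (hypothetical numbers).
params = UncertaintyBase.from_dicts(
    {"loc": 0.0, "scale": 0.25, "uncertainty_type": 2},
    {"loc": 1.0, "minimum": 0.5, "maximum": 2.0, "uncertainty_type": 5},
)
rng = MCRandomNumberGenerator(params, seed=42)
sample = next(rng)  # one value per parameter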
Example 4
    def rescale(self, X):
        iterations, num_params = X.shape[0], X.shape[1]
        assert num_params == len(self)
        params_offset = 0
        X_rescaled_all = np.zeros((iterations, 0))
        for exchange_type in self.uncertain_exchanges_types:
            mc = MCRandomNumberGenerator(self.uncertain_params[exchange_type])
            X_reordered = X[:, mc.ordering + params_offset]

            X_rescaled = np.zeros(
                (iterations, self.uncertain_exchange_lengths[exchange_type])
            )
            X_rescaled[:] = np.nan

            offset = 0
            for uncertainty_type in self.choices:
                num_uncertain_params = mc.positions[uncertainty_type]
                if not num_uncertain_params:
                    continue
                random_data = uncertainty_type.ppf(
                    params=mc.params[offset : num_uncertain_params + offset],
                    percentages=X_reordered[
                        :, offset : num_uncertain_params + offset
                    ].T,
                )
                X_rescaled[:, offset : num_uncertain_params + offset] = random_data.T
                offset += num_uncertain_params

            X_rescaled_all = np.hstack(
                [X_rescaled_all, X_rescaled[:, np.argsort(mc.ordering)]]
            )
            params_offset += self.uncertain_exchange_lengths[exchange_type]
        return X_rescaled_all
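
The method above maps unit-hypercube samples through each distribution's inverse CDF: `mc.ordering` groups parameters by uncertainty type so the vectorized `ppf` calls can run per distribution, and `np.argsort(mc.ordering)` restores the original parameter order afterwards. A toy sketch of the same ppf step for a single distribution type (hedged; made-up values):

import numpy as np
from stats_arrays import LognormalUncertainty, UncertaintyBase

# Push uniform(0, 1) "percentages" through the lognormal inverse CDF.
params = UncertaintyBase.from_dicts(
    {"loc": 0.0, "scale": 0.3, "uncertainty_type": LognormalUncertainty.id})
percentages = np.random.rand(1, 4)  # shape (n_params, n_samples)
values = LognormalUncertainty.ppf(params=params, percentages=percentages)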
Example 5
    def __init__(
        self,
        *,
        package: DatapackageBase,
        group_label: str,
        use_distributions: bool = False,
        seed_override: Union[int, None] = None,
        custom_filter: Union[Callable, None] = None,
        transpose: bool = False,
    ):
        self.label = group_label
        self.package = package
        self.use_distributions = use_distributions
        self.custom_filter = custom_filter
        self.transpose = transpose
        self.vector = self.is_vector()
        self.seed = seed_override or self.package.metadata.get("seed")

        if custom_filter is not None:
            self.custom_filter_mask = custom_filter(self.get_indices_data())
        else:
            self.custom_filter_mask = None

        if self.use_distributions and self.vector:
            if self.has_distributions:
                self.rng = MCRandomNumberGenerator(params=self.data_original,
                                                   seed=self.seed)
            else:
                self.rng = FakeRNG(self.data_original)

        self.aggregate = self.package.metadata["sum_intra_duplicates"]
        self.empty = self.get_indices_data().shape == (0, )
Example 6
def indices_and_samples_from_params(params, iterations, seed=None):
    """Format heterogeneous parameter array for presamples"""
    rev_mapping = {v: k for k, v in mapping.items()}
    input_keys_unformated = np.vectorize(rev_mapping.get)(params['input'])
    output_keys_unformated = np.vectorize(rev_mapping.get)(params['output'])
    rev_TYPE_DICTIONARY = {v: k for k, v in TYPE_DICTIONARY.items()}
    type_array = np.vectorize(rev_TYPE_DICTIONARY.get)(params['type'])
    indices = [
        ((input_key, input_code), (output_key, output_code), exc_type)
        for input_key, input_code, output_key, output_code, exc_type in zip(
            input_keys_unformated[0], input_keys_unformated[1],
            output_keys_unformated[0], output_keys_unformated[1], type_array)
    ]
    samples = np.empty(shape=(params.shape[0], iterations))
    # Pass seed by keyword: the second positional argument is maximum_iterations.
    rng = MCRandomNumberGenerator(params, seed=seed)
    for i in range(iterations):
        samples[:, i] = next(rng)
    return samples, indices
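
Note that `mapping` and `TYPE_DICTIONARY` are assumed to come from bw2data (`from bw2data import mapping`; `from bw2data.utils import TYPE_DICTIONARY`). A hedged usage sketch with an already-built bw2calc LCA object:

# `lca` is assumed to be an LCA instance whose lci() has been run.
samples, indices = indices_and_samples_from_params(
    lca.tech_params, iterations=1000, seed=42)
# samples.shape == (len(lca.tech_params), 1000)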
Example 7
    def __init__(self, func_unit, method, write_dir):
        self.lca = bw.LCA(func_unit, method)
        self.lca.lci()
        self.lca.lcia()

        self.write_dir = write_dir
        self.make_dirs()

        #         self.uncertain_tech_params_where = np.where(self.lca.tech_params['uncertainty_type'] > 1)[0]
        #         self.uncertain_tech_params = self.lca.tech_params[self.uncertain_tech_params_where]

        self.uncertain_tech_params_where = self.get_LSA_params(
            var_threshold=0)  # TODO change the threshold
        self.uncertain_tech_params = self.lca.tech_params[
            self.uncertain_tech_params_where]

        self.num_params = self.__num_input_params__()
        self.influential_params = []

        self.choices = uncertainty_choices
        self.mc = MCRandomNumberGenerator(self.uncertain_tech_params)
Example 8
 def sample_model_uncertainty(self):
     """ Sample model uncertainty for avail_delta and avail_net"""
     with open(self.filtered_pickles / 'model_uncertainty.pickle',
               'rb') as f:
         model_uncertainty_df = pickle.load(f)
     if self.caps and 'model_uncertainty' in self.caps:
         cap = self.caps['model_uncertainty']
         model_uncertainty_df[model_uncertainty_df > cap] = cap
     stacked_df = model_uncertainty_df.stack().reset_index()
     stacked_df.rename(columns={'level_1': 'month', 0: 'value'}, inplace=True)
     gsd2_to_scale = lambda x: np.log(np.sqrt(x))
     stacked_df['scale'] = gsd2_to_scale(stacked_df['value'])
     stats_arrays_input_dicts = [{
         'loc': 0,
         'scale': stacked_df.loc[i, 'scale'],
         'uncertainty_type': 2
     } for i in stacked_df.index]
     uncertainty_base = UncertaintyBase.from_dicts(
         *stats_arrays_input_dicts)
     rng = MCRandomNumberGenerator(uncertainty_base)
     arr = np.zeros(shape=[stacked_df.shape[0], self.iterations],
                    dtype=self.dtype)
     for iteration in range(self.iterations):
         arr[:, iteration] = next(rng).astype(self.dtype)
     indices_as_arr = np.array(
         [stacked_df['BAS34S_ID'], stacked_df['month']]).T
     indices_as_list = [(indices_as_arr[i, 0], indices_as_arr[i, 1])
                        for i in range(indices_as_arr.shape[0])]
     np.save(self.samples_dir / "model_uncertainty.npy", arr)
     with open(self.indices_dir / "model_uncertainty.pickle", 'wb') as f:
         pickle.dump(indices_as_list, f)
     print("\t{} samples taken for model_uncertainty".format(self.iterations))
Example 9
class MonteCarloLCA(object):
    """A Monte Carlo LCA for multiple functional units and methods loaded from a calculation setup."""
    def __init__(self, cs_name):
        if cs_name not in bw.calculation_setups:
            raise ValueError(
                "{} is not a known `calculation_setup`.".format(cs_name))

        self.cs_name = cs_name
        self.cs = bw.calculation_setups[cs_name]
        self.seed = None
        self.cf_rngs = {}
        self.CF_rng_vectors = {}
        self.include_technosphere = True
        self.include_biosphere = True
        self.include_cfs = True
        self.include_parameters = True
        self.param_rng = None
        self.param_cols = ["row", "col", "type"]

        self.tech_rng: Optional[Union[MCRandomNumberGenerator, np.ndarray]] = None
        self.bio_rng: Optional[Union[MCRandomNumberGenerator, np.ndarray]] = None
        self.cf_rng: Optional[Union[MCRandomNumberGenerator, np.ndarray]] = None

        # functional units
        self.func_units = self.cs['inv']
        self.rev_fu_index = {i: fu for i, fu in enumerate(self.func_units)}

        # activities
        self.activity_keys = [list(fu.keys())[0] for fu in self.func_units]
        self.activity_index = {
            key: index
            for index, key in enumerate(self.activity_keys)
        }
        self.rev_activity_index = {
            index: key
            for index, key in enumerate(self.activity_keys)
        }
        # previously: self.rev_activity_index = {v: k for k, v in self.activity_keys}
        # self.fu_index = {k: i for i, k in enumerate(self.activity_keys)}

        # methods
        self.methods = self.cs['ia']
        self.method_index = {m: i for i, m in enumerate(self.methods)}
        self.rev_method_index = {i: m for i, m in enumerate(self.methods)}
        # previously: self.rev_method_index = {v: k for k, v in self.method_index.items()}
        # self.rev_method_index = {v: k for k, v in self.method_index.items()}

        self.func_unit_translation_dict = {
            str(bw.get_activity(list(func_unit.keys())[0])): func_unit
            for func_unit in self.func_units
        }
        if len(self.func_unit_translation_dict) != len(self.func_units):
            self.func_unit_translation_dict = {}
            for fu in self.func_units:
                act = bw.get_activity(next(iter(fu)))
                self.func_unit_translation_dict["{} {}".format(act,
                                                               act[0])] = fu
        self.func_key_dict = {
            m: i
            for i, m in enumerate(self.func_unit_translation_dict.keys())
        }
        self.func_key_list = list(self.func_key_dict.keys())

        # GSA calculation variables
        self.A_matrices = list()
        self.B_matrices = list()
        self.CF_dict = defaultdict(list)
        self.parameter_exchanges = list()
        self.parameters = list()
        self.parameter_data = defaultdict(dict)

        self.results = list()

        self.lca = bw.LCA(demand=self.func_units_dict, method=self.methods[0])

    def unify_param_exchanges(self, data: np.ndarray) -> np.ndarray:
        """Convert an array of parameterized exchanges from input/output keys
        into row/col values using dicts generated in bw.LCA object.

        If any given exchange does not exist in the current LCA matrix,
        it will be dropped from the returned array.
        """
        def key_to_rowcol(x) -> Optional[tuple]:
            if x["type"] in [0, 1]:
                row = self.lca.activity_dict.get(x["input"], None)
                col = self.lca.product_dict.get(x["output"], None)
            else:
                row = self.lca.biosphere_dict.get(x["input"], None)
                col = self.lca.activity_dict.get(x["output"], None)
            # If either the row or the column is missing, return None to drop the exchange.
            if row is None or col is None:
                return None
            return row, col, x["type"], x["amount"]

        # Convert the data and store in a new array, dropping Nones.
        converted = (key_to_rowcol(d) for d in data)
        unified = np.array([x for x in converted if x is not None],
                           dtype=[('row', '<u4'), ('col', '<u4'),
                                  ('type', 'u1'), ('amount', '<f4')])
        return unified
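
The structured dtype above mirrors the row/col form of the tech_params and bio_params arrays. A hedged illustration of what one converted exchange looks like (made-up row/col indices):

import numpy as np

unified = np.array([(5, 12, 1, 0.75)],
                   dtype=[('row', '<u4'), ('col', '<u4'),
                          ('type', 'u1'), ('amount', '<f4')])
print(unified['amount'])  # [0.75]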

    def load_data(self) -> None:
        """Constructs the random number generators for all of the matrices that
        can be altered by uncertainty.

        If any of these uncertain calculations are not included, the initial
        'amount' values of the 'params' arrays are used instead of generating
        a new vector.
        """
        self.lca.load_lci_data()

        self.tech_rng = MCRandomNumberGenerator(self.lca.tech_params, seed=self.seed) \
            if self.include_technosphere else self.lca.tech_params["amount"].copy()
        self.bio_rng = MCRandomNumberGenerator(self.lca.bio_params, seed=self.seed) \
            if self.include_biosphere else self.lca.bio_params["amount"].copy()

        if self.lca.lcia:
            # One RNG per impact category, because the cf arrays differ in size.
            self.cf_rngs = {}
            for m in self.methods:
                self.lca.switch_method(m)
                self.lca.load_lcia_data()
                self.cf_rngs[m] = MCRandomNumberGenerator(self.lca.cf_params, seed=self.seed) \
                    if self.include_cfs else self.lca.cf_params["amount"].copy()
        # Construct the MC parameter manager
        if self.include_parameters:
            pass  # not yet developed as stand-alone code (outside of the Activity Browser)
            # self.param_rng = MonteCarloParameterManager(seed=self.seed)

        self.lca.activity_dict_rev, self.lca.product_dict_rev, self.lca.biosphere_dict_rev = \
            self.lca.reverse_dict()

    def calculate(self, iterations=10, seed: Optional[int] = None, **kwargs):
        """Main calculate method for the MC LCA class, allows fine-grained control
        over which uncertainties are included when running MC sampling.
        """
        start = time()
        self.iterations = iterations
        self.seed = seed or get_seed()
        self.include_technosphere = kwargs.get("technosphere", True)
        self.include_biosphere = kwargs.get("biosphere", True)
        self.include_cfs = kwargs.get("cf", True)
        self.include_parameters = kwargs.get("parameters", True)

        self.load_data()

        self.results = np.zeros(
            (iterations, len(self.func_units), len(self.methods)))

        # Reset GSA variables to empty.
        self.A_matrices = list()
        self.B_matrices = list()
        self.CF_dict = defaultdict(list)
        self.parameter_exchanges = list()
        self.parameters = list()

        # Prepare GSA parameter schema:
        if self.include_parameters:
            pass  # not yet developed as stand-alone code (outside of the Activity Browser)
            # self.parameter_data = self.param_rng.extract_active_parameters(self.lca)
            # # Add a values field to handle all the sampled parameter values.
            # for k in self.parameter_data:
            #     self.parameter_data[k]["values"] = []

        for iteration in range(iterations):
            tech_vector = (self.tech_rng.next()
                           if self.include_technosphere else self.tech_rng)
            bio_vector = (self.bio_rng.next()
                          if self.include_biosphere else self.bio_rng)
            if self.include_parameters:
                pass  # not yet developed as stand-alone code (outside of the Activity Browser)
                # # Convert the input/output keys into row/col keys, and then match them against
                # # the tech_ and bio_params
                # data = self.param_rng.next()
                # param_exchanges = self.unify_param_exchanges(data)
                #
                # # Select technosphere subset from param_exchanges.
                # subset = param_exchanges[np.isin(param_exchanges["type"], [0, 1])]
                # # Create index of where to insert new values from tech_params array.
                # idx = np.argwhere(
                #     np.isin(self.lca.tech_params[self.param_cols], subset[self.param_cols])
                # ).flatten()
                # # Construct unique array of row+col combinations
                # uniq = np.unique(self.lca.tech_params[idx][["row", "col"]])
                # # Use the unique array to sort the subset (ensures values
                # # are inserted at the correct index)
                # sort_idx = np.searchsorted(uniq, subset[["row", "col"]])
                # # Finally, insert the sorted subset amounts into the tech_vector
                # # at the correct indexes.
                # tech_vector[idx] = subset[sort_idx]["amount"]
                # # Repeat the above, but for the biosphere array.
                # subset = param_exchanges[param_exchanges["type"] == 2]
                # idx = np.argwhere(
                #     np.isin(self.lca.bio_params[self.param_cols], subset[self.param_cols])
                # ).flatten()
                # uniq = np.unique(self.lca.bio_params[idx][["row", "col"]])
                # sort_idx = np.searchsorted(uniq, subset[["row", "col"]])
                # bio_vector[idx] = subset[sort_idx]["amount"]
                #
                # # Store parameter data for GSA
                # self.parameter_exchanges.append(param_exchanges)
                # self.parameters.append(self.param_rng.parameters.to_gsa())
                # # Extract sampled values for parameters, store.
                # self.param_rng.retrieve_sampled_values(self.parameter_data)

            self.lca.rebuild_technosphere_matrix(tech_vector)
            self.lca.rebuild_biosphere_matrix(bio_vector)

            # store matrices for GSA
            self.A_matrices.append(self.lca.technosphere_matrix)
            self.B_matrices.append(self.lca.biosphere_matrix)

            if not hasattr(self.lca, "demand_array"):
                self.lca.build_demand_array()
            self.lca.lci_calculation()

            # Pre-calculating CF vectors enables the use of the SAME CF vector
            # for each FU in a given run.
            cf_vectors = {}
            for m in self.methods:
                cf_vectors[m] = (self.cf_rngs[m].next()
                                 if self.include_cfs else self.cf_rngs[m])
                # store CFs for GSA (in a list defaultdict)
                self.CF_dict[m].append(cf_vectors[m])

            # iterate over FUs
            for row, func_unit in self.rev_fu_index.items():
                self.lca.redo_lci(func_unit)  # lca calculation

                # iterate over methods
                for col, m in self.rev_method_index.items():
                    self.lca.switch_method(m)
                    self.lca.rebuild_characterization_matrix(cf_vectors[m])
                    self.lca.lcia_calculation()
                    self.results[iteration, row, col] = self.lca.score

        print(
            'Monte Carlo LCA: finished {} iterations for {} functional units and {} methods in {} seconds.'
            .format(iterations, len(self.func_units), len(self.methods),
                    np.round(time() - start, 2)))

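A hedged usage sketch for the class as a whole (assuming an existing calculation setup registered under the hypothetical name 'my setup'):

mc = MonteCarloLCA('my setup')
# Sample A and B matrices but keep characterization factors and parameters static.
mc.calculate(iterations=100, technosphere=True, biosphere=True,
             cf=False, parameters=False)
print(mc.results.shape)  # (100, n_functional_units, n_methods)
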
    @property
    def func_units_dict(self) -> dict:
        """Return a dictionary of functional units (key, demand)."""
        return {key: 1 for func_unit in self.func_units for key in func_unit}

    def get_results_by(self, act_key=None, method=None):
        """Get a slice or all of the results.
        - if a method is provided, results will be given for all functional units and runs
        - if a functional unit is provided, results will be given for all methods and runs
        - if a functional unit and method is provided, results will be given for all runs of that combination
        - if nothing is given, all results are returned
        """

        if not self.results.any():
            raise ValueError(
                'You need to perform a Monte Carlo Simulation first.')

        if act_key:
            act_index = self.activity_index.get(act_key)
            print('Activity key provided:', act_key, act_index)
        if method:
            method_index = self.method_index.get(method)
            print('Method provided', method, method_index)

        if not act_key and not method:
            return self.results
        elif act_key and not method:
            return np.squeeze(self.results[:, act_index, :])
        elif method and not act_key:
            return np.squeeze(self.results[:, :, method_index])
        elif method and act_key:
            print(act_index, method_index)
            return np.squeeze(self.results[:, act_index, method_index])

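For example (hedged, reusing the hypothetical `mc` from above), all runs for one method across functional units:

arr = mc.get_results_by(method=mc.methods[0])  # shape (iterations, n_functional_units)
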
    def get_results_dataframe(self, act_key=None, method=None, labelled=True):
        """Return a Pandas DataFrame with results for all runs either for
        - all functional units and a selected method or
        - all methods and a selected functional unit.

        If labelled=True, then the activity keys are converted to a human
        readable format.
        """

        if not self.results.any():
            raise ValueError(
                'You need to perform a Monte Carlo Simulation first.')

        if (act_key and method) or (not act_key and not method):
            raise ValueError(
                'Must provide activity key or method, but not both.')
        data = self.get_results_by(act_key=act_key, method=method)

        if method:
            labels = self.activity_keys
        elif act_key:
            labels = self.methods

        df = pd.DataFrame(data, columns=labels)

        # optionally convert activity keys to human readable output
        if labelled and method:
            df.columns = self.get_labels(df.columns, max_length=20)

        return df

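Continuing the hedged sketch, the per-run score distribution for every functional unit under one method:

df = mc.get_results_dataframe(method=mc.methods[0])
print(df.describe())  # spread of Monte Carlo scores per functional unit
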
    @staticmethod
    def get_labels(key_list,
                   fields: list = None,
                   separator=' | ',
                   max_length: int = None) -> list:
        fields = fields or [
            'name', 'reference product', 'location', 'database'
        ]
        # Fetch each activity as a dict; the keys may come from a pd.MultiIndex.
        acts = (bw.get_activity(key).as_dict() for key in key_list)
        translated_keys = [
            separator.join([act.get(field, '') for field in fields])
            for act in acts
        ]
        # if max_length:
        #     translated_keys = [wrap_text(k, max_length=max_length) for k in translated_keys]
        return translated_keys
Example 10
    def generate_samples(self):
        """ Generate an array of sampled values for uncertain elements of given df

        Returns both a samples array and a list of indices that represent the
        basin or the basin/month for each sample array row.
        These can subsequently be used to replace values in a stacked version of
        dataframes used in calculations.
        """
        random_variables = [
            v for v in self.given_variable_names if v != 'area'
        ]
        for variable in random_variables:
            with open(self.filtered_pickles / "{}.pickle".format(variable),
                      'rb') as f:
                det_df = pickle.load(f)

            if det_df.shape[1] == 12:  # Variables with monthly resolution
                stacked_df = det_df.stack().reset_index()
                stacked_df.rename(columns={'level_1': 'month', 0: 'value'},
                                  inplace=True)

            else:  # Variables with no monthly resolution
                stacked_df = det_df.reset_index()
                stacked_df.rename(columns={variable: 'value'}, inplace=True)

            if variable != 'pastor':
                # All uncertain parameters are lognormally distributed except pastor.
                uncertainty_df = pd.read_pickle(self.filtered_pickles /
                                                'uncertainty.pickle')
                uncertainty = uncertainty_df[variable]
                stacked_df['GSD2'] = stacked_df['BAS34S_ID'].map(uncertainty)
                non_zero = stacked_df['value'] != 0
                uncertain = stacked_df['GSD2'] > 1
                lognormally_distributed = non_zero & uncertain
                df_only_uncertain = copy.deepcopy(
                    stacked_df[lognormally_distributed])
                gsd2_to_scale = lambda x: np.log(np.sqrt(x))
                amount_to_loc = lambda x: np.log(np.absolute(x))
                df_only_uncertain['loc'] = df_only_uncertain['value'].apply(
                    amount_to_loc)
                df_only_uncertain['scale'] = df_only_uncertain['GSD2'].apply(
                    gsd2_to_scale)
                df_only_uncertain['negative'] = df_only_uncertain['value'] < 0
                df_only_uncertain['uncertainty_type'] = 2
                df_only_uncertain['uncertainty_type'] = df_only_uncertain[
                    'uncertainty_type'].astype(int)
                d_list = df_only_uncertain.to_dict(orient='records')
                stats_arrays_input_dicts = [{
                    'loc': d['loc'],
                    'scale': d['scale'],
                    'negative': d['negative'],
                    'uncertainty_type': d['uncertainty_type'],
                } for d in d_list]

            else:  # pastor has a triangular distribution: min = mode, max = 1.5 * mode
                include = stacked_df['value'] != 0
                df_only_uncertain = copy.deepcopy(stacked_df[include])

                df_only_uncertain['loc'] = df_only_uncertain['value']
                df_only_uncertain['minimum'] = df_only_uncertain['value']
                max_out_to_one = lambda x: 1.5 * x if 1.5 * x < 1 else 1
                df_only_uncertain['maximum'] = df_only_uncertain['loc'].apply(
                    max_out_to_one)
                df_only_uncertain['uncertainty_type'] = 5
                df_only_uncertain['uncertainty_type'] = df_only_uncertain[
                    'uncertainty_type'].astype(int)
                d_list = df_only_uncertain.to_dict(orient='records')
                stats_arrays_input_dicts = [{
                    'loc': d['loc'],
                    'minimum': d['minimum'],
                    'maximum': d['maximum'],
                    'uncertainty_type': d['uncertainty_type'],
                } for d in d_list]
            uncertainty_base = UncertaintyBase.from_dicts(
                *stats_arrays_input_dicts)
            rng = MCRandomNumberGenerator(uncertainty_base)
            arr = np.zeros(shape=[df_only_uncertain.shape[0], self.iterations],
                           dtype=self.dtype)
            for iteration in range(self.iterations):
                arr[:, iteration] = next(rng).astype(self.dtype)

            if 'month' in df_only_uncertain.columns:
                indices_as_arr = np.array([
                    df_only_uncertain['BAS34S_ID'], df_only_uncertain['month']
                ]).T
                indices_as_list = [(indices_as_arr[i, 0], indices_as_arr[i, 1])
                                   for i in range(indices_as_arr.shape[0])]
            else:
                indices_as_list = list(df_only_uncertain['BAS34S_ID'].values)

            name = (variable + "_wo_model_uncertainty"
                    if variable in ['avail_net', 'avail_delta'] else variable)
            np.save(self.samples_dir / "{}.npy".format(name), arr)
            with open(self.indices_dir / "{}.pickle".format(name), 'wb') as f:
                pickle.dump(indices_as_list, f)
            print("\t{} samples taken for {}".format(self.iterations, name))
        self.sample_model_uncertainty()
        self.add_model_uncertainty_to_avail()
        self.samples_generated = True
Example 11
path_merlin = path_setac / "merlin"
path_model_dir = path_setac / "regression" / "{}_model".format(model_seed)
filepath_row_acts_names = path_model_dir / "row_acts_names.pickle"
filepath_col_acts_names = path_model_dir / "col_acts_names.pickle"
filepath_tech_params = path_model_dir / "tech_params.pickle"
with open(filepath_row_acts_names, "rb") as f:
    row_acts_names = pickle.load(f)
with open(filepath_col_acts_names, "rb") as f:
    col_acts_names = pickle.load(f)
with open(filepath_tech_params, "rb") as f:
    tech_params = pickle.load(f)

tech_params_narrow = deepcopy(tech_params)  # TODO understand this!
tech_params_narrow["scale"] = tech_params_narrow["scale"] / scaling_factor
mc = MCRandomNumberGenerator(tech_params,
                             maximum_iterations=iterations,
                             seed=89)
mc_narrow = MCRandomNumberGenerator(tech_params_narrow,
                                    maximum_iterations=iterations)
X = np.array([list(next(mc)) for _ in range(iterations)])
X_narrow = np.array([list(next(mc_narrow)) for _ in range(iterations)])

units = []
act_in_names_ = []
act_out_names_ = []
name_ind = 0
unit_ind = 1
for k, param in enumerate(tech_params):
    units.append(row_acts_names[k][unit_ind])
    act_in_names_.append(row_acts_names[k][name_ind])
    act_out_names_.append(col_acts_names[k][name_ind])
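
A hedged reading of the tech_params_narrow step above: for lognormal parameters, `scale` is the standard deviation of the underlying normal distribution, so dividing every `scale` by `scaling_factor` narrows each distribution around an unchanged median; `X_narrow` therefore samples the same system with deliberately reduced input uncertainty.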
Example 12
class CSMonteCarloLCA(object):
    """A Monte Carlo LCA for multiple functional units and methods loaded from a calculation setup."""
    def __init__(self, cs_name):
        if cs_name not in bw.calculation_setups:
            raise ValueError(
                "{} is not a known `calculation_setup`.".format(cs_name))

        self.cs_name = cs_name
        cs = bw.calculation_setups[cs_name]
        self.seed = None
        self.cf_rngs = {}
        self.CF_rng_vectors = {}
        self.include_technosphere = True
        self.include_biosphere = True
        self.include_cfs = True
        self.include_parameters = True
        self.param_rng = None
        self.param_cols = ["input", "output", "type"]

        self.tech_rng: Optional[Union[MCRandomNumberGenerator, np.ndarray]] = None
        self.bio_rng: Optional[Union[MCRandomNumberGenerator, np.ndarray]] = None
        self.cf_rng: Optional[Union[MCRandomNumberGenerator, np.ndarray]] = None

        # functional units
        self.func_units = cs['inv']
        self.rev_fu_index = {i: fu for i, fu in enumerate(self.func_units)}

        # activities
        self.activity_keys = [list(fu.keys())[0] for fu in self.func_units]
        self.activity_index = {
            key: index
            for index, key in enumerate(self.activity_keys)
        }
        self.rev_activity_index = {
            index: key
            for index, key in enumerate(self.activity_keys)
        }
        # self.fu_index = {k: i for i, k in enumerate(self.activity_keys)}

        # methods
        self.methods = cs['ia']
        self.method_index = {m: i for i, m in enumerate(self.methods)}
        self.rev_method_index = {v: k for k, v in self.method_index.items()}

        # todo: get rid of the below
        self.func_unit_translation_dict = {
            str(bw.get_activity(list(func_unit.keys())[0])): func_unit
            for func_unit in self.func_units
        }
        self.func_key_dict = {
            m: i
            for i, m in enumerate(self.func_unit_translation_dict.keys())
        }
        self.func_key_list = list(self.func_key_dict.keys())

        self.results = []

        self.lca = bw.LCA(demand=self.func_units_dict, method=self.methods[0])

    def load_data(self) -> None:
        """Constructs the random number generators for all of the matrices that
        can be altered by uncertainty.

        If any of these uncertain calculations are not included, the initial
        'amount' values of the 'params' arrays are used instead of generating
        a new vector.
        """
        self.lca.load_lci_data()

        self.tech_rng = MCRandomNumberGenerator(self.lca.tech_params, seed=self.seed) \
            if self.include_technosphere else self.lca.tech_params["amount"].copy()
        self.bio_rng = MCRandomNumberGenerator(self.lca.bio_params, seed=self.seed) \
            if self.include_biosphere else self.lca.bio_params["amount"].copy()

        if self.lca.lcia:
            # One RNG per impact category, because the cf arrays differ in size.
            self.cf_rngs = {}
            for m in self.methods:
                self.lca.switch_method(m)
                self.lca.load_lcia_data()
                self.cf_rngs[m] = MCRandomNumberGenerator(self.lca.cf_params, seed=self.seed) \
                    if self.include_cfs else self.lca.cf_params["amount"].copy()
        # Construct the MC parameter manager
        if self.include_parameters:
            self.param_rng = MonteCarloParameterManager(seed=self.seed)

    def calculate(self, iterations=10, seed: Optional[int] = None, **kwargs):
        """Main calculate method for the MC LCA class, allows fine-grained control
        over which uncertainties are included when running MC sampling.
        """
        start = time()
        self.seed = seed or get_seed()
        self.include_technosphere = kwargs.get("technosphere", True)
        self.include_biosphere = kwargs.get("biosphere", True)
        self.include_cfs = kwargs.get("cf", True)
        self.include_parameters = kwargs.get("parameters", True)

        self.load_data()
        self.results = np.zeros(
            (iterations, len(self.func_units), len(self.methods)))

        for iteration in range(iterations):
            tech_vector = (self.tech_rng.next()
                           if self.include_technosphere else self.tech_rng)
            bio_vector = (self.bio_rng.next()
                          if self.include_biosphere else self.bio_rng)
            if self.include_parameters:
                param_exchanges = self.param_rng.next()
                # combination of 'input', 'output', 'type' columns is unique
                # For each recalculated exchange, match it to either matrix and
                # override the value within that matrix.
                for p in param_exchanges:
                    tech_mask = self.lca.tech_params[self.param_cols] == p[self.param_cols]
                    tech_vector[tech_mask] = p["amount"]
                    bio_mask = self.lca.bio_params[self.param_cols] == p[self.param_cols]
                    bio_vector[bio_mask] = p["amount"]

            self.lca.rebuild_technosphere_matrix(tech_vector)
            self.lca.rebuild_biosphere_matrix(bio_vector)

            if not hasattr(self.lca, "demand_array"):
                self.lca.build_demand_array()
            self.lca.lci_calculation()

            # Pre-calculating CF vectors enables the use of the SAME CF vector
            # for each FU in a given run.
            cf_vectors = {}
            for m in self.methods:
                cf_vectors[m] = (self.cf_rngs[m].next()
                                 if self.include_cfs else self.cf_rngs[m])

            # lca_scores = np.zeros((len(self.func_units), len(self.methods)))

            # iterate over FUs
            for row, func_unit in self.rev_fu_index.items():
                self.lca.redo_lci(func_unit)  # lca calculation

                # iterate over methods
                for col, m in self.rev_method_index.items():
                    self.lca.switch_method(m)
                    self.lca.rebuild_characterization_matrix(cf_vectors[m])
                    self.lca.lcia_calculation()
                    # lca_scores[row, col] = self.lca.score
                    self.results[iteration, row, col] = self.lca.score

        print(
            'CSMonteCarloLCA: finished {} iterations for {} functional units and {} methods in {} seconds.'
            .format(iterations, len(self.func_units), len(self.methods),
                    time() - start))

    @property
    def func_units_dict(self) -> dict:
        """Return a dictionary of functional units (key, demand)."""
        return {key: 1 for func_unit in self.func_units for key in func_unit}

    def get_results_by(self, act_key=None, method=None):
        """Get a slice or all of the results.
        - if a method is provided, results will be given for all functional units and runs
        - if a functional unit is provided, results will be given for all methods and runs
        - if a functional unit and method is provided, results will be given for all runs of that combination
        - if nothing is given, all results are returned
        """
        if act_key:
            act_index = self.activity_index.get(act_key)
            print('Activity key provided:', act_key, act_index)
        if method:
            method_index = self.method_index.get(method)
            print('Method provided', method, method_index)

        if not act_key and not method:
            return self.results
        elif act_key and not method:
            return np.squeeze(self.results[:, act_index, :])
        elif method and not act_key:
            return np.squeeze(self.results[:, :, method_index])
        elif method and act_key:
            print(act_index, method_index)
            return np.squeeze(self.results[:, act_index, method_index])

    def get_results_dataframe(self, act_key=None, method=None, labelled=True):
        """Return a Pandas DataFrame with results for all runs either for
        - all functional units and a selected method or
        - all methods and a selected functional unit.

        If labelled=True, then the activity keys are converted to a human
        readable format.
        """
        if (act_key and method) or (not act_key and not method):
            raise ValueError(
                'Must provide activity key or method, but not both.')
        data = self.get_results_by(act_key=act_key, method=method)

        if method:
            labels = self.activity_keys
        elif act_key:
            labels = self.methods

        df = pd.DataFrame(data, columns=labels)

        # optionally convert activity keys to human readable output
        if labelled and method:
            df.columns = self.get_labels(df.columns, max_length=20)

        return df

    @staticmethod
    def get_labels(key_list,
                   fields: list = None,
                   separator=' | ',
                   max_length: int = None) -> list:
        fields = fields or [
            'name', 'reference product', 'location', 'database'
        ]
        # Fetch each activity as a dict; the keys may come from a pd.MultiIndex.
        acts = (bw.get_activity(key).as_dict() for key in key_list)
        translated_keys = [
            separator.join([act.get(field, '') for field in fields])
            for act in acts
        ]
        # if max_length:
        #     translated_keys = [wrap_text(k, max_length=max_length) for k in translated_keys]
        return translated_keys