Example #1
    def __init__(self, *args, **kwargs):
        """Initialize state.

        Raises InvalidParameterError if any arguments are passed.

        This class's initializer serves as a catch-all for invalid initializer arguments
        in the inheritance chain. Arguments unused by specific initializers are passed
        to base class initializers using super(). Since Object is the last class in
        method resolution order, any arguments passed to it have not been handled by
        any of the classes in the inheritance chain. Such unhandled arguments are
        considered errors.

        Parameters:
            arbitrary arguments and keyword arguments.

        Raises:
            InvalidParameterError if any (keyword) arguments are specified
        """

        if len(args) > 0:
            raise InvalidParameterError(
                "nothing",
                args,
                f"Unhandled positional arguments in '{self.__class__.__name__}.__init__'",
            )
        if len(kwargs) > 0:
            raise InvalidParameterError(
                "nothing",
                kwargs,
                f"Unhandled keyword arguments in '{self.__class__.__name__}'.__init__",
            )
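
The catch-all above relies on cooperative multiple inheritance: every class consumes the arguments it understands and forwards the rest via `super().__init__`, so anything that reaches the terminal base class is by definition unhandled. A minimal self-contained sketch of the pattern (hypothetical class names, plain `TypeError` instead of `InvalidParameterError`):

```
class Terminal:
    """Hypothetical terminal base class: rejects any leftover arguments."""

    def __init__(self, *args, **kwargs):
        if args or kwargs:
            raise TypeError(f"unhandled initializer arguments: {args} {kwargs}")


class Labeled(Terminal):
    def __init__(self, labels=None, **kwargs):
        self.labels = labels        # consume what this class understands
        super().__init__(**kwargs)  # forward the rest along the MRO


class Tabular(Labeled):
    def __init__(self, data=None, **kwargs):
        self.data = data
        super().__init__(**kwargs)


Tabular(data=[1, 2], labels=[0, 1])  # fine, every argument was consumed
# Tabular(data=[1, 2], typo=3)       # would raise TypeError in Terminal
```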
Example #2
    def _evaluate(self, true, pred):
        """Root mean squared error divided by standard deviation of labels.

        stdRMSE = RMSE / std. dev.

        See class docstring for details.

        Parameters:
            true: observed property distribution; requires only means
            pred: predictive property distribution; requires only means

        Returns:
            standardized root mean squared error as a floating point number
        """

        true = params.distribution(true)

        # ensure sufficiently many samples
        n = len(true.mean)
        if n <= 1:
            raise InvalidParameterError(
                "enough samples to compute standard deviation", f"{n} samples")

        # compute RMSE and standard deviation
        rmse = super()._evaluate(true, pred)
        stddev = np.std(true.mean, ddof=self._bias_correction)

        # ensure sufficient variance in samples
        if stddev <= 1e-3:  # hard-coded, could be initialization parameter
            raise InvalidParameterError(
                "sufficient label variance for non-zero standard deviation",
                f"standard deviation of {stddev}",
            )

        return float(rmse / stddev)
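
As a quick numeric illustration of stdRMSE = RMSE / std. dev., here is a standalone sketch with plain NumPy (a hypothetical helper, not the class method above; `ddof` stands in for the bias-correction choice):

```
import numpy as np


def standardized_rmse(true_means, pred_means, ddof=0):
    """Hypothetical helper: RMSE divided by the standard deviation of the labels."""
    true_means = np.asarray(true_means, dtype=float)
    pred_means = np.asarray(pred_means, dtype=float)
    rmse = np.sqrt(np.mean((true_means - pred_means) ** 2))
    stddev = np.std(true_means, ddof=ddof)
    if stddev <= 1e-3:  # same hard-coded threshold as above
        raise ValueError("label variance too small for standardization")
    return float(rmse / stddev)


print(standardized_rmse([1.0, 2.0, 3.0, 4.0], [1.1, 1.9, 3.2, 3.8]))  # ~0.14
```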
Example #3
    def __init__(self,
                 data: np.ndarray,
                 labels: Optional[np.ndarray] = None,
                 **kwargs):
        """Initialize dataset.

        Parameters:
            data: tabular data as a NumPy ndarray
            labels: tabular data as a NumPy ndarray. If not specified,
                dataset is unlabeled.

        Raises:
            InvalidParameterError for invalid arguments. In particular,
                numbers of data and labels must match.

        Examples:
            From numerical NumPy data:
            ```
            TabularData(numpy.ndarray(...), ...)
            ```

            From a Pandas DataFrame:
            ```
            df = pandas.DataFrame(..., columns=[...])
            TabularData(df.to_records(index=False), labels=...)
            ```

            From mixed NumPy data, with column names (note use of tuples):
            ```
            a = numpy.array([('a', 1), ('b', 2)], dtype=[('C', str), ('D', int)])
            TabularData(a, ...)
            ```
        """

        # parameter validation
        data = params.instance(data, np.ndarray)
        labels = params.optional_(labels,
                                  lambda arg: params.instance(arg, np.ndarray))

        if labels is not None:
            # number of samples and labels must match
            if data.shape[0] != labels.shape[0]:
                raise InvalidParameterError(
                    "same number of samples and labels",
                    f"{data.shape[0]} samples, {labels.shape[0]} labels",
                )

            # uniqueness of "column" names, if any, is enforced by NumPy,
            # but only separately for data and labels
            if is_sequence(data.dtype.names) and is_sequence(
                    labels.dtype.names):
                column_names = data.dtype.names + labels.dtype.names
                if len(column_names) != len(np.unique(column_names)):
                    raise InvalidParameterError(
                        "unique column names for samples and labels",
                        column_names)

        self._data, self._labels = data, labels

        super().__init__(**kwargs)
Example #4
def which(*args):
    """'which' statement.

    which(
        cond_1, value_1,
        cond_2, value_2,
        ...
    )

    Returns value_i for the first condition cond_i that is true.
    It is an error if none of the cond_i are true.

    For a default value, use 'which(cond_1, value_1, ..., True, default)'.
    The 'True' can be omitted, that is, a trailing default value can be given:
    which(cond_1, value_1, ..., cond_k, value_k, default)
    """

    if len(args) == 0:
        raise InvalidParameterError(
            "conditions and cases", "nothing", explanation="'which' statement without arguments"
        )
    if len(args) % 2 == 1:
        return which(*args[:-1], True, args[-1])
    for i in range(0, len(args), 2):
        if args[i]:
            return args[i + 1]
    raise InvalidParameterError(
        "at least one condition applies",
        "no condition applied",
        explanation="'which' command fell through (no case applied)",
    )
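
A short usage sketch of `which` (assuming the function above is in scope; note that, being a function, all value arguments are evaluated eagerly):

```
x = -3

sign = which(
    x < 0, "negative",
    x == 0, "zero",
    "positive",  # trailing default, equivalent to a final 'True, "positive"'
)
print(sign)  # "negative"
```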
Example #5
    def _intersection(lhs: "TabularData",
                      rhs: "TabularData",
                      duplicates: bool = False) -> "TabularData":
        """Specialized intersection.

        For labeled data, labels are compared as well.

        The datasets must be compatible in the sense that both are of type
        TabularData or derived, and either labeled or unlabeled.

        Parameters:
            lhs: one of the two datasets to intersect ('left hand side')
            rhs: one of the two datasets to intersect ('right hand side')
            duplicates: if False (default), the returned data do not contain
                duplicate entries; if True, duplicates are taken into account.
                Both inputs and labels have to match for duplicates.

        Returns:
            TabularData containing only samples in both datasets, either without duplicates
            (set intersection) or taking duplicates into account (multiset intersection)

        Raises:
            NotImplementedError if the set intersection can not be computed
        """

        # parameter validation
        lhs = params.instance(lhs, TabularData)
        rhs = params.instance(rhs, TabularData)
        duplicates = params.boolean(duplicates)

        # special case: empty set
        if lhs.num_samples == 0:
            return lhs.subset()  # copy
        if rhs.num_samples == 0:
            return rhs.subset()  # copy

        if lhs.is_labeled != rhs.is_labeled:
            raise InvalidParameterError("compatible TabularData",
                                        "mismatch in labeling")

        # intersection calculation
        _lhs, _rhs = TabularData._joint_data_labels(
            lhs), TabularData._joint_data_labels(rhs)

        if _lhs.dtype != _rhs.dtype:
            raise InvalidParameterError(
                "Matching TabularData",
                f"{_lhs.dtype.descr} and {_rhs.dtype.descr}")

        if duplicates is False:
            _, indices, _ = np.intersect1d(
                _lhs, _rhs, return_indices=True)  # drops any duplicates
            indices = np.sort(indices)  # restores original order
            return lhs.subset(indices)
        else:  # duplicates = True
            raise NotImplementedError(  # todo: implement
                "specialized multiset intersection not implemented for TabularData"
            )
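
The set-intersection branch relies on `np.intersect1d(..., return_indices=True)` returning indices into the first array, which are then re-sorted to restore the original sample order. A minimal sketch of that idea on plain arrays (TabularData itself intersects the joint data/label structured arrays):

```
import numpy as np

lhs = np.array([30, 10, 20, 40])
rhs = np.array([40, 10, 99])

# indices point into lhs; intersect1d sorts its output, so re-sort the indices
_, indices, _ = np.intersect1d(lhs, rhs, return_indices=True)
indices = np.sort(indices)

print(lhs[indices])  # [10 40], in original lhs order
```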
Example #6
    def _get_single_property(properties: List[dict],
                             property_name: str,
                             units: Optional[str] = None,
                             default_value=None):
        """
        Helper function to get a single property.

        Parameters:
            properties: A list of dicts, each of which is a single property. Each entry is expected
                to have a 'name' field that corresponds to the property name and a `scalars` field
                that is a list with one entry, a dict of the form {'value': <property value>}.
                It may also have a 'units' field.
            property_name: The name of the property to get the value of. `properties` is expected
                to have exactly one entry with the 'name' field equal to `property_name`.
            units: Optional expected value of 'units' field. If specified, then there must be a
                'units' field and its value must correspond to `units`.
            default_value: Value to return if `property_name` is not present.

        Raises:
            InvalidParameterError: if `properties` does not conform to the expected structure

        Returns:
            The value of the property `property_name`

        """
        matching_props = [
            prop for prop in properties if prop.get("name") == property_name
        ]
        if len(matching_props) == 0:
            return default_value
        elif len(matching_props) > 1:
            raise InvalidParameterError(
                expected=f"Only one entry in properties should have name"
                f" '{property_name}'",
                got=properties,
            )
        matching_prop = matching_props[0]

        try:
            scalars = matching_prop["scalars"]
            assert len(scalars) == 1
            val = scalars[0]["value"]
            if units is not None:
                assert matching_prop["units"] == units
        except (KeyError, AssertionError):
            units_str = "" if units is None else f", 'units': {units}"
            raise InvalidParameterError(
                expected="Property as a dictionary of the form\n"
                "{'name': <property name>, 'scalars': "
                "[{'value': <property value>}]" + units_str + "}",
                got=matching_prop,
            )
        return val
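
For illustration, a sketch of the property structure this helper expects (hypothetical values; the bare-name call assumes the helper is reachable as a plain function, whereas above it is defined as a class helper):

```
properties = [
    {"name": "band gap", "scalars": [{"value": 1.1}], "units": "eV"},
    {"name": "color", "scalars": [{"value": "gray"}]},
]

band_gap = _get_single_property(properties, "band gap", units="eV")       # -> 1.1
color = _get_single_property(properties, "color")                         # -> "gray"
density = _get_single_property(properties, "density", default_value=0.0)  # -> 0.0
```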
Example #7
    def optional_(arg, testf, default=None):
        """Optional argument, can be None or something else.

        This is a shorthand for `params.any_(arg, testf, params.none)`, but more explicit.

        Parameters:
            arg: parameter to validate
            testf: test function that accepts a single argument and validates it
            default: if arg is None, the specified default is returned

        Returns:
            either default (if arg is None) or the result of testf(arg)

        Raises:
            InvalidParameterError if testf(arg) raises InvalidParameterError
        """

        ipe = InvalidParameterError("None or successful test",
                                    "not None and test failed")

        if arg is None:
            return default

        try:
            return testf(arg)
        except InvalidParameterError as e:
            raise ipe from e
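
A usage sketch of `params.optional_` (assuming `params` is in scope; the inner test uses `params.integer`, shown in a later snippet):

```
# None passes through and yields the default
params.optional_(None, lambda arg: params.integer(arg, from_=1))  # -> None

# non-None values are validated by the test function
params.optional_(25, lambda arg: params.integer(arg, from_=1))    # -> 25

# invalid non-None values raise InvalidParameterError
# params.optional_(0, lambda arg: params.integer(arg, from_=1))   # raises
```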
Example #8
    def _get_categorical_property(self, properties: List[dict],
                                  property_name: str,
                                  categories_dict: dict) -> int:
        """
        Helper function to get a single categorical property as an int.

        Parameters:
            properties: A list of dicts, each of which is a single property.
            property_name: The name of the property to get the value of.
            categories_dict: Dict from the categorical property (string) to a unique integer value.

        Raises:
            InvalidParameterError: if the value is not in the expected list of possible categories
                as given by the keys in `categories_dict`

        Returns: int
            An integer that corresponds to the value of the desired property.

        """
        category = self._get_single_property(properties, property_name)
        try:
            return categories_dict[category]
        except KeyError:
            raise InvalidParameterError(
                f"A value in the array: {categories_dict.keys()}", category)
Example #9
    def _parse_composition_as_dict(raw_composition: List[dict]) -> dict:
        """
        Helper function to parse composition as a dictionary.

        Parameters:
            raw_composition (List[dict]): A list, each entry of which corresponds to an element.
                An entry is a dict with an 'element' key and an 'idealWeightPercent' key.
                The element is a string (e.g., 'Cu') and the weight percent is another dict
                with a single key, 'value', pointing to a floating point number.
                The values are in percentage points, and add up to ~100 (but not exactly).

        Returns: dict
            Chemical composition as a dictionary with the elements as keys
                and their raw amounts as values

        """
        composition_dict = dict()
        for entry in raw_composition:
            try:
                element_name = entry["element"]
                element_amount = entry["idealWeightPercent"]["value"]
            except KeyError:
                raise InvalidParameterError(
                    expected="Element amount as a dictionary of the form\n"
                    "{'element': <element name>,"
                    "'idealWeightPercent': "
                    "{'value': <element amount>}}",
                    got=entry,
                )
            composition_dict[element_name] = element_amount
        return composition_dict
Example #10
    def _extract_raw_composition(entry: dict) -> List[dict]:
        """Get composition in its raw form."""
        raw_composition = entry.get("composition")
        if raw_composition is None or not isinstance(raw_composition, list):
            raise InvalidParameterError(
                expected="Chemical composition as a list", got=raw_composition)
        return raw_composition
Example #11
    def __init__(self, failmode, num_samples: int):
        """Initialize failure handler.

        Parameters:
            failmode: how to handle failed descriptor calculations, either due to rejected SMILES
                encodings or failing descriptor code. Possible values:
                "raise" [default]: raise a Benchmarexception
                "drop": drop the sample. Returned Data will have fewer samples
                ("mask", mask): where `mask` is a NumPy array with dtype bool whose entries will
                    be set to False for failures
                ("index", index): where `index` is an empty list to which the indices of failed
                    entries will be appended
            num_samples: number of samples that are transformed
        """

        self.num_samples = params.integer(num_samples, from_=0)
        self.failmode = self.failmode(failmode)

        if is_sequence(self.failmode) and self.failmode[0] == "mask":
            self.failmode = "mask"
            if len(failmode[1]) != self.num_samples:
                raise InvalidParameterError(
                    "failure mode mask length of {self.num_samples}",
                    len(self.mask))
            self.mask = failmode[1]
            self.mask.fill(False)

        if is_sequence(self.failmode) and self.failmode[0] == "index":
            self.failmode = "index"
            self.index = failmode[1]

        self.failures = []  # list of indices of failed samples
Example #12
    def failmode(failmode):
        """Failure mode.

        Validate that argument is failure mode, similar to smlb.params.
        See __init__ for valid values.
        """

        ipe = InvalidParameterError("valid failure mode specification",
                                    failmode)

        if failmode in ("raise", "drop"):
            return failmode

        if not (is_sequence(failmode) and len(failmode) == 2):
            raise ipe

        if (failmode[0] == "mask" and isinstance(failmode[1], np.ndarray)
                and failmode[1].ndim == 1
                and failmode[1].dtype.name == "bool"):
            return failmode

        if failmode[0] == "index" and isinstance(failmode[1], list) and len(
                failmode[1]) == 0:
            return failmode

        raise ipe
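
The two snippets above define and validate the accepted failure-mode specifications. A short sketch of the four accepted forms (values only, constructing the handler itself is omitted; `num_samples` is illustrative):

```
import numpy as np

num_samples = 5

failmode = "raise"                                      # raise an exception on failure (default)
failmode = "drop"                                       # drop failed samples from the result
failmode = ("mask", np.zeros(num_samples, dtype=bool))  # 1d boolean mask filled in by the handler
failmode = ("index", [])                                # indices of failures appended to this list
```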
Example #13
    def chemical_element(arg: Union[int, str]) -> int:
        """Chemical element.

        Can be specified either via proton number (int) or abbreviation (str).

        Parameters:
            arg: parameter to be validated as a chemical element specification

        Returns:
            proton number corresponding to element

        Raises:
            InvalidParameterError: for invalid parameters
        """

        ipe = InvalidParameterError("chemical element", arg)

        if isinstance(arg, str):
            if arg not in params._element_atomic_number:
                raise ipe
            arg = params._element_atomic_number[arg]

        try:
            arg = int(arg)  # convertible to int
        except (TypeError, ValueError) as e:
            raise ipe from e
        if not 1 <= arg <= 118:  # in range of known elements
            raise ipe

        return arg
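
A usage sketch of `params.chemical_element` (assuming `params` is in scope; copper has proton number 29):

```
params.chemical_element("Cu")  # -> 29, symbol resolved to proton number
params.chemical_element(29)    # -> 29, proton number passed through

# unknown symbols or out-of-range numbers raise InvalidParameterError
# params.chemical_element("Xx")  # raises
# params.chemical_element(200)   # raises
```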
Example #14
    def boolean(arg):
        """True or False.

        Accepts values True, False, "true", "True", "false", "False".
        Rejects values 0, 1, 0., 1.

        Parameters:
            arg: parameter to validate as boolean

        Returns:
            built-in boolean

        Raises:
            InvalidParameterError: if arg is invalid

        Acceptance of all objects `arg` for which `bool(arg)` works
        would lead to subtle bugs, for example, when testing for bool or float,
        `params.any_(arg, lambda arg: params.boolean(arg), lambda arg: params.real(arg))`
        would yield True due to conversion of real to bool.
        """

        ipe = InvalidParameterError("boolean", arg)

        # try: arg = bool(arg) as test would lead to bugs, see docstring.
        # arg in {False, "false", "False"} fails as 0 == False
        if arg is True or arg in {"true", "True"}:
            arg = True
        elif arg is False or arg in {"false", "False"}:
            arg = False
        else:
            raise ipe

        return arg
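
A usage sketch of `params.boolean` (assuming `params` is in scope), highlighting the deliberate rejection of numeric truthy values:

```
params.boolean(True)     # -> True
params.boolean("false")  # -> False

# numeric values are rejected to avoid silent bool/float confusion
# params.boolean(1)      # raises InvalidParameterError
# params.boolean(0.0)    # raises InvalidParameterError
```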
Example #15
    def samples(self, indices: Optional[np.ndarray] = None) -> np.ndarray:
        """Query vector samples.

        Returns a sequence of samples or raises InvalidParameterError.

        Vectors are queried by themselves, that is, vectors are their own indices.

        Parameters:
            indices: a real matrix of appropriate dimensions (rows are vectors)

        Returns:
            real matrix (vectors are rows)

        Raises:
            InvalidParameterError: for invalid keys
        """

        samples = params.real_matrix(indices, ncols=self.dimensions)

        if self.domain is not None:
            if (samples < self._domain[:, 0]).any() or (
                    samples > self._domain[:, 1]).any():
                raise InvalidParameterError("vectors in domain",
                                            "vectors outside of domain")

        return samples
Example #16
    def distribution(arg):
        """Predictive distribution.

        Parameters:
            arg: parameter to validate; predictive distributions;
                 a sequence is interpreted as specifying the means of a DeltaPredictiveDistribution

        Returns:
            PredictiveDistribution or subclass

        Raises:
            InvalidParameterError: if arg is invalid
        """

        # due to circular dependency
        from .distributions import PredictiveDistribution, DeltaPredictiveDistribution

        ipe = InvalidParameterError("distribution", arg)

        try:
            if isinstance(arg, PredictiveDistribution):
                pass
            elif is_sequence(arg):
                # interpret as sequence of means
                arg = np.asfarray(arg)
                if len(arg.shape) != 1:
                    raise ipe
                arg = DeltaPredictiveDistribution(arg)
            else:
                raise ipe
        except Exception as e:
            raise ipe from e

        return arg
Example #17
    def normal_distribution(arg):
        """Predictive normal distribution.

        Parameters:
            arg: parameter to validate; normal predictive distributions;
                 a pair of two same-length sequences is interpreted as
                 means and standard deviations of independent normal predictive distributions

        Returns:
            NormalPredictiveDistribution

        Raises:
            InvalidParameterError: if arg is invalid
        """

        # due to circular dependency
        from .distributions import NormalPredictiveDistribution

        ipe = InvalidParameterError("normal distribution", arg)

        try:
            if isinstance(arg, NormalPredictiveDistribution):
                pass
            elif (is_sequence(arg) and len(arg) == 2 and is_sequence(arg[0])
                  and is_sequence(arg[1]) and len(arg[0]) == len(arg[1])):
                # interpret as pair of two same-length sequences
                arg = NormalPredictiveDistribution(arg[0], arg[1])
            else:
                raise ipe  # check if arg is a normal distribution
        except Exception as e:
            raise ipe from e

        return arg
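
A usage sketch of `params.normal_distribution` (assuming `params` is in scope): a pair of equal-length sequences is interpreted as means and standard deviations:

```
means = [1.0, 2.0, 3.0]
stddevs = [0.1, 0.2, 0.3]

dist = params.normal_distribution((means, stddevs))  # -> NormalPredictiveDistribution
dist = params.normal_distribution(dist)              # already a distribution, passed through

# mismatched lengths (or anything else) raise InvalidParameterError
# params.normal_distribution(([1.0, 2.0], [0.1]))    # raises
```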
Example #18
    def any_(arg, testf, *args):
        """Logical or/union meta-test.

        At least one of several tests is valid.
        Logical or is a special case of any_.

        Parameters:
            arg: parameter to validate
            testf: test function that accepts a single argument and validates it
            arbitrarily many further functions can be passed

        Returns:
            the result of the first test function that successfully validates arg

        Raises:
            InvalidParameterError if all test functions fail to validate arg
        """

        ipe = InvalidParameterError("at least one test successful (any_)",
                                    "all tests failed")

        try:
            return testf(arg)
        except InvalidParameterError:
            if len(args) == 0:
                raise ipe
            return params.any_(arg, *args)
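
A usage sketch of the `any_` meta-test (assuming `params` is in scope; `params.enumeration` is used the same way in the optimizer initializer further below, and the concrete tests here are illustrative):

```
# accept either a positive integer or the string "all"
params.any_(
    "all",
    lambda arg: params.integer(arg, from_=1),
    lambda arg: params.enumeration(arg, {"all"}),
)  # -> "all"

params.any_(
    8,
    lambda arg: params.integer(arg, from_=1),
    lambda arg: params.enumeration(arg, {"all"}),
)  # -> 8
```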
Example #19
    def numpy_array(arg, dtype=None):
        """Any NumPy array.

        Tests if argument is a NumPy array, of any dtype (if not specified), of any dimensionality.

        Parameters:
            arg: parameter to validate
            dtype: dtype of arg if not None (default)

        Returns:
            NumPy array, of given dtype if specified

        Raises:
            InvalidParameterError: for invalid parameters
        """

        ipe = InvalidParameterError(
            f"NumPy array{'' if dtype is None else 'of dtype ' + str(dtype)}",
            arg)

        try:
            arg = np.asarray(arg) if dtype is None else np.asarray(arg,
                                                                   dtype=dtype)
        except Exception as e:
            raise ipe from e

        return arg
Example #20
    def all_(arg, testf, *args):
        """Logical and/intersection meta-test.

        All of several tests are valid.
        Logical and is a special case of all_.

        Parameters:
            arg: parameter to validate
            testf: function accepting a single argument
            arbitrarily many further functions can be passed

        Returns:
            arg if all test functions pass; raises otherwise

        Raises:
            InvalidParameterError if any testf raises an InvalidParameterError
        """

        ipe = InvalidParameterError("all tests successful (all_)",
                                    "a test failed")

        try:
            testf(arg)
            if len(args) > 0:
                params.all_(arg, *args)
            return arg
        except InvalidParameterError as e:
            raise ipe from e
Example #21
    def apply(self, data: Data):
        if not data.is_finite:
            raise InvalidParameterError(
                "a finite dataset",
                f"an infinite dataset of type {data.__class__}")
        means = self._function.labels(data.samples())
        stddevs = np.zeros_like(means)
        return NormalPredictiveDistribution(means, stddevs)
Example #22
    def apply(self, data: Data):
        if not data.is_finite:
            raise InvalidParameterError(
                "a finite dataset",
                f"an infinite dataset of type {data.__class__}")

        means = np.random.uniform(0, 10, data.num_samples)
        stddevs = np.random.uniform(0.5, 2.0, data.num_samples)
        return NormalPredictiveDistribution(means, stddevs)
Example #23
    def apply(self, data: Data) -> Data:
        """Transforms data.

        Parameters:
            data: labeled data to transform

        Returns:
            transformed data

        Raises:
            InvalidParameterError if Data is not labeled
        """

        data = params.instance(data, Data)
        if not data.is_labeled:
            raise InvalidParameterError("labeled data", "unlabeled data")

        # patch the labels() method of the data object (not the class);
        # the original class-level labels() remains accessible via the class itself

        # ensure the object does not already define the attributes about to be set
        for name in ("_orig_labels", "labels", "_noise"):
            if name in data.__dict__:
                raise BenchmarkError(
                    f"internal error: data object already has {name} method")

        # create a copy of the dataset
        data = copy.deepcopy(data)

        # rename labels to _labels for data only
        setattr(data, "_orig_labels", getattr(data, "labels"))

        # store noise model
        setattr(data, "_noise", self._noise)

        # add wrapper as new labels() method

        def labels(self, indices=None):
            """Query labels of a sequence of samples.

            This wrapper adds noise.

            Parameters:
                indices: a sequence of sample 'indices'.
                         See 'samples()' for details.

            Returns:
                a sequence of labels
            """

            labels = self._orig_labels(indices)
            return labels + self._noise.noise(labels.shape)

        setattr(data, "labels", labels.__get__(data))

        return data
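
The noise wrapper above is attached per object, not per class, by binding a plain function to one instance with `function.__get__(instance)`. A minimal self-contained sketch of that binding trick (hypothetical class and noise):

```
import copy


class Dataset:
    """Hypothetical dataset with a class-level labels() method."""

    def labels(self):
        return [1.0, 2.0, 3.0]


original = Dataset()
noisy = copy.deepcopy(original)

# keep the original (bound) method reachable on this one object
setattr(noisy, "_orig_labels", noisy.labels)


def labels(self):
    """Instance-level replacement that perturbs the original labels."""
    return [y + 0.1 for y in self._orig_labels()]


# __get__ binds the function to this specific instance; the class is untouched
setattr(noisy, "labels", labels.__get__(noisy))

print(original.labels())  # [1.0, 2.0, 3.0]
print(noisy.labels())     # [1.1, 2.1, 3.1]
```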
Example #24
    def asymptotic_fit(self, fdata):
        r"""Compute asymptotic fit in log-space for a single curve.

        The asymptotic fit is computed using a simple form of linear ridge regression,
        estimating two parameters, offset b and slope a: $f(x) = b + a x$.
        In short, we augment x with a second dimension of constant value 1 to remove the bias,
        $f( (x,1) ) = <(a,b),(x,1)>$. Then, solving
        $\argmin_{a,b} \sum_{i=1}^n (y_i - f((x_i,1)))^2 + \lambda ||(a,b)||^2$
        by rewriting in matrix notation, setting the derivative to zero and solving for (a,b) yields
        $(a,b) = (X^T X + \lambda I)^{-1} X^T y$, where the $n \times 2$-dimensional matrix X
        contains the data the fit is based on. The variance, or mean squared error (MSE),
        indicates how well empirical errors follow the asymptotic fit.

        Parameters:
            fdata: data for a single curve
        """

        # compute mean in log-space as the fit is linear in log space
        # todo: verify that this is the correct procedure
        sizes = self._logf(np.asfarray(tuple(entry[0] for entry in fdata)))
        means = np.asfarray(
            tuple(np.mean(self._logf(entry[1])) for entry in fdata))
        n = len(sizes)  # number of training set sizes

        if self._fit_weights is None:
            weights = np.ones(n)
        elif self._fit_weights == "variance":
            raise NotImplementedError  # todo: do weighting properly
            if min(len(entry[1]) for entry in fdata) < 2:
                raise InvalidParameterError(
                    "multiple values per horizontal location",
                    "fewer than two samples for at least one location",
                    explanation=
                    "weighting by variance not defined for fewer than two samples",
                )
            # todo: check for zero variance cases and replace by one
            weights = tuple(1 / np.var(entry[1]) for entry in fdata)
        else:
            raise BenchmarkError("internal error, invalid weighting scheme")
        weights /= np.sum(weights)

        X = np.ones((n, 2))  # second column is 1
        X[:, 0], y = sizes, means  # fit is in log-space
        assert y.shape == (n, ), f"loss vector has wrong dimensions {y.shape}"

        # standard linear ridge regression in log-space
        slope, offset = np.linalg.pinv(X.T @ X + self._fit_lambda *
                                       np.identity(2)) @ X.T @ y

        # variance of the fit
        residuals = y - (offset + slope * sizes)
        variance = np.mean(np.asfarray(residuals**2))

        return offset, slope, residuals, variance
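
As a sanity check of the closed form $(a,b) = (X^T X + \lambda I)^{-1} X^T y$, here is a standalone sketch that fits a noise-free log-space line and recovers its slope and offset (synthetic data, plain NumPy; not the class method above):

```
import numpy as np

# synthetic learning curve: log-error decays linearly in log training-set size
sizes = np.log2(np.asarray([16, 32, 64, 128, 256], dtype=float))
means = -0.5 * sizes + 1.0  # true slope -0.5, true offset 1.0

lam = 1e-6  # small ridge parameter, analogous to _fit_lambda above
X = np.ones((len(sizes), 2))
X[:, 0] = sizes  # second column stays 1 to absorb the offset

slope, offset = np.linalg.pinv(X.T @ X + lam * np.identity(2)) @ X.T @ means

residuals = means - (offset + slope * sizes)
variance = np.mean(residuals ** 2)  # ~0 for this noise-free example

print(round(slope, 3), round(offset, 3))  # approximately -0.5 and 1.0
```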
Example #25
    def __init__(
        self,
        rng: int = None,
        num_seeds: int = 1,
        resolution: int = 64,
        max_relative_jump: float = 1.0,
        dimensions_varied: Union[str, float, int] = "all",
        max_iters: Optional[int] = None,
        max_evals: Optional[int] = None,
        **kwargs,
    ):
        """Initialize state.

        Parameters:
            rng: pseudo-random number generator seed
            num_seeds: the number of starting points, and the number of points chosen at the end
                of each iteration
            resolution: the number of points to sample along a single dimension for a single seed
            max_relative_jump: the maximum relative step size along a single dimension. If a given
                dimension has length `L` and a seed has value `x` along that dimension, then the
                candidates are `resolution` linearly spaced points from the range
                [x - max_relative_jump * L, x + max_relative_jump * L] (clipped by the bounds).
                `max_relative_jump` must be on (0, 1].
                For a value of 1, the entire range is always considered.
            dimensions_varied: how many randomly selected dimensions to explore with each step.
                'all' indicates all dimensions. An integer directly specifies the number of
                dimensions. A float on (0, 1) indicates the fractional number of the total.
            max_iters: the maximum number of iterations
            max_evals: the maximum number of function evaluations (this is a soft maximum:
                once it is reached then the current iteration finishes)

        TODO: add tolerance stopping conditions
        """
        super().__init__(rng=rng, **kwargs)

        self._num_seeds = params.integer(num_seeds, from_=1)
        self._resolution = params.integer(resolution, from_=2)
        self._max_relative_jump = params.real(max_relative_jump,
                                              above=0.0,
                                              to=1.0)
        self._dimensions_varied = params.any_(
            dimensions_varied,
            lambda arg: params.integer(arg, above=0),
            lambda arg: params.real(arg, above=0.0, below=1.0),
            lambda arg: params.enumeration(arg, {"all"}),
        )
        self._max_iters = params.optional_(
            max_iters, lambda arg: params.integer(arg, from_=1))
        self._max_evals = params.optional_(
            max_evals, lambda arg: params.integer(arg, from_=1))
        if self._max_iters is None and self._max_evals is None:
            raise InvalidParameterError(
                "at least one stopping condition defined", "all Nones")
Example #26
    def integer(arg, from_=None, to=None, above=None, below=None):
        """Integer number.

        Negative, non-negative, positive, non-positive integers are special cases.

        Parameters:
            arg: parameter to validate as an integer
            from_: if specified, lowest admissible number (closed set lower bound)
            to: if specified, highest admissible number (closed set upper bound)
            above: if specified, highest non-admissible number (open set lower bound)
            below: if specified, lowest non-admissible number (open set upper bound)

        Returns:
            built-in integer type

        Raises:
            InvalidParameterError: if parameter arg is invalid
        """

        # TODO: throw exception on non-integer floating point values
        bounded = not (from_ is None and to is None and above is None
                       and below is None)
        msg = f"{'bounded ' if bounded else ''}integer"
        if bounded:
            msg += ("(" + ("" if from_ is None else f"from {from_}, ") +
                    ("" if above is None else f"above {above}, ") +
                    ("" if to is None else f"to {to}, ") +
                    ("" if below is None else f"below {below}, "))
            msg = msg[:-2] + ")"
        ipe = InvalidParameterError(msg, arg)

        try:
            arg = int(arg)

            if from_ is not None:
                if arg < from_:
                    raise ipe
            if to is not None:
                if arg > to:
                    raise ipe
            if above is not None:
                if arg <= above:
                    raise ipe
            if below is not None:
                if arg >= below:
                    raise ipe
        except Exception as e:
            raise ipe from e

        return arg
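
A short usage sketch of `params.integer` with the four bound keywords (assuming `params` is in scope):

```
params.integer(3, from_=1)            # -> 3, closed lower bound, 1 itself is allowed
params.integer("7", to=10)            # -> 7, converted to the built-in int
params.integer(5, above=0, below=10)  # -> 5, open bounds, 0 and 10 are excluded

# out-of-range or non-convertible values raise InvalidParameterError
# params.integer(0, from_=1)          # raises
# params.integer("abc")               # raises
```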
Example #27
    def hypercube_domain(arg, dimensions: Optional[int] = None):
        """A hypercube domain in a real vector space.

        A sequence of ranges [a,b].

        If only a single interval is passed as `arg`,
        it is extended to match the dimensionality.
        For this, dimensionality needs to be specified.

        If dimensionality is not specified, any sequence
        of ranges is accepted. In this case, a single range
        is not accepted, as it can not be reliably extended.

        Parameters:
            arg: sequence of ranges [a,b]
            dimensions: dimensionality of sequence

        Returns:
            2d NumPy array of shape (dimensions,2) and dtype float
        """

        # if empty vector spaces are needed, change to from_=0 to allow zero dimensions
        dimensions = params.any_(dimensions,
                                 lambda arg: params.integer(arg, from_=1),
                                 params.none)

        ipe = InvalidParameterError(
            f"{'d' if dimensions is None else dimensions}-dimensional hypercube domain",
            arg)

        try:
            res = np.asarray(arg, dtype=float)
            if len(res.shape) not in (1, 2):
                raise ipe

            if len(res.shape) == 1:
                if dimensions is None:
                    raise ipe
                else:
                    res = np.tile(res, (dimensions, 1))

            if dimensions is not None and res.shape != (dimensions, 2):
                raise ipe

            if (res[:, 0] > res[:, 1]).any():
                raise ipe
        except Exception as e:
            raise ipe from e

        return res
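
A usage sketch of `params.hypercube_domain` (assuming `params` is in scope), showing how a single range is tiled to the requested dimensionality:

```
# one range, extended to three dimensions
params.hypercube_domain([0.0, 1.0], dimensions=3)
# -> array([[0., 1.],
#           [0., 1.],
#           [0., 1.]])

# explicit per-dimension ranges; shape is checked against `dimensions` if given
params.hypercube_domain([[-1.0, 1.0], [0.0, 2.0]], dimensions=2)

# a single range without `dimensions` cannot be reliably extended and raises
# params.hypercube_domain([0.0, 1.0])  # raises InvalidParameterError
```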
Example #28
    def _parse_peculiar_amount(x: str) -> float:
        """
        Deals with dataset-specific-peculiarities in composition amounts.

        Some composition amounts have a trailing asterisk, e.g., '2*'. The meaning is unclear.
            Perhaps it denotes that the amount is imprecise. In any case, they only occur in 6
            samples. The trailing asterisk will be ignored.

        """
        if x[-1] == "*":
            x = x[:-1]
        try:
            return float(x)
        except ValueError:
            raise InvalidParameterError("Amount as a float", x)
Example #29
    def __init__(self, rng=None, **kwargs):
        """Initialize state.

        Parameters:
            rng: seed (key) for pseudo-random number generator.
                 This parameter must be specified to encourage correct usage
                 of pseudo-random numbers throughout the benchmark.
        """

        super().__init__(**kwargs)

        if rng is None:
            raise InvalidParameterError(
                "rng seed", "nothing",
                "pseudo-random number generator seed must be specified")
        rng = params.integer(rng, from_=0, to=2**32 - 1)
        self._random = PseudoRandomNumberGenerator(seed=rng)
Example #30
    def real(arg, from_=None, to=None, above=None, below=None):
        """Real number, floating point type.

        Parameters:
            arg: parameter to validate as a real number
            from_: if specified, lowest admissible number (closed set lower bound)
            to: if specified, highest admissible number (closed set upper bound)
            above: if specified, highest non-admissible number (open set lower bound)
            below: if specified, lowest non-admissible number (open set upper bound)

        Returns:
            built-in floating point type

        Raises:
            InvalidParameterError: for invalid parameter arg
        """

        bounded = not (from_ is None and to is None and above is None
                       and below is None)
        ipe = InvalidParameterError(
            f"{'bounded ' if bounded else ''}real number", arg)

        try:
            # guard against True and False, which are convertible to float
            if arg is True or arg is False:
                raise ipe

            arg = float(arg)

            if from_ is not None:
                if arg < from_:
                    raise ipe
            if to is not None:
                if arg > to:
                    raise ipe
            if above is not None:
                if arg <= above:
                    raise ipe
            if below is not None:
                if arg >= below:
                    raise ipe
        except Exception as e:
            raise ipe from e

        return arg