Beispiel #1
0
def lhs(sample_size: int,
        dimensions: int,
        seed: Optional[int] = None) -> np.ndarray:
    """ Draw a Latin hypercube sample on the unit hypercube.

    The unit interval is divided into ``sample_size`` equal-width strata. For
    every dimension one value is drawn uniformly inside each stratum, and the
    resulting column is then shuffled independently of the other columns, so
    each column contains exactly one value per stratum.

    'lhs' is quicker than 'mdurs' and 'optimised_lhs', at the price of less
    consistent uniformity, which is most noticeable as the number of
    dimensions grows.

    Parameters
    ----------
    sample_size: int
        Number of sample points to generate.
    dimensions: int
        Number of dimensions of each sample point.
    seed: {None, int}, optional
        Value handed to set_seed before any random numbers are drawn.
        Default = None.

    Returns
    -------
    sample: ndarray
        (sample_size, dimensions) float64 array of sample points with every
        coordinate on [0, 1].

    See Also
    --------
    mdurs
    optimised_lhs
    """
    # pylint: disable=no-member
    set_seed(seed)
    edges = np.linspace(0, 1, sample_size + 1)
    jitter = np.random.random((sample_size, dimensions))
    left = edges[:-1]
    right = edges[1:]
    # Row i of every column holds a value inside stratum [left[i], right[i]].
    stratified = jitter * (right - left)[:, np.newaxis] + left[:, np.newaxis]
    sample = np.empty((sample_size, dimensions), order='C', dtype=np.float64)
    for column in range(dimensions):
        # Shuffle each column separately to decouple the strata across
        # dimensions.
        shuffled_rows = np.random.permutation(range(sample_size))
        sample[:, column] = stratified[shuffled_rows, column]
    return sample
Beispiel #2
0
def urandom(sample_size: int, dimensions: int, seed: Optional[int] = None):
    """ Draw a plain uniform random sample.

    Parameters
    ----------
    sample_size: int
        Number of random vectors to draw.
    dimensions: int
        Dimension of each random vector.
    seed: {None, int}, optional
        Value handed to set_seed before the sample is drawn. Default = None.

    Returns
    -------
    urvs: ndarray
        (sample_size, dimensions) array of vectors drawn independently and
        uniformly at random with numpy's ``np.random.random``.
    """
    # pylint: disable=no-member
    set_seed(seed)
    shape = (sample_size, dimensions)
    return np.random.random(shape)
Beispiel #3
0
def solve_inv_gamma(
    lower_bound: float,
    upper_bound: float,
    lower_tol: float,
    upper_tol: float,
    gridsize: int = 10000,
    max_attempts: int = 3,
    seed: Optional[int] = None,
) -> Tuple[float, float]:
    # pylint: disable= too-many-arguments, too-many-locals, no-member
    """ Solve system of equations to find appropriate inverse gamma parameters.

    Aims to identify parameters alpha and beta such that:
    * A total of lower_tol probability mass lies < lower_bound
    * A total of upper_tol probability mass lies > upper_bound

    Given an inverse gamma distribution parametrised by alpha and beta.

    Scipy's root finding module scipy.optimize.root is used to solve the
    above system of equations, following a preliminary random grid search
    used to identify a suitable starting point. The grid is drawn uniformly
    from [0, 10) x [0, 10); the grid point with the smallest objective norm
    is used as the initial guess. If the root finder does not report success
    a fresh grid is drawn and the procedure is repeated, up to max_attempts
    times in total.

    Parameters
    ----------
    lower_bound: float
        Lower bound.
    upper_bound: float
        Upper bound. Must be strictly greater than lower_bound.
    lower_tol: float
        Lower bound tolerance.
    upper_tol: float
        Upper bound tolerance.
    gridsize: int, optional
        Size of grid used for preliminary grid search. Default = 10000.
    max_attempts: int, optional
        Maximum number of grid search + root finding attempts permitted.
        At least one attempt is always made. Default = 3.
    seed: {None, int32}
        Seed for numpy's random state. If None, an arbitrary random seed will
        be used. Default = None.

    Returns
    -------
    alpha: float
        Inverse gamma parameter alpha.
    beta: float
        Inverse gamma parameter beta.

    Raises
    ------
    ValueError
        If lower_bound is not strictly smaller than upper_bound.
    RuntimeError
        If the root finder has not converged after max_attempts attempts.
    """
    utils.set_seed(seed)
    if lower_bound >= upper_bound:
        raise ValueError('Lower bound must be smaller than upper bound.')
    obj = create_objective(lower_bound, upper_bound, lower_tol, upper_tol)
    scales = np.array([10, 10])
    obj_grid = np.empty((gridsize, 2))
    # Bounded retry loop: the attempt counter was previously never advanced,
    # so a non-converging problem would spin forever.
    for _ in range(max(1, max_attempts)):
        theta_grid = np.random.random((gridsize, 2)) * scales
        # objective function is not vectorised, so run in loop...
        for i in range(gridsize):
            obj_grid[i, :] = obj(theta_grid[i, :])
        obj_grid_norm = np.sqrt(np.sum(obj_grid**2, axis=1))
        # Start the root finder from the grid point closest to a solution.
        theta0 = theta_grid[obj_grid_norm.argmin(), :]
        theta_sol = optimize.root(obj, theta0)
        if theta_sol['success']:
            return theta_sol['x'][0], theta_sol['x'][1]
    raise RuntimeError(
        'Maximum number of attempts exceeded without convergence.')
Beispiel #4
0
def sobol(
    sample_size: int,
    dimensions: int,
    seed: Optional[int] = None,
    generator_seed: int = 1,
    skip: int = 0,
) -> np.ndarray:
    """ Collect quasi-random vectors from the Sobol sequence.

    Pulls ``sample_size`` consecutive vectors of length ``dimensions`` from
    the i4_sobol2 generator, which produces the Sobol sequence [1]_, and
    stacks them into a single array.

    Mirrors i4sobol_generate from the sobol_seq package, except that the
    dimension of the sequence cannot be changed once it has been
    initialised.

    Parameters
    ----------
    sample_size: int
        The number of vectors to retrieve.
    dimensions: int
        The dimension of each vector.
    seed: {None, int32}
        Value handed to set_seed before sampling. Default = None.
    generator_seed : int
        Seed forwarded to the Sobol sequence generator. Default = 1.
    skip: int
        Forwarded unchanged to i4_sobol2 as its ``skip`` argument; see
        i4_sobol2 for its exact meaning. Default = 0.

    Returns
    -------
    samples : ndarray
        (sample_size, dimensions) float64 array holding the requested number
        of quasi-random vectors, scaled between [0, 1].

    Notes
    -----
    This function implements i4sobol_generate from the sobol_seq using a
    generator instead of global variable declarations. See the original
    source code at https://github.com/naught101/sobol_seq for more details,
    and the documentation of i4_sobol2 for a full list of references.

    TODO: implement ability to resume sampling from an existing Sobol sequence.

    References
    ----------
    [1] Sobol, I.M., 1976. Uniformly distributed sequences with an additional
    uniform property. USSR Computational Mathematics and Mathematical
    Physics, 16(5), pp.236-242.

    See Also
    --------
    sobol_scatter : Sobol sequence with additive randomisation.
    i4_sobol2 : Sobol sequence generator.
    """
    set_seed(seed)
    sample = np.empty((sample_size, dimensions), dtype=np.float64)
    sequence = i4_sobol2(dimensions, generator_seed=generator_seed, skip=skip)
    # Each row is a view into ``sample``, so writing through it fills the
    # output array in place with the next point of the sequence.
    for row in sample:
        row[...] = six.next(sequence)
    return sample
Beispiel #5
0
def optimised_lhs(
    sample_size: int,
    dimensions: int,
    iterations: int = 100,
    measure: str = 'euclidean',
    criteria: Union[str, ta.LhsCriteria] = 'maximin',
    options: Optional[Mapping[str, Any]] = None,
    seed: Optional[int] = None,
) -> np.ndarray:
    """Optimised Latin Hypercube Sample design.

    Pick a sample from a collection of latin hypercube designs maximising a
    specified criteria, nominally the 'maximin' criteria of Morris and
    Mitchell [1]_.

    optimised_lhs generates a large number of lhs designs, then selects from
    this pool the design best satisfying a specified criteria, which is a
    function of a specified distance measure (or 'metric' - though 'metric'
    is not necessarily a metric in the mathematical sense).

    A valid distance measure is any supported by scipy's cdist, of which
    typical choices are:
        * 'cityblock' : L1 distance
        * 'euclidean' : L2 distance
        * 'sqeuclidean' : squared L2 distance

    Currently supported comparison criteria are:
        * 'maximin' [1]_.

    Parameters
    ----------
    sample_size: int
        Number of requested sample points
    dimensions: int
        Number of dimensions
    iterations: int, optional
        The number of individual designs to compare. The design maximising
        'criteria' after the requested number of iterations will be returned.
        Must be at least 1. Default = 100.
    measure: str, optional
        Distance measure to be used for comparing designs. References one of
        the measures compatible with scipy's spatial.distance.cdist function.
        Default = 'euclidean'.
    criteria: {str, callable}, optional
        Comparison criteria:
        * 'maximin' - maximin criteria.
        * callable - user supplied function; see below.
    options: {None, mapping}, optional
        Extra options passed through unchanged to eval_criteria. None is
        replaced by an empty dict. Default = None.
    seed: {None, int}, optional
        Value handed to set_seed before any random numbers are drawn.
        Default = None.

    Returns
    -------
    sample: ndarray
        (sample_size, dimensions) array of n sample points in d dimensions.
        Results are scaled on [0,1].

    Raises
    ------
    ValueError
        If iterations is smaller than 1.
    RuntimeError
        If no candidate design produced a criteria value greater than -inf
        (for example when the criteria always evaluates to NaN), so that no
        best design could be selected.

    Notes
    -----
    A user supplied function can be used as a comparison criteria. The supplied
    function accepts a vector of pairwise distances calculated using 'measure'.
    The function should return a quantity intended to be maximised.

    References
    ----------
    [1] Morris, M.D. and Mitchell, T.J., 1995. Exploratory designs for
    computational experiments. Journal of statistical planning and inference,
    43(3), pp.381-402.

    See Also
    --------
    scipy.spatial.distance.cdist
    """
    # pylint: disable=too-many-arguments, too-many-locals, no-member
    set_seed(seed)
    if iterations < 1:
        # Without at least one candidate design there is nothing to return.
        raise ValueError('iterations must be at least 1.')
    if options is None:
        options = {}
    slices = np.linspace(0, 1, sample_size + 1)
    lower = slices[:sample_size]
    # Stratum widths do not change between designs, so compute them once.
    width = slices[1:] - lower
    indices_list = np.arange(sample_size)
    points = np.empty((sample_size, dimensions), order='C', dtype=np.float64)
    sample = np.empty((sample_size, dimensions), order='C', dtype=np.float64)
    best_score = -np.inf
    best_sample = None
    for _ in range(iterations):
        urnd = np.random.random((sample_size, dimensions))
        for j in range(dimensions):
            # One jittered point per stratum, then shuffle the column.
            points[:, j] = urnd[:, j] * width + lower
            index = np.random.permutation(indices_list)
            sample[:, j] = points[index, j]
        delta = eval_criteria(sample, measure, criteria, options)
        if delta > best_score:
            best_score = delta
            # The copy is required: 'sample' is a reused buffer that is
            # overwritten in place by the next candidate design.
            best_sample = sample.copy()
    if best_sample is None:
        raise RuntimeError(
            'No candidate design produced a criteria value greater than -inf.')
    return best_sample
Beispiel #6
0
def mdurs(
    sample_size: int,
    dimensions: int,
    scale_factor: int = 10,
    nearest_k: int = 2,
    measure: str = 'cityblock',
    seed: Optional[int] = None,
) -> np.ndarray:
    """ Multi-Dimensionally Uniform Random Sample.

    Based on the "LHSMDU" algorithm of Deutsch and Deutsch [1]_.

    A candidate pool of ``scale_factor * sample_size`` uniform random points
    is generated. The pool is then thinned one point at a time: for every
    point the mean distance to its ``nearest_k`` nearest neighbours is
    computed, and the point with the smallest mean (the most crowded one) is
    discarded. Thinning stops once ``sample_size`` points remain.

    mdurs is suited to randomised designs with a low number of samples
    (n < 50). It can be used for larger n, but runtime may become an issue
    because the full pairwise distance matrix of the candidate pool is
    recomputed after every removal.

    Valid distance measures are any supported by
    scipy.spatial.distance.cdist, of which typical choices are:
    * 'cityblock' : L1 distance
    * 'euclidean' : L2 distance
    * 'sqeuclidean' : squared L2 distance

    Parameters
    ----------
    sample_size: int
        Number of requested sample points
    dimensions: int
        Number of dimensions
    scale_factor : int, optional
        Size of the candidate pool as a multiple of sample_size
        (default = 10). You should not need to change this; see [1]_.
    nearest_k: int, optional
        Number of nearest neighbours averaged over when scoring a point
        (default = 2). You should not need to change this; see [1]_.
    measure: string, optional
        Distance measure to be used. Passed as the ``metric`` argument to
        scipy's spatial.distance.cdist function. Default = 'cityblock'.
    seed: {None, int}, optional
        Value handed to set_seed before any random numbers are drawn.
        Default = None.

    Returns
    -------
    random_sample: ndarray
        (sample_size, dimensions) array of n sample points in d dimensions.
        Results are scaled on [0,1].

    Notes
    -----
    This algorithm is unusably slow for large n. For n > 50 it is recommended
    to use one of the other sampling algorithms unless the time required to
    generate the sample points is less important than a highly uniform random
    sample.

    References
    ----------
    [1] Deutsch, J.L. and Deutsch, C.V., 2012. Latin hypercube sampling with
    multidimensional uniformity. Journal of Statistical Planning and
    Inference, 142(3), pp.763-772.

    See Also
    --------
    lhs
    optimised_lhs
    scipy.spatial.distance.cdist
    """
    # pylint: disable=no-member
    set_seed(seed)
    pool = np.random.random((scale_factor * sample_size, dimensions))
    while pool.shape[0] > sample_size:
        pairwise = cdist(pool, pool, metric=measure)
        # Skip the first sorted entry of each row: it is the smallest
        # distance, normally the point's zero distance to itself.
        crowding = np.array(
            [np.mean(np.sort(row)[1:1 + nearest_k]) for row in pairwise],
            dtype=np.float64)
        # Drop the point sitting closest to its neighbours.
        pool = np.delete(pool, np.argmin(crowding), axis=0)
    return pool