Example #1
0
def getSamples(N_samples, eps=True, rho=True):
    """
    Draw a latin hypercube sample and encode it as descriptor vectors.

    A 5-dimensional sample is requested from ``doe_lhs.lhs``.  The first four
    coordinates are each turned into a three element one-hot list (which
    third of the unit interval the value falls in).  The last coordinate
    picks a solvent from ``solvent_names`` using equal-width bins, and that
    solvent's "density", "dielectric" and "index" entries are appended.

    **Parameters**

        N_samples: *int*
            Number of samples requested from ``doe_lhs.lhs``.
        eps: *bool, optional*
            If False, the solvent's "dielectric" entry is dropped.
        rho: *bool, optional*
            If False, the solvent's "density" entry is dropped.

    **Returns**

        samples: *list, list*
            The unique encoded samples.  Duplicates are removed through a
            set, so the order of the returned list is not the draw order
            and there may be fewer than N_samples entries.

    **Raises**

        Exception
            If both eps and rho are False.
    """

    if not rho and not eps:
        raise Exception("Error - Either eps or rho should be specified.")

    samples = doe_lhs.lhs(5, N_samples)

    def trio(v):
        # One-hot encode v by thirds of the unit interval.  Cumulative upper
        # edges are used (rather than "chk - 1/3 < v <= chk") so that no
        # value can fall between bins: in floating point 1.0 - 1/3 is
        # slightly larger than 2/3, and v == 0 matched no bin at all, both
        # of which produced [0, 0, 0] and a ValueError further down.
        if v <= 1. / 3.:
            hot = 0
        elif v <= 2. / 3.:
            hot = 1
        else:
            hot = 2
        return [int(k == hot) for k in range(3)]

    # Upper edge of each solvent's equal-width bin on (0, 1].
    n_solvents = len(solvents)
    solvent_ranges = [i * 1.0 / n_solvents for i in range(1, n_solvents + 1)]

    def solvent_properties(v):
        # Look the solvent up once and return its three descriptors.
        name = solvent_names[[v <= s for s in solvent_ranges].index(True)]
        props = solvents[name]
        return [props["density"], props["dielectric"], props["index"]]

    samples = [
        trio(s[0]) + trio(s[1]) + trio(s[2]) + trio(s[3]) +
        solvent_properties(s[-1])
        for s in samples
    ]

    # The last three entries are [density, dielectric, index].
    if not rho:
        samples = [s[:-3] + s[-2:] for s in samples]
    if not eps:
        samples = [s[:-2] + [s[-1]] for s in samples]

    # order the first three trios by Br -> Cl -> I
    labels = ["Br", "Cl", "I"]
    for i, sample in enumerate(samples):
        trios = [sample[0:3], sample[3:6], sample[6:9]]
        labelled = sorted(
            [(labels[t.index(1)], t) for t in trios],
            key=lambda pair: pair[0])
        samples[i] = (labelled[0][1] + labelled[1][1] + labelled[2][1] +
                      sample[9:])

    # Ensure no duplicates
    unique = set(tuple(s) for s in samples)
    return [list(s) for s in unique]
Example #2
0
def create_lhs(
        N_combinations,
        sample_max=[10, 10, 1000, 1000, 1, 10, 10, 10, 100000, 5, 2, 15, 15],
        sample_min=[0, 0, 0, 0, -1, 0, 0, 0, 0, 2, 0, 0, 0],
        num_samples=1):
    """
    Generate a latin hypercube sample for the tersoff parameters.
    Within each entry the 13 parameters are stored in this order:
        gamma, lambda3, c, d, cos(theta_0), n,
        beta, lambda2, B, R, D, lambda1, A

    **Parameters**

        N_combinations: *int*
            Number of 13-parameter entries per sample; N_combinations * 13
            values are requested from lhs.lhs for every sample.
        sample_max: *list, float, optional*
            The maximum for each of the 13 tersoff parameters.
        sample_min: *list, float, optional*
            The minimum for each of the 13 tersoff parameters.
        num_samples: *int, optional*
            The number of samples you wish to generate.

    **Returns**

        lhs_list: *list, list, float*
            One list per row returned by lhs.lhs.  Every entry contributes
            14 values: a randomly chosen m (1 or 3) followed by the 13
            parameters rescaled into [sample_min, sample_max].

    **Raises**

        Exception
            If a rescaled D is larger than the R of the same entry.
    """

    n_params = 13
    # lhs values are treated as lying on [unit_min, unit_min + unit_span].
    unit_min = 0
    unit_span = 1

    unit_rows = lhs.lhs(N_combinations * n_params, num_samples)

    scaled_rows = []
    for unit_row in unit_rows:
        scaled_row = []
        for position, raw in enumerate(unit_row):
            # Index of this value within its 13-parameter entry.
            k = position % n_params

            if k == 0:
                # Prepend m (1 or 3) ahead of gamma; it is drawn separately
                # so that it does not take up one of the LHS dimensions.
                scaled_row.append(random.choice([1, 3]))

            value = (
                ((raw - unit_min) * (sample_max[k] - sample_min[k])) /
                unit_span
            ) + sample_min[k]

            if k == 9:
                # Remember R so the following D can be checked against it.
                R = value
            elif k == 10 and value > R:
                raise Exception("Generated D>R, which is unrealistic.")

            scaled_row.append(value)

        scaled_rows.append(scaled_row)

    return scaled_rows
Example #3
0
def create_lhs(N_points, N_samples, sample_bounds, params=None):
    '''
    Build a latin hypercube sample over an n dimensional space described by
    sample_bounds.  For example, to sample the lennard jones parameter space
    five times:

        parameters = create_lhs(2, 5, [(0, 10), (0, 10)])

    **Parameters**

        N_points: *int*
            The dimensionality of our system.
        N_samples: *int*
            How many samples we want to do.
        sample_bounds: *list, tuple, float* or *list, list, int/float*
            One entry per dimension.  Each entry may be:

                * a tuple (low, high): the value is scaled into that range
                  and cast to float.
                * a tuple (low, high, type): as above, but cast with the
                  given type.
                * a list: one element is picked with random.choice.
                * anything else: used unchanged as a static value.

            So both of the following are allowed:

                * [(0, 10), (3, 20), (-3, 2)]
                * [3, [2, 3], (-5., 2.3, float)]

            sample_bounds itself is not modified; a deep copy is used.

        params: *list, str, optional*
            Names for the dimensions, one to one with sample_bounds.  When
            given, each sample is returned as a dictionary keyed by these
            names instead of as a list.

    **Returns**

        lhs_list: *list, dict/list, float*
            One list of values per sample, or one dictionary per sample if
            params was specified.
    '''

    # Rows of unit-interval coordinates, one row per sample.
    unit_rows = lhs.lhs(N_points, N_samples)

    # Work on a copy, and give every two-element tuple an explicit float cast
    # so that all tuples below have the form (low, high, type).
    spec = copy.deepcopy(sample_bounds)
    for idx, entry in enumerate(spec):
        if isinstance(entry, tuple) and len(entry) == 2:
            spec[idx] = (entry[0], entry[1], float)

    def realise(unit_value, entry):
        # Continuous range: rescale the unit value and cast it.
        if isinstance(entry, tuple):
            low, high, caster = entry
            return caster(unit_value * (high - low) + low)
        # Discrete choices: the unit value is ignored, pick at random.
        if isinstance(entry, list):
            return random.choice(entry)
        # Static value.
        return entry

    lhs_list = []
    for unit_row in unit_rows:
        values = [realise(u, entry) for u, entry in zip(unit_row, spec)]
        if params is not None:
            values = dict(zip(params, values))
        lhs_list.append(values)

    return lhs_list
Example #4
0
def MLE(x, y, sp, n_start=None, method=None):
    '''
    Given sampled data, use the maximum likelihood-estimator to find
    hyperparameters.

    The negated likelihood is minimized from n_start starting points drawn
    with a latin hypercube inside the bounds, and the parameters of the
    best run are returned.

    **Parameters**

        x, y, sp:
            The sampled data, forwarded unchanged to likelihood().  y is
            also used to size the bounds (max(y) and np.var(y)), and
            len(sp[0]) == 1 switches the default bounds to the reduced set.
        n_start: *int, optional*
            Number of starting points.  Defaults to
            NUMBER_OF_RANDOM_STARTING_PARAMETERS.
        method: *str, optional*
            "simple" or "hutter" select their own bounds; any other value
            uses the default bounds.  Also forwarded to likelihood().

    **Returns**

        hyperparams: *list, float/int*
            The optimized parameters with the lowest negated likelihood.
    '''

    # ASSUME: HP = [mu_alpha, sig_alpha, sig_beta, mu_zeta, sig_zeta, sig_m, l1, l2]

    if n_start is None:
        n_start = NUMBER_OF_RANDOM_STARTING_PARAMETERS  # How many samples we do for MLE
    bounds = [(1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)),
              (1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)), (1E-3, 1),
              (1E-3, 1)]

    if len(sp[0]) == 1:
        bounds = bounds[:-2] + [(1.0, 1.0)]

    # The upper limit of the constant prior is clamped to 2E-3 (as in
    # MLE_parallel) so that it can never drop below the 1E-3 lower limit
    # when max(y) is very small.
    if method == 'simple':
        bounds = [
            (1E-3, 1),  # h1_Br
            (1E-3, 1),  # h1_Cl
            (1E-3, 1),  # h1_I
            (1E-3, 1),  # h2_Br
            (1E-3, 1),  # h2_Cl
            (1E-3, 1),  # h2_I
            (1E-3, 1),  # h3_Br
            (1E-3, 1),  # h3_Cl
            (1E-3, 1),  # h3_I
            (1E-3, 1),  # c_Cs
            (1E-3, 1),  # c_FA
            (1E-3, 1),  # c_MA
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, max(2E-3, max(y)))  # constant prior
        ]
    elif method == "hutter":
        bounds = [
            (1E-3, 1),  # h1
            (1E-3, 1),  # h2
            (1E-3, 1),  # h3
            (1E-3, 1),  # c
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, max(2E-3, max(y)))  # constant prior
        ]

    # Unit-interval starting points, rescaled into each parameter's bounds.
    sampled_values = doe_lhs.lhs(len(bounds), samples=n_start)

    init_values = [[
        s * (b[1] - b[0]) + b[0] for s, b in zip(sampled_values[j], bounds)
    ] for j in range(n_start)]

    mle_list = np.zeros([n_start, len(bounds)])
    lkh_list = np.zeros(n_start)

    # MLE = Maximum Likelihood Estimation.  But we use a minimizer! So invert the
    # likelihood instead.
    def f(*args):
        return -1.0 * likelihood(x, y, sp, method, *args)

    # For each possible starting of parameters, minimize and store the resulting likelihood
    for i, start in enumerate(init_values):
        results = op.minimize(f, start, bounds=bounds)
        mle_list[i, :] = results['x']  # Store the optimized parameters
        lkh_list[i] = results.fun  # Store the resulting likelihood

    # Now, select parameters for the max likelihood of these.  Note, min
    # because we inverted the likelihood so we can use a minimizer; NaN
    # results are ignored by nanargmin.
    index = np.nanargmin(lkh_list)
    best_theta = mle_list[index, :]

    return best_theta
Example #5
0
def MLE_parallel(x, y, sp, n_start=None, method=None):
    '''
    Given sampled data, use the maximum likelihood-estimator to find
    hyperparameters.  The optimizations from the different starting points
    are distributed over a multiprocessing pool.

    **Parameters**

        x, y, sp:
            The sampled data, handed to every worker as part of its task
            tuple.  y is also used to size the bounds (max(y) and
            np.var(y)), and len(sp[0]) == 1 switches the default bounds to
            the reduced set.
        n_start: *int, optional*
            Number of starting points.  Defaults to
            NUMBER_OF_RANDOM_STARTING_PARAMETERS.
        method: *str, optional*
            "simple" or "hutter" select their own bounds and worker
            function; any other value uses the defaults.

    **Returns**

        hyperparams: *list, float/int*
            The optimized parameters with the lowest value reported by the
            workers.
    '''

    # ASSUME: HP = [mu_alpha, sig_alpha, sig_beta, mu_zeta, sig_zeta, sig_m, l1, l2]

    import numpy as np

    if n_start is None:
        n_start = NUMBER_OF_RANDOM_STARTING_PARAMETERS  # How many samples we do for MLE
    bounds = [(1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)),
              (1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)), (1E-3, 1),
              (1E-3, 1)]

    if len(sp[0]) == 1:
        bounds = bounds[:-2] + [(1.0, 1.0)]

    # Pick the bounds and the matching worker together.  The worker names
    # are only looked up in the branch that is taken.
    if method == "simple":
        bounds = [
            (1E-3, 1),  # h1_Br
            (1E-3, 1),  # h1_Cl
            (1E-3, 1),  # h1_I
            (1E-3, 1),  # h2_Br
            (1E-3, 1),  # h2_Cl
            (1E-3, 1),  # h2_I
            (1E-3, 1),  # h3_Br
            (1E-3, 1),  # h3_Cl
            (1E-3, 1),  # h3_I
            (1E-3, 1),  # c_Cs
            (1E-3, 1),  # c_FA
            (1E-3, 1),  # c_MA
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, max(2E-3, max(y)))  # constant prior
        ]
        worker = opt_hps_parallel_simple
    elif method == "hutter":
        bounds = [
            (1E-3, 1),  # h1
            (1E-3, 1),  # h2
            (1E-3, 1),  # h3
            (1E-3, 1),  # c
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, max(2E-3, max(y)))  # constant prior
        ]
        worker = opt_hps_parallel_hutter
    else:
        worker = opt_hps_parallel

    sampled_values = doe_lhs.lhs(len(bounds), samples=n_start)

    # One task per starting point: (x, y, sp, start_values, bounds), where
    # start_values are the unit-interval samples rescaled into the bounds.
    init_values = [
        (x, y, sp,
         [s * (b[1] - b[0]) + b[0]
          for s, b in zip(sampled_values[j], bounds)], bounds)
        for j in range(n_start)
    ]

    mle_list = np.zeros([n_start, len(bounds)])
    lkh_list = np.zeros(n_start)

    pool = mp.Pool(processes=PROCESSES_ALLOWED)
    try:
        all_res = pool.map(worker, init_values)
    finally:
        # Always shut the workers down, even if a task raised.
        pool.terminate()

    # Each result is unpacked as (optimized parameters, likelihood value).
    for i, res in enumerate(all_res):
        mle_list[i, :], lkh_list[i] = res

    # Now, select parameters for the max likelihood of these.  Note, min
    # because we inverted the likelihood so we can use a minimizer.
    index = np.nanargmin(lkh_list)
    best_theta = mle_list[index, :]

    return best_theta
Example #6
0
def create_lhs(
        N_combinations,
        sample_max=[10, 10, 1000, 1000, 1, 10, 10, 10, 100000, 5, 2, 15, 15],
        sample_min=[0, 0, 0, 0, -1, 0, 0, 0, 0, 2, 0, 0, 0],
        num_samples=1):
    """
    Generate a latin hypercube sample for the tersoff parameters.
    The parameters of every entry are stored in the following order:
        gamma, lambda3, c, d, cos(theta_0), n,
        beta, lambda2, B, R, D, lambda1, A

    **Parameters**

        N_combinations: *int*
            How many 13-parameter entries each sample holds.  The hypercube
            is drawn with N_combinations * 13 dimensions.
        sample_max: *list, float, optional*
            The maximum for each of the 13 tersoff parameters.
        sample_min: *list, float, optional*
            The minimum for each of the 13 tersoff parameters.
        num_samples: *int, optional*
            The number of samples you wish to generate.

    **Returns**

        lhs_list: *list, list, float*
            One flat list per row returned by lhs.lhs.  For every entry it
            holds m (randomly 1 or 3) followed by the 13 rescaled
            parameters.

    **Raises**

        Exception
            If, within an entry, the rescaled D exceeds the rescaled R.
    """

    PARAMS_PER_ENTRY = 13
    R_SLOT = 9
    D_SLOT = 10

    def to_bounds(unit_value, slot):
        # Map a value from the unit interval (minimum 0, width 1) onto
        # [sample_min[slot], sample_max[slot]].
        width = sample_max[slot] - sample_min[slot]
        return (((unit_value - 0) * width) / 1) + sample_min[slot]

    design = lhs.lhs(N_combinations * PARAMS_PER_ENTRY, num_samples)

    lhs_list = []
    for unit_row in design:
        entry_values = []
        slot = 0
        for unit_value in unit_row:
            if slot == 0:
                # A new entry starts: put m (1 or 3) in front of gamma.  It
                # is chosen outside the hypercube so the LHS still applies
                # to the remaining parameters.
                entry_values.append(random.choice([1, 3]))

            scaled = to_bounds(unit_value, slot)

            if slot == R_SLOT:
                R = scaled
            if slot == D_SLOT and scaled > R:
                raise Exception("Generated D>R, which is unrealistic.")

            entry_values.append(scaled)
            # Wrap around to gamma after A, the last parameter.
            slot = (slot + 1) % PARAMS_PER_ENTRY

        lhs_list.append(entry_values)

    return lhs_list