Example #1
def first_design(emuls, n, filename="design.npy"):
    """Create an initial optimised Latin Hypercube design of n points over the emulators' input ranges."""

    minmax, orig_minmax = emulsetup(emuls)
    act_ref = ref_act(minmax)
    dim = len(act_ref)
    #print("\nDIM:", dim)
    print("\nMINMAX:", minmax)
    print("\nORIG_MINMAX:", orig_minmax)

    olhc_range = [
        it[1] for it in sorted(minmax.items(), key=lambda x: int(x[0]))
    ]
    #print("\nOLHC:", olhc_range)

    design = _gd.optLatinHyperCube(dim, n, 1, olhc_range, "blank", save=False)

    # if a filename was supplied, save the design to file
    if filename is not None:
        print("\nNumpy 'save' to file LHC design for HM...")
        _np.save(filename, design)

    return design
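
A minimal usage sketch (illustrative only: emuls is assumed to be a list of trained Emulator instances from this package, and the design size 60 is arbitrary):

# generate and save a 60-point optimised LHC design over the emulators' input ranges
design = first_design(emuls, 60, filename="wave1_design.npy")
print(design.shape)  # expected (60, number of inputs)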
Example #2
def imp_plot(emuls,
             zs,
             cm,
             var_extra,
             maxno=1,
             olhcmult=100,
             grid=10,
             act=[],
             fileStr="",
             plot=True):
    """Create an implausibility and optical depth plot, made of subplots for each pair of active inputs (or only those specified). Implausibility plots in the lower triangle, optical depth plots in the upper triangle. The diagonal is blank, and implausibility plots are paired with optical depth plots across the diagonal.

    Args:
        emuls (Emulator list): list of Emulator instances
        zs (float list): list of output values to match
        cm (float): implausibility cut-off value
        var_extra (float list): extra (non-emulator) variance on outputs
        maxno (int): which maximum implausibility to consider, default 1
        olhcmult (int): scales the size of the oLHC design across the inputs not in the considered pair, size = olhcmult*(no. of inputs - 2), default 100
        grid (int): divisions of each input range to make, with values of each input for a subplot centred on the gridpoint, default 10
        act (int list): list of active inputs for plot, default [] (all inputs)
        fileStr (str): string to prepend to output files, default ""
        plot (bool): choice to plot (e.g. False for batches), default True

    Returns:
        None

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    check_act(act, sets)
    act_ref = ref_act(minmax)
    plt_ref = ref_plt(act)

    num_inputs = len(minmax)  # number of inputs we'll look at
    dim = num_inputs - 2  # dimensions of input that we'll change with oLHC

    maxno = int(maxno)
    IMP, ODP = [], []  ## need an IMP and ODP for each I_max
    for i in range(maxno):
        IMP.append(_np.zeros((grid, grid)))
        ODP.append(_np.zeros((grid, grid)))

    ## space for all plots, and reference index to subplot indices
    print("Creating plot objects... may take some time...")
    plot = bool(plot)
    rc = num_inputs if act == [] else len(act)
    if plot:
        fig, ax = _plt.subplots(nrows=rc, ncols=rc)
    plot_ref = act_ref if act == [] else ref_plt(act)

    ## reduce sets to only the chosen ones
    less_sets = []
    if act == []:
        less_sets = sets
    else:
        for s in sets:
            if s[0] in act and s[1] in act:
                less_sets.append(s)
    print("HM for input pairs:", less_sets)

    ## calculate plot for each pair of inputs
    for s in less_sets:
        print("\nset:", s)

        ## rows and columns of 2D grid for the {i,j} value of pair of inputs
        X1 = _np.linspace(minmax[str(s[0])][0],
                          minmax[str(s[0])][1],
                          grid,
                          endpoint=False)
        X1 = X1 + 0.5 * (minmax[str(s[0])][1] -
                         minmax[str(s[0])][0]) / float(grid)
        X2 = _np.linspace(minmax[str(s[1])][0],
                          minmax[str(s[1])][1],
                          grid,
                          endpoint=False)
        X2 = X2 + 0.5 * (minmax[str(s[1])][1] -
                         minmax[str(s[1])][0]) / float(grid)
        print("Values of the grid 1:", X1)
        print("Values of the grid 2:", X2)
        x_all = _np.zeros((grid * grid, 2))
        for i in range(0, grid):
            for j in range(0, grid):
                x_all[i * grid + j, 0] = X1[i]
                x_all[i * grid + j, 1] = X2[j]

        ## use an OLHC design for all remaining inputs
        n = dim * int(olhcmult)  # no. of design_points
        N = int(n / 2)  # number of designs from which 1 maximin is chosen
        olhc_range = [it[1] for it in sorted(minmax.items(), key=lambda x: int(x[0])) \
                      if int(it[0])!=s[0] and int(it[0])!=s[1]]
        print("olhc_range:", olhc_range)
        filename = "imp_input_" + str(s[0]) + '_' + str(s[1])
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename)
        x_other_inputs = _np.loadtxt(filename)  # read generated oLHC file in

        ## enough for ALL inputs - we'll mask any inputs not used by a particular emulator later
        x = _np.empty([n, num_inputs])

        ## stepping over the grid {i,j} to build subplot
        print("\nCalculating Implausibilities...")
        for i in range(0, grid):
            for j in range(0, grid):
                I2 = _np.zeros((n, len(emuls)))

                ## loop over outputs (i.e. over emulators)
                for o in range(len(emuls)):
                    E, z, var_e = emuls[o], zs[o], var_extra[o]
                    Eai = E.beliefs.active_index
                    ind_in_active = s[0] in Eai and s[1] in Eai
                    if ind_in_active:

                        ## set the input pair for this subplot
                        x[:, act_ref[str(s[0])]] = x_all[i * grid + j, 0]
                        x[:, act_ref[str(s[1])]] = x_all[i * grid + j, 1]

                        ## figure out what the other inputs active_indices are
                        other_dim = [
                            act_ref[str(key)] for key in act_ref
                            if int(key) not in s
                        ]
                        if len(other_dim) == 1:
                            x[:, other_dim] = _np.array([x_other_inputs]).T
                        else:
                            x[:, other_dim] = x_other_inputs

                        ## inactive inputs are masked
                        act_ind_list = [act_ref[str(l)] for l in Eai]
                        ni = __emuc.Data(x[:, act_ind_list], None, E.basis,
                                         E.par, E.beliefs, E.K)
                        post = __emuc.Posterior(ni,
                                                E.training,
                                                E.par,
                                                E.beliefs,
                                                E.K,
                                                predict=False)
                        mean = post.mean
                        var = _np.diag(post.var)

                        ## calculate implausibility^2 values
                        for r in range(0, n):
                            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

                ## find maximum implausibility across different outputs
                I = _np.sqrt(I2)
                odp_count = _np.zeros(maxno, dtype=_np.uint32)
                Imaxes = _np.empty([n, maxno])
                for r in range(0, n):
                    Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])
                    for m in range(maxno):
                        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
                            odp_count[m] += 1

                for m in range(maxno):
                    IMP[m][i, j] = _np.amin(
                        Imaxes[:, -(m + 1)])  # minimise across n points
                    ODP[m][i, j] = float(odp_count[m]) / float(n)

        ## save the results to file
        nfileStr = fileStr + "_" if fileStr != "" else fileStr
        for m in range(maxno):  ## different file for each max
            _np.savetxt(
                nfileStr + str(m + 1) + "_" + "IMP_" + str(s[0]) + '_' +
                str(s[1]), IMP[m])
            _np.savetxt(
                nfileStr + str(m + 1) + "_" + "ODP_" + str(s[0]) + '_' +
                str(s[1]), ODP[m])

        if plot:
            make_plots(s, plt_ref, cm, maxno, ax, IMP, ODP, minmax=minmax)

    if plot:
        plot_options(plt_ref, ax, fig, minmax)
        _plt.show()

    return
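
For each output o, the implausibility computed above is I_o(x) = |mean_o(x) - z_o| / sqrt(var_o(x) + var_extra_o), and each subplot uses the maxno-th largest I_o across outputs. An illustrative call, assuming two emulators fitted to two outputs (the target values, cut-off, and extra variances below are placeholders):

# history-match both outputs with implausibility cut-off 3.0
imp_plot(emuls, zs=[0.1, 0.9], cm=3.0, var_extra=[0.01, 0.01],
         maxno=1, olhcmult=100, grid=10, fileStr="wave1")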
Example #3
import numpy as np
# assumed imports for this package (exact module paths may differ):
import gp_emu_uqsa.design_inputs as d
import gp_emu_uqsa.noise_fit as gn

#######################
## create noisy data ##
#######################

def mfunc(x): # mean function
    return 3.0*x[0]**3 + np.exp(np.cos(10.0*x[1])*np.cos(5.0*x[0])**2)

def nfunc(x): # noise amplitude function
    n = 0.5 * ( x[1]*(np.cos(6*x[0])**2 + 0.1) )
    return np.abs(n) ## must return a positive value
    
#### configuration of design inputs
dim, n, N = 2, 500, 200
minmax = [ [0.0,1.0] , [0.0,1.0] ]
filename = "INPUTS"
d.optLatinHyperCube(dim, n, N, minmax, filename)

#### we would perform our simulations here instead
x = np.loadtxt("INPUTS") # design inputs
y = np.array([mfunc(xi) for xi in x]) # mean function values
sd = np.array([nfunc(xi) for xi in x]) # pointwise noise amplitudes
y = y + sd*np.random.randn(y.size) # add heteroscedastic noise
np.savetxt("OUTPUTS",y)


###################
## fit the noise ##
###################

data, noise = "config-data" , "config-noise"
gn.noisefit(data, noise, stopat=10, olhcmult=200)
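
noisefit() saves its noise estimates to 'noise-inputs' and 'noise-outputs' (three columns: mean, lower interval, upper interval; see Example #5). A minimal sketch for inspecting the result, assuming the 2D inputs above:

import numpy as np
import matplotlib.pyplot as plt

xn = np.loadtxt("noise-inputs")   # oLHC points where the noise was predicted
yn = np.loadtxt("noise-outputs")  # columns: mean, lower interval, upper interval

# colour the input plane by the estimated noise level
plt.scatter(xn[:, 0], xn[:, 1], c=yn[:, 0])
plt.colorbar(label="estimated noise")
plt.xlabel("x0")
plt.ylabel("x1")
plt.show()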
Example #4
def new_wave_design(emuls,
                    zs,
                    cm,
                    var_extra,
                    datafiles,
                    maxno=1,
                    olhcmult=100,
                    act=[],
                    fileStr=""):
    """Create a set of non-implausible design inputs to use for more simulations or experiments. Datafiles of non-implausible inputs (and corresponding outputs) should be provided so the design is optimised with respect to this data. An optimised Latin Hypercube design is made and only non-implausible inputs from this are kept. To adjust the design size while fixing cm, try adjusting olhcmult.

    Args:
        emuls (Emulator list): list of Emulator instances
        zs (float list): list of output values to match
        cm (float): implausibility cut-off value
        var_extra (float list): extra (non-emulator) variance on outputs
        datafiles (str list): names of the inputs and outputs files. These should correspond to non-implausible inputs only; see the nonimp_data() function
        maxno (int): which maximum implausibility to consider, default 1
        olhcmult (int): scales the size of the oLHC design across all inputs, size = olhcmult*(no. of inputs), default 100
        act (int list): list of active inputs to include, default [] (all inputs)
        fileStr (str): string to prepend to output files, default ""

    Returns:
        nimp_inputs (int): number of non-implausible design points created

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    act_ref = ref_act(minmax)
    check_act(act, sets)
    num_inputs = len(minmax)
    dim = num_inputs
    maxno = int(maxno)

    sim_x, sim_y = load_datafiles(datafiles, orig_minmax)

    ## use an OLHC design for all remaining inputs
    n = dim * int(olhcmult)  # no. of design_points
    N = int(n / 2)  # number of designs from which 1 maximin is chosen
    olhc_range = [
        it[1] for it in sorted(minmax.items(), key=lambda x: int(x[0]))
    ]
    print("olhc_range:", olhc_range)
    filename = "olhc_des"
    if sim_x is None:
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename)
    else:
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename, fextra=sim_x)
    x = _np.loadtxt(filename)  # read generated oLHC file in

    print("\nCalculating Implausibilities...")
    I2 = _np.zeros((n, len(emuls)))

    ## loop over outputs (i.e. over emulators)
    for o in range(len(emuls)):
        E, z, var_e = emuls[o], zs[o], var_extra[o]
        Eai = E.beliefs.active_index
        act_ind_list = [act_ref[str(l)] for l in Eai]

        ni = __emuc.Data(x[:, act_ind_list], None, E.basis, E.par, E.beliefs,
                         E.K)
        post = __emuc.Posterior(ni,
                                E.training,
                                E.par,
                                E.beliefs,
                                E.K,
                                predict=False)
        mean = post.mean
        var = _np.diag(post.var)

        ## calculate implausibility^2 values
        for r in range(0, n):
            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

    ## find maximum implausibility across different outputs
    I = _np.sqrt(I2)
    Imaxes = _np.empty([n, maxno])
    nimp_inputs = []
    for r in range(0, n):
        Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])

        m = maxno - 1
        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
            nimp_inputs.append(x[r, :])

    ## save the non-implausible inputs to a single file
    nfileStr = fileStr + "_" if fileStr != "" else fileStr
    _np.savetxt(nfileStr + datafiles[0], nimp_inputs)

    print("Generated", len(nimp_inputs), "new data points")

    return len(nimp_inputs)
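
An illustrative call (the datafile names are placeholders and should point at the current wave's non-implausible inputs and outputs, e.g. as produced by nonimp_data()):

# keep only the oLHC points whose maximum implausibility falls below the cut-off
kept = new_wave_design(emuls, zs=[0.1, 0.9], cm=3.0, var_extra=[0.01, 0.01],
                       datafiles=["nonimp-inputs", "nonimp-outputs"],
                       maxno=1, olhcmult=200, fileStr="wave2")
print(kept, "non-implausible design points kept")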
Example #5
def noisefit(data, noise, stopat=20, olhcmult=100, samples=200, fileStr=""):
    """Try to fit one emualtor to the mean of the data and another emulator to the noise of the data. Results of estimating the noise are saved to the files 'noise-inputs' and 'noise-outputs'.

    Args:
        data (str): Name of configuration file for fitting the input-output data.
        noise (str): Name of configuration file for fitting the input-noise.
        stopat (int): Number of iterations.
        olhcmult (int): Scales the number of data points in the results files.
        samples (int): Number of posterior draws used to estimate the noise at each point, default 200.
        fileStr (str): String to prepend to the output files, default "".

    Returns:
        None

    """

    #### check transform option
    ## if not "log", no transformation will be done

    #### check consistency
    datac, noisec = __read_file(data), __read_file(noise)
    datab, noiseb = __read_file(datac["beliefs"]), __read_file(
        noisec["beliefs"])
    if datac["inputs"] != noisec["inputs"]:
        print("\nWARNING: different inputs files in config files. Exiting.")
        return None
    if datab["mucm"] == 'T':
        print(
            "\nWARNING: data beliefs must have mucm F, "
            "as sigma (presumably) not valid if extra pointwise variance is added. Exiting."
        )
        return None
    if datab["fix_nugget"] == 'T' or noiseb["fix_nugget"] == 'T':
        print("\nWARNING: data and noise beliefs need fix_nugget F. Exiting.")
        return None
    if datac["tv_config"] != noisec["tv_config"]:
        print("\nWARNING: different tv_config in config files. Exiting.")
        return None
    if noisec["outputs"] != "zp-outputs":
        print("\nWARNING: noise config outputs must be 'zp-outputs'. Exiting.")
        return None

    ## setup emulators here
    GD = g.setup(data, datashuffle=True, scaleinputs=False)
    ## create 'zp-outputs' file with zeros
    np.savetxt("zp-outputs", \
      np.zeros(GD.training.outputs.size + GD.validation.outputs.size*GD.tv_conf.noV).T)
    GN = g.setup(noise, datashuffle=True, scaleinputs=False)

    ## if shuffled, fix the inconsistencies
    GN.training.inputs = GD.training.inputs
    GN.validation.inputs = GD.validation.inputs
    GN.training.remake()
    GN.validation.remake()

    ## more than one validation set is not supported for noise fitting
    if GD.all_data.tv.noV > 1:
        print(
            "\nWARNING: should have 0 or 1 validation sets for noise fitting. Exiting."
        )
        ## extra validation sets would be totally unused
        return None
    valsets = GD.all_data.tv.noV > 0

    #### step 1 ####
    print("\n****************" "\nTRAIN GP ON DATA" "\n****************")
    #GD = g.setup(data, datashuffle=False, scaleinputs=False)
    x = GD.training.inputs  # values of the inputs
    t = GD.training.outputs  # values of the noisy outputs
    if valsets:
        xv = GD.validation.inputs  # values of the inputs
        tv = GD.validation.outputs

    #print(np.amin(x), np.amax(x))
    g.train(GD, no_retrain=valsets)

    r = np.zeros(t.size)
    if valsets:
        rv = np.zeros(tv.size)

    ## we stay within this loop until done 'stopat' fits
    count = 0
    while True:
        if count == 0:
            xp = __emuc.Data(x, None, GD.basis, GD.par, GD.beliefs, GD.K)
            if valsets:
                xvp = __emuc.Data(xv, None, GD.basis, GD.par, GD.beliefs, GD.K)
        else:
            #### step 5 - return to step 2 if not converged ####
            xp = __emuc.Data(x, None, GD.basis, GD.par, GD.beliefs, GD.K)
            xp.set_r(r)
            xp.make_A(s2=GD.par.sigma**2, predict=True)
            if valsets:
                xvp = __emuc.Data(xv, None, GD.basis, GD.par, GD.beliefs, GD.K)
                xvp.set_r(rv)
                xvp.make_A(s2=GD.par.sigma**2, predict=True)
        count = count + 1

        #### step 2 - generate D'={(xi,zi)} ####
        print("\n***********************"
              "\nESTIMATING NOISE LEVELS " + str(count) +
              "\n***********************")

        post = __emuc.Posterior(xp,
                                GD.training,
                                GD.par,
                                GD.beliefs,
                                GD.K,
                                predict=True)
        L = np.linalg.cholesky(post.var)
        z_prime = np.zeros(t.size)
        s = samples
        for j in range(s):  # predict 's' different values
            u = np.random.randn(t.size)
            tij = post.mean + L.dot(u)
            z_prime = z_prime + 0.5 * (t - tij)**2
        z_prime = __transform(z_prime / float(s))
        np.savetxt('zp-outputs', z_prime)

        # estimate noise levels for validation set
        if valsets:
            post = __emuc.Posterior(xvp,
                                    GD.training,
                                    GD.par,
                                    GD.beliefs,
                                    GD.K,
                                    predict=True)
            L = np.linalg.cholesky(post.var)
            z_prime_V = np.zeros(tv.size)
            s = samples
            for j in range(s):  # predict 's' different values
                u = np.random.randn(tv.size)
                tij = post.mean + L.dot(u)
                z_prime_V = z_prime_V + 0.5 * (tv - tij)**2
            z_prime_V = __transform(z_prime_V / float(s))

        #### step 3 ####
        # train a GP on x and z
        print("\n*****************"
              "\nTRAIN GP ON NOISE " + str(count) + "\n*****************")
        ## need to setup again so as to re-read updated zp-outputs
        #GN = g.setup(noise, datashuffle=False, scaleinputs=False)
        #GN.training.outputs = np.loadtxt('zp-outputs').T
        GN.training.outputs = z_prime.T
        GN.training.remake()
        if valsets:
            GN.validation.outputs = z_prime_V.T
            GN.validation.remake()
        ## fix to allow retraining using same training set against validation
        GN.tv_conf.no_of_trains = 0
        GN.tv_conf.retrain = 'y'
        g.train(GN, no_retrain=valsets)

        #### step 4 - use GN to predict noise values for G3 ####
        print("\n***********************************"
              "\nTRAIN GP ON DATA WITH NOISE FROM GP " + str(count) +
              "\n***********************************")

        xp_GN = __emuc.Data(x, None, GN.basis, GN.par, GN.beliefs, GN.K)
        p_GN = __emuc.Posterior(xp_GN,
                                GN.training,
                                GN.par,
                                GN.beliefs,
                                GN.K,
                                predict=True)
        r = __untransform(p_GN.mean, np.diag(p_GN.var))
        #r = __untransform(p_GN.mean, 0.0)

        #GD = g.setup(data, datashuffle=False, scaleinputs=False)
        GD.training.set_r(r)

        ## add estimated r to the validation set for better diagnostics
        if valsets:
            v_GN = __emuc.Data(xv, None, GN.basis, GN.par, GN.beliefs, GN.K)
            pv_GN = __emuc.Posterior(v_GN,
                                     GN.training,
                                     GN.par,
                                     GN.beliefs,
                                     GN.K,
                                     predict=True)
            rv = __untransform(pv_GN.mean, np.diag(pv_GN.var))
            #rv = __untransform(pv_GN.mean, 0.0)
            GD.validation.set_r(rv)

        ## fix to allow retraining using same training set against validation
        GD.tv_conf.no_of_trains = 0
        GD.tv_conf.retrain = 'y'
        g.train(GD, no_retrain=valsets)

        # break when we've done 'stopat' fits
        if count == stopat:
            print("\nCompleted", count, "fits, stopping here.")

            ## use an OLHC design for x_values of noise guesses we'll save
            print("\nGenerating input points to predict noise values at...")
            n = x[0].size * int(olhcmult)  # no. of design points
            N = int(n)  # number of candidate designs from which 1 maximin is chosen
            olhc_range = [[np.amin(col), np.amax(col)] for col in x.T]
            #print("olhc_range:", olhc_range)
            filename = "x_range_input"
            _gd.optLatinHyperCube(x[0].size, n, N, olhc_range, filename)
            x_range = np.loadtxt(filename)  # read generated oLHC file in

            # if 1D inputs, store in 2D array with only 1 column
            if x[0].size == 1:
                x_range = np.array([
                    x_range,
                ]).T

            ## save data to file
            x_plot = __emuc.Data(x_range, None, GN.basis, GN.par, GN.beliefs,
                                 GN.K)
            p_plot = __emuc.Posterior(x_plot,
                                      GN.training,
                                      GN.par,
                                      GN.beliefs,
                                      GN.K,
                                      predict=True)
            mean_plot = p_plot.mean
            var_plot = p_plot.var
            p_plot.interval()
            UI, LI = p_plot.UI, p_plot.LI

            print("\nSaving results to file...")
            nfileStr = fileStr + "_" if fileStr != "" else fileStr
            np.savetxt(nfileStr + 'noise-inputs', x_range)

            #np.savetxt(nfileStr + 'noise-outputs', np.transpose(\
            #  [np.sqrt(__untransform(mean_plot, np.diag(var_plot), reg=REG)),\
            #  np.sqrt(__untransform(LI, 0.0, reg=REG)), np.sqrt(__untransform(UI, 0.0, reg=REG))] ) )

            np.savetxt(nfileStr + 'noise-outputs', np.transpose(\
              [  __untransform(mean_plot, np.diag(var_plot)) ,\
                 __untransform(LI, 0.0) ,\
                 __untransform(UI, 0.0) ] ) )

            break

    return None
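
The __transform() and __untransform() helpers are not shown in this example. Given the "log" transform option noted at the top of the function, a plausible minimal implementation (an assumption, not necessarily the package's actual code) is:

import numpy as np

def __transform(z):
    # fit the noise GP to the log of the variance estimates, so its output is unbounded
    return np.log(z)

def __untransform(mean, var):
    # mean of a lognormal with log-scale parameters (mean, var);
    # reduces to np.exp(mean) when var == 0, as used for the interval bounds above
    return np.exp(mean + var / 2.0)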