## NOTE: these functions assume this module's usual header imports (not shown
## in this section): numpy (as np / _np), matplotlib.pyplot (as _plt), and the
## package's own submodules bound to g, _gd, __emuc, __emuk, __emuo and __emup.

def noisepost(data, noise, X_inputs):
    """Load previously fitted data and noise emulators from config files, and
    return the posterior mean of the data and the predicted noise values at
    the new input points X_inputs.

    Args:
        data (str): Name of configuration file for the data emulator.
        noise (str): Name of configuration file for the noise emulator.
        X_inputs (Numpy array): New input points to predict at.

    Returns:
        (GD_mean, R): (Numpy array, Numpy array): Predictive mean of the data,
          predicted noise values.

    """

    #### load config files for data and noise
    ## note that inputs and outputs are already in same order for both
    GD = g.setup(data, datashuffle=False, scaleinputs=False)
    GN = g.setup(noise, datashuffle=False, scaleinputs=False)

    #### prediction of r at known data points 'x'
    ## get r values from the noise emulator
    x = GD.training.inputs
    x1 = __emuc.Data(x, None, GN.basis, GN.par, GN.beliefs, GN.K)
    p1 = __emuc.Posterior(x1, GN.training, GN.par, GN.beliefs, GN.K, predict=True)
    GN_mean, GN_var = p1.mean, p1.var
    r = np.exp(GN_mean + np.diag(GN_var) / 2.0)  # lognormal mean, untransforms from log scale

    ## set r values in the data emulator
    GD.training.set_r(r)
    GD.training.make_A(s2=GD.par.sigma**2, predict=True)

    #### prediction of R at new data points 'X'
    ## get R values from the noise emulator
    X = X_inputs
    x2 = __emuc.Data(X, None, GN.basis, GN.par, GN.beliefs, GN.K)
    p2 = __emuc.Posterior(x2, GN.training, GN.par, GN.beliefs, GN.K, predict=True)
    GN_mean, GN_var = p2.mean, p2.var
    R = np.exp(GN_mean + np.diag(GN_var) / 2.0)  ## mean of noise prediction

    ## set R values for new points (X) Data object
    xp = __emuc.Data(X, None, GD.basis, GD.par, GD.beliefs, GD.K)
    xp.set_r(R)
    xp.make_A(s2=GD.par.sigma**2, predict=True)
    post = __emuc.Posterior(xp, GD.training, GD.par, GD.beliefs, GD.K, predict=True)
    GD_mean = post.mean  ## mean of mean prediction

    ## return the predictive mean for data and noise
    return GD_mean, R
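## A hypothetical usage sketch for noisepost(); the config file names and the
## test-input file below are placeholders, not fixed by the package:
#
#   X_new = np.loadtxt("new-inputs")   # same number of columns as training inputs
#   data_mean, noise_vals = noisepost("config-data-recon", "config-noise-recon", X_new)
#   np.savetxt("recon-prediction", np.transpose([data_mean, noise_vals]))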
def posterior_sample(E, x, predict=True):
    """Return a sample from the posterior given inputs x and emulator E.

    Args:
        E (Emulator): Emulator instance.
        x (Numpy array): Test points (same dimensions as emulator training data).
        predict (bool): Default True. Gives posterior prediction (as opposed
          to posterior estimation).

    Returns:
        sample (Numpy array): Posterior sample.

    """

    # if 1D inputs, store in 2D array with only 1 column
    if x[0].size == 1:
        x = _np.array([x,]).T

    ## tests here to make sure that x is compatible with E
    if x[0,:].size != E.training.inputs[0,:].size:
        print("ERROR: test points have different number of columns "
              "to data in emulator. Exiting.")
        exit()

    xs = __emuc.Data(x, None, E.basis, E.par, E.beliefs, E.K)
    p = __emuc.Posterior(xs, E.training, E.par, E.beliefs, E.K, predict=predict)
    pmean, pvar = p.mean, p.var

    # draw a sample from the posterior distribution and return it
    L = _np.linalg.cholesky(pvar)
    u = _np.random.randn(x[:,0].size)
    sample = pmean + L.dot(u)
    return sample
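## A minimal sketch of drawing a posterior sample, assuming an emulator built
## and trained beforehand (the config file name is a placeholder; train() is
## this package's fitting routine, called as g.train() elsewhere in this module):
#
#   E = setup("config-file")
#   train(E)
#   xtest = _np.linspace(0.0, 1.0, 100)   # 1D inputs are reshaped internally
#   draw = posterior_sample(E, xtest)     # one GP posterior draw at xtest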
def posterior(E, x, predict=True):
    """Return the posterior mean and variance given inputs x and emulator E.

    Args:
        E (Emulator): Emulator instance.
        x (Numpy array): Test points (same dimensions as emulator training data).
        predict (bool): Default True. Gives posterior prediction (as opposed
          to posterior estimation).

    Returns:
        (pmean, pvar): (Numpy array, Numpy array): Posterior mean, posterior
          variance.

    """

    # if 1D inputs, store in 2D array with only 1 column
    if x[0].size == 1:
        x = _np.array([x,]).T

    ## tests here to make sure that x is compatible with E
    if x[0,:].size != E.training.inputs[0,:].size:
        print("ERROR: test points have different number of columns "
              "to data in emulator. Exiting.")
        exit()

    xs = __emuc.Data(x, None, E.basis, E.par, E.beliefs, E.K)
    p = __emuc.Posterior(xs, E.training, E.par, E.beliefs, E.K, predict=predict)
    pmean, pvar = p.mean, p.var
    return (pmean, pvar)
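## Sketch: pointwise predictions with an approximate 95% band from posterior(),
## under the same assumptions as the posterior_sample() example above:
#
#   pmean, pvar = posterior(E, xtest)
#   sd = _np.sqrt(_np.diag(pvar))               # pointwise standard deviations
#   lower, upper = pmean - 2.0*sd, pmean + 2.0*sd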
def setup(config_file, datashuffle=True, scaleinputs=True):
    """Do initialisation of classes Beliefs, Hyperparams, Basis, TV_config,
    All_Data, Data, Posterior, Optimize, and K. Return instance of Emulator
    class.

    Args:
        config_file (str): Name of configuration file.
        datashuffle (bool): Default is True. Randomly orders dataset.
        scaleinputs (bool): Default is True. Scales inputs into range 0 to 1.

    Returns:
        Emulator: Initialised Emulator class.

    """

    # returns instance of configuration
    config = __emuc.Config(config_file)

    # read from beliefs file
    beliefs = __emuc.Beliefs(config.beliefs)
    par = __emuc.Hyperparams(beliefs)
    basis = __emuc.Basis(beliefs)

    # split data T & V ; (k,c,noV) - no.sets, set for first V, no.V.sets
    tv_conf = __emuc.TV_config(*(config.tv_config))
    all_data = __emuc.All_Data(
        config.inputs, config.outputs, tv_conf,
        beliefs, par, datashuffle, scaleinputs)

    # build the kernel
    if beliefs.alt_nugget != 'T':
        K = __emuk.kernel(all_data.x_full[0].size, par)
    else:
        print("\n*** Using alternative nugget ***")
        K = __emuk.kernel_alt_nug(all_data.x_full[0].size, par)

    # build remaining structures
    (x_T, y_T) = all_data.choose_T()
    (x_V, y_V) = all_data.choose_V()
    training = __emuc.Data(x_T, y_T, basis, par, beliefs, K)
    validation = __emuc.Data(x_V, y_V, basis, par, beliefs, K)
    post = __emuc.Posterior(validation, training, par, beliefs, K)
    opt_T = __emuo.Optimize(training, basis, par, beliefs, config)

    return __emuc.Emulator(
        config, beliefs, par, basis, tv_conf,
        all_data, training, validation, post, opt_T, K)
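## Typical workflow sketch: build the emulator from a config file, train it,
## then predict. The config file name is a placeholder:
#
#   E = setup("config-file", datashuffle=True, scaleinputs=True)
#   train(E)                              # fit the hyperparameters
#   pmean, pvar = posterior(E, xtest)     # then predict at new points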
def imp_plot(emuls, zs, cm, var_extra, maxno=1, olhcmult=100, grid=10,
             act=[], fileStr="", plot=True):
    """Create an implausibility and optical depth plot, made of subplots for
    each pair of active inputs (or only those specified). Implausibility plots
    in the lower triangle, optical depth plots in the upper triangle. The
    diagonal is blank, and implausibility plots are paired with optical depth
    plots across the diagonal.

    Args:
        emuls (Emulator list): List of Emulator instances.
        zs (float list): List of output values to match.
        cm (float list): Cut-off for implausibility.
        var_extra (float list): Extra (non-emulator) variance on outputs.
        maxno (int): Which maximum implausibility to consider, default 1.
        olhcmult (int): Option for size of oLHC design across other inputs not
          in the considered pair, size = olhcmult*(no. active inputs - 2),
          default 100.
        grid (int): Divisions of each input range to make, with values of each
          input for a subplot centred on the gridpoint, default 10.
        act (int list): List of active inputs for plot, default [] (all inputs).
        fileStr (str): String to prepend to output files, default "".
        plot (bool): Choice to plot (e.g. False for batches), default True.

    Returns:
        None

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    check_act(act, sets)
    act_ref = ref_act(minmax)
    plt_ref = ref_plt(act)

    num_inputs = len(minmax)  # number of inputs we'll look at
    dim = num_inputs - 2  # dimensions of input that we'll change with oLHC
    maxno = int(maxno)

    ## need an IMP and ODP for each I_max
    IMP, ODP = [], []
    for i in range(maxno):
        IMP.append(_np.zeros((grid, grid)))
        ODP.append(_np.zeros((grid, grid)))

    ## space for all plots, and reference index to subplot indices
    print("Creating plot objects... may take some time...")
    rc = num_inputs if act == [] else len(act)
    if plot:
        fig, ax = _plt.subplots(nrows=rc, ncols=rc)
    plot_ref = act_ref if act == [] else ref_plt(act)

    ## reduce sets to only the chosen ones
    less_sets = []
    if act == []:
        less_sets = sets
    else:
        for s in sets:
            if s[0] in act and s[1] in act:
                less_sets.append(s)
    print("HM for input pairs:", less_sets)

    ## calculate plot for each pair of inputs
    for s in less_sets:
        print("\nset:", s)

        ## rows and columns of 2D grid for the {i,j} value of pair of inputs
        X1 = _np.linspace(minmax[str(s[0])][0], minmax[str(s[0])][1], grid, endpoint=False)
        X1 = X1 + 0.5 * (minmax[str(s[0])][1] - minmax[str(s[0])][0]) / float(grid)
        X2 = _np.linspace(minmax[str(s[1])][0], minmax[str(s[1])][1], grid, endpoint=False)
        X2 = X2 + 0.5 * (minmax[str(s[1])][1] - minmax[str(s[1])][0]) / float(grid)
        print("Values of the grid 1:", X1)
        print("Values of the grid 2:", X2)

        x_all = _np.zeros((grid * grid, 2))
        for i in range(0, grid):
            for j in range(0, grid):
                x_all[i * grid + j, 0] = X1[i]
                x_all[i * grid + j, 1] = X2[j]

        ## use an oLHC design for all remaining inputs
        n = dim * int(olhcmult)  # no. of design points
        N = int(n / 2)  # number of designs from which 1 maximin is chosen
        olhc_range = [it[1] for it in sorted(minmax.items(), key=lambda x: int(x[0]))
                      if int(it[0]) != s[0] and int(it[0]) != s[1]]
        print("olhc_range:", olhc_range)
        filename = "imp_input_" + str(s[0]) + '_' + str(s[1])
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename)
        x_other_inputs = _np.loadtxt(filename)  # read generated oLHC file in

        ## enough for ALL inputs - we'll mask any inputs not used by a particular emulator later
        x = _np.empty([n, num_inputs])

        ## stepping over the grid {i,j} to build subplot
        print("\nCalculating Implausibilities...")
        for i in range(0, grid):
            for j in range(0, grid):
                I2 = _np.zeros((n, len(emuls)))

                ## loop over outputs (i.e. over emulators)
                for o in range(len(emuls)):
                    E, z, var_e = emuls[o], zs[o], var_extra[o]
                    Eai = E.beliefs.active_index
                    ind_in_active = s[0] in Eai and s[1] in Eai
                    if ind_in_active:
                        ## set the input pair for this subplot
                        x[:, act_ref[str(s[0])]] = x_all[i * grid + j, 0]
                        x[:, act_ref[str(s[1])]] = x_all[i * grid + j, 1]

                        ## figure out what the other inputs' active indices are
                        other_dim = [act_ref[str(key)] for key in act_ref if int(key) not in s]
                        if len(other_dim) == 1:
                            x[:, other_dim] = _np.array([x_other_inputs,]).T
                        else:
                            x[:, other_dim] = x_other_inputs

                        ## inactive inputs are masked
                        act_ind_list = [act_ref[str(l)] for l in Eai]
                        ni = __emuc.Data(x[:, act_ind_list], None, E.basis, E.par, E.beliefs, E.K)
                        post = __emuc.Posterior(ni, E.training, E.par, E.beliefs, E.K, predict=False)
                        mean = post.mean
                        var = _np.diag(post.var)

                        ## calculate implausibility^2 values
                        for r in range(0, n):
                            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

                ## find maximum implausibility across different outputs
                I = _np.sqrt(I2)
                odp_count = _np.zeros(maxno, dtype=_np.uint32)
                Imaxes = _np.empty([n, maxno])
                for r in range(0, n):
                    Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])[-maxno:]
                    for m in range(maxno):
                        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
                            odp_count[m] = odp_count[m] + 1

                for m in range(maxno):
                    IMP[m][i, j] = _np.amin(Imaxes[:, -(m + 1)])  # minimise across n points
                    ODP[m][i, j] = float(odp_count[m]) / float(n)

        ## save the results to file
        nfileStr = fileStr + "_" if fileStr != "" else fileStr
        for m in range(maxno):  ## different file for each max
            _np.savetxt(nfileStr + str(m + 1) + "_" + "IMP_" + str(s[0]) + '_' + str(s[1]), IMP[m])
            _np.savetxt(nfileStr + str(m + 1) + "_" + "ODP_" + str(s[0]) + '_' + str(s[1]), ODP[m])

        if plot:
            make_plots(s, plt_ref, cm, maxno, ax, IMP, ODP, minmax=minmax)

    if plot:
        plot_options(plt_ref, ax, fig, minmax)
        _plt.show()

    return
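## Illustrative call for a history-matching plot over two emulators; every
## value below is a made-up placeholder:
#
#   imp_plot([E0, E1], zs=[0.1, 0.5], cm=3.0, var_extra=[0.01, 0.01],
#            maxno=1, olhcmult=100, grid=10, fileStr="wave1")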
def new_wave_design(emuls, zs, cm, var_extra, datafiles, maxno=1,
                    olhcmult=100, act=[], fileStr=""):
    """Create a set of non-implausible design inputs to use for more
    simulations or experiments. Datafiles of non-implausible inputs (and
    corresponding outputs) should be provided so the design is optimised with
    respect to this data. An optimised Latin Hypercube design is made and only
    non-implausible inputs from this are kept. To adjust the design size while
    fixing cm, try adjusting olhcmult.

    Args:
        emuls (Emulator list): List of Emulator instances.
        zs (float list): List of output values to match.
        cm (float list): Cut-off for implausibility.
        var_extra (float list): Extra (non-emulator) variance on outputs.
        datafiles (str list): Names of the inputs and outputs files. These
          should correspond to non-implausible inputs only; see the
          nonimp_data() function.
        maxno (int): Which maximum implausibility to consider, default 1.
        olhcmult (int): Option for size of oLHC design across other inputs not
          in the considered pair, size = olhcmult*(no. active inputs - 2),
          default 100.
        act (int list): List of active inputs for plot, default [] (all inputs).
        fileStr (str): String to prepend to output files, default "".

    Returns:
        nimp_inputs (int): Number of non-implausible design points created.

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    act_ref = ref_act(minmax)
    check_act(act, sets)

    num_inputs = len(minmax)
    dim = num_inputs
    maxno = int(maxno)

    sim_x, sim_y = load_datafiles(datafiles, orig_minmax)

    ## use an oLHC design for all remaining inputs
    n = dim * int(olhcmult)  # no. of design points
    N = int(n / 2)  # number of designs from which 1 maximin is chosen
    olhc_range = [it[1] for it in sorted(minmax.items(), key=lambda x: int(x[0]))]
    print("olhc_range:", olhc_range)
    filename = "olhc_des"
    if sim_x is None:
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename)
    else:
        _gd.optLatinHyperCube(dim, n, N, olhc_range, filename, fextra=sim_x)
    x = _np.loadtxt(filename)  # read generated oLHC file in

    print("\nCalculating Implausibilities...")
    I2 = _np.zeros((n, len(emuls)))

    ## loop over outputs (i.e. over emulators)
    for o in range(len(emuls)):
        E, z, var_e = emuls[o], zs[o], var_extra[o]
        Eai = E.beliefs.active_index
        act_ind_list = [act_ref[str(l)] for l in Eai]
        ni = __emuc.Data(x[:, act_ind_list], None, E.basis, E.par, E.beliefs, E.K)
        post = __emuc.Posterior(ni, E.training, E.par, E.beliefs, E.K, predict=False)
        mean = post.mean
        var = _np.diag(post.var)

        ## calculate implausibility^2 values
        for r in range(0, n):
            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

    ## find maximum implausibility across different outputs
    I = _np.sqrt(I2)
    Imaxes = _np.empty([n, maxno])
    nimp_inputs = []
    for r in range(0, n):
        Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])[-maxno:]
        m = maxno - 1
        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
            nimp_inputs.append(x[r, :])

    ## save the results to file
    nfileStr = fileStr + "_" if fileStr != "" else fileStr
    _np.savetxt(nfileStr + datafiles[0], nimp_inputs)

    print("Generated", len(nimp_inputs), "new data points")
    return len(nimp_inputs)
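## Illustrative call; the datafiles are assumed to be the non-implausible
## inputs/outputs written by nonimp_data() below:
#
#   n_new = new_wave_design([E0, E1], zs=[0.1, 0.5], cm=3.0,
#                           var_extra=[0.01, 0.01],
#                           datafiles=["nonimp_sim-inputs", "nonimp_sim-outputs"])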
def nonimp_data(emuls, zs, cm, var_extra, datafiles, maxno=1, act=[], fileStr=""):
    """Determine which inputs from a specified input file are non-implausible,
    and output these values (along with the corresponding outputs from a
    specified output file) to new files.

    Args:
        emuls (Emulator list): List of Emulator instances.
        zs (float list): List of output values to match.
        cm (float list): Cut-off for implausibility.
        var_extra (float list): Extra (non-emulator) variance on outputs.
        datafiles (str list): Names of the inputs and outputs files.
        maxno (int): Which maximum implausibility to consider, default 1.
        act (int list): List of active inputs for plot, default [] (all inputs).
        fileStr (str): String to prepend to output files of non-implausible
          inputs and outputs, default "".

    Returns:
        nimp_inputs (int): Number of non-implausible input points in input
          datafile.

    """

    sets, minmax, orig_minmax = emulsetup(emuls)
    act_ref = ref_act(minmax)
    num_inputs = len(minmax)
    check_act(act, sets)
    maxno = int(maxno)

    sim_x, sim_y = load_datafiles(datafiles, orig_minmax)
    n = sim_x[:, 0].size

    print("\nCalculating Implausibilities...")
    I2 = _np.zeros((n, len(emuls)))

    ## loop over outputs (i.e. over emulators)
    for o in range(len(emuls)):
        E, z, var_e = emuls[o], zs[o], var_extra[o]
        Eai = E.beliefs.active_index
        act_ind_list = [act_ref[str(l)] for l in Eai]
        ni = __emuc.Data(sim_x[:, act_ind_list], None, E.basis, E.par, E.beliefs, E.K)
        post = __emuc.Posterior(ni, E.training, E.par, E.beliefs, E.K, predict=False)
        mean = post.mean
        var = _np.diag(post.var)

        ## calculate implausibility^2 values
        for r in range(0, n):
            I2[r, o] = (mean[r] - z)**2 / (var[r] + var_e)

    ## find maximum implausibility across different outputs
    I = _np.sqrt(I2)
    Imaxes = _np.empty([n, maxno])
    nimp_inputs, nimp_outputs = [], []
    for r in range(0, n):
        Imaxes[r, :] = _np.sort(_np.partition(I[r, :], -maxno)[-maxno:])[-maxno:]
        m = maxno - 1
        if Imaxes[r, -(m + 1)] < cm:  # check cut-off using this value
            nimp_inputs.append(sim_x[r, :])
            nimp_outputs.append(sim_y[r, :])

    ## save the results to file
    nfileStr = fileStr + "_" if fileStr != "" else fileStr
    _np.savetxt(nfileStr + "nonimp_" + datafiles[0], nimp_inputs)
    _np.savetxt(nfileStr + "nonimp_" + datafiles[1], nimp_outputs)

    print(len(nimp_inputs), "data points were non-implausible")
    return len(nimp_inputs)
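## Illustrative call: filter an existing simulation dataset down to its
## non-implausible points (file names are placeholders):
#
#   n_keep = nonimp_data([E0, E1], zs=[0.1, 0.5], cm=3.0, var_extra=[0.01, 0.01],
#                        datafiles=["sim-inputs", "sim-outputs"])
#   # writes "nonimp_sim-inputs" and "nonimp_sim-outputs"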
def plot(E, plot_dims, fixed_dims=[], fixed_vals=[], mean_or_var="mean",
         customLabels=[], points=False, predict=True):
    """Do plot of the Emulator posterior against 1 or 2 input variables, while
    holding the other inputs at constant values.

    Args:
        E (Emulator): Emulator instance.
        plot_dims (int list): Dimensions of inputs to plot (1 or 2 list items).
        fixed_dims (int list): Dimensions of inputs to hold fixed.
        fixed_vals (float list): Values of the inputs that aren't being plotted.
        mean_or_var (string): Choose to plot mean ("mean") or variance ("var").
        customLabels (string list): Labels ["x","y"] for the x and y axes.
        points (bool): Default False. Plot the training points on mean plots.
        predict (bool): Default True. Gives posterior prediction (as opposed
          to posterior estimation).

    Returns:
        None

    """

    dim = E.training.inputs[0].size

    minmax = []  # for plotting training points on a scatter graph
    x = []
    y = []

    print("\n*** Generating plot ***")

    # if we are doing a 1D plot for multidimensional inputs
    if len(plot_dims) == 1 and dim > 1:
        one_d = True
        minmax.append([_np.amin(E.training.inputs[:, plot_dims[0]]),
                       _np.amax(E.training.inputs[:, plot_dims[0]])])
        if points and mean_or_var == "mean":
            x = E.training.inputs[:, plot_dims[0]]
            y = E.training.outputs

        # set labels
        if customLabels == []:
            xlabel = "input " + str(plot_dims[0])
            ylabel = "output " + str(E.beliefs.output)
        else:
            try:
                xlabel = customLabels[0]
            except IndexError:
                xlabel = "input " + str(plot_dims[0])
            try:
                ylabel = customLabels[1]
            except IndexError:
                ylabel = "output " + str(E.beliefs.output)
    else:
        one_d = False
        if points and mean_or_var == "mean":
            x = E.training.inputs[:, plot_dims[0]]
            y = E.training.outputs

        minmax.append([_np.amin(E.training.inputs[:, plot_dims[0]]),
                       _np.amax(E.training.inputs[:, plot_dims[0]])])
        if dim > 1:
            minmax.append([_np.amin(E.training.inputs[:, plot_dims[1]]),
                           _np.amax(E.training.inputs[:, plot_dims[1]])])

        # set labels
        if customLabels == []:
            xlabel = "input " + str(plot_dims[0])
            ylabel = "output " if dim == 1 else "input " + str(plot_dims[1])
        else:
            try:
                xlabel = customLabels[0]
            except IndexError:
                xlabel = "input " + str(plot_dims[0])
            try:
                ylabel = customLabels[1]
            except IndexError:
                ylabel = "output " if dim == 1 else "input " + str(plot_dims[1])

    # number of inputs along each prediction dim
    pn = 30

    # generate range of inputs to make predictions
    full_xrange = __emup.make_inputs(dim, pn, pn,
                                     plot_dims, fixed_dims, fixed_vals, one_d, minmax)
    newinputs = __emuc.Data(full_xrange, None, E.basis, E.par, E.beliefs, E.K)

    if predict:
        print("Prediction (rather than estimation)")
    else:
        print("Estimation (rather than prediction)")

    post = __emuc.Posterior(newinputs, E.training, E.par, E.beliefs, E.K, predict)

    # call the actual plotting routine
    __emup.plotting(dim, post, pn, pn, one_d, mean_or_var, minmax,
                    x, y, labels=[xlabel, ylabel])

    return None
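## Sketch: plot the posterior mean against inputs 0 and 2 while holding input 1
## fixed (dimension indices and values are placeholders):
#
#   plot(E, plot_dims=[0, 2], fixed_dims=[1], fixed_vals=[0.5],
#        mean_or_var="mean", points=True)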
def noisefit(data, noise, stopat=20, olhcmult=100, samples=200, fileStr=""):
    """Try to fit one emulator to the mean of the data and another emulator to
    the noise of the data. Results of estimating the noise are saved to the
    files 'noise-inputs' and 'noise-outputs'.

    Args:
        data (str): Name of configuration file for fitting the input-output data.
        noise (str): Name of configuration file for fitting the input-noise.
        stopat (int): Number of iterations.
        olhcmult (int): Scales the number of data points in the results files.
        samples (int): Number of posterior draws used to estimate the noise
          at each iteration, default 200.
        fileStr (str): String to prepend to output files, default "".

    Returns:
        None

    """

    #### check transform option
    ## if not "log", no transformation will be done

    #### check consistency
    datac, noisec = __read_file(data), __read_file(noise)
    datab, noiseb = __read_file(datac["beliefs"]), __read_file(noisec["beliefs"])
    if datac["inputs"] != noisec["inputs"]:
        print("\nWARNING: different inputs files in config files. Exiting.")
        return None
    if datab["mucm"] == 'T':
        print("\nWARNING: data beliefs must have mucm F, "
              "as sigma (presumably) not valid if extra pointwise variance is added. Exiting.")
        return None
    if datab["fix_nugget"] == 'T' or noiseb["fix_nugget"] == 'T':
        print("\nWARNING: data and noise beliefs need fix_nugget F. Exiting.")
        return None
    if datac["tv_config"] != noisec["tv_config"]:
        print("\nWARNING: different tv_config in config files. Exiting.")
        return None
    if noisec["outputs"] != "zp-outputs":
        print("\nWARNING: noise config outputs must be 'zp-outputs'. Exiting.")
        return None

    ## setup emulators here
    GD = g.setup(data, datashuffle=True, scaleinputs=False)

    ## create 'zp-outputs' file with zeros
    np.savetxt("zp-outputs",
        np.zeros(GD.training.outputs.size + GD.validation.outputs.size * GD.tv_conf.noV).T)

    GN = g.setup(noise, datashuffle=True, scaleinputs=False)

    ## if shuffled, fix the inconsistencies
    GN.training.inputs = GD.training.inputs
    GN.validation.inputs = GD.validation.inputs
    GN.training.remake()
    GN.validation.remake()

    ## if we have validation sets, set no_retrain=True
    if GD.all_data.tv.noV > 1:
        print("\nWARNING: should have 0 or 1 validation sets for noise fitting. Exiting.")
        ## extra validation sets would be totally unused
        exit()
    valsets = False if GD.all_data.tv.noV == 0 else True

    #### step 1 ####
    print("\n****************"
          "\nTRAIN GP ON DATA"
          "\n****************")
    x = GD.training.inputs  # values of the inputs
    t = GD.training.outputs  # values of the noisy outputs
    if valsets:
        xv = GD.validation.inputs  # values of the inputs
        tv = GD.validation.outputs
    g.train(GD, no_retrain=valsets)

    r = np.zeros(t.size)
    if valsets:
        rv = np.zeros(tv.size)

    ## we stay within this loop until done 'stopat' fits
    count = 0
    while True:
        if count == 0:
            xp = __emuc.Data(x, None, GD.basis, GD.par, GD.beliefs, GD.K)
            if valsets:
                xvp = __emuc.Data(xv, None, GD.basis, GD.par, GD.beliefs, GD.K)
        else:
            #### step 5 - return to step 2 if not converged ####
            xp = __emuc.Data(x, None, GD.basis, GD.par, GD.beliefs, GD.K)
            xp.set_r(r)
            xp.make_A(s2=GD.par.sigma**2, predict=True)
            if valsets:
                xvp = __emuc.Data(xv, None, GD.basis, GD.par, GD.beliefs, GD.K)
                xvp.set_r(rv)
                xvp.make_A(s2=GD.par.sigma**2, predict=True)
        count = count + 1

        #### step 2 - generate D'={(xi,zi)} ####
        print("\n***********************"
              "\nESTIMATING NOISE LEVELS " + str(count) +
              "\n***********************")
        post = __emuc.Posterior(xp, GD.training, GD.par, GD.beliefs, GD.K, predict=True)
        L = np.linalg.cholesky(post.var)
        z_prime = np.zeros(t.size)
        s = samples
        for j in range(s):  # predict 's' different values
            u = np.random.randn(t.size)
            tij = post.mean + L.dot(u)
            z_prime = z_prime + 0.5 * (t - tij)**2
        z_prime = __transform(z_prime / float(s))
        np.savetxt('zp-outputs', z_prime)

        # estimate noise levels for validation set
        if valsets:
            post = __emuc.Posterior(xvp, GD.training, GD.par, GD.beliefs, GD.K, predict=True)
            L = np.linalg.cholesky(post.var)
            z_prime_V = np.zeros(tv.size)
            s = samples
            for j in range(s):  # predict 's' different values
                u = np.random.randn(tv.size)
                tij = post.mean + L.dot(u)
                z_prime_V = z_prime_V + 0.5 * (tv - tij)**2
            z_prime_V = __transform(z_prime_V / float(s))

        #### step 3 ####
        # train a GP on x and z
        print("\n*****************"
              "\nTRAIN GP ON NOISE " + str(count) +
              "\n*****************")
        ## re-read the updated zp-outputs via the existing emulator structures
        GN.training.outputs = z_prime.T
        GN.training.remake()
        if valsets:
            GN.validation.outputs = z_prime_V.T
            GN.validation.remake()
        ## fix to allow retraining using same training set against validation
        GN.tv_conf.no_of_trains = 0
        GN.tv_conf.retrain = 'y'
        g.train(GN, no_retrain=valsets)

        #### step 4 - use GN to predict noise values for GD ####
        print("\n***********************************"
              "\nTRAIN GP ON DATA WITH NOISE FROM GP " + str(count) +
              "\n***********************************")
        xp_GN = __emuc.Data(x, None, GN.basis, GN.par, GN.beliefs, GN.K)
        p_GN = __emuc.Posterior(xp_GN, GN.training, GN.par, GN.beliefs, GN.K, predict=True)
        r = __untransform(p_GN.mean, np.diag(p_GN.var))
        GD.training.set_r(r)

        ## add estimated r to the validation set for better diagnostics
        if valsets:
            v_GN = __emuc.Data(xv, None, GN.basis, GN.par, GN.beliefs, GN.K)
            pv_GN = __emuc.Posterior(v_GN, GN.training, GN.par, GN.beliefs, GN.K, predict=True)
            rv = __untransform(pv_GN.mean, np.diag(pv_GN.var))
            GD.validation.set_r(rv)

        ## fix to allow retraining using same training set against validation
        GD.tv_conf.no_of_trains = 0
        GD.tv_conf.retrain = 'y'
        g.train(GD, no_retrain=valsets)

        # break when we've done 'stopat' fits
        if count == stopat:
            print("\nCompleted", count, "fits, stopping here.")

            ## use an oLHC design for x values of noise guesses we'll save
            print("\nGenerating input points to predict noise values at...")
            n = x[0].size * int(olhcmult)
            N = int(n)
            olhc_range = [[np.amin(col), np.amax(col)] for col in x.T]
            filename = "x_range_input"
            _gd.optLatinHyperCube(x[0].size, n, N, olhc_range, filename)
            x_range = np.loadtxt(filename)  # read generated oLHC file in

            # if 1D inputs, store in 2D array with only 1 column
            if x[0].size == 1:
                x_range = np.array([x_range,]).T

            ## save data to file
            x_plot = __emuc.Data(x_range, None, GN.basis, GN.par, GN.beliefs, GN.K)
            p_plot = __emuc.Posterior(x_plot, GN.training, GN.par, GN.beliefs, GN.K, predict=True)
            mean_plot = p_plot.mean
            var_plot = p_plot.var
            p_plot.interval()
            UI, LI = p_plot.UI, p_plot.LI

            print("\nSaving results to file...")
            nfileStr = fileStr + "_" if fileStr != "" else fileStr
            np.savetxt(nfileStr + 'noise-inputs', x_range)
            np.savetxt(nfileStr + 'noise-outputs', np.transpose(
                [__untransform(mean_plot, np.diag(var_plot)),
                 __untransform(LI, 0.0),
                 __untransform(UI, 0.0)]))
            break

    return None
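## Sketch of the heteroscedastic-noise workflow: noisefit() alternates between
## fitting the data emulator and the noise emulator, saving noise estimates to
## 'noise-inputs'/'noise-outputs'; noisepost() (above) can then reuse the two
## fitted emulators for predictions. Config file names are placeholders:
#
#   noisefit("config-data", "config-noise", stopat=10, samples=200)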