def test_values_scaled_to_real_multiple_bounds(self):
    """Unscale rows of values when each column has its own bounds."""
    bounds = [[2.0, 4.0], [2.0, 3.0]]
    # (scaled input, expected physical values): one row, then two rows
    cases = (
        ([[0.0, 1.0]], [[2.0, 3.0]]),
        ([[0.0, 1.0], [0.5, 0.0]], [[2.0, 3.0], [3.0, 2.0]]),
    )
    for scaled_values, expected in cases:
        values = values_scaled_to_real(scaled_values, bounds)
        assert np.allclose(values, expected)
def test_values_scaled_to_real_multiple_values(self):
    """Unscale several values that share a single pair of bounds."""
    bounds = [2.0, 4.0]
    # Expected results are written as columns, one row per input value.
    # The last case includes a scaled value above 1.0 (1.5 -> 5.0).
    cases = (
        ([0.0, 1.0], [[2.0], [4.0]]),
        ([0.0, 1.0, 1.5], [[2.0], [4.0], [5.0]]),
    )
    for scaled_values, expected in cases:
        values = values_scaled_to_real(scaled_values, bounds)
        assert np.allclose(values, expected)
def plot_property(df, prp_name, bounds, axis_name=None):
    """Plot a comparison of the simulated and experimental property

    Parameters
    ----------
    df : pandas.Dataframe
        dataframe with information; the columns "expt_{prp_name}" and
        "sim_{prp_name}" are read and converted with values_scaled_to_real
    prp_name : string
        property name to plot from df
    bounds : np.ndarray
        bounds of the property
    axis_name : string
        name to use on the plot axis, with units. Defaults to prp_name.

    Returns
    -------
    None

    Notes
    -----
    Saves a plt with name 'figs/expt_v_sim_{prp_name}.png'. The 'figs'
    directory is created if it does not exist yet.
    """
    if axis_name is None:
        axis_name = prp_name

    # y = x reference line, extended by 3 beyond each bound
    yeqx = np.linspace(bounds[0] - 3, bounds[1] + 3, 10)

    fig, ax = plt.subplots()
    ax.plot(yeqx, yeqx, color="black")
    ax.scatter(
        values_scaled_to_real(df["expt_" + prp_name], bounds),
        values_scaled_to_real(df["sim_" + prp_name], bounds),
        alpha=0.2,
        color="black",
    )

    ax.set_xlabel("Expt. " + axis_name, fontsize=16, labelpad=15)
    ax.set_ylabel("Sim. " + axis_name, fontsize=16, labelpad=15)
    ax.tick_params(axis="both", labelsize=12)

    ax.set_xlim(yeqx[0], yeqx[-1])
    ax.set_ylim(yeqx[0], yeqx[-1])
    ax.set_aspect("equal", "box")

    fig.tight_layout()

    # Make sure the output directory exists up front rather than saving,
    # catching FileNotFoundError, creating the directory and saving again.
    os.makedirs("figs", exist_ok=True)
    fig.savefig(f"figs/expt_v_sim_{prp_name}.png", dpi=300)
def prepare_df_density_errors(df, molecule):
    """Create a dataframe with mean square error (mse) and mean absolute
    percent error (mape) for each unique parameter set.

    Parameters
    ----------
    df : pandas.Dataframe
        per simulation results; must contain the columns named in
        molecule.param_names plus "temperature", "md_density" and
        "expt_density"
    molecule : R32, R125
        molecule class with bounds/experimental data

    Returns
    -------
    df_new : pandas.Dataframe
        dataframe with one row per parameter set and including
        the MSE and MAPE for liq_density, plus one
        "sim_liq_density_{T}K" column per temperature

    Notes
    -----
    An empty ``df`` yields an empty dataframe that only has the parameter
    columns. Each row is assembled as a labelled record, so a parameter set
    that lacks a temperature gets NaN in that column instead of shifting
    values under the wrong label.
    """
    param_names = list(molecule.param_names)

    records = []
    for group, values in df.groupby(param_names):
        # A single grouping key may be returned as a bare scalar
        if not isinstance(group, tuple):
            group = (group,)
        record = dict(zip(param_names, group))

        # Flatten so that each element is a scalar; calling float() on a
        # one-element array is deprecated in recent NumPy releases.
        temps = np.ravel(
            values_scaled_to_real(values["temperature"],
                                  molecule.temperature_bounds))
        sim_liq_density = np.ravel(
            values_scaled_to_real(values["md_density"],
                                  molecule.liq_density_bounds))
        expt_liq_density = np.ravel(
            values_scaled_to_real(values["expt_density"],
                                  molecule.liq_density_bounds))

        # Simulated liquid density at each temperature
        for temp, liq_density in zip(temps, sim_liq_density):
            record[f"sim_liq_density_{float(temp):.0f}K"] = float(liq_density)

        error = sim_liq_density - expt_liq_density
        record["mse_liq_density"] = np.mean(error**2)
        record["mape_liq_density"] = (
            np.mean(np.abs(error / expt_liq_density)) * 100.0)

        records.append(record)

    # Parameter columns first, then every other column in first-seen order
    ordered = dict.fromkeys(param_names)
    for record in records:
        ordered.update(dict.fromkeys(record))

    new_df = pd.DataFrame(records, columns=list(ordered))
    return new_df
def test_values_scaled_to_real_single(self):
    """Unscale single values, including one outside [0, 1] and negative bounds."""
    # (bounds, scaled input, expected physical value)
    cases = (
        ([2.0, 4.0], 0.0, 2.0),
        ([2.0, 4.0], 1.0, 4.0),
        ([2.0, 4.0], 0.5, 3.0),
        # One-element list input with a scaled value below 0
        ([2.0, 4.0], [-0.5], 1.0),
        # Both bounds negative
        ([-5.0, -4.0], 0.5, -4.5),
    )
    for bounds, scaled_value, expected in cases:
        value = values_scaled_to_real(scaled_value, bounds)
        assert np.isclose(value, expected)
def _calc_gp_mse(gp_model, samples, expt_property, property_bounds,
                 temperature_bounds):
    """Calculate the MSE between the GP model and experiment for samples

    For each (temperature, value) item of ``expt_property`` the scaled
    temperature is appended to ``samples`` as an extra column, the GP mean
    is predicted and converted to physical units, and the experimental
    value is subtracted. The squared errors are averaged over the
    temperatures, giving one value per row of ``samples``.
    """
    n_samples = samples.shape[0]
    all_errs = np.empty(shape=(n_samples, len(expt_property)))

    for col_idx, (temp, density) in enumerate(expt_property.items()):
        scaled_temp = values_real_to_scaled(temp, temperature_bounds)
        temp_column = np.tile(scaled_temp, (n_samples, 1))
        xx = np.hstack((samples, temp_column))
        # Only the predicted mean is needed; the variance is discarded
        means_scaled, _ = gp_model.predict_f(xx)
        means = values_scaled_to_real(means_scaled, property_bounds)
        all_errs[:, col_idx] = (means - density)[:, 0]

    return np.mean(all_errs**2, axis=1)
def plot_model_vs_test(
    models,
    param_values,
    train_points,
    test_points,
    temperature_bounds,
    property_bounds,
    plot_bounds=(220.0, 340.0),
    property_name="property",
):
    """Plots the GP model(s) as a function of temperature with all other
    parameters taken as param_values. Overlays training and testing points
    with the same param_values.

    Parameters
    ----------
    models : dict {"label" : gpflow.model }
        GPFlow models to plot
    param_values : np.ndarray, shape=(n_params)
        The parameters at which to evaluate the GP model
    train_points : np.ndarray, shape=(n_points, 2)
        The temperature (scaled) and property (scaled) of each training point
    test_points : np.ndarray, shape=(n_points, 2)
        The temperature (scaled) and property (scaled) of each test point
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling property between physical
        and dimensionless values
    plot_bounds : array-like, optional
        temperature bounds for the plot
    property_name : str, optional, default="property"
        property name with units for axis label

    Returns
    -------
    matplotlib.figure.Figure
        Only returned when ``mpl_is_inline`` is falsy; otherwise the
        function returns None.
    """
    # NOTE: the default for plot_bounds is a tuple rather than a list so the
    # default object cannot be mutated between calls; it is only indexed.
    n_samples = 100
    vals = np.linspace(plot_bounds[0], plot_bounds[1],
                       n_samples).reshape(-1, 1)
    vals_scaled = values_real_to_scaled(vals, temperature_bounds)

    # Model input: the fixed parameters repeated on every row, followed by
    # the scaled temperature column
    other = np.tile(param_values, (n_samples, 1))
    xx = np.hstack((other, vals_scaled))

    fig, ax = plt.subplots()
    for label, model in models.items():
        mean_scaled, var_scaled = model.predict_f(xx)
        mean = values_scaled_to_real(mean_scaled, property_bounds)
        var = variances_scaled_to_real(var_scaled, property_bounds)

        ax.plot(vals, mean, lw=2, label="GP model" + label)
        # Shaded band of +/- 1.96 standard deviations around the mean
        half_width = 1.96 * np.sqrt(var[:, 0])
        ax.fill_between(
            vals[:, 0],
            mean[:, 0] - half_width,
            mean[:, 0] + half_width,
            alpha=0.25,
        )

    if train_points.shape[0] > 0:
        md_train_temp = values_scaled_to_real(train_points[:, 0],
                                              temperature_bounds)
        md_train_property = values_scaled_to_real(train_points[:, 1],
                                                  property_bounds)
        ax.plot(md_train_temp, md_train_property, "s", color="black",
                label="Train")

    if test_points.shape[0] > 0:
        md_test_temp = values_scaled_to_real(test_points[:, 0],
                                             temperature_bounds)
        md_test_property = values_scaled_to_real(test_points[:, 1],
                                                 property_bounds)
        ax.plot(md_test_temp, md_test_property, "ro", label="Test")

    ax.set_xlabel("Temperature")
    ax.set_ylabel(property_name)
    fig.legend()

    if not mpl_is_inline:
        return fig
def prepare_df_vle_errors(df, molecule):
    """Create a dataframe with mean square error (mse) and mean absolute
    percent error (mape) for each unique parameter set. The critical
    temperature and density are also evaluated.

    Parameters
    ----------
    df : pandas.Dataframe
        per simulation results; must contain the columns named in
        molecule.param_names, "temperature", and "sim_<p>"/"expt_<p>" for
        <p> in liq_density, vap_density, Pvap, Hvap
    molecule : R32, R125
        molecule class with bounds/experimental data

    Returns
    -------
    df_new : pandas.Dataframe
        dataframe with one row per parameter set and including
        the MSE and MAPE for liq_density, vap_density, pvap, hvap,
        critical temperature, critical density

    Notes
    -----
    An empty ``df`` yields an empty dataframe that only has the parameter
    columns. Each row is assembled as a labelled record, so a parameter set
    that lacks a temperature gets NaN in that column instead of shifting
    values under the wrong label.
    """
    param_names = list(molecule.param_names)

    # Column stem -> bounds used to unscale "sim_<stem>" and "expt_<stem>".
    # The order here fixes the order of the output columns.
    property_bounds = {
        "liq_density": molecule.liq_density_bounds,
        "vap_density": molecule.vap_density_bounds,
        "Pvap": molecule.Pvap_bounds,
        "Hvap": molecule.Hvap_bounds,
    }

    records = []
    for group, values in df.groupby(param_names):
        # A single grouping key may be returned as a bare scalar
        if not isinstance(group, tuple):
            group = (group,)

        # Flatten so that each element is a scalar; calling float() on a
        # one-element array is deprecated in recent NumPy releases.
        temps = np.ravel(
            values_scaled_to_real(values["temperature"],
                                  molecule.temperature_bounds))

        simulated = {}
        per_temperature = {}
        mse = {}
        mape = {}
        for stem, bounds in property_bounds.items():
            sim = np.ravel(values_scaled_to_real(values["sim_" + stem], bounds))
            expt = np.ravel(
                values_scaled_to_real(values["expt_" + stem], bounds))
            simulated[stem] = sim

            error = sim - expt
            mse["mse_" + stem] = np.mean(error**2)
            mape["mape_" + stem] = np.mean(np.abs(error / expt)) * 100.0

            for temp, value in zip(temps, sim):
                per_temperature[f"sim_{stem}_{float(temp):.0f}K"] = float(value)

        # Critical Point (Law of rectilinear diameters)
        sim_liq_density = simulated["liq_density"]
        sim_vap_density = simulated["vap_density"]
        slope1, intercept1, _, _, _ = linregress(
            temps,
            (sim_liq_density + sim_vap_density) / 2.0,
        )
        slope2, intercept2, _, _, _ = linregress(
            temps,
            (sim_liq_density - sim_vap_density)**(1 / 0.32),
        )

        # Tc is where the second fit crosses zero; rhoc is the first fit
        # evaluated at Tc
        Tc = np.abs(intercept2 / slope2)
        rhoc = intercept1 + slope1 * Tc

        records.append({
            **dict(zip(param_names, group)),
            **per_temperature,
            "sim_Tc": Tc,
            "sim_rhoc": rhoc,
            **mse,
            "mse_Tc": (Tc - molecule.expt_Tc)**2,
            "mse_rhoc": (rhoc - molecule.expt_rhoc)**2,
            **mape,
            "mape_Tc":
            np.abs((Tc - molecule.expt_Tc) / molecule.expt_Tc) * 100.0,
            "mape_rhoc":
            np.abs((rhoc - molecule.expt_rhoc) / molecule.expt_rhoc) * 100.0,
        })

    # Parameter columns first, then every other column in first-seen order
    ordered = dict.fromkeys(param_names)
    for record in records:
        ordered.update(dict.fromkeys(record))

    new_df = pd.DataFrame(records, columns=list(ordered))
    return new_df
def init_project():
    """Initialize the signac project "r125-vle-iter1".

    One job is initialized for every combination of temperature and
    parameter set read from the Latin hypercube CSV file.
    """

    def _to_nm(sigma):
        # sigma is provided in Angstrom
        return float((sigma * u.Angstrom).in_units(u.nm).value)

    def _to_kj_per_mol(epsilon):
        # epsilon is provided as epsilon/kB in K
        return float((epsilon * u.K * u.kb).in_units("kJ/mol").value)

    # Initialize project
    project = signac.init_project("r125-vle-iter1")

    # Define temps
    temps = [
        229.0 * u.K,
        249.0 * u.K,
        269.0 * u.K,
        289.0 * u.K,
        309.0 * u.K,
    ]

    # Run at vapor pressure (keyed by the integer temperature in K)
    press = {
        229: (123.65 * u.kPa),
        249: (290.76 * u.kPa),
        269: (592.27 * u.kPa),
        289: (1082.84 * u.kPa),
        309: (1824.93 * u.kPa),
    }

    n_vap = 160
    n_liq = 640

    # Experimental density
    R125 = R125Constants()

    # Load samples from Latin hypercube (header row and first column dropped)
    lh_samples = np.genfromtxt(
        "../../analysis/csv/r125-vle-iter1-params.csv",
        delimiter=",",
        skip_header=1,
    )[:, 1:]

    # Define bounds on sigma/epsilon
    bounds_sigma = np.asarray([
        [3.0, 4.0],  # C
        [3.0, 4.0],  # C
        [2.5, 3.5],  # F
        [2.5, 3.5],  # F
        [1.7, 2.7],  # H
    ])
    bounds_epsilon = np.asarray([
        [20.0, 60.0],  # C
        [20.0, 60.0],  # C
        [15.0, 40.0],  # F
        [15.0, 40.0],  # F
        [2.0, 10.0],  # H
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Convert scaled latin hypercube samples to physical values
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    for temp in temps:
        temp_kelvin = temp.in_units(u.K).value
        temp_key = int(temp_kelvin)
        for sample in scaled_params:
            # Unpack the sample: five sigmas followed by five epsilons
            (
                sigma_C1,
                sigma_C2,
                sigma_F1,
                sigma_F2,
                sigma_H1,
                epsilon_C1,
                epsilon_C2,
                epsilon_F1,
                epsilon_F2,
                epsilon_H1,
            ) = sample

            # Define the state point
            state_point = {
                "T": float(temp_kelvin),
                "P": float(press[temp_key].in_units(u.bar).value),
                "sigma_C1": _to_nm(sigma_C1),
                "sigma_C2": _to_nm(sigma_C2),
                "sigma_F1": _to_nm(sigma_F1),
                "sigma_F2": _to_nm(sigma_F2),
                "sigma_H1": _to_nm(sigma_H1),
                "epsilon_C1": _to_kj_per_mol(epsilon_C1),
                "epsilon_C2": _to_kj_per_mol(epsilon_C2),
                "epsilon_F1": _to_kj_per_mol(epsilon_F1),
                "epsilon_F2": _to_kj_per_mol(epsilon_F2),
                "epsilon_H1": _to_kj_per_mol(epsilon_H1),
                "N_vap": n_vap,
                "N_liq": n_liq,
                "expt_liq_density": R125.expt_liq_density[temp_key],
                "nsteps_liqeq": 5000,
                "nsteps_eq": 10000,
                "nsteps_prod": 100000,
            }

            job = project.open_job(state_point)
            job.init()
def init_project():
    """Initialize the signac project "r125-density-iter2".

    One job is initialized for every combination of temperature and
    parameter set read from the Latin hypercube CSV file.
    """

    def _to_nm(sigma):
        # sigma is provided in Angstrom
        return float((sigma * u.Angstrom).in_units(u.nm).value)

    def _to_kj_per_mol(epsilon):
        # epsilon is provided as epsilon/kB in K
        return float((epsilon * u.K * u.kb).in_units("kJ/mol").value)

    project = signac.init_project("r125-density-iter2")

    # Define temps
    temps = [
        229.0 * u.K,
        249.0 * u.K,
        269.0 * u.K,
        289.0 * u.K,
        309.0 * u.K,
    ]

    # Run at vapor pressure (keyed by the integer temperature in K)
    press = {
        229: (123.65 * u.kPa),
        249: (290.76 * u.kPa),
        269: (592.27 * u.kPa),
        289: (1082.84 * u.kPa),
        309: (1824.93 * u.kPa),
    }

    # Run for 2.5 ns (1 fs timestep)
    nstepseq = 500000
    nstepsprod = 2500000

    # Load samples from Latin hypercube (header row and first column dropped)
    lh_samples = np.genfromtxt(
        "../../analysis/csv/r125-density-iter2-params.csv",
        delimiter=",",
        skip_header=1,
    )[:, 1:]

    # Define bounds on sigma/epsilon
    bounds_sigma = np.asarray([
        [3.0, 4.0],  # C
        [3.0, 4.0],  # C
        [2.5, 3.5],  # F
        [2.5, 3.5],  # F
        [1.7, 2.7],  # H
    ])
    bounds_epsilon = np.asarray([
        [20.0, 60.0],  # C
        [20.0, 60.0],  # C
        [15.0, 40.0],  # F
        [15.0, 40.0],  # F
        [2.0, 10.0],  # H
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Convert scaled latin hypercube samples to physical values
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    for temp in temps:
        temp_kelvin = temp.in_units(u.K).value
        for sample in scaled_params:
            # Five sigmas followed by five epsilons
            (
                sigma_C1,
                sigma_C2,
                sigma_F1,
                sigma_F2,
                sigma_H1,
                epsilon_C1,
                epsilon_C2,
                epsilon_F1,
                epsilon_F2,
                epsilon_H1,
            ) = sample

            state_point = {
                "T": float(temp_kelvin),
                "P": float(press[int(temp_kelvin)].in_units(u.bar).value),
                "nstepseq": nstepseq,
                "nstepsprod": nstepsprod,
                "sigma_C1": _to_nm(sigma_C1),
                "sigma_C2": _to_nm(sigma_C2),
                "sigma_F1": _to_nm(sigma_F1),
                "sigma_F2": _to_nm(sigma_F2),
                "sigma_H1": _to_nm(sigma_H1),
                "epsilon_C1": _to_kj_per_mol(epsilon_C1),
                "epsilon_C2": _to_kj_per_mol(epsilon_C2),
                "epsilon_F1": _to_kj_per_mol(epsilon_F1),
                "epsilon_F2": _to_kj_per_mol(epsilon_F2),
                "epsilon_H1": _to_kj_per_mol(epsilon_H1),
            }

            job = project.open_job(state_point)
            job.init()
df_liquid, param_names, property_name, shuffle_seed=md_gp_shuffle_seed) # Fit model md_model = run_gpflow_scipy( x_train, y_train, gpflow.kernels.RBF(lengthscales=np.ones(R125.n_params + 1)), ) # Get difference between GROMACS/Cassandra density df_test_points = df_vle[list(R125.param_names) + ["temperature", "sim_liq_density"]] xx = df_test_points[list(R125.param_names) + ["temperature"]].values means, vars_ = md_model.predict_f(xx) diff = values_scaled_to_real( df_test_points["sim_liq_density"].values.reshape(-1, 1), R125.liq_density_bounds, ) - values_scaled_to_real(means, R125.liq_density_bounds) print( f"The average density difference between Cassandra and GROMACS is {np.mean(diff)} kg/m^3" ) print( f"The minimum density difference between Cassandra and GROMACS is {np.min(diff)} kg/m^3" ) print( f"The maximum density difference between Cassandra and GROMACS is {np.max(diff)} kg/m^3" ) ### Step 3: Find new parameters for simulations max_mse = 625 # kg^2/m^6 latin_hypercube = np.loadtxt("LHS_5e5x10.csv", delimiter=",") ranked_samples = rank_samples(latin_hypercube,
def init_project():
    """Initialize the signac project "ap-iter2".

    One job is initialized for every combination of temperature and
    parameter set read from the Latin hypercube CSV file. The sigma and
    epsilon values are stored in the state point exactly as returned by
    values_scaled_to_real (no unit conversion is applied here).
    """
    project = signac.init_project("ap-iter2")

    # Define temps
    temperatures = [298.0 * u.K, 78.0 * u.K, 10.0 * u.K]

    # Every state point uses the same pressure
    pressure = 1.0 * u.atm

    # Run for 200 ps (1 fs timestep)
    nsteps_eq = 100000
    nsteps_prod = 100000

    # Load samples from Latin hypercube (columns 1 through 8 of the file)
    samples_csv = "/scratch365/bbefort/ap-fffit/ap-fffit/analysis/csv/uc-lattice-iter2-params.csv"
    lh_samples = pd.read_csv(
        samples_csv,
        header=0,
        usecols=[1, 2, 3, 4, 5, 6, 7, 8],
    ).values

    # Define bounds on sigma/epsilon
    # Sigma units = angstrom
    # Epsilon units = kcal/mol
    bounds_sigma = np.asarray([
        [3.5, 4.5],  # Cl
        [0.5, 2.0],  # H
        [2.5, 3.8],  # N
        [2.5, 3.8],  # O
    ])
    bounds_epsilon = np.asarray([
        [0.1, 0.8],  # Cl
        [0.0, 0.02],  # H
        [0.01, 0.2],  # N
        [0.02, 0.3],  # O
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Convert scaled latin hypercube samples to physical values
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    pressure_atm = float(pressure.to_value("atm"))
    for temperature in temperatures:
        temperature_kelvin = float(temperature.to_value("K"))
        for sample in scaled_params:
            # Four sigmas followed by four epsilons
            (
                sigma_Cl,
                sigma_H,
                sigma_N,
                sigma_O,
                epsilon_Cl,
                epsilon_H,
                epsilon_N,
                epsilon_O,
            ) = sample

            state_point = {
                "T": temperature_kelvin,
                "P": pressure_atm,
                "nsteps": {
                    "eq": nsteps_eq,
                    "prod": nsteps_prod,
                },
                "sigma_Cl": sigma_Cl,
                "sigma_H": sigma_H,
                "sigma_N": sigma_N,
                "sigma_O": sigma_O,
                "epsilon_Cl": epsilon_Cl,
                "epsilon_H": epsilon_H,
                "epsilon_N": epsilon_N,
                "epsilon_O": epsilon_O,
            }

            job = project.open_job(state_point)
            job.init()
def plot_slices_temperature(
    models,
    n_params,
    temperature_bounds,
    property_bounds,
    plot_bounds=(220.0, 340.0),
    property_name="property",
):
    """Plot the model predictions as a function of temperature
    Slices are plotted where the values of the other parameters
    are all set to 0.0 --> 1.0 in increments of 0.1

    Parameters
    ----------
    models : dict
        models to plot, key=label, value=gpflow.model
    n_params : int
        number of non-temperature parameters in the model
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling the property between physical
        and dimensionless values
    plot_bounds : array-like, optional
        temperature bounds for the plot
    property_name : str, optional, default="property"
        property name with units for axis label

    Returns
    -------
    figs : list
        list of matplotlib.figure.Figure objects. Only returned when
        ``mpl_is_inline`` is falsy; otherwise the function returns None.
    """
    # NOTE: the default for plot_bounds is a tuple rather than a list so the
    # default object cannot be mutated between calls; it is only indexed.
    n_samples = 100
    vals = np.linspace(plot_bounds[0], plot_bounds[1],
                       n_samples).reshape(-1, 1)
    vals_scaled = values_real_to_scaled(vals, temperature_bounds)

    figs = []
    for other_vals in np.arange(0, 1.1, 0.1):
        # Every non-temperature parameter is set to the same scaled value
        other = np.tile(other_vals, (n_samples, n_params))
        xx = np.hstack((other, vals_scaled))

        fig, ax = plt.subplots()
        for label, model in models.items():
            mean_scaled, var_scaled = model.predict_f(xx)
            mean = values_scaled_to_real(mean_scaled, property_bounds)
            var = variances_scaled_to_real(var_scaled, property_bounds)

            ax.plot(vals, mean, lw=2, label=label)
            # Shaded band of +/- 1.96 standard deviations around the mean
            half_width = 1.96 * np.sqrt(var[:, 0])
            ax.fill_between(
                vals[:, 0],
                mean[:, 0] - half_width,
                mean[:, 0] + half_width,
                alpha=0.3,
            )

        ax.set_title(f"Other vals = {other_vals:.2f}")
        ax.set_xlabel("Temperature")
        ax.set_ylabel(property_name)
        fig.legend()
        figs.append(fig)

    if not mpl_is_inline:
        return figs
def plot_slices_params(
    models,
    param_to_plot,
    param_names,
    temperature,
    temperature_bounds,
    property_bounds,
    property_name="property",
):
    """Plot the model predictions as a function of param_to_plot
    at the specified temperature

    Parameters
    ----------
    models : dict {"label" : gpflow.model }
        GPFlow models to plot
    param_to_plot : string
        Parameter to vary
    param_names : list, tuple
        list of parameter names
    temperature : float
        temperature at which to plot the surface
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling property between physical
        and dimensionless values
    property_name : string, optional, default="property"
        name of property to plot

    Returns
    -------
    figs : list
        list of matplotlib.figure.Figure objects. Only returned when
        ``mpl_is_inline`` is falsy; otherwise the function returns None.

    Raises
    ------
    ValueError
        if param_to_plot is not in param_names
    """
    try:
        param_idx = param_names.index(param_to_plot)
    except ValueError:
        raise ValueError(
            f"parameter: {param_to_plot} not found in parameter_names: {param_names}"
        )

    n_samples = 100
    # Number of parameter columns before / after the one being varied
    n_before = param_idx
    n_after = len(param_names) - 1 - param_idx

    # The varied parameter spans slightly beyond the scaled [0, 1] range
    vals_scaled = np.linspace(-0.1, 1.1, n_samples).reshape(-1, 1)
    temp_vals_scaled = values_real_to_scaled(
        np.tile(temperature, (n_samples, 1)), temperature_bounds)

    math_parameter = "$\\" + param_to_plot + "$"

    figs = []
    for other_vals in np.arange(0, 1.1, 0.1):
        # Column layout: parameters before, varied parameter, parameters
        # after, then the scaled temperature
        xx = np.hstack((
            np.tile(other_vals, (n_samples, n_before)),
            vals_scaled,
            np.tile(other_vals, (n_samples, n_after)),
            temp_vals_scaled,
        ))

        fig, ax = plt.subplots()
        for label, model in models.items():
            mean_scaled, var_scaled = model.predict_f(xx)
            mean = values_scaled_to_real(mean_scaled, property_bounds)
            var = variances_scaled_to_real(var_scaled, property_bounds)

            ax.plot(vals_scaled, mean, lw=2, label=label)
            # Shaded band of +/- 1.96 standard deviations around the mean
            half_width = 1.96 * np.sqrt(var[:, 0])
            ax.fill_between(
                vals_scaled[:, 0],
                mean[:, 0] - half_width,
                mean[:, 0] + half_width,
                alpha=0.3,
            )

        ax.set_title(
            f"{math_parameter} at T = {temperature:.0f} K. Other vals = {other_vals:.2f}."
        )
        ax.set_xlabel(math_parameter)
        ax.set_ylabel(property_name)
        fig.legend()
        figs.append(fig)

    if not mpl_is_inline:
        return figs
def plot_model_performance(models, x_data, y_data, property_bounds,
                           xylim=None):
    """Plot the predictions vs. result for one or more GP models

    Parameters
    ----------
    models : dict { label : model }
        Each model to be plotted (value, GPFlow model) is provided
        with a label (key, string)
    x_data : np.array
        data to create model predictions for
    y_data : np.ndarray
        correct answer
    property_bounds : array-like
        bounds for scaling density between physical
        and dimensionless values
    xylim : array-like, shape=(2,), optional
        lower and upper x and y limits of the plot. When omitted, the
        limits span the smallest and largest value among y_data and all
        model predictions (in physical units).

    Returns
    -------
    matplotlib.Figure.figure
        Only returned when ``mpl_is_inline`` is falsy; otherwise the
        function returns None. The mean squared error of each model is
        printed as a side effect.
    """
    y_data_physical = values_scaled_to_real(y_data, property_bounds)
    # Running extremes, used for the axis limits when xylim is not given
    lowest = np.min(y_data_physical)
    highest = np.max(y_data_physical)

    fig, ax = plt.subplots()

    for label, model in models.items():
        gp_mu, _ = model.predict_f(x_data)
        gp_mu_physical = values_scaled_to_real(gp_mu, property_bounds)

        ax.scatter(y_data_physical, gp_mu_physical, label=label,
                   zorder=2.5, alpha=0.4)

        residual = gp_mu_physical - y_data_physical.reshape(-1, 1)
        meansqerr = np.mean(residual**2)
        print("Model: {}. Mean squared err: {:.2e}".format(label, meansqerr))

        model_low = np.min(gp_mu_physical)
        model_high = np.max(gp_mu_physical)
        if model_low < lowest:
            lowest = model_low
        if model_high > highest:
            highest = model_high

    if xylim is None:
        xylim = [lowest, highest]

    # y = x reference line, sampled every 100 from the lower limit
    diagonal = np.arange(xylim[0], xylim[1] + 100, 100)
    ax.plot(
        diagonal,
        np.arange(xylim[0], xylim[1] + 100, 100),
        color="xkcd:blue grey",
        label="y=x",
    )

    ax.set_xlim(xylim[0], xylim[1])
    ax.set_ylim(xylim[0], xylim[1])
    ax.set_xlabel("Actual")
    ax.set_ylabel("Model Prediction")
    ax.legend()
    ax.set_aspect("equal", "box")

    if not mpl_is_inline:
        return fig
def init_project():
    """Initialize the signac project "r32-density-iter2".

    One job is initialized for every combination of temperature and
    parameter set read from the Latin hypercube CSV file.
    """

    def _to_nm(sigma):
        # sigma is provided in Angstrom
        return float((sigma * u.Angstrom).in_units(u.nm).value)

    def _to_kj_per_mol(epsilon):
        # epsilon is provided as epsilon/kB in K
        return float((epsilon * u.K * u.kb).in_units("kJ/mol").value)

    # Initialize project
    project = signac.init_project("r32-density-iter2")

    # Define temps
    temps = [
        241.0 * u.K,
        261.0 * u.K,
        281.0 * u.K,
        301.0 * u.K,
        321.0 * u.K,
    ]

    # Run at vapor pressure (keyed by the integer temperature in K)
    press = {
        241: (2.5159 * u.bar),
        261: (5.4327 * u.bar),
        281: (10.426 * u.bar),
        301: (18.295 * u.bar),
        321: (29.989 * u.bar),
    }

    # Run for 2.5 ns (1 fs timestep)
    nstepseq = 500000
    nstepsprod = 2500000

    # Load samples from Latin hypercube (header row and first column dropped)
    lh_samples = np.genfromtxt(
        "../../analysis/csv/r32-density-iter2-params.csv",
        delimiter=",",
        skip_header=1,
    )[:, 1:]

    # Define bounds on sigma/epsilon
    bounds_sigma = np.asarray([
        [3.0, 4.0],  # C
        [2.5, 3.5],  # F
        [1.7, 2.7],  # H
    ])
    bounds_epsilon = np.asarray([
        [20.0, 60.0],  # C
        [15.0, 40.0],  # F
        [2.0, 10.0],  # H
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Convert scaled latin hypercube samples to physical values
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    for temp in temps:
        temp_kelvin = temp.in_units(u.K).value
        for sample in scaled_params:
            # Unpack the sample: three sigmas followed by three epsilons
            (
                sigma_C,
                sigma_F,
                sigma_H,
                epsilon_C,
                epsilon_F,
                epsilon_H,
            ) = sample

            # Define the state point
            state_point = {
                "T": float(temp_kelvin),
                "P": float(press[int(temp_kelvin)].in_units(u.bar).value),
                "nstepseq": nstepseq,
                "nstepsprod": nstepsprod,
                "sigma_C": _to_nm(sigma_C),
                "sigma_F": _to_nm(sigma_F),
                "sigma_H": _to_nm(sigma_H),
                "epsilon_C": _to_kj_per_mol(epsilon_C),
                "epsilon_F": _to_kj_per_mol(epsilon_F),
                "epsilon_H": _to_kj_per_mol(epsilon_H),
            }

            job = project.open_job(state_point)
            job.init()
def main():
    """Fit GP surrogates for R32 liquid and vapor density and save parity plots.

    For each property the data in ``df_all`` is split 80/20 into train and
    test sets, a GP model with an RBF kernel is fitted, and the surrogate
    predictions are plotted against the simulation values. The figures are
    written to "pdfs/fig1-surrogate-liquiddensity.pdf" and
    "pdfs/fig1-surrogate-vapordensity.pdf".
    """

    def _parity_figure(
        property_name,
        property_bounds,
        train_alpha,
        test_alpha,
        xylim,
        major_ticks,
        minor_ticks,
        xlabel,
        ylabel,
        legend_kwargs,
        outfile,
    ):
        # Fit one surrogate and save its train/test parity plot
        param_names = list(R32.param_names) + ["temperature"]

        # Extract train/test data
        x_train, y_train, x_test, y_test = shuffle_and_split(
            df_all,
            param_names,
            property_name,
            shuffle_seed=gp_shuffle_seed,
            fraction_train=0.8,
        )

        # Fit model
        model = run_gpflow_scipy(
            x_train,
            y_train,
            gpflow.kernels.RBF(lengthscales=np.ones(R32.n_params + 1)),
        )

        # Use model to predict results (the variances are not used)
        gp_mu_train, _ = model.predict_f(x_train)
        gp_mu_test, _ = model.predict_f(x_test)

        # Convert results to physical values
        y_train_physical = values_scaled_to_real(y_train, property_bounds)
        y_test_physical = values_scaled_to_real(y_test, property_bounds)
        gp_mu_train_physical = values_scaled_to_real(gp_mu_train,
                                                     property_bounds)
        gp_mu_test_physical = values_scaled_to_real(gp_mu_test,
                                                    property_bounds)

        # Plot
        fig, ax = plt.subplots()
        ax.scatter(
            y_train_physical,
            gp_mu_train_physical,
            label="Train",
            alpha=train_alpha,
            s=130,
            c="C1",
        )
        ax.scatter(
            y_test_physical,
            gp_mu_test_physical,
            marker="+",
            label="Test",
            alpha=test_alpha,
            s=170,
            c="C5",
        )

        # y = x reference line
        ax.plot(
            np.arange(xylim[0], xylim[1] + 100, 100),
            np.arange(xylim[0], xylim[1] + 100, 100),
            color="black",
            linewidth=3,
            alpha=0.6,
        )

        ax.set_xlim(xylim[0], xylim[1])
        ax.set_ylim(xylim[0], xylim[1])
        ax.set_xticks(major_ticks)
        ax.set_yticks(major_ticks)
        ax.set_xticks(minor_ticks, minor=True)
        ax.set_yticks(minor_ticks, minor=True)

        ax.tick_params("both", direction="in", which="both", length=4,
                       labelsize=26, pad=10)
        ax.tick_params("both", which="major", length=8)
        ax.xaxis.set_ticks_position("both")
        ax.yaxis.set_ticks_position("both")

        ax.set_xlabel(xlabel, fontsize=28, labelpad=20)
        ax.set_ylabel(ylabel, fontsize=28, labelpad=10)
        ax.legend(fontsize=24, handletextpad=0.00, **legend_kwargs)
        ax.set_aspect("equal", "box")

        fig.tight_layout()
        fig.savefig(outfile)

    seaborn.set_palette("Paired")

    # Liquid density first
    _parity_figure(
        property_name="sim_liq_density",
        property_bounds=R32.liq_density_bounds,
        train_alpha=0.4,
        test_alpha=0.7,
        xylim=[750, 1250],
        major_ticks=[800, 1000, 1200],
        minor_ticks=[850, 900, 950, 1050, 1100, 1150],
        xlabel=r"$\mathregular{\rho_{liq}\ sim. (kg/m^3)}$",
        ylabel=r"$\mathregular{\rho_{liq}\ sur. (kg/m^3)}$",
        legend_kwargs={
            "loc": "lower right",
            "bbox_to_anchor": (1.01, -0.01),
        },
        outfile="pdfs/fig1-surrogate-liquiddensity.pdf",
    )

    # Vapor density next
    _parity_figure(
        property_name="sim_vap_density",
        property_bounds=R32.vap_density_bounds,
        train_alpha=0.6,
        test_alpha=0.8,
        xylim=[0, 125],
        major_ticks=[0, 50, 100],
        minor_ticks=[25, 75, 125],
        xlabel=r"$\mathregular{\rho_{vap}\ sim. (kg/m^3)}$",
        ylabel=r"$\mathregular{\rho_{vap}\ sur. (kg/m^3)}$",
        legend_kwargs={},
        outfile="pdfs/fig1-surrogate-vapordensity.pdf",
    )