def test_values_real_to_scaled_multiple_bounds(self):
    """Check scaling of multi-column values against per-column bounds.

    Each column of ``values`` is paired with its own ``[low, high]`` row of
    ``bounds``; the expected results map ``low`` to 0.0 and ``high`` to 1.0
    within each column.
    """
    bounds = [[2.0, 4.0], [2.0, 3.0]]
    # (input rows, expected scaled rows)
    cases = (
        ([[2.0, 3.0]], [[0.0, 1.0]]),
        ([[2.0, 3.0], [3.0, 2.0]], [[0.0, 1.0], [0.5, 0.0]]),
    )
    for values, expected in cases:
        scaled_values = values_real_to_scaled(values, bounds)
        assert np.isclose(scaled_values, expected).all()
def test_values_real_to_scaled_multiple_values(self):
    """Check scaling of several values against a single pair of bounds.

    A flat list of values is expected to come back as one row per value
    (a single column), including a value above the upper bound, which
    is expected to scale to more than 1.0.
    """
    bounds = [2.0, 4.0]
    # (input values, expected scaled column)
    cases = (
        ([2.0, 4.0], [[0.0], [1.0]]),
        ([2.0, 4.0, 5.0], [[0.0], [1.0], [1.5]]),
    )
    for values, expected in cases:
        scaled_values = values_real_to_scaled(values, bounds)
        assert np.isclose(scaled_values, expected).all()
def prepare_df(df_csv, AP):
    """Prepare a pandas dataframe for fitting GP models to AP data

    Parameters
    ----------
    df_csv : pd.DataFrame
        Raw results; must contain "uc_mean_distance", "temperature" and
        one column per name in ``AP.param_names``
    AP : object
        Constants object providing ``param_names``, ``param_bounds`` and
        ``temperature_bounds``

    Returns
    -------
    df_all : pd.DataFrame
        Rows of ``df_csv`` that have a "uc_mean_distance" value, with a new
        "scaled_temperature" column and the parameter columns replaced by
        their scaled values
    """
    param_columns = list(AP.param_names)

    # Failed simulations have no 'uc_mean_distance'; discard those rows
    df_all = df_csv.dropna(subset=['uc_mean_distance'])

    # The scaled temperature is stored alongside the original temperature
    scaled_temperature = values_real_to_scaled(
        df_all["temperature"].values, AP.temperature_bounds
    )
    df_all["scaled_temperature"] = scaled_temperature

    # The parameter columns are overwritten with their scaled counterparts
    df_all[param_columns] = values_real_to_scaled(
        df_all[param_columns], AP.param_bounds
    )

    return df_all
def test_values_real_to_scaled_single(self):
    """Check scaling of single values against one pair of bounds.

    Covers the lower bound, the upper bound, the midpoint, a one-element
    list below the lower bound, and a pair of negative bounds.
    """
    # (bounds, input value, expected scaled value)
    cases = (
        ([2.0, 4.0], 2.0, 0.0),
        ([2.0, 4.0], 4.0, 1.0),
        ([2.0, 4.0], 3.0, 0.5),
        ([2.0, 4.0], [1.0], -0.5),
        ([-5.0, -4.0], -4.5, 0.5),
    )
    for bounds, value, expected in cases:
        scaled_value = values_real_to_scaled(value, bounds)
        assert np.isclose(scaled_value, expected)
def _calc_gp_mse(gp_model, samples, expt_property, property_bounds,
                 temperature_bounds):
    """Calculate the MSE between the GP model and experiment for samples

    Parameters
    ----------
    gp_model : object
        Model exposing ``predict_f(x)`` that returns ``(mean, variance)``
    samples : np.ndarray
        One row per sample; the scaled temperature is appended as an extra
        trailing column before prediction
    expt_property : dict
        Maps temperature (real units) to the experimental property value
    property_bounds : array-like
        Bounds used to convert the predicted means back to real units
    temperature_bounds : array-like
        Bounds used to scale each temperature

    Returns
    -------
    np.ndarray
        Mean squared error per sample, averaged over all temperatures
    """
    n_samples = samples.shape[0]
    errors = np.empty(shape=(n_samples, len(expt_property)))

    for column, (temp, expt_value) in enumerate(expt_property.items()):
        # Every sample is evaluated at the same (scaled) temperature
        scaled_temp = values_real_to_scaled(temp, temperature_bounds)
        temp_column = np.tile(scaled_temp, (n_samples, 1))
        gp_input = np.hstack((samples, temp_column))

        # Only the predictive mean is used; the variance is discarded
        predicted_scaled, _ = gp_model.predict_f(gp_input)
        predicted = values_scaled_to_real(predicted_scaled, property_bounds)

        errors[:, column] = (predicted - expt_value)[:, 0]

    return np.mean(errors**2, axis=1)
for result_csv_name in result_csv_names ] # Concatenate all parameter sets and results df_params = pd.concat(df_params).reset_index(drop=True) df_results = pd.concat(df_results).reset_index(drop=True) # Create a df with the MSE for each parameter set # and add the parameter set idx df_results["expt_density"] = df_results["temperature"].apply( lambda x: R125.expt_liq_density[int(x)]) df_results["sq_err"] = (df_results["density"] - df_results["expt_density"])**2 df_mse = (df_results.groupby(list( R125.param_names))["sq_err"].mean().reset_index(name="mse")) scaled_param_values = values_real_to_scaled(df_mse[list(R125.param_names)], R125.param_bounds) param_idxs = [] param_vals = [] for params1 in scaled_param_values: for idx, params2 in enumerate(df_params[list(R125.param_names)].values): if np.allclose(params1, params2): param_idxs.append(idx) param_vals.append(params2) break df_mse["param_idx"] = param_idxs df_mse[list(R125.param_names)] = param_vals # Plot all with MSE < 625 g = seaborn.pairplot( pd.DataFrame(df_mse[df_mse["mse"] < 625.0], columns=list(R125.param_names)))
def main():
    """Fit the GP models and classifier, then pick parameter sets for the next iteration.

    Steps, all driven by module-level settings (``df_all``, the thresholds,
    the seeds, ``temperatures``, ``csv_path`` and ``out_csv_name``):

    1. Fit a GP model for "uc_mean_distance" (10 K data below the classifier
       threshold) and one for "lattice_ape" (all data, with
       "scaled_temperature" as an extra input).
    2. Train an RBF SVM that classifies whether "uc_mean_distance" is below
       ``ucmd_clf_threshold`` and print its train/test accuracy.
    3. Evaluate the classifier and both GP models on the samples loaded from
       "LHS_1e6x8.csv" and find the Pareto-efficient points among those
       that pass the classifier.
    4. Keep the non-dominated points that meet both next-iteration
       thresholds, add dominated points that meet the thresholds and are
       separated from each other in L1 distance, truncate to 250 rows and
       write the result to ``csv_path + out_csv_name``.

    Figures are written to the "figs" directory.
    """
    # Make sure we have a folder for figures
    try:
        os.mkdir("figs")
    except FileExistsError:
        pass

    param_cols = list(AP.param_names)

    ###########################################################
    ####################   Fit GP models    ###################
    ###########################################################

    ## UCMD Model
    # Only train on 10 K data that meets ucmd clf threshold
    df_ucmd = df_all.loc[
        (df_all["temperature"] == 10)
        & (df_all["uc_mean_distance"] < ucmd_clf_threshold)
    ]
    x_train, y_train, x_test, y_test = shuffle_and_split(
        df_ucmd, param_cols, "uc_mean_distance", shuffle_seed=gp_shuffle_seed
    )
    ucmd_gp = run_gpflow_scipy(
        x_train,
        y_train,
        gpflow.kernels.Matern32(lengthscales=np.ones(AP.n_params)),
    )

    ## Lattice APE Model (temperature is an additional model input)
    x_train, y_train, x_test, y_test = shuffle_and_split(
        df_all,
        param_cols + ["scaled_temperature"],
        "lattice_ape",
        shuffle_seed=gp_shuffle_seed,
    )
    lattice_gp = run_gpflow_scipy(
        x_train,
        y_train,
        gpflow.kernels.Matern32(lengthscales=np.ones(AP.n_params + 1)),
    )

    ###########################################################
    ##################   Train classifiers   ##################
    ###########################################################
    x_train, y_train, x_test, y_test = shuffle_and_split(
        df_all.loc[df_all["temperature"] == 10],
        param_cols,
        "uc_mean_distance",
        shuffle_seed=clf_shuffle_seed,
    )

    # Binary labels: 1 when the unit-cell mean distance is below threshold
    y_train = np.array(y_train < ucmd_clf_threshold, dtype=np.int32)
    y_test = np.array(y_test < ucmd_clf_threshold, dtype=np.int32)
    ucmd_clf = svm.SVC(kernel="rbf")
    ucmd_clf.fit(x_train, y_train)
    y_pred = ucmd_clf.predict(x_train)
    print("Training accuracy:", metrics.accuracy_score(y_train, y_pred))
    y_pred = ucmd_clf.predict(x_test)
    print("Testing accuracy:", metrics.accuracy_score(y_test, y_pred))
    print(metrics.confusion_matrix(y_test, y_pred))

    ###########################################################
    ###################   Find new points   ###################
    ###########################################################

    # Load large hypercube
    latin_hypercube = np.loadtxt("LHS_1e6x8.csv", delimiter=",")
    n_samples = latin_hypercube.shape[0]

    # Apply UCMD classifier
    ucmd_pred = ucmd_clf.predict(latin_hypercube)
    # Predict UCMD with GP model (the predictive variance is not used)
    gp_means_ucmd, _ = ucmd_gp.predict_f(latin_hypercube)

    # Predict Lattice APE with GP model at each temperature
    all_errs = np.empty(shape=(n_samples, len(temperatures)))
    for col_idx, temperature in enumerate(temperatures):
        scaled_temperature = values_real_to_scaled(temperature,
                                                   AP.temperature_bounds)
        xx = np.hstack(
            (latin_hypercube, np.tile(scaled_temperature, (n_samples, 1)))
        )
        gp_means_ape, _ = lattice_gp.predict_f(xx)
        all_errs[:, col_idx] = gp_means_ape[:, 0]

    # Compute MAPE across all temperatures
    mean_errs = np.mean(all_errs, axis=1)

    ## Save all the results to a dataframe
    LH_results = pd.DataFrame(latin_hypercube, columns=AP.param_names)
    LH_results["ucmd_clf"] = ucmd_pred.astype(np.bool_)
    LH_results["ucmd"] = gp_means_ucmd.numpy()
    LH_results["lattice_mape"] = mean_errs

    # Only take points where structure classifier is satisfied.
    # Explicit copy: a column is added below, which must not target a view
    # of LH_results.
    LH_results_pass_ucmd_clf = LH_results.loc[LH_results.ucmd_clf].copy()
    costs = LH_results_pass_ucmd_clf[["ucmd", "lattice_mape"]].to_numpy()

    # Find pareto efficient points
    result, pareto_points, dominated_points = find_pareto_set(
        costs, is_pareto_efficient)
    LH_results_pass_ucmd_clf["is_pareto"] = result

    # Plot pareto points vs. costs
    g = seaborn.pairplot(
        LH_results_pass_ucmd_clf,
        vars=["ucmd", "lattice_mape"],
        hue="is_pareto",
    )
    g.savefig("figs/pareto-mses.png", dpi=300)

    # Plot pareto points vs. params
    g = seaborn.pairplot(LH_results_pass_ucmd_clf,
                         vars=param_cols,
                         hue="is_pareto")
    g.set(xlim=(-0.1, 1.1), ylim=(-0.1, 1.1))
    g.savefig("figs/pareto-params.png", dpi=300)

    # For next iteration:
    #   1. All non-dominated points that meet the thresholds
    #   2. "Separated" dominated points that meet the thresholds
    meets_thresholds = (
        (LH_results_pass_ucmd_clf.ucmd < ucmd_next_itr_threshold)
        & (LH_results_pass_ucmd_clf.lattice_mape
           < lattice_mape_next_itr_threshold)
    )
    # `== True` / `== False` are kept on purpose: the dtype of the mask
    # returned by find_pareto_set is not visible here.
    is_pareto = LH_results_pass_ucmd_clf.is_pareto == True  # noqa: E712
    not_pareto = LH_results_pass_ucmd_clf.is_pareto == False  # noqa: E712
    next_iteration_points = LH_results_pass_ucmd_clf.loc[
        is_pareto & meets_thresholds]
    # Copy because rows are removed from this frame inside the loop below
    dominated_points = LH_results_pass_ucmd_clf.loc[
        not_pareto & meets_thresholds].copy()

    print(f"{len(LH_results_pass_ucmd_clf)} points meet the ucmd classifier.")
    print(
        f"{len(LH_results_pass_ucmd_clf[LH_results_pass_ucmd_clf.is_pareto == True])} are non-dominated."
    )
    print(
        f"{len(dominated_points)} are dominated with ucmd < {ucmd_next_itr_threshold} and lattice_mape < {lattice_mape_next_itr_threshold}"
    )

    removal_distance = 1.034495
    # DataFrame.sample draws from numpy's global random state when no
    # random_state is passed, so seeding here makes the selection repeatable.
    np.random.seed(distance_seed)

    while len(dominated_points) > 0:
        # Shuffle the top parameter sets
        dominated_points = dominated_points.sample(frac=1)

        # Select one off the top
        # Note: here we use a random one rather than the "best" one; we don't have
        # confidence that the GP models are more accurate than our thresholds
        selected = dominated_points.iloc[[0]]
        next_iteration_points = pd.concat([next_iteration_points, selected])

        # Remove anything within given distance of the selected point.
        # The selected point itself is at distance 0, so the frame shrinks
        # by at least one row per pass.
        l1_norm = np.sum(
            np.abs(dominated_points[param_cols].values
                   - selected[param_cols].values),
            axis=1,
        )
        points_to_remove = np.where(l1_norm < removal_distance)[0]
        dominated_points = dominated_points.drop(
            index=dominated_points.index[points_to_remove])

    print(
        f"After removing similar points, we are left with {len(next_iteration_points)} final top points."
    )

    # Keep at most the first 250 rows and only the parameter/classifier columns
    next_iteration_points = next_iteration_points.iloc[:250].drop(
        columns=["ucmd", "lattice_mape", "is_pareto"])

    # Plot new points
    g = seaborn.pairplot(next_iteration_points, vars=param_cols)
    g.set(xlim=(-0.1, 1.1), ylim=(-0.1, 1.1))
    g.savefig("figs/new-points-params.png", dpi=300)

    # Save the final new parameters
    next_iteration_points.to_csv(csv_path + out_csv_name)
def main():
    """Draw the AP parallel-coordinates figure and save it as a PDF.

    Reads the 10 K rows of "../analysis/csv/uc-lattice-final-params.csv"
    (thin lines) and "../analysis/csv/ap-final-2.csv" (thick lines), plots
    the parameter columns followed by the scaled "uc_mean_distance" and
    "lattice_ape" results on adjacent vertical axes, adds two pairs of
    reference markers on the result axes, and writes
    "pdfs/fig4-ap-parallel.pdf".
    """
    df = pd.read_csv("../analysis/csv/uc-lattice-final-params.csv",
                     index_col=0)
    df = df[df.temperature == 10]
    dff = pd.read_csv("../analysis/csv/ap-final-2.csv", index_col=0)

    seaborn.set_palette('bright', n_colors=len(df))

    param_cols = list(AP.param_names)
    result_cols = ["uc_mean_distance", "lattice_ape"]

    # Parameter columns are plotted exactly as stored in the CSV files;
    # only the two result columns are rescaled onto result_bounds.
    result_bounds = np.array([[0, 0.5], [0, 5]])
    results = values_real_to_scaled(df[result_cols].values, result_bounds)
    results_f = values_real_to_scaled(dff[result_cols].values, result_bounds)

    data = np.hstack((df[param_cols].values, results))
    data_f = np.hstack((dff[param_cols].values, results_f))

    # Parameter bounds are used as stored; no unit conversion is applied
    bounds = np.vstack((AP.param_bounds, result_bounds))

    print(data.shape)

    col_names = [
        r"$\sigma_{Cl}$",
        r"$\sigma_H$",
        r"$\sigma_N$",
        r"$\sigma_O$",
        r"$\epsilon_{Cl}$",
        r"$\epsilon_H$",
        r"$\epsilon_N$",
        r"$\epsilon_O$",
        r"UCMD",
        "Lattice\nMAPE",
    ]
    n_axis = len(col_names)
    assert data.shape[1] == n_axis
    x_vals = list(range(n_axis))

    # Create (N-1) subplots along x axis
    fig, axes = plt.subplots(1, n_axis - 1, sharey=False, figsize=(12, 5))

    # Plot each row; every subplot shows only the segment between two
    # neighbouring coordinates
    for i, ax in enumerate(axes):
        for line in data:
            ax.plot(x_vals, line, alpha=0.35)
        ax.set_xlim([x_vals[i], x_vals[i + 1]])
        for line in data_f:
            ax.plot(x_vals, line, alpha=1.0, linewidth=3)

    for dim, ax in enumerate(axes):
        ax.xaxis.set_major_locator(ticker.FixedLocator([dim]))
        set_ticks_for_axis(ax, bounds[dim], nticks=6)
        ax.set_xticklabels([col_names[dim]], fontsize=30)
        ax.tick_params(axis="x", pad=10)
        ax.set_ylim(-0.05, 1.05)
        # Add white background behind labels
        for label in ax.get_yticklabels():
            label.set_bbox(
                dict(facecolor='white',
                     edgecolor='none',
                     alpha=0.45,
                     boxstyle=mpatch.BoxStyle("round4")))
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.spines['left'].set_linewidth(2.0)

    # The last subplot carries the labels of the final two coordinates
    ax = axes[-1]
    ax.xaxis.set_major_locator(ticker.FixedLocator([n_axis - 2, n_axis - 1]))
    ax.set_xticklabels([col_names[-2], col_names[-1]], fontsize=24)
    ax.tick_params(axis="x", pad=14)

    # A twin axis provides the right-hand scale for the final coordinate
    ax = plt.twinx(axes[-1])
    ax.set_ylim(-0.05, 1.05)
    set_ticks_for_axis(ax, bounds[-1], nticks=6)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['right'].set_linewidth(2.0)

    # Add class I data (values are divided by the upper bound of the axis)
    ax.plot(x_vals[-1], 1.42 / bounds[-1][1], markersize=15,
            color="tab:purple", marker="o", clip_on=False, zorder=200)
    ax.plot(x_vals[-2], 0.156 / bounds[-2][1], markersize=15,
            color="tab:purple", marker="o", clip_on=False, zorder=200)
    # Add class II data
    ax.plot(x_vals[-1], 3.55 / bounds[-1][1], markersize=20,
            color="tab:red", marker="*", clip_on=False, zorder=200)
    ax.plot(x_vals[-2], 0.3485 / bounds[-2][1], markersize=20,
            color="tab:red", marker="*", clip_on=False, zorder=200)

    # Remove space between subplots
    plt.subplots_adjust(wspace=0, bottom=0.2, left=0.05, right=0.95)

    fig.savefig("pdfs/fig4-ap-parallel.pdf")
def plot_slices_temperature(
    models,
    n_params,
    temperature_bounds,
    property_bounds,
    plot_bounds=(220.0, 340.0),
    property_name="property",
):
    """Plot the model predictions as a function of temperature

    Slices are plotted where the values of the other parameters
    are all set to 0.0 --> 1.0 in increments of 0.1

    Parameters
    ----------
    models : dict
        models to plot, key=label, value=gpflow.model
    n_params : int
        number of non-temperature parameters in the model
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling the property between physical
        and dimensionless values
    plot_bounds : array-like, optional
        temperature bounds for the plot; only the first two
        entries (lower, upper) are read
    property_name : str, optional, default="property"
        property name with units for axis label

    Returns
    -------
    figs : list
        list of matplotlib.figure.Figure objects, one per slice; returned
        only when the module-level ``mpl_is_inline`` flag is falsy
        (otherwise the function returns None)
    """
    n_samples = 100
    vals = np.linspace(plot_bounds[0], plot_bounds[1],
                       n_samples).reshape(-1, 1)
    vals_scaled = values_real_to_scaled(vals, temperature_bounds)

    figs = []
    for other_vals in np.arange(0, 1.1, 0.1):
        # All non-temperature inputs are held at the same scaled value
        other = np.tile(other_vals, (n_samples, n_params))
        xx = np.hstack((other, vals_scaled))

        fig, ax = plt.subplots()
        for (label, model) in models.items():
            mean_scaled, var_scaled = model.predict_f(xx)
            mean = values_scaled_to_real(mean_scaled, property_bounds)
            var = variances_scaled_to_real(var_scaled, property_bounds)
            # Half-width of the shaded band: 1.96 standard deviations
            half_width = 1.96 * np.sqrt(var[:, 0])

            ax.plot(vals, mean, lw=2, label=label)
            ax.fill_between(
                vals[:, 0],
                mean[:, 0] - half_width,
                mean[:, 0] + half_width,
                alpha=0.3,
            )

        ax.set_title(f"Other vals = {other_vals:.2f}")
        ax.set_xlabel("Temperature")
        ax.set_ylabel(property_name)
        fig.legend()
        figs.append(fig)

    if not mpl_is_inline:
        return figs
def plot_model_vs_test(
    models,
    param_values,
    train_points,
    test_points,
    temperature_bounds,
    property_bounds,
    plot_bounds=(220.0, 340.0),
    property_name="property",
):
    """Plots the GP model(s) as a function of temperature with all other
    parameters taken as param_values. Overlays training and testing points
    with the same param_values.

    Parameters
    ----------
    models : dict {"label" : gpflow.model }
        GPFlow models to plot
    param_values : np.ndarray, shape=(n_params)
        The parameters at which to evaluate the GP model
    train_points : np.ndarray, shape=(n_points, 2)
        The temperature (scaled) and property (scaled) of each training point
    test_points : np.ndarray, shape=(n_points, 2)
        The temperature (scaled) and property (scaled) of each test point
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling property between physical
        and dimensionless values
    plot_bounds : array-like, optional
        temperature bounds for the plot; only the first two
        entries (lower, upper) are read
    property_name : str, optional, default="property"
        property name with units for axis label

    Returns
    -------
    matplotlib.figure.Figure
        returned only when the module-level ``mpl_is_inline`` flag is
        falsy (otherwise the function returns None)
    """
    n_samples = 100
    vals = np.linspace(plot_bounds[0], plot_bounds[1],
                       n_samples).reshape(-1, 1)
    vals_scaled = values_real_to_scaled(vals, temperature_bounds)

    # Same parameter set on every row; only the temperature varies
    other = np.tile(param_values, (n_samples, 1))
    xx = np.hstack((other, vals_scaled))

    fig, ax = plt.subplots()
    for (label, model) in models.items():
        mean_scaled, var_scaled = model.predict_f(xx)
        mean = values_scaled_to_real(mean_scaled, property_bounds)
        var = variances_scaled_to_real(var_scaled, property_bounds)
        # Half-width of the shaded band: 1.96 standard deviations
        half_width = 1.96 * np.sqrt(var[:, 0])

        ax.plot(vals, mean, lw=2, label="GP model" + label)
        ax.fill_between(
            vals[:, 0],
            mean[:, 0] - half_width,
            mean[:, 0] + half_width,
            alpha=0.25,
        )

    # Overlay the MD points, converted back to physical units
    if train_points.shape[0] > 0:
        md_train_temp = values_scaled_to_real(train_points[:, 0],
                                              temperature_bounds)
        md_train_property = values_scaled_to_real(train_points[:, 1],
                                                  property_bounds)
        ax.plot(md_train_temp,
                md_train_property,
                "s",
                color="black",
                label="Train")
    if test_points.shape[0] > 0:
        md_test_temp = values_scaled_to_real(test_points[:, 0],
                                             temperature_bounds)
        md_test_property = values_scaled_to_real(test_points[:, 1],
                                                 property_bounds)
        ax.plot(md_test_temp, md_test_property, "ro", label="Test")

    ax.set_xlabel("Temperature")
    ax.set_ylabel(property_name)
    fig.legend()

    if not mpl_is_inline:
        return fig
def plot_slices_params(
    models,
    param_to_plot,
    param_names,
    temperature,
    temperature_bounds,
    property_bounds,
    property_name="property",
):
    """Plot the model predictions as a function of param_to_plot
    at the specified temperature

    One figure is produced for each value (0.0 --> 1.0 in increments of
    0.1) at which all the remaining parameters are held.

    Parameters
    ----------
    models : dict {"label" : gpflow.model }
        GPFlow models to plot
    param_to_plot : string
        Parameter to vary
    param_names : list, tuple
        list of parameter names
    temperature : float
        temperature at which to plot the surface
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling property between physical
        and dimensionless values
    property_name : string, optional, default="property"
        name of property to plot

    Returns
    -------
    figs : list
        list of matplotlib.figure.Figure objects; returned only when the
        module-level ``mpl_is_inline`` flag is falsy

    Raises
    ------
    ValueError
        if ``param_to_plot`` is not one of ``param_names``
    """
    try:
        param_idx = param_names.index(param_to_plot)
    except ValueError:
        raise ValueError(
            f"parameter: {param_to_plot} not found in parameter_names: {param_names}"
        )

    n_samples = 100
    # Number of fixed columns before / after the varied parameter
    n_before = param_idx
    n_after = len(param_names) - 1 - param_idx

    # The varied parameter sweeps slightly beyond the scaled [0, 1] range
    vals_scaled = np.linspace(-0.1, 1.1, n_samples).reshape(-1, 1)
    temp_vals_scaled = values_real_to_scaled(
        np.tile(temperature, (n_samples, 1)), temperature_bounds)

    math_parameter = "$\\" + param_to_plot + "$"

    figs = []
    for other_vals in np.arange(0, 1.1, 0.1):
        xx = np.hstack((
            np.tile(other_vals, (n_samples, n_before)),
            vals_scaled,
            np.tile(other_vals, (n_samples, n_after)),
            temp_vals_scaled,
        ))

        fig, ax = plt.subplots()
        for (label, model) in models.items():
            mean_scaled, var_scaled = model.predict_f(xx)
            mean = values_scaled_to_real(mean_scaled, property_bounds)
            var = variances_scaled_to_real(var_scaled, property_bounds)
            lower = mean[:, 0] - 1.96 * np.sqrt(var[:, 0])
            upper = mean[:, 0] + 1.96 * np.sqrt(var[:, 0])

            ax.plot(vals_scaled, mean, lw=2, label=label)
            ax.fill_between(vals_scaled[:, 0], lower, upper, alpha=0.3)

        ax.set_title(
            f"{math_parameter} at T = {temperature:.0f} K. Other vals = {other_vals:.2f}."
        )
        ax.set_xlabel(math_parameter)
        ax.set_ylabel(property_name)
        fig.legend()
        figs.append(fig)

    if not mpl_is_inline:
        return figs
def main():
    """Draw the R125 parallel-coordinates figure and save it as a PDF.

    Reads "../csv/r125-pareto.csv" (thin lines) and
    "../csv/r125-final-4.csv" (thick lines), plots the parameter columns
    followed by the four scaled MAPE columns on adjacent vertical axes,
    adds the GAFF reference markers on the MAPE axes, and writes
    "pdfs/fig_r125-parallel.pdf".
    """
    df = pd.read_csv("../csv/r125-pareto.csv", index_col=0)
    dff = pd.read_csv("../csv/r125-final-4.csv", index_col=0)

    seaborn.set_palette('bright', n_colors=len(df))

    param_cols = list(R125.param_names)
    mape_cols = [
        "mape_liq_density", "mape_vap_density", "mape_Pvap", "mape_Hvap"
    ]

    # Parameter columns are plotted as stored; the MAPE columns are
    # rescaled onto result_bounds.
    result_bounds = np.array([[0, 25], [0, 50], [0, 50], [0, 25]])
    results = values_real_to_scaled(df[mape_cols].values, result_bounds)
    results_f = values_real_to_scaled(dff[mape_cols].values, result_bounds)

    # Work on a private copy: assigning into slices of R125.param_bounds
    # itself would rescale the shared constants for every later user (and
    # again on every further call of this function).
    param_bounds = np.array(R125.param_bounds, dtype=float)
    param_bounds[:5] = param_bounds[:5] * NM_TO_ANGSTROM
    param_bounds[5:] = param_bounds[5:] * KJMOL_TO_K

    data = np.hstack((df[param_cols].values, results))
    data_f = np.hstack((dff[param_cols].values, results_f))
    bounds = np.vstack((param_bounds, result_bounds))

    col_names = [
        r"$\sigma_{C1}$",
        r"$\sigma_{C2}$",
        r"$\sigma_{F1}$",
        r"$\sigma_{F2}$",
        r"$\sigma_{H}$",
        r"$\epsilon_{C1}$",
        r"$\epsilon_{C2}$",
        r"$\epsilon_{F1}$",
        r"$\epsilon_{F2}$",
        r"$\epsilon_{H}$",
        "MAPE\n" + r"$\rho^l_{\mathrm{sat}}$",
        "MAPE\n" + r"$\rho^v_{\mathrm{sat}}$",
        "MAPE\n" + r"$P_{\mathrm{vap}}$",
        "MAPE\n" + r"$\Delta H_{\mathrm{vap}}$",
    ]
    n_axis = len(col_names)
    assert data.shape[1] == n_axis
    x_vals = list(range(n_axis))

    # Create (N-1) subplots along x axis
    fig, axes = plt.subplots(1, n_axis - 1, sharey=False, figsize=(20, 5))

    # Plot each row; every subplot shows only the segment between two
    # neighbouring coordinates
    for i, ax in enumerate(axes):
        for line in data:
            ax.plot(x_vals, line, alpha=0.45)
        ax.set_xlim([x_vals[i], x_vals[i + 1]])
        for line in data_f:
            ax.plot(x_vals, line, alpha=1.0, linewidth=3)

    for dim, ax in enumerate(axes):
        ax.xaxis.set_major_locator(ticker.FixedLocator([dim]))
        set_ticks_for_axis(ax, bounds[dim], nticks=6)
        # The first 10 labels are the parameter symbols; the two-line MAPE
        # labels use a smaller font
        label_size = 24 if dim < 10 else 20
        ax.set_xticklabels([col_names[dim]], fontsize=label_size)
        ax.set_ylim(-0.05, 1.05)
        # Add white background behind labels
        for label in ax.get_yticklabels():
            label.set_bbox(
                dict(facecolor='white',
                     edgecolor='none',
                     alpha=0.45,
                     boxstyle=mpatch.BoxStyle("round4")))
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.spines['left'].set_linewidth(2.0)

    # The last subplot carries the labels of the final two coordinates
    ax = axes[-1]
    ax.xaxis.set_major_locator(ticker.FixedLocator([n_axis - 2, n_axis - 1]))
    ax.set_xticklabels([col_names[-2], col_names[-1]], fontsize=20)

    # A twin axis provides the right-hand scale for the final coordinate
    ax = plt.twinx(axes[-1])
    ax.set_ylim(-0.05, 1.05)
    set_ticks_for_axis(ax, bounds[-1], nticks=6)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['right'].set_linewidth(2.0)

    # Add GAFF: (axis position from the end, value), each value divided by
    # the upper bound of its axis
    gaff_points = ((-1, 22.37), (-2, 46.05), (-3, 50.52), (-4, 2.92))
    for position, value in gaff_points:
        ax.plot(x_vals[position],
                value / bounds[position][1],
                markersize=12,
                color="gray",
                marker="s",
                clip_on=False,
                zorder=200)

    # Remove space between subplots
    plt.subplots_adjust(wspace=0, bottom=0.2)

    fig.savefig("pdfs/fig_r125-parallel.pdf")
def prepare_df_vle(df_csv, molecule):
    """Prepare a pandas dataframe for fitting GP models to VLE data

    Performs the following actions:
       - Renames "liq_density" to "sim_liq_density"
       - Renames "vap_density" to "sim_vap_density"
       - Renames "Pvap" to "sim_Pvap"
       - Renames "Hvap" to "sim_Hvap" and divides it by
         ``molecule.molecular_weight / 1000.0`` (conversion to kJ/kg)
       - Removes "liq_enthalpy" and "vap_enthalpy"
       - Adds "expt_liq_density"
       - Adds "expt_vap_density"
       - Adds "expt_Pvap"
       - Adds "expt_Hvap"
       - Converts all values from physical values to scaled values

    Parameters
    ----------
    df_csv : pd.DataFrame
        The dataframe as loaded from a CSV file with the signac results
    molecule : R32Constants, R125Constants
        An instance of a molecule constants class

    Returns
    -------
    df_all : pd.DataFrame
        The dataframe with scaled parameters and MD/expt. properties

    Raises
    ------
    ValueError
        If ``df_csv`` lacks one of the required property columns or a
        column for any name in ``molecule.param_names``
    """
    # Checked in this order so the first missing column is the one reported
    required_columns = (
        "liq_density",
        "vap_density",
        "Pvap",
        "Hvap",
        "liq_enthalpy",
        "vap_enthalpy",
        "temperature",
    )
    for column in required_columns:
        if column not in df_csv.columns:
            raise ValueError(f"df_csv must contain column '{column}'")

    param_names = list(molecule.param_names)
    for param in param_names:
        if param not in df_csv.columns:
            raise ValueError(
                f"df_csv must contain a column for parameter: '{param}'")

    # (property name, experimental lookup keyed by int temperature, bounds)
    properties = (
        ("liq_density", molecule.expt_liq_density,
         molecule.liq_density_bounds),
        ("vap_density", molecule.expt_vap_density,
         molecule.vap_density_bounds),
        ("Pvap", molecule.expt_Pvap, molecule.Pvap_bounds),
        ("Hvap", molecule.expt_Hvap, molecule.Hvap_bounds),
    )

    # Rename properties to sim_*, drop the raw enthalpies
    df_all = df_csv.rename(
        columns={name: f"sim_{name}" for name, _, _ in properties})
    df_all = df_all.drop(columns=["vap_enthalpy", "liq_enthalpy"])

    # Convert Hvap to kJ/kg
    df_all["sim_Hvap"] = (df_all["sim_Hvap"] / molecule.molecular_weight *
                          1000.0)

    # Experimental values are looked up while "temperature" is still in
    # physical units
    for name, expt_table, _ in properties:
        df_all[f"expt_{name}"] = df_all["temperature"].apply(
            lambda temp, table=expt_table: table[int(temp)])

    # Scale all values
    df_all[param_names] = values_real_to_scaled(df_all[param_names],
                                                molecule.param_bounds)
    for name, _, bounds in properties:
        for column in (f"sim_{name}", f"expt_{name}"):
            df_all[column] = values_real_to_scaled(df_all[column], bounds)
    # Temperature is scaled last, after every lookup that needs real units
    df_all["temperature"] = values_real_to_scaled(
        df_all["temperature"], molecule.temperature_bounds)

    return df_all
def prepare_df_density(df_csv, molecule, liquid_density_threshold):
    """Prepare a pandas dataframe for fitting a GP model to density data

    Performs the following actions:
       - Renames "density" to "md_density"
       - Adds "expt_density"
       - Adds "is_liquid"
       - Converts all values from physical values to scaled values

    Parameters
    ----------
    df_csv : pd.DataFrame
        The dataframe as loaded from a CSV file with the signac results
    molecule : R32Constants, R125Constants
        An instance of a molecule constants class
    liquid_density_threshold : float
        Density threshold (kg/m^3) for distinguishing liquid and vapor

    Returns
    -------
    df_all : pd.DataFrame
        The dataframe with scaled parameters, temperature, density, and
        is_liquid
    df_liquid : pd.DataFrame
        `df_all` where `is_liquid` is True
    df_vapor : pd.DataFrame
        `df_all` where `is_liquid` is False

    Raises
    ------
    ValueError
        If ``df_csv`` lacks "density", "temperature", or a column for any
        name in ``molecule.param_names``
    """
    available = df_csv.columns
    if "density" not in available:
        raise ValueError("df_csv must contain column 'density'")
    if "temperature" not in available:
        raise ValueError("df_csv must contain column 'temperature'")
    param_columns = list(molecule.param_names)
    for param in param_columns:
        if param not in available:
            raise ValueError(
                f"df_csv must contain a column for parameter: '{param}'")

    def lookup_expt_density(temp):
        # Experimental liquid densities are keyed by integer temperature
        return molecule.expt_liq_density[int(temp)]

    def exceeds_threshold(density):
        return density > liquid_density_threshold

    # Both new columns are derived while the values are in physical units
    df_all = df_csv.rename(columns={"density": "md_density"})
    df_all["expt_density"] = df_all["temperature"].apply(lookup_expt_density)
    df_all["is_liquid"] = df_all["md_density"].apply(exceeds_threshold)

    # Compute every scaled quantity first, then write them back
    scaled = {
        "temperature": values_real_to_scaled(df_all["temperature"],
                                             molecule.temperature_bounds),
        "md_density": values_real_to_scaled(df_all["md_density"],
                                            molecule.liq_density_bounds),
        "expt_density": values_real_to_scaled(df_all["expt_density"],
                                              molecule.liq_density_bounds),
    }
    df_all[param_columns] = values_real_to_scaled(df_all[param_columns],
                                                  molecule.param_bounds)
    for column, scaled_values in scaled.items():
        df_all[column] = scaled_values

    # Split out vapor and liquid samples
    liquid_mask = df_all["is_liquid"] == True
    vapor_mask = df_all["is_liquid"] == False
    return df_all, df_all[liquid_mask], df_all[vapor_mask]