Beispiel #1
0
    def test_values_scaled_to_real_multiple_bounds(self):
        """Check unscaling when every column has its own pair of bounds."""
        per_column_bounds = [[2.0, 4.0], [2.0, 3.0]]

        # Single row: 0.0 -> lower bound of column 0, 1.0 -> upper bound
        # of column 1
        one_row = values_scaled_to_real([[0.0, 1.0]], per_column_bounds)
        assert np.all(np.isclose(one_row, [[2.0, 3.0]]))

        # Two rows are converted column by column
        two_rows = values_scaled_to_real([[0.0, 1.0], [0.5, 0.0]],
                                         per_column_bounds)
        assert np.all(np.isclose(two_rows, [[2.0, 3.0], [3.0, 2.0]]))
Beispiel #2
0
    def test_values_scaled_to_real_multiple_values(self):
        """Check unscaling of several values that share one pair of bounds."""
        shared_bounds = [2.0, 4.0]

        # The expected result is a column with one row per input value
        two_values = values_scaled_to_real([0.0, 1.0], shared_bounds)
        assert np.all(np.isclose(two_values, [[2.0], [4.0]]))

        # A scaled value above 1.0 lands above the upper bound
        three_values = values_scaled_to_real([0.0, 1.0, 1.5], shared_bounds)
        assert np.all(np.isclose(three_values, [[2.0], [4.0], [5.0]]))
Beispiel #3
0
def plot_property(df, prp_name, bounds, axis_name=None):
    """Plot a comparison of the simulated and experimental property

    Parameters
    ----------
    df : pandas.Dataframe
        dataframe with the columns 'expt_{prp_name}' and 'sim_{prp_name}'
        holding scaled values
    prp_name : string
        property name to plot from df
    bounds : np.ndarray
        bounds of the property, used to convert the scaled values to
        physical values and to choose the axis limits
    axis_name : string, optional
        name to use on the plot axis, with units. Defaults to prp_name

    Returns
    -------
    None

    Notes
    -----
    Saves a plot with name 'figs/expt_v_sim_{prp_name}.png'. The 'figs'
    directory is created if it does not exist yet.
    """

    if axis_name is None:
        axis_name = prp_name

    # Points of the y = x reference line, extended 3 units past the bounds
    yeqx = np.linspace(bounds[0] - 3, bounds[1] + 3, 10)

    fig, ax = plt.subplots()

    ax.plot(yeqx, yeqx, color="black")
    ax.scatter(
        values_scaled_to_real(df["expt_" + prp_name], bounds),
        values_scaled_to_real(df["sim_" + prp_name], bounds),
        alpha=0.2,
        color="black",
    )
    ax.set_xlabel("Expt. " + axis_name, fontsize=16, labelpad=15)
    ax.set_ylabel("Sim. " + axis_name, fontsize=16, labelpad=15)
    ax.tick_params(axis="both", labelsize=12)

    # Same limits on both axes so the reference line is the diagonal
    ax.set_xlim(yeqx[0], yeqx[-1])
    ax.set_ylim(yeqx[0], yeqx[-1])
    ax.set_aspect("equal", "box")

    fig.tight_layout()

    # Create the output directory up front. Retrying after a
    # FileNotFoundError with os.mkdir would raise FileExistsError (and hide
    # the real problem) whenever 'figs' exists but the save failed for
    # another reason.
    os.makedirs("figs", exist_ok=True)
    fig.savefig("figs/expt_v_sim_" + prp_name + ".png", dpi=300)
Beispiel #4
0
def prepare_df_density_errors(df, molecule):
    """Create a dataframe with mean square error (mse) and mean absolute
    percent error (mape) for each unique parameter set.

    Parameters
    ----------
    df : pandas.Dataframe
        per simulation results; must contain the columns named in
        molecule.param_names plus "temperature", "md_density" and
        "expt_density" (all scaled)
    molecule : R32, R125
        molecule class with bounds/experimental data

    Returns
    -------
    df_new : pandas.Dataframe
        dataframe with one row per parameter set and including
        the simulated liquid density at each temperature and the
        MSE and MAPE for liq_density

    Notes
    -----
    An empty ``df`` yields an empty dataframe (parameter and error columns
    only). If parameter sets were simulated at different temperatures the
    per-temperature columns are the union over all sets and missing
    entries are NaN.
    """
    param_names = list(molecule.param_names)
    error_columns = ["mse_liq_density", "mape_liq_density"]

    records = []
    # dict used as an ordered set of the per-temperature column names
    property_columns = {}
    for group, values in df.groupby(param_names):
        # Grouping by a single column can yield a scalar key depending on
        # the pandas version; normalize so it pairs up with param_names
        if not isinstance(group, tuple):
            group = (group,)
        record = dict(zip(param_names, group))

        # Flatten to 1-D so iterating yields scalars; calling float() on
        # 1-element arrays is deprecated in recent NumPy releases
        temps = np.ravel(
            values_scaled_to_real(values["temperature"],
                                  molecule.temperature_bounds))
        sim_liq_density = np.ravel(
            values_scaled_to_real(values["md_density"],
                                  molecule.liq_density_bounds))
        expt_liq_density = np.ravel(
            values_scaled_to_real(values["expt_density"],
                                  molecule.liq_density_bounds))

        # One column per simulated temperature
        for temp, liq_density in zip(temps, sim_liq_density):
            column = f"sim_liq_density_{float(temp):.0f}K"
            record[column] = float(liq_density)
            property_columns[column] = None

        residual = sim_liq_density - expt_liq_density
        record["mse_liq_density"] = np.mean(residual**2)
        record["mape_liq_density"] = (
            np.mean(np.abs(residual / expt_liq_density)) * 100.0)

        records.append(record)

    # Column labels are collected over every group rather than taken from
    # the last one, so they are defined even when there are no groups
    columns = param_names + list(property_columns) + error_columns
    new_df = pd.DataFrame(records, columns=columns)

    return new_df
Beispiel #5
0
    def test_values_scaled_to_real_single(self):
        """Check unscaling of individual values against one pair of bounds."""
        # Each case: (bounds, scaled input, expected physical value)
        cases = [
            ([2.0, 4.0], 0.0, 2.0),  # lower bound
            ([2.0, 4.0], 1.0, 4.0),  # upper bound
            ([2.0, 4.0], 0.5, 3.0),  # midpoint
            ([2.0, 4.0], [-0.5], 1.0),  # below the lower bound, as a list
            ([-5.0, -4.0], 0.5, -4.5),  # negative bounds
        ]
        for bounds, scaled_value, expected in cases:
            value = values_scaled_to_real(scaled_value, bounds)
            assert np.isclose(value, expected)
Beispiel #6
0
def _calc_gp_mse(gp_model, samples, expt_property, property_bounds,
                 temperature_bounds):
    """Calculate the MSE between the GP model and experiment for samples

    For every (temperature, experimental value) pair in ``expt_property``
    the model is evaluated at each row of ``samples`` with the scaled
    temperature appended as the last input column. The squared errors are
    averaged over the temperatures, giving one MSE per sample.
    """
    n_samples = samples.shape[0]
    errors = np.empty(shape=(n_samples, len(expt_property)))

    for column, (temp, expt_value) in enumerate(expt_property.items()):
        # Same scaled temperature for every sample
        scaled_temp = values_real_to_scaled(temp, temperature_bounds)
        temp_column = np.tile(scaled_temp, (n_samples, 1))
        model_inputs = np.hstack((samples, temp_column))

        # Only the predictive mean is needed; the variance is discarded
        predicted_scaled, _ = gp_model.predict_f(model_inputs)
        predicted = values_scaled_to_real(predicted_scaled, property_bounds)

        errors[:, column] = (predicted - expt_value)[:, 0]

    return np.mean(errors**2, axis=1)
Beispiel #7
0
def plot_model_vs_test(
    models,
    param_values,
    train_points,
    test_points,
    temperature_bounds,
    property_bounds,
    plot_bounds=(220.0, 340.0),
    property_name="property",
):
    """Plots the GP model(s) as a function of temperature with all other parameters
    taken as param_values. Overlays training and testing points with the same
    param_values.

    Parameters
    ----------
    models : dict {"label" : gpflow.model }
        GPFlow models to plot
    param_values : np.ndarray, shape=(n_params)
        The parameters at which to evaluate the GP model
    train_points : np.ndarray, shape=(n_points, 2)
        The temperature (scaled) and property (scaled) of each training point
    test_points : np.ndarray, shape=(n_points, 2)
        The temperature (scaled) and property (scaled) of each test point
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling property between physical
        and dimensionless values
    plot_bounds : array-like, optional
        lower and upper temperature (physical units) of the plot
    property_name : str, optional, default="property"
        property name with units for axis label

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure is returned only when ``mpl_is_inline`` is false;
        otherwise the function returns None.
    """

    # Temperatures (physical) at which the models are evaluated
    n_samples = 100
    vals = np.linspace(plot_bounds[0], plot_bounds[1],
                       n_samples).reshape(-1, 1)
    vals_scaled = values_real_to_scaled(vals, temperature_bounds)

    # Model inputs: the fixed parameters repeated for every temperature,
    # with the scaled temperature as the last column
    other = np.tile(param_values, (n_samples, 1))
    xx = np.hstack((other, vals_scaled))

    fig, ax = plt.subplots()
    for (label, model) in models.items():
        mean_scaled, var_scaled = model.predict_f(xx)

        mean = values_scaled_to_real(mean_scaled, property_bounds)
        var = variances_scaled_to_real(var_scaled, property_bounds)
        ax.plot(vals, mean, lw=2, label="GP model" + label)
        # Shaded band of +/- 1.96 standard deviations around the mean
        ax.fill_between(
            vals[:, 0],
            mean[:, 0] - 1.96 * np.sqrt(var[:, 0]),
            mean[:, 0] + 1.96 * np.sqrt(var[:, 0]),
            alpha=0.25,
        )

    # Overlay the training/test points, converted to physical values
    if train_points.shape[0] > 0:
        md_train_temp = values_scaled_to_real(train_points[:, 0],
                                              temperature_bounds)
        md_train_property = values_scaled_to_real(train_points[:, 1],
                                                  property_bounds)
        ax.plot(md_train_temp,
                md_train_property,
                "s",
                color="black",
                label="Train")
    if test_points.shape[0] > 0:
        md_test_temp = values_scaled_to_real(test_points[:, 0],
                                             temperature_bounds)
        md_test_property = values_scaled_to_real(test_points[:, 1],
                                                 property_bounds)
        ax.plot(md_test_temp, md_test_property, "ro", label="Test")

    ax.set_xlabel("Temperature")
    ax.set_ylabel(property_name)
    fig.legend()

    if not mpl_is_inline:
        return fig
Beispiel #8
0
def prepare_df_vle_errors(df, molecule):
    """Create a dataframe with mean square error (mse) and mean absolute
    percent error (mape) for each unique parameter set. The critical
    temperature and density are also evaluated.

    Parameters
    ----------
    df : pandas.Dataframe
        per simulation results; must contain the columns named in
        molecule.param_names, "temperature", and a "sim_*"/"expt_*" pair
        for liq_density, vap_density, Pvap and Hvap (all scaled)
    molecule : R32, R125
        molecule class with bounds/experimental data

    Returns
    -------
    df_new : pandas.Dataframe
        dataframe with one row per parameter set and including
        the MSE and MAPE for liq_density, vap_density, pvap, hvap,
        critical temperature, critical density

    Notes
    -----
    An empty ``df`` yields an empty dataframe that still has the parameter,
    critical-point and error columns. If parameter sets were simulated at
    different temperatures the per-temperature columns are the union over
    all sets and missing entries are NaN.
    """
    param_names = list(molecule.param_names)

    # Property name -> bounds used to convert scaled values to physical ones
    bounds_by_property = {
        "liq_density": molecule.liq_density_bounds,
        "vap_density": molecule.vap_density_bounds,
        "Pvap": molecule.Pvap_bounds,
        "Hvap": molecule.Hvap_bounds,
    }
    error_names = list(bounds_by_property) + ["Tc", "rhoc"]
    error_columns = ([f"mse_{name}" for name in error_names] +
                     [f"mape_{name}" for name in error_names])

    records = []
    # One ordered set of per-temperature column names for each property,
    # so the columns stay grouped by property in the output
    temperature_columns = {name: {} for name in bounds_by_property}
    for group, values in df.groupby(param_names):
        # Grouping by a single column can yield a scalar key depending on
        # the pandas version; normalize so it pairs up with param_names
        if not isinstance(group, tuple):
            group = (group,)
        record = dict(zip(param_names, group))

        # Flatten to 1-D so iterating yields scalars; calling float() on
        # 1-element arrays is deprecated in recent NumPy releases
        temps = np.ravel(
            values_scaled_to_real(values["temperature"],
                                  molecule.temperature_bounds))

        sim = {}
        for name, bounds in bounds_by_property.items():
            sim[name] = np.ravel(
                values_scaled_to_real(values["sim_" + name], bounds))
            expt = np.ravel(
                values_scaled_to_real(values["expt_" + name], bounds))

            residual = sim[name] - expt
            record[f"mse_{name}"] = np.mean(residual**2)
            record[f"mape_{name}"] = np.mean(np.abs(residual / expt)) * 100.0

            # One column per simulated temperature
            for temp, value in zip(temps, sim[name]):
                column = f"sim_{name}_{float(temp):.0f}K"
                record[column] = float(value)
                temperature_columns[name][column] = None

        # Critical Point (Law of rectilinear diameters)
        liq = sim["liq_density"]
        vap = sim["vap_density"]
        slope1, intercept1, *_ = linregress(temps, (liq + vap) / 2.0)
        slope2, intercept2, *_ = linregress(temps, (liq - vap)**(1 / 0.32))

        # Temperature at which the second linear fit crosses zero
        Tc = np.abs(intercept2 / slope2)
        record["sim_Tc"] = Tc
        record["mse_Tc"] = (Tc - molecule.expt_Tc)**2
        record["mape_Tc"] = (
            np.abs((Tc - molecule.expt_Tc) / molecule.expt_Tc) * 100.0)

        # Mean density of the first linear fit evaluated at Tc
        rhoc = intercept1 + slope1 * Tc
        record["sim_rhoc"] = rhoc
        record["mse_rhoc"] = (rhoc - molecule.expt_rhoc)**2
        record["mape_rhoc"] = (
            np.abs((rhoc - molecule.expt_rhoc) / molecule.expt_rhoc) * 100.0)

        records.append(record)

    # Column labels are collected over every group rather than taken from
    # the last one, so they are defined even when there are no groups
    property_columns = [
        column for name in bounds_by_property
        for column in temperature_columns[name]
    ]
    columns = (param_names + property_columns + ["sim_Tc", "sim_rhoc"] +
               error_columns)
    new_df = pd.DataFrame(records, columns=columns)

    return new_df
Beispiel #9
0
def init_project():
    """Create the signac project "r125-vle-iter1" and initialize its jobs.

    One job is initialized for every combination of simulation temperature
    and parameter set read from the Latin hypercube CSV file.
    """

    def _angstrom_to_nm(sigma):
        # Attach Angstrom units and return the bare value in nm
        return float((sigma * u.Angstrom).in_units(u.nm).value)

    def _kelvin_to_kj_per_mol(epsilon):
        # Multiply by u.K * u.kb and return the bare value in kJ/mol
        return float((epsilon * u.K * u.kb).in_units("kJ/mol").value)

    project = signac.init_project("r125-vle-iter1")

    # Simulation temperatures
    temps = [
        229.0 * u.K,
        249.0 * u.K,
        269.0 * u.K,
        289.0 * u.K,
        309.0 * u.K,
    ]

    # Pressure for each temperature (keyed by the integer temperature in K)
    press = {
        229: 123.65 * u.kPa,
        249: 290.76 * u.kPa,
        269: 592.27 * u.kPa,
        289: 1082.84 * u.kPa,
        309: 1824.93 * u.kPa,
    }

    # Number of molecules in the vapor and liquid boxes
    n_vap = 160
    n_liq = 640

    # Source of the experimental liquid densities
    R125 = R125Constants()

    # Scaled Latin hypercube samples; header row and first column dropped
    lh_samples = np.genfromtxt(
        "../../analysis/csv/r125-vle-iter1-params.csv",
        delimiter=",",
        skip_header=1,
    )[:, 1:]

    # Bounds per parameter, rows ordered C, C, F, F, H
    bounds_sigma = np.asarray([
        [3.0, 4.0],
        [3.0, 4.0],
        [2.5, 3.5],
        [2.5, 3.5],
        [1.7, 2.7],
    ])
    bounds_epsilon = np.asarray([
        [20.0, 60.0],
        [20.0, 60.0],
        [15.0, 40.0],
        [15.0, 40.0],
        [2.0, 10.0],
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Physical values of every sampled parameter set
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    for temp in temps:
        temp_kelvin = temp.in_units(u.K).value
        pressure_bar = float(press[int(temp_kelvin)].in_units(u.bar).value)

        for sample in scaled_params:
            # Five sigmas followed by five epsilons
            (
                sigma_C1,
                sigma_C2,
                sigma_F1,
                sigma_F2,
                sigma_H1,
                epsilon_C1,
                epsilon_C2,
                epsilon_F1,
                epsilon_F2,
                epsilon_H1,
            ) = sample

            state_point = {
                "T": float(temp_kelvin),
                "P": pressure_bar,
                "sigma_C1": _angstrom_to_nm(sigma_C1),
                "sigma_C2": _angstrom_to_nm(sigma_C2),
                "sigma_F1": _angstrom_to_nm(sigma_F1),
                "sigma_F2": _angstrom_to_nm(sigma_F2),
                "sigma_H1": _angstrom_to_nm(sigma_H1),
                "epsilon_C1": _kelvin_to_kj_per_mol(epsilon_C1),
                "epsilon_C2": _kelvin_to_kj_per_mol(epsilon_C2),
                "epsilon_F1": _kelvin_to_kj_per_mol(epsilon_F1),
                "epsilon_F2": _kelvin_to_kj_per_mol(epsilon_F2),
                "epsilon_H1": _kelvin_to_kj_per_mol(epsilon_H1),
                "N_vap": n_vap,
                "N_liq": n_liq,
                "expt_liq_density": R125.expt_liq_density[int(temp_kelvin)],
                "nsteps_liqeq": 5000,
                "nsteps_eq": 10000,
                "nsteps_prod": 100000,
            }

            project.open_job(state_point).init()
Beispiel #10
0
def init_project():
    """Create the signac project "r125-density-iter2" and initialize its jobs.

    One job is initialized for every combination of simulation temperature
    and parameter set read from the Latin hypercube CSV file.
    """

    def _angstrom_to_nm(sigma):
        # Attach Angstrom units and return the bare value in nm
        return float((sigma * u.Angstrom).in_units(u.nm).value)

    def _kelvin_to_kj_per_mol(epsilon):
        # Multiply by u.K * u.kb and return the bare value in kJ/mol
        return float((epsilon * u.K * u.kb).in_units("kJ/mol").value)

    project = signac.init_project("r125-density-iter2")

    # Simulation temperatures
    temps = [
        229.0 * u.K,
        249.0 * u.K,
        269.0 * u.K,
        289.0 * u.K,
        309.0 * u.K,
    ]

    # Pressure for each temperature (keyed by the integer temperature in K)
    press = {
        229: 123.65 * u.kPa,
        249: 290.76 * u.kPa,
        269: 592.27 * u.kPa,
        289: 1082.84 * u.kPa,
        309: 1824.93 * u.kPa,
    }

    # Equilibration and production step counts stored in each state point
    nstepseq = 500000
    nstepsprod = 2500000

    # Scaled Latin hypercube samples; header row and first column dropped
    lh_samples = np.genfromtxt(
        "../../analysis/csv/r125-density-iter2-params.csv",
        delimiter=",",
        skip_header=1,
    )[:, 1:]

    # Bounds per parameter, rows ordered C, C, F, F, H
    bounds_sigma = np.asarray([
        [3.0, 4.0],
        [3.0, 4.0],
        [2.5, 3.5],
        [2.5, 3.5],
        [1.7, 2.7],
    ])
    bounds_epsilon = np.asarray([
        [20.0, 60.0],
        [20.0, 60.0],
        [15.0, 40.0],
        [15.0, 40.0],
        [2.0, 10.0],
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Physical values of every sampled parameter set
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    for temp in temps:
        temp_kelvin = temp.in_units(u.K).value
        pressure_bar = float(press[int(temp_kelvin)].in_units(u.bar).value)

        for sample in scaled_params:
            # Five sigmas followed by five epsilons
            (
                sigma_C1,
                sigma_C2,
                sigma_F1,
                sigma_F2,
                sigma_H1,
                epsilon_C1,
                epsilon_C2,
                epsilon_F1,
                epsilon_F2,
                epsilon_H1,
            ) = sample

            state_point = {
                "T": float(temp_kelvin),
                "P": pressure_bar,
                "nstepseq": nstepseq,
                "nstepsprod": nstepsprod,
                "sigma_C1": _angstrom_to_nm(sigma_C1),
                "sigma_C2": _angstrom_to_nm(sigma_C2),
                "sigma_F1": _angstrom_to_nm(sigma_F1),
                "sigma_F2": _angstrom_to_nm(sigma_F2),
                "sigma_H1": _angstrom_to_nm(sigma_H1),
                "epsilon_C1": _kelvin_to_kj_per_mol(epsilon_C1),
                "epsilon_C2": _kelvin_to_kj_per_mol(epsilon_C2),
                "epsilon_F1": _kelvin_to_kj_per_mol(epsilon_F1),
                "epsilon_F2": _kelvin_to_kj_per_mol(epsilon_F2),
                "epsilon_H1": _kelvin_to_kj_per_mol(epsilon_H1),
            }

            project.open_job(state_point).init()
Beispiel #11
0
    df_liquid, param_names, property_name, shuffle_seed=md_gp_shuffle_seed)

# Fit a GP model to the training data. The RBF kernel gets one lengthscale
# per model parameter plus one extra (presumably for temperature -- the
# construction of x_train is outside this fragment; TODO confirm).
md_model = run_gpflow_scipy(
    x_train,
    y_train,
    gpflow.kernels.RBF(lengthscales=np.ones(R125.n_params + 1)),
)

# Get difference between GROMACS/Cassandra density
# The model is evaluated at the parameter sets and temperatures found in
# df_vle and compared against the "sim_liq_density" column of that frame.
df_test_points = df_vle[list(R125.param_names) +
                        ["temperature", "sim_liq_density"]]
xx = df_test_points[list(R125.param_names) + ["temperature"]].values
means, vars_ = md_model.predict_f(xx)
# Both terms are converted from scaled to physical values before taking
# the difference: (df_vle liquid density) - (model prediction).
# NOTE(review): which code produced which data set (GROMACS vs. Cassandra)
# cannot be determined from this fragment.
diff = values_scaled_to_real(
    df_test_points["sim_liq_density"].values.reshape(-1, 1),
    R125.liq_density_bounds,
) - values_scaled_to_real(means, R125.liq_density_bounds)
# Report the mean, minimum and maximum of the signed differences
print(
    f"The average density difference between Cassandra and GROMACS is {np.mean(diff)} kg/m^3"
)
print(
    f"The minimum density difference between Cassandra and GROMACS is {np.min(diff)} kg/m^3"
)
print(
    f"The maximum density difference between Cassandra and GROMACS is {np.max(diff)} kg/m^3"
)

### Step 3: Find new parameters for simulations
# MSE threshold; how it is applied is outside this fragment
max_mse = 625  # kg^2/m^6
# Pre-generated Latin hypercube samples, loaded from a comma-separated file
latin_hypercube = np.loadtxt("LHS_5e5x10.csv", delimiter=",")
ranked_samples = rank_samples(latin_hypercube,
Beispiel #12
0
def init_project():
    """Create the signac project "ap-iter2" and initialize its jobs.

    One job is initialized for every combination of temperature and
    parameter set read from the Latin hypercube CSV file. All jobs use
    the same pressure.
    """

    project = signac.init_project("ap-iter2")

    # Simulation temperatures
    temperatures = [298.0 * u.K, 78.0 * u.K, 10.0 * u.K]

    # Single pressure shared by all state points
    pressure = 1.0 * u.atm

    # Equilibration and production step counts stored in each state point
    nsteps_eq = 100000
    nsteps_prod = 100000

    # Scaled Latin hypercube samples: columns 1-8 of the CSV file
    lh_samples = pd.read_csv(
        "/scratch365/bbefort/ap-fffit/ap-fffit/analysis/csv/uc-lattice-iter2-params.csv",
        header=0,
        usecols=[1, 2, 3, 4, 5, 6, 7, 8],
    ).values

    # Bounds per parameter, rows ordered Cl, H, N, O
    # Sigma units = angstrom
    bounds_sigma = np.asarray([
        [3.5, 4.5],
        [0.5, 2.0],
        [2.5, 3.8],
        [2.5, 3.8],
    ])
    # Epsilon units = kcal/mol
    bounds_epsilon = np.asarray([
        [0.1, 0.8],
        [0.0, 0.02],
        [0.01, 0.2],
        [0.02, 0.3],
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Physical values of every sampled parameter set
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    pressure_atm = float(pressure.to_value("atm"))
    for temperature in temperatures:
        temperature_kelvin = float(temperature.to_value("K"))

        for sample in scaled_params:
            # Four sigmas followed by four epsilons
            (
                sigma_Cl,
                sigma_H,
                sigma_N,
                sigma_O,
                epsilon_Cl,
                epsilon_H,
                epsilon_N,
                epsilon_O,
            ) = sample

            state_point = {
                "T": temperature_kelvin,
                "P": pressure_atm,
                "nsteps": {"eq": nsteps_eq, "prod": nsteps_prod},
                "sigma_Cl": sigma_Cl,
                "sigma_H": sigma_H,
                "sigma_N": sigma_N,
                "sigma_O": sigma_O,
                "epsilon_Cl": epsilon_Cl,
                "epsilon_H": epsilon_H,
                "epsilon_N": epsilon_N,
                "epsilon_O": epsilon_O,
            }

            project.open_job(state_point).init()
Beispiel #13
0
def plot_slices_temperature(
    models,
    n_params,
    temperature_bounds,
    property_bounds,
    plot_bounds=(220.0, 340.0),
    property_name="property",
):
    """Plot the model predictions as a function of temperature

    Slices are plotted where the values of the other parameters
    are all set to 0.0 --> 1.0 in increments of 0.1

    Parameters
    ----------
    models : dict
        models to plot, key=label, value=gpflow.model
    n_params : int
        number of non-temperature parameters in the model
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling the property between physical
        and dimensionless values
    plot_bounds : array-like, optional
        lower and upper temperature (physical units) of the plot
    property_name : str, optional, default="property"
        property name with units for axis label

    Returns
    -------
    figs : list or None
        list of matplotlib.figure.Figure objects, one per slice. The list
        is returned only when ``mpl_is_inline`` is false; otherwise the
        function returns None.
    """

    # Temperatures (physical) at which the models are evaluated
    n_samples = 100
    vals = np.linspace(plot_bounds[0], plot_bounds[1],
                       n_samples).reshape(-1, 1)
    vals_scaled = values_real_to_scaled(vals, temperature_bounds)

    figs = []
    for other_vals in np.arange(0, 1.1, 0.1):
        # Every non-temperature input set to the same scaled value, with
        # the scaled temperature as the last column
        other = np.tile(other_vals, (n_samples, n_params))
        xx = np.hstack((other, vals_scaled))

        fig, ax = plt.subplots()
        for (label, model) in models.items():
            mean_scaled, var_scaled = model.predict_f(xx)
            mean = values_scaled_to_real(mean_scaled, property_bounds)
            var = variances_scaled_to_real(var_scaled, property_bounds)

            ax.plot(vals, mean, lw=2, label=label)
            # Shaded band of +/- 1.96 standard deviations around the mean
            ax.fill_between(
                vals[:, 0],
                mean[:, 0] - 1.96 * np.sqrt(var[:, 0]),
                mean[:, 0] + 1.96 * np.sqrt(var[:, 0]),
                alpha=0.3,
            )

        ax.set_title(f"Other vals = {other_vals:.2f}")
        ax.set_xlabel("Temperature")
        ax.set_ylabel(property_name)
        fig.legend()
        figs.append(fig)

    if not mpl_is_inline:
        return figs
Beispiel #14
0
def plot_slices_params(
    models,
    param_to_plot,
    param_names,
    temperature,
    temperature_bounds,
    property_bounds,
    property_name="property",
):
    """Plot the model predictions as a function of param_to_plot
    at the specified temperature

    Parameters
    ----------
    models : dict {"label" : gpflow.model }
        GPFlow models to plot
    param_to_plot : string
        Parameter to vary
    param_names : list, tuple or other sequence
        parameter names, in the order of the model input columns
    temperature : float
        temperature at which to plot the surface
    temperature_bounds: array-like
        bounds for scaling temperature between physical
        and dimensionless values
    property_bounds: array-like
        bounds for scaling property between physical
        and dimensionless values
    property_name : string, optional, default="property"
        name of property to plot

    Returns
    -------
    figs : list or None
        list of matplotlib.figure.Figure objects, one per slice. The list
        is returned only when ``mpl_is_inline`` is false; otherwise the
        function returns None.

    Raises
    ------
    ValueError
        if param_to_plot is not one of param_names
    """

    # Work on a list copy so any sequence of names (tuple, numpy array,
    # ...) is accepted, not only types that provide ``.index``
    names = list(param_names)
    try:
        param_idx = names.index(param_to_plot)
    except ValueError as err:
        raise ValueError(
            f"parameter: {param_to_plot} not found in parameter_names: {param_names}"
        ) from err

    n_params = len(names)

    # Scaled values of the varied parameter, extending slightly past [0, 1]
    n_samples = 100
    vals_scaled = np.linspace(-0.1, 1.1, n_samples).reshape(-1, 1)
    temp_vals = np.tile(temperature, (n_samples, 1))
    temp_vals_scaled = values_real_to_scaled(temp_vals, temperature_bounds)

    figs = []
    for other_vals in np.arange(0, 1.1, 0.1):
        # Columns before and after the varied parameter are held at the
        # same scaled value; the scaled temperature is the last column
        other1 = np.tile(other_vals, (n_samples, param_idx))
        other2 = np.tile(other_vals, (n_samples, n_params - 1 - param_idx))
        xx = np.hstack((other1, vals_scaled, other2, temp_vals_scaled))

        fig, ax = plt.subplots()
        for (label, model) in models.items():
            mean_scaled, var_scaled = model.predict_f(xx)
            mean = values_scaled_to_real(mean_scaled, property_bounds)
            var = variances_scaled_to_real(var_scaled, property_bounds)

            ax.plot(vals_scaled, mean, lw=2, label=label)
            # Shaded band of +/- 1.96 standard deviations around the mean
            ax.fill_between(
                vals_scaled[:, 0],
                mean[:, 0] - 1.96 * np.sqrt(var[:, 0]),
                mean[:, 0] + 1.96 * np.sqrt(var[:, 0]),
                alpha=0.3,
            )

        # Prefix the name with a backslash and wrap it in $...$ so that
        # matplotlib renders it as math text
        math_parameter = "$\\" + param_to_plot + "$"
        ax.set_title(
            f"{math_parameter} at T = {temperature:.0f} K. Other vals = {other_vals:.2f}."
        )
        ax.set_xlabel(math_parameter)
        ax.set_ylabel(property_name)
        fig.legend()
        figs.append(fig)

    if not mpl_is_inline:
        return figs
Beispiel #15
0
def plot_model_performance(models,
                           x_data,
                           y_data,
                           property_bounds,
                           xylim=None):
    """Plot the predictions vs. result for one or more GP models

    The mean squared error of each model (in physical units) is also
    printed.

    Parameters
    ----------
    models : dict { label : model }
        Each model to be plotted (value, GPFlow model) is provided
        with a label (key, string)
    x_data : np.array
        data to create model predictions for
    y_data : np.ndarray
        correct answer (scaled)
    property_bounds : array-like
        bounds for scaling the property between physical
        and dimensionless values
    xylim : array-like, shape=(2,), optional
        lower and upper x and y limits of the plot. Defaults to the
        range spanned by the actual and predicted values

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure is returned only when ``mpl_is_inline`` is false;
        otherwise the function returns None.
    """
    actual = values_scaled_to_real(y_data, property_bounds)

    # Running extremes over the actual values and all model predictions
    lowest = np.min(actual)
    highest = np.max(actual)

    fig, ax = plt.subplots()

    for label, model in models.items():
        mu_scaled, _ = model.predict_f(x_data)
        predicted = values_scaled_to_real(mu_scaled, property_bounds)

        ax.scatter(actual, predicted, label=label, zorder=2.5, alpha=0.4)

        meansqerr = np.mean((predicted - actual.reshape(-1, 1))**2)
        print("Model: {}. Mean squared err: {:.2e}".format(label, meansqerr))

        lowest = min(lowest, np.min(predicted))
        highest = max(highest, np.max(predicted))

    if xylim is None:
        xylim = [lowest, highest]

    # y = x reference line, sampled in steps of 100 from the lower limit
    # to at least the upper limit
    diagonal = np.arange(xylim[0], xylim[1] + 100, 100)
    ax.plot(diagonal, diagonal, color="xkcd:blue grey", label="y=x")

    ax.set_xlim(xylim[0], xylim[1])
    ax.set_ylim(xylim[0], xylim[1])
    ax.set_xlabel("Actual")
    ax.set_ylabel("Model Prediction")
    ax.legend()
    ax.set_aspect("equal", "box")

    if not mpl_is_inline:
        return fig
Beispiel #16
0
def init_project():
    """Create the signac project "r32-density-iter2" and initialize its jobs.

    One job is initialized for every combination of simulation temperature
    and parameter set read from the Latin hypercube CSV file.
    """

    def _angstrom_to_nm(sigma):
        # Attach Angstrom units and return the bare value in nm
        return float((sigma * u.Angstrom).in_units(u.nm).value)

    def _kelvin_to_kj_per_mol(epsilon):
        # Multiply by u.K * u.kb and return the bare value in kJ/mol
        return float((epsilon * u.K * u.kb).in_units("kJ/mol").value)

    project = signac.init_project("r32-density-iter2")

    # Simulation temperatures
    temps = [
        241.0 * u.K,
        261.0 * u.K,
        281.0 * u.K,
        301.0 * u.K,
        321.0 * u.K,
    ]

    # Pressure for each temperature (keyed by the integer temperature in K)
    press = {
        241: 2.5159 * u.bar,
        261: 5.4327 * u.bar,
        281: 10.426 * u.bar,
        301: 18.295 * u.bar,
        321: 29.989 * u.bar,
    }

    # Equilibration and production step counts stored in each state point
    nstepseq = 500000
    nstepsprod = 2500000

    # Scaled Latin hypercube samples; header row and first column dropped
    lh_samples = np.genfromtxt(
        "../../analysis/csv/r32-density-iter2-params.csv",
        delimiter=",",
        skip_header=1,
    )[:, 1:]

    # Bounds per parameter, rows ordered C, F, H
    bounds_sigma = np.asarray([
        [3.0, 4.0],
        [2.5, 3.5],
        [1.7, 2.7],
    ])
    bounds_epsilon = np.asarray([
        [20.0, 60.0],
        [15.0, 40.0],
        [2.0, 10.0],
    ])
    bounds = np.vstack((bounds_sigma, bounds_epsilon))

    # Physical values of every sampled parameter set
    scaled_params = values_scaled_to_real(lh_samples, bounds)

    for temp in temps:
        temp_kelvin = temp.in_units(u.K).value
        pressure_bar = float(press[int(temp_kelvin)].in_units(u.bar).value)

        for sample in scaled_params:
            # Three sigmas followed by three epsilons
            (
                sigma_C,
                sigma_F,
                sigma_H,
                epsilon_C,
                epsilon_F,
                epsilon_H,
            ) = sample

            state_point = {
                "T": float(temp_kelvin),
                "P": pressure_bar,
                "nstepseq": nstepseq,
                "nstepsprod": nstepsprod,
                "sigma_C": _angstrom_to_nm(sigma_C),
                "sigma_F": _angstrom_to_nm(sigma_F),
                "sigma_H": _angstrom_to_nm(sigma_H),
                "epsilon_C": _kelvin_to_kj_per_mol(epsilon_C),
                "epsilon_F": _kelvin_to_kj_per_mol(epsilon_F),
                "epsilon_H": _kelvin_to_kj_per_mol(epsilon_H),
            }

            project.open_job(state_point).init()
Beispiel #17
0
def _fit_surrogate_predictions(property_name, property_bounds):
    """Fit a GP surrogate for one property and return values for plotting

    Parameters
    ----------
    property_name : string
        column of ``df_all`` to use as the surrogate target
    property_bounds : np.ndarray
        bounds passed to ``values_scaled_to_real`` to undo the scaling

    Returns
    -------
    train_points : tuple
        (simulated, surrogate-mean) values for the training split
    test_points : tuple
        (simulated, surrogate-mean) values for the test split

    Notes
    -----
    Model inputs are the R32 parameters plus temperature, so the RBF
    kernel is given ``R32.n_params + 1`` lengthscales.
    """
    param_names = list(R32.param_names) + ["temperature"]

    # Extract train/test data
    x_train, y_train, x_test, y_test = shuffle_and_split(
        df_all,
        param_names,
        property_name,
        shuffle_seed=gp_shuffle_seed,
        fraction_train=0.8,
    )

    # Fit model
    model = run_gpflow_scipy(
        x_train,
        y_train,
        gpflow.kernels.RBF(lengthscales=np.ones(R32.n_params + 1)),
    )

    # Use model to predict results; only the mean is plotted, so the
    # second value returned by predict_f is discarded
    gp_mu_train, _ = model.predict_f(x_train)
    gp_mu_test, _ = model.predict_f(x_test)

    # Convert results to physical values
    y_train_physical = values_scaled_to_real(y_train, property_bounds)
    y_test_physical = values_scaled_to_real(y_test, property_bounds)
    gp_mu_train_physical = values_scaled_to_real(gp_mu_train, property_bounds)
    gp_mu_test_physical = values_scaled_to_real(gp_mu_test, property_bounds)

    return (
        (y_train_physical, gp_mu_train_physical),
        (y_test_physical, gp_mu_test_physical),
    )


def _plot_surrogate_parity(
    train_points,
    test_points,
    xylim,
    major_ticks,
    minor_ticks,
    train_alpha,
    test_alpha,
    xlabel,
    ylabel,
    filename,
    legend_kwargs=None,
):
    """Draw and save a simulation-vs-surrogate parity plot

    Parameters
    ----------
    train_points : tuple
        (x, y) values scattered with the label "Train"
    test_points : tuple
        (x, y) values scattered with the label "Test"
    xylim : list
        [lower, upper] limit applied to both axes
    major_ticks : list
        major tick locations applied to both axes
    minor_ticks : list
        minor tick locations applied to both axes
    train_alpha : float
        opacity of the training markers
    test_alpha : float
        opacity of the test markers
    xlabel : string
        label for the x axis
    ylabel : string
        label for the y axis
    filename : string
        path the figure is saved to
    legend_kwargs : dict, optional
        extra keyword arguments forwarded to ``ax.legend``

    Returns
    -------
    None
    """
    if legend_kwargs is None:
        legend_kwargs = {}

    fig, ax = plt.subplots()

    ax.scatter(
        train_points[0],
        train_points[1],
        label="Train",
        alpha=train_alpha,
        s=130,
        c="C1",
    )
    ax.scatter(
        test_points[0],
        test_points[1],
        marker="+",
        label="Test",
        alpha=test_alpha,
        s=170,
        c="C5",
    )

    # y = x reference line; it may run past the upper limit, but the
    # axis limits set below restrict what is shown
    diagonal = np.arange(xylim[0], xylim[1] + 100, 100)
    ax.plot(
        diagonal,
        diagonal,
        color="black",
        linewidth=3,
        alpha=0.6,
    )

    ax.set_xlim(xylim[0], xylim[1])
    ax.set_ylim(xylim[0], xylim[1])

    ax.set_xticks(major_ticks)
    ax.set_yticks(major_ticks)
    ax.set_xticks(minor_ticks, minor=True)
    ax.set_yticks(minor_ticks, minor=True)

    ax.tick_params(
        "both",
        direction="in",
        which="both",
        length=4,
        labelsize=26,
        pad=10,
    )
    ax.tick_params("both", which="major", length=8)
    ax.xaxis.set_ticks_position("both")
    ax.yaxis.set_ticks_position("both")

    ax.set_xlabel(xlabel, fontsize=28, labelpad=20)
    ax.set_ylabel(ylabel, fontsize=28, labelpad=10)
    ax.legend(fontsize=24, handletextpad=0.00, **legend_kwargs)

    ax.set_aspect("equal", "box")
    fig.tight_layout()
    fig.savefig(filename)


def main():
    """Plot surrogate-vs-simulation parity figures for R32 densities

    Returns
    -------
    None

    Notes
    -----
    Saves 'pdfs/fig1-surrogate-liquiddensity.pdf' and
    'pdfs/fig1-surrogate-vapordensity.pdf'.
    """

    seaborn.set_palette("Paired")

    # Liquid density first
    train_points, test_points = _fit_surrogate_predictions(
        "sim_liq_density", R32.liq_density_bounds
    )
    _plot_surrogate_parity(
        train_points,
        test_points,
        xylim=[750, 1250],
        major_ticks=[800, 1000, 1200],
        minor_ticks=[850, 900, 950, 1050, 1100, 1150],
        train_alpha=0.4,
        test_alpha=0.7,
        xlabel=r"$\mathregular{\rho_{liq}\ sim. (kg/m^3)}$",
        ylabel=r"$\mathregular{\rho_{liq}\ sur. (kg/m^3)}$",
        filename="pdfs/fig1-surrogate-liquiddensity.pdf",
        legend_kwargs={
            "loc": "lower right",
            "bbox_to_anchor": (1.01, -0.01),
        },
    )

    # Vapor density next
    train_points, test_points = _fit_surrogate_predictions(
        "sim_vap_density", R32.vap_density_bounds
    )
    _plot_surrogate_parity(
        train_points,
        test_points,
        xylim=[0, 125],
        major_ticks=[0, 50, 100],
        minor_ticks=[25, 75, 125],
        train_alpha=0.6,
        test_alpha=0.8,
        xlabel=r"$\mathregular{\rho_{vap}\ sim. (kg/m^3)}$",
        ylabel=r"$\mathregular{\rho_{vap}\ sur. (kg/m^3)}$",
        filename="pdfs/fig1-surrogate-vapordensity.pdf",
    )