Example #1
    def test_pareto_simple_3d(self):
        costs = np.asarray([
            [0.2, 0.2, 0.1],
            [0.1, 0.1, 0.2],
            [0.1, 0.1, 0.1],
        ])
        result, pareto_points, dominated_points = find_pareto_set(
            costs, is_pareto_efficient_simple)
        assert np.allclose(result, [False, False, True])
        assert np.allclose(pareto_points, [[0.1, 0.1, 0.1]])
        assert np.allclose(dominated_points,
                           [[0.2, 0.2, 0.1], [0.1, 0.1, 0.2]])

        costs = np.asarray([
            [0.2, 0.2, 0.1],
            [0.1, 0.1, 0.2],
            [0.05, 0.3, 0.4],
            [0.1, 0.15, 0.3],
        ])
        result, pareto_points, dominated_points = find_pareto_set(
            costs, is_pareto_efficient_simple)
        assert np.allclose(result, [True, True, True, False])
        assert np.allclose(
            pareto_points,
            [[0.2, 0.2, 0.1], [0.1, 0.1, 0.2], [0.05, 0.3, 0.4]])
        assert np.allclose(dominated_points, [[0.1, 0.15, 0.3]])
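
For orientation, here is a minimal sketch of implementations consistent with these assertions. This is an illustrative O(n²) version written for this note, assuming costs are minimized by default and that `max_front=True` is handled by negating the costs; the project's actual `is_pareto_efficient_simple` and `find_pareto_set` may differ in detail.

import numpy as np

def is_pareto_efficient_simple(costs):
    # Row i is efficient unless some row j is no worse in every column
    # and strictly better in at least one (i.e., j dominates i).
    n = costs.shape[0]
    efficient = np.ones(n, dtype=bool)
    for i in range(n):
        for j in range(n):
            if (i != j and np.all(costs[j] <= costs[i])
                    and np.any(costs[j] < costs[i])):
                efficient[i] = False
                break
    return efficient

def find_pareto_set(costs, efficiency_fn, max_front=False):
    # Maximizing the front is equivalent to minimizing the negated costs.
    mask = efficiency_fn(-costs if max_front else costs)
    return mask, costs[mask], costs[~mask]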
Example #2
    def test_compare_pareto_max(self):
        np.random.seed(5)
        costs = np.random.random(size=(1000, 10))
        result1, pareto_points1, dominated_points1 = find_pareto_set(
            costs, is_pareto_efficient_simple, max_front=True)
        result2, pareto_points2, dominated_points2 = find_pareto_set(
            costs, is_pareto_efficient, max_front=True)
        assert np.allclose(result1, result2)
        assert np.allclose(pareto_points1, pareto_points2)
        assert np.allclose(dominated_points1, dominated_points2)
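
The comparison above presumably guards a faster, vectorized `is_pareto_efficient` against the O(n²) reference. A common vectorized formulation of this technique (a sketch, not necessarily this project's code) walks a candidate index through the array and discards every row that is nowhere better than the candidate:

import numpy as np

def is_pareto_efficient(costs):
    n_points = costs.shape[0]
    surviving = np.arange(n_points)  # original indices of rows still in play
    candidate = 0
    while candidate < len(costs):
        # Keep rows that beat the candidate in at least one objective
        keep = np.any(costs < costs[candidate], axis=1)
        keep[candidate] = True  # the candidate itself always survives
        surviving = surviving[keep]
        costs = costs[keep]
        # Next candidate = first surviving row after the current one
        candidate = np.sum(keep[:candidate]) + 1
    mask = np.zeros(n_points, dtype=bool)
    mask[surviving] = True
    return mask

One subtlety: exact duplicate rows are dropped here but kept by the O(n²) version; with the random costs used in this test the distinction never arises.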
Example #3
    def test_pareto_efficient_max(self):
        costs = np.asarray([
            [0.2, 0.2],
            [0.1, 0.1],
        ])
        result, pareto_points, dominated_points = find_pareto_set(
            costs, is_pareto_efficient, max_front=True)
        assert np.allclose(result, [True, False])
        assert np.allclose(pareto_points, [[0.2, 0.2]])
        assert np.allclose(dominated_points, [[0.1, 0.1]])
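
Note the contrast with Example #4 below: with `max_front=True` the sense of dominance is inverted, so `[0.2, 0.2]` is the efficient point and `[0.1, 0.1]` is dominated.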
Example #4
    def test_pareto_efficient_known(self):
        costs = np.asarray([
            [0.2, 0.2],
            [0.1, 0.1],
        ])
        result, pareto_points, dominated_points = find_pareto_set(
            costs, is_pareto_efficient)
        assert np.allclose(result, [False, True])
        assert np.allclose(pareto_points, [[0.1, 0.1]])
        assert np.allclose(dominated_points, [[0.2, 0.2]])
Example #5
def main():

    # Create a dataframe with one row per parameter set
    df_paramsets = prepare_df_vle_errors(df_all, R32)

    # ID pareto points
    result, pareto_points, dominated_points = find_pareto_set(
        df_paramsets.filter([
            "mse_liq_density", "mse_vap_density", "mse_Pvap",
            "mse_Hvap", "mse_Tc", "mse_rhoc",
        ]).values,
        is_pareto_efficient,
    )
    df_paramsets = df_paramsets.join(pd.DataFrame(result, columns=["is_pareto"]))

    df_paramsets[df_paramsets["is_pareto"]].to_csv(csv_path + "/" + out_csv_name)
Example #6
def main():

    # Make sure we have a folder for figures
    try:
        os.mkdir("figs")
    except FileExistsError:
        pass

    ###########################################################
    ####################   Fit GP models    ###################
    ###########################################################
    ## UCMD Model
    param_names = list(AP.param_names)
    property_name = "uc_mean_distance"
    # Only train on 10 K data that meets ucmd clf threshold
    df_ucmd = df_all.loc[(df_all["temperature"] == 10)
                         & (df_all["uc_mean_distance"] < ucmd_clf_threshold)]
    # Get train/test
    x_train, y_train, x_test, y_test = shuffle_and_split(
        df_ucmd, param_names, property_name, shuffle_seed=gp_shuffle_seed)
    # Fit model
    ucmd_gp = run_gpflow_scipy(
        x_train,
        y_train,
        gpflow.kernels.Matern32(lengthscales=np.ones(AP.n_params)),
    )

    ## Lattice APE Model
    param_names = list(AP.param_names) + ["scaled_temperature"]
    property_name = "lattice_ape"
    # Get train/test
    x_train, y_train, x_test, y_test = shuffle_and_split(
        df_all, param_names, property_name, shuffle_seed=gp_shuffle_seed)
    # Fit model
    lattice_gp = run_gpflow_scipy(
        x_train,
        y_train,
        gpflow.kernels.Matern32(lengthscales=np.ones(AP.n_params + 1)),
    )
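    # Note: the lattice model uses AP.n_params + 1 lengthscales because
    # scaled_temperature is appended as an extra input dimension, whereas
    # the UCMD model sees only the AP.n_params force-field parameters.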

    ###########################################################
    ##################   Train classifiers   ##################
    ###########################################################

    x_train, y_train, x_test, y_test = shuffle_and_split(
        df_all.loc[df_all["temperature"] == 10],
        list(AP.param_names),
        "uc_mean_distance",
        shuffle_seed=clf_shuffle_seed,
    )

    y_train = np.array(y_train < ucmd_clf_threshold, dtype=np.int32)
    y_test = np.array(y_test < ucmd_clf_threshold, dtype=np.int32)
    ucmd_clf = svm.SVC(kernel="rbf")
    ucmd_clf.fit(x_train, y_train)
    y_pred = ucmd_clf.predict(x_train)
    print("Training accuracy:", metrics.accuracy_score(y_train, y_pred))
    y_pred = ucmd_clf.predict(x_test)
    print("Testing accuracy:", metrics.accuracy_score(y_test, y_pred))
    print(metrics.confusion_matrix(y_test, y_pred))
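    # The continuous uc_mean_distance targets are binarized against
    # ucmd_clf_threshold above, so the RBF-kernel SVM learns a pass/fail
    # structure classifier rather than regressing the distance itself; the
    # confusion matrix shows how the held-out pass/fail predictions break down.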

    ###########################################################
    ###################   Find new points   ###################
    ###########################################################

    # Load large hypercube
    latin_hypercube = np.loadtxt("LHS_1e6x8.csv", delimiter=",")

    # Apply UCMD classifier
    ucmd_pred = ucmd_clf.predict(latin_hypercube)

    # Predict UCMD with GP model
    gp_means_ucmd, gp_vars_ucmd = ucmd_gp.predict_f(latin_hypercube)
    # Predict Lattice APE with GP model at each temperature
    all_errs = np.empty(shape=(latin_hypercube.shape[0], len(temperatures)))
    for col_idx, temperature in enumerate(temperatures):
        scaled_temperature = values_real_to_scaled(temperature,
                                                   AP.temperature_bounds)
        xx = np.hstack((latin_hypercube,
                        np.tile(scaled_temperature,
                                (latin_hypercube.shape[0], 1))))
        gp_means_ape, gp_vars_ape = lattice_gp.predict_f(xx)
        all_errs[:, col_idx] = gp_means_ape[:, 0]
    # Compute MAPE across all three temperatures
    mean_errs = np.mean(all_errs, axis=1)

    ## Save all the results to a dataframe
    LH_results = pd.DataFrame(latin_hypercube, columns=AP.param_names)
    LH_results["ucmd_clf"] = ucmd_pred.astype(np.bool_)
    LH_results["ucmd"] = gp_means_ucmd.numpy()
    LH_results["lattice_mape"] = mean_errs

    # Only take points where the structure classifier is satisfied
    # (.copy() avoids a SettingWithCopyWarning when we add columns below)
    LH_results_pass_ucmd_clf = LH_results.loc[LH_results.ucmd_clf].copy()
    costs = LH_results_pass_ucmd_clf[["ucmd", "lattice_mape"]].to_numpy()

    # Find pareto efficient points
    result, pareto_points, dominated_points = find_pareto_set(
        costs, is_pareto_efficient)
    LH_results_pass_ucmd_clf["is_pareto"] = result

    # Plot pareto points vs. costs
    g = seaborn.pairplot(
        LH_results_pass_ucmd_clf,
        vars=["ucmd", "lattice_mape"],
        hue="is_pareto",
    )
    g.savefig("figs/pareto-mses.png", dpi=300)

    # Plot pareto points vs. params
    g = seaborn.pairplot(LH_results_pass_ucmd_clf,
                         vars=list(AP.param_names),
                         hue="is_pareto")
    g.set(xlim=(-0.1, 1.1), ylim=(-0.1, 1.1))
    g.savefig("figs/pareto-params.png", dpi=300)

    # For next iteration: 1. All non-dominated points that meet the thresholds
    #                     2. "Separated" dominated points that meet the thresholds

    next_iteration_points = LH_results_pass_ucmd_clf.loc[
        LH_results_pass_ucmd_clf.is_pareto
        & (LH_results_pass_ucmd_clf.ucmd < ucmd_next_itr_threshold)
        & (LH_results_pass_ucmd_clf.lattice_mape <
           lattice_mape_next_itr_threshold)]
    dominated_points = LH_results_pass_ucmd_clf.loc[
        ~LH_results_pass_ucmd_clf.is_pareto
        & (LH_results_pass_ucmd_clf.ucmd < ucmd_next_itr_threshold)
        & (LH_results_pass_ucmd_clf.lattice_mape <
           lattice_mape_next_itr_threshold)]

    print(f"{len(LH_results_pass_ucmd_clf)} points meet the ucmd classifier.")
    print(f"{LH_results_pass_ucmd_clf.is_pareto.sum()} are non-dominated.")
    print(
        f"{len(dominated_points)} are dominated with ucmd < {ucmd_next_itr_threshold} and lattice_mape < {lattice_mape_next_itr_threshold}"
    )

    removal_distance = 1.034495
    np.random.seed(distance_seed)
    discarded_points = pd.DataFrame(columns=dominated_points.columns)

    while len(dominated_points) > 0:
        # Shuffle the top parameter sets
        dominated_points = dominated_points.sample(frac=1)

        # Select one off the top
        # Note: here we use a random one rather than the "best" one; we don't have
        # confidence that the GP models are more accurate than our thresholds
        # (DataFrame.append was removed in pandas 2.0, so use pd.concat)
        next_iteration_points = pd.concat(
            [next_iteration_points, dominated_points.iloc[[0]]])

        # Remove anything within given distance
        l1_norm = np.sum(
            np.abs(
                dominated_points[list(AP.param_names)].values -
                next_iteration_points[list(AP.param_names)].iloc[[-1]].values),
            axis=1,
        )

        points_to_remove = np.where(l1_norm < removal_distance)[0]
        discarded_points = pd.concat(
            [discarded_points, dominated_points.iloc[points_to_remove]])
        dominated_points.drop(index=dominated_points.index[points_to_remove],
                              inplace=True)

    print(
        f"After removing similar points, we are left with {len(next_iteration_points)} final top points."
    )
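    # The loop above is a greedy diversity filter: shuffle the surviving
    # dominated points, promote one at random, then drop every remaining
    # point within L1 distance removal_distance of it in scaled parameter
    # space. Repeating until the pool is empty leaves a well-spread
    # candidate set instead of a cluster of near-duplicates.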

    next_iteration_points = next_iteration_points[:250]
    next_iteration_points.drop(columns=["ucmd", "lattice_mape", "is_pareto"],
                               inplace=True)

    # Plot new points
    g = seaborn.pairplot(next_iteration_points, vars=list(AP.param_names))
    g.set(xlim=(-0.1, 1.1), ylim=(-0.1, 1.1))
    g.savefig("figs/new-points-params.png", dpi=300)

    # Save the final new parameters
    next_iteration_points.to_csv(csv_path + out_csv_name)
Example #7
    vle_predicted_mses["sim_vap_density"], on=R125.param_names)
vle_mses = vle_mses.rename(
    {"mse_x": "mse_liq_density", "mse_y": "mse_vap_density"}, axis="columns")
vle_mses = vle_mses.merge(vle_predicted_mses["sim_Pvap"], on=R125.param_names)
vle_mses = vle_mses.merge(vle_predicted_mses["sim_Hvap"], on=R125.param_names)
vle_mses = vle_mses.rename(
    {"mse_x": "mse_Pvap", "mse_y": "mse_Hvap"}, axis="columns")

# Find pareto efficient points
result, pareto_points, dominated_points = find_pareto_set(
    vle_mses.drop(columns=list(R125.param_names)).values, is_pareto_efficient)
vle_mses = vle_mses.join(pd.DataFrame(result, columns=["is_pareto"]))

# Plot pareto points vs. MSEs
g = seaborn.pairplot(
    vle_mses,
    vars=["mse_liq_density", "mse_vap_density", "mse_Pvap", "mse_Hvap"],
    hue="is_pareto",
)
g.savefig("figs/R125-pareto-mses.pdf")

# Plot pareto points vs. params
g = seaborn.pairplot(vle_mses, vars=list(R125.param_names), hue="is_pareto")
g.set(xlim=(-0.1, 1.1), ylim=(-0.1, 1.1))
g.savefig("figs/R125-pareto-params.pdf")