Python mkdir_if_not_existの例、gphypo.util.mkdir_if_not_exist Pythonの例

コード例 #1

0

ファイルを表示

ファイル: base_bo.py プロジェクト: LittleWat/hyper-parameter-optimization-by-GMRF-GPUCB

    def plot(self, output_dir):
        """Render the current optimisation state and save it as a PNG.

        A 1-D, 2-D or 3-D renderer is chosen from ``self.ndim``.  The figure is
        written to ``<output_dir>/res_NNNN.png`` where ``NNNN`` is
        ``self.point_info_manager.update_cnt`` zero-padded to four digits, and
        is closed afterwards.  For any other dimensionality a message is
        printed and nothing is saved.

        Args:
            output_dir: Directory that receives the image; it is passed to
                ``mkdir_if_not_exist`` before saving.
        """

        def plot3d():
            """Scatter the sampled points (and the ground-truth grid, if any) in 3-D."""
            fig = plt.figure()
            ax = Axes3D(fig)

            X_seq, T_seq = self.point_info_manager.X_seq, self.point_info_manager.T_seq
            if self.gt_available:
                c_true, lower, upper = normalization.zero_one_normalization(self.z)
                c_true = cm.bwr(c_true * 255)
                ax.scatter([x[0] for x in self.X_grid.astype(float)], [x[1] for x in self.X_grid.astype(float)],
                           [x[2] for x in self.X_grid.astype(float)],
                           c=c_true, marker='o',
                           alpha=0.5, s=5)
                c = cm.bwr(normalization.zero_one_normalization(T_seq, self.z.min(), self.z.max())[0] * 255)

            else:
                c = cm.bwr(normalization.zero_one_normalization(T_seq)[0] * 255)

            # NOTE(review): `c` is computed above but the scatter below uses a
            # fixed colour ('y'); confirm whether c=c was intended.
            ax.scatter([x[0] for x in X_seq], [x[1] for x in X_seq], [x[2] for x in X_seq], c='y', marker='o',
                       alpha=0.5)

            # Highlight the most recent sample(s) in magenta.
            if self.does_pairwise_sampling:
                ax.scatter(X_seq[-1][0], X_seq[-1][1], X_seq[-1][2], c='m', s=50, marker='o', alpha=1.0)
                ax.scatter(X_seq[-2][0], X_seq[-2][1], X_seq[-2][2], c='m', s=100, marker='o', alpha=1.0)
            else:
                ax.scatter(X_seq[-1][0], X_seq[-1][1], X_seq[-1][2], c='m', s=50, marker='o', alpha=1.0)

        def plot2d():
            """Draw mean (green), acquisition score (yellow) and observations over the 2-D mesh."""
            acq_score = self.acquisition_func.compute(self.mu, self.sigma, self.point_info_manager.get_T())
            mu = self.mu.flatten()
            X, T = self.point_info_manager.get_observed_XT_pair(gets_real=True)
            X_seq, T_seq = self.point_info_manager.X_seq, self.point_info_manager.T_seq

            if self.normalize_output:
                mu = self.point_info_manager.get_unnormalized_value_list(mu)
                acq_score = self.point_info_manager.get_unnormalized_value_list(acq_score)

            if self.acquisition_func_name == 'ucb':
                # For 'ucb' the mean and the acquisition score share one axes.
                fig = plt.figure()
                ax = Axes3D(fig)
                ax.plot_wireframe(self.meshgrid[0].astype(float), self.meshgrid[1].astype(float),
                                  mu.reshape(self.meshgrid[0].shape), alpha=0.5,
                                  color='g')

                ax.plot_wireframe(self.meshgrid[0].astype(float), self.meshgrid[1].astype(float),
                                  acq_score.reshape(self.meshgrid[0].shape), alpha=0.5, color='y')

                if self.gt_available:
                    ax.plot_wireframe(self.meshgrid[0].astype(float), self.meshgrid[1].astype(float), self.z, alpha=0.3,
                                      color='b')

                ax.scatter([x[0] for x in X], [x[1] for x in X], T, c='r', marker='o', alpha=0.5)

                if self.does_pairwise_sampling:
                    ax.scatter(X_seq[-2][0], X_seq[-2][1], T_seq[-2], c='m', s=100, marker='o', alpha=1.0)

                ax.scatter(X_seq[-1][0], X_seq[-1][1], T_seq[-1], c='m', s=50, marker='o', alpha=1.0)
            else:
                # Other acquisition functions get two stacked 3-D axes:
                # mean/observations on top, acquisition score below.
                fig = plt.figure(figsize=(6, 10))
                fig.subplots_adjust(right=0.8)

                upper = fig.add_subplot(2, 1, 1, projection='3d')
                lower = fig.add_subplot(2, 1, 2, projection='3d')

                upper.plot_wireframe(self.meshgrid[0].astype(float), self.meshgrid[1].astype(float),
                                     mu.reshape(self.meshgrid[0].shape), alpha=0.5,
                                     color='g')

                lower.plot_wireframe(self.meshgrid[0].astype(float), self.meshgrid[1].astype(float),
                                     acq_score.reshape(self.meshgrid[0].shape), alpha=0.5, color='y')

                if self.gt_available:
                    upper.plot_wireframe(self.meshgrid[0].astype(float), self.meshgrid[1].astype(float), self.z,
                                         alpha=0.3,
                                         color='b')

                upper.scatter([x[0] for x in X], [x[1] for x in X], T, c='r', marker='o', alpha=0.5)

                if self.does_pairwise_sampling:
                    upper.scatter(X_seq[-2][0], X_seq[-2][1], T_seq[-2], c='m', s=100, marker='o', alpha=1.0)

                upper.scatter(X_seq[-1][0], X_seq[-1][1], T_seq[-1], c='m', s=50, marker='o', alpha=1.0)

        def plot1d():
            """Draw mean (green), acquisition score (yellow) and observations along the 1-D grid."""
            acq_score = self.acquisition_func.compute(self.mu, self.sigma, self.point_info_manager.get_T())

            mu = self.mu.flatten()
            if self.normalize_output:
                mu = self.point_info_manager.get_unnormalized_value_list(mu)
                acq_score = self.point_info_manager.get_unnormalized_value_list(acq_score)

            X, T = self.point_info_manager.get_observed_XT_pair(gets_real=True)
            X_seq, T_seq = self.point_info_manager.X_seq, self.point_info_manager.T_seq

            if self.acquisition_func_name == 'ucb':
                plt.plot(self.meshgrid[0].astype(float), mu, color='g')
                plt.plot(self.meshgrid[0].astype(float), acq_score, color='y')
                # Only draw the ground truth when it is available, matching
                # every other branch of this method.
                if self.gt_available:
                    plt.plot(self.meshgrid[0], self.z, alpha=0.3, color='b')
                plt.scatter(X, T, c='r', s=10, marker='o', alpha=1.0)
                if self.does_pairwise_sampling:
                    plt.scatter(X_seq[-2], T_seq[-2], c='m', s=100, marker='o', alpha=1.0)
                plt.scatter(X_seq[-1], T_seq[-1], c='m', s=50, marker='o', alpha=1.0)

            else:
                fig = plt.figure()
                fig.subplots_adjust(left=0.15)

                # Mean plot gets three quarters of the height, acquisition one quarter.
                gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
                upper = plt.subplot(gs[0])
                lower = plt.subplot(gs[1])

                upper.plot(self.meshgrid[0].astype(float), mu, color='g')
                lower.plot(self.meshgrid[0].astype(float), acq_score, color='y')

                if self.gt_available:
                    upper.plot(self.meshgrid[0], self.z, alpha=0.3, color='b')

                upper.scatter(X, T, c='r', s=10, marker='o', alpha=1.0)

                if self.does_pairwise_sampling:
                    upper.scatter(X_seq[-2], T_seq[-2], c='m', s=100, marker='o', alpha=1.0)

                upper.scatter(X_seq[-1], T_seq[-1], c='m', s=50, marker='o', alpha=1.0)

                upper.set_ylabel('f(x)', fontdict={'size': 18})
                lower.set_ylabel(self.acquisition_func_name.upper(), fontdict={'size': 18})

        # Explicit dispatch table instead of exec() on a formatted string.
        renderers = {1: plot1d, 2: plot2d, 3: plot3d}

        if self.ndim in [1, 2, 3]:
            renderers[self.ndim]()
            out_fn = os.path.join(output_dir, 'res_%04d.png' % self.point_info_manager.update_cnt)
            mkdir_if_not_exist(output_dir)
            plt.savefig(out_fn, transparent=True, bbox_inches='tight', pad_inches=0)
            plt.close()

        else:
            print("Sorry... Plotting only supports 1, 2, or 3 dim.")

コード例 #2

0

ファイルを表示

ファイル: generate_lda_param_file.py プロジェクト: LittleWat/hyper-parameter-optimization-by-GMRF-GPUCB

import os
import sys

import numpy as np
import pandas as pd

from gphypo.util import mkdir_if_not_exist

# Grid of raw values per hyper-parameter.  Each key becomes one CSV file whose
# first column carries the same name.
gp_param_dic = {
    "alpha": np.arange(-2, 2.01, 0.2),
    "beta": np.arange(-2, 2.01, 0.2),
    "n_cluster": np.arange(5, 20.1).astype(int),
}


def _power_of_ten(x):
    """Interpret a grid value as a base-10 exponent."""
    return 10**x


def _identity(x):
    """Return the grid value unchanged."""
    return x


# Transform applied to each raw grid value to produce the "bo_<name>" column.
gp_param2lda_param = {
    "alpha": _power_of_ten,
    "beta": _power_of_ten,
    "n_cluster": _identity,
}

output_dir = 'csv_files'
mkdir_if_not_exist(output_dir)

# Write one two-column CSV per parameter: the raw grid and its transformed value.
for param_name in gp_param_dic:
    grid_values = gp_param_dic[param_name]
    table = pd.DataFrame({param_name: grid_values})
    table['bo_' + param_name] = table[param_name].apply(gp_param2lda_param[param_name])

    csv_path = os.path.join(output_dir, param_name + ".csv")
    table.to_csv(csv_path, index=False)
    print(csv_path + " was created")

コード例 #3

0

ファイルを表示

def singleTest(ACQUISITION_FUNC, trialCount):
    """Run one click-based 2-D Gaussian trial and move its CSV results to ./eval.

    Any existing output directory for this acquisition function is deleted
    first.  One CSV per parameter is read from ``parameter_dir``; a
    ``ClickTwoDimGaussianEnvironment`` and a ``GMRF_BO`` agent are built from
    them and module-level settings; the agent's mu/sigma CSV is saved; then
    ``learn_from_clicks`` is called up to 100 times, stopping early when it
    returns a value equal to ``False``.

    Args:
        ACQUISITION_FUNC: Acquisition-function name; used in directory and
            file names and passed to ``GMRF_BO``.
        trialCount: Trial index; used in the log line and the result file name.

    Raises:
        SystemExit: With code 0 when the learning loop is interrupted by
            Ctrl-C (the result CSVs are not moved in that case).
    """
    print("%s: trial %d"%(ACQUISITION_FUNC, trialCount))
    OUTPUT_DIR = os.path.join(os.getcwd(), 'output_%s_clicks'%ACQUISITION_FUNC)
    # Start from a clean output directory on every trial.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    RESULT_FILENAME = os.path.join(OUTPUT_DIR, "gaussian_result_2dim_clicks_%s_trialCount_%d.csv"%(ACQUISITION_FUNC, trialCount))

    mu_sigma_csv_path = './mu2ratio_%s/mu_sigma.csv'%ACQUISITION_FUNC
    ratio_csv_out_path = './mu2ratio_%s/ratios.csv'%ACQUISITION_FUNC
    N_TOTAL_EXP = 100000

    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    MU2RATIO_DIR = './mu2ratio_%s'%ACQUISITION_FUNC
    EVAL_DIR = "./eval"
    mkdir_if_not_exist(OUTPUT_DIR)
    mkdir_if_not_exist(MU2RATIO_DIR)
    mkdir_if_not_exist(EVAL_DIR)

    # Parameter names are the CSV file names in parameter_dir without extension.
    param_names = sorted([x.replace('.csv', '') for x in os.listdir(parameter_dir)])

    bo_param2model_param_dic = {}
    bo_param_list = []
    for param_name in param_names:
        # dtype=str keeps the index values as strings rather than floats.
        param_df = pd.read_csv(os.path.join(parameter_dir, param_name + '.csv'), dtype=str)
        bo_param_list.append(param_df[param_name].values)

        param_df.set_index(param_name, inplace=True)

        # Maps each value of the <param_name> column to its "bo_<param_name>" cell.
        bo_param2model_param_dic[param_name] = param_df.to_dict()['bo_' + param_name]

    env = ClickTwoDimGaussianEnvironment(bo_param2model_param_dic=bo_param2model_param_dic,
                                         result_filename=RESULT_FILENAME,
                                         output_dir=OUTPUT_DIR,
                                         reload=reload)

    agent = GMRF_BO(bo_param_list, env, GAMMA=GAMMA, GAMMA0=GAMMA0, GAMMA_Y=GAMMA_Y, ALPHA=ALPHA,
                    is_edge_normalized=IS_EDGE_NORMALIZED, gt_available=True, n_early_stopping=N_EARLY_STOPPING,
                    burnin=BURNIN,
                    normalize_output=NORMALIZE_OUTPUT, update_hyperparam_func=UPDATE_HYPERPARAM_FUNC,
                    initial_k=INITIAL_K, initial_theta=INITIAL_THETA, acquisition_func=ACQUISITION_FUNC,
                    acquisition_param_dic=ACQUISITION_PARAM_DIC, n_ctr=N_TOTAL_EXP)

    agent.save_mu_sigma_csv(outfn=mu_sigma_csv_path)

    nIter = 100
    for _ in range(nIter):
        try:
            flg = agent.learn_from_clicks(mu2ratio_dir=MU2RATIO_DIR,
                                          mu_sigma_csv_path=mu_sigma_csv_path,
                                          ratio_csv_out_path=ratio_csv_out_path)
            # Deliberately `== False`: a None return must not stop the loop.
            if flg == False:
                print("Early Stopping!!!")
                print(agent.bestX)
                print(agent.bestT)
                break
        except KeyboardInterrupt:
            print("Learnig process was forced to stop!")
            # SystemExit directly instead of the site-provided exit() helper,
            # which is not guaranteed to exist in every interpreter setup.
            raise SystemExit(0)

    # Move the result CSVs in Python rather than through a shell `mv`:
    # OUTPUT_DIR is derived from os.getcwd(), so a path containing spaces or
    # shell metacharacters would break the interpolated command.
    if os.path.isdir(OUTPUT_DIR):
        for csv_name in sorted(os.listdir(OUTPUT_DIR)):
            # Mirror the `*.csv` glob, which does not match dot-files.
            if csv_name.endswith(".csv") and not csv_name.startswith("."):
                shutil.move(os.path.join(OUTPUT_DIR, csv_name),
                            os.path.join(EVAL_DIR, csv_name))

コード例 #4

0

ファイルを表示

ファイル: test40.py プロジェクト: vivi489/gp_ucb_python_lib

def singleTest(ACQUISITION_FUNC, trialCount):
    """Run one 4-D Gaussian optimisation trial and move its CSV results to ./eval.

    Any existing output directory for this acquisition function is deleted
    first.  One CSV per parameter is read from ``PARAMETER_DIR``; a
    ``FourDimGaussianEnvironment`` and a ``GMRF_BO`` agent are built from them
    and module-level settings; then ``agent.learn`` is called up to 500 times,
    stopping early as soon as it returns a falsy value.

    Args:
        ACQUISITION_FUNC: Acquisition-function name; used in directory and
            file names and passed to ``GMRF_BO``.
        trialCount: Trial index; used in the log line and the result file name.
    """
    print("%s: trial %d" % (ACQUISITION_FUNC, trialCount))
    OUTPUT_DIR = os.path.join(os.getcwd(), 'output_%s' % ACQUISITION_FUNC)

    # Temporary measure kept from the original: always start from a clean
    # output directory.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    RESULT_FILENAME = os.path.join(
        OUTPUT_DIR, "gaussian_result_4dim_%s_trialCount_%d.csv" %
        (ACQUISITION_FUNC, trialCount))

    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    mkdir_if_not_exist(OUTPUT_DIR)

    # Parameter names are the CSV file names in PARAMETER_DIR without extension.
    param_names = sorted(
        [x.replace('.csv', '') for x in os.listdir(PARAMETER_DIR)])

    bo_param2model_param_dic = {}
    bo_param_list = []

    for param_name in param_names:
        # dtype=str keeps the index values as strings rather than floats.
        param_df = pd.read_csv(os.path.join(PARAMETER_DIR,
                                            param_name + '.csv'),
                               dtype=str)
        bo_param_list.append(param_df[param_name].values)

        param_df.set_index(param_name, inplace=True)

        # Maps each value of the <param_name> column to its "gp_<param_name>" cell.
        bo_param2model_param_dic[param_name] = param_df.to_dict()['gp_' +
                                                                  param_name]

    env = FourDimGaussianEnvironment(
        bo_param2model_param_dic=bo_param2model_param_dic,
        result_filename=RESULT_FILENAME,
        output_dir=OUTPUT_DIR,
        reload=False)

    agent = GMRF_BO(bo_param_list,
                    env,
                    GAMMA=GAMMA,
                    GAMMA0=GAMMA0,
                    GAMMA_Y=GAMMA_Y,
                    ALPHA=ALPHA,
                    is_edge_normalized=IS_EDGE_NORMALIZED,
                    gt_available=True,
                    n_early_stopping=N_EARLY_STOPPING,
                    burnin=BURNIN,
                    normalize_output=NORMALIZE_OUTPUT,
                    update_hyperparam_func=UPDATE_HYPERPARAM_FUNC,
                    initial_k=INITIAL_K,
                    initial_theta=INITIAL_THETA,
                    acquisition_func=ACQUISITION_FUNC,
                    acquisition_param_dic=ACQUISITION_PARAM_DIC)

    nIter = 500
    for i in range(nIter):
        # `drop` is True on every iteration except the last one.
        flg = agent.learn(drop=i < nIter - 1)
        if not flg:
            print("Early Stopping!!!")
            print(agent.bestX)
            print(agent.bestT)
            break

    # Move the result CSVs in Python rather than through a shell `mv`:
    # OUTPUT_DIR is derived from os.getcwd(), so a path containing spaces or
    # shell metacharacters would break the interpolated command.
    EVAL_DIR = "./eval"
    mkdir_if_not_exist(EVAL_DIR)
    if os.path.isdir(OUTPUT_DIR):
        for csv_name in sorted(os.listdir(OUTPUT_DIR)):
            # Mirror the `*.csv` glob, which does not match dot-files.
            if csv_name.endswith(".csv") and not csv_name.startswith("."):
                shutil.move(os.path.join(OUTPUT_DIR, csv_name),
                            os.path.join(EVAL_DIR, csv_name))

コード例 #5

0

ファイルを表示

ファイル: test40.py プロジェクト: vivi489/gp_ucb_python_lib

    os.system("mv %s/*.csv ./eval/" % OUTPUT_DIR)


def testForTrials(acFunc, nIter):
    trialCount = 21
    while trialCount < nIter:
        #np.random.seed(int(time.time()))
        singleTest(acFunc, trialCount)
        trialCount += 1


if __name__ == '__main__':
    # Usage: <script> ACQUISITION_FUNC
    # The single argument is the acquisition-function name handed to
    # testForTrials (earlier commented-out code listed "ucb", "pi", "ei",
    # "greedy" and "ts"; not verified here).
    # An abandoned variant that spawned one multiprocessing.Process per
    # acquisition function used to be kept here as commented-out code.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: %s ACQUISITION_FUNC" % sys.argv[0])

    mkdir_if_not_exist(os.path.join(os.getcwd(), "eval"))
    acFuncs = sys.argv[1]
    nTrials = 30
    testForTrials(acFuncs, nTrials)

コード例 #6

0

ファイルを表示

def singleTest(ACQUISITION_FUNC, trialCount):
    """Run one 1-D Gaussian optimisation trial and move its CSV results to ./eval.

    Any existing output directory for this acquisition function is deleted
    first and NumPy's RNG is seeded from the current time.  One CSV per
    parameter is read from ``PARAMETER_DIR``; a ``OneDimGaussianEnvironment``
    and a ``GMRF_BO`` agent are built from them and module-level settings;
    then ``agent.learn`` is called up to 1000 times, stopping early when it
    returns a value equal to ``False``.

    Args:
        ACQUISITION_FUNC: Acquisition-function name; used in directory and
            file names and passed to ``GMRF_BO``.
        trialCount: Trial index; used in the log line and the result file name.
    """
    print("%s: trial %d" % (ACQUISITION_FUNC, trialCount))
    OUTPUT_DIR = os.path.join(os.getcwd(), 'output_%s' % ACQUISITION_FUNC)
    # Temporary measure kept from the original: always start from a clean
    # output directory.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    RESULT_FILENAME = os.path.join(
        OUTPUT_DIR, "gaussian_result_1dim_%s_trialCount_%d.csv" %
        (ACQUISITION_FUNC, trialCount))
    np.random.seed(int(time.time()))
    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    mkdir_if_not_exist(OUTPUT_DIR)
    # Parameter names are the CSV file names in PARAMETER_DIR without extension.
    param_names = sorted(
        [x.replace('.csv', '') for x in os.listdir(PARAMETER_DIR)])

    bo_param2model_param_dic = {}
    bo_param_list = []
    for param_name in param_names:
        # dtype=str makes the index column a string type instead of float.
        param_df = pd.read_csv(
            os.path.join(PARAMETER_DIR, param_name + '.csv'),
            dtype=str)

        # Always read the column named after the file itself.
        bo_param_list.append(param_df[param_name].values)
        # That same column then becomes the index ...
        param_df.set_index(param_name, inplace=True)
        # ... so this maps each (string) index value to its "bo_<param_name>" cell.
        bo_param2model_param_dic[param_name] = param_df.to_dict()['bo_' +
                                                                  param_name]

    # A SinEnvironment with the same keyword arguments was used here
    # previously (kept only as commented-out code in the original).
    env = OneDimGaussianEnvironment(
        bo_param2model_param_dic=bo_param2model_param_dic,
        result_filename=RESULT_FILENAME,
        output_dir=OUTPUT_DIR,
        reload=RELOAD)

    # A GP_BO agent was an earlier alternative (commented out in the original).
    agent = GMRF_BO(bo_param_list,
                    env,
                    GAMMA=GAMMA,
                    GAMMA0=GAMMA0,
                    GAMMA_Y=GAMMA_Y,
                    ALPHA=ALPHA,
                    is_edge_normalized=IS_EDGE_NORMALIZED,
                    gt_available=True,
                    n_early_stopping=N_EARLY_STOPPING,
                    burnin=BURNIN,
                    normalize_output=NORMALIZE_OUTPUT,
                    update_hyperparam_func=UPDATE_HYPERPARAM_FUNC,
                    initial_k=INITIAL_K,
                    initial_theta=INITIAL_THETA,
                    acquisition_func=ACQUISITION_FUNC,
                    acquisition_param_dic=ACQUISITION_PARAM_DIC)

    nIter = 1000
    for _ in range(nIter):
        flg = agent.learn()
        # Per the original author's notes, agent.plot(...) and
        # agent.save_mu_sigma_csv() are intentionally not called here because
        # they caused deadlock / I/O contention among processes.
        # Deliberately `== False`: a None return must not stop the loop.
        if flg == False:
            print("Early Stopping!!!")
            print("bestX =", agent.bestX)
            print("bestT =", agent.bestT)
            break

    # Move the result CSVs in Python rather than through a shell `mv`:
    # OUTPUT_DIR is derived from os.getcwd(), so a path containing spaces or
    # shell metacharacters would break the interpolated command.
    EVAL_DIR = "./eval"
    mkdir_if_not_exist(EVAL_DIR)
    if os.path.isdir(OUTPUT_DIR):
        for csv_name in sorted(os.listdir(OUTPUT_DIR)):
            # Mirror the `*.csv` glob, which does not match dot-files.
            if csv_name.endswith(".csv") and not csv_name.startswith("."):
                shutil.move(os.path.join(OUTPUT_DIR, csv_name),
                            os.path.join(EVAL_DIR, csv_name))

コード例 #7

0

ファイルを表示

def test(ACQUISITION_FUNC):
    """Run a plotted 1-D Gaussian experiment with a ``GP_BO`` agent.

    The output directory is wiped and recreated, one CSV per parameter is
    loaded from ``PARAMETER_DIR``, and the agent learns for at most 200
    iterations, plotting and saving mu/sigma after each one.  Afterwards the
    reward curve is plotted, the PNG-to-GIF script is invoked and the GIFs
    are moved from ``./output`` into the working directory.

    Args:
        ACQUISITION_FUNC: Acquisition-function name passed to ``GP_BO`` and
            embedded in the GIF file name.
    """
    # Temporary measure: discard results of any previous run.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    print('GAMMA: ', GAMMA)
    print('GAMMA_Y: ', GAMMA_Y)
    print('GAMMA0:', GAMMA0)

    mkdir_if_not_exist(OUTPUT_DIR)

    # One parameter per CSV file; the parameter name is the file name
    # without its extension.
    names = sorted(fn.replace('.csv', '') for fn in os.listdir(PARAMETER_DIR))

    bo_param_list = []
    bo_param2model_param_dic = {}
    for name in names:
        csv_path = os.path.join(PARAMETER_DIR, name + '.csv')
        # Read everything as str so the index column is not parsed as float.
        frame = pd.read_csv(csv_path, dtype=str)

        # The grid values live in the column named after the file.
        bo_param_list.append(frame[name].values)

        # Index by that column, then take the "bo_<name>" column as a
        # {index value -> cell value} dict.
        frame.set_index(name, inplace=True)
        columns_as_dicts = frame.to_dict()
        bo_param2model_param_dic[name] = columns_as_dicts['bo_' + name]

    env = OneDimGaussianEnvironment(
        bo_param2model_param_dic=bo_param2model_param_dic,
        result_filename=RESULT_FILENAME,
        output_dir=OUTPUT_DIR,
        reload=RELOAD,
    )

    # GP-based agent; a GMRF_BO configuration was the earlier alternative.
    agent = GP_BO(
        bo_param_list,
        env,
        gt_available=True,
        my_kernel=kernel,
        burnin=BURNIN,
        normalize_output=NORMALIZE_OUTPUT,
        acquisition_func=ACQUISITION_FUNC,
        acquisition_param_dic=ACQUISITION_PARAM_DIC,
    )

    nIter = 200
    final_step = nIter - 1
    for step in range(nIter):
        # `drop` is True for every step except the final one.
        keep_going = agent.learn(drop=(step < final_step))
        agent.plot(output_dir=OUTPUT_DIR)
        agent.save_mu_sigma_csv()
        if keep_going == False:
            print("Early Stopping!!!")
            print("bestX =", agent.bestX)
            print("bestT =", agent.bestT)
            break

    plot_1dim(agent.point_info_manager.T_seq, 'reward.png')

    # Single-element list + shell=True: the element is the full shell command.
    gif_command = "./convert_pngs2gif.sh demo_%s_iter_%d_eps_%f.gif" % (
        ACQUISITION_FUNC, nIter, ACQUISITION_PARAM_DIC["eps"])
    subprocess.call([gif_command], shell=True)
    os.system("mv ./output/*.gif ./")