Example #1
def f_bo(single_iter_bo=100):

    sexp = matern52()
    gp = GaussianProcess(sexp)
    '''
        'ExpectedImprovement': self.ExpectedImprovement,
        'IntegratedExpectedImprovement': self.IntegratedExpectedImprovement,
        'ProbabilityImprovement': self.ProbabilityImprovement,
        'IntegratedProbabilityImprovement': self.IntegratedProbabilityImprovement,
        'UCB': self.UCB,
        'IntegratedUCB': self.IntegratedUCB,
        'Entropy': self.Entropy,
        'tExpectedImprovement': self.tExpectedImprovement,
        'tIntegratedExpectedImprovement': self.tIntegratedExpectedImprovement
    '''
    acq = Acquisition(mode='ExpectedImprovement')
    param = OrderedDict()
    for temp in X_name:
        param[temp] = ('cont', x_round[0])
    gpgo = GPGO(gp, acq, f, param)
    gpgo.run(max_iter=single_iter_bo, nstart=100)
    res, f_min_xy = gpgo.getResult()
    f_list = []
    f_list.extend(gpgo.return_max_f())
    print('f_list:', f_list)
    return f_list
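
The commented-out block above lists the acquisition modes that pyGPGO's Acquisition class accepts. Switching strategies is a one-line change; keyword arguments beyond mode appear to be forwarded to the chosen acquisition function, so a hedged sketch for UCB (assuming pyGPGO's UCB acquisition takes a beta parameter) would be:

acq = Acquisition(mode='UCB', beta=1.5)  # beta trades off exploration vs. exploitation
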
Example #2
def SMBO(model, acquisition):
    ''' Define the SMBO function to obtain the best hyperparameters '''

    # Set the range of each hyperparameter to test
    param = {
        'lr': ('cont', [0.00001, 0.1]),
        'optimizer': ('int', optimizer),
        'n_layers': ('int', n_layers),
        'neurons_l1': ('int', neurons_l1),
        'neurons_l2': ('int', neurons_l2)
    }

    # Start measuring the total computation time
    start_time = time()

    # Set up and run the GPGO optimizer
    gpgo = GPGO(model, acquisition, objective_function, param, n_jobs=1)
    gpgo.run(max_iter=budget - n_initial_evaluation,
             init_evals=n_initial_evaluation)

    # Print the total time required for the computation
    print("Total execution time:", int((time() - start_time)), "seconds")

    # Plot the history of the best value seen
    plt.plot(gpgo.history)
    plt.title('Accuracy vs Iterations')
    plt.ylabel('Accuracy (%)')
    plt.xlabel('# Iterations')
    plt.show()

    print("Best set of hyper-parameters found:")
    print(gpgo.getResult())

    return gpgo.history
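
A minimal, hypothetical driver for the SMBO function above. The names objective_function, budget, n_initial_evaluation, and the hyperparameter ranges are module-level globals in the original example; the stub values below are assumptions for illustration only.

from time import time

import matplotlib.pyplot as plt
from pyGPGO.covfunc import matern32
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.acquisition import Acquisition
from pyGPGO.GPGO import GPGO

# Stub globals referenced inside SMBO (illustrative values only)
budget = 25
n_initial_evaluation = 5
optimizer = [0, 2]        # integer-coded optimizer choice
n_layers = [1, 2]
neurons_l1 = [16, 128]
neurons_l2 = [16, 128]


def objective_function(lr, optimizer, n_layers, neurons_l1, neurons_l2):
    # Placeholder objective; a real one would train and score a network.
    return -abs(lr - 0.01)


history = SMBO(GaussianProcess(matern32()), Acquisition(mode='ExpectedImprovement'))
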
Example #3
def optimize_pyGPGO(problem,
                    max_evals,
                    gp,
                    acq,
                    random_init_evals=3,
                    log=None):
    params = get_variables(problem)

    mon = Monitor("pyGPGO/GP/matern/EI", problem, log=log)

    # Note: pyGPGO seems to maximize by default, so the objective is negated.
    # Furthermore, passing `int` as the type seems to be very fragile,
    # so we perform manual rounding instead.
    def f(**x):
        mon.commit_start_eval()
        xvec = np.array([
            v if t == 'cont' else round(v)
            for (k, v), t in zip(x.items(), problem.vartype())
        ])
        # print(f"Processed vector: {xvec}")
        r = problem.evaluate(xvec)
        mon.commit_end_eval(xvec, r)
        return -float(r)

    mon.start()
    gpgo = GPGO(gp, acq, f, params)
    gpgo.run(max_iter=max_evals - random_init_evals,
             init_evals=random_init_evals)
    mon.end()
    solX, solY = gpgo.getResult()

    return solX, -solY, mon
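
The comment inside optimize_pyGPGO flags two pyGPGO quirks: it maximizes by default, and declaring a variable as 'int' can be fragile. A self-contained sketch of the resulting negate-and-round pattern (the objective below is made up for illustration):

import numpy as np
from pyGPGO.covfunc import squaredExponential
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.acquisition import Acquisition
from pyGPGO.GPGO import GPGO


def cost(n):
    # Integer-valued quantity we want to MINIMIZE.
    return (n - 7) ** 2


def wrapped(n):
    # Declare the variable as continuous, round manually,
    # and negate so that maximizing the wrapper minimizes the cost.
    return -cost(round(n))


np.random.seed(0)
gpgo = GPGO(GaussianProcess(squaredExponential()),
            Acquisition(mode='ExpectedImprovement'),
            wrapped, {'n': ('cont', (0, 20))})
gpgo.run(max_iter=15)
best, neg_val = gpgo.getResult()
print(round(best['n']), -neg_val)  # un-negate to recover the minimum
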
Example #4
def test_GPGO_sk():
    np.random.seed(20)
    rf = RandomForest()
    acq = Acquisition(mode='ExpectedImprovement')
    params = {'x': ('cont', (0, 1))}
    gpgo = GPGO(rf, acq, f, params)
    gpgo.run(max_iter=10)
    res = gpgo.getResult()[0]
    assert .7 < res['x'] < .8
Example #5
def test_GPGO_mcmc():
    np.random.seed(20)
    sexp = squaredExponential()
    gp = GaussianProcessMCMC(sexp, step=pm.Slice, niter=100)
    acq = Acquisition(mode='IntegratedExpectedImprovement')
    params = {'x': ('cont', (0, 1))}
    gpgo = GPGO(gp, acq, f, params)
    gpgo.run(max_iter=10)
    res = gpgo.getResult()[0]
    assert .7 < res['x'] < .8
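
This test excerpt omits its imports and the objective f. A hedged reconstruction of what the test module needs: GaussianProcessMCMC lives in pyGPGO.surrogates.GaussianProcessMCMC and pm is PyMC3; the objective is presumably the negated Forrester function, whose maximum on (0, 1) lies near x ≈ 0.757, matching the assertion.

import numpy as np
import pymc3 as pm
from pyGPGO.covfunc import squaredExponential
from pyGPGO.surrogates.GaussianProcessMCMC import GaussianProcessMCMC
from pyGPGO.acquisition import Acquisition
from pyGPGO.GPGO import GPGO


def f(x):
    # Negated Forrester function (assumed definition).
    return -((6 * x - 2) ** 2 * np.sin(12 * x - 4))
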
Example #6
def test_GPGO():
    np.random.seed(20)
    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')
    params = {'x': ('cont', (0, 1))}
    gpgo = GPGO(gp, acq, f, params)
    gpgo.run(max_iter=10)
    res = gpgo.getResult()[0]
    assert .6 < res['x'] < .8
Example #7
def main():

    def f(x):
        return (np.sin(x))
    
    
    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {'n_hidden_2': ('int', [80, 120])}
    
    np.random.seed(23)
    gpgo = GPGO(gp, acq, Main_Loop, param)
    gpgo.run(max_iter=20)
    res = gpgo.getResult()[0]
    print(res)
Example #8
def func_first_order(type_f=None, xishu_f=None, point_f=None):
    # Decide whether the independent variable's term is linear or nonlinear
    if type_f == 'linear':
        print('Running first-order linear optimization')
        # Function value at the left endpoint
        f_left = rbf_hdmr.func_1D_value(x_round[0][0],
                                        type=type_f,
                                        xishu=xishu_f,
                                        point_sample=point_f)
        # Function value at the right endpoint
        f_right = rbf_hdmr.func_1D_value(x_round[0][1],
                                         type=type_f,
                                         xishu=xishu_f,
                                         point_sample=point_f)
        if f_left > f_right:
            f_min_i = f_right
            x_min = x_round[0][1]
        else:
            f_min_i = f_left
            x_min = x_round[0][0]

    # Nonlinear case for the independent variable
    else:
        print('Running first-order nonlinear optimization')

        # For a nonlinear 1-D function, the best approach is BO to find the minimum
        def f(x):
            return -(rbf_hdmr.func_1D_value(
                x, type=type_f, xishu=xishu_f, point_sample=point_f))

        sexp = matern52()
        gp = GaussianProcess(sexp)
        acq = Acquisition(mode='ExpectedImprovement')
        round_x = (x_round[0][0], x_round[0][1])
        param = {'x': ('cont', round_x)}
        gpgo = GPGO(gp, acq, f, param)
        gpgo.run(max_iter=20, nstart=10)
        res, f_min_i = gpgo.getResult()

        print('res:', res)
        x_min = res[0]

    return x_min, f_min_i
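
The linear branch above relies on the fact that a 1-D linear function on a closed interval attains its minimum at one of the endpoints, so two evaluations suffice. A standalone illustration of that shortcut:

def minimize_linear_on_interval(g, a, b):
    # A linear function on [a, b] is minimized at one of the endpoints.
    fa, fb = g(a), g(b)
    return (a, fa) if fa <= fb else (b, fb)


print(minimize_linear_on_interval(lambda x: 3 * x + 1, -2.0, 5.0))  # (-2.0, -5.0)
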
Example #9
def main2():

    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {
        'r1': ('cont', (0, 1)),
        'r2': ('cont', (0, 1)),
        'r3': ('cont', (0, 1)),
        'r4': ('cont', (0, 1)),
        'r5': ('cont', (0, 1)),
        'r6': ('cont', (0, 1)),
        'r7': ('cont', (0, 1)),
        'r8': ('cont', (0, 1))
    }

    gpgo = GPGO(gp, acq, Main_Loop, param)
    gpgo.run(max_iter=200)
    res = gpgo.getResult()[0]
    print(res)
Example #10
File: franke.py  Project: zuoxiaolei/pyGPGO
    """
    Plots Franke's function
    """
    x = np.linspace(0, 1, num=1000)
    y = np.linspace(0, 1, num=1000)
    X, Y = np.meshgrid(x, y)
    Z = f(X, Y)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')

    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                           linewidth=0)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    plt.show()


if __name__ == '__main__':
    plotFranke()

    cov = matern32()     # Using a matern v=3/2 covariance kernel
    gp = GaussianProcess(cov)   # A Gaussian Process regressor without hyperparameter optimization
    acq = Acquisition(mode='ExpectedImprovement')   # Expected Improvement acquisition function
    param = {'x': ('cont', [0, 1]),
             'y': ('cont', [0, 1])}     # Specify parameter space

    np.random.seed(1337)
    gpgo = GPGO(gp, acq, f, param)  # Call GPGO class 
    gpgo.run(max_iter=10)   # 10 iterations
    gpgo.getResult()    # Get your result
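
The comment above points out that the GaussianProcess surrogate is used without hyperparameter optimization. pyGPGO's GaussianProcess also takes an optimize flag that refits the kernel hyperparameters by marginal-likelihood maximization as data accumulates (hedged; verify against your pyGPGO version):

gp_opt = GaussianProcess(matern32(), optimize=True)  # refit kernel hyperparameters
gpgo_opt = GPGO(gp_opt, acq, f, param)
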
Example #11
    def hyperparam_search(  # type: ignore[override]
            self,
            params_dict: PARAM_DICT,
            train_dataset: Dataset,
            valid_dataset: Dataset,
            metric: Metric,
            use_max: bool = True,
            logdir: Optional[str] = None,
            max_iter: int = 20,
            search_range: Union[int, float, PARAM_DICT] = 4,
            logfile: Optional[str] = None):
        """Perform hyperparameter search using a gaussian process.

    Parameters
    ----------
    params_dict : Dict
      Maps hyperparameter names (strings) to possible parameter
      values. The semantics of this dict are different from those of
      `GridHyperparamOpt`. `params_dict[hp]` must map to an int/float,
      which is used as the center of a search with radius
      `search_range` since pyGPGO can only optimize numerical
      hyperparameters.
    train_dataset : Dataset
      dataset used for training
    valid_dataset : Dataset
      dataset used for validation (optimization on validation scores)
    metric : Metric
      metric used for evaluation
    use_max : bool, (default True)
      Specifies whether to maximize (True) or minimize (False)
      the given `metric`.
    logdir : str, optional, (default None)
      The directory in which to store created models. If not set, will
      use a temporary directory.
    max_iter : int, (default 20)
      number of optimization trials
    search_range : int/float/Dict (default 4)
      The `search_range` specifies the range of parameter values to
      search for. If `search_range` is an int/float, it is used as the
      global search range for parameters. This creates a search
      problem on the following space:

      optimization on [initial value / search_range,
                       initial value * search_range]

      If `search_range` is a dict, it must contain the same keys as
      for `params_dict`. In this case, `search_range` specifies a
      per-parameter search range. This is useful in case some
      parameters have a larger natural range than others. For a given
      hyperparameter `hp` this would create the following search
      range:

      optimization on hp on [initial value[hp] / search_range[hp],
                             initial value[hp] * search_range[hp]]
    logfile : str, optional (default None)
      Name of logfile to write results to. If specified, this must
      be a valid file path. If not specified, results of the hyperparameter
      search will be written to `logdir/results.txt`.


    Returns
    -------
    Tuple[`best_model`, `best_hyperparams`, `all_scores`]
      `(best_model, best_hyperparams, all_scores)` where `best_model` is
      an instance of `dc.model.Model`, `best_hyperparams` is a
      dictionary of parameters, and `all_scores` is a dictionary mapping
      string representations of hyperparameter sets to validation
      scores.
    """
        try:
            from pyGPGO.covfunc import matern32
            from pyGPGO.acquisition import Acquisition
            from pyGPGO.surrogates.GaussianProcess import GaussianProcess
            from pyGPGO.GPGO import GPGO
        except ModuleNotFoundError:
            raise ValueError("This class requires pyGPGO to be installed.")

        # Specify logfile
        log_file = None
        if logfile:
            log_file = logfile
        elif logdir is not None:
            # Make logdir if it doesn't exist.
            if not os.path.exists(logdir):
                os.makedirs(logdir, exist_ok=True)
            log_file = os.path.join(logdir, "results.txt")

        # setup range
        param_range = compute_parameter_range(params_dict, search_range)
        param_keys = list(param_range.keys())

        # Stores all results
        all_results = {}
        # Store all model references so we don't have to reload
        all_models = {}
        # Stores all model locations
        model_locations = {}

        # Internal optimizing function, separated out for readability
        def optimizing_function(**placeholders):
            """Private Optimizing function

      Takes hyperparameter values and returns validation set performance

      Parameters
      ----------
      placeholders : keyword arguments
        Should be various hyperparameters as specified in `param_keys` above.

      Returns
      -------
      valid_scores : float
        valid set performances
      """
            hyper_parameters = {}
            for hp in param_keys:
                if param_range[hp][0] == "int":
                    # param values are always float in BO, so this line converts float to int
                    # see : https://github.com/josejimenezluna/pyGPGO/issues/10
                    hyper_parameters[hp] = int(placeholders[hp])
                else:
                    hyper_parameters[hp] = float(placeholders[hp])
            logger.info("Running hyperparameter set: %s" %
                        str(hyper_parameters))
            if log_file:
                with open(log_file, 'w+') as f:
                    # Record hyperparameters
                    f.write("Parameters: %s" % str(hyper_parameters))
                    f.write('\n')

            hp_str = _convert_hyperparam_dict_to_filename(hyper_parameters)
            if logdir is not None:
                filename = "model%s" % hp_str
                model_dir = os.path.join(logdir, filename)
                logger.info("model_dir is %s" % model_dir)
                try:
                    os.makedirs(model_dir)
                except OSError:
                    if not os.path.isdir(model_dir):
                        logger.info(
                            "Error creating model_dir, using tempfile directory"
                        )
                        model_dir = tempfile.mkdtemp()
            else:
                model_dir = tempfile.mkdtemp()
            # Add it on to the information needed for the constructor
            hyper_parameters["model_dir"] = model_dir
            model = self.model_builder(**hyper_parameters)
            model.fit(train_dataset)
            try:
                model.save()
            # Some models autosave
            except NotImplementedError:
                pass

            multitask_scores = model.evaluate(valid_dataset, [metric])
            score = multitask_scores[metric.name]

            if log_file:
                with open(log_file, 'a') as f:
                    # Record performances
                    f.write("Score: %s" % str(score))
                    f.write('\n')
            # Store all results
            all_results[hp_str] = score
            # Store reference to model
            all_models[hp_str] = model
            model_locations[hp_str] = model_dir
            # GPGO maximizes performance by default;
            # negate the score when minimizing
            if use_max:
                return score
            else:
                return -score

        # execute GPGO
        cov = matern32()
        gp = GaussianProcess(cov)
        acq = Acquisition(mode='ExpectedImprovement')
        gpgo = GPGO(gp, acq, optimizing_function, param_range)
        logger.info("Max number of iteration: %i" % max_iter)
        gpgo.run(max_iter=max_iter)

        hp_opt, valid_performance_opt = gpgo.getResult()
        hyper_parameters = {}
        for hp in param_keys:
            if param_range[hp][0] == "int":
                hyper_parameters[hp] = int(hp_opt[hp])
            else:
                # FIXME: Incompatible types in assignment
                hyper_parameters[hp] = float(hp_opt[hp])  # type: ignore
        hp_str = _convert_hyperparam_dict_to_filename(hyper_parameters)

        # Let's fetch the model with the best parameters
        best_model = all_models[hp_str]

        # Compare best model to default hyperparameters
        if log_file:
            with open(log_file, 'a') as f:
                # Record hyperparameters
                f.write("params_dict:")
                f.write(str(params_dict))
                f.write('\n')

        # Return the best model, hyperparameters, and all scores
        return best_model, hyper_parameters, all_results
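
compute_parameter_range is not shown in this example. Based on the search_range semantics documented in the docstring above, a hypothetical reconstruction might look like this (an assumption, not the library's actual helper):

def compute_parameter_range(params_dict, search_range):
    # Hypothetical sketch: map each hyperparameter's initial value to the
    # ('int'/'cont', [low, high]) tuple that pyGPGO expects.
    param_range = {}
    for name, value in params_dict.items():
        radius = search_range[name] if isinstance(search_range, dict) else search_range
        if isinstance(value, int):
            param_range[name] = ('int', [value // radius, value * radius])
        else:
            param_range[name] = ('cont', [value / radius, value * radius])
    return param_range
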
Example #12
def func_model(index_ij=None,
               x_min=None,
               func_min=None,
               max_iter_i=10,
               nstart_i=10):
    '''
    :param index_ij: indices of the selected function terms
    :param init_x: initial optimum of the function
    :return: optimum found by this local iteration
    '''
    first_order = []
    second_order = []
    for k in index_ij:
        if k < len(type_fx):
            # first-order term
            first_order.append(k)
        else:
            second_order.append(x_ij_index[k - len(type_fx)])

    # Variables appearing in the second-order terms
    x_inter = []
    if len(second_order) != 0:

        for index in range(len(second_order)):
            x_inter.append(list(second_order[index]))
        x_inter = np.unique(x_inter)
    print('first_order:', first_order)
    print('second_order:', second_order)
    print('x_inter:', x_inter)

    # Define the optimization dimensions
    temp_first = first_order.copy()
    temp_first.extend(x_inter)
    index_dimen = np.unique(temp_first)

    # Array holding the variable indices of first-order functions that must be
    # optimized together with second-order functions. If no first-order function
    # shares a variable with a second-order one, this stays empty and the code
    # below finds each minimum on its own, writing the minimizing value and the
    # function minimum back at the corresponding coordinates.
    denpend_point_1D = []
    if len(first_order) != 0:
        for i in first_order:
            # print(i)
            # print('x_inter:', x_inter)
            # Independent case
            if i not in x_inter:
                type_fx_i = type_fx[i]
                xishu_arr_i = xishu_arr[i]
                point_round_i = point_round[i]
                print('Optimizing an independent first-order variable; function index:', i)
                min_x, min_f = func_first_order(type_f=type_fx_i,
                                                xishu_f=xishu_arr_i,
                                                point_f=point_round_i)
                # print('min_x:', min_x)
                x_min[i] = min_x
                # print('first-order, independent:', x_min)
                func_min += min_f
            else:
                denpend_point_1D.append(i)
    '''
    Only coupled variables remain here. The remaining function indices live in
    denpend_point_1D (1-D functions that certainly share a variable with a 2-D
    function) and second_order. Next, determine which second-order functions
    share independent variables with the first-order ones:
        1. find the shared terms among the second-order functions
    '''
    # print('denpend_point_1D:', denpend_point_1D)
    # print('second_order:', second_order)
    # Handle the 2-D case, split into coupled and uncoupled 2-D terms
    X = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
    ]
    # Check whether the second-order variables are coupled
    depend_2D, independ_2D = is_xiangguan_2D(second_order=second_order)
    print('depend_2D:', depend_2D)
    print('independ_2D:', independ_2D)
    # Determine the relationship between 1-D and 2-D terms

    # There are first-order functions coupled to second-order ones, i.e. the second-order set is non-empty
    if len(denpend_point_1D) != 0:

        # Two cases: the 1-D function relates to a coupled 2-D function,
        #            or the 1-D function relates to an uncoupled 2-D function.
        # 1. Build a 2-D array whose rows are non-independent first-order
        #    functions and whose columns are coupled or uncoupled second-order
        #    functions: denpend_point_1D + depend_2D
        if len(depend_2D) != 0:

            # Variables of the coupled 2-D functions
            unique_2D = []
            for i in range(len(depend_2D)):
                temp_0i = []
                for j in range(len(depend_2D[i])):
                    aaa = list(second_order[depend_2D[i][j]])
                    temp_0i.append(list(aaa))
                temp_0i = np.unique(temp_0i)
                unique_2D.append(temp_0i)
            # print('unique_2d:----------------', unique_2D)
            # Deduplicate the second_order entries referenced by depend_2D.
            # Scan each column: a value of 1 means that 1-D function is coupled
            # to the corresponding 2-D function.
            flag_arr = np.array([[-1] * len(unique_2D)] *
                                len(denpend_point_1D))

            for row in range(len(denpend_point_1D)):
                for col in range(len(unique_2D)):
                    if denpend_point_1D[row] in list(unique_2D[col]):
                        flag_arr[row, col] = 1

            # print('flag_arr:', flag_arr)
            # 2-D array storing the 1-D/2-D relationships: in each inner list the
            # first element is the 2-D function's index into depend_2D, and every
            # following element denotes a 1-D function.
            f1_f2 = []
            # Visit columns first
            for col in range(len(depend_2D)):
                # then rows
                f2D = [col]
                for row in range(len(denpend_point_1D)):
                    if flag_arr[row, col] == 1:
                        f2D.append(row)
                f1_f2.append(f2D)
            # print('f1_f2:', f1_f2)
            for row in range(len(f1_f2)):
                # A first-order function shares a variable with a second-order function
                if len(f1_f2[row]) > 1:
                    print('Optimizing first-order functions coupled with second-order functions')
                    # Each element here is a 1-D function index, which is also its variable index
                    f_1_depend = f1_f2[row][1:]
                    # print('f_1_depend:', f_1_depend)
                    # To find how many variables the 1-D and 2-D functions use in total, merge their variables and deduplicate

                    # Build the coefficient matrices of the 2-D functions
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(depend_2D[row])):
                        ij = second_order[depend_2D[row][i]]
                        # print('ij:', ij)
                        # Look up the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][
                                    1] == ij[1]:
                                index = j
                            # print('index:', index)
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])

                    # print('ij_index_i:', ij_index_i)
                    len_ij = np.unique(np.array(ij_index_i))

                    # Array of variable names in use; each index indicates which dimension the variable comes from
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):
                        f_index = 0
                        # first-order functions
                        for i in range(len(f_1_depend)):
                            type_fx_1 = type_fx[f_1_depend[i]]
                            xishu_arr_1 = xishu_arr[f_1_depend[i]]
                            point_round_1 = point_round[f_1_depend[i]]
                            point_index = -1
                            for x in range(len(len_ij)):
                                if f_1_depend[i] == len_ij[x]:
                                    point_index = x
                            x_name = X_name[point_index]
                            f_index += -(rbf_hdmr.func_1D_value(
                                x_name,
                                type=type_fx_1,
                                xishu=xishu_arr_1,
                                point_sample=point_round_1))

                        for index in range(len(depend_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            # print('ij_index:', ij_index)
                            # print('X_name:', X_name)
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            x_name = [X_name[left], X_name[right]]
                            # print('x_name:', x_name)
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name,
                                index_ij=ij_index,
                                xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        # print('x_round[ij_index_i[i]]', x_round[len_ij[m]])
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    # print(param)
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    # print('ij:', ij)
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]
                    # print('x_min:', x_min)
                    func_min += max_xy

                # This first-order function is independent of the coupled second-order functions
                else:
                    print('No first-order function couples with these second-order functions')
                    # f_2_x is the corresponding coupled entry in depend_2D
                    f_2_x = depend_2D[f1_f2[row][0]]
                    # print('f_2_x:', f_2_x)
                    # Build the coefficient matrices of the 2-D functions
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(depend_2D[row])):
                        ij = second_order[depend_2D[row][i]]
                        # print('ij:', ij)
                        # Look up the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][
                                    1] == ij[1]:
                                index = j
                            # print('index:', index)
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])
                    # print('ij_index_i:', ij_index_i)
                    len_ij = np.unique(np.array(ij_index_i))

                    # Array of variable names in use; each index indicates which dimension the variable comes from
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):

                        f_index = 0
                        for index in range(len(depend_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            # print('ij_index:', ij_index)
                            # print('X_name:', X_name)
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            # second-order function
                            x_name = [X_name[left], X_name[right]]
                            # print('x_name:', x_name)
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name,
                                index_ij=ij_index,
                                xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        # print('x_round[ij_index_i[i]]', x_round[len_ij[m]])
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    # print(param)
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    # print('ij:', ij)
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]
                    # print('x_min:', x_min)
                    func_min += max_xy

        if len(independ_2D) != 0:
            # Variables of the uncoupled 2-D functions
            unique_2D = []
            for i in range(len(independ_2D)):
                temp_0i = []
                for j in range(len(independ_2D[i])):
                    aaa = list(second_order[independ_2D[i][j]])
                    temp_0i.append(list(aaa))
                temp_0i = np.unique(temp_0i)
                unique_2D.append(temp_0i)
            # print('unique_2d:----------------', unique_2D)

            # Deduplicate the second_order entries referenced by independ_2D.
            # Scan each column: a value of 1 means that 1-D function is coupled
            # to the corresponding 2-D function.
            flag_arr = np.array([[-1] * len(unique_2D)] *
                                len(denpend_point_1D))
            # print(flag_arr)
            for row in range(len(denpend_point_1D)):
                for col in range(len(unique_2D)):
                    if denpend_point_1D[row] in list(unique_2D[col]):
                        flag_arr[row, col] = 1
            # print(flag_arr)
            # 2-D array storing the 1-D/2-D relationships: in each inner list the
            # first element is the 2-D function's index into independ_2D, and every
            # following element denotes a 1-D function.
            f1_f2 = []
            # Visit columns first
            for col in range(len(independ_2D)):
                # then rows
                f2D = [col]
                for row in range(len(denpend_point_1D)):
                    if flag_arr[row, col] == 1:
                        f2D.append(row)
                f1_f2.append(f2D)
            # print('f1_f2:', f1_f2)
            for row in range(len(f1_f2)):
                # A first-order function shares a variable with a second-order function
                if len(f1_f2[row]) > 1:
                    print('Optimizing first-order functions coupled with uncoupled second-order functions')
                    # f_2_x is the corresponding entry in independ_2D
                    f_2_x = independ_2D[f1_f2[row][0]]
                    # print('f_2_x:', f_2_x)
                    # Each element is a 1-D function index, which is also its variable index
                    f_1_depend = f1_f2[row][1:]
                    # print('f_1_depend:', f_1_depend)
                    # To find how many variables the 1-D and 2-D functions use in
                    # total, merge their variables and deduplicate (knowing which
                    # variables the 2-D functions use is sufficient).
                    f_1 = unique_2D[row]
                    # print('f_1:', f_1)

                    # Build the coefficient matrices of the 2-D functions
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(independ_2D[row])):
                        ij = second_order[independ_2D[row][i]]
                        # print('ij:', ij)
                        # Look up the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][
                                    1] == ij[1]:
                                index = j
                            # print('index:', index)
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])

                    # print('ij_index_i:', ij_index_i)
                    len_ij = np.unique(np.array(ij_index_i))

                    # Array of variable names in use; each index indicates which dimension the variable comes from
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):
                        f_index = 0
                        # first-order functions
                        for i in range(len(f_1_depend)):
                            # print('f_1_depend:', f_1_depend)
                            type_fx_1 = type_fx[f_1_depend[i]]
                            xishu_arr_1 = xishu_arr[f_1_depend[i]]
                            point_round_1 = point_round[f_1_depend[i]]
                            point_index = -1
                            for x in range(len(len_ij)):
                                if f_1_depend[i] == len_ij[x]:
                                    point_index = x
                            x_name = X_name[point_index]
                            f_index += -(rbf_hdmr.func_1D_value(
                                x_name,
                                type=type_fx_1,
                                xishu=xishu_arr_1,
                                point_sample=point_round_1))
                        for index in range(len(independ_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            # print('ij_index:', ij_index)
                            # print('X_name:', X_name)
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            x_name = [X_name[left], X_name[right]]
                            # print('x_name:', x_name)
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name,
                                index_ij=ij_index,
                                xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        # print('x_round[ij_index_i[i]]', x_round[len_ij[m]])
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    # print(param)
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    # print('ij:', ij)
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]

                    func_min += max_xy
                    # print('x_min:', x_min)
                    # This first-order function is independent of the coupled second-order functions
                else:
                    print('Optimizing uncoupled second-order functions')
                    # print('independent')
                    # f_2_x is the corresponding entry in independ_2D
                    f_2_x = independ_2D[f1_f2[row][0]]
                    # print('f_2_x:', f_2_x)
                    # Build the coefficient matrices of the 2-D functions
                    ij_index_i = []
                    ij_xishu_i = []
                    ij_point_i = []
                    for i in range(len(independ_2D[row])):
                        ij = second_order[independ_2D[row][i]]
                        # print('ij:', ij)
                        # Look up the function coefficients
                        index = -1
                        for j in range(len(x_ij_index)):
                            if x_ij_index[j][0] == ij[0] and x_ij_index[j][
                                    1] == ij[1]:
                                index = j
                            # print('index:', index)
                        ij_index_i.append(x_ij_index[index])
                        ij_xishu_i.append(x_ij_xishu[index])
                        ij_point_i.append(x_ij_point[index])
                    # print('ij_index_i:', ij_index_i)
                    len_ij = np.unique(np.array(ij_index_i))

                    # Array of variable names in use; each index indicates which dimension the variable comes from
                    X_name = []
                    for x in range(len(len_ij)):
                        X_name.append(X[len_ij[x]])

                    def f(X_name):
                        f_index = 0
                        # second-order functions
                        for index in range(len(independ_2D[row])):
                            ij_index = ij_index_i[index]
                            ij_xishu = ij_xishu_i[index]
                            ij_point = ij_point_i[index]
                            # print('ij_index:', ij_index)
                            # print('X_name:', X_name)
                            left = -1
                            right = -1
                            for x in range(len(len_ij)):
                                if ij_index[0] == len_ij[x]:
                                    left = x
                                if ij_index[1] == len_ij[x]:
                                    right = x
                            x_name = [X_name[left], X_name[right]]
                            # print('x_name:', x_name)
                            f_index += -(rbf_hdmr.func_2D_value(
                                x_name,
                                index_ij=ij_index,
                                xishu=ij_xishu,
                                points=ij_point))
                        return f_index

                    param = OrderedDict()
                    for m in range(len(len_ij)):
                        # print('x_round[ij_index_i[i]]', x_round[len_ij[m]])
                        param[X_name[m]] = ('cont', x_round[len_ij[m]])
                    # print(param)
                    sexp = matern52()
                    gp = GaussianProcess(sexp)
                    acq = Acquisition(mode='ExpectedImprovement')
                    gpgo = GPGO(gp, acq, f, param)
                    gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                    res, max_xy = gpgo.getResult()
                    # print('ij:', ij)
                    for x in range(len(len_ij)):
                        x_min[len_ij[x]] = res[x]
                    # print('x_min:', x_min)
                    func_min += max_xy

    # Only the 2-D coupling problem remains
    elif len(denpend_point_1D) == 0:
        # Handle functions where the 1-D and 2-D terms share no variables and the 2-D terms are uncoupled
        if len(independ_2D) != 0:
            print('Optimizing uncoupled second-order variables')
            for i in range(len(independ_2D)):
                ij = second_order[independ_2D[i][0]]
                # Index into the coupling arrays
                index = -1
                for j in range(len(x_ij_index)):
                    if x_ij_index[j][0] == ij[0] and x_ij_index[j][1] == ij[1]:
                        index = j
                # print(index)
                ij_index = x_ij_index[index]
                ij_xishu = x_ij_xishu[index]
                ij_point = x_ij_point[index]
                # print('ij_index:', ij_index)
                X_name = [X[ij_index[0]], X[ij_index[1]]]

                def f(X_name):
                    return -(rbf_hdmr.func_2D_value(X_name,
                                                    index_ij=ij_index,
                                                    xishu=ij_xishu,
                                                    points=ij_point))

                param = OrderedDict()
                for m in range(len(ij_index)):
                    # print('132', x_round[ij_index[i]])
                    param[X_name[m]] = ('cont', x_round[ij_index[m]])
                # print(param)
                sexp = matern52()
                gp = GaussianProcess(sexp)
                acq = Acquisition(mode='ExpectedImprovement')
                gpgo = GPGO(gp, acq, f, param)
                gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                res, max_xy = gpgo.getResult()
                # print('ij:', ij)
                # print(res)
                for hiahia in range(len(ij_index)):
                    x_min[ij_index[hiahia]] = res[hiahia]

                func_min += max_xy
                # print('x_min:', x_min)
                # print('f_min:', func_min)
        # Handle the coupled 2-D variable problem
        if len(depend_2D) != 0:
            print('Optimizing coupled second-order variables')
            for i in range(len(depend_2D)):
                temp = depend_2D[i]
                # print('temp:', temp)
                ij_index_i = []
                ij_xishu_i = []
                ij_point_i = []
                for k in range(len(temp)):
                    ij = second_order[temp[k]]
                    # Look up the function coefficients
                    index = -1
                    for j in range(len(x_ij_index)):
                        if x_ij_index[j][0] == ij[0] and x_ij_index[j][
                                1] == ij[1]:
                            index = j
                    ij_index_i.append(x_ij_index[index])
                    ij_xishu_i.append(x_ij_xishu[index])
                    ij_point_i.append(x_ij_point[index])

                len_ij = np.unique(np.array(ij_index_i))
                # Array of variable names in use; each index indicates which dimension the variable comes from
                X_name = []
                for x in range(len(len_ij)):
                    X_name.append(X[len_ij[x]])

                def f(X_name):

                    f_index = 0
                    for index in range(len(ij_index_i)):
                        ij_index = ij_index_i[index]
                        ij_xishu = ij_xishu_i[index]
                        ij_point = ij_point_i[index]
                        # print('ij_index:', ij_index)
                        # print('X_name:', X_name)
                        left = -1
                        right = -1
                        for x in range(len(len_ij)):
                            if ij_index[0] == len_ij[x]:
                                left = x
                            if ij_index[1] == len_ij[x]:
                                right = x
                        # second-order function
                        x_name = [X_name[left], X_name[right]]
                        # print('x_name:', x_name)
                        f_index += -(rbf_hdmr.func_2D_value(x_name,
                                                            index_ij=ij_index,
                                                            xishu=ij_xishu,
                                                            points=ij_point))
                    return f_index

                param = OrderedDict()
                for m in range(len(len_ij)):
                    # print('x_round[ij_index_i[i]]', x_round[len_ij[m]])
                    param[X_name[m]] = ('cont', x_round[len_ij[m]])
                # print(param)
                sexp = matern52()
                gp = GaussianProcess(sexp)
                acq = Acquisition(mode='ExpectedImprovement')
                gpgo = GPGO(gp, acq, f, param)
                gpgo.run(max_iter=max_iter_i, nstart=nstart_i)
                res, max_xy = gpgo.getResult()
                # print('ij:', ij)
                for x in range(len(len_ij)):
                    x_min[len_ij[x]] = res[x]
                # print('x_min:', x_min)
                func_min += max_xy
    # Only 1-D functions and coupled 2-D functions remain, but we do not yet
    # know which couples with which.
    '''
    What we do know is that Bayesian optimization performs well in low
    dimensions (D <= 5), so the plan is:
        1. use exact first- and second-order functions for the computation (TODO)
        2. use approximate first- and second-order functions (what this code does)
            Three classes of functions:
                only first-order functions (handled)
                only second-order functions (handled when no variables are shared)
                both first-order and second-order functions present
    '''

    return x_min, func_min, index_dimen
Example #13
import numpy as np

from pyGPGO.covfunc import squaredExponential
from pyGPGO.acquisition import Acquisition
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.GPGO import GPGO

from tube_class import evaluate_tube_design__bayesian


def test_quadratic_function(radius, length, submergence):
    x = radius
    y = length
    z = submergence
    return -((x - 2.15)**2 + (y - 42.0)**2 + (z + 8.75)**2)


sq_exp = squaredExponential(l=3, sigman=0.0)
gp = GaussianProcess(sq_exp)
acq = Acquisition(mode='ExpectedImprovement')
design_parameters = {
    'radius': ('cont', [0.05, 3.0]),
    'length': ('cont', [20.0, 200.0]),
    'submergence': ('cont', [-12.0, 3.0])
}

np.random.seed(42)
gpgo = GPGO(gp, acq, evaluate_tube_design__bayesian, design_parameters)
gpgo.run(max_iter=40, init_evals=20)  # 20 initial evaluations + 40 BO iterations = 60 total
optimal_design, optimal_power = gpgo.getResult()

print('Best design after {} evaluations is {} with objective function value {}'.
      format(60, optimal_design, optimal_power))
Example #14
        curr_best = str(gpgo.tau)
        if float(curr_eval) >= float(curr_best):
            curr_eval = bcolors.OKGREEN + curr_eval + bcolors.ENDC
        print(self.template.format(eval, proposed, curr_eval, curr_best))

    def _printInit(self, gpgo):
        for init_eval in range(gpgo.init_evals):
            print(self.template.format('init', gpgo.GP.X[init_eval], gpgo.GP.y[init_eval], gpgo.tau))

if __name__ == '__main__':
    import numpy as np
    import matplotlib.pyplot as plt
    from pyGPGO.covfunc import squaredExponential
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.GPGO import GPGO

    np.random.seed(20)

    def f(x):
        return -((6*x-2)**2*np.sin(12*x-4))

    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')

    params = {'x': ('cont', (0, 1))}
    gpgo = GPGO(gp, acq, f, params)
    gpgo.run(max_iter=10)
    print(gpgo.getResult())
Example #15
    def hyperparam_search(
            self,
            params_dict,
            train_dataset,
            valid_dataset,
            output_transformers,
            metric,
            prot_desc_dict,
            prot_desc_length,
            tasks=None,
            direction=True,
            n_features=1024,
            n_tasks=1,
            max_iter=20,
            search_range=4,
            early_stopping=True,
            evaluate_freq=3,
            patience=3,
            model_dir="./model_dir",
            hp_invalid_list=[
                'seed', 'nb_epoch', 'penalty_type', 'dropouts',
                'bypass_dropouts', 'n_pair_feat', 'fit_transformers',
                'min_child_weight', 'weight_init_stddevs', 'max_delta_step',
                'subsample', 'colsample_bylevel', 'bias_init_consts',
                'colsample_bytree', 'reg_alpha', 'reg_lambda',
                'scale_pos_weight', 'base_score', 'layer_sizes'
            ],
            log_file='GPhypersearch.log',
            mode='classification',
            tensorboard=True,
            no_concordance_index=False,
            no_r2=False,
            plot=False,
            verbose_search=False,
            aggregated_tasks=[]):
        """Perform hyperparams search using a gaussian process assumption

    params_dict include single-valued parameters being optimized,
    which should only contain int, float and list of int(float)

    parameters with names in hp_invalid_list will not be changed.

    For Molnet models, self.model_class is model name in string,
    params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class]

    Parameters
    ----------
    params_dict: dict
      dict including parameters and their initial values
      parameters not suitable for optimization can be added to hp_invalid_list
    train_dataset: dc.data.Dataset struct
      dataset used for training
    valid_dataset: dc.data.Dataset struct
      dataset used for validation (optimization on validation scores)
    output_transformers: list of dc.trans.Transformer
      transformers for evaluation
    metric: list of dc.metrics.Metric
      metric used for evaluation
    direction: bool
      maximization(True) or minimization(False)
    n_features: int
      number of input features
    n_tasks: int
      number of tasks
    max_iter: int
      number of optimization trials
    search_range: int or float
      optimization on [initial values / search_range,
                       initial values * search_range]
    hp_invalid_list: list
      names of parameters that should not be optimized
    log_file: string
      name of the log file; hyperparameters and results for each trial are recorded

    Returns
    -------
    hyper_parameters: dict
      params_dict with all optimized values
    valid_performance_opt: float
      best performance on valid dataset

    """

        #assert len(metric) == 1, 'Only use one metric'
        hyper_parameters = params_dict
        hp_list = list(hyper_parameters.keys())
        for hp in hp_invalid_list:
            if hp in hp_list:
                hp_list.remove(hp)

        hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
        assert set(hp_list_class) <= set([list, int, float])
        # Float or int hyperparameters (e.g. batch_size, learning_rate)
        hp_list_single = [
            hp_list[i] for i in range(len(hp_list))
            if not hp_list_class[i] is list
        ]
        # List of float or int hyperparameters (e.g. layer_sizes)
        hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                            for i in range(len(hp_list))
                            if hp_list_class[i] is list]

        # Number of parameters
        n_param = len(hp_list_single)
        if len(hp_list_multiple) > 0:
            n_param = n_param + sum([hp[1] for hp in hp_list_multiple])
        # Range of optimization
        param_range = []
        for hp in hp_list_single:
            if hyper_parameters[hp].__class__ is int:
                param_range.append((('int'), [
                    hyper_parameters[hp] // search_range,
                    hyper_parameters[hp] * search_range
                ]))
            else:
                param_range.append((('cont'), [
                    hyper_parameters[hp] / search_range,
                    hyper_parameters[hp] * search_range
                ]))
        for hp in hp_list_multiple:
            if hyper_parameters[hp[0]][0].__class__ is int:
                param_range.extend([(('int'), [
                    hyper_parameters[hp[0]][i] // search_range,
                    hyper_parameters[hp[0]][i] * search_range
                ]) for i in range(hp[1])])
            else:
                param_range.extend([(('cont'), [
                    hyper_parameters[hp[0]][i] / search_range,
                    hyper_parameters[hp[0]][i] * search_range
                ]) for i in range(hp[1])])

        # Dummy names
        param_name = ['l' + format(i, '02d') for i in range(20)]
        param = dict(zip(param_name[:n_param], param_range))

        data_dir = './logs'
        log_file = os.path.join(data_dir, log_file)

        def f(l00=0,
              l01=0,
              l02=0,
              l03=0,
              l04=0,
              l05=0,
              l06=0,
              l07=0,
              l08=0,
              l09=0,
              l10=0,
              l11=0,
              l12=0,
              l13=0,
              l14=0,
              l15=0,
              l16=0,
              l17=0,
              l18=0,
              l19=0):
            """ Optimizing function
      Takes hyperparameter values and returns validation set performance

      Parameters
      ----------
      l00~l19: int or float
        placeholders for hyperparameters being optimized,
        hyper_parameters dict is rebuilt based on input values of placeholders

      Returns
      -------
      valid_scores: float
        valid set performances
      """
            args = locals()
            # Input hyper parameters
            i = 0
            for hp in hp_list_single:
                hyper_parameters[hp] = float(args[param_name[i]])
                if param_range[i][0] == 'int':
                    hyper_parameters[hp] = int(hyper_parameters[hp])
                i = i + 1
            for hp in hp_list_multiple:
                hyper_parameters[hp[0]] = [
                    float(args[param_name[j]]) for j in range(i, i + hp[1])
                ]
                if param_range[i][0] == 'int':
                    hyper_parameters[hp[0]] = list(
                        map(int, hyper_parameters[hp[0]]))
                i = i + hp[1]

            opt_epoch = -1
            print(hyper_parameters)
            nonlocal model_dir

            # Run benchmark
            with open(log_file, 'a') as f:
                # Record hyperparameters
                f.write(str(hyper_parameters))
                f.write('\n')
            if isinstance(self.model_class, str):

                if mode == 'classification':
                    train_scores, valid_scores, _, opt_epoch = model_classification(
                        train_dataset,
                        valid_dataset,
                        valid_dataset,
                        tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        prot_desc_dict,
                        prot_desc_length,
                        hyper_parameters=hyper_parameters,
                        early_stopping=early_stopping,
                        evaluate_freq=evaluate_freq,
                        patience=patience,
                        direction=direction,
                        model_dir=model_dir,
                        tensorboard=tensorboard,
                        no_concordance_index=no_concordance_index,
                        verbose_search=verbose_search,
                        log_file=log_file,
                        no_r2=no_r2,
                        aggregated_tasks=aggregated_tasks)
                elif mode == 'regression' or mode == 'reg-threshold':
                    train_scores, valid_scores, _, opt_epoch = model_regression(
                        train_dataset,
                        valid_dataset,
                        valid_dataset,
                        tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        prot_desc_dict,
                        prot_desc_length,
                        hyper_parameters=hyper_parameters,
                        early_stopping=early_stopping,
                        evaluate_freq=evaluate_freq,
                        patience=patience,
                        direction=direction,
                        model_dir=model_dir,
                        tensorboard=tensorboard,
                        no_concordance_index=no_concordance_index,
                        verbose_search=verbose_search,
                        log_file=log_file,
                        no_r2=no_r2,
                        aggregated_tasks=aggregated_tasks)
                else:
                    raise ValueError("Invalid mode!")
                # As in the fit() function in tensor_graph.py, we use a score combination here.
                if n_tasks > 1:
                    val_scores = valid_scores[self.model_class]['averaged']
                else:
                    val_scores = valid_scores[self.model_class]
                score = 0
                if mode == 'regression':
                    for mtc in metric:
                        mtc_name = mtc.metric.__name__
                        composite_mtc_name = mtc.name
                        if mtc_name == 'rms_score':
                            score += val_scores[composite_mtc_name]
                        if mtc_name == 'r2_score' or mtc_name == 'pearson_r2_score':
                            if no_r2:
                                coef = 0.0
                            else:
                                coef = -0.5
                            score += coef * val_scores[composite_mtc_name]
                        if mtc_name == 'concordance_index':
                            score += -val_scores[composite_mtc_name]
                elif mode == 'reg-threshold' or mode == 'classification':
                    for mtc in metric:
                        mtc_name = mtc.metric.__name__
                        composite_mtc_name = mtc.name
                        if mtc_name == 'roc_auc_score':
                            score += val_scores[composite_mtc_name]
                        if mtc_name == 'prc_auc_score':
                            score += val_scores[composite_mtc_name]

            else:
                model_dir = tempfile.mkdtemp()
                model = self.model_class(hyper_parameters, model_dir)
                model.fit(train_dataset, **hyper_parameters)
                model.save()
                evaluator = Evaluator(model, valid_dataset,
                                      output_transformers)
                multitask_scores = evaluator.compute_model_performance(
                    [metric])
                score = multitask_scores[metric.name]

            #pdb.set_trace()
            if early_stopping:
                best_score = opt_epoch[1]
                opt_epoch = opt_epoch[0]
            epoch_stmt = str(
                opt_epoch) + " is the optimum number of epochs found."
            print(epoch_stmt)
            with open(log_file, 'a') as f:
                # Record performances
                f.write(self.model_class)
                f.write('\n')
                f.write(epoch_stmt)
                f.write('\n')
                f.write(str(score))
                f.write('\n')
                if early_stopping:
                    f.write(str(best_score))
                    f.write('\n')
            if not early_stopping:
                best_score = score
            # GPGO maximizes performance by default; negate the score for minimization
            if direction:
                return best_score
            else:
                return -best_score

        import pyGPGO
        from pyGPGO.covfunc import matern32
        from pyGPGO.acquisition import Acquisition
        from pyGPGO.surrogates.GaussianProcess import GaussianProcess
        from pyGPGO.GPGO import GPGO
        with open(log_file, 'a') as file:
            file.write(
                "------------------------------------------------------------------"
            )
            file.write('\n')
        cov = matern32()
        gp = GaussianProcess(cov)
        acq = Acquisition(mode='ExpectedImprovement')
        gpgo = GPGO(gp, acq, f, param)
        print("Max number of iteration: %i" % max_iter)

        gpgo.run(max_iter=max_iter)

        hp_opt, valid_performance_opt = gpgo.getResult()
        # Readout best hyper parameters
        i = 0
        for hp in hp_list_single:
            hyper_parameters[hp] = float(hp_opt[param_name[i]])
            if param_range[i][0] == 'int':
                hyper_parameters[hp] = int(hyper_parameters[hp])
            i = i + 1
        for hp in hp_list_multiple:
            hyper_parameters[hp[0]] = [
                float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
            ]
            if param_range[i][0] == 'int':
                hyper_parameters[hp[0]] = list(
                    map(int, hyper_parameters[hp[0]]))
            i = i + hp[1]

        opt_epoch = -1
        # Compare best model to default hyperparameters
        with open(log_file, 'a') as f:
            # Record hyperparameters
            f.write(str(params_dict))
            f.write('\n')
        if isinstance(self.model_class, str):  # Python 3: str covers the old unicode type
            if mode == 'classification':
                train_scores, valid_scores, _, opt_epoch = model_classification(
                    train_dataset,
                    valid_dataset,
                    valid_dataset,
                    tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    prot_desc_dict,
                    prot_desc_length,
                    hyper_parameters=params_dict,
                    early_stopping=early_stopping,
                    evaluate_freq=evaluate_freq,
                    patience=patience,
                    direction=direction,
                    model_dir=model_dir,
                    tensorboard=tensorboard,
                    no_concordance_index=no_concordance_index,
                    verbose_search=verbose_search,
                    log_file=log_file,
                    no_r2=no_r2,
                    aggregated_tasks=aggregated_tasks)
            elif mode == 'regression' or mode == 'reg-threshold':
                train_scores, valid_scores, _, opt_epoch = model_regression(
                    train_dataset,
                    valid_dataset,
                    valid_dataset,
                    tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    prot_desc_dict,
                    prot_desc_length,
                    hyper_parameters=params_dict,
                    early_stopping=early_stopping,
                    evaluate_freq=evaluate_freq,
                    patience=patience,
                    direction=direction,
                    model_dir=model_dir,
                    tensorboard=tensorboard,
                    no_concordance_index=no_concordance_index,
                    verbose_search=verbose_search,
                    log_file=log_file,
                    no_r2=no_r2,
                    aggregated_tasks=aggregated_tasks)
            else:
                raise ValueError("Invalid mode!")

            if n_tasks > 1:
                val_scores = valid_scores[self.model_class]['averaged']
            else:
                val_scores = valid_scores[self.model_class]
            score = 0
            if mode == 'regression':
                for mtc in metric:
                    mtc_name = mtc.metric.__name__
                    composite_mtc_name = mtc.name
                    if mtc_name == 'rms_score':
                        score += val_scores[composite_mtc_name]
                    if mtc_name == 'r2_score' or mtc_name == 'pearson_r2_score':
                        if no_r2:
                            coef = 0.0
                        else:
                            coef = -0.5
                        score += coef * val_scores[composite_mtc_name]
                    if mtc_name == 'concordance_index':
                        score += -val_scores[composite_mtc_name]
            elif mode == 'reg-threshold' or mode == 'classification':
                for mtc in metric:
                    mtc_name = mtc.metric.__name__
                    composite_mtc_name = mtc.name
                    if mtc_name == 'roc_auc_score':
                        score += val_scores[composite_mtc_name]
                    if mtc_name == 'prc_auc_score':
                        score += val_scores[composite_mtc_name]

            if early_stopping:
                best_score = opt_epoch[1]
                opt_epoch = opt_epoch[0]

            epoch_stmt = str(
                opt_epoch) + " is the optimum number of epochs found."
            print(epoch_stmt)

            with open(log_file, 'a') as f:
                f.write(epoch_stmt)
                f.write('\n')
                # Record performances
                f.write(str(score))
                f.write('\n')
                if early_stopping:
                    f.write(str(best_score))
                    f.write('\n')

            if not early_stopping:
                best_score = score

            # Note: the comparison criterion is best_score (early-stopping aware), not the raw score.
            if not direction:
                best_score = -best_score
            if best_score > valid_performance_opt:
                # Default hyperparameters perform better; return them
                return params_dict, best_score

        # Return optimized hyperparameters
        return hyper_parameters, valid_performance_opt
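# A minimal hedged sketch (not part of the original snippet): the search above
# flattens a mixed dict of scalar and list hyperparameters into dummy names
# l00, l01, ... so GPGO sees a flat parameter space. Toy values and
# search_range = 4 are assumed here:
from collections import OrderedDict

hyper_parameters = {'learning_rate': 0.001, 'layer_sizes': [512, 128]}  # toy dict
search_range = 4
param = OrderedDict()
i = 0
for name, value in hyper_parameters.items():
    values = value if isinstance(value, list) else [value]
    for v in values:
        key = 'l' + format(i, '02d')  # dummy name, e.g. 'l00'
        if isinstance(v, int):
            param[key] = ('int', [v // search_range, v * search_range])
        else:
            param[key] = ('cont', [v / search_range, v * search_range])
        i += 1
print(param)
# -> l00 covers learning_rate; l01/l02 cover the two layer_sizes entries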
예제 #16
0
    def Scan(self):
        from pyGPGO.covfunc import matern32  # pyGPGO is the library used for the Bayesian optimization
        from pyGPGO.acquisition import Acquisition
        from pyGPGO.surrogates.GaussianProcess import GaussianProcess
        from pyGPGO.GPGO import GPGO
        # Set the scan diameters from the user's input; convert_str_int() parses the
        # string and multiplies it by its second argument (35 and 20 are the unit
        # conversion factors for the Z and Y axes).
        diameterZ = convert_str_int(self.Di.text(), 35)
        diameterY = convert_str_int(self.Di.text(), 20)
        # The 0.5 factor halves the value, since the search interval will be
        # [-range_focus, range_focus].
        range_focus = convert_str_int(self.focus.text(), 0.5)
        # Set the integration time; the user is not given a choice here.
        spec.integration_time_micros(100 * 1000)

        def scan_f(Z, Y):
            # Move to the requested (Y, Z) position, wait for all axes to go idle,
            # then return the peak intensity of the acquired spectrum.
            mouve(5, Y, 'ABSOL')
            mouve(6, Z, 'ABSOL')
            while execution(ser, "?ASTAT") != "RRRRRRUUU":
                time.sleep(0.1)
            l2 = spec.intensities()
            lw = spec.wavelengths()
            W_m = lw[np.array(l2).argmax()]  # wavelength at the intensity maximum
            return max(l2)

        cov = matern32()
        gp = GaussianProcess(cov, optimize=True, usegrads=True)
        acq = Acquisition(mode='ExpectedImprovement')
        param = {
            'Z': ('cont', [0, diameterZ]),  # search intervals for Z and Y
            'Y': ('cont', [0, diameterY])
        }

        #np.random.seed(20)
        gpgo = GPGO(gp, acq, scan_f, param)
        gpgo.run(max_iter=10)  # run the optimization for the requested number of iterations

        print("Z max :", gpgo.getResultZ())
        print("Y max :", gpgo.getResultY())

        print(gpgo.getResult())
        # Move the cell to the ruby's coordinates.
        mouve(5, gpgo.getResultY(), 'ABSOL')
        mouve(6, gpgo.getResultZ(), 'ABSOL')

        # CRESn resets the current position of axis n; re-zeroing every axis is
        # needed so the ruby's coordinates can be recovered after the next optimization.
        execution(ser, 'CRES4')
        execution(ser, 'CRES5')
        execution(ser, 'CRES6')

        def scan_S(S, Y, Z):
            # This time we also move in depth, along the S axis.
            mouve(4, S, 'RELAT')
            mouve(5, Y, 'RELAT')
            mouve(6, Z, 'RELAT')
            while execution(ser, "?ASTAT") != "RRRRRRUUU":  # wait for the motion to finish
                time.sleep(0.1)
            l = spec.intensities()  # read the spectrum
            return max(l)  # return the peak intensity of the acquired spectrum

        cov = matern32()
        gp = GaussianProcess(cov, optimize=True, usegrads=True)
        acq = Acquisition(mode='ProbabilityImprovement')
        param = {
            'S': ('cont', [-range_focus, range_focus]),  # value ranges for S, Y and Z
            'Z': ('cont', [-50, 50]),
            'Y': ('cont', [-50, 50])
        }

        #np.random.seed(20)
        gpgo = GPGO(gp, acq, scan_S, param)
        gpgo.run(max_iter=10)  # run the algorithm for 10 iterations

        mouve(5, gpgo.getResultY(), 'ABSOL')
        mouve(6, gpgo.getResultZ(), 'ABSOL')
        mouve(4, gpgo.getResultS(), 'ABSOL')

        end = time.time()  # measure the optimization time (`start` is assumed to be set earlier)
        print(end - start)

        # Display all results in the GUI.
        self.Yscan.setText(str(round(-1 * gpgo.getResultY(), 2)))
        self.Zscan.setText(str(round(gpgo.getResultZ(), 2)))
        self.Imax.setText(str(round(gpgo.getResultI(), 0)))
        self.Smax.setText(str(round(gpgo.getResultS(), 2)))
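        # A hedged sketch (not part of the original snippet): getResultZ()/getResultY()/
        # getResultS()/getResultI() exist only in the customized pyGPGO used here. With
        # stock pyGPGO, the same information comes from getResult(), which returns
        # (best parameter dict, best target value):
        best_params, best_intensity = gpgo.getResult()
        print("S max:", best_params['S'], "Y max:", best_params['Y'],
              "Z max:", best_params['Z'], "Imax:", best_intensity)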
예제 #17
0
sexp = matern52()
gp = GaussianProcess(sexp)
'''
    'ExpectedImprovement': self.ExpectedImprovement,
    'IntegratedExpectedImprovement': self.IntegratedExpectedImprovement,
    'ProbabilityImprovement': self.ProbabilityImprovement,
    'IntegratedProbabilityImprovement': self.IntegratedProbabilityImprovement,
    'UCB': self.UCB,
    'IntegratedUCB': self.IntegratedUCB,
    'Entropy': self.Entropy,
    'tExpectedImprovement': self.tExpectedImprovement,
    'tIntegratedExpectedImprovement': self.tIntegratedExpectedImprovement
'''
acq = Acquisition(mode='ExpectedImprovement')
param = OrderedDict()
for temp in X_name:
    param[temp] = ('cont', x_round[0])
gpgo = GPGO(gp, acq, f, param)
gpgo.run(max_iter=200, nstart=100)
res, f_min_xy = gpgo.getResult()

f_true = f_objective.fmin

print('Exact minimum of the original function:', f_true)
print('Input variables at the minimum:', res)
print('Approximate minimum found:', f_min_xy)

if f_true != 0:
    corr_err = abs((f_true - f_min_xy) / f_true) * 100
    print('corr_err:', corr_err)
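# A minimal hedged sketch (not part of the original snippet): pyGPGO maximizes its
# objective, so a minimization problem is handled by negating f and negating the
# returned optimum again. Self-contained toy version:
from pyGPGO.covfunc import matern52
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.acquisition import Acquisition
from pyGPGO.GPGO import GPGO

def objective(x):
    return (x - 0.3) ** 2  # toy function; minimum 0 at x = 0.3

def f_neg(x):
    return -objective(x)   # negate, since GPGO maximizes

gpgo_toy = GPGO(GaussianProcess(matern52()),
                Acquisition(mode='ExpectedImprovement'),
                f_neg, {'x': ('cont', [0, 1])})
gpgo_toy.run(max_iter=20)
res_toy, neg_min = gpgo_toy.getResult()
print('approx. minimizer:', res_toy['x'], 'approx. minimum:', -neg_min)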
예제 #18
0
    def hyperparam_search(
            self,
            params_dict,
            train_dataset,
            valid_dataset,
            output_transformers,
            metric,
            direction=True,
            n_features=1024,
            n_tasks=1,
            max_iter=20,
            search_range=4,
            hp_invalid_list=[
                'seed', 'nb_epoch', 'penalty_type', 'dropouts',
                'bypass_dropouts', 'n_pair_feat', 'fit_transformers',
                'min_child_weight', 'max_delta_step', 'subsample',
                'colsample_bylevel', 'colsample_bytree', 'reg_alpha',
                'reg_lambda', 'scale_pos_weight', 'base_score'
            ],
            log_file='GPhypersearch.log'):
        """Perform hyperparams search using a gaussian process assumption

    params_dict include single-valued parameters being optimized,
    which should only contain int, float and list of int(float)

    parameters with names in hp_invalid_list will not be changed.

    For Molnet models, self.model_class is model name in string,
    params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class]

    Parameters
    ----------
    params_dict: dict
      dict including parameters and their initial values
      parameters not suitable for optimization can be added to hp_invalid_list
    train_dataset: dc.data.Dataset struct
      dataset used for training
    valid_dataset: dc.data.Dataset struct
      dataset used for validation(optimization on valid scores)
    output_transformers: list of dc.trans.Transformer
      transformers for evaluation
    metric: list of dc.metrics.Metric
      metric used for evaluation
    direction: bool
      maximization(True) or minimization(False)
    n_features: int
      number of input features
    n_tasks: int
      number of tasks
    max_iter: int
      number of optimization trials
    search_range: int(float)
      optimization on [initial values / search_range,
                       initial values * search_range]
    hp_invalid_list: list
      names of parameters that should not be optimized
    log_file: string
      name of log file, hyperparameters and results for each trial will be recorded

    Returns
    -------
    hyper_parameters: dict
      params_dict with all optimized values
    valid_performance_opt: float
      best performance on valid dataset

    """

        assert len(metric) == 1, 'Only use one metric'
        hyper_parameters = params_dict
        hp_list = list(hyper_parameters.keys())  # materialize so remove() works on Python 3
        for hp in hp_invalid_list:
            if hp in hp_list:
                hp_list.remove(hp)

        hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
        assert set(hp_list_class) <= set([list, int, float])
        # Float or int hyper parameters(ex. batch_size, learning_rate)
        hp_list_single = [
            hp_list[i] for i in range(len(hp_list))
            if not hp_list_class[i] is list
        ]
        # List of float or int hyper parameters(ex. layer_sizes)
        hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                            for i in range(len(hp_list))
                            if hp_list_class[i] is list]

        # Number of parameters
        n_param = len(hp_list_single)
        if len(hp_list_multiple) > 0:
            n_param = n_param + sum([hp[1] for hp in hp_list_multiple])
        # Range of optimization
        param_range = []
        for hp in hp_list_single:
            if hyper_parameters[hp].__class__ is int:
                param_range.append((('int'), [
                    hyper_parameters[hp] // search_range,
                    hyper_parameters[hp] * search_range
                ]))
            else:
                param_range.append((('cont'), [
                    hyper_parameters[hp] / search_range,
                    hyper_parameters[hp] * search_range
                ]))
        for hp in hp_list_multiple:
            if hyper_parameters[hp[0]][0].__class__ is int:
                param_range.extend([(('int'), [
                    hyper_parameters[hp[0]][i] // search_range,
                    hyper_parameters[hp[0]][i] * search_range
                ]) for i in range(hp[1])])
            else:
                param_range.extend([(('cont'), [
                    hyper_parameters[hp[0]][i] / search_range,
                    hyper_parameters[hp[0]][i] * search_range
                ]) for i in range(hp[1])])

        # Dummy names
        param_name = ['l' + format(i, '02d') for i in range(20)]
        param = dict(zip(param_name[:n_param], param_range))

        data_dir = os.environ['DEEPCHEM_DATA_DIR']
        log_file = os.path.join(data_dir, log_file)

        def f(l00=0,
              l01=0,
              l02=0,
              l03=0,
              l04=0,
              l05=0,
              l06=0,
              l07=0,
              l08=0,
              l09=0,
              l10=0,
              l11=0,
              l12=0,
              l13=0,
              l14=0,
              l15=0,
              l16=0,
              l17=0,
              l18=0,
              l19=0):
            """ Optimizing function
      Take in hyper parameter values and return valid set performances

      Parameters
      ----------
      l00~l19: int or float
        placeholders for hyperparameters being optimized,
        hyper_parameters dict is rebuilt based on input values of placeholders

      Returns:
      --------
      valid_scores: float
        valid set performances
      """
            args = locals()
            # Input hyper parameters
            i = 0
            for hp in hp_list_single:
                hyper_parameters[hp] = float(args[param_name[i]])
                if param_range[i][0] == 'int':
                    hyper_parameters[hp] = int(hyper_parameters[hp])
                i = i + 1
            for hp in hp_list_multiple:
                hyper_parameters[hp[0]] = [
                    float(args[param_name[j]]) for j in range(i, i + hp[1])
                ]
                if param_range[i][0] == 'int':
                    hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
                i = i + hp[1]

            logger.info(hyper_parameters)
            # Run benchmark
            with open(log_file, 'a') as f:
                # Record hyperparameters
                f.write(str(hyper_parameters))
                f.write('\n')
            if isinstance(self.model_class, str):  # Python 3: str covers the old unicode type
                try:
                    train_scores, valid_scores, _ = benchmark_classification(
                        train_dataset,
                        valid_dataset,
                        valid_dataset, ['task_placeholder'] * n_tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        hyper_parameters=hyper_parameters)
                except AssertionError:
                    train_scores, valid_scores, _ = benchmark_regression(
                        train_dataset,
                        valid_dataset,
                        valid_dataset, ['task_placeholder'] * n_tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        hyper_parameters=hyper_parameters)
                score = valid_scores[self.model_class][metric[0].name]
            else:
                model_dir = tempfile.mkdtemp()
                model = self.model_class(hyper_parameters, model_dir)
                model.fit(train_dataset, **hyper_parameters)
                model.save()
                evaluator = Evaluator(model, valid_dataset,
                                      output_transformers)
                multitask_scores = evaluator.compute_model_performance(metric)
                score = multitask_scores[metric[0].name]

            with open(log_file, 'a') as f:
                # Record performances
                f.write(str(score))
                f.write('\n')
            # GPGO maximizes performance by default; return the negated score when minimizing
            if direction:
                return score
            else:
                return -score

        import pyGPGO
        from pyGPGO.covfunc import matern32
        from pyGPGO.acquisition import Acquisition
        from pyGPGO.surrogates.GaussianProcess import GaussianProcess
        from pyGPGO.GPGO import GPGO
        cov = matern32()
        gp = GaussianProcess(cov)
        acq = Acquisition(mode='ExpectedImprovement')
        gpgo = GPGO(gp, acq, f, param)
        logger.info("Max number of iteration: %i" % max_iter)
        gpgo.run(max_iter=max_iter)

        hp_opt, valid_performance_opt = gpgo.getResult()
        # Readout best hyper parameters
        i = 0
        for hp in hp_list_single:
            hyper_parameters[hp] = float(hp_opt[param_name[i]])
            if param_range[i][0] == 'int':
                hyper_parameters[hp] = int(hyper_parameters[hp])
            i = i + 1
        for hp in hp_list_multiple:
            hyper_parameters[hp[0]] = [
                float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
            ]
            if param_range[i][0] == 'int':
                hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
            i = i + hp[1]

        # Compare best model to default hyperparameters
        with open(log_file, 'a') as f:
            # Record hyperparameters
            f.write(str(params_dict))
            f.write('\n')
        if isinstance(self.model_class, str):  # Python 3: str covers the old unicode type
            try:
                train_scores, valid_scores, _ = benchmark_classification(
                    train_dataset,
                    valid_dataset,
                    valid_dataset, ['task_placeholder'] * n_tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    hyper_parameters=params_dict)
            except AssertionError:
                train_scores, valid_scores, _ = benchmark_regression(
                    train_dataset,
                    valid_dataset,
                    valid_dataset, ['task_placeholder'] * n_tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    hyper_parameters=params_dict)
            score = valid_scores[self.model_class][metric[0].name]
            with open(log_file, 'a') as f:
                # Record performances
                f.write(str(score))
                f.write('\n')
            if not direction:
                score = -score
            if score > valid_performance_opt:
                # Default hyperparameters perform better; return them
                return params_dict, score

        # Return the optimized hyperparameters
        return hyper_parameters, valid_performance_opt
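# A hedged sketch (not part of the original snippet): the closing block above
# re-evaluates the default hyperparameters and keeps whichever of (defaults,
# BO optimum) scores higher. The bare logic, with a hypothetical evaluate()
# standing in for the benchmark call:
def evaluate(params):
    return 0.82  # toy validation score for the defaults

direction = True                                                # True = maximize
params_dict = {'learning_rate': 0.001}                          # defaults
hp_opt, valid_performance_opt = {'learning_rate': 0.004}, 0.79  # pretend BO result

score = evaluate(params_dict)
if not direction:
    score = -score
if score > valid_performance_opt:
    best = (params_dict, score)                    # defaults win
else:
    best = (hp_opt, valid_performance_opt)         # BO optimum wins
print(best)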
예제 #19
0
    def f(x):
        return -np.mean(
            np.sqrt(-(cross_val_score(Lasso(alpha=x),
                                      x_train,
                                      y=y_train,
                                      scoring='neg_mean_squared_error',
                                      cv=10))))

    sexp = squaredExponential()
    gp = GaussianProcess(sexp)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {'x': ('cont', [0, 50])}

    gpgo = GPGO(gp, acq, f, param)
    gpgo.run(max_iter=100)
    best_x, best_y = gpgo.getResult()
    print('best_x:', best_x)
    print('best_y:', best_y)
    model_Lasso = Lasso(alpha=best_x['x'])  # getResult() returns a dict of parameters

    model_Lasso.fit(x_train, y_train)
    y_predict = model_Lasso.predict(x_test)

    RMSE_Lasso = np.sqrt(mean_squared_error(y_test, y_predict))
    R2_Lasso = r2_score(y_test, y_predict)
    MAE_Lasso = median_absolute_error(y_test, y_predict)  # median (not mean) absolute error

    print('****************' + 'Lasso' + '****************')
    print('RMSE_Lasso:', RMSE_Lasso)
    print('R2_Lasso:', R2_Lasso)
    print('MAE_Lasso:', MAE_Lasso)
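    # A hedged sketch (not part of the original snippet; reuses the names above).
    # Regularization strengths usually vary over orders of magnitude, so a common
    # refinement is to search the exponent of alpha instead of alpha itself:
    def f_log(a):
        alpha = 10 ** a
        return -np.mean(
            np.sqrt(-(cross_val_score(Lasso(alpha=alpha),
                                      x_train,
                                      y=y_train,
                                      scoring='neg_mean_squared_error',
                                      cv=10))))

    param_log = {'a': ('cont', [-4, 2])}  # alpha in [1e-4, 1e2]
    gpgo_log = GPGO(GaussianProcess(squaredExponential()),
                    Acquisition(mode='ExpectedImprovement'), f_log, param_log)
    gpgo_log.run(max_iter=50)
    best_a, _ = gpgo_log.getResult()
    print('best alpha (log-scale search):', 10 ** best_a['a'])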
예제 #20
0
    def Scan(self):
        from pyGPGO.covfunc import matern32
        from pyGPGO.acquisition import Acquisition
        from pyGPGO.surrogates.GaussianProcess import GaussianProcess
        from pyGPGO.GPGO import GPGO
        spec.integration_time_micros(100 * 1000)

        def scan_f(Z, Y):

            l1 = spec.intensities()
            print("\n")
            print("Before :", int(Y), int(Z), int(positionvalue(5)),
                  int(positionvalue(6)), int(max(l1)))
            #mouve(4, S, 'ABSOL')
            mouve(5, Y, 'ABSOL')
            mouve(6, Z, 'ABSOL')

            if positionvalue(5) != Y and positionvalue(6) != Z:
                time.sleep(0.6)
            l2 = spec.intensities()
            lw = spec.wavelengths()
            W_m = lw[np.array(l2).argmax()]
            print("After : ", int(Y), int(Z), "Y =", int(positionvalue(5)),
                  "Z =", int(positionvalue(6)), "Imax =", int(max(l2)))
            print("Longeur d'onde Imax :", W_m)

            return max(l2)

        cov = matern32()
        gp = GaussianProcess(cov, optimize=True, usegrads=True)
        acq = Acquisition(mode='ExpectedImprovement')
        param = {'Z': ('cont', [0, 120 * 35]), 'Y': ('cont', [0, 120 * 20])}

        #np.random.seed(20)
        gpgo = GPGO(gp, acq, scan_f, param)
        gpgo.run(max_iter=10)

        print("Z max :", gpgo.getResultZ())
        print("Y max :", gpgo.getResultY())

        print(gpgo.getResult())
        mouve(5, gpgo.getResultY(), 'ABSOL')
        mouve(6, gpgo.getResultZ(), 'ABSOL')

        def scan_S(S, Y, Z):
            execution(ser,
                      'CRES4')  # CRESn -> reset current position for an axis
            execution(ser, 'CRES5')
            execution(ser, 'CRES6')

            mouve(4, S, 'RELAT')
            mouve(5, Y, 'RELAT')
            mouve(6, Z, 'RELAT')
            if positionvalue(4) != S:
                time.sleep(0.2)
            l = spec.intensities()
            return max(l)

        cov = matern32()
        gp = GaussianProcess(cov, optimize=True, usegrads=True)
        acq = Acquisition(mode='ProbabilityImprovement')
        param = {
            'S': ('cont', [-150, 150]),
            'Z': ('cont', [-50, 50]),
            'Y': ('cont', [-50, 50])
        }
        #
        np.random.seed(20)
        gpgo = GPGO(gp, acq, scan_S, param)
        gpgo.run(max_iter=10)

        mouve(5, gpgo.getResultY(), 'ABSOL')
        mouve(6, gpgo.getResultZ(), 'ABSOL')
        mouve(4, gpgo.getResultS(), 'ABSOL')
예제 #21
0
import time

import numpy as np
import eval_core  # project-specific evaluation module assumed importable here
from pyGPGO.covfunc import matern32
from pyGPGO.acquisition import Acquisition
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.GPGO import GPGO

time_vec = []  # per-evaluation wall-clock times


def func(res_idx, mul_idx):
    start_time = time.time()
    f = -eval_core.cost_fun(res=eval_core.res_vec[int(res_idx)],
                            mul=eval_core.mul_vec[int(mul_idx)],
                            verbose=False)
    end_time = time.time()
    eval_time = end_time - start_time
    time_vec.append(eval_time)
    print("     eval_time   %s seconds " % (eval_time))
    return f


if __name__ == '__main__':
    cov = matern32()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode='ExpectedImprovement')
    param = {
        'res_idx': ('int', [0, len(eval_core.res_vec) - 1]),
        'mul_idx': ('int', [0, len(eval_core.mul_vec) - 1]),
    }

    np.random.seed(22)
    gpgo = GPGO(gp, acq, func, param)
    gpgo.run(max_iter=100, init_evals=89)
    result = gpgo.getResult()
    best_params = {}
    best_params['res'] = eval_core.res_vec[int(result[0]['res_idx'])]
    best_params['mul'] = eval_core.mul_vec[int(result[0]['mul_idx'])]
    print(best_params)
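# A hedged sketch (not part of the original snippet): the 'int' parameter type plus
# an explicit candidate vector, as above, is a handy way to run BO over a purely
# discrete design space. Self-contained toy version:
from pyGPGO.covfunc import matern32
from pyGPGO.acquisition import Acquisition
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.GPGO import GPGO

candidates = [10, 22, 47, 100, 220, 470]  # toy discrete design values

def cost(value):
    return (value - 100) ** 2  # toy cost, minimized at value == 100

def func_idx(idx):
    return -cost(candidates[int(round(idx))])  # round defensively, negate to maximize

param_idx = {'idx': ('int', [0, len(candidates) - 1])}
gpgo_idx = GPGO(GaussianProcess(matern32()),
                Acquisition(mode='ExpectedImprovement'), func_idx, param_idx)
gpgo_idx.run(max_iter=15)
best, _ = gpgo_idx.getResult()
print('best candidate:', candidates[int(round(best['idx']))])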
예제 #22
0
randomForest_1_start = timeit.default_timer()
gpgo_random_forest_1.run(max_iter=furtherEvaluations, init_evals=initialPoints)
randomForest_1_stop = timeit.default_timer()

randomForest_2_start = timeit.default_timer()
gpgo_random_forest_2.run(max_iter=furtherEvaluations, init_evals=initialPoints)
randomForest_2_stop = timeit.default_timer()

randomForest_3_start = timeit.default_timer()
gpgo_random_forest_3.run(max_iter=furtherEvaluations, init_evals=initialPoints)
randomForest_3_stop = timeit.default_timer()

#Get the results
print("\n---Results---\n")
print("\n", gpgo_gaussian_model_1.getResult())
print('Gaussian Model 1 Time: ', gaussianModel_1_start - gaussianModel_1_stop)
print("\n", gpgo_gaussian_model_2.getResult())
print('Gaussian Model 2 Time: ', gaussianModel_2_start - gaussianModel_2_stop)
print("\n", gpgo_gaussian_model_3.getResult())
print('Gaussian Model 3 Time: ', gaussianModel_3_start - gaussianModel_3_start)

print("\n", gpgo_random_forest_1.getResult())
print('Random Forest 1 Time: ', randomForest_1_start - randomForest_1_stop)
print("\n", gpgo_random_forest_2.getResult())
print('Random Forest 2 Time: ', randomForest_2_start - randomForest_2_stop)
print("\n", gpgo_random_forest_3.getResult())
print('Random Forest 3 Time: ', randomForest_3_start - randomForest_3_stop)
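# A minimal hedged sketch (not part of the original snippet): a small context
# manager keeps the start/stop bookkeeping for timing prints like those above in
# one place, avoiding swapped or reused variables:
import timeit
from contextlib import contextmanager

@contextmanager
def timed(label):
    start = timeit.default_timer()
    yield
    print(label, 'time:', timeit.default_timer() - start, 'seconds')

# Usage, with the optimizers defined earlier in this script:
# with timed('Random Forest 1'):
#     gpgo_random_forest_1.run(max_iter=furtherEvaluations, init_evals=initialPoints)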

#------------------------------
#GRID SEARCH
예제 #23
0
  def hyperparam_search(
      self,
      params_dict,
      train_dataset,
      valid_dataset,
      output_transformers,
      metric,
      direction=True,
      n_features=1024,
      n_tasks=1,
      max_iter=20,
      search_range=4,
      hp_invalid_list=[
          'seed', 'nb_epoch', 'penalty_type', 'dropouts', 'bypass_dropouts',
          'n_pair_feat', 'fit_transformers', 'min_child_weight',
          'max_delta_step', 'subsample', 'colsample_bylevel',
          'colsample_bytree', 'reg_alpha', 'reg_lambda', 'scale_pos_weight',
          'base_score'
      ],
      log_file='GPhypersearch.log'):
    """Perform hyperparams search using a gaussian process assumption

    params_dict include single-valued parameters being optimized,
    which should only contain int, float and list of int(float)

    parameters with names in hp_invalid_list will not be changed.

    For Molnet models, self.model_class is model name in string,
    params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class]

    Parameters
    ----------
    params_dict: dict
      dict including parameters and their initial values
      parameters not suitable for optimization can be added to hp_invalid_list
    train_dataset: dc.data.Dataset struct
      dataset used for training
    valid_dataset: dc.data.Dataset struct
      dataset used for validation(optimization on valid scores)
    output_transformers: list of dc.trans.Transformer
      transformers for evaluation
    metric: list of dc.metrics.Metric
      metric used for evaluation
    direction: bool
      maximization(True) or minimization(False)
    n_features: int
      number of input features
    n_tasks: int
      number of tasks
    max_iter: int
      number of optimization trials
    search_range: int(float)
      optimization on [initial values / search_range,
                       initial values * search_range]
    hp_invalid_list: list
      names of parameters that should not be optimized
    log_file: string
      name of log file, hyperparameters and results for each trial will be recorded

    Returns
    -------
    hyper_parameters: dict
      params_dict with all optimized values
    valid_performance_opt: float
      best performance on valid dataset

    """

    assert len(metric) == 1, 'Only use one metric'
    hyper_parameters = params_dict
    hp_list = list(hyper_parameters.keys())
    for hp in hp_invalid_list:
      if hp in hp_list:
        hp_list.remove(hp)

    hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
    assert set(hp_list_class) <= set([list, int, float])
    # Float or int hyper parameters(ex. batch_size, learning_rate)
    hp_list_single = [
        hp_list[i] for i in range(len(hp_list)) if not hp_list_class[i] is list
    ]
    # List of float or int hyper parameters(ex. layer_sizes)
    hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                        for i in range(len(hp_list))
                        if hp_list_class[i] is list]

    # Number of parameters
    n_param = len(hp_list_single)
    if len(hp_list_multiple) > 0:
      n_param = n_param + sum([hp[1] for hp in hp_list_multiple])
    # Range of optimization
    param_range = []
    for hp in hp_list_single:
      if hyper_parameters[hp].__class__ is int:
        param_range.append((('int'), [
            hyper_parameters[hp] // search_range,
            hyper_parameters[hp] * search_range
        ]))
      else:
        param_range.append((('cont'), [
            hyper_parameters[hp] / search_range,
            hyper_parameters[hp] * search_range
        ]))
    for hp in hp_list_multiple:
      if hyper_parameters[hp[0]][0].__class__ is int:
        param_range.extend([(('int'), [
            hyper_parameters[hp[0]][i] // search_range,
            hyper_parameters[hp[0]][i] * search_range
        ]) for i in range(hp[1])])
      else:
        param_range.extend([(('cont'), [
            hyper_parameters[hp[0]][i] / search_range,
            hyper_parameters[hp[0]][i] * search_range
        ]) for i in range(hp[1])])

    # Dummy names
    param_name = ['l' + format(i, '02d') for i in range(20)]
    param = dict(zip(param_name[:n_param], param_range))

    data_dir = os.environ['DEEPCHEM_DATA_DIR']
    log_file = os.path.join(data_dir, log_file)

    def f(l00=0,
          l01=0,
          l02=0,
          l03=0,
          l04=0,
          l05=0,
          l06=0,
          l07=0,
          l08=0,
          l09=0,
          l10=0,
          l11=0,
          l12=0,
          l13=0,
          l14=0,
          l15=0,
          l16=0,
          l17=0,
          l18=0,
          l19=0):
      """ Optimizing function
      Take in hyper parameter values and return valid set performances

      Parameters
      ----------
      l00~l19: int or float
        placeholders for hyperparameters being optimized,
        hyper_parameters dict is rebuilt based on input values of placeholders

      Returns:
      --------
      valid_scores: float
        valid set performances
      """
      args = locals()
      # Input hyper parameters
      i = 0
      for hp in hp_list_single:
        hyper_parameters[hp] = float(args[param_name[i]])
        if param_range[i][0] == 'int':
          hyper_parameters[hp] = int(hyper_parameters[hp])
        i = i + 1
      for hp in hp_list_multiple:
        hyper_parameters[hp[0]] = [
            float(args[param_name[j]]) for j in range(i, i + hp[1])
        ]
        if param_range[i][0] == 'int':
          hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
        i = i + hp[1]

      logger.info(hyper_parameters)
      # Run benchmark
      with open(log_file, 'a') as f:
        # Record hyperparameters
        f.write(str(hyper_parameters))
        f.write('\n')
      if isinstance(self.model_class, str):  # Python 3: str covers the old unicode type
        try:
          train_scores, valid_scores, _ = benchmark_classification(
              train_dataset,
              valid_dataset,
              valid_dataset, ['task_placeholder'] * n_tasks,
              output_transformers,
              n_features,
              metric,
              self.model_class,
              hyper_parameters=hyper_parameters)
        except AssertionError:
          train_scores, valid_scores, _ = benchmark_regression(
              train_dataset,
              valid_dataset,
              valid_dataset, ['task_placeholder'] * n_tasks,
              output_transformers,
              n_features,
              metric,
              self.model_class,
              hyper_parameters=hyper_parameters)
        score = valid_scores[self.model_class][metric[0].name]
      else:
        model_dir = tempfile.mkdtemp()
        model = self.model_class(hyper_parameters, model_dir)
        model.fit(train_dataset, **hyper_parameters)
        model.save()
        evaluator = Evaluator(model, valid_dataset, output_transformers)
        multitask_scores = evaluator.compute_model_performance(metric)
        score = multitask_scores[metric[0].name]

      with open(log_file, 'a') as f:
        # Record performances
        f.write(str(score))
        f.write('\n')
      # GPGO maximizes performance by default; return the negated score when minimizing
      if direction:
        return score
      else:
        return -score

    import pyGPGO
    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO
    cov = matern32()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, f, param)
    logger.info("Max number of iteration: %i" % max_iter)
    gpgo.run(max_iter=max_iter)

    hp_opt, valid_performance_opt = gpgo.getResult()
    # Readout best hyper parameters
    i = 0
    for hp in hp_list_single:
      hyper_parameters[hp] = float(hp_opt[param_name[i]])
      if param_range[i][0] == 'int':
        hyper_parameters[hp] = int(hyper_parameters[hp])
      i = i + 1
    for hp in hp_list_multiple:
      hyper_parameters[hp[0]] = [
          float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
      ]
      if param_range[i][0] == 'int':
        hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
      i = i + hp[1]

    # Compare best model to default hyperparameters
    with open(log_file, 'a') as f:
      # Record hyperparameters
      f.write(str(params_dict))
      f.write('\n')
    if isinstance(self.model_class, str):  # Python 3: str covers the old unicode type
      try:
        train_scores, valid_scores, _ = benchmark_classification(
            train_dataset,
            valid_dataset,
            valid_dataset, ['task_placeholder'] * n_tasks,
            output_transformers,
            n_features,
            metric,
            self.model_class,
            hyper_parameters=params_dict)
      except AssertionError:
        train_scores, valid_scores, _ = benchmark_regression(
            train_dataset,
            valid_dataset,
            valid_dataset, ['task_placeholder'] * n_tasks,
            output_transformers,
            n_features,
            metric,
            self.model_class,
            hyper_parameters=params_dict)
      score = valid_scores[self.model_class][metric[0].name]
      with open(log_file, 'a') as f:
        # Record performances
        f.write(str(score))
        f.write('\n')
      if not direction:
        score = -score
      if score > valid_performance_opt:
        # Default hyperparameters perform better; return them
        return params_dict, score

    # Return the optimized hyperparameters
    return hyper_parameters, valid_performance_opt
예제 #24
0
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.svm import SVC
from sklearn.datasets import make_moons
from sklearn.model_selection import cross_val_score

from pyGPGO.GPGO import GPGO
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.acquisition import Acquisition
from pyGPGO.covfunc import squaredExponential


def evaluateModel(C, gamma):
    clf = SVC(C=10**C, gamma=10**gamma)
    return np.average(cross_val_score(clf, X, y))


if __name__ == '__main__':
    np.random.seed(20)
    X, y = make_moons(n_samples=200, noise=0.3)

    cm_bright = ListedColormap(['#fc4349', '#6dbcdb'])

    fig = plt.figure()
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    plt.show()

    sexp = squaredExponential()
    gp = GaussianProcess(sexp, optimize=True, usegrads=True)
    acq = Acquisition(mode='UCB', beta=1.5)

    params = {'C': ('cont', (-4, 5)), 'gamma': ('cont', (-4, 5))}

    gpgo = GPGO(gp, acq, evaluateModel, params)
    gpgo.run(max_iter=50)
    print(gpgo.getResult())
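    # A hedged sketch (not part of the original snippet): because C and gamma were
    # searched in log10 space, the best classifier is rebuilt by exponentiating
    # the returned parameters:
    best_params, best_cv_score = gpgo.getResult()
    clf = SVC(C=10 ** best_params['C'], gamma=10 ** best_params['gamma'])
    clf.fit(X, y)
    print('best CV accuracy:', best_cv_score)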
예제 #25
0
         'lsigma_t':  ('cont', [-3, 1]),
         'l_corr':  ('cont', [10, 1000]),
         't_corr':  ('cont', [1, 20])}
for key, value in json.loads(args.range).items():
    param[key] = value
print("Prameter ranges as below:")
if not args.full:
    del param['l_corr']
    del param['t_corr']
print(param)

# preparation of data_manager
dm = data_manupulation.impute_shield_dm(100)
gene_df = pd.read_csv("data/gene_list/selected_cluster_gene_list.txt")
selected_gene_df = dm.select_gene_df(gene_df)
dm = data_manupulation.standard_dm(args.refnum)
# setting for BO
cov = squaredExponential()
if args.mcmc:
    gp = GaussianProcessMCMC(cov, niter=300, burnin=100, step=pm.Slice)
    acq = Acquisition(mode='IntegratedExpectedImprovement')
else:
    gp = GaussianProcess(cov, optimize=True, usegrads=True)
    acq = Acquisition(mode='ExpectedImprovement')
np.random.seed(100000)
gpgo = GPGO(gp, acq, ts_recovery_correlation, param)
gpgo.run(max_iter=args.boiter)
print(gpgo.getResult())
f = open(args.filepath, "w")
json.dump(gpgo.getResult(), f)
예제 #26
0
        # Load binary truths
        binary_truth = np.loadtxt(
            "./data/" + lang + "/semeval2020_ulscd_" + lang[:3] +
            "/truth/binary.txt",
            dtype=str,
            delimiter="\t",
        )
        # Create a GP surrogate model with a squared exponential
        # covariance function, a.k.a. kernel
        sexp = squaredExponential()
        sur_model = GaussianProcess(sexp)
        fitness = get_fitness_for_automl(model1, model2, binary_truth, logger)
        # setting the acquisition function
        acq = Acquisition(mode="ExpectedImprovement")

        # creating an object Bayesian Optimization
        bo = GPGO(sur_model, acq, fitness, param, n_jobs=4)
        bo._firstRun = functools.partial(myFirstRun, bo)
        bo.updateGP = functools.partial(myUpdateGP, bo)
        bo._firstRun(init_rand_configs=init_rand_configs)
        bo.logger._printInit(bo)

        bo.run(furtherEvaluations, resume=True)
        best = bo.getResult()
        logger.info("BEST PARAMETERS: " +
                    ", ".join([k + ": " + str(v)
                               for k, v in best[0].items()]) + ", ACCU: " +
                    str(best[1]))
        logger.info("OPTIMIZATION HISTORY")
        logger.info(pprint.pformat(bo.history))
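# A minimal hedged sketch (not part of the original snippet): the functools.partial
# trick above swaps methods on a single GPGO instance without subclassing — a free
# function taking `self` first is bound to the instance. Self-contained illustration:
import functools

class Optimizer:
    def step(self):
        return 'original step'

def my_step(self, scale=1.0):
    return 'patched step x%.1f' % scale

opt = Optimizer()
opt.step = functools.partial(my_step, opt)  # instance-level override
print(opt.step())          # -> patched step x1.0
print(Optimizer().step())  # other instances keep the original method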
예제 #27
0
                # For a non-linear 1-D component, use BO to find the function minimum
                def f(x):
                    return -(rbf_hdmr.func_1D_value(
                        x,
                        type=type_fx_i,
                        xishu=xishu_arr_i,
                        point_sample=point_round_i))

                sexp = matern52()
                gp = GaussianProcess(sexp)
                acq = Acquisition(mode='UCB')
                round_x = (x_round[i][0], x_round[i][1])
                param = {'x': ('cont', round_x)}
                gpgo = GPGO(gp, acq, f, param)
                gpgo.run(max_iter=5, nstart=1)
                res, max_y = gpgo.getResult()
                round_xyz[i] = res['x']
                print('i:', i)
                print(max_y)
                f_min_value += max_y

        else:
            # This variable has exactly one correlated variable.
            for index in range(len(x_ij_index)):
                if i == x_ij_index[index, 0]:

                    # First-order model case for variable i
                    print('Correlated variable found **********************************************', i)
                    type_fx_i = type_fx[i]