Esempio n. 1
0
 def __init__(self, dimSpace, degree, dimSimplex):
     """Remember the Bezier simplex configuration and build the model.

     Parameters
     ----------
     dimSpace : int
         presumably the dimension of the space holding the control
         points -- TODO confirm against ``model.BezierSimplex``
     degree : int
         degree of the Bezier simplex
     dimSimplex : int
         dimension of the Bezier simplex
     """
     self.dimSpace, self.dimSimplex, self.degree = (dimSpace, dimSimplex,
                                                    degree)
     # The model receives exactly the values stored on the instance.
     self.bezier_simplex = model.BezierSimplex(
         dimSpace=self.dimSpace,
         dimSimplex=self.dimSimplex,
         degree=self.degree,
     )
Esempio n. 2
0
 def __init__(self, dimSpace, degree, dimSimplex):
     """Keep the three shape parameters and create the Bezier simplex model.

     Parameters
     ----------
     dimSpace : int
         forwarded to ``model.BezierSimplex`` as ``dimSpace``
     degree : int
         forwarded to ``model.BezierSimplex`` as ``degree``
     dimSimplex : int
         forwarded to ``model.BezierSimplex`` as ``dimSimplex``
     """
     self.dimSpace = dimSpace
     self.dimSimplex = dimSimplex
     self.degree = degree
     model_kwargs = {
         "dimSpace": self.dimSpace,
         "dimSimplex": self.dimSimplex,
         "degree": self.degree,
     }
     self.bezier_simplex = model.BezierSimplex(**model_kwargs)
Esempio n. 3
0
    def __init__(self, degree, dim_space, dim_simplex):
        """Prepare everything needed to draw points from a Bezier simplex.

        Stores the shape parameters, enumerates every non-empty subset of
        objective indices, builds the sampler and the model, and generates
        the "true" control points via ``GenerateControlPoint.simplex()``.

        Parameters
        ----------
        degree : int
            degree of the Bezier simplex
        dim_space : int
            forwarded to the model as ``dim_space``
        dim_simplex : int
            dimension of the Bezier simplex; also the number of
            objective indices enumerated here
        """
        self.degree = degree
        self.dim_space = dim_space
        self.dim_simplex = dim_simplex
        self.objective_function_indices_list = list(range(self.dim_simplex))

        # All non-empty index subsets, smallest subsets first.
        objective_count = len(self.objective_function_indices_list)
        self.subproblem_indices_list = [
            subset
            for size in range(1, objective_count + 1)
            for subset in combinations(self.objective_function_indices_list,
                                       size)
        ]

        # Keyword arguments shared by the model and the control point
        # generator.
        shape_kwargs = {
            "dim_space": self.dim_space,
            "dim_simplex": self.dim_simplex,
            "degree": self.degree,
        }

        # Helpers that generate data points on the Bezier simplex.
        self.uniform_sampling = sampling.UniformSampling(
            dimension=self.dim_simplex)
        self.bezier_simplex = model.BezierSimplex(**shape_kwargs)
        index_iterator = subfunction.BezierIndex(dim=self.dim_simplex,
                                                 deg=self.degree)
        self.monomial_degree_list = list(index_iterator)

        # Generate the true control points.
        control_point_generator = model.GenerateControlPoint(**shape_kwargs)
        self.control_point_true = control_point_generator.simplex()
Esempio n. 4
0
    def initialize_control_point(self, data):
        """Build initial control points from ``data``.

        A fresh ``model.BezierSimplex`` is created from this object's
        ``dimSpace``, ``dimSimplex`` and ``degree`` and the work is
        delegated to its ``initialize_control_point`` method.

        Parameters
        ----------
        data
            passed unchanged to ``BezierSimplex.initialize_control_point``
            (the previous docstring called it a list of test data --
            TODO confirm the expected container type)

        Returns
        ----------
        C : dict
            control points, as returned by the model
        """
        simplex_model = model.BezierSimplex(
            dimSpace=self.dimSpace,
            dimSimplex=self.dimSimplex,
            degree=self.degree,
        )
        return simplex_model.initialize_control_point(data)
Esempio n. 5
0
    def __init__(self, dimSpace, degree, dimSimplex):
        """Initialize the Borges Pastva trainer.

        Parameters
        ----------
        dimSpace : int
            presumably the dimension of the control points -- TODO
            confirm against ``model.BezierSimplex``
        degree : int
            degree of the Bezier simplex
        dimSimplex : int
            dimension of the Bezier simplex

        Returns
        ----------
        None
        """
        self.dimSpace, self.dimSimplex, self.degree = (dimSpace, dimSimplex,
                                                       degree)
        # The trainer keeps its own model built from the stored values.
        self.bezier_simplex = model.BezierSimplex(
            dimSpace=self.dimSpace,
            dimSimplex=self.dimSimplex,
            degree=self.degree,
        )
Esempio n. 6
0
    # NOTE(review): this is the body of a function whose header is outside
    # this view; DIM_SPACE, DIM_SIMPLEX and DEGREE are defined elsewhere.
    NEWTON_ITR = 20
    MAX_ITR = 30  # upper bound on the number of control point updates

    # input data
    base_index = ['1', '2', '3', '4', '5']
    # Enumerate every subset of base_index (size 0 up to 5) as a tuple.
    subsets = []
    for i in range(len(base_index) + 1):
        for c in combinations(base_index, i):
            subsets.append(c)
    # Only the five single-index subsets and the full five-index subset
    # get a data file loaded; all other subsets are left out of `data`.
    data = {}
    for e in subsets:
        if len(e) == 1:
            data[e] = np.loadtxt('../data/normalized_pf/normalized_5-MED.pf_' +
                                 e[0])
        if len(e) == 5:
            # Previous data location, kept for reference:
            #data[e] = np.loadtxt('data/normalized_5-MED.pf_1_2_3_4_5')
            data[e] = np.loadtxt(
                '../data/normalized_pf/normalized_5-MED.pf_1_2_3_4_5_itr0')

    bezier_simplex = model.BezierSimplex(dimSpace=DIM_SPACE,
                                         dimSimplex=DIM_SIMPLEX,
                                         degree=DEGREE)
    # Build initial control points from the loaded data and dump them.
    C_init = bezier_simplex.initialize_control_point(data)
    for key in C_init:
        print(key, C_init[key])
    # Evaluate the simplex at one parameter vector
    # (presumably a vertex of the parameter simplex -- TODO confirm).
    x = bezier_simplex.sampling(C_init, [1, 0, 0, 0])
    print(x)
    # Evaluate on a grid and report the shapes of both returned arrays.
    tt, xx = bezier_simplex.meshgrid(C_init)
    print(tt.shape, xx.shape)
    bezier_simplex.write_meshgrid(C_init, "sample_mesghgrid")
def exp_practical_instances(trn_data, test_data):
    """Compare all-at-once and inductive Bezier simplex fitting on stored data.

    For every total sample size (50, 100), degree (2, 3) and seed (0..19)
    the function

    1. loads the data/parameter files of every index subset that is either
       no larger than the degree or the full index set,
    2. fits control points once from samples of the full index set
       ("borges") and once skeleton by skeleton ("inductive"),
    3. evaluates both fits on the test data set and writes the L2 risks to
       ``<result dir>/<trn_data>/<seed>/output.yml``,
    4. for seed 0 additionally saves pair plots of both fits.

    Parameters
    ----------
    trn_data : str
        name of the training data set; files are read from
        ``../data/<trn_data>,f_<indices>`` and ``../data/<trn_data>,w_<indices>``
    test_data : str
        name of the test data set, resolved the same way

    Returns
    ----------
    None
    """

    def sampling_data_and_param(d, p, n, seed):
        """Pick the same ``n`` randomly chosen rows from ``d`` and ``p``."""
        random.seed(seed)
        s = list(range(d.shape[0]))
        s_ = random.sample(s, n)
        print(len(s_))
        return (d[s_, :], p[s_, :])

    TRN_DATA = trn_data
    TEST_DATA = test_data
    # Loop-invariant settings.
    DATA_DIR = "../data"
    DIMENSION_SIMPLEX = 3
    DIMENSION_SPACE = 3
    SEED_LIST = list(range(20))

    for NUM_SAMPLE_BORGES in [50, 100]:
        DATA_NAME = TRN_DATA
        for DEGREE in [2, 3]:
            RESULT_DIR = "../results_practical_instances/D"+str(DEGREE)+"_N"+str(NUM_SAMPLE_BORGES)

            # Per-skeleton sample sizes for the inductive method, indexed by
            # (skeleton size - 1). The ratios are hard-coded constants.
            if DEGREE == 2:
                if DIMENSION_SIMPLEX == 3:
                    NUM_SAMPLE_INDUCTIVE = [int(round(NUM_SAMPLE_BORGES*(1-0.739)/3)),
                                            int(round(NUM_SAMPLE_BORGES*0.739/3))]
                if DIMENSION_SIMPLEX == 9:
                    NUM_SAMPLE_INDUCTIVE = [int(round(NUM_SAMPLE_BORGES*(1-0.758)/3)),
                                            int(round(NUM_SAMPLE_BORGES*0.758/3))]
                if NUM_SAMPLE_BORGES == 6:
                    NUM_SAMPLE_INDUCTIVE = [1, 1]
            if DEGREE == 3:
                NUM_SAMPLE_INDUCTIVE = [int(round(NUM_SAMPLE_BORGES*(1-0.587-0.314)/3)),
                                        int(round(NUM_SAMPLE_BORGES*0.587/3)),
                                        int(round(NUM_SAMPLE_BORGES*0.314))]

            for SEED in SEED_LIST:
                np.random.seed(SEED)
                objective_function_indices_list = list(range(DIMENSION_SIMPLEX))
                # Every non-empty subset of objective indices.
                subproblem_indices_list = []
                for i in range(1, len(objective_function_indices_list)+1):
                    for c in combinations(objective_function_indices_list, i):
                        subproblem_indices_list.append(c)

                bezier_simplex = model.BezierSimplex(dimSpace=DIMENSION_SPACE,
                                                     dimSimplex=DIMENSION_SIMPLEX,
                                                     degree=DEGREE)
                borges_pastva_trainer = trainer.BorgesPastvaTrainer(dimSpace=DIMENSION_SPACE,
                                                                    dimSimplex=DIMENSION_SIMPLEX,
                                                                    degree=DEGREE)
                monomial_degree_list = list(subfunction.BezierIndex(dim=DIMENSION_SIMPLEX,
                                                                    deg=DEGREE))

                # Load data ("f") and parameters ("w") of the needed subsets.
                data_all = {}
                param_all = {}
                for e in subproblem_indices_list:
                    if len(e) <= DEGREE or len(e) == DIMENSION_SIMPLEX:
                        string = '_'.join([str(i+1) for i in e])
                        tmp = data.Dataset(DATA_DIR+'/'+DATA_NAME+',f_'+string)
                        data_all[e] = tmp.values

                        tmp = data.Dataset(DATA_DIR+'/'+DATA_NAME+',w_'+string)
                        param_all[e] = tmp.values

                # sampling
                data_trn_borges = {}
                param_trn_borges = {}
                e_borges = tuple(range(DIMENSION_SIMPLEX))
                data_trn_borges[e_borges], param_trn_borges[e_borges] = sampling_data_and_param(
                    d=data_all[e_borges],
                    p=param_all[e_borges],
                    n=NUM_SAMPLE_BORGES,
                    seed=SEED)

                data_trn_inductive = {}
                param_trn_inductive = {}
                for e in data_all:
                    if len(e) <= DEGREE:
                        data_trn_inductive[e], param_trn_inductive[e] = sampling_data_and_param(
                            d=data_all[e],
                            p=param_all[e],
                            n=NUM_SAMPLE_INDUCTIVE[len(e)-1],
                            seed=SEED+sum(e))
                print(param_trn_borges[e_borges][0:10])
                print(data_trn_borges[e_borges][0:10])
                print(param_trn_borges[e_borges].shape, data_trn_borges[e_borges].shape,)
                print(param_trn_inductive[(0,)].shape, data_trn_inductive[(0,)].shape,)
                print(param_trn_inductive[(0, 1)].shape, data_trn_inductive[(0, 1)].shape,)
                if DEGREE == 3:
                    # The three-index skeleton is only sampled for degree 3;
                    # the previous code re-printed the (0, 1) shapes here.
                    print(param_trn_inductive[(0, 1, 2)].shape, data_trn_inductive[(0, 1, 2)].shape,)
                # borges learning
                control_point_borges = borges_pastva_trainer.update_control_point(
                    t_mat=param_trn_borges[e_borges],
                    data=data_trn_borges[e_borges],
                    c={},
                    indices_all=monomial_degree_list,
                    indices_fix=[])
                print("borges")
                for key in control_point_borges:
                    print(key, control_point_borges[key])

                # inductive learning: fit skeletons by increasing size and
                # freeze control points fitted in earlier steps
                freeze_multiple_degree_set = set()
                control_point_inductive = {}
                for dim in range(1, DIMENSION_SIMPLEX+1):
                    for index in data_trn_inductive:
                        if len(index) == dim:
                            target_multiple_degree_set = subfunction.extract_multiple_degree(
                                degree_list=monomial_degree_list, index_list=index)
                            target_multiple_degree_list = list(target_multiple_degree_set)
                            freeze_multiple_degree_list = list(
                                freeze_multiple_degree_set.intersection(target_multiple_degree_set))
                            a = borges_pastva_trainer.update_control_point(
                                t_mat=param_trn_inductive[index],
                                data=data_trn_inductive[index],
                                c=control_point_inductive,
                                indices_all=target_multiple_degree_list,
                                indices_fix=freeze_multiple_degree_list)
                            control_point_inductive.update(a)
                            freeze_multiple_degree_set = freeze_multiple_degree_set.union(
                                target_multiple_degree_set)
                            # Leaves only the inner loop once every control
                            # point has been fitted.
                            if len(freeze_multiple_degree_set) == len(monomial_degree_list):
                                break
                print("inductive")
                for key in control_point_inductive:
                    print(key, control_point_inductive[key])

                # evaluation
                data_tst = data.Dataset(DATA_DIR+'/'+TEST_DATA+',f_'+'_'.join([str(i+1) for i in e_borges])).values
                param_tst = data.Dataset(DATA_DIR+'/'+TEST_DATA+',w_'+'_'.join([str(i+1) for i in e_borges])).values
                pred_borges = bezier_simplex.generate_points(c=control_point_borges, tt=param_tst)
                pred_inductive = bezier_simplex.generate_points(c=control_point_inductive, tt=param_tst)

                random_l2risk_borges = subfunction.calculate_l2_expected_error(true=data_tst,
                                                                              pred=pred_borges)
                random_l2risk_inductive = subfunction.calculate_l2_expected_error(true=data_tst,
                                                                                 pred=pred_inductive)
                print(random_l2risk_borges, random_l2risk_inductive)

                results = {}
                results['random_l2risk'] = {}
                results['random_l2risk']['borges'] = float(random_l2risk_borges)
                results['random_l2risk']['inductive'] = float(random_l2risk_inductive)
                resultdir = RESULT_DIR + '/'+DATA_NAME+'/'+str(SEED)
                subfunction.create_directory(resultdir)
                subfunction.write_result(result=results, fname=resultdir+'/output.yml')

                if SEED == 0:
                    visualize.plot_estimated_pairplot(d1=data_tst,
                                                      d2=pred_borges,
                                                      d3=data_trn_borges[e_borges],
                                                      output_name=resultdir+'/borges.png')
                    # NOTE(review): only the one- and two-index skeleton
                    # samples are plotted, also for DEGREE == 3.
                    visualize.plot_estimated_pairplot(d1=data_tst,
                                                      d2=pred_inductive,
                                                      d3=np.r_[data_trn_inductive[(0,)],
                                                               data_trn_inductive[(1,)],
                                                               data_trn_inductive[(2,)],
                                                               data_trn_inductive[(0, 1)],
                                                               data_trn_inductive[(1, 2)],
                                                               data_trn_inductive[(0, 2)],
                                                               ],
                                                      output_name=resultdir+'/inductive.png')
Esempio n. 8
0
 def initialize_control_point(self, data):
     """Return initial control points computed from ``data``.

     A new ``model.BezierSimplex`` is built from this object's
     ``dimSpace``, ``dimSimplex`` and ``degree`` and its
     ``initialize_control_point`` result is returned unchanged.
     """
     simplex_model = model.BezierSimplex(
         dimSpace=self.dimSpace,
         dimSimplex=self.dimSimplex,
         degree=self.degree,
     )
     return simplex_model.initialize_control_point(data)
def experiments_synthetic_data(n,
                               degree,
                               dim_space,
                               dim_simplex,
                               sigma,
                               method,
                               seed,
                               results_dir,
                               opt_flag=1):
    """
    Conduct one experiment with synthetic data and write the result as YAML.

    Control points are fitted from noisy synthetic samples with the chosen
    method, the fit is evaluated on 10000 noise-free test samples, and the
    L2 risk together with the settings is dumped to a file in
    ``results_dir`` whose name encodes the settings.

    Parameters
    ----------
    n : int
        number of sample points to be trained
    degree : int
        max degree of bezier simplex fitting
    dim_space : int
        the number of dimension of the Euclidean space
        where the bezier simplex is embedded
    dim_simplex : int
        the number of dimension of bezier simplex
    sigma : float
        the scale of noise.
        Noises are chosen from a normal distribution N(0,sigma^2I)
    method : "borges"/"inductive"
        fitting method
    seed : int
        seed for the training samples; ``seed * 2`` is used for the
        test samples
    results_dir : str
        where to output results (the directory must already exist)
    opt_flag : 0/1 (default is 1)
        0 : optimal sampling strategy for inductive skeleton fitting
        1 : nonoptimal sampling strategy inductive skeleton fitting
        "borges" does not care about this parameter.

    Raises
    ----------
    ValueError
        if ``method`` is neither "borges" nor "inductive"
    """
    # Fail fast: previously an unknown method was silently skipped and the
    # function crashed later because `control_point` was never assigned.
    if method not in ("borges", "inductive"):
        raise ValueError(
            "unknown method {!r}; expected 'borges' or 'inductive'".format(
                method))

    # data generation class
    synthetic_data = data.SyntheticData(degree=degree,
                                        dim_space=dim_space,
                                        dim_simplex=dim_simplex)

    # train
    if method == "borges":
        param_trn, data_trn = synthetic_data.sampling_borges(n=n,
                                                             seed=seed,
                                                             sigma=sigma)
        monomial_degree_list = list(
            subfunction.BezierIndex(dim=dim_simplex, deg=degree))
        borges_pastva_trainer = trainer.BorgesPastvaTrainer(
            dim_space=dim_space, dim_simplex=dim_simplex, degree=degree)
        control_point = borges_pastva_trainer.update_control_point(
            t_mat=param_trn,
            data=data_trn,
            c={},
            indices_all=monomial_degree_list,
            indices_fix=[],
        )
    else:  # method == "inductive"
        # calculate sample size of each skeleton
        calc_sample_size = sampling.CalcSampleSize(degree=degree,
                                                   dim_simplex=dim_simplex)
        train_sample_size_list = calc_sample_size.get_sample_size_list(
            n=n, opt_flag=opt_flag)
        # data generation
        param_trn, data_trn = synthetic_data.sampling_inductive(
            n=n,
            seed=seed,
            sample_size_list=train_sample_size_list,
            sigma=sigma)
        monomial_degree_list = list(
            subfunction.BezierIndex(dim=dim_simplex, deg=degree))
        inductive_skeleton_trainer = trainer.InductiveSkeletonTrainer(
            dim_space=dim_space, dim_simplex=dim_simplex, degree=degree)
        control_point = inductive_skeleton_trainer.update_control_point(
            t_dict=param_trn,
            data_dict=data_trn,
            c={},
            indices_all=monomial_degree_list,
            indices_fix=[],
        )

    # generate test data which does not include gaussian noise
    param_tst, data_tst = synthetic_data.sampling_borges(n=10000,
                                                         seed=seed * 2,
                                                         sigma=0)
    bezier_simplex = model.BezierSimplex(dim_space=dim_space,
                                         dim_simplex=dim_simplex,
                                         degree=degree)
    data_pred = bezier_simplex.generate_points(c=control_point, tt=param_tst)
    l2_risk = subfunction.calculate_l2_expected_error(true=data_tst,
                                                      pred=data_pred)

    # output result
    settings = {
        "n": n,
        "degree": degree,
        "dim_space": dim_space,
        "dim_simplex": dim_simplex,
        "sigma": sigma,
        "method": method,
        "seed": seed,
        "opt_flag": opt_flag,
    }
    results = {"l2_risk": "{:5E}".format(l2_risk)}

    # NOTE(review): "reults" is misspelled but kept, since existing result
    # files and their readers may rely on this key.
    o = {"reults": results, "settings": settings}

    # File name: "<key>.<value>_" for each listed setting, then ".yml".
    ymlfilename = results_dir + "/"
    for key in [
            "dim_simplex", "dim_space", "degree", "n", "method", "opt_flag",
            "seed"
    ]:
        ymlfilename += key + "." + str(settings[key]) + "_"
    ymlfilename += ".yml"
    # `with` closes the file even if dumping fails.
    with open(ymlfilename, "w") as wf:
        wf.write(yaml.dump(o, default_flow_style=False))
Esempio n. 10
0
def main(ymlfilename, resultdir, borges_flag=1):
    """Run one synthetic fitting experiment described by a YAML file.

    True control points are generated, noisy training samples are drawn on
    each skeleton, control points are fitted with the inductive method
    (and, when ``borges_flag == 1``, with the all-at-once "borges"
    method), and the L2 risks on freshly sampled test points are computed.
    Results go to ``<resultdir>/output.yml`` and the control points plus
    the train/test data to ``<resultdir>/parameters.pkl``.

    Parameters
    ----------
    ymlfilename : str
        path of the YAML settings file; the keys read are ``seed``,
        ``sigma``, ``degree``, ``dimension_simplex``, ``dimension_space``,
        ``num_sample_train``, ``num_sample_test``,
        ``num_sample_train_all`` and ``simplextype``
    resultdir : str
        directory that receives the outputs (created if needed)
    borges_flag : int (default is 1)
        1 to also run and evaluate the "borges" fit

    Raises
    ----------
    ValueError
        if ``simplextype`` is neither "linear" nor "squareroot"
    """
    # The settings are plain data, so the safe loader suffices; calling
    # yaml.load without a Loader is unsafe and rejected by recent PyYAML.
    with open(ymlfilename, "r") as f:
        params = yaml.safe_load(f)

    subfunction.create_directory(resultdir)

    SEED = int(params["seed"])
    SIGMA = float(params["sigma"])
    DEGREE = int(params["degree"])
    DIMENSION_SIMPLEX = int(params["dimension_simplex"])
    DIMENSION_SPACE = int(params["dimension_space"])
    NUM_SAMPLE = params["num_sample_train"]
    NUM_SAMPLE_TEST = params["num_sample_test"]
    NUM_SAMPLE_ALL = params["num_sample_train_all"]
    SIMPLEX_TYPE = params["simplextype"]

    np.random.seed(SEED)
    objective_function_indices_list = list(range(DIMENSION_SIMPLEX))
    # Every non-empty subset of objective indices.
    subproblem_indices_list = []
    for i in range(1, len(objective_function_indices_list) + 1):
        for c in combinations(objective_function_indices_list, i):
            subproblem_indices_list.append(c)
    uniform_sampling = sampling.UniformSampling(dimension=DIMENSION_SIMPLEX)
    bezier_simplex = model.BezierSimplex(dim_space=DIMENSION_SPACE,
                                         dim_simplex=DIMENSION_SIMPLEX,
                                         degree=DEGREE)
    borges_pastva_trainer = trainer.BorgesPastvaTrainer(
        dim_space=DIMENSION_SPACE,
        dim_simplex=DIMENSION_SIMPLEX,
        degree=DEGREE)
    monomial_degree_list = list(
        subfunction.BezierIndex(dim=DIMENSION_SIMPLEX, deg=DEGREE))

    # generate true control points
    generate_control_point = model.GenerateControlPoint(
        dim_space=DIMENSION_SPACE,
        dim_simplex=DIMENSION_SIMPLEX,
        degree=DEGREE)
    if SIMPLEX_TYPE == "linear":
        control_point_true = generate_control_point.simplex()
        SEED += 5
    elif SIMPLEX_TYPE == "squareroot":
        control_point_true = generate_control_point.squareroot()
    else:
        # Previously this fell through and failed later with an unbound
        # `control_point_true`.
        raise ValueError(
            "unknown simplextype {!r}; expected 'linear' or 'squareroot'".
            format(SIMPLEX_TYPE))

    # generate training data
    print("generating data start")
    start = time.time()
    param_trn = {}
    data_trn = {}
    for c in subproblem_indices_list:
        if len(c) <= min(DIMENSION_SIMPLEX, DEGREE):
            n = NUM_SAMPLE[len(c) - 1]
            # A different sampling seed for every skeleton.
            SEED += 30
            z = uniform_sampling.subsimplex(indices=c, num_sample=n, seed=SEED)
            param_trn[c] = z
            b = bezier_simplex.generate_points(c=control_point_true,
                                               tt=param_trn[c])
            # Isotropic Gaussian noise with standard deviation SIGMA.
            epsilon = np.random.multivariate_normal(
                [0] * DIMENSION_SPACE,
                np.identity(DIMENSION_SPACE) * (SIGMA**2),
                n,
            )
            data_trn[c] = b + epsilon
    # NOTE(review): these arrays were only consumed by plotting code that
    # is disabled; the calls are kept in case the helper has side effects.
    data_trn_array = subfunction.concat_data_to_arrray(d=data_trn)
    param_trn_array = subfunction.concat_data_to_arrray(d=param_trn)

    # training data for borges: samples from the full index set
    if borges_flag == 1:
        param_trn_borges = uniform_sampling.subsimplex(
            indices=objective_function_indices_list,
            num_sample=NUM_SAMPLE_ALL,
            seed=(SEED + 1) * len(objective_function_indices_list),
        )
        data_trn_borges = bezier_simplex.generate_points(c=control_point_true,
                                                         tt=param_trn_borges)
        epsilon = np.random.multivariate_normal(
            [0] * DIMENSION_SPACE,
            np.identity(DIMENSION_SPACE) * (SIGMA**2),
            NUM_SAMPLE_ALL,
        )
        data_trn_borges += epsilon
    print("geenrating data finihed, elapsed_time:{0}".format(time.time() -
                                                             start) + "[sec]")

    # fitting by borges
    if borges_flag == 1:
        print("fitting by borges")
        start = time.time()
        control_point_borges = borges_pastva_trainer.update_control_point(
            t_mat=param_trn_borges,
            data=data_trn_borges,
            c={},
            indices_all=monomial_degree_list,
            indices_fix=[],
        )
        elapsed_time_borges = time.time() - start
        print("fitting by borges finihed, elapsed_time:{0}".format(
            elapsed_time_borges) + "[sec]")

    # fitting by inductive: skeletons by increasing size, freezing the
    # control points fitted in earlier steps
    print("fitting by inductive")
    start = time.time()
    freeze_multiple_degree_set = set()
    control_point_inductive = {}
    for dim in range(1, DIMENSION_SIMPLEX + 1):
        for index in data_trn:
            if len(index) == dim:
                target_multiple_degree_set = subfunction.extract_multiple_degree(
                    degree_list=monomial_degree_list, index_list=index)
                target_multiple_degree_list = list(target_multiple_degree_set)
                freeze_multiple_degree_list = list(
                    freeze_multiple_degree_set.intersection(
                        target_multiple_degree_set))
                a = borges_pastva_trainer.update_control_point(
                    t_mat=param_trn[index],
                    data=data_trn[index],
                    c=control_point_inductive,
                    indices_all=target_multiple_degree_list,
                    indices_fix=freeze_multiple_degree_list,
                )
                control_point_inductive.update(a)
                freeze_multiple_degree_set = freeze_multiple_degree_set.union(
                    target_multiple_degree_set)
                # Leaves only the inner loop once every control point has
                # been fitted.
                if len(freeze_multiple_degree_set) == len(
                        monomial_degree_list):
                    break
    elapsed_time_inductive = time.time() - start
    print("fitting by inductive finihed, elapsed_time:{0}".format(
        elapsed_time_inductive) + "[sec]")

    # calculate risk
    print("calc risks")
    start = time.time()

    if SIMPLEX_TYPE == "linear":
        SEED += 1
    else:
        SEED += 5
    param_tst_array = uniform_sampling.subsimplex(
        indices=objective_function_indices_list,
        num_sample=NUM_SAMPLE_TEST,
        seed=(SEED + 1) * (DIMENSION_SIMPLEX),
    )
    print("laptime array", time.time() - start)
    random_tst = bezier_simplex.generate_points(c=control_point_true,
                                                tt=param_tst_array)
    if borges_flag == 1:
        print("laptime test", time.time() - start)
        random_borges = bezier_simplex.generate_points(c=control_point_borges,
                                                       tt=param_tst_array)
        print("laptime borges", time.time() - start)
    random_inductive = bezier_simplex.generate_points(
        c=control_point_inductive, tt=param_tst_array)
    print("laptime inductive", time.time() - start)
    if borges_flag == 1:
        random_l2risk_borges = subfunction.calculate_l2_expected_error(
            true=random_tst, pred=random_borges)
    random_l2risk_inductive = subfunction.calculate_l2_expected_error(
        true=random_tst, pred=random_inductive)
    elapsed_time_risk = time.time() - start
    print("calc risk finihed, elapsed_time:{0}".format(elapsed_time_risk) +
          "[sec]")

    results = {}
    results["random_l2risk"] = {}
    results["time"] = {}
    if borges_flag == 1:
        results["random_l2risk"]["borges"] = float(random_l2risk_borges)
        results["time"]["borges"] = elapsed_time_borges
    results["random_l2risk"]["inductive"] = float(random_l2risk_inductive)
    results["time"]["inductive"] = elapsed_time_inductive
    results["time"]["risk"] = elapsed_time_risk

    output_dict = {}
    output_dict["params"] = params
    output_dict["results"] = results
    subfunction.write_result(result=output_dict,
                             fname=resultdir + "/output.yml")
    # Plotting of the fits (pair plots and 3D graphs) is disabled here.

    # Bundle control points and train/test data for later inspection.
    pi = {}
    pi["control_points"] = {}
    pi["train"] = {}
    pi["train"]["borges"] = {}
    pi["train"]["inductive"] = {}
    pi["test"] = {}
    pi["control_points"]["true"] = control_point_true
    if borges_flag == 1:
        pi["control_points"]["borges"] = control_point_borges
    pi["control_points"]["inductive"] = control_point_inductive
    pi["train"]["inductive"]["param"] = param_trn
    pi["train"]["inductive"]["val"] = data_trn
    pi["test"]["param"] = param_tst_array
    pi["test"]["val"] = random_tst

    output_name = resultdir + "/parameters.pkl"
    with open(output_name, "wb") as wf:
        pickle.dump(pi, wf)
Esempio n. 11
0
def experiments_practical_instances(
    data_dir,
    trn_data,
    test_data,
    n,
    solution_type,
    dim_simplex,
    degree,
    method,
    seed,
    results_dir,
    opt_flag=1,
):
    """
    Conduct one experiment with practical (stored) data sets.

    Control points are fitted from samples of the training data set with
    the chosen method, the fit is evaluated on the test data set, and the
    L2 risk together with the settings is dumped as YAML below
    ``results_dir``.

    Parameters
    ----------
    data_dir : str
        the name of directory which include datasets
    trn_data : str
        dataname to be trained
    test_data : str
        dataname for test
    n : int
        number of samples to be trained
    solution_type : str
        prefix of the value files; values are read from
        ``<data_dir>/<name>,<solution_type>_<indices>`` and parameters
        from ``<data_dir>/<name>,w_<indices>``
    dim_simplex : int
        dimension of bezier simplex
    degree : int
        max degree of bezier simplex fitting
    method : "borges"/"inductive"
        fitting method
    seed : int
        seed used when sampling the training rows
    results_dir : str
        where to output results
    opt_flag : 0/1 (default is 1)
        0 : optimal sampling strategy for inductive skeleton fitting
        1 : nonoptimal sampling strategy inductive skeleton fitting
        "borges" does not care about this parameter.

    Raises
    ----------
    ValueError
        if ``method`` is neither "borges" nor "inductive"
    """
    # Fail fast: previously an unknown method was silently skipped and the
    # function crashed later because `control_point` was never assigned.
    if method not in ("borges", "inductive"):
        raise ValueError(
            "unknown method {!r}; expected 'borges' or 'inductive'".format(
                method))

    # data preparation
    objective_function_indices_list = list(range(dim_simplex))
    full_index = tuple(objective_function_indices_list)
    # Every non-empty subset of objective indices.
    subproblem_indices_list = []
    for i in range(1, len(objective_function_indices_list) + 1):
        for c in combinations(objective_function_indices_list, i):
            subproblem_indices_list.append(c)
    monomial_degree_list = list(
        subfunction.BezierIndex(dim=dim_simplex, deg=degree))
    data_all = {}
    param_all = {}
    for e in subproblem_indices_list:
        if len(e) <= degree or len(e) == dim_simplex:
            string = "_".join(str(i + 1) for i in e)
            tmp = data.Dataset(data_dir + "/" + trn_data + "," +
                               solution_type + "_" + string)
            data_all[e] = tmp.values

            tmp = data.Dataset(data_dir + "/" + trn_data + ",w_" + string)
            param_all[e] = tmp.values

    # The full index set is always loaded above, so use it instead of the
    # former hard-coded (0, 1, 2), which only existed for dim_simplex == 3.
    dim_space = data_all[full_index].shape[1]

    # train
    if method == "borges":
        param_trn = {}
        data_trn = {}
        e = full_index
        data_trn[e], param_trn[e] = sampling_data_and_param(d=data_all[e],
                                                            p=param_all[e],
                                                            n=n,
                                                            seed=seed)
        borges_pastva_trainer = trainer.BorgesPastvaTrainer(
            dim_space=dim_space, dim_simplex=dim_simplex, degree=degree)
        control_point = borges_pastva_trainer.update_control_point(
            t_mat=param_trn[e],
            data=data_trn[e],
            c={},
            indices_all=monomial_degree_list,
            indices_fix=[],
        )
    else:  # method == "inductive"
        # calculate sample size of each skeleton
        calc_sample_size = sampling.CalcSampleSize(degree=degree,
                                                   dim_simplex=dim_simplex)
        train_sample_size_list = calc_sample_size.get_sample_size_list(
            n=n, opt_flag=opt_flag)
        # sampling
        data_trn = {}
        param_trn = {}
        for e in data_all:
            if len(e) <= degree:
                data_trn[e], param_trn[e] = sampling_data_and_param(
                    d=data_all[e],
                    p=param_all[e],
                    n=train_sample_size_list[len(e) - 1],
                    seed=seed + sum(e),
                )
        inductive_skeleton_trainer = trainer.InductiveSkeletonTrainer(
            dim_space=dim_space, dim_simplex=dim_simplex, degree=degree)
        control_point = inductive_skeleton_trainer.update_control_point(
            t_dict=param_trn,
            data_dict=data_trn,
            c={},
            indices_all=monomial_degree_list,
            indices_fix=[],
        )

    # evaluate empirical l2 risk
    e = full_index
    data_tst = data.Dataset(data_dir + "/" + test_data + "," + solution_type +
                            "_" + "_".join(str(i + 1) for i in e)).values
    param_tst = data.Dataset(data_dir + "/" + test_data + ",w_" +
                             "_".join(str(i + 1) for i in e)).values
    bezier_simplex = model.BezierSimplex(dim_space=dim_space,
                                         dim_simplex=dim_simplex,
                                         degree=degree)
    data_pred = bezier_simplex.generate_points(c=control_point, tt=param_tst)
    l2_risk = subfunction.calculate_l2_expected_error(true=data_tst,
                                                      pred=data_pred)
    # output result
    # NOTE(review): "tset_data" and "reults" are misspelled but kept, since
    # existing result files and their readers may rely on these keys.
    settings = {
        "trn_data": trn_data,
        "tset_data": test_data,
        "solution_type": solution_type,
        "n": n,
        "degree": degree,
        "dim_space": dim_space,
        "dim_simplex": dim_simplex,
        "method": method,
        "seed": seed,
        "opt_flag": opt_flag,
    }
    results = {"l2_risk": "{:5E}".format(l2_risk)}

    o = {"reults": results, "settings": settings}

    # Output directory first, then "<key>.<value>_" per setting and ".yml".
    ymlfilename = results_dir + "/" + trn_data + "solution_type." + solution_type + "/"
    subfunction.create_directory(dir_name=ymlfilename)
    for key in ["degree", "n", "method", "opt_flag", "seed"]:
        ymlfilename = ymlfilename + key + "." + str(settings[key]) + "_"
    ymlfilename = ymlfilename + ".yml"
    # `with` closes the file even if dumping fails.
    with open(ymlfilename, "w") as wf:
        wf.write(yaml.dump(o, default_flow_style=False))