Example #1
    def load_history_func_eval(self, data: Data, problem: Problem,
                               Igiven: np.ndarray):
        """ Init history database JSON file """
        if (self.tuning_problem_name is not None):
            json_data_path = self.history_db_path + "/" + self.tuning_problem_name + ".json"
            if os.path.exists(json_data_path):
                print("[HistoryDB] Found a history database file")
                if self.file_synchronization_method == 'filelock':
                    with FileLock(json_data_path + ".lock"):
                        with open(json_data_path, "r") as f_in:
                            history_data = json.load(f_in)
                elif self.file_synchronization_method == 'rsync':
                    temp_path = json_data_path + "." + self.process_uid + ".temp"
                    os.system("rsync -a " + json_data_path + " " + temp_path)
                    with open(temp_path, "r") as f_in:
                        history_data = json.load(f_in)
                    os.system("rm " + temp_path)
                else:
                    with open(json_data_path, "r") as f_in:
                        history_data = json.load(f_in)

                num_tasks = len(Igiven)

                num_loaded_data = 0

                PS_history = [[] for i in range(num_tasks)]
                OS_history = [[] for i in range(num_tasks)]
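                # PS_history[t] / OS_history[t] accumulate the parameter vectors and
                # objective values loaded from the database for task t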

                for func_eval in history_data["func_eval"]:
                    if (self.check_load_deps(func_eval)):
                        task_id = self.search_func_eval_task_id(
                            func_eval, problem, Igiven)
                        if (task_id != -1):
                            # current policy: allow duplicated samples
                            # YL: allowing duplicates makes RCI-based multi-armed bandit much
                            # easier to implement; an option could restore the old policy of
                            # skipping a func eval whose parameters were already loaded for
                            # this task via self.is_parameter_duplication(problem,
                            # PS_history[task_id], func_eval["tuning_parameter"])
                            # (only PS_history[task_id] needs to be searched, not all of PS_history)
                            parameter_arr = []
                            for k in range(len(problem.PS)):
                                space_type = type(problem.PS[k]).__name__
                                value = func_eval["tuning_parameter"][problem.PS[k].name]
                                if space_type == "Categoricalnorm":
                                    parameter_arr.append(str(value))
                                elif space_type == "Integer":
                                    parameter_arr.append(int(value))
                                elif space_type == "Real":
                                    parameter_arr.append(float(value))
                                else:
                                    parameter_arr.append(value)
                            PS_history[task_id].append(parameter_arr)
                            OS_history[task_id].append(
                                [func_eval["evaluation_result"][problem.OS[k].name]
                                 for k in range(len(problem.OS))])
                            num_loaded_data += 1

                if (num_loaded_data > 0):
                    data.I = Igiven  #IS_history
                    data.P = PS_history
                    data.O = []  # YL: OS is a list of 2D numpy arrays
                    for i in range(len(OS_history)):
                        if (len(OS_history[i]) == 0):
                            data.O.append(np.empty(shape=(0, problem.DO)))
                        else:
                            data.O.append(np.array(OS_history[i]))
                            if (any(ele == [None] for ele in OS_history[i])):
                                print(
                                    "history data contains null function values"
                                )
                                exit()
                    # print ("data.I: " + str(data.I))
                    # print ("data.P: " + str(data.P))
                    # print ("data.O: " + str(OS_history))
                else:
                    print("no history data has been loaded")
            else:
                print("[HistoryDB] Create a JSON file at " + json_data_path)

                if self.file_synchronization_method == 'filelock':
                    with FileLock(json_data_path + ".lock"):
                        with open(json_data_path, "w") as f_out:
                            json_data = {
                                "tuning_problem_name":
                                self.tuning_problem_name,
                                "model_data": [],
                                "func_eval": []
                            }
                            json.dump(json_data, f_out, indent=2)
                elif self.file_synchronization_method == 'rsync':
                    temp_path = json_data_path + "." + self.process_uid + ".temp"
                    with open(temp_path, "w") as f_out:
                        json_data = {
                            "tuning_problem_name": self.tuning_problem_name,
                            "model_data": [],
                            "func_eval": []
                        }
                        json.dump(json_data, f_out, indent=2)
                    os.system("rsync -u " + temp_path + " " + json_data_path)
                    os.system("rm " + temp_path)
                else:
                    with open(json_data_path, "w") as f_out:
                        json_data = {
                            "tuning_problem_name": self.tuning_problem_name,
                            "model_data": [],
                            "func_eval": []
                        }
                        json.dump(json_data, f_out, indent=2)
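
For reference, a minimal usage sketch. The attribute names are taken from the method above, but the class name HistoryDB, its construction, and the values below are assumptions rather than GPTune's documented setup; problem is assumed to be the GPTune Problem instance used by the tuner.

# Hypothetical usage sketch; how these attributes are normally initialized
# is an assumption about the enclosing HistoryDB class.
import numpy as np

history_db = HistoryDB()                            # class defining load_history_func_eval
history_db.tuning_problem_name = "demo"             # reads/creates ./db/demo.json
history_db.history_db_path = "./db"
history_db.file_synchronization_method = "filelock"

data = Data(problem)                                # empty container to be filled
Igiven = np.array([[100.], [200.]])                 # two tasks, one input dimension each
history_db.load_history_func_eval(data, problem, Igiven)
# On success, data.P is a per-task list of parameter vectors and data.O a
# per-task list of (n_i x problem.DO) numpy arrays of objective values.
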
Example #2
    def MLA(self, NS, NS1 = None, NI = None, Igiven = None, **kwargs):

        print('\n\n\n------Starting MLA with %s tasks and %d samples each ' % (NI, NS))  # %s tolerates NI=None
        stats = {
            "time_total": 0,
            "time_sample_init": 0,
            "time_fun": 0,
            "time_search": 0,
            "time_model": 0
        }
        time_fun=0
        time_sample_init=0
        time_search=0
        time_model=0

        np.set_printoptions(suppress=False,precision=4)

        if (self.data.P is not None and len(self.data.P[0])>=NS):
            print('len(self.data.P[0]) >= NS, no need to run MLA. Returning...')
            return (copy.deepcopy(self.data), None,stats)

        t3 = time.time_ns()

        t1 = time.time_ns()

        options1 = copy.deepcopy(self.options)
        kwargs.update(options1)

        """ Multi-task Learning Autotuning """


        if(Igiven is not None and self.data.I is None):  # building the MLA model for each of the given tasks
            self.data.I = Igiven

########## normalize the data as the user always works in the original space

        if self.data.I is not None: # from a list of lists to a 2D numpy array
            self.data.I = self.problem.IS.transform(self.data.I)

        if self.data.P is not None: # from a list of (list of lists) to a list of 2D numpy arrays
            tmp=[]
            for x in self.data.P:
                xNorm = self.problem.PS.transform(x)
                tmp.append(xNorm)
            self.data.P=tmp

#        if (self.mpi_rank == 0):

        sampler = eval(f'{kwargs["sample_class"]}()')
        if (self.data.I is None):

            if (NI is None):
                raise Exception("Number of problems to be generated (NI) is not defined")

            check_constraints = functools.partial(self.computer.evaluate_constraints, self.problem, inputs_only = True, kwargs = kwargs)
            self.data.I = sampler.sample_inputs(n_samples = NI, IS = self.problem.IS, check_constraints = check_constraints, **kwargs)
            # print("riji",type(self.data.I),type(self.data.I[0]))
            self.data.D = [{} for _ in range(NI)]   # independent dicts; [{}]*NI would alias a single dict
        else:
            if (self.data.D is None):
                self.data.D = [{} for _ in range(len(self.data.I))]   # one independent dict per task

        if (self.data.P is not None and len(self.data.P) !=len(self.data.I)):
            raise Exception("len(self.data.P) !=len(self.data.I)")

        if (self.data.P is None):
            if (NS1 is not None and NS1>NS):
                raise Exception("NS1>NS")

            if (NS1 is None):
                NS1 = min(NS - 1, 3 * self.problem.DP) # General heuristic rule in the literature

            check_constraints = functools.partial(self.computer.evaluate_constraints, self.problem, inputs_only = False, kwargs = kwargs)
            self.data.P = sampler.sample_parameters(n_samples = NS1, I = self.data.I, IS = self.problem.IS, PS = self.problem.PS, check_constraints = check_constraints, **kwargs)
#            #XXX add the info of problem.models here
#            for P2 in P:
#                for x in P2:
#                    x = np.concatenate(x, np.array([m(x) for m in self.problems.models]))
        # print("good?")

        if (self.data.O is not None and len(self.data.O) !=len(self.data.I)):
            raise Exception("len(self.data.O) !=len(self.data.I)")

        t2 = time.time_ns()
        time_sample_init = time_sample_init + (t2-t1)/1e9

        t1 = time.time_ns()
        if (self.data.O is None):
            self.data.O = self.computer.evaluate_objective(self.problem, self.data.I, self.data.P, self.data.D, options = kwargs)
        t2 = time.time_ns()
        time_fun = time_fun + (t2-t1)/1e9
        # print(self.data.O)
        # print("good!")
#            if ((self.mpi_comm is not None) and (self.mpi_size > 1)):
#                mpi_comm.bcast(self.data, root=0)
#
#        else:
#
#            self.data = mpi_comm.bcast(None, root=0)
        # mpi4py.MPI.COMM_WORLD.Barrier()
        modelers = [eval(f'{kwargs["model_class"]}(problem = self.problem, computer = self.computer)') for _ in range(self.problem.DO)]  # one independent modeler per objective; [...]*DO would alias a single object
        searcher = eval(f'{kwargs["search_class"]}(problem = self.problem, computer = self.computer)')
        optiter = 0
        # YL: each iteration adds one sample per task (single objective) or up to
        # kwargs["search_more_samples"] samples (multi-objective) until the total reaches NS
        while len(self.data.P[0]) < NS:

            if(self.problem.models_update is not None):
                ########## denormalize the data as the user always works in the original space
                tmpdata = copy.deepcopy(self.data)
                if tmpdata.I is not None:    # from 2D numpy array to a list of lists
                    tmpdata.I = self.problem.IS.inverse_transform(tmpdata.I)
                if tmpdata.P is not None:    # from a collection of 2D numpy arrays to a list of (list of lists)
                    tmp=[]
                    for x in tmpdata.P:
                        xOrig = self.problem.PS.inverse_transform(x)
                        tmp.append(xOrig)
                    tmpdata.P=tmp
                self.problem.models_update(tmpdata)
                self.data.D = tmpdata.D

            # print("riji",type(self.data.I),type(self.data.I[0]))
            newdata = Data(problem = self.problem, I = self.data.I, D = self.data.D)
            print("MLA iteration: ",optiter)
            optiter = optiter + 1
            t1 = time.time_ns()
            for o in range(self.problem.DO):
                tmpdata = copy.deepcopy(self.data)
                tmpdata.O = [copy.deepcopy(self.data.O[i][:,o].reshape((-1,1))) for i in range(len(self.data.I))]
                if(self.problem.models is not None):
                    for i in range(len(tmpdata.P)):
                        points0 = tmpdata.D[i]
                        t = tmpdata.I[i]
                        I_orig = self.problem.IS.inverse_transform(np.array(t, ndmin=2))[0]
                        points1 = {self.problem.IS[k].name: I_orig[k] for k in range(self.problem.DI)}
                        modeldata=[]
                        for p in range(len(tmpdata.P[i])):
                            x = tmpdata.P[i][p]
                            x_orig = self.problem.PS.inverse_transform(np.array(x, ndmin=2))[0]
                            points = {self.problem.PS[k].name: x_orig[k] for k in range(self.problem.DP)}
                            points.update(points1)
                            points.update(points0)
                            modeldata.append(self.problem.models(points))
                        modeldata=np.array(modeldata)
                        tmpdata.P[i] = np.hstack((tmpdata.P[i],modeldata))  # YL: tmpdata is in the normalized space, but modeldata is in the original space
                # print(tmpdata.P[0])
                modelers[o].train(data = tmpdata, **kwargs)

            t2 = time.time_ns()
            time_model = time_model + (t2-t1)/1e9

            t1 = time.time_ns()
            res = searcher.search_multitask(data = self.data, models = modelers, **kwargs)

            more_samples=NS-len(self.data.P[0]) # YL: this makes sure P has the same length across all tasks
            for x in res:
                more_samples=min(more_samples,x[1][0].shape[0])
            newdata.P = [x[1][0][0:more_samples,:] for x in res]
            # print(more_samples,newdata.P)
            t2 = time.time_ns()
            time_search = time_search + (t2-t1)/1e9
    #XXX add the info of problem.models here

    #            if (self.mpi_rank == 0):

            t1 = time.time_ns()
            newdata.O = self.computer.evaluate_objective(problem = self.problem, I = newdata.I, P = newdata.P, D = newdata.D, options = kwargs)
            t2 = time.time_ns()
            time_fun = time_fun + (t2-t1)/1e9
    #                if ((self.mpi_comm is not None) and (self.mpi_size > 1)):
    #                    mpi_comm.bcast(newdata.O, root=0)
    #
    #            else:
    #
    #                newdata.O = mpi_comm.bcast(None, root=0)
            self.data.merge(newdata)

########## denormalize the data as the user always works in the original space
        if self.data.I is not None:    # from 2D numpy array to a list of lists
            self.data.I = self.problem.IS.inverse_transform(self.data.I)
        if self.data.P is not None:    # from a collection of 2D numpy arrays to a list of (list of lists)
            tmp=[]
            for x in self.data.P:
                xOrig = self.problem.PS.inverse_transform(x)
                tmp.append(xOrig)
            self.data.P=tmp

        t4 = time.time_ns()
        stats['time_total'] = (t4-t3)/1e9
        stats['time_fun'] = time_fun
        stats['time_model'] = time_model
        stats['time_search'] = time_search
        stats['time_sample_init'] = time_sample_init


        return (copy.deepcopy(self.data), modelers,stats)
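
A minimal MLA call, mirroring the invocation in Example #4 below; the task values and sample counts here are illustrative, and tp, computer, and options are assumed to be a TuningProblem, Computer, and Options set up as usual.

giventasks = [[460], [500]]                       # NI = 2 tasks, one input dimension each
gt = GPTune(tp, computer=computer, data=Data(tp), options=options)
(data, modelers, stats) = gt.MLA(NS=20,           # final number of samples per task
                                 NS1=10,          # initial (pilot) samples per task
                                 NI=len(giventasks),
                                 Igiven=giventasks)
print(stats)   # {'time_total': ..., 'time_sample_init': ..., 'time_fun': ...}
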
Example #3
def HpBandSter(T, NS, tp : TuningProblem, computer : Computer, options: Options = None, run_id="HpBandSter", niter=1):

    # Initialize
    min_budget   = options['budget_min'] # Minimum budget used during the optimization.
    max_budget   = options['budget_max'] # Maximum budget used during the optimization.
    budget_base  = options['budget_base']
    n_iterations = NS # Number of iterations performed by the optimizer
    n_workers    = 1  # Number of workers to run in parallel.
    
    X = []
    Y = []
    # Xopt = []
    # Yopt = []
    data = Data(tp)

    server = hpbandster.core.nameserver.NameServer(run_id=run_id, host='127.0.0.1', port=None)
    server.start()

    # Tune
    stats = {
        "time_total": 0,
        "time_fun": 0
    }

    timefun=0
    t1 = time.time_ns()
    for i in range(len(T)):

        workers=[]
        for j in range(n_workers):
            w = HpBandSterWorker(t=T[i], NS=NS, tp=tp, computer=computer, niter=niter, run_id=run_id, nameserver='127.0.0.1', id=j)
            w.run(background=True)
            workers.append(w)
            
        # XZ: set eta=3, bmin=.1, bmax=1, so smax=2
        bohb = hpbandster.optimizers.BOHB(configspace=workers[0].get_configspace(), run_id=run_id, nameserver='127.0.0.1', min_budget=min_budget, max_budget=max_budget, eta=budget_base)
        res = bohb.run(n_iterations=n_iterations, min_n_workers=n_workers)

        config_mapping = res.get_id2config_mapping()

        xs = [[config_mapping[idx]['config'][p] for p in tp.parameter_space.dimension_names] for idx in config_mapping.keys()]
        ys = [[(k, v['loss']) for k,v in res[idx].results.items()] for idx in config_mapping.keys()]
        
        X.append(xs)
        tmp = np.array(ys).reshape((len(ys), 1))
        Y.append(tmp)
        timefun=timefun+workers[0].timefun
        bohb.shutdown(shutdown_workers=True)

    t2 = time.time_ns()
    stats['time_total'] = (t2-t1)/1e9
    stats['time_fun'] = timefun
    # Finalize

    server.shutdown()

    data.I=T
    data.P=X
    data.O=Y

    return (data, stats)
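
A hypothetical driver call; the option keys match those read at the top of the function, and the task and sample values are illustrative.

options = Options()
options['budget_min'] = 0.1        # minimum budget per configuration
options['budget_max'] = 1.0        # maximum budget per configuration
options['budget_base'] = 3         # passed to BOHB as eta
T = [[460], [500]]                 # two tasks
(data, stats) = HpBandSter(T, NS=20, tp=tp, computer=computer, options=options)
print(stats['time_total'], stats['time_fun'])
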
Example #4
    def MB_LCM(self, NS = None, Igiven = None, **kwargs):
        """
        Igiven       : a list of tasks
        NS           : number of samples in the highest budget arm
        """

        np.set_printoptions(suppress=False,precision=4)
        print('\n\n\n------Starting MB_LCM (multi-armed bandit with LCM) with %d samples per task'%(NS),Igiven)

        stats = {
            "time_total": 0,
            "time_sample_init": 0,
            "time_fun": 0,
            "time_search": 0,
            "time_model": 0
        }
        time_fun=0
        time_sample_init=0
        time_search=0
        time_model=0

        self.NSs=[int(self.options['budget_max']/x*NS) for x in self.budgets]
        info = [[x,y] for x,y in zip(self.budgets,self.NSs)]
        print('total samples:',info)

        data = Data(self.tp)   # holds budget levels not yet fully sampled before SH (successive halving)
        data1 = Data(self.tp)  # holds budget levels fully sampled before SH
        data1.I=[]
        data1.P=[]
        data1.O=[]
        data1.D=[]

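        # Each outer iteration s fixes a budget level; every remaining budget level
        # s1 >= s is prepended to each user task (tmp = [budget1] + Igiven[t]), so
        # the budget acts as an extra task (input) dimension when MLA models the arms.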
        for s in range(len(self.budgets)): # loop over the budget levels
            budget = self.budgets[s]
            ns = self.NSs[s]
            newtasks=[]
            for s1 in range(s,len(self.budgets)):
                for t in range(len(Igiven)):
                    budget1 = self.budgets[s1]
                    tmp = [budget1]+Igiven[t]
                    newtasks.append(tmp)

            gt = GPTune(self.tp, computer=self.computer, data=data, options=self.options)
            (data, modeler, stats0) = gt.MLA(NS=ns, Igiven=newtasks, NI=len(newtasks), NS1=int(ns/2))
            data1.I += data.I[0:len(Igiven)]
            data1.P += data.P[0:len(Igiven)]
            data1.O += data.O[0:len(Igiven)]
            data1.D += data.D[0:len(Igiven)]
            del data.I[0:len(Igiven)]
            del data.P[0:len(Igiven)]
            del data.O[0:len(Igiven)]
            del data.D[0:len(Igiven)]


            stats['time_total'] += stats0['time_total']
            stats['time_fun'] += stats0['time_fun']
            stats['time_model'] += stats0['time_model']
            stats['time_search'] += stats0['time_search']
            stats['time_sample_init'] += stats0['time_sample_init']

        # print(data1.I)
        # print(data1.P)
        # print(data1.O)
        self.data.I = Igiven
        self.data.P = data1.P[0:len(Igiven)]  # this will be updated by SH
        self.data.O = data1.O[0:len(Igiven)]  # this will be updated by SH
        # TODO: run SH (successive halving) on each arm and return all samples of the highest fidelity in self.data

        return (copy.deepcopy(self.data), stats)
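
A hypothetical call, assuming the enclosing class is GPTune's GPTune_MB and that it derives self.budgets from the budget options; the values are illustrative.

options['budget_min']  = 1/9       # cheapest arm
options['budget_max']  = 1.0       # most expensive arm
options['budget_base'] = 3         # eta: budgets become [1, 1/3, 1/9]
gt = GPTune_MB(tp, computer=computer, options=options)
(data, stats) = gt.MB_LCM(NS=9, Igiven=[[460], [500]])
# With NS = 9, NSs = [int(1.0/b * 9) for b in (1, 1/3, 1/9)] = [9, 27, 81]:
# cheaper arms receive more samples, as in Hyperband-style schedules.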