Example #1
0
    def train_mpi(self, data : Data, i_am_manager : bool, restart_iters : Collection[int] = None, **kwargs):
        """Train the LCM kernel with multiple random restarts and build self.M.

        The restarts run via spawned MPI worker processes
        ('distributed_memory_parallelism'), a thread pool
        ('shared_memory_parallelism'), or sequentially. The best restart
        (smallest negative log marginal likelihood) is used to build the
        GPCoregionalizedRegression model stored in self.M; MPI worker ranks
        instead return their raw result list to the manager.
        """
        # Number of latent processes defaults to the number of tasks.
        if (kwargs['model_latent'] is None):
            Q = data.NI
        else:
            Q = kwargs['model_latent']

        if (kwargs['distributed_memory_parallelism'] and i_am_manager):
            mpi_comm = self.computer.spawn(__file__, nproc=kwargs['model_restart_processes'], nthreads=kwargs['model_restart_threads'], kwargs=kwargs) # XXX add args and kwargs
            # Shallow-copy before pruning so the caller's kwargs dict is not
            # mutated when the unpicklable communicator is dropped.
            kwargs_tmp = dict(kwargs)
            if "mpi_comm" in kwargs_tmp:
                del kwargs_tmp["mpi_comm"]   # mpi_comm is not picklable
            _ = mpi_comm.bcast((self, data, restart_iters, kwargs_tmp), root=mpi4py.MPI.ROOT)
            tmpdata = mpi_comm.gather(None, root=mpi4py.MPI.ROOT)
            mpi_comm.Disconnect()
            # Flatten the per-process result lists into one list.
            res = []
            for p in range(int(kwargs['model_restart_processes'])):
                res.extend(tmpdata[p])

        elif (kwargs['shared_memory_parallelism']): #YL: not tested

            with concurrent.futures.ThreadPoolExecutor(max_workers = kwargs['model_restart_threads']) as executor:
                def fun(restart_iter):
                    # Per-restart deterministic seed, offset by the user-given
                    # seed when one is supplied.
                    if ('seed' in kwargs):
                        seed = kwargs['seed'] * kwargs['model_restart_threads'] + restart_iter
                    else:
                        seed = restart_iter
                    np.random.seed(seed)
                    kern = LCM(input_dim = len(data.P[0][0]), num_outputs = data.NI, Q = Q)
                    # Warm-start the first restart from the previous model, if any.
                    if (restart_iter == 0 and self.M is not None):
                        kern.set_param_array(self.M.kern.get_param_array())
                    return kern.train_kernel(X = data.P, Y = data.O, computer = self.computer, kwargs = kwargs)
                res = list(executor.map(fun, restart_iters, timeout=None, chunksize=1))

        else:
            def fun(restart_iter):
                # Deterministic per-restart seed for reproducibility.
                np.random.seed(restart_iter)
                kern = LCM(input_dim = len(data.P[0][0]), num_outputs = data.NI, Q = Q)
                return kern.train_kernel(X = data.P, Y = data.O, computer = self.computer, kwargs = kwargs)
            res = list(map(fun, restart_iters))

        # Worker ranks hand their raw results back to the manager.
        if (kwargs['distributed_memory_parallelism'] and not i_am_manager):
            return res

        kern = LCM(input_dim = len(data.P[0][0]), num_outputs = data.NI, Q = Q)
        # Pick the restart with the smallest negative log marginal likelihood.
        bestxopt = min(res, key = lambda x: x[1])[0]
        kern.set_param_array(bestxopt)

        # YL: why sigma is enough to compute the likelihood, see https://gpy.readthedocs.io/en/deploy/GPy.likelihoods.html
        likelihoods_list = [GPy.likelihoods.Gaussian(variance = kern.sigma[i], name = "Gaussian_noise_%s" %i) for i in range(data.NI)]
        self.M = GPy.models.GPCoregionalizedRegression(data.P, data.O, kern, likelihoods_list = likelihoods_list)

        return
Example #2
0
 def fun(restart_iter):
     """Run one training restart and return the result of train_kernel."""
     # Derive a per-restart seed; offset it by the user seed when one is given.
     restart_seed = (kwargs['seed'] * kwargs['model_restart_threads'] + restart_iter
                     if 'seed' in kwargs else restart_iter)
     np.random.seed(restart_seed)
     kern = LCM(input_dim=len(data.P[0][0]), num_outputs=data.NI, Q=Q)
     # Warm-start the very first restart from the previously trained model.
     if restart_iter == 0 and self.M is not None:
         kern.set_param_array(self.M.kern.get_param_array())
     return kern.train_kernel(X=data.P, Y=data.O, computer=self.computer, kwargs=kwargs)
Example #3
0
    def gen_model_from_hyperparameters(self, data: Data, hyperparameters: list,
                                       **kwargs):
        """Build self.M from a given hyperparameter vector without training.

        Constructs an LCM kernel, loads the supplied hyperparameters into it,
        and wraps it in a GPCoregionalizedRegression model stored on self.M.
        """
        if kwargs['RCI_mode'] is False:
            from lcm import LCM

        # Latent dimensionality defaults to the number of tasks.
        Q = data.NI if kwargs['model_latent'] is None else kwargs['model_latent']

        kern = LCM(input_dim=len(data.P[0][0]), num_outputs=data.NI, Q=Q)
        kern.set_param_array(hyperparameters)

        # One Gaussian likelihood per task, seeded with the kernel's noise terms.
        likelihoods_list = []
        for i in range(data.NI):
            likelihoods_list.append(
                GPy.likelihoods.Gaussian(variance=kern.sigma[i],
                                         name="Gaussian_noise_%s" % i))
        self.M = GPy.models.GPCoregionalizedRegression(
            data.P, data.O, kern, likelihoods_list=likelihoods_list)

        return
Example #4
0
    def train_mpi(self,
                  data: Data,
                  i_am_manager: bool,
                  restart_iters: Collection[int] = None,
                  **kwargs):
        """Train the LCM kernel with multiple random restarts and build self.M.

        Depending on kwargs flags the restarts run via spawned MPI worker
        processes ('distributed_memory_parallelism'), a thread pool
        ('shared_memory_parallelism'), or sequentially.

        Returns (bestxopt, neg_log_marginal_likelihood, gradients, iteration)
        for the best restart; MPI worker ranks instead return their raw
        result list to the manager.
        """
        if (kwargs['RCI_mode'] is False):
            from lcm import LCM

        # Number of latent processes defaults to the number of tasks.
        if (kwargs['model_latent'] is None):
            Q = data.NI
        else:
            Q = kwargs['model_latent']

        if (kwargs['distributed_memory_parallelism'] and i_am_manager):
            # Spawn worker processes; each handles a subset of the restarts.
            mpi_comm = self.computer.spawn(
                __file__,
                nproc=kwargs['model_restart_processes'],
                nthreads=kwargs['model_restart_threads'],
                kwargs=kwargs)  # XXX add args and kwargs
            # Shallow-copy before pruning so the caller's kwargs dict is not
            # mutated when the unpicklable communicator is dropped.
            kwargs_tmp = dict(kwargs)
            if "mpi_comm" in kwargs_tmp:
                del kwargs_tmp["mpi_comm"]  # mpi_comm is not picklable
            _ = mpi_comm.bcast((self, data, restart_iters, kwargs_tmp),
                               root=mpi4py.MPI.ROOT)
            tmpdata = mpi_comm.gather(None, root=mpi4py.MPI.ROOT)
            mpi_comm.Disconnect()
            # Flatten the per-process result lists into one list.
            res = []
            for p in range(int(kwargs['model_restart_processes'])):
                res.extend(tmpdata[p])

        elif (kwargs['shared_memory_parallelism']):  #YL: not tested

            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=kwargs['model_restart_threads']) as executor:

                def fun(restart_iter):
                    # Deliberately unseeded: each restart draws fresh random
                    # initial hyperparameters.
                    kern = LCM(input_dim=len(data.P[0][0]),
                               num_outputs=data.NI,
                               Q=Q)
                    return kern.train_kernel(X=data.P,
                                             Y=data.O,
                                             computer=self.computer,
                                             kwargs=kwargs)

                res = list(
                    executor.map(fun, restart_iters, timeout=None,
                                 chunksize=1))

        else:

            def fun(restart_iter):
                # Re-seed from OS entropy so sequential restarts differ.
                np.random.seed()
                kern = LCM(input_dim=len(data.P[0][0]),
                           num_outputs=data.NI,
                           Q=Q)
                return kern.train_kernel(X=data.P,
                                         Y=data.O,
                                         computer=self.computer,
                                         kwargs=kwargs)

            res = list(map(fun, restart_iters))

        # Worker ranks hand their raw results back to the manager.
        if (kwargs['distributed_memory_parallelism']
                and not i_am_manager):
            return res

        # Select the restart with the smallest negative log marginal likelihood.
        kern = LCM(input_dim=len(data.P[0][0]), num_outputs=data.NI, Q=Q)
        best_result = min(res, key=lambda x: x[1])
        bestxopt = best_result[0]
        neg_log_marginal_likelihood = best_result[1]
        gradients = best_result[2]
        iteration = best_result[3]
        kern.set_param_array(bestxopt)
        if (kwargs['verbose'] == True):
            print('theta:', kern.theta)
            print('var:', kern.var)
            print('kappa:', kern.kappa)
            print('sigma:', kern.sigma)
            print('WS:', kern.WS)

        # YL: likelihoods needs to be provided, since K operator doesn't take into account sigma/jittering, but Kinv does. The GPCoregionalizedRegression intialization will call inference in GPy/interence/latent_function_inference/exact_gaussian_inference.py, and add to diagonals of the K operator with sigma+1e-8
        likelihoods_list = [
            GPy.likelihoods.Gaussian(variance=kern.sigma[i],
                                     name="Gaussian_noise_%s" % i)
            for i in range(data.NI)
        ]
        self.M = GPy.models.GPCoregionalizedRegression(
            data.P, data.O, kern, likelihoods_list=likelihoods_list)

        return (bestxopt, neg_log_marginal_likelihood, gradients, iteration)