def __init__(self, M=20, vem_iters=3):
    """Store hyperparameters and build the heterogeneous likelihood.

    Args:
        M: number of inducing points for the sparse GP.
        vem_iters: number of variational EM iterations used at fit time.
    """
    # Both outputs are Bernoulli/binary (the old "Real + Binary" note was
    # stale: there is no Gaussian output here).
    self.likelihoods_list = [Bernoulli(), Bernoulli()]
    self.likelihood = HetLikelihood(self.likelihoods_list)
    self.Y_metadata = self.likelihood.generate_metadata()
    # Total number of latent output functions implied by the likelihoods.
    self.D = self.likelihood.num_output_functions(self.Y_metadata)
    self.M = M
    self.vem_iters = vem_iters
class HetmogpWrapeper(BaseEstimator):
    """Scikit-learn style wrapper for a heterogeneous multi-output GP.

    Trains an SVMOGP model with (non-stochastic) variational EM on two
    binary outputs — a low-fidelity set (X_l, y_l) and a high-fidelity set
    (X_h, y_h) — and exposes predict / predict_proba for the high-fidelity
    output (output function index 1).

    NOTE(review): the misspelled class name ("Wrapeper") is kept unchanged
    so existing callers are not broken.
    """

    def __init__(self, M=20, vem_iters=3):
        """Store hyperparameters and build the heterogeneous likelihood.

        Args:
            M: number of inducing points for the sparse GP.
            vem_iters: number of variational EM iterations used in fit().
        """
        # Both outputs are Bernoulli/binary (the old "Real + Binary" note
        # was stale: there is no Gaussian output here).
        self.likelihoods_list = [Bernoulli(), Bernoulli()]
        self.likelihood = HetLikelihood(self.likelihoods_list)
        self.Y_metadata = self.likelihood.generate_metadata()
        # Total number of latent output functions implied by the likelihoods.
        self.D = self.likelihood.num_output_functions(self.Y_metadata)
        self.M = M
        self.vem_iters = vem_iters

    def fit(self, X_l, y_l, X_h, y_h):
        """Build and train the SVMOGP model via variational EM.

        Args:
            X_l, y_l: low-fidelity inputs and binary targets.
            X_h, y_h: high-fidelity inputs and binary targets.

        Returns:
            self, per the scikit-learn estimator convention.
        """
        input_dim = X_h.shape[1]
        X = [X_l, X_h]
        Y = [y_l.reshape(-1, 1), y_h.reshape(-1, 1)]
        Q = 2  # number of latent functions
        ls_q = np.array(([.05] * Q))
        var_q = np.array(([.5] * Q))
        kern_list = util.latent_functions_prior(
            Q, lenghtscale=ls_q, variance=var_q, input_dim=input_dim)
        # Random Gaussian inducing inputs; the 1-D linspace alternative is
        # kept below for reference.
        # Z = np.linspace(0, 1, self.M)
        # Z = Z[:, np.newaxis]
        Z = np.random.randn(self.M, input_dim)
        self.model = SVMOGP(X=X, Y=Y, Z=Z, kern_list=kern_list,
                            likelihood=self.likelihood,
                            Y_metadata=self.Y_metadata)
        self.model = VEM(self.model, stochastic=False,
                         vem_iters=self.vem_iters, optZ=True,
                         verbose=False, verbose_plot=False,
                         non_chained=False)
        return self

    def predict_proba(self, X):
        """Return class probabilities [P(y=0), P(y=1)] for output 1.

        The latent mean of output function 1 is squashed through the
        logistic link; the predictive variance is currently unused.
        """
        htmogp_1_m, htmogp_1_v = self.model._raw_predict_f(
            X, output_function_ind=1)
        pos_preds = expit(htmogp_1_m)
        return np.hstack((1 - pos_preds, pos_preds))

    def predict(self, X):
        """Return hard 0/1 labels by thresholding P(y=1) at 0.5."""
        proba = self.predict_proba(X)[:, 1]
        return (proba > 0.5).astype(int)
# NOTE(review): this fragment is the interior of a data-preparation routine —
# `Xtrain`, `Ytrain`, `likelihoods_list`, `for_train`, `how_many_outs`,
# `myseed` and the `Ntotal_*` / `index_train` lists are defined before this
# excerpt; confirm against the full file.

# Per output: record the dataset size, split it into train/test counts using
# the `for_train` fraction, and draw a random permutation of the indices.
for conti in range(likelihoods_list.__len__()):
    Ntotal_with_test.append(Xtrain[conti].shape[0])
    Ntotal_without_test.append(int(Ntotal_with_test[conti] * for_train))
    Ntotal_for_test.append(
        Ntotal_with_test[conti] - Ntotal_without_test[conti])
    index_train.append(
        np.random.permutation(np.arange(0, Ntotal_with_test[conti])))

rescale = 1  # inputs kept at their original scale
# Slice the permuted indices: the first Ntotal_without_test[conti] entries
# form the training split, the remainder the test split. Each resulting list
# is replicated `how_many_outs` times (list * int repeats the list; the
# replicas share the same arrays). The comprehension variables deliberately
# shadow `Xtrain` / `Ytrain`; the shadowing is confined to each
# comprehension's own scope.
Xtrain_new = [rescale * Xtrain[index_train[conti][0:Ntotal_without_test[conti]], :].copy() for conti, Xtrain in enumerate(Xtrain)] * how_many_outs
Ytrain_new = [Ytrain[index_train[conti][0:Ntotal_without_test[conti]]].copy() for conti, Ytrain in enumerate(Ytrain)] * how_many_outs
Xtest = [rescale * Xtrain[index_train[conti][Ntotal_without_test[conti]:], :].copy() for conti, Xtrain in enumerate(Xtrain)] * how_many_outs
Ytest = [Ytrain[index_train[conti][Ntotal_without_test[conti]:]].copy() for conti, Ytrain in enumerate(Ytrain)] * how_many_outs
Xtrain = Xtrain_new
Ytrain = Ytrain_new

# Heterogeneous likelihood assembled from the per-output likelihood list.
likelihood = HetLikelihood(likelihoods_list)
Y_metadata = likelihood.generate_metadata()
# np.random.seed(101)
myindex = []
ind_split_aux = []
# kmeans for selecting Z (the import is kept although the kmeans call below
# is commented out)
from scipy.cluster.vq import kmeans
np.random.seed(myseed)
# Z = 1.0 * kmeans(Xtrain, num_inducing)[0]
# Bounding box and dimensionality of the first output's training inputs.
minis = Xtrain[0].min(0)
maxis = Xtrain[0].max(0)
Dim = Xtrain[0].shape[1]
def load_toy5(N=1000, input_dim=1):
    """Generate a 3-output heterogeneous toy dataset.

    Samples Q=3 latent GP functions on a line/grid in [0, 1]^input_dim,
    mixes them with fixed linear-combination weights into J output
    functions, and samples observations from a HetGaussian + Beta + Gamma
    heterogeneous likelihood.

    Args:
        N: number of input points (for input_dim == 2 an approximately
           sqrt(N) x sqrt(N) grid is used instead).
        input_dim: input dimensionality (1 or 2).

    Returns:
        (Xtrain, Ytrain): lists with one entry per output. Every Xtrain[d]
        is the same input matrix Xtoy; Ytrain[0] is standardized, Ytrain[1]
        is clipped into (0, 1) for the Beta likelihood, Ytrain[2] is raw.
    """
    if input_dim == 2:
        Nsqrt = int(N**(1.0 / input_dim))
    print('input_dim:', input_dim)
    #Q = 5  # number of latent functions
    # Heterogeneous Likelihood Definition
    # likelihoods_list = [Gaussian(sigma=1.0), Bernoulli()] # Real + Binary
    # HetGaussian (real) + Beta ((0,1)-valued) + Gamma (positive)
    likelihoods_list = [HetGaussian(), Beta(), Gamma()]
    # likelihoods_list = [Gaussian(sigma=1.0)]
    likelihood = HetLikelihood(likelihoods_list)
    Y_metadata = likelihood.generate_metadata()
    D = likelihoods_list.__len__()
    Q = 3  # number of latent GP functions
    """""" """""" """""" """""" """"""
    Dim = input_dim
    if input_dim == 2:
        # Regular Nsqrt x Nsqrt grid on [0, 1]^2, flattened to (Nsqrt^2, 2).
        xy = np.linspace(0.0, 1.0, Nsqrt)
        xx = np.linspace(0.0, 1.0, Nsqrt)
        XX, XY = np.meshgrid(xx, xy)
        XX = XX.reshape(Nsqrt**2, 1)
        XY = XY.reshape(Nsqrt**2, 1)
        Xtoy = np.hstack((XX, XY))
    else:
        # First dimension is an ordered linspace; any extra dimensions are
        # randomly permuted linspaces. Built as (Dim, N), transposed below.
        minis = 0 * np.ones(Dim)
        maxis = 1 * np.ones(Dim)
        Xtoy = np.linspace(minis[0], maxis[0], N).reshape(1, -1)
        for i in range(Dim - 1):
            Xaux = np.linspace(minis[i + 1], maxis[i + 1], N)
            Xtoy = np.concatenate(
                (Xtoy, Xaux[np.random.permutation(N)].reshape(1, -1)),
                axis=0)
            # Z = np.concatenate((Z, Zaux.reshape(1, -1)), axis=0)
        Xtoy = 1.0 * Xtoy.T

    def latent_functions_prior(Q, lenghtscale=None, variance=None,
                               input_dim=None):
        """Build one RBF kernel per latent function (note: 'lenghtscale'
        keeps the original misspelling used throughout this file)."""
        if lenghtscale is None:
            lenghtscale = np.array(
                [0.5, 0.05, 0.1])  #This is the one used for previous experiments
            #lenghtscale = np.array([1.0, 0.1, 0.2])
        else:
            lenghtscale = lenghtscale
        if variance is None:
            #variance = 1.0*np.random.rand(Q)
            variance = 1 * np.ones(Q)
        else:
            variance = variance
        kern_list = []
        for q in range(Q):
            #print("length:",lenghtscale[q])
            #print("var:", variance[q])
            kern_q = GPy.kern.RBF(input_dim=input_dim,
                                  lengthscale=lenghtscale[q],
                                  variance=variance[q],
                                  name='rbf')
            kern_q.name = 'kern_q' + str(q)
            kern_list.append(kern_q)
        return kern_list

    kern_list = latent_functions_prior(Q, lenghtscale=None, variance=None,
                                       input_dim=Dim)

    # True U and F functions
    def experiment_true_u_functions(kern_list, X):
        """Draw one GP sample per kernel; fixed seed keeps draws reproducible."""
        Q = kern_list.__len__()
        # for d,X in enumerate(X_list):
        u_latent = np.zeros((X.shape[0], Q))
        np.random.seed(104)
        for q in range(Q):
            u_latent[:, q] = np.random.multivariate_normal(
                np.zeros(X.shape[0]), kern_list[q].K(X))
        return u_latent

    def experiment_true_f_functions(true_u, X_list, J):
        """Mix the latent draws into J output functions with fixed weights."""
        #true_f = []
        Q = true_u.shape[1]
        W = W_lincombination(Q, J)
        #print(W)
        #for j in range(J):
        f_j = np.zeros((X_list.shape[0], J))
        for q in range(Q):
            f_j += (W[q] * true_u[:, q]).T
        #true_f.append(f_d)
        return f_j

    # True Combinations
    def W_lincombination(Q, J):
        """Fixed (J, 1) mixing-weight vectors, one per latent function."""
        W_list = []
        # q=1
        for q in range(Q):
            # W_list.append(np.array(([[-0.5], [0.1]])))
            if q == 0:
                #W_list.append(0.3*np.random.randn(J, 1))
                W_list.append(
                    np.array([-0.1, -0.1, 1.1, 2.1, -0.5, -0.6])[:, None])
            elif q == 1:
                #W_list.append(2.0 * np.random.randn(J, 1))
                W_list.append(
                    np.array([1.4, -0.5, 0.3, 0.7, -0.3, 0.4])[:, None])
            else:
                #W_list.append(10.0 * np.random.randn(J, 1)+0.1)
                W_list.append(
                    np.array([0.1, -0.8, 1.3, 1.5, -0.02, 0.01])[:, None])
        return W_list

    """""" """""" """""" """""" """""" ""
    # True functions values for inputs X
    f_index = Y_metadata['function_index'].flatten()
    J = f_index.__len__()
    trueU = experiment_true_u_functions(kern_list, Xtoy)
    trueF = experiment_true_f_functions(trueU, Xtoy, J)
    # NOTE: a large commented-out matplotlib block (3D surface plot of trueF
    # for input_dim == 2, line plots otherwise) originally lived here; it was
    # condensed to this note for readability.
    d_index = Y_metadata['d_index'].flatten()
    F_true = []
    # Regroup the J flat output functions into per-likelihood blocks using
    # the metadata's (function_index, d_index) mapping.
    for t in range(D):
        _, num_f_task, _ = likelihoods_list[t].get_metadata()
        f = np.empty((Xtoy.shape[0], num_f_task))
        for j in range(J):
            if f_index[j] == t:
                f[:, d_index[j], None] = trueF[:, j][:, None]
        F_true.append(f)

    # Generating training data Y (sampling from heterogeneous likelihood)
    Ytrain = likelihood.samples(F=F_true, Y_metadata=Y_metadata)
    #Yreg = (Ytrain[0]-Ytrain[0].min())/(Ytrain[0].max()-Ytrain[0].min())
    # Standardize the real-valued output; clip the Beta output into (0, 1).
    Yreg = (Ytrain[0] - Ytrain[0].mean(0)) / (Ytrain[0].std(0))
    Ytrain = [Yreg, np.clip(Ytrain[1], 1.0e-9, 0.99999), Ytrain[2]]
    Xtrain = []
    for d in range(likelihoods_list.__len__()):
        Xtrain.append(Xtoy)
    return Xtrain, Ytrain
def load_toy4(N=1000, input_dim=1):
    """Generate a 2-output heterogeneous toy dataset.

    Same construction as load_toy5 but with only two likelihoods
    (HetGaussian + Beta) and J=4 mixed output functions.

    Args:
        N: number of input points (for input_dim == 2 an approximately
           sqrt(N) x sqrt(N) grid is used instead).
        input_dim: input dimensionality (1 or 2).

    Returns:
        (Xtrain, Ytrain): lists with one entry per output. Every Xtrain[d]
        is the same input matrix Xtoy; Ytrain[0] is standardized, Ytrain[1]
        is clipped into (0, 1) for the Beta likelihood.
    """
    if input_dim == 2:
        Nsqrt = int(N**(1.0 / input_dim))
    print('input_dim:', input_dim)
    # HetGaussian (real) + Beta ((0,1)-valued)
    likelihoods_list = [HetGaussian(), Beta()]
    likelihood = HetLikelihood(likelihoods_list)
    Y_metadata = likelihood.generate_metadata()
    D = likelihoods_list.__len__()
    Q = 3  # number of latent GP functions
    """""" """""" """""" """""" """"""
    Dim = input_dim
    if input_dim == 2:
        # Regular Nsqrt x Nsqrt grid on [0, 1]^2, flattened to (Nsqrt^2, 2).
        xy = np.linspace(0.0, 1.0, Nsqrt)
        xx = np.linspace(0.0, 1.0, Nsqrt)
        XX, XY = np.meshgrid(xx, xy)
        XX = XX.reshape(Nsqrt**2, 1)
        XY = XY.reshape(Nsqrt**2, 1)
        Xtoy = np.hstack((XX, XY))
    else:
        # First dimension is an ordered linspace; any extra dimensions are
        # randomly permuted linspaces. Built as (Dim, N), transposed below.
        minis = 0 * np.ones(Dim)
        maxis = 1 * np.ones(Dim)
        Xtoy = np.linspace(minis[0], maxis[0], N).reshape(1, -1)
        for i in range(Dim - 1):
            Xaux = np.linspace(minis[i + 1], maxis[i + 1], N)
            Xtoy = np.concatenate(
                (Xtoy, Xaux[np.random.permutation(N)].reshape(1, -1)),
                axis=0)
        Xtoy = 1.0 * Xtoy.T

    def latent_functions_prior(Q, lenghtscale=None, variance=None,
                               input_dim=None):
        """Build one RBF kernel per latent function (note: 'lenghtscale'
        keeps the original misspelling used throughout this file)."""
        if lenghtscale is None:
            lenghtscale = np.array(
                [0.5, 0.05, 0.1])  #This is the one used for previous experiments
        else:
            lenghtscale = lenghtscale
        if variance is None:
            variance = 1 * np.ones(Q)
        else:
            variance = variance
        kern_list = []
        for q in range(Q):
            kern_q = GPy.kern.RBF(input_dim=input_dim,
                                  lengthscale=lenghtscale[q],
                                  variance=variance[q],
                                  name='rbf')
            kern_q.name = 'kern_q' + str(q)
            kern_list.append(kern_q)
        return kern_list

    kern_list = latent_functions_prior(Q, lenghtscale=None, variance=None,
                                       input_dim=Dim)

    # True U and F functions
    def experiment_true_u_functions(kern_list, X):
        """Draw one GP sample per kernel; fixed seed keeps draws reproducible."""
        Q = kern_list.__len__()
        u_latent = np.zeros((X.shape[0], Q))
        np.random.seed(104)
        for q in range(Q):
            u_latent[:, q] = np.random.multivariate_normal(
                np.zeros(X.shape[0]), kern_list[q].K(X))
        return u_latent

    def experiment_true_f_functions(true_u, X_list, J):
        """Mix the latent draws into J output functions with fixed weights."""
        #true_f = []
        Q = true_u.shape[1]
        W = W_lincombination(Q, J)
        #print(W)
        #for j in range(J):
        f_j = np.zeros((X_list.shape[0], J))
        for q in range(Q):
            f_j += (W[q] * true_u[:, q]).T
        #true_f.append(f_d)
        return f_j

    # True Combinations
    def W_lincombination(Q, J):
        """Fixed (J, 1) mixing-weight vectors, one per latent function."""
        W_list = []
        # q=1
        for q in range(Q):
            if q == 0:
                W_list.append(np.array([-0.1, -0.1, 1.1, 2.1])[:, None])
            elif q == 1:
                W_list.append(np.array([1.4, -0.5, 0.3, 0.7])[:, None])
            else:
                W_list.append(np.array([0.1, -0.8, 1.3, 1.5])[:, None])
        return W_list

    """""" """""" """""" """""" """""" ""
    # True functions values for inputs X
    f_index = Y_metadata['function_index'].flatten()
    J = f_index.__len__()
    trueU = experiment_true_u_functions(kern_list, Xtoy)
    trueF = experiment_true_f_functions(trueU, Xtoy, J)
    d_index = Y_metadata['d_index'].flatten()
    F_true = []
    # Regroup the J flat output functions into per-likelihood blocks using
    # the metadata's (function_index, d_index) mapping.
    for t in range(D):
        _, num_f_task, _ = likelihoods_list[t].get_metadata()
        f = np.empty((Xtoy.shape[0], num_f_task))
        for j in range(J):
            if f_index[j] == t:
                f[:, d_index[j], None] = trueF[:, j][:, None]
        F_true.append(f)

    # Generating training data Y (sampling from heterogeneous likelihood)
    Ytrain = likelihood.samples(F=F_true, Y_metadata=Y_metadata)
    # Standardize the real-valued output; clip the Beta output into (0, 1).
    Yreg = (Ytrain[0] - Ytrain[0].mean(0)) / (Ytrain[0].std(0))
    Ytrain = [Yreg, np.clip(Ytrain[1], 1.0e-9, 0.99999)]
    Xtrain = []
    for d in range(likelihoods_list.__len__()):
        Xtrain.append(Xtoy)
    return Xtrain, Ytrain