def setUp(self):
    u_dim = 2
    y_dim = 3
    ts_length = 20
    sequences_no = 3
    #U, Y = generate_data(sequences_no, ts_length, u_dim=u_dim, y_dim=y_dim)
    U_2, Y_2 = generate_data(sequences_no * 2, ts_length, u_dim=u_dim, y_dim=y_dim)

    Q = 3  # 200 # Inducing points number. Take a small number for speed.
    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True

    # 1 layer:
    # wins = [0, win_out]  # 0-th is the output layer
    # nDims = [out_train.shape[1], 1]

    # 2 layers:
    win_out = 3
    win_in = 2
    wins = [0, win_out, win_out]
    nDims = [y_dim, 2, 3]

    MLP_dims = [3, 2]  # !!! 300, 200. Small for speed.
    #print("Input window: ", win_in)
    #print("Output window: ", win_out)

    data_streamer = RandomPermutationDataStreamer(Y_2, U_2)
    minibatch_index, minibatch_indices, Y_mb, X_mb = data_streamer.next_minibatch()

    m_1 = autoreg.DeepAutoreg_new(
        wins,
        Y_mb,
        U=X_mb,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        mb_inf_tot_data_size=sequences_no * 2,
        mb_inf_init_xs_means='all',
        mb_inf_init_xs_vars='all',
        mb_inf_sample_idxes=minibatch_indices,
        # 1 layer:
        # kernels=[GPy.kern.RBF(win_out, ARD=True, inv_l=True),
        #          GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)])
        # 2 layers:
        kernels=[
            GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim, ARD=True, inv_l=True)
        ])

    self.model_1 = m_1
    self.model_1._trigger_params_changed()
    self.mll_1_1 = float(self.model_1._log_marginal_likelihood)
    # exclude 'init_Xs' and 'X_var' from gradients
    self.g_mll_1_1 = np.hstack([
        self.model_1[pp.replace(' ', '_')].gradient.flatten()
        for pp in self.model_1.parameter_names()
        if ('init_Xs' not in pp) and ('X_var' not in pp)
    ]).copy()
    #self.g_mll_1_1 = self.model_1._log_likelihood_gradients().copy()

    self.model_2 = copy.deepcopy(m_1)

    # Second minibatch from the same (random permutation) streamer.
    self.model_1.set_DataStreamer(data_streamer)
    self.model_1._trigger_params_changed()
    self.model_1._next_minibatch()
    self.model_1._trigger_params_changed()
    self.mll_1_2 = float(self.model_1._log_marginal_likelihood)
    self.g_mll_1_2 = np.hstack([
        self.model_1[pp.replace(' ', '_')].gradient.flatten()
        for pp in self.model_1.parameter_names()
        if ('init_Xs' not in pp) and ('X_var' not in pp)
    ]).copy()
    #self.g_mll_1_2 = self.model_1._log_likelihood_gradients().copy()

    # Switch to the standard in-memory streamer and take two more minibatches.
    data_streamer_1 = StdMemoryDataStreamer(Y_2, U_2, sequences_no)
    self.model_1.set_DataStreamer(data_streamer_1)
    self.model_1._next_minibatch()
    self.model_1._trigger_params_changed()
    self.mll_2_1 = float(self.model_1._log_marginal_likelihood)
    # exclude 'init_Xs' and 'X_var' from gradients
    self.g_mll_2_1 = np.hstack([
        self.model_1[pp.replace(' ', '_')].gradient.flatten()
        for pp in self.model_1.parameter_names()
        if ('init_Xs' not in pp) and ('X_var' not in pp)
    ]).copy()

    #import pdb; pdb.set_trace()
    self.model_1._next_minibatch()
    self.model_1._trigger_params_changed()
    self.mll_2_2 = float(self.model_1._log_marginal_likelihood)
    # exclude 'init_Xs' and 'X_var' from gradients
    self.g_mll_2_2 = np.hstack([
        self.model_1[pp.replace(' ', '_')].gradient.flatten()
        for pp in self.model_1.parameter_names()
        if ('init_Xs' not in pp) and ('X_var' not in pp)
    ]).copy()
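# The tests rely on the data-streamer protocol: `next_minibatch()` returns
# `(minibatch_index, minibatch_indices, Y_mb, U_mb)`, and the model pulls new
# batches via `set_DataStreamer()` / `_next_minibatch()`. The sketch below is
# only an illustration of that interface; the class name
# `SequentialDataStreamerSketch` and its internals are assumptions, not part
# of the library (see RandomPermutationDataStreamer / StdMemoryDataStreamer
# for the real implementations):
class SequentialDataStreamerSketch(object):
    """Yields fixed-size minibatches of sequences in order, wrapping around."""

    def __init__(self, Y, U, minibatch_size):
        self.Y, self.U = Y, U
        self.minibatch_size = minibatch_size
        self._counter = 0

    def next_minibatch(self):
        n = len(self.Y)
        # indices of the sequences forming this minibatch
        idxs = [(self._counter * self.minibatch_size + i) % n
                for i in range(self.minibatch_size)]
        minibatch_index = self._counter
        self._counter += 1
        Y_mb = [self.Y[i] for i in idxs]
        U_mb = [self.U[i] for i in idxs]
        return minibatch_index, idxs, Y_mb, U_mb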
def setUp(self):
    u_dim = 2
    y_dim = 3
    U, Y = generate_data(3, 20, u_dim=2, y_dim=3)

    Q = 3  # 200 # Inducing points number. Take a small number for speed.
    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True

    # 1 layer:
    # wins = [0, win_out]  # 0-th is the output layer
    # nDims = [out_train.shape[1], 1]

    # 2 layers:
    win_out = 3
    win_in = 2
    wins = [0, win_out, win_out]
    nDims = [y_dim, 2, 3]

    MLP_dims = [3, 2]  # !!! 300, 200. Small for speed.
    #print("Input window: ", win_in)
    #print("Output window: ", win_out)

    m = autoreg.DeepAutoreg_new(
        wins,
        Y,
        U=U,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        mb_inf_tot_data_size=len(Y),
        mb_inf_init_xs_means='one',
        mb_inf_init_xs_vars='one',
        mb_inf_sample_idxes=range(len(Y)),
        # 1 layer:
        # kernels=[GPy.kern.RBF(win_out, ARD=True, inv_l=True),
        #          GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)])
        # 2 layers:
        kernels=[
            GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim, ARD=True, inv_l=True)
        ])

    self.model_1 = m
    self.model_1._trigger_params_changed()

    self.model_2 = copy.deepcopy(m)
    data_streamer = TrivialDataStreamer(Y, U)
    self.model_2.set_DataStreamer(data_streamer)
    self.model_2._trigger_params_changed()
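# `generate_data` is not defined in this file; the tests only assume it
# returns a pair (U, Y) of `sequences_no` control/output sequences of shapes
# (ts_length, u_dim) and (ts_length, y_dim). A minimal stand-in consistent
# with that usage (the name `generate_data_sketch` and the running-mean
# smoothing are illustrative assumptions, not the real generator):
def generate_data_sketch(sequences_no, ts_length, u_dim=1, y_dim=1, seed=0):
    rng = np.random.RandomState(seed)
    U, Y = [], []
    for _ in range(sequences_no):
        u = rng.randn(ts_length, u_dim)
        # crude smoothing so consecutive time steps are correlated
        u = 0.5 * (u + np.roll(u, 1, axis=0))
        # outputs depend on the controls plus noise
        y = rng.randn(ts_length, y_dim) + u.sum(axis=1, keepdims=True)
        U.append(u)
        Y.append(y)
    return U, Y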
def svi_test_1(debug=False, train_model=False, model=1, second_model_svi=False,
               input_scaling_factor=1):
    """
    After the new SVI classes are implemented, the first test checks that
    non-SVI inference is not broken. Two similar models are created using
    the corresponding classes and their results are compared. This is the
    second_model_svi=False case. When second_model_svi=True, the second
    model is built with SVI inference instead.
    """

    experiment_path = '/Users/grigoral/work/code/RGP/examples'
    #data = load_data()
    data = load_data_xyz()

    # In[7]:
    y = data['Y']
    u = data['Yxyz_list']
    u_flat = np.vstack(u)
    lbls = data['lbls']

    data_out_train = y
    # Ask: why are the first 3 dimensions removed?
    # 44 and 56 - output variable is 0.
    data_out_train = y[:, 3:]
    data_out_mean = data_out_train.mean(axis=0)
    data_out_std = data_out_train.std(axis=0)
    data_out_train = (y[:, 3:] - data_out_mean) / data_out_std
    #data_out_train_list = [data_out_train[np.where(lbls[:,i]==1)[0]][1:] for i in range(lbls.shape[1])]
    data_out_train_list = [
        data_out_train[np.where(lbls[:, i] == 1)[0]]
        for i in range(lbls.shape[1])
    ]

    # Create controls
    #data_in_train_list = [y[np.where(lbls[:,i]==1)[0]][:,2][1:] - y[np.where(lbls[:,i]==1)[0]][:,2][:-1] for i in range(lbls.shape[1])]
    #from scipy.ndimage.filters import gaussian_filter1d
    #data_in_train_list = [np.ones(d.shape+(1,))*d.mean() for d in data_in_train_list]
    ##data_in_train_list = [gaussian_filter1d(d,8.)[:,None] for d in data_in_train_list]
    ##data_in_train_list = [np.vstack([d[:10],d]) for d in data_in_train_list]
    data_in_train_list = u
    u_flat_mean = u_flat.mean(axis=0)
    u_flat_std = u_flat.std(axis=0)
    data_in_train = (u_flat - u_flat_mean) / u_flat_std
    #data_in_train_list = u
    data_in_train_list = [(d - u_flat_mean) / u_flat_std
                          for d in data_in_train_list]

    # In[8]:
    # print data_in_train_list[0].shape
    # print data_out_train_list[0].shape
    #
    # for i in range(len(data_in_train_list)):
    #     plt.figure()
    #     plt.plot(data_in_train_list[i], 'x-')
    #     plt.title(i)
    #     print data_in_train_list[i].shape[0]

    # In[9]:
    print(y.shape)
    print(data_out_train.shape)
    print(u_flat.shape)
    print(data_in_train.shape)

    # In[10]:
    if debug:
        import pdb
        pdb.set_trace()

    ytest = data['Ytest']
    lblstest = data['lblstest']
    u = data['Yxyz_list_test']

    #data_out_test = ytest
    data_out_test = ytest[:, 3:]
    data_out_test = (ytest[:, 3:] - data_out_mean) / data_out_std
    #data_out_test_list = [data_out_test[np.where(lblstest[:,i]==1)[0]][1:] for i in range(lblstest.shape[1])]
    data_out_test_list = [
        data_out_test[np.where(lblstest[:, i] == 1)[0]]
        for i in range(lblstest.shape[1])
    ]

    # Create controls
    #data_in_test_list = [ytest[np.where(lblstest[:,i]==1)[0]][:,2][1:] - ytest[np.where(lblstest[:,i]==1)[0]][:,2][:-1] for i in range(lblstest.shape[1])]
    #data_in_test_list = [np.ones(d.shape+(1,))*d.mean() for d in data_in_test_list]
    data_in_test_list = u
    #data_in_test = (u_flat-u_flat_mean)/u_flat_std
    data_in_test_list = [(d - u_flat_mean) / u_flat_std for d in u]

    # ## Fit a model without NN-constraint

    # In[11]:
    # Down-scaling the input signals
    #data_in_train_list = [d*0.1 for d in data_in_train_list]
    #data_in_test_list = [d*0.1 for d in data_in_test_list]
    #data_in_train = data_in_train*0.1

    # In[13]:
    if debug:
        import pdb
        pdb.set_trace()

    #=============================
    # Initialize a model
    #=============================
    Q = 100  # 200
    win_in = 20  # 20
    win_out = 20  # 20

    use_controls = True
    back_cstr = False

    if input_scaling_factor is None:
        input_scaling_factor = 1

    if model == 1:
        # create the model
        if use_controls:
            #m = autoreg.DeepAutoreg([0, win_out], data_out_train, U=data_in_train, U_win=win_in, X_variance=0.05,
            #                        num_inducing=Q, back_cstr=back_cstr, MLP_dims=[300,200], nDims=[data_out_train.shape[1],1],
            #                        kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
            #                                 GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])

            # Model without lists
            # m = autoreg.DeepAutoreg([0, win_out, win_out], data_out_train, U=data_in_train, U_win=win_in, X_variance=0.05,
            #                         num_inducing=Q, back_cstr=back_cstr, MLP_dims=[300,200], nDims=[data_out_train.shape[1],1,1],
            #                         kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=False),
            #                                  GPy.kern.RBF(win_out+win_out,ARD=True,inv_l=True, useGPU=False),
            #                                  GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=False)])

            # Model with lists
            m = autoreg.DeepAutoreg(
                [0, win_out, win_out],
                data_out_train_list,
                U=[d * input_scaling_factor for d in data_in_train_list],
                U_win=win_in,
                X_variance=0.05,
                num_inducing=Q,
                back_cstr=back_cstr,
                MLP_dims=[300, 200],
                nDims=[data_out_train.shape[1], 1, 1],
                kernels=[
                    GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False),
                    GPy.kern.RBF(win_out + win_out, ARD=True, inv_l=True, useGPU=False),
                    GPy.kern.RBF(win_out + win_in, ARD=True, inv_l=True, useGPU=False)
                ])

            if not second_model_svi:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_in, ARD=True, inv_l=True, useGPU=False)
                    ])
                m_svi.param_array[:] = m.param_array
                m_svi._trigger_params_changed()
            else:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_in, ARD=True, inv_l=True, useGPU=False)
                    ],
                    inference_method='svi')

            # used with back_cstr=True at the end of the notebook
            # m = autoreg.DeepAutoreg([0, win_out], data_out_train_list, U=[d*0.1 for d in data_in_train_list], U_win=win_in, X_variance=0.05,
            #                         num_inducing=Q, back_cstr=back_cstr, MLP_dims=[500,200], nDims=[data_out_train.shape[1],1],
            #                         kernels=[GPy.kern.MLP(win_out,bias_variance=10.),
            #                                  GPy.kern.MLP(win_out+win_in,bias_variance=10.)])
        else:
            m = autoreg.DeepAutoreg(
                [0, win_out],
                data_in_train,
                U=None,
                U_win=win_in,
                X_variance=0.05,
                num_inducing=Q,
                back_cstr=back_cstr,
                MLP_dims=[200, 100],
                nDims=[data_out_train.shape[1], 1],
                kernels=[
                    GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False),
                    GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False)
                ])

            if not second_model_svi:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_in, ARD=True, inv_l=True, useGPU=False)
                    ])
                m_svi.param_array[:] = m.param_array
                m_svi._trigger_params_changed()
            else:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_out, ARD=True, inv_l=True, useGPU=False),
                        GPy.kern.RBF(win_out + win_in, ARD=True, inv_l=True, useGPU=False)
                    ],
                    inference_method='svi')

    elif model == 2:
        # Ask: no b term in MLP regularization.
        #=============================
        # Model with NN-constraint
        #=============================
        Q = 500
        win_in = 20
        win_out = 20
        use_controls = True
        back_cstr = True

        m = autoreg.DeepAutoreg(
            [0, win_out],
            data_out_train_list,
            U=[d * input_scaling_factor for d in data_in_train_list],
            U_win=win_in,
            X_variance=0.05,
            num_inducing=Q,
            back_cstr=back_cstr,
            MLP_dims=[500, 200],
            nDims=[data_out_train.shape[1], 1],
            kernels=[
                GPy.kern.MLP(win_out, bias_variance=10.),
                GPy.kern.MLP(win_out + win_in, bias_variance=10.)
            ])
            # kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
            #          GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])

        if not second_model_svi:
            m_svi = autoreg.DeepAutoreg_new(
                [0, win_out],
                data_out_train_list,
                U=[d * input_scaling_factor for d in data_in_train_list],
                U_win=win_in,
                X_variance=0.05,
                num_inducing=Q,
                back_cstr=back_cstr,
                MLP_dims=[500, 200],
                nDims=[data_out_train.shape[1], 1],
                kernels=[
                    GPy.kern.MLP(win_out, bias_variance=10.),
                    GPy.kern.MLP(win_out + win_in, bias_variance=10.)
                ])
                # kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
                #          GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])
            m_svi.param_array[:] = m.param_array
            m_svi._trigger_params_changed()
        else:
            m_svi = autoreg.DeepAutoreg_new(
                [0, win_out],
                data_out_train_list,
                U=[d * input_scaling_factor for d in data_in_train_list],
                U_win=win_in,
                X_variance=0.05,
                num_inducing=Q,
                back_cstr=back_cstr,
                MLP_dims=[500, 200],
                nDims=[data_out_train.shape[1], 1],
                kernels=[
                    GPy.kern.MLP(win_out, bias_variance=10.),
                    GPy.kern.MLP(win_out + win_in, bias_variance=10.)
                ],
                inference_method='svi')
                # kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
                #          GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])

    print("Old model:")
    print(m)
    print("New model:")
    print(m_svi)

    if not second_model_svi:
        print("Maximum ll difference: ",
              np.max(np.abs(m._log_marginal_likelihood -
                            m_svi._log_marginal_likelihood)))
        print("Maximum ll_grad difference: ",
              np.max(np.abs(m._log_likelihood_gradients() -
                            m_svi._log_likelihood_gradients())))
    globals().update(locals())
    return

# Alex
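# A minimal way to exercise this test (illustrative only; assumes the dataset
# used by load_data_xyz() is available locally):
#
#     svi_test_1(model=1, second_model_svi=False)  # old vs. new class, same inference
#     svi_test_1(model=1, second_model_svi=True)   # second model built with SVI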
def rgp_experiment_raw(p_task_name, p_iteration, train_U, train_Y, p_init_runs,
                       p_max_runs, p_num_layers, p_hidden_dims,
                       p_inference_method, p_back_cstr, p_MLP_Dims, p_Q,
                       p_win_in, p_win_out, p_init, p_x_init_var):
    """
    Experiment function for NON-MINIBATCH inference, so DeepAutoreg is run here.

    Inputs:
    -------------------------------
    p_task_name: string
        Experiment name, used only in the file name
    p_iteration: int or string
        Iteration of the experiment, used only in the file name
    p_init_runs: int
        Number of initial runs during which likelihood variances and
        covariance magnitudes are fixed
    p_max_runs: int
        Maximum number of runs of the general optimization
    p_num_layers: int [1, 2]
        Number of RGP layers
    p_hidden_dims: list [length is the number of hidden layers]
        Dimensions of hidden layers
    p_inference_method: string
        If 'svi' then SVI inference is used.
    p_back_cstr: bool
        Use back constraints or not.
    p_MLP_Dims: list [length is the number of MLP hidden layers, ignoring
            input and output layers]
        Values are the number of neurons at each layer.
    p_Q: int
        Number of inducing points
    p_win_in, p_win_out: int
        Input window and hidden-layer window.
    p_init: string: 'Y', 'rand', 'zero'
        Initialization of RGP hidden layers
    p_x_init_var: float
        Initial variance for X; usually 0.05 for data close to normalized.
    """

    win_in = p_win_in  # 20
    win_out = p_win_out  # 20
    inference_method = p_inference_method if p_inference_method == 'svi' else None

    #import pdb; pdb.set_trace()
    if p_num_layers == 1:
        # 1 layer:
        wins = [0, win_out]  # 0-th is the output layer
        nDims = [train_Y.shape[1], p_hidden_dims[0]]

        kernels = [
            GPy.kern.RBF(win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)
        ]
    elif p_num_layers == 2:
        # 2 layers:
        wins = [0, win_out, win_out]
        nDims = [train_Y.shape[1], p_hidden_dims[0], p_hidden_dims[1]]

        kernels = [
            GPy.kern.RBF(win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_out + win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_out + win_in, ARD=True, inv_l=True)
        ]
    else:
        raise NotImplementedError()

    print("Input window: ", win_in)
    print("Output window: ", win_out)

    m = autoreg.DeepAutoreg_new(
        wins,
        train_Y,
        U=train_U,
        U_win=win_in,
        num_inducing=p_Q,
        back_cstr=p_back_cstr,
        MLP_dims=p_MLP_Dims,
        nDims=nDims,
        init=p_init,  # how to initialize hidden states means
        X_variance=p_x_init_var,  # usually 0.05; how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        kernels=kernels)

    # pattern for the model name:
    # task_name, inf_meth=?, wins=layers, Q=?, backcstr=?, MLP_dims=?, nDims=?
    model_file_name = '%s_%s--inf_meth=%s--backcstr=%s--wins=%s_%s--Q=%i--nDims=%s--init=%s--x_init=%s' % (
        p_task_name, str(p_iteration),
        'reg' if inference_method is None else inference_method,
        str(p_back_cstr) if p_back_cstr == False else
        str(p_back_cstr) + '_' + str(p_MLP_Dims),
        str(win_in), str(wins), p_Q, str(nDims), p_init, str(p_x_init_var))
    print('Model file name: ', model_file_name)
    print(m)
    #import pdb; pdb.set_trace()

    # Initialization.
    # Here layer numbers differ from those in initialization: the 0-th layer is the top one.
    for i in range(m.nLayers):
        # Range heuristic for inverse lengthscales: one over the scaled range
        # of the layer inputs.
        m.layers[i].kern.inv_l[:] = np.mean(
            1. / ((m.layers[i].X.mean.values.max(0) -
                   m.layers[i].X.mean.values.min(0)) / np.sqrt(2.)))
        m.layers[i].likelihood.variance[:] = 0.01 * train_Y.var()
        m.layers[i].kern.variance.fix(warning=False)
        m.layers[i].likelihood.fix(warning=False)
    print(m)

    #init_runs = 50 if out_train.shape[0] < 1000 else 100
    print("Init runs: ", p_init_runs)
    m.optimize('bfgs', messages=1, max_iters=p_init_runs)
    for i in range(m.nLayers):
        m.layers[i].kern.variance.constrain_positive(warning=False)
        m.layers[i].likelihood.constrain_positive(warning=False)
    m.optimize('bfgs', messages=1, max_iters=p_max_runs)

    io.savemat(model_file_name, {'params': m.param_array[:]})
    print(m)
    return -float(m._log_marginal_likelihood), m
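# Example call (illustrative values only; train_U/train_Y would come from
# generate_data or a real dataset, and stacking the sequences into single
# arrays is an assumption about the expected input format):
#
#     U, Y = generate_data(3, 20, u_dim=2, y_dim=3)
#     neg_mll, model = rgp_experiment_raw(
#         'demo', 0, np.vstack(U), np.vstack(Y),
#         p_init_runs=50, p_max_runs=200,
#         p_num_layers=2, p_hidden_dims=[1, 1],
#         p_inference_method='svi', p_back_cstr=False, p_MLP_Dims=None,
#         p_Q=10, p_win_in=2, p_win_out=3, p_init='Y', p_x_init_var=0.05)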
def svi_test_5():
    """
    This test checks the initial MLP implementation.
    """
    u_dim = 2
    y_dim = 3
    U, Y = generate_data(3, 20, u_dim=2, y_dim=3)

    Q = 3  # 200 # Inducing points number. Take a small number for speed.
    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True

    # 1 layer:
    # wins = [0, win_out]  # 0-th is the output layer
    # nDims = [out_train.shape[1], 1]

    # 2 layers:
    win_out = 3
    win_in = 2
    wins = [0, win_out, win_out]
    nDims = [y_dim, 2, 3]

    MLP_dims = [3, 2]  # !!! 300, 200. Small for speed.
    #print("Input window: ", win_in)
    #print("Output window: ", win_out)

    m = autoreg.DeepAutoreg_new(
        wins,
        Y,
        U=U,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        mb_inf_init_xs_vals='mlp',
        # 1 layer:
        # kernels=[GPy.kern.RBF(win_out, ARD=True, inv_l=True),
        #          GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)])
        # 2 layers:
        kernels=[
            GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim, ARD=True, inv_l=True)
        ])

    model_1 = m
    model_1._trigger_params_changed()
    mll_1 = model_1._log_marginal_likelihood
    g_mll_1 = model_1._log_likelihood_gradients()
    return

    # The remainder of this test is currently disabled by the return above.
    data_streamer = RandomPermutationDataStreamer(Y, U)
    model_1.set_DataStreamer(data_streamer)
    model_1._trigger_params_changed()

    model_1._next_minibatch()
    model_1._trigger_params_changed()
    np.testing.assert_equal(model_1._log_marginal_likelihood, mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(), g_mll_1,
                                  err_msg="Likelihood gradients must be equal")

    model_1._next_minibatch()
    model_1._trigger_params_changed()
    np.testing.assert_equal(model_1._log_marginal_likelihood, mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(), g_mll_1,
                                  err_msg="Likelihood gradients must be equal")
def svi_test_4():
    """
    This test checks the model with minibatch inference turned on, with a
    separate initial value and one latent-space variance for every sample.
    Gradients are not compared but tested separately.
    """
    u_dim = 2
    y_dim = 3
    ts_length = 20
    sequences_no = 3
    U, Y = generate_data(sequences_no, ts_length, u_dim=u_dim, y_dim=y_dim)

    Q = 3  # 200 # Inducing points number. Take a small number for speed.
    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True

    # 1 layer:
    # wins = [0, win_out]  # 0-th is the output layer
    # nDims = [out_train.shape[1], 1]

    # 2 layers:
    win_out = 3
    win_in = 2
    wins = [0, win_out, win_out]
    nDims = [y_dim, 2, 3]

    MLP_dims = [3, 2]  # !!! 300, 200. Small for speed.
    #print("Input window: ", win_in)
    #print("Output window: ", win_out)

    data_streamer = TrivialDataStreamer(Y, U)
    minibatch_index, minibatch_indices, Y_mb, X_mb = data_streamer.next_minibatch()

    m = autoreg.DeepAutoreg_new(
        wins,
        Y_mb,
        U=X_mb,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        mb_inf_tot_data_size=sequences_no,
        mb_inf_init_xs_means='all',
        mb_inf_init_xs_vars='all',
        mb_inf_sample_idxes=minibatch_indices,
        # 1 layer:
        # kernels=[GPy.kern.RBF(win_out, ARD=True, inv_l=True),
        #          GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)])
        # 2 layers:
        kernels=[
            GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim, ARD=True, inv_l=True)
        ])

    model_1 = m
    model_1._trigger_params_changed()
    mll_1 = model_1._log_marginal_likelihood
    g_mll_1 = model_1._log_likelihood_gradients()

    #self.assertTrue(self.model_1.checkgrad())
    #model_1.checkgrad(verbose=True)
    #return

    data_streamer = RandomPermutationDataStreamer(Y, U)
    #data_streamer = TrivialDataStreamer(Y, U)
    model_1.set_DataStreamer(data_streamer)
    model_1._trigger_params_changed()
    #model_1.checkgrad(verbose=True)

    model_1._next_minibatch()
    model_1._trigger_params_changed()
    model_1.checkgrad(verbose=True)
    np.testing.assert_equal(model_1._log_marginal_likelihood, mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(), g_mll_1,
                                  err_msg="Likelihood gradients must be equal")

    model_1._next_minibatch()
    model_1._trigger_params_changed()
    np.testing.assert_equal(model_1._log_marginal_likelihood, mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(), g_mll_1,
                                  err_msg="Likelihood gradients must be equal")
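# The docstring above says gradients are "tested separately": the test relies
# on GPy's m.checkgrad(). The idea behind such a check is a plain central
# finite difference; a generic sketch (f and x are any objective callable and
# parameter vector, not a specific model API) looks like this:
def finite_difference_grad(f, x, eps=1e-6):
    """Central-difference approximation of the gradient of f at x."""
    x = np.asarray(x, dtype=float)
    g = np.zeros_like(x)
    for i in range(x.size):
        e = np.zeros_like(x)
        e[i] = eps
        # perturb one coordinate at a time in both directions
        g[i] = (f(x + e) - f(x - e)) / (2. * eps)
    return g

# e.g. np.allclose(finite_difference_grad(lambda v: (v ** 2).sum(), x), 2 * x)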
def svi_test_2():
    """
    The goal of this function is to compare minibatch SVI with
    non-minibatch SVI.
    """

    trainned_models_folder_name = "/Users/grigoral/work/code/RGP/examples/identif_trainded"

    Q = 3  # 200 # Inducing points number
    win_in = task.win_in  # 20
    win_out = task.win_out  # 20

    use_controls = True
    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True

    # 1 layer:
    wins = [0, win_out]  # 0-th is the output layer
    nDims = [out_train.shape[1], 1]

    # 2 layers:
    # wins = [0, win_out, win_out]
    # nDims = [out_train.shape[1], 1, 1]

    MLP_dims = [3, 2]  # !!! 300, 200
    print("Input window: ", win_in)
    print("Output window: ", win_out)

    m = autoreg.DeepAutoreg_new(
        wins,
        out_train,
        U=in_train,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        # 1 layer:
        kernels=[
            GPy.kern.RBF(win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)
        ])
        # 2 layers:
        #kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True),
        #         GPy.kern.RBF(win_out+win_out,ARD=True,inv_l=True),
        #         GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True)])

    data_streamer = TrivialDataStreamer(out_train, in_train)
    m.set_DataStreamer(data_streamer)
    m._trigger_params_changed()
    print(m)

    m._next_minibatch()
    m._trigger_params_changed()

    #m = autoreg.DeepAutoreg([0, win_out], out_train, U=in_train, U_win=win_in,
    #                        X_variance=0.01, num_inducing=50)

    # pattern for the model name:
    # task_name, inf_meth=?, wins=layers, Q=?, backcstr=?, MLP_dims=?, nDims=?
    model_file_name = '%s--inf_meth=%s--wins=%s--Q=%i--backcstr=%i--nDims=%s' % (
        task.name, 'reg' if inference_method is None else inference_method,
        str(wins), Q, back_cstr, str(nDims))
    if back_cstr == True:
        model_file_name += '--MLP_dims=%s' % (MLP_dims, )
    print('Model file name: ', model_file_name)
    print(m)

    m.checkgrad(verbose=True)
    return

    # The remainder of this function is currently disabled by the return above.

    # ### Model initialization:
    # In[36]:
    # Here layer numbers differ from those in initialization: the 0-th layer is the top one.
    for i in range(m.nLayers):
        m.layers[i].kern.inv_l[:] = np.mean(
            1. / ((m.layers[i].X.mean.values.max(0) -
                   m.layers[i].X.mean.values.min(0)) / np.sqrt(2.)))
        m.layers[i].likelihood.variance[:] = 0.01 * out_train.var()
        m.layers[i].kern.variance.fix(warning=False)
        m.layers[i].likelihood.fix(warning=False)
    print(m)

    # In[37]:
    print(m.layer_1.kern.inv_l)
    print(m.layer_0.kern.inv_l)
    print(np.mean(1. / ((m.layer_1.X.mean.values.max(0) -
                         m.layer_1.X.mean.values.min(0)) / np.sqrt(2.))))

    # In[38]:
    # Plot initialization of the hidden layer:
    def plot_hidden_states(fig_no, layer, layer_start_point=None,
                           layer_end_point=None, data_start_point=None,
                           data_end_point=None):
        if layer_start_point is None:
            layer_start_point = 0
        if layer_end_point is None:
            layer_end_point = len(layer.mean)
        if data_start_point is None:
            data_start_point = 0
        if data_end_point is None:
            data_end_point = len(out_train)

        data = out_train[data_start_point:data_end_point]
        layer_means = layer.mean[layer_start_point:layer_end_point]
        layer_vars = layer.variance[layer_start_point:layer_end_point]

        fig4 = plt.figure(fig_no, figsize=(10, 8))
        ax1 = plt.subplot(1, 1, 1)
        fig4.suptitle('Hidden layer plotting')

        ax1.plot(data, label="Orig data Train_out", color='b')
        ax1.plot(layer_means, label='pred mean', color='r')
        ax1.plot(layer_means + 2 * np.sqrt(layer_vars), label='pred var',
                 color='r', linestyle='--')
        ax1.plot(layer_means - 2 * np.sqrt(layer_vars), label='pred var',
                 color='r', linestyle='--')
        ax1.legend(loc=4)
        ax1.set_title('Hidden layer vs Training data')
        del ax1

    plot_hidden_states(5, m.layer_1.qX_0)
    #plot_hidden_states(6, m.layer_2.qX_0)

    # ### Model training:
    # In[39]:
    #init_runs = 50 if out_train.shape[0] < 1000 else 100
    init_runs = 100
    print("Init runs: ", init_runs)
    m.optimize('bfgs', messages=1, max_iters=init_runs)
    for i in range(m.nLayers):
        m.layers[i].kern.variance.constrain_positive(warning=False)
        m.layers[i].likelihood.constrain_positive(warning=False)
    m.optimize('bfgs', messages=1, max_iters=10000)
    print(m)

    # ### Look at trained parameters
    # In[40]:
    if hasattr(m, 'layer_1'):
        print("Layer 1: ")
        print("States means (min and max), shapes: ",
              m.layer_1.qX_0.mean.min(), m.layer_1.qX_0.mean.max(),
              m.layer_1.qX_0.mean.shape)
        print("States variances (min and max), shapes: ",
              m.layer_1.qX_0.variance.min(), m.layer_1.qX_0.variance.max(),
              m.layer_1.qX_0.mean.shape)
        print("Inverse lengthscales (min and max), shapes: ",
              m.layer_1.rbf.inv_lengthscale.min(),
              m.layer_1.rbf.inv_lengthscale.max(),
              m.layer_1.rbf.inv_lengthscale.shape)

    if hasattr(m, 'layer_0'):
        print("")
        print("Layer 0 (output): ")
        print("Inverse lengthscales (min and max), shapes: ",
              m.layer_0.rbf.inv_lengthscale.min(),
              m.layer_0.rbf.inv_lengthscale.max(),
              m.layer_0.rbf.inv_lengthscale.shape)

    # In[41]:
    print(m.layer_0.rbf.inv_lengthscale)

    # In[42]:
    print(m.layer_1.rbf.inv_lengthscale)

    # ### Analyze and plot model on test data:
    # In[43]:
    # Free-run on the train data

    # initialize to the last part of the trained latent states
    #init_Xs = [None, m.layer_1.qX_0[0:win_out]]  # init_Xs for train prediction
    # initialize to zeros
    init_Xs = None
    predictions_train = m.freerun(init_Xs=init_Xs, U=in_train, m_match=True)

    # initialize to the last part of the training latent states
    #init_Xs = [None, m.layer_1.qX_0[-win_out:]]  # init_Xs for test prediction
    #U_test = np.vstack((in_train[-win_in:], in_test))
    # initialize to zeros
    init_Xs = None
    U_test = in_test
    # Free-run on the test data
    predictions_test = m.freerun(init_Xs=init_Xs, U=U_test, m_match=True)
    del init_Xs, U_test

    # In[44]:
    # Plot predictions
    def plot_predictions(fig_no, posterior_train, posterior_test=None,
                         layer_no=None):
        """
        Plots the output data along with the posterior of a layer. Used for
        plotting the hidden states or the output.

        posterior_train, posterior_test: list or Normal posterior
            Free-run predictions; if a list, one entry per layer.
        layer_no: int
            Plot states of this layer (0-th is the output layer).

        There is also some logic for computing the MSE and for aligning the
        posterior with the actual data.
        """
""" if layer_no is None: #default layer_no = 1 if posterior_test is None: no_test_data = True else: no_test_data = False if isinstance(posterior_train, list): layer_in_list = len( predictions_train ) - 1 - layer_no # standard layer no (like in printing the model) predictions_train_layer = predictions_train[layer_in_list] else: predictions_train_layer = posterior_train if not no_test_data: if isinstance(posterior_test, list): predictions_test_layer = predictions_test[layer_in_list] else: predictions_test_layer = posterior_test # Aligning the data -> # training of test data can be longer than leyer data because of the initial window. if out_train.shape[0] > predictions_train_layer.mean.shape[0]: out_train_tmp = out_train[win_out:] else: out_train_tmp = out_train if out_test.shape[0] > predictions_test_layer.mean.shape[0]: out_test_tmp = out_test[win_out:] else: out_test_tmp = out_test # Aligning the data <- if layer_no == 0: # Not anymore! Compute RMSE ignoring first output values of length "win_out" train_rmse = [ comp_RMSE(predictions_train_layer.mean, out_train_tmp) ] print("Train overall RMSE: ", str(train_rmse)) if not no_test_data: # Compute RMSE ignoring first output values of length "win_out" test_rmse = [ comp_RMSE(predictions_test_layer.mean, out_test_tmp) ] print("Test overall RMSE: ", str(test_rmse)) # Plot predictions: if not no_test_data: fig5 = plt.figure(10, figsize=(20, 8)) else: fig5 = plt.figure(10, figsize=(10, 8)) fig5.suptitle('Predictions on Training and Test data') if not no_test_data: ax1 = plt.subplot(1, 2, 1) else: ax1 = plt.subplot(1, 1, 1) ax1.plot(out_train_tmp, label="Train_out", color='b') ax1.plot(predictions_train_layer.mean, label='pred mean', color='r') ax1.plot(predictions_train_layer.mean + 2 * np.sqrt(predictions_train_layer.variance), label='pred var', color='r', linestyle='--') ax1.plot(predictions_train_layer.mean - 2 * np.sqrt(predictions_train_layer.variance), label='pred var', color='r', linestyle='--') ax1.legend(loc=4) ax1.set_title('Predictions on Train') if not no_test_data: ax2 = plt.subplot(1, 2, 2) ax2.plot(out_test_tmp, label="Test_out", color='b') ax2.plot(predictions_test_layer.mean, label='pred mean', color='r') #ax2.plot( predictions_test_layer.mean +\ # 2*np.sqrt( predictions_test_layer.variance ), label = 'pred var', color='r', linestyle='--' ) #ax2.plot( predictions_test_layer.mean -\ # 2*np.sqrt( predictions_test_layer.variance ), label = 'pred var', color='r', linestyle='--' ) ax2.legend(loc=4) ax2.set_title('Predictions on Test') del ax2 del ax1 plot_predictions(7, predictions_train, predictions_test, layer_no=0)