def get_non_linear_input_dependent_kernel(original_X, current_X):
    X_dim = original_X.shape[1]
    Y_dim = current_X.shape[1] - X_dim
    k1 = RBF(input_dim=X_dim, active_dims=list(range(X_dim)))
    k2 = RationalQuadratic(input_dim=X_dim + Y_dim,
                           active_dims=list(range(X_dim + Y_dim)))
    return k1 + k2
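
# A minimal usage sketch for the function above. It assumes the kernel
# classes are already imported at module level (as the function itself does);
# the array shapes and sizes here are illustrative, not taken from the repo:
def _example_input_dependent_kernel():
    import numpy as np
    original_X = np.random.randn(50, 3)                # 50 points, 3 inputs
    current_X = np.hstack([original_X,
                           np.random.randn(50, 2)])    # plus 2 appended outputs
    return get_non_linear_input_dependent_kernel(original_X, current_X)
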
def temporal_kernel(self):
    """Build the kernel over the input (time) dimensions."""
    # Inner-layer noise kernel.
    kernel = White(variance=self.model_config['noise_inner'])
    m_inds = list(range(self.m))

    # Non-linear kernel over the inputs.
    if self.model_config['input_nonlinear']:
        scales = ([self.model_config['scale']] * self.m
                  if self.model_config['scale_tie']
                  else self.model_config['scale'])
        if self.model_config['rq']:
            kernel += RationalQuadratic(active_dims=m_inds,
                                        variance=1.0,
                                        lengthscales=scales,
                                        alpha=1e-2)
        else:
            kernel += SquaredExponential(active_dims=m_inds,
                                         variance=1.0,
                                         lengthscales=scales)

    # Periodic kernel over the inputs.
    # TODO: consider adding decay to the periodic component.
    if self.model_config['per']:
        scales = [self.model_config['per_scale']] * self.m
        periods = [self.model_config['per_period']] * self.m
        base_kernel = SquaredExponential(active_dims=m_inds,
                                         variance=1.0,
                                         lengthscales=scales)
        kernel += Periodic(base_kernel, period=periods)

    # Linear kernel over the inputs.
    if self.model_config['input_linear']:
        variances = [self.model_config['input_linear_scale']] * self.m
        kernel += LinearKernel(active_dims=m_inds, variance=variances)

    return kernel
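
# A minimal configuration sketch listing the keys temporal_kernel() reads.
# The values below are illustrative assumptions, not defaults from the repo:
EXAMPLE_TEMPORAL_CONFIG = {
    'noise_inner': 1e-2,        # variance of the White noise kernel
    'input_nonlinear': True,    # add an RQ/SE kernel over the inputs
    'scale_tie': True,          # share one lengthscale across all m inputs
    'scale': 1.0,               # lengthscale (scalar if tied, else a list)
    'rq': False,                # RationalQuadratic instead of SquaredExponential
    'per': True,                # add a periodic kernel over the inputs
    'per_scale': 1.0,           # lengthscale of the periodic base kernel
    'per_period': 12.0,         # period of the periodic kernel
    'input_linear': False,      # add a linear kernel over the inputs
    'input_linear_scale': 1.0,  # variance of that linear kernel
}
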
def boosting_kernel(self):
    """Build the kernel used for the boosting stage."""
    kernels = []
    if self.model_config['nonlinear']:
        if self.model_config['rq']:
            kernels.append(
                RationalQuadratic(
                    variance=1.0,
                    lengthscales=self.model_config['nonlinear_scale'],
                    alpha=1e-2))
        else:
            kernels.append(
                SquaredExponential(
                    variance=1.0,
                    lengthscales=self.model_config['nonlinear_scale']))
    if self.model_config['linear']:
        kernels.append(
            LinearKernel(variance=self.model_config['linear_scale']))
    # Sum the components. At least one of 'nonlinear'/'linear' must be
    # enabled, otherwise kernels[0] raises an IndexError.
    kernel = kernels[0]
    for k in kernels[1:]:
        kernel += k
    return kernel
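
# A minimal sketch of the composite boosting_kernel() builds when both flags
# are on, written against gpflow's kernel classes (which the aliases used
# above appear to wrap); the parameter values are illustrative:
def _example_boosting_kernel():
    import gpflow
    k_nonlinear = gpflow.kernels.RationalQuadratic(
        variance=1.0, lengthscales=1.0, alpha=1e-2)
    k_linear = gpflow.kernels.Linear(variance=0.5)
    return k_nonlinear + k_linear  # gpflow composes these into a Sum kernel
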
def _kernels_generator(self):
    """Build one kernel per output, wiring in the Markov structure."""

    def _determine_indices(m, pi, markov):
        # Build in the Markov structure: juggle the indices of the outputs.
        p_last = pi - 1  # Index of the last output that is given as input.
        p_start = 0 if markov is None else max(p_last - (markov - 1), 0)
        p_num = p_last - p_start + 1
        # Determine the indices corresponding to the inputs and the outputs.
        m_inds = list(range(m))
        p_inds = list(range(m + p_start, m + p_last + 1))
        return m_inds, p_inds, p_num

    kernels = []
    for pi in range(self.num_outputs):
        m_inds, p_inds, p_num = _determine_indices(
            self.m, pi, self.model_config['markov'])

        # Construct the inner-layer noise kernel.
        kernel = White(variance=self.model_config['noise_inner'])

        # Non-linear kernel over the inputs.
        scales = ([self.model_config['scale']] * self.m
                  if self.model_config['scale_tie']
                  else self.model_config['scale'])
        if self.model_config['rq']:
            kernel += RationalQuadratic(active_dims=m_inds,
                                        variance=1.0,
                                        lengthscales=scales,
                                        alpha=1e-2)
        else:
            kernel += SquaredExponential(active_dims=m_inds,
                                         variance=1.0,
                                         lengthscales=scales)

        # Periodic kernel over the inputs.
        # TODO: consider adding decay to the periodic component.
        if self.model_config['per']:
            scales = [self.model_config['per_scale']] * self.m
            periods = [self.model_config['per_period']] * self.m
            base_kernel = SquaredExponential(active_dims=m_inds,
                                             variance=1.0,
                                             lengthscales=scales)
            kernel += Periodic(base_kernel, period=periods)

        # Linear kernel over the inputs.
        if self.model_config['input_linear']:
            variances = [self.model_config['input_linear_scale']] * self.m
            kernel += LinearKernel(active_dims=m_inds, variance=variances)

        # Linear kernel over the preceding outputs.
        if self.model_config['linear'] and pi > 0:
            variances = [self.model_config['linear_scale']] * p_num
            kernel += LinearKernel(active_dims=p_inds, variance=variances)

        # Non-linear kernel over the preceding outputs.
        if self.model_config['nonlinear'] and pi > 0:
            if self.model_config['nonlinear_dependent']:
                # Concatenate input and output indices (list.extend() returns
                # None, so it must not be used to build active_dims here).
                active_dims = m_inds + p_inds
                scales = [self.model_config['scale']] * self.m
                scales.extend([self.model_config['nonlinear_scale']] * p_num)
            else:
                active_dims = p_inds
                scales = [self.model_config['nonlinear_scale']] * p_num
            if self.model_config['rq']:
                kernel += RationalQuadratic(active_dims=active_dims,
                                            variance=1.0,
                                            lengthscales=scales,
                                            alpha=1e-2)
            else:
                kernel += SquaredExponential(active_dims=active_dims,
                                             variance=1.0,
                                             lengthscales=scales)

        kernels.append(kernel)
    return kernels
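
# A worked sketch of the index bookkeeping in _determine_indices, for
# m = 3 inputs, output pi = 4, and markov = 2 (the values are illustrative):
def _example_markov_indices():
    m, pi, markov = 3, 4, 2
    p_last = pi - 1                                    # 3
    p_start = max(p_last - (markov - 1), 0)            # 2
    p_num = p_last - p_start + 1                       # 2 preceding outputs
    m_inds = list(range(m))                            # [0, 1, 2]
    p_inds = list(range(m + p_start, m + p_last + 1))  # [5, 6]
    assert (m_inds, p_inds, p_num) == ([0, 1, 2], [5, 6], 2)
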
def run_gp_optim(target_column: str, split_perc: float, imputation: str,
                 featureset: str):
    """
    Run the whole GPR optimization loop.

    :param target_column: target variable for predictions
    :param split_perc: percentage of samples to use for the train set
    :param imputation: imputation method for missing values
    :param featureset: featureset to use
    """
    config = configparser.ConfigParser()
    config.read('Configs/dataset_specific_config.ini')

    # Get optimization run parameters.
    base_dir, seasonal_periods, split_perc, init_train_len, test_len, \
        resample_weekly = TrainHelper.get_optimization_run_parameters(
            config=config, target_column=target_column, split_perc=split_perc)

    # Load datasets.
    datasets = TrainHelper.load_datasets(config=config,
                                         target_column=target_column)

    # Prepare the parameter grid: for every isotropic stationary base kernel,
    # also add a periodic variant. Iterate over a snapshot of the list so the
    # appends do not interfere with the loop.
    kernels = []
    base_kernels = [
        SquaredExponential(),
        Matern52(),
        White(),
        RationalQuadratic(),
        Polynomial()
    ]
    for kern in list(base_kernels):
        if isinstance(kern, IsotropicStationary):
            base_kernels.append(Periodic(kern, period=seasonal_periods))
    TrainHelper.extend_kernel_combinations(kernels=kernels,
                                           base_kernels=base_kernels)

    param_grid = {
        'dataset': datasets,
        'imputation': [imputation],
        'featureset': [featureset],
        'dim_reduction': ['None', 'pca'],
        'kernel': kernels,
        'mean_function': [None, gpflow.mean_functions.Constant()],
        'noise_variance': [0.01, 1, 10, 100],
        'optimizer': [gpflow.optimizers.Scipy()],
        'standardize_x': [False, True],
        'standardize_y': [False, True],
        'osa': [True]
    }

    # Randomly sample from the parameter grid.
    params_lst = TrainHelper.random_sample_parameter_grid(
        param_grid=param_grid, sample_share=0.2)

    doc_results = None
    best_rmse = 5000000.0
    best_mape = 5000000.0
    best_smape = 5000000.0
    dataset_last_name = 'Dummy'
    imputation_last = 'Dummy'
    dim_reduction_last = 'Dummy'
    featureset_last = 'Dummy'

    for i in tqdm(range(len(params_lst))):
        warnings.simplefilter('ignore')
        dataset = params_lst[i]['dataset']
        imputation = params_lst[i]['imputation']
        featureset = params_lst[i]['featureset']
        dim_reduction = (None if params_lst[i]['dim_reduction'] == 'None'
                         else params_lst[i]['dim_reduction'])
        # Deepcopy to prevent impact of previous optimizations.
        kernel = gpflow.utilities.deepcopy(params_lst[i]['kernel'])
        mean_fct = gpflow.utilities.deepcopy(params_lst[i]['mean_function'])
        noise_var = params_lst[i]['noise_variance']
        optimizer = gpflow.utilities.deepcopy(params_lst[i]['optimizer'])
        stand_x = params_lst[i]['standardize_x']
        stand_y = params_lst[i]['standardize_y']
        one_step_ahead = params_lst[i]['osa']

        # Dimensionality reduction is only done without NaNs.
        if imputation is None and dim_reduction is not None:
            continue
        # Dimensionality reduction does not make sense for few features.
        if featureset == 'none' and dim_reduction is not None:
            continue

        if not ((dataset.name == dataset_last_name)
                and (imputation == imputation_last)
                and (dim_reduction == dim_reduction_last)
                and (featureset == featureset_last)):
            if resample_weekly and 'weekly' not in dataset.name:
                dataset.name = dataset.name + '_weekly'
            print(dataset.name + ' '
                  + str('None' if imputation is None else imputation) + ' '
                  + str('None' if dim_reduction is None else dim_reduction)
                  + ' ' + featureset + ' ' + target_column)
            train_test_list = TrainHelper.get_ready_train_test_lst(
                dataset=dataset, config=config,
                init_train_len=init_train_len, test_len=test_len,
                split_perc=split_perc, imputation=imputation,
                target_column=target_column,
                dimensionality_reduction=dim_reduction,
                featureset=featureset)
            if dataset.name != dataset_last_name:
                best_rmse = 5000000.0
                best_mape = 5000000.0
                best_smape = 5000000.0
            dataset_last_name = dataset.name
            imputation_last = imputation
            dim_reduction_last = dim_reduction
            featureset_last = featureset

        kernel_string, mean_fct_string, optimizer_string = \
            get_docresults_strings(kernel=kernel, mean_function=mean_fct,
                                   optimizer=optimizer)
        sum_dict = None
        try:
            for train, test in train_test_list:
                model = ModelsGPR.GaussianProcessRegressionGPFlow(
                    target_column=target_column,
                    seasonal_periods=seasonal_periods,
                    kernel=kernel,
                    mean_function=mean_fct,
                    noise_variance=noise_var,
                    optimizer=optimizer,
                    standardize_x=stand_x,
                    standardize_y=stand_y,
                    one_step_ahead=one_step_ahead)
                cross_val_dict = model.train(train=train,
                                             cross_val_call=False)
                eval_dict = model.evaluate(train=train, test=test)
                eval_dict.update(cross_val_dict)
                if sum_dict is None:
                    sum_dict = eval_dict
                else:
                    for k, v in eval_dict.items():
                        sum_dict[k] += v
            # Average the metrics over all train/test splits.
            evaluation_dict = {
                k: v / len(train_test_list) for k, v in sum_dict.items()
            }
            params_dict = {
                'dataset': dataset.name,
                'featureset': featureset,
                'imputation': str('None' if imputation is None else imputation),
                'dim_reduction': str('None' if dim_reduction is None else dim_reduction),
                'init_train_len': init_train_len,
                'test_len': test_len,
                'split_perc': split_perc,
                'kernel': kernel_string,
                'mean_function': mean_fct_string,
                'noise_variance': noise_var,
                'optimizer': optimizer_string,
                'standardize_x': stand_x,
                'standardize_y': stand_y,
                'one_step_ahead': one_step_ahead,
                'optim_mod_params': model.model.parameters
            }
            save_dict = params_dict.copy()
            save_dict.update(evaluation_dict)
            if doc_results is None:
                doc_results = pd.DataFrame(columns=save_dict.keys())
            doc_results = doc_results.append(save_dict, ignore_index=True)
            best_rmse, best_mape, best_smape = TrainHelper.print_best_vals(
                evaluation_dict=evaluation_dict, best_rmse=best_rmse,
                best_mape=best_mape, best_smape=best_smape, run_number=i)
        except KeyboardInterrupt:
            print('Got interrupted')
            break
        except Exception:
            # Document the failed configuration so the run can continue.
            params_dict = {
                'dataset': 'Failure',
                'featureset': featureset,
                'imputation': str('None' if imputation is None else imputation),
                'dim_reduction': str('None' if dim_reduction is None else dim_reduction),
                'init_train_len': init_train_len,
                'test_len': test_len,
                'split_perc': split_perc,
                'kernel': kernel_string,
                'mean_function': mean_fct_string,
                'noise_variance': noise_var,
                'optimizer': optimizer_string,
                'standardize_x': stand_x,
                'standardize_y': stand_y,
                'one_step_ahead': one_step_ahead,
                'optim_mod_params': 'failed'
            }
            save_dict = params_dict.copy()
            save_dict.update(TrainHelper.get_failure_eval_dict())
            if doc_results is None:
                doc_results = pd.DataFrame(columns=save_dict.keys())
            doc_results = doc_results.append(save_dict, ignore_index=True)

    TrainHelper.save_csv_results(doc_results=doc_results,
                                 save_dir=base_dir + 'OptimResults/',
                                 company_model_desc='gpr',
                                 target_column=target_column,
                                 seasonal_periods=seasonal_periods,
                                 datasets=datasets,
                                 featuresets=param_grid['featureset'],
                                 imputations=param_grid['imputation'],
                                 split_perc=split_perc)
    print('Optimization Done. Saved Results.')
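
# A minimal invocation sketch for run_gp_optim. The argument values below
# are illustrative assumptions, not defaults or dataset names from the repo:
if __name__ == '__main__':
    run_gp_optim(target_column='sales',
                 split_perc=0.8,
                 imputation='mean',
                 featureset='full')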