def test_kernel_decompose_1d():
    '''Checks that a kernel decomposes into a sum properly'''
    # Parse a fully-specified scored kernel from its repr string.
    sk = fk.repr_string_to_kernel('ScoredKernel(k_opt=ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=RQKernel(lengthscale=4.853529, output_variance=-0.648382, alpha=0.457387)), SumKernel([ ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=SqExpPeriodicKernel(lengthscale=1.395371, period=-3.990523, output_variance=0.565365)), MaskKernel(ndim=1, active_dimension=0, base_kernel=LinKernel(offset=0.000420, lengthscale=-0.120045)) ]), ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=LinKernel(offset=0.802417, lengthscale=3.350816)), SumKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=SqExpPeriodicKernel(lengthscale=-3.899540, period=0.087011, output_variance=2.430187)), MaskKernel(ndim=1, active_dimension=0, base_kernel=RQKernel(lengthscale=3.865315, output_variance=4.028649, alpha=-5.060996)), ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=SqExpPeriodicKernel(lengthscale=3.251723, period=1.540000, output_variance=-2.497487)), MaskKernel(ndim=1, active_dimension=0, base_kernel=LinKernel(offset=-1.424416, lengthscale=-1.732677)) ]) ]) ]) ]) ]), nll=558.339977, laplace_nle=-266.580399, bic_nle=1216.076221, noise=[ 1.66059002])')
    # Strip dimension masks so the structural expansion is easy to compare.
    stripped = fk.strip_masks(sk.k_opt)
    expected = ['RQ \\times Per \\times Lin',
                'RQ \\times Lin \\times Per',
                'RQ \\times Lin \\times RQ',
                'RQ \\times Lin \\times Per \\times Lin']
    # Each summand of the expanded kernel should print as one expected term.
    summands = fk.break_kernel_into_summands(stripped)
    assert([summand.latex_print() for summand in summands] == expected)
def test_kernel_decompose_1d():
    '''Checks that a kernel decomposes into a sum properly'''
    # Repr string for a nested product-of-sums kernel fitted to 1d data.
    kernel_repr = 'ScoredKernel(k_opt=ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=RQKernel(lengthscale=4.853529, output_variance=-0.648382, alpha=0.457387)), SumKernel([ ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=SqExpPeriodicKernel(lengthscale=1.395371, period=-3.990523, output_variance=0.565365)), MaskKernel(ndim=1, active_dimension=0, base_kernel=LinKernel(offset=0.000420, lengthscale=-0.120045)) ]), ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=LinKernel(offset=0.802417, lengthscale=3.350816)), SumKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=SqExpPeriodicKernel(lengthscale=-3.899540, period=0.087011, output_variance=2.430187)), MaskKernel(ndim=1, active_dimension=0, base_kernel=RQKernel(lengthscale=3.865315, output_variance=4.028649, alpha=-5.060996)), ProductKernel([ MaskKernel(ndim=1, active_dimension=0, base_kernel=SqExpPeriodicKernel(lengthscale=3.251723, period=1.540000, output_variance=-2.497487)), MaskKernel(ndim=1, active_dimension=0, base_kernel=LinKernel(offset=-1.424416, lengthscale=-1.732677)) ]) ]) ]) ]) ]), nll=558.339977, laplace_nle=-266.580399, bic_nle=1216.076221, noise=[ 1.66059002])'
    scored = fk.repr_string_to_kernel(kernel_repr)
    # Remove the 1d masks before expanding into a sum of products.
    bare_kernel = fk.strip_masks(scored.k_opt)
    decomposition = fk.break_kernel_into_summands(bare_kernel)
    actual = [term.latex_print() for term in decomposition]
    assert actual == ['RQ \\times Per \\times Lin',
                      'RQ \\times Lin \\times Per',
                      'RQ \\times Lin \\times RQ',
                      'RQ \\times Lin \\times Per \\times Lin']
def make_all_1d_figures(folder, save_folder='../figures/decomposition/', max_level=None, prefix='', rescale=True, data_folder=None): """Crawls the results directory, and makes decomposition plots for each file. prefix is an optional string prepended to the output directory """ #### Quick fix to axis scaling #### TODO - Ultimately this and the shunt below should be removed / made elegant if rescale: data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/")) else: if data_folder is None: data_sets = list(exp.gen_all_datasets("../data/1d_data/")) else: data_sets = list(exp.gen_all_datasets(data_folder)) for r, file in data_sets: results_file = os.path.join(folder, file + "_result.txt") # Is the experiment complete if os.path.isfile(results_file): # Find best kernel and produce plots datafile = os.path.join(r,file + ".mat") X, y, D = gpml.load_mat(datafile) if rescale: # Load unscaled data to remove scaling later unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat') data = gpml.load_mat(unscaled_file) (X_unscaled, y_unscaled) = (data[0], data[1]) (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std()) (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std()) else: (X_mean, X_scale, y_mean, y_scale) = (0,1,0,1) # A shunt to deal with a legacy issue. 
if datafile == '../data/1d_data/01-airline-months.mat': # Scaling should turn months starting at zero into years starting at 1949 print "Special rescaling for airline months data" X_mean = X_mean + 1949 X_scale = 1.0/12.0 best_kernel = exp.parse_results(os.path.join(folder, file + "_result.txt"), max_level=max_level) stripped_kernel = fk.strip_masks(best_kernel.k_opt) if not max_level is None: fig_folder = os.path.join(save_folder, (prefix + file + '_max_level_%d' % max_level)) else: fig_folder = os.path.join(save_folder, (prefix + file)) if not os.path.exists(fig_folder): os.makedirs(fig_folder) gpml.plot_decomposition(stripped_kernel, X, y, os.path.join(fig_folder, file), best_kernel.noise, X_mean, X_scale, y_mean, y_scale) else: print "Cannnot find file %s" % results_file
def make_all_1d_figures(folder, save_folder='../figures/decomposition/', max_level=None, prefix='', rescale=True, data_folder=None): """Crawls the results directory, and makes decomposition plots for each file. prefix is an optional string prepended to the output directory """ #### Quick fix to axis scaling #### TODO - Ultimately this and the shunt below should be removed / made elegant if rescale: data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/")) else: if data_folder is None: data_sets = list(exp.gen_all_datasets("../data/1d_data/")) else: data_sets = list(exp.gen_all_datasets(data_folder)) for r, file in data_sets: results_file = os.path.join(folder, file + "_result.txt") # Is the experiment complete if os.path.isfile(results_file): # Find best kernel and produce plots datafile = os.path.join(r, file + ".mat") X, y, D = gpml.load_mat(datafile) if rescale: # Load unscaled data to remove scaling later unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat') data = gpml.load_mat(unscaled_file) (X_unscaled, y_unscaled) = (data[0], data[1]) (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std()) (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std()) else: (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1) # A shunt to deal with a legacy issue. 
if datafile == '../data/1d_data/01-airline-months.mat': # Scaling should turn months starting at zero into years starting at 1949 print "Special rescaling for airline months data" X_mean = X_mean + 1949 X_scale = 1.0 / 12.0 best_kernel = exp.parse_results(os.path.join( folder, file + "_result.txt"), max_level=max_level) stripped_kernel = fk.strip_masks(best_kernel.k_opt) if not max_level is None: fig_folder = os.path.join( save_folder, (prefix + file + '_max_level_%d' % max_level)) else: fig_folder = os.path.join(save_folder, (prefix + file)) if not os.path.exists(fig_folder): os.makedirs(fig_folder) gpml.plot_decomposition(stripped_kernel, X, y, os.path.join(fig_folder, file), best_kernel.noise, X_mean, X_scale, y_mean, y_scale) else: print "Cannnot find file %s" % results_file