def perform_experiment(data_file, output_file, exp):
    if exp.make_predictions:
        X, y, D, Xtest, ytest = gpml.load_mat(data_file)
        prediction_file = os.path.join(exp.results_dir,
                                       os.path.splitext(os.path.split(data_file)[-1])[0] + "_predictions.mat")
    else:
        X, y, D = gpml.load_mat(data_file)
    perform_kernel_search(X, y, D, data_file, output_file, exp)
    best_model = parse_results(output_file)
    if exp.make_predictions:
        print '\nMaking predictions\n'
        predictions = jc.make_predictions(X, y, Xtest, ytest, best_model,
                                          local_computation=True,
                                          max_jobs=exp.max_jobs,
                                          verbose=exp.verbose,
                                          random_seed=exp.random_seed)
        scipy.io.savemat(prediction_file, predictions, appendmat=False)
    os.system('reset')  # Stop the terminal from going invisible.

def perform_experiment(data_file, output_file, exp):
    if exp.make_predictions:
        X, y, D, Xtest, ytest = gpml.load_mat(data_file, y_dim=1)
        prediction_file = os.path.join(exp.results_dir,
                                       os.path.splitext(os.path.split(data_file)[-1])[0] + "_predictions.mat")
    else:
        X, y, D = gpml.load_mat(data_file, y_dim=1)
    perform_kernel_search(X, y, D, data_file, output_file, exp)
    best_scored_kernel = parse_results(output_file)
    if exp.make_predictions:
        predictions = jc.make_predictions(X, y, Xtest, ytest, best_scored_kernel,
                                          local_computation=exp.local_computation,
                                          max_jobs=exp.max_jobs,
                                          verbose=exp.verbose,
                                          zero_mean=exp.zero_mean,
                                          random_seed=exp.random_seed)
        scipy.io.savemat(prediction_file, predictions, appendmat=False)

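# A minimal usage sketch for perform_experiment above. The Experiment record
# used by this repo is defined elsewhere; this namedtuple stand-in only
# carries the fields referenced in the code above, and the paths and values
# are illustrative, not real files or recommended settings.
from collections import namedtuple

_ExampleExperiment = namedtuple('_ExampleExperiment',
                                ['make_predictions', 'results_dir',
                                 'local_computation', 'max_jobs', 'verbose',
                                 'zero_mean', 'random_seed'])

def _example_perform_experiment():
    exp = _ExampleExperiment(make_predictions=True,
                             results_dir='../results/example/',
                             local_computation=True,
                             max_jobs=500,
                             verbose=True,
                             zero_mean=True,
                             random_seed=0)
    perform_experiment('../data/example/my_data.mat',
                       '../results/example/my_data_result.txt',
                       exp)
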
def make_all_1d_figures(folder, save_folder='../figures/decomposition/', max_level=None,
                        prefix='', rescale=True, data_folder=None):
    """Crawls the results directory and makes decomposition plots for each file.

    prefix is an optional string prepended to the output directory.
    """
    #### Quick fix to axis scaling
    #### TODO - Ultimately this and the shunt below should be removed / made elegant
    if rescale:
        data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/"))
    else:
        if data_folder is None:
            data_sets = list(exp.gen_all_datasets("../data/1d_data/"))
        else:
            data_sets = list(exp.gen_all_datasets(data_folder))
    for r, file in data_sets:
        results_file = os.path.join(folder, file + "_result.txt")
        # Is the experiment complete?
        if os.path.isfile(results_file):
            # Find the best kernel and produce plots.
            datafile = os.path.join(r, file + ".mat")
            X, y, D = gpml.load_mat(datafile)
            if rescale:
                # Load the unscaled data so the scaling can be undone later.
                unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat')
                data = gpml.load_mat(unscaled_file)
                (X_unscaled, y_unscaled) = (data[0], data[1])
                (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std())
                (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std())
            else:
                (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1)
            # A shunt to deal with a legacy issue.
            if datafile == '../data/1d_data/01-airline-months.mat':
                # Scaling should turn months starting at zero into years starting at 1949.
                print "Special rescaling for airline months data"
                X_mean = X_mean + 1949
                X_scale = 1.0 / 12.0
            best_kernel = exp.parse_results(results_file, max_level=max_level)
            stripped_kernel = fk.strip_masks(best_kernel.k_opt)
            if max_level is not None:
                fig_folder = os.path.join(save_folder, (prefix + file + '_max_level_%d' % max_level))
            else:
                fig_folder = os.path.join(save_folder, (prefix + file))
            if not os.path.exists(fig_folder):
                os.makedirs(fig_folder)
            gpml.plot_decomposition(stripped_kernel, X, y, os.path.join(fig_folder, file),
                                    best_kernel.noise, X_mean, X_scale, y_mean, y_scale)
        else:
            print "Cannot find file %s" % results_file

def repeat_predictions(filename):
    """A convenience function to re-run the predictions from an experiment."""
    expstring = open(filename, 'r').read()
    exp = eval(expstring)  # The experiment file holds a Python expression describing the experiment.
    print experiment_fields_to_str(exp)
    if not exp.make_predictions:
        print 'This experiment does not make predictions'
        return None
    data_sets = list(gen_all_datasets(exp.data_dir))
    for r, file in data_sets:
        # Check whether this experiment has already been done.
        output_file = os.path.join(exp.results_dir, file + "_result.txt")
        if os.path.isfile(output_file):
            print 'Predictions for %s' % file
            data_file = os.path.join(r, file + ".mat")
            X, y, D, Xtest, ytest = gpml.load_mat(data_file)
            prediction_file = os.path.join(exp.results_dir,
                                           os.path.splitext(os.path.split(data_file)[-1])[0] + "_predictions.mat")
            best_model = parse_results(output_file)
            predictions = jc.make_predictions(X, y, Xtest, ytest, best_model,
                                              local_computation=True,
                                              max_jobs=exp.max_jobs,
                                              verbose=exp.verbose,
                                              random_seed=exp.random_seed)
            scipy.io.savemat(prediction_file, predictions, appendmat=False)
            print "Finished file %s" % file
        else:
            print 'Results not found for %s' % file

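# A minimal usage sketch for repeat_predictions, assuming the file on disk
# contains a Python expression for the experiment record (which the eval()
# above requires); the path is illustrative only.
def _example_repeat_predictions():
    repeat_predictions('../results/example/example_experiment.py')
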
def debug_laplace():
    # Load the data set.
    X, y, D, Xtest, ytest = gpml.load_mat('../data/kfold_data/r_concrete_500_fold_10_of_10.mat', y_dim=1)
    # Load the suspicious kernel.
    sk = fk.repr_string_to_kernel('ScoredKernel(k_opt=ProductKernel([ MaskKernel(ndim=8, active_dimension=0, base_kernel=CubicKernel(offset=1.757755, output_variance=7.084045)), MaskKernel(ndim=8, active_dimension=7, base_kernel=SqExpPeriodicKernel(lengthscale=-2.701080, period=-0.380918, output_variance=-0.071214)) ]), nll=6348.096611, laplace_nle=-184450132.068237, bic_nle=12720.630212, noise=[-1.77276072])')
    # Create some code to evaluate it.
    if X.ndim == 1:
        X = X[:, nax]
    if y.ndim == 1:
        y = y[:, nax]
    ndata = y.shape[0]
    # Create the data file.
    data_file = cblparallel.create_temp_file('.mat')
    scipy.io.savemat(data_file, {'X': X, 'y': y})  # Save regression data.
    # Move it to fear (the cluster).
    cblparallel.copy_to_remote(data_file)
    scripts = [gpml.OPTIMIZE_KERNEL_CODE % {'datafile': data_file.split('/')[-1],
                                            'writefile': '%(output_file)s',  # N.B. cblparallel manages output files.
                                            'gpml_path': cblparallel.gpml_path(local_computation=False),
                                            'kernel_family': sk.k_opt.gpml_kernel_expression(),
                                            'kernel_params': '[ %s ]' % ' '.join(str(p) for p in sk.k_opt.param_vector()),
                                            'noise': str(sk.noise),
                                            'iters': str(300)}]
    #### Need to be careful with % signs
    #### For the moment, cblparallel expects no single % signs - FIXME
    scripts[0] = re.sub('% ', '%% ', scripts[0])
    # Test (currently a no-op substitution, kept as a debugging hook).
    scripts[0] = re.sub('delta = 1e-6', 'delta = 1e-6', scripts[0])
    #scripts[0] = re.sub('hyp.lik = [-1.77276072]', 'hyp.lik = [-0.77276072]', scripts[0])
    output_file = cblparallel.run_batch_on_fear(scripts, language='matlab', max_jobs=600)[0]
    # Read in the results.
    output = gpml.read_outputs(output_file)
    result = ScoredKernel.from_matlab_output(output, sk.k_opt.family(), ndata)
    print result
    print output.hessian
    os.remove(output_file)
    # Remove the temporary data file (perhaps on the cluster server).
    cblparallel.remove_temp_file(data_file, local_computation=False)

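# Why the '% ' -> '%% ' substitution above matters: the MATLAB source is
# pushed through Python's %-formatting, where a single % followed by anything
# other than a valid conversion raises ValueError. A small sketch of the
# pitfall (the template string is purely illustrative):
def _example_percent_escaping():
    template = 'x = 100 % y  %(datafile)s'
    try:
        print template % {'datafile': 'data.mat'}
    except ValueError:
        print 'A single % sign breaks the formatting'
    # Doubling the percent signs, as debug_laplace does, makes it safe.
    safe = re.sub('% ', '%% ', template)
    print safe % {'datafile': 'data.mat'}  # Prints: x = 100 % y  data.mat
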
def perform_experiment(data_file, output_file, exp):
    if exp.make_predictions:
        X, y, D, Xtest, ytest = gpml.load_mat(data_file, y_dim=1)
        prediction_file = os.path.join(exp.results_dir,
                                       os.path.splitext(os.path.split(data_file)[-1])[0] + "_predictions.mat")
    else:
        X, y, D = gpml.load_mat(data_file, y_dim=1)
    perform_kernel_search(X, y, D, data_file, output_file, exp)
    best_scored_kernel = parse_results(output_file)
    if exp.make_predictions:
        predictions = jc.make_predictions(X, y, Xtest, ytest, best_scored_kernel,
                                          local_computation=exp.local_computation,
                                          max_jobs=exp.max_jobs,
                                          verbose=exp.verbose,
                                          zero_mean=exp.zero_mean,
                                          random_seed=exp.random_seed)
        scipy.io.savemat(prediction_file, predictions, appendmat=False)
    os.system('reset')  # Stop the terminal from going invisible.

def calculate_model_fits(data_file, output_file, exp):
    prediction_file = os.path.join(exp.results_dir,
                                   os.path.splitext(os.path.split(data_file)[-1])[0] + "_predictions.mat")
    X, y, D = gpml.load_mat(data_file, y_dim=1)
    # Score the model on its own training data to obtain in-sample fits.
    Xtest = X
    ytest = y
    best_scored_kernel = parse_results(output_file)
    predictions = jc.make_predictions(X, y, Xtest, ytest, best_scored_kernel,
                                      local_computation=exp.local_computation,
                                      max_jobs=exp.max_jobs,
                                      verbose=exp.verbose,
                                      zero_mean=exp.zero_mean,
                                      random_seed=exp.random_seed)
    scipy.io.savemat(prediction_file, predictions, appendmat=False)
    os.system('reset')  # Stop the terminal from going invisible.

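# A minimal usage sketch for calculate_model_fits, reusing the illustrative
# _ExampleExperiment record defined above. Unlike perform_experiment, this
# assumes the kernel search has already written its result file; the paths
# are illustrative only.
def _example_calculate_model_fits():
    exp = _ExampleExperiment(make_predictions=True,
                             results_dir='../results/example/',
                             local_computation=True,
                             max_jobs=500,
                             verbose=True,
                             zero_mean=True,
                             random_seed=0)
    calculate_model_fits('../data/example/my_data.mat',
                         '../results/example/my_data_result.txt',
                         exp)
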
def perform_experiment(data_file, output_file, exp):
    if exp.make_predictions:
        X, y, D, Xtest, ytest = gpml.load_mat(data_file)
        prediction_file = os.path.join(exp.results_dir,
                                       os.path.splitext(os.path.split(data_file)[-1])[0] + "_predictions.mat")
    else:
        X, y, D = gpml.my_load_mat(data_file)  # N.B. this branch uses the custom loader my_load_mat.
    import time
    start_time = time.time()
    perform_kernel_search(X, y, D, data_file, output_file, exp)
    elapsed_time = time.time() - start_time
    print 'Elapsed time: {}'.format(elapsed_time)
    best_model = parse_results(output_file)
    if exp.make_predictions:
        print '\nMaking predictions\n'
        predictions = jc.make_predictions(X, y, Xtest, ytest, best_model,
                                          local_computation=True,
                                          max_jobs=exp.max_jobs,
                                          verbose=exp.verbose,
                                          random_seed=exp.random_seed)
        scipy.io.savemat(prediction_file, predictions, appendmat=False)
    os.system('reset')  # Stop the terminal from going invisible.

def make_all_1d_figures(folders, save_folder='../figures/decomposition/', prefix='',
                        rescale=False, data_folder=None, skip_kernel_evaluation=False,
                        unit='year', all_depths=False):
    """Crawls the results directory and makes decomposition plots for each file.

    prefix is an optional string prepended to the output directory.
    """
    if not isinstance(folders, list):
        folders = [folders]  # Backward compatibility with specifying one folder.
    #### Quick fix to axis scaling
    #### TODO - Ultimately this and the shunt below should be removed / made elegant
    if rescale:
        data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/"))
    else:
        if data_folder is None:
            data_sets = list(exp.gen_all_datasets("../data/1d_data/"))
        else:
            data_sets = list(exp.gen_all_datasets(data_folder))
    for r, file in data_sets:
        results_files = []
        for folder in folders:
            results_file = os.path.join(folder, file + "_result.txt")
            if os.path.isfile(results_file):
                results_files.append(results_file)
        # Is the experiment complete?
        if len(results_files) > 0:
            # Find the best kernel and produce plots.
            datafile = os.path.join(r, file + ".mat")
            data = gpml.load_mat(datafile)
            X = data[0]
            y = data[1]
            D = data[2]
            assert D == 1
            if rescale:
                # Load the unscaled data so the scaling can be undone later.
                unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat')
                data = gpml.load_mat(unscaled_file)
                (X_unscaled, y_unscaled) = (data[0], data[1])
                (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std())
                (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std())
            else:
                (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1)
            if all_depths:
                # A quick version for now TODO - write correct code
                models = [exp.parse_results(results_files, max_level=depth) for depth in range(10)]
                suffices = ['-depth-%d' % (depth + 1) for depth in range(len(models))]
            else:
                models = [exp.parse_results(results_files)]
                suffices = ['']
            for (model, suffix) in zip(models, suffices):
                model = model.simplified().canonical()
                kernel_components = model.kernel.break_into_summands()
                kernel_components = ff.SumKernel(kernel_components).simplified().canonical().operands
                print model.pretty_print()
                fig_folder = os.path.join(save_folder, (prefix + file + suffix))
                if not os.path.exists(fig_folder):
                    os.makedirs(fig_folder)
                # First ask GPML to order the components.
                print 'Determining order of components'
                (component_order, mae_data) = gpml.order_by_mae(model, kernel_components, X, y, D,
                                                                os.path.join(fig_folder, file + suffix),
                                                                skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Plotting decomposition and computing basic stats'
                component_data = gpml.component_stats(model, kernel_components, X, y, D,
                                                      os.path.join(fig_folder, file + suffix),
                                                      component_order,
                                                      skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Computing model checking stats'
                checking_stats = gpml.checking_stats(model, kernel_components, X, y, D,
                                                     os.path.join(fig_folder, file + suffix),
                                                     component_order, make_plots=True,
                                                     skip_kernel_evaluation=skip_kernel_evaluation)
                # Now that the kernels have been evaluated we can translate the relevant ones.
                evaluation_data = mae_data
                evaluation_data.update(component_data)
                evaluation_data.update(checking_stats)
                for key in ['vars', 'cum_vars', 'cum_resid_vars', 'MAEs', 'MAE_reductions',
                            'monotonic', 'acf_min_p', 'acf_min_loc_p', 'pxx_max_p',
                            'pxx_max_loc_p', 'qq_d_max_p', 'qq_d_min_p']:
                    evaluation_data[key] = evaluation_data[key].ravel()
                i = 1
                short_descriptions = []
                while os.path.isfile(os.path.join(fig_folder, '%s_%d.fig' % (file + suffix, i))):
                    # Describe this component.
                    (summary, sentences, extrap_sentences) = translation.translate_additive_component(
                        kernel_components[component_order[i - 1]], X,
                        evaluation_data['monotonic'][i - 1],
                        evaluation_data['gradients'][i - 1], unit)
                    short_descriptions.append(summary)
                    paragraph = '.\n'.join(sentences) + '.'
                    extrap_paragraph = '.\n'.join(extrap_sentences) + '.'
                    with open(os.path.join(fig_folder, '%s_%d_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_extrap_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(extrap_paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_short_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(summary + '.')
                    i += 1
                # Produce the summary LaTeX document.
                print 'Producing LaTeX document'
                latex_summary = translation.produce_summary_document(file + suffix, i - 1,
                                                                     evaluation_data, short_descriptions)
                with open(os.path.join(save_folder, '%s.tex' % (file + suffix)), 'w') as latex_file:
                    latex_file.write(latex_summary)
                print 'Saving to ' + os.path.join(save_folder, '%s.tex' % (file + suffix))
        else:
            print "Cannot find results for %s" % file

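# A minimal usage sketch for make_all_1d_figures above; the folders are
# illustrative only, and skip_kernel_evaluation is simply passed through to
# the gpml helpers called above.
def _example_make_all_1d_figures():
    make_all_1d_figures(['../results/example/'],
                        save_folder='../figures/example/',
                        prefix='demo-',
                        skip_kernel_evaluation=True)
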
def make_all_1d_figures(folders, save_folder='../figures/decomposition/', prefix='',
                        rescale=False, data_folder=None, skip_kernel_evaluation=False,
                        unit='year', all_depths=False):
    """Crawls the results directory and makes decomposition plots for each file.

    prefix is an optional string prepended to the output directory.
    """
    if not isinstance(folders, list):
        folders = [folders]  # Backward compatibility with specifying one folder.
    #### Quick fix to axis scaling
    #### TODO - Ultimately this and the shunt below should be removed / made elegant
    if rescale:
        data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/"))
    else:
        if data_folder is None:
            data_sets = list(exp.gen_all_datasets("../data/1d_data/"))
        else:
            data_sets = list(exp.gen_all_datasets(data_folder))
    for r, file in data_sets:
        results_files = []
        for folder in folders:
            results_file = os.path.join(folder, file + "_result.txt")
            if os.path.isfile(results_file):
                results_files.append(results_file)
        # Is the experiment complete?
        if len(results_files) > 0:
            # Find the best kernel and produce plots.
            datafile = os.path.join(r, file + ".mat")
            data = gpml.my_load_mat(datafile)
            X = data[0]
            y = data[1]
            M = y.shape[1]  # Number of output dimensions (not used below).
            D = data[2]
            iiii = 1  # Hard-coded output dimension to plot.
            y = y[:, iiii]
            assert D == 1
            if rescale:
                # Load the unscaled data so the scaling can be undone later.
                unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat')
                data = gpml.load_mat(unscaled_file)
                (X_unscaled, y_unscaled) = (data[0], data[1])
                (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std())
                (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std())
            else:
                (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1)
            if all_depths:
                # A quick version for now TODO - write correct code
                models = [exp.parse_results(results_files, max_level=depth)[0] for depth in range(10)]
                suffices = ['-depth-%d' % (depth + 1) for depth in range(len(models))]
            else:
                models = [exp.parse_results(results_files)[0]]
                try:
                    suffices = ['-' + str(data[5][iiii]).replace(" ", "")]
                    #suffices = ['-' + str(data[5][iiii][0][0]).replace(" ", "")]  # Uncomment this if you test a given dataset (house, stock, and so on).
                except:
                    suffices = ['-' + str(iiii)]
            best_depth = exp.parse_results(results_files)[1]
            # N.B. hard-coded local path to the saved scale parameters.
            params_filename = '/home/heechan/gpss-research-srkl' + results_files[0][2:] + 'lvl_' + str(best_depth) + '_0.mat1.mat'
            scale_params = scipy.io.loadmat(params_filename)['scale']
            scl1 = scale_params[0][iiii][0][0]
            scl2 = scale_params[0][iiii][1][0]
            for (model, suffix) in zip(models, suffices):
                model = model.simplified().canonical()
                # Undo the per-output scaling by wrapping the kernel.
                model.kernel = model.kernel * ff.ConstKernel(sf=scl2) + ff.ConstKernel(sf=scl1)
                kernel_components = model.kernel.break_into_summands()
                kernel_components = ff.SumKernel(kernel_components).simplified().canonical().operands
                print model.pretty_print()
                fig_folder = os.path.join(save_folder, (prefix + file + suffix))
                if not os.path.exists(fig_folder):
                    os.makedirs(fig_folder)
                # First ask GPML to order the components.
                print 'Determining order of components'
                (component_order, mae_data) = gpml.order_by_mae(model, kernel_components, X, y, D,
                                                                os.path.join(fig_folder, file + suffix),
                                                                skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Plotting decomposition and computing basic stats'
                component_data = gpml.component_stats(model, kernel_components, X, y, D,
                                                      os.path.join(fig_folder, file + suffix),
                                                      component_order,
                                                      skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Computing model checking stats'
                checking_stats = gpml.checking_stats(model, kernel_components, X, y, D,
                                                     os.path.join(fig_folder, file + suffix),
                                                     component_order, make_plots=True,
                                                     skip_kernel_evaluation=skip_kernel_evaluation)
                # Now that the kernels have been evaluated we can translate the relevant ones.
                evaluation_data = mae_data
                evaluation_data.update(component_data)
                evaluation_data.update(checking_stats)
                for key in ['vars', 'cum_vars', 'cum_resid_vars', 'MAEs', 'MAE_reductions',
                            'monotonic', 'acf_min_p', 'acf_min_loc_p', 'pxx_max_p',
                            'pxx_max_loc_p', 'qq_d_max_p', 'qq_d_min_p']:
                    evaluation_data[key] = evaluation_data[key].ravel()
                i = 1
                short_descriptions = []
                while os.path.isfile(os.path.join(fig_folder, '%s_%d.fig' % (file + suffix, i))):
                    # Describe this component.
                    (summary, sentences, extrap_sentences) = translation.translate_additive_component(
                        kernel_components[component_order[i - 1]], X,
                        evaluation_data['monotonic'][i - 1],
                        evaluation_data['gradients'][i - 1], unit)
                    short_descriptions.append(summary)
                    paragraph = '.\n'.join(sentences) + '.'
                    extrap_paragraph = '.\n'.join(extrap_sentences) + '.'
                    with open(os.path.join(fig_folder, '%s_%d_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_extrap_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(extrap_paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_short_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(summary + '.')
                    i += 1
                # Produce the summary LaTeX document.
                print 'Producing LaTeX document'
                latex_summary = translation.produce_summary_document(file + suffix, i - 1,
                                                                     evaluation_data, short_descriptions)
                with open(os.path.join(save_folder, '%s.tex' % (file + suffix)), 'w') as latex_file:
                    latex_file.write(latex_summary)
                print 'Saving to ' + os.path.join(save_folder, '%s.tex' % (file + suffix))
        else:
            print "Cannot find results for %s" % file
