import os
import re

import numpy as np
import scipy.io

# Local modules from this code base; the import aliases below are inferred
# from how the modules are referenced in the functions that follow.
import experiment as exp
import flexiblekernel as fk
import flexible_function as ff
import gpml
import translation


def make_all_1d_figures(folder, save_folder='../figures/decomposition/', max_level=None, prefix='', rescale=True, data_folder=None):
    """Crawls the results directory, and makes decomposition plots for each file.

    prefix is an optional string prepended to the output directory
    """
    #### Quick fix to axis scaling
    #### TODO - Ultimately this and the shunt below should be removed / made elegant
    if rescale:
        data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/"))
    else:
        if data_folder is None:
            data_sets = list(exp.gen_all_datasets("../data/1d_data/"))
        else:
            data_sets = list(exp.gen_all_datasets(data_folder))
    for r, file in data_sets:
        results_file = os.path.join(folder, file + "_result.txt")
        # Is the experiment complete?
        if os.path.isfile(results_file):
            # Find best kernel and produce plots
            datafile = os.path.join(r, file + ".mat")
            X, y, D = gpml.load_mat(datafile)
            if rescale:
                # Load unscaled data to remove scaling later
                unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat')
                data = gpml.load_mat(unscaled_file)
                (X_unscaled, y_unscaled) = (data[0], data[1])
                (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std())
                (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std())
            else:
                (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1)
            # A shunt to deal with a legacy issue.
            if datafile == '../data/1d_data/01-airline-months.mat':
                # Scaling should turn months starting at zero into years starting at 1949
                print "Special rescaling for airline months data"
                X_mean = X_mean + 1949
                X_scale = 1.0 / 12.0
            best_kernel = exp.parse_results(results_file, max_level=max_level)
            stripped_kernel = fk.strip_masks(best_kernel.k_opt)
            if max_level is not None:
                fig_folder = os.path.join(save_folder, prefix + file + '_max_level_%d' % max_level)
            else:
                fig_folder = os.path.join(save_folder, prefix + file)
            if not os.path.exists(fig_folder):
                os.makedirs(fig_folder)
            gpml.plot_decomposition(stripped_kernel, X, y, os.path.join(fig_folder, file),
                                    best_kernel.noise, X_mean, X_scale, y_mean, y_scale)
        else:
            print "Cannot find file %s" % results_file
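# Example usage (a sketch; the results directory name is hypothetical):
#
#   make_all_1d_figures('../results/1d_runs/', max_level=3, prefix='demo_')
#
# This looks in '../results/1d_runs/' for '<dataset>_result.txt' files and, for
# each completed experiment, writes decomposition figures to a folder such as
# '../figures/decomposition/demo_<dataset>_max_level_3/'.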
def classification_accuracy(folders, data_folder):
    if not isinstance(folders, list):
        folders = [folders]  # Backward compatibility with specifying one folder
    data_sets = list(exp.gen_all_datasets(data_folder))
    np.set_printoptions(precision=4)
    for (i, folder) in enumerate(folders):
        print ''
        print folder
        print ''
        # Load predictions file
        count = 0
        sum_error = 0
        for (j, (r, data_file)) in enumerate(data_sets):
            print '%s : ' % data_file,
            results_file = os.path.join(folder, data_file + "_predictions.mat")
            if os.path.isfile(results_file):
                data = scipy.io.loadmat(results_file)
                # Percentage of points whose predicted sign disagrees with the actual sign
                error = (1 - np.sum((data['predictions'].ravel() > 0) == (data['actuals'].ravel() > 0)) * 1.0 /
                         data['actuals'].ravel().shape[0]) * 100
                count += 1
                sum_error += error
                print '%f %f' % (error, 100 - error)
            else:
                print ''
        if count > 0:
            print ''
            print 'Average error: %f' % (sum_error / count)
            print 'Average accuracy: %f' % (100 - sum_error / count)
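# The error statistic above is the percentage of test points whose predicted
# sign disagrees with the actual sign. A minimal self-contained sketch of the
# same computation on made-up arrays:
#
#   import numpy as np
#   predictions = np.array([0.9, -0.2, 0.4, -1.1])
#   actuals = np.array([1.0, 1.0, -1.0, -1.0])
#   error = (1 - np.sum((predictions > 0) == (actuals > 0)) * 1.0 / actuals.shape[0]) * 100
#   # error == 50.0 : two of the four sign predictions are wrong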
def compare_mse(folders, data_folder):
    if not isinstance(folders, list):
        folders = [folders]  # Backward compatibility with specifying one folder
    data_sets = list(exp.gen_all_datasets(data_folder))
    RMSEs = np.inf * np.ones((len(data_sets), len(folders)))
    for (i, folder) in enumerate(folders):
        print ''
        print folder
        print ''
        # Load predictions file
        for (j, (r, data_file)) in enumerate(data_sets):
            print '%s : ' % data_file,
            results_file = os.path.join(folder, data_file + "_predictions.mat")
            if os.path.isfile(results_file):
                data = scipy.io.loadmat(results_file)
                RMSE = np.sqrt(np.mean(np.power(data['predictions'].ravel() - data['actuals'].ravel(), 2)))
                RMSEs[j, i] = RMSE  # enumerate already provides the row and column indices
                print '%f' % RMSE
            else:
                print ''
    np.set_printoptions(precision=3)
    # Divide each row by the best (smallest) RMSE for that data set
    standard_RMSEs = RMSEs / np.tile(np.min(RMSEs, 1), (RMSEs.shape[1], 1)).T
    print ''
    for folder in folders:
        print folder
    print ''
    for row in standard_RMSEs:
        print ','.join(str(element) for element in row)
    print ''
    print ''
    print standard_RMSEs
    print ''
    # LaTeX-formatted table rows
    for row in standard_RMSEs:
        print ' & '.join('%1.2f' % element for element in row) + ' \\\\'
    print ''
    medians = np.median(standard_RMSEs, 0)
    print medians
    return RMSEs
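# The standardisation above divides each row (one data set) of the RMSE table
# by its smallest entry, so the best method scores 1.0 and all others are
# expressed relative to it. A minimal sketch with a made-up 2x2 table:
#
#   import numpy as np
#   RMSEs = np.array([[2.0, 1.0],
#                     [3.0, 6.0]])
#   standard = RMSEs / np.tile(np.min(RMSEs, 1), (RMSEs.shape[1], 1)).T
#   # standard == [[2., 1.],
#   #              [1., 2.]]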
def make_all_1d_figures(folders, save_folder='../figures/decomposition/', prefix='', rescale=False,
                        data_folder=None, skip_kernel_evaluation=False, unit='year', all_depths=False):
    """Crawls the results directory, and makes decomposition plots for each file.

    prefix is an optional string prepended to the output directory
    """
    if not isinstance(folders, list):
        folders = [folders]  # Backward compatibility with specifying one folder
    #### Quick fix to axis scaling
    #### TODO - Ultimately this and the shunt below should be removed / made elegant
    if rescale:
        data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/"))
    else:
        if data_folder is None:
            data_sets = list(exp.gen_all_datasets("../data/1d_data/"))
        else:
            data_sets = list(exp.gen_all_datasets(data_folder))
    for r, file in data_sets:
        results_files = []
        for folder in folders:
            results_file = os.path.join(folder, file + "_result.txt")
            if os.path.isfile(results_file):
                results_files.append(results_file)
        # Is the experiment complete?
        if len(results_files) > 0:
            # Find best kernel and produce plots
            datafile = os.path.join(r, file + ".mat")
            data = gpml.load_mat(datafile)
            X = data[0]
            y = data[1]
            D = data[2]
            assert D == 1
            if rescale:
                # Load unscaled data to remove scaling later
                unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat')
                data = gpml.load_mat(unscaled_file)
                (X_unscaled, y_unscaled) = (data[0], data[1])
                (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std())
                (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std())
            else:
                (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1)
            if all_depths:
                # A quick version for now TODO - write correct code
                models = [exp.parse_results(results_files, max_level=depth) for depth in range(10)]
                suffices = ['-depth-%d' % (depth + 1) for depth in range(len(models))]
            else:
                models = [exp.parse_results(results_files)]
                suffices = ['']
            for (model, suffix) in zip(models, suffices):
                model = model.simplified().canonical()
                kernel_components = model.kernel.break_into_summands()
                kernel_components = ff.SumKernel(kernel_components).simplified().canonical().operands
                print model.pretty_print()
                fig_folder = os.path.join(save_folder, (prefix + file + suffix))
                if not os.path.exists(fig_folder):
                    os.makedirs(fig_folder)
                # First ask GPML to order the components
                print 'Determining order of components'
                (component_order, mae_data) = gpml.order_by_mae(model, kernel_components, X, y, D,
                                                                os.path.join(fig_folder, file + suffix),
                                                                skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Plotting decomposition and computing basic stats'
                component_data = gpml.component_stats(model, kernel_components, X, y, D,
                                                      os.path.join(fig_folder, file + suffix), component_order,
                                                      skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Computing model checking stats'
                checking_stats = gpml.checking_stats(model, kernel_components, X, y, D,
                                                     os.path.join(fig_folder, file + suffix), component_order,
                                                     make_plots=True, skip_kernel_evaluation=skip_kernel_evaluation)
                # Now the kernels have been evaluated we can translate the relevant ones
                evaluation_data = mae_data
                evaluation_data.update(component_data)
                evaluation_data.update(checking_stats)
                # Flatten the per-component statistics to 1d arrays
                for key in ('vars', 'cum_vars', 'cum_resid_vars', 'MAEs', 'MAE_reductions', 'monotonic',
                            'acf_min_p', 'acf_min_loc_p', 'pxx_max_p', 'pxx_max_loc_p',
                            'qq_d_max_p', 'qq_d_min_p'):
                    evaluation_data[key] = evaluation_data[key].ravel()
                i = 1
                short_descriptions = []
                while os.path.isfile(os.path.join(fig_folder, '%s_%d.fig' % (file + suffix, i))):
                    # Describe this component
                    (summary, sentences, extrap_sentences) = translation.translate_additive_component(
                        kernel_components[component_order[i - 1]], X,
                        evaluation_data['monotonic'][i - 1], evaluation_data['gradients'][i - 1], unit)
                    short_descriptions.append(summary)
                    paragraph = '.\n'.join(sentences) + '.'
                    extrap_paragraph = '.\n'.join(extrap_sentences) + '.'
                    with open(os.path.join(fig_folder, '%s_%d_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_extrap_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(extrap_paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_short_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(summary + '.')
                    i += 1
                # Produce the summary LaTeX document
                print 'Producing LaTeX document'
                latex_summary = translation.produce_summary_document(file + suffix, i - 1, evaluation_data, short_descriptions)
                with open(os.path.join(save_folder, '%s.tex' % (file + suffix)), 'w') as latex_file:
                    latex_file.write(latex_summary)
                print 'Saving to ' + os.path.join(save_folder, '%s.tex' % (file + suffix))
        else:
            print "Cannot find results for %s" % file
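# Example usage (a sketch; the results folder names are hypothetical):
#
#   make_all_1d_figures(['../results/run1/', '../results/run2/'],
#                       prefix='demo_', unit='year')
#
# For each data set, the best result across both runs is parsed, the additive
# decomposition is plotted into a per-dataset figure folder, and a summary
# '<dataset>.tex' report is written under save_folder.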
def make_all_1d_figures(folders, save_folder='../figures/decomposition/', prefix='', rescale=False,
                        data_folder=None, skip_kernel_evaluation=False, unit='year', all_depths=False):
    """Crawls the results directory, and makes decomposition plots for each file.

    This variant handles multi-output data: it plots a single output column and
    folds the saved per-output scale parameters back into the kernel.

    prefix is an optional string prepended to the output directory
    """
    if not isinstance(folders, list):
        folders = [folders]  # Backward compatibility with specifying one folder
    #### Quick fix to axis scaling
    #### TODO - Ultimately this and the shunt below should be removed / made elegant
    if rescale:
        data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/"))
    else:
        if data_folder is None:
            data_sets = list(exp.gen_all_datasets("../data/1d_data/"))
        else:
            data_sets = list(exp.gen_all_datasets(data_folder))
    for r, file in data_sets:
        results_files = []
        for folder in folders:
            results_file = os.path.join(folder, file + "_result.txt")
            if os.path.isfile(results_file):
                results_files.append(results_file)
        # Is the experiment complete?
        if len(results_files) > 0:
            # Find best kernel and produce plots
            datafile = os.path.join(r, file + ".mat")
            data = gpml.my_load_mat(datafile)
            X = data[0]
            y = data[1]
            M = y.shape[1]  # Number of output columns
            D = data[2]
            iiii = 1  # Index of the output column to plot
            y = y[:, iiii]
            assert D == 1
            if rescale:
                # Load unscaled data to remove scaling later
                unscaled_file = os.path.join('../data/1d_data/', re.sub('-s$', '', file) + '.mat')
                data = gpml.load_mat(unscaled_file)
                (X_unscaled, y_unscaled) = (data[0], data[1])
                (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std())
                (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std())
            else:
                (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1)
            if all_depths:
                # A quick version for now TODO - write correct code
                models = [exp.parse_results(results_files, max_level=depth)[0] for depth in range(10)]
                suffices = ['-depth-%d' % (depth + 1) for depth in range(len(models))]
            else:
                models = [exp.parse_results(results_files)[0]]
                try:
                    suffices = ['-' + str(data[5][iiii]).replace(" ", "")]
                    # Uncomment to label by the dataset-specific name instead (e.g. house, stock):
                    # suffices = ['-' + str(data[5][iiii][0][0]).replace(" ", "")]
                except:
                    suffices = ['-' + str(iiii)]
            best_depth = exp.parse_results(results_files)[1]
            # NOTE: machine-specific absolute path to the saved scale parameters
            params_filename = ('/home/heechan/gpss-research-srkl' + results_files[0][2:] +
                               'lvl_' + str(best_depth) + '_0.mat1.mat')
            scale_params = scipy.io.loadmat(params_filename)['scale']
            scl1 = scale_params[0][iiii][0][0]
            scl2 = scale_params[0][iiii][1][0]
            for (model, suffix) in zip(models, suffices):
                model = model.simplified().canonical()
                # Fold the saved output scale back into the kernel via constant kernels
                model.kernel = model.kernel * ff.ConstKernel(sf=scl2) + ff.ConstKernel(sf=scl1)
                kernel_components = model.kernel.break_into_summands()
                kernel_components = ff.SumKernel(kernel_components).simplified().canonical().operands
                print model.pretty_print()
                fig_folder = os.path.join(save_folder, (prefix + file + suffix))
                if not os.path.exists(fig_folder):
                    os.makedirs(fig_folder)
                # First ask GPML to order the components
                print 'Determining order of components'
                (component_order, mae_data) = gpml.order_by_mae(model, kernel_components, X, y, D,
                                                                os.path.join(fig_folder, file + suffix),
                                                                skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Plotting decomposition and computing basic stats'
                component_data = gpml.component_stats(model, kernel_components, X, y, D,
                                                      os.path.join(fig_folder, file + suffix), component_order,
                                                      skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Computing model checking stats'
                checking_stats = gpml.checking_stats(model, kernel_components, X, y, D,
                                                     os.path.join(fig_folder, file + suffix), component_order,
                                                     make_plots=True, skip_kernel_evaluation=skip_kernel_evaluation)
                # Now the kernels have been evaluated we can translate the relevant ones
                evaluation_data = mae_data
                evaluation_data.update(component_data)
                evaluation_data.update(checking_stats)
                # Flatten the per-component statistics to 1d arrays
                for key in ('vars', 'cum_vars', 'cum_resid_vars', 'MAEs', 'MAE_reductions', 'monotonic',
                            'acf_min_p', 'acf_min_loc_p', 'pxx_max_p', 'pxx_max_loc_p',
                            'qq_d_max_p', 'qq_d_min_p'):
                    evaluation_data[key] = evaluation_data[key].ravel()
                i = 1
                short_descriptions = []
                while os.path.isfile(os.path.join(fig_folder, '%s_%d.fig' % (file + suffix, i))):
                    # Describe this component
                    (summary, sentences, extrap_sentences) = translation.translate_additive_component(
                        kernel_components[component_order[i - 1]], X,
                        evaluation_data['monotonic'][i - 1], evaluation_data['gradients'][i - 1], unit)
                    short_descriptions.append(summary)
                    paragraph = '.\n'.join(sentences) + '.'
                    extrap_paragraph = '.\n'.join(extrap_sentences) + '.'
                    with open(os.path.join(fig_folder, '%s_%d_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_extrap_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(extrap_paragraph)
                    with open(os.path.join(fig_folder, '%s_%d_short_description.tex' % (file + suffix, i)), 'w') as description_file:
                        description_file.write(summary + '.')
                    i += 1
                # Produce the summary LaTeX document
                print 'Producing LaTeX document'
                latex_summary = translation.produce_summary_document(file + suffix, i - 1, evaluation_data, short_descriptions)
                with open(os.path.join(save_folder, '%s.tex' % (file + suffix)), 'w') as latex_file:
                    latex_file.write(latex_summary)
                print 'Saving to ' + os.path.join(save_folder, '%s.tex' % (file + suffix))
        else:
            print "Cannot find results for %s" % file