def test_expand(self):
    '''Exercise grammar.expand_kernels on SE seeds in 1, 2 and 3 dimensions.'''
    print 'expand'

    print '1d'
    k = ff.SqExpKernel(dimension=0, lengthscale=0, sf=0)
    expanded = grammar.expand_kernels(1, [k], base_kernels='SE', rules=None)
    for k in expanded:
        print '\n', k.pretty_print(), '\n'

    print '2d'
    k = ff.SqExpKernel(dimension=0, lengthscale=0, sf=0)
    expanded = grammar.expand_kernels(2, [k], base_kernels='SE', rules=None)
    for k in expanded:
        print '\n', k.pretty_print(), '\n'

    print '3d'
    k = ff.SqExpKernel(dimension=0, lengthscale=0, sf=0)
    expanded = grammar.expand_kernels(3, [k], base_kernels='SE', rules=None)
    for k in expanded:
        print '\n', k.pretty_print(), '\n'

    print '3d with two SEs'
    k = ff.SqExpKernel(dimension=0, lengthscale=0, sf=0)
    expanded = grammar.expand_kernels(3, [k + k.copy()], base_kernels='SE', rules=None)
    for k in expanded:
        print '\n', k.pretty_print(), '\n'
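# For context, a toy sketch of the kind of one-step grammar expansion these tests
# exercise. This is an illustration only, not the repository's grammar.expand_kernels:
# it assumes production rules of the form S -> S + B and S -> S * B for each base
# kernel B, applied here to a plain string representation of the expression.
def toy_expand(expr, base_kernels=('SE',)):
    '''Return the one-step expansions of a kernel expression string.'''
    expansions = []
    for b in base_kernels:
        expansions.append('(%s) + %s' % (expr, b))  # additive production rule
        expansions.append('(%s) * %s' % (expr, b))  # multiplicative production rule
    return expansions

print toy_expand('SE')  # ['(SE) + SE', '(SE) * SE']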
def perform_search(X, y, scheduler, max_depth, params, verbose=False, output_fname_fn=None):
    '''Greedy kernel structure search: score, prune, and expand for max_depth levels.'''
    D = X.shape[1]
    current_kernels = list(flexiblekernel.base_kernels(D))
    all_scored_kernels = []
    scored_kernels_by_level = []
    for depth in range(max_depth):
        if verbose:
            print 'Level', depth + 1
        # Perturb each candidate's parameters to get several optimisation restarts.
        current_kernels = flexiblekernel.add_random_restarts(current_kernels,
                                                             params.n_restarts, params.restart_std)
        if verbose:
            print 'Evaluating kernels...'
        scored_kernels = scheduler.evaluate_kernels(current_kernels, X, y)
        # Failed fits produce NaN scores; drop them so the sort is well defined.
        scored_kernels = remove_nan_scored_kernels(scored_kernels)
        scored_kernels.sort(key=flexiblekernel.ScoredKernel.score)
        scored_kernels = scored_kernels[:params.num_winners]
        if verbose:
            print 'Removing duplicates...'
        scored_kernels = remove_duplicates(scored_kernels, X, params.num_subsample,
                                           params.proj_dim, params.rel_cutoff)
        scored_kernels.sort(key=flexiblekernel.ScoredKernel.score)
        all_scored_kernels += scored_kernels
        scored_kernels_by_level.append(scored_kernels)
        # Seed the next level with expansions of the best kernels from this one.
        best_kernels = [k.k_opt for k in scored_kernels[:params.num_expand]]
        current_kernels = grammar.expand_kernels(D, best_kernels)
        if output_fname_fn is not None:
            if verbose:
                print 'Saving results...'
            fname = output_fname_fn(depth)
            cPickle.dump(current_kernels, open(fname, 'wb'), protocol=2)
    all_scored_kernels.sort(key=flexiblekernel.ScoredKernel.score)
    return all_scored_kernels, scored_kernels_by_level
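# perform_search and perform_kernel_search both call remove_nan_scored_kernels,
# which is not shown in this section. A minimal sketch, assuming each ScoredKernel
# exposes a score() method that returns NaN when optimisation failed (consistent
# with its use as a sort key above); the real implementation may differ.
import numpy as np

def remove_nan_scored_kernels(scored_kernels):
    # Drop kernels whose score is NaN so that subsequent sorts are well defined.
    return [sk for sk in scored_kernels if not np.isnan(sk.score())]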
def perform_kernel_search(X, y, D, experiment_data_file_name, results_filename, exp):
    '''Search for the best kernel, in parallel on fear or local machine.'''
    # Initialise random seeds - randomness may be used in e.g. data subsetting
    utils.misc.set_all_random_seeds(exp.random_seed)

    # Initialise kernels to be all base kernels along all dimensions.
    current_kernels = list(fk.base_kernels(D, exp.base_kernels))

    # Create location, scale and minimum period parameters to pass around for initialisations
    data_shape = {}
    data_shape['input_location'] = [np.mean(X[:, dim]) for dim in range(X.shape[1])]
    data_shape['output_location'] = np.mean(y)
    data_shape['input_scale'] = np.log([np.std(X[:, dim]) for dim in range(X.shape[1])])
    data_shape['output_scale'] = np.log(np.std(y))

    # Initialise period at a multiple of the shortest / average distance between points,
    # to prevent Nyquist problems.
    if exp.use_min_period:
        data_shape['min_period'] = np.log([max(exp.period_heuristic * utils.misc.min_abs_diff(X[:, i]),
                                               exp.period_heuristic * np.ptp(X[:, i]) / X.shape[0])
                                           for i in range(X.shape[1])])
    else:
        data_shape['min_period'] = None

    #### TODO - make the below and above more elegant
    if exp.use_constraints:
        data_shape['min_alpha'] = exp.alpha_heuristic
        data_shape['min_lengthscale'] = exp.lengthscale_heuristic + data_shape['input_scale']
    else:
        data_shape['min_alpha'] = None
        data_shape['min_lengthscale'] = None

    all_results = []
    results_sequence = []  # List of lists of results, indexed by level of expansion.

    # Perform the search
    for depth in range(exp.max_depth):
        if exp.debug:
            current_kernels = current_kernels[0:4]

        # Add random restarts to kernels
        current_kernels = fk.add_random_restarts(current_kernels, exp.n_rand, exp.sd,
                                                 data_shape=data_shape)

        # Score the kernels
        new_results = jc.evaluate_kernels(current_kernels, X, y, verbose=exp.verbose,
                                          local_computation=exp.local_computation,
                                          zip_files=False, max_jobs=exp.max_jobs,
                                          iters=exp.iters, zero_mean=exp.zero_mean,
                                          random_seed=exp.random_seed)

        # Enforce the period heuristic
        #### TODO - Concept of parameter constraints is more general than this - make it so
        if exp.use_min_period:
            new_results = [sk for sk in new_results if not sk.k_opt.out_of_bounds(data_shape)]

        # Some of the scores may have failed - remove NaNs to prevent sorting algorithms messing up
        new_results = remove_nan_scored_kernels(new_results)
        assert(len(new_results) > 0)  # FIXME - need correct control flow if this happens

        # Sort the new results
        new_results = sorted(new_results, key=ScoredKernel.score, reverse=True)
        print 'All new results:'
        for result in new_results:
            print result.nll, result.laplace_nle, result.bic_nle, result.k_opt.pretty_print()

        # Remove near-duplicates from the new results (top m results only, for efficiency)
        if exp.k > 1:
            # Only remove duplicates if they affect the search
            new_results = remove_duplicates(new_results, X, local_computation=exp.local_computation,
                                            verbose=exp.verbose)
        print 'All new results after duplicate removal:'
        for result in new_results:
            print result.nll, result.laplace_nle, result.bic_nle, result.k_opt.pretty_print()

        all_results = all_results + new_results
        all_results = sorted(all_results, key=ScoredKernel.score, reverse=True)
        results_sequence.append(all_results)
        if exp.verbose:
            print 'Printing all results'
            for result in all_results:
                print result.nll, result.laplace_nle, result.bic_nle, result.k_opt.pretty_print()

        # Extract the best k kernels from the new results
        best_results = sorted(new_results, key=ScoredKernel.score)[0:exp.k]
        best_kernels = [r.k_opt for r in best_results]
        current_kernels = grammar.expand_kernels(D, best_kernels, verbose=exp.verbose,
                                                 debug=exp.debug, base_kernels=exp.base_kernels)
        if exp.debug:
            current_kernels = current_kernels[0:4]

        # Write results to a temporary file at each level.
        all_results = sorted(all_results, key=ScoredKernel.score, reverse=True)
        with open(results_filename + '.unfinished', 'w') as outfile:
            outfile.write('Experiment results for\n datafile = %s\n\n %s \n\n'
                          % (experiment_data_file_name, experiment_fields_to_str(exp)))
            # Note: the loop variable must not shadow all_results, or the accumulated
            # results would be clobbered on subsequent levels.
            for (i, results) in enumerate(results_sequence):
                outfile.write('\n%%%%%%%%%% Level %d %%%%%%%%%%\n\n' % i)
                if exp.verbose_results:
                    for result in results:
                        print >> outfile, result
                else:
                    # Only print top k kernels - i.e. those used to seed the next level of the search
                    for result in best_results:
                        print >> outfile, result

    # Rename temporary results file to actual results file
    os.rename(results_filename + '.unfinished', results_filename)
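# A hypothetical driver showing how perform_kernel_search might be invoked. The
# Namespace below stands in for the repository's experiment settings object; the
# field names are taken from the function body above, the values are placeholders.
from argparse import Namespace
import numpy as np

exp = Namespace(random_seed=0, base_kernels='SE,Per,Lin', max_depth=8, k=1,
                n_rand=3, sd=2, iters=100, max_jobs=500, zero_mean=True,
                use_min_period=True, period_heuristic=10,
                use_constraints=False, alpha_heuristic=-2, lengthscale_heuristic=-4.5,
                local_computation=True, debug=False, verbose=True, verbose_results=False)
X = np.sort(np.random.randn(100, 1), axis=0)  # placeholder 1-D inputs
y = np.random.randn(100, 1)                   # placeholder targets
perform_kernel_search(X, y, X.shape[1], 'placeholder.mat', 'results.txt', exp)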
def test_kernel_expand_multi_d():
    D = 3
    k_base = list(fk.base_kernels(D))
    k_expanded = grammar.expand_kernels(D, k_base)
    assert len(k_expanded) > len(k_base)
def test_kernel_expand():
    k = fk.Carls_Mauna_kernel()
    k_expanded = grammar.expand_kernels(1, [k])
    assert len(k_expanded) > 1
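# If these tests are not run through a test harness such as nose, a simple
# direct-run hook (an addition for convenience, not part of the original module):
if __name__ == '__main__':
    test_kernel_expand_multi_d()
    test_kernel_expand()
    print 'Kernel expansion tests passed'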