Example No. 1
import cPickle

import flexiblekernel
import grammar
# remove_nan_scored_kernels and remove_duplicates are helpers defined
# elsewhere in the same module (see the sketch after this example).

def perform_search(X,
                   y,
                   scheduler,
                   max_depth,
                   params,
                   verbose=False,
                   output_fname_fn=None):
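    # D is the input dimensionality; the search starts from all base
    # kernels applied to each input dimension.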
    D = X.shape[1]
    current_kernels = list(flexiblekernel.base_kernels(D))

    all_scored_kernels = []
    scored_kernels_by_level = []
    for depth in range(max_depth):
        if verbose:
            print 'Level', depth + 1
        current_kernels = flexiblekernel.add_random_restarts(
            current_kernels, params.n_restarts, params.restart_std)

        if verbose:
            print 'Evaluating kernels...'
        scored_kernels = scheduler.evaluate_kernels(current_kernels, X, y)

        scored_kernels = remove_nan_scored_kernels(scored_kernels)
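        # Lower scores are better: sort ascending, then keep only the
        # num_winners best kernels.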
        scored_kernels.sort(key=flexiblekernel.ScoredKernel.score)
        scored_kernels = scored_kernels[:params.num_winners]
        if verbose:
            print 'Removing duplicates...'
        scored_kernels = remove_duplicates(scored_kernels, X,
                                           params.num_subsample,
                                           params.proj_dim, params.rel_cutoff)
        scored_kernels.sort(key=flexiblekernel.ScoredKernel.score)

        all_scored_kernels += scored_kernels
        scored_kernels_by_level.append(scored_kernels)

        best_kernels = [k.k_opt for k in scored_kernels[:params.num_expand]]
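        # Expand the best kernels via the grammar to get candidates for the next level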
        current_kernels = grammar.expand_kernels(D, best_kernels)

        if output_fname_fn is not None:
            if verbose:
                print 'Saving results...'
            fname = output_fname_fn(depth)
            # Note: this pickles the expanded kernels that will seed the
            # next level, not the scored results from the current level.
            with open(fname, 'wb') as f:
                cPickle.dump(current_kernels, f, protocol=2)

    all_scored_kernels.sort(key=flexiblekernel.ScoredKernel.score)
    return all_scored_kernels, scored_kernels_by_level
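
Both examples call a remove_nan_scored_kernels helper that is defined elsewhere in the module. A minimal sketch of what it plausibly does, assuming each scored kernel exposes a score() method returning a float (as the key=flexiblekernel.ScoredKernel.score sorts above suggest):

import numpy as np

def remove_nan_scored_kernels(scored_kernels):
    # Failed optimisations can produce NaN scores, and comparisons against
    # NaN make sorting return an arbitrary order, so drop those kernels.
    return [sk for sk in scored_kernels if not np.isnan(sk.score())]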
Example No. 2
import os

import numpy as np

import flexiblekernel as fk
import grammar
import job_controller as jc       # kernel-evaluation job dispatcher
import utils.misc
from flexiblekernel import ScoredKernel
# remove_nan_scored_kernels, remove_duplicates and experiment_fields_to_str
# are helpers defined elsewhere in the surrounding module.

def perform_kernel_search(X, y, D, experiment_data_file_name, results_filename, exp):
    '''Search for the best kernel, in parallel on fear (a remote machine) or the local machine.'''
    
    # Initialise random seeds - randomness may be used in e.g. data subsetting
    utils.misc.set_all_random_seeds(exp.random_seed)

    # Initialise kernels to be all base kernels along all dimensions.
    current_kernels = list(fk.base_kernels(D, exp.base_kernels))
    
    # Create location, scale and minimum period parameters to pass around for initialisations
    data_shape = {}
    data_shape['input_location'] = [np.mean(X[:,dim]) for dim in range(X.shape[1])]
    data_shape['output_location'] = np.mean(y)
    data_shape['input_scale'] = np.log([np.std(X[:,dim]) for dim in range(X.shape[1])])
    data_shape['output_scale'] = np.log(np.std(y)) 
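    # Note: scales are passed in log space, suggesting the kernel
    # hyperparameters are optimised in log space.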
    # Initialise period at a multiple of the shortest / average distance between points, to prevent Nyquist problems.
    if exp.use_min_period:
        data_shape['min_period'] = np.log([max(exp.period_heuristic * utils.misc.min_abs_diff(X[:,i]),
                                               exp.period_heuristic * np.ptp(X[:,i]) / X.shape[0])
                                           for i in range(X.shape[1])])
    else:
        data_shape['min_period'] = None
    #### TODO - make the below and above more elegant
    if exp.use_constraints:
        data_shape['min_alpha'] = exp.alpha_heuristic
        data_shape['min_lengthscale'] = exp.lengthscale_heuristic + data_shape['input_scale']
    else:
        data_shape['min_alpha'] = None
        data_shape['min_lengthscale'] = None
    
    all_results = []
    results_sequence = []     # List of lists of results, indexed by level of expansion.
    
    # Perform search
    for depth in range(exp.max_depth):
        
        if exp.debug:
            # Debug mode: restrict the search to the first few kernels.
            current_kernels = current_kernels[0:4]
             
        # Add random restarts to kernels
        current_kernels = fk.add_random_restarts(current_kernels, exp.n_rand, exp.sd,
                                                 data_shape=data_shape)
        # Score the kernels
        new_results = jc.evaluate_kernels(current_kernels, X, y, verbose=exp.verbose,
                                          local_computation=exp.local_computation,
                                          zip_files=False, max_jobs=exp.max_jobs,
                                          iters=exp.iters, zero_mean=exp.zero_mean,
                                          random_seed=exp.random_seed)
        # Enforce the period heuristic
        #### TODO - Concept of parameter constraints is more general than this - make it so
        if exp.use_min_period:
            new_results = [sk for sk in new_results if not sk.k_opt.out_of_bounds(data_shape)]
        # Some of the evaluations may have failed - remove NaN scores so they
        # do not corrupt the sort
        new_results = remove_nan_scored_kernels(new_results)
        assert len(new_results) > 0  # FIXME - Need correct control flow if this happens
        # Sort the new results (lower scores are better; reverse=True prints the best last)
        new_results = sorted(new_results, key=ScoredKernel.score, reverse=True)
        
        print 'All new results:'
        for result in new_results:
            print result.nll, result.laplace_nle, result.bic_nle, result.k_opt.pretty_print()
            
        # Remove near-duplicates from these results (top results only, for efficiency)
        if exp.k > 1:
            # Only remove duplicates if they affect the search
            new_results = remove_duplicates(new_results, X, local_computation=exp.local_computation,
                                            verbose=exp.verbose)

        print 'All new results after duplicate removal:'
        for result in new_results:
            print result.nll, result.laplace_nle, result.bic_nle, result.k_opt.pretty_print()

        all_results = all_results + new_results
        all_results = sorted(all_results, key=ScoredKernel.score, reverse=True)

        results_sequence.append(all_results)
        if exp.verbose:
            print 'Printing all results'
            for result in all_results:
                print result.nll, result.laplace_nle, result.bic_nle, result.k_opt.pretty_print()
        
        # Extract the best k kernels from the new results
        best_results = sorted(new_results, key=ScoredKernel.score)[0:exp.k]
        best_kernels = [r.k_opt for r in best_results]
        current_kernels = grammar.expand_kernels(D, best_kernels, verbose=exp.verbose,
                                                 debug=exp.debug, base_kernels=exp.base_kernels)
        
        if exp.debug:
            current_kernels = current_kernels[0:4]

        # Write all_results to a temporary file at each level.
        all_results = sorted(all_results, key=ScoredKernel.score, reverse=True)
        with open(results_filename + '.unfinished', 'w') as outfile:
            outfile.write('Experiment results for\n datafile = %s\n\n %s \n\n' \
                          % (experiment_data_file_name, experiment_fields_to_str(exp)))
            # 'level_results' rather than 'all_results', to avoid shadowing the outer list
            for (i, level_results) in enumerate(results_sequence):
                outfile.write('\n%%%%%%%%%% Level %d %%%%%%%%%%\n\n' % i)
                if exp.verbose_results:
                    for result in level_results:
                        print >> outfile, result
                else:
                    # Only print top k kernels - i.e. those used to seed the next level of the search
                    for result in best_results:
                        print >> outfile, result
    
    # Rename temporary results file to actual results file                
    os.rename(results_filename + '.unfinished', results_filename)
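
A sketch of how this search might be driven. The Exp class below is a hypothetical stand-in for the codebase's experiment object: the attribute names are exactly those the function reads, but every value is an illustrative guess rather than a known default.

import numpy as np

class Exp(object):
    # Hypothetical settings; names match the exp.* attributes used above,
    # values are illustrative only.
    random_seed = 0
    base_kernels = 'SE,Lin,Per,RQ'  # guessed base-kernel specification
    use_min_period = True
    period_heuristic = 5
    use_constraints = False
    alpha_heuristic = None          # unused when use_constraints is False
    lengthscale_heuristic = None    # unused when use_constraints is False
    max_depth = 4
    debug = False
    n_rand = 3                      # random restarts per kernel
    sd = 4                          # std dev of restart perturbations
    verbose = True
    verbose_results = False
    local_computation = True
    max_jobs = 500
    iters = 100
    zero_mean = True
    k = 1                           # kernels expanded into the next level

X = np.linspace(0, 10, 200).reshape(-1, 1)
y = np.sin(X) + 0.1 * np.random.randn(200, 1)
perform_kernel_search(X, y, D=X.shape[1],
                      experiment_data_file_name='toy_data.mat',
                      results_filename='toy_results.txt',
                      exp=Exp())

The function returns nothing: results are written to results_filename (first to a '.unfinished' temporary file, renamed on completion), so callers read the file afterwards.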