Esempio n. 1
0
def polish_to_kernel(polish_expr):
    """Recursively turn a prefix-notation ("Polish") expression into a kernel.

    A tuple is read as ``(operator, arg, ...)``:

    * ``'+'`` / ``'*'``  -- sum / product of all converted arguments.
    * ``'*-const'``      -- first argument times (second argument + constant).
    * ``'CP'`` / ``'CW'``-- change-point / change-window over two copies of the
      converted third element, with the second element as ``dimension``.
    * ``'B'`` / ``'BL'`` -- change-window pairing a constant kernel with a copy
      of the converted third element (constant first for ``'B'``, last for
      ``'BL'``).
    * ``'None'``         -- a ``NoneKernel``.

    Anything that is not exactly a ``tuple`` is a leaf: kernels are returned
    as copies, ``None`` and plain ints are returned unchanged.

    Raises ``RuntimeError`` for an unrecognised operator.
    """
    if type(polish_expr) != tuple:
        # Leaf node: only kernels, None and ints are expected here.
        assert isinstance(polish_expr, ff.Kernel) or (polish_expr is None) or (type(polish_expr) == int)
        return polish_expr.copy() if isinstance(polish_expr, ff.Kernel) else polish_expr

    head = polish_expr[0]

    # N-ary arithmetic operators: convert every argument first.
    if head == '+':
        return ff.SumKernel([polish_to_kernel(arg) for arg in polish_expr[1:]])
    if head == '*':
        return ff.ProductKernel([polish_to_kernel(arg) for arg in polish_expr[1:]])
    if head == '*-const':
        parts = [polish_to_kernel(arg) for arg in polish_expr[1:]]
        inner_sum = ff.SumKernel([parts[1], ff.ConstKernel()])
        return ff.ProductKernel([parts[0], inner_sum])

    # Change-point style operators: (operator, dimension, sub-expression).
    if head == 'CP':
        inner = polish_to_kernel(polish_expr[2])
        return ff.ChangePointKernel(dimension=polish_expr[1], operands=[inner.copy(), inner.copy()])
    if head == 'CW':
        inner = polish_to_kernel(polish_expr[2])
        return ff.ChangeWindowKernel(dimension=polish_expr[1], operands=[inner.copy(), inner.copy()])
    if head == 'B':
        inner = polish_to_kernel(polish_expr[2])
        return ff.ChangeWindowKernel(dimension=polish_expr[1], operands=[ff.ConstKernel(), inner.copy()])
    if head == 'BL':
        inner = polish_to_kernel(polish_expr[2])
        return ff.ChangeWindowKernel(dimension=polish_expr[1], operands=[inner.copy(), ff.ConstKernel()])

    if head == 'None':
        return ff.NoneKernel()

    raise RuntimeError('Unknown operator: %s' % head)
 def test_simplify(self):
     """Check that ``simplified()`` yields a model that differs from the input.

     Two GP models are built, each with a sum-of-products kernel, and for
     each one the test asserts that the simplified model does not compare
     equal to the original.
     """
     def const_times_sqexp():
         return ff.ProductKernel(operands=[
             ff.ConstKernel(sf=0.170186999131),
             ff.SqExpKernel(dimension=0,
                            lengthscale=1.02215322228,
                            sf=5.9042619611),
         ])

     def noise_times_const():
         return ff.ProductKernel(operands=[
             ff.NoiseKernel(sf=2.43188502201),
             ff.ConstKernel(sf=-0.368638271154),
         ])

     def noise_times_periodic():
         return ff.ProductKernel(operands=[
             ff.NoiseKernel(sf=1.47110516981),
             ff.PeriodicKernel(dimension=0,
                               lengthscale=-1.19651800365,
                               period=0.550394248167,
                               sf=0.131044872864),
         ])

     def sqexp_times_periodic():
         return ff.ProductKernel(operands=[
             ff.SqExpKernel(dimension=0,
                            lengthscale=3.33346140605,
                            sf=3.7579461353),
             ff.PeriodicKernel(dimension=0,
                               lengthscale=0.669624964607,
                               period=0.00216264543496,
                               sf=2.41995024965),
         ])

     def build_model(summand_factories):
         # Factories are invoked here so every model gets fresh kernel objects.
         return ff.GPModel(mean=ff.MeanZero(),
                           kernel=ff.SumKernel(operands=[
                               make() for make in summand_factories
                           ]),
                           likelihood=ff.LikGauss(sf=-np.inf),
                           nll=599.59757993,
                           ndata=144)

     # Four-summand kernel.
     m = build_model([const_times_sqexp, noise_times_const,
                      noise_times_periodic, sqexp_times_periodic])
     assert not m.simplified() == m

     # Two-summand kernel (first two summands of the model above).
     m = build_model([const_times_sqexp, noise_times_const])
     assert not m.simplified() == m
Esempio n. 3
0
def removeKernelParams(kernel):
    """
    Rebuild a GPSS kernel with its hyperparameters dropped.

    Base kernels are re-created passing only their ``dimension`` (or no
    arguments for constant kernels), so every hyperparameter falls back to
    the constructor default. Sum and product kernels are rebuilt recursively
    from their operands. A ``NoneKernel`` is returned as-is (same object).

    :param kernel: a GPSS kernel (instance of flexible_function.Kernel)
    :returns: a GPSS kernel without parameter initialisation
    :raises NotImplementedError: if the kernel type is not handled here
    """
    assert isinstance(
        kernel, ff.Kernel), "kernel must be of type flexible_function.Kernel"

    if isinstance(kernel, ff.SqExpKernel):
        return ff.SqExpKernel(dimension=kernel.dimension)

    elif isinstance(kernel, ff.PeriodicKernel):
        return ff.PeriodicKernel(dimension=kernel.dimension)

    elif isinstance(kernel, ff.ConstKernel):
        return ff.ConstKernel()

    elif isinstance(kernel, ff.SumKernel):
        # Build a real list: on Python 3 ``map`` yields a one-shot iterator,
        # which is not a safe stand-in for an operand list.
        return ff.SumKernel([removeKernelParams(op) for op in kernel.operands])

    elif isinstance(kernel, ff.ProductKernel):
        return ff.ProductKernel(
            [removeKernelParams(op) for op in kernel.operands])

    elif isinstance(kernel, ff.NoneKernel):
        return kernel

    else:
        raise NotImplementedError("Unrecognised kernel type " +
                                  type(kernel).__name__)
Esempio n. 4
0
def gpy2gpss(kernel):
    """
    Convert a GPy kernel to a GPSS kernel recursively.

    Support only:
    1) 1-D squared exponential kernels
    2) 1-D periodic kernels
    3) constant kernels (called `bias` in GPy)
    4) sum kernels
    5) product kernels

    For base kernels the GPSS ``sf`` is set to the square root of the GPy
    ``variance``; lengthscale, period and active dimension are taken from
    the first entry of the corresponding GPy attribute.

    :param kernel: a GPy kernel (instance of GPy.kern.Kern)
    :returns: the corresponding GPSS kernel as defined in flexible_function.py
    :raises NotImplementedError: if the GPy kernel type is not supported
    """
    assert isinstance(kernel,
                      GPy.kern.Kern), "kernel must be of type GPy.kern.Kern"

    if isinstance(kernel, GPy.kern.RBF):
        sf = np.sqrt(kernel.variance)[0]
        ls = kernel.lengthscale[0]
        dim = kernel.active_dims[0]
        return ff.SqExpKernel(dimension=dim, lengthscale=ls, sf=sf)

    elif isinstance(kernel, GPy.kern.StdPeriodic):
        sf = np.sqrt(kernel.variance)[0]
        ls = kernel.lengthscale[0]
        per = kernel.period[0]
        dim = kernel.active_dims[0]
        return ff.PeriodicKernel(dimension=dim,
                                 lengthscale=ls,
                                 period=per,
                                 sf=sf)

    elif isinstance(kernel, GPy.kern.Bias):
        sf = np.sqrt(kernel.variance)[0]
        return ff.ConstKernel(sf=sf)

    elif isinstance(kernel, GPy.kern.Add):
        # Build a real list: on Python 3 ``map`` yields a one-shot iterator,
        # which is not a safe stand-in for an operand list.
        return ff.SumKernel([gpy2gpss(part) for part in kernel.parts])

    elif isinstance(kernel, GPy.kern.Prod):
        return ff.ProductKernel([gpy2gpss(part) for part in kernel.parts])

    else:
        raise NotImplementedError("Cannot translate kernel of type " +
                                  type(kernel).__name__)
Esempio n. 5
0
def make_all_1d_figures(folders,
                        save_folder='../figures/decomposition/',
                        prefix='',
                        rescale=False,
                        data_folder=None,
                        skip_kernel_evaluation=False,
                        unit='year',
                        all_depths=False):
    """Crawls the results directory, and makes decomposition plots for each file.

    For every data set, each folder in ``folders`` is searched for
    ``<name>_result.txt``. When at least one is found, the model(s) are parsed
    from the results, decomposed into additive kernel components, evaluated
    through the ``gpml`` helpers, and described in LaTeX snippets plus one
    summary ``.tex`` document.

    folders -- a list of result folders, or a single folder (wrapped in a list)
    save_folder -- root output directory; per-model figures/descriptions go in
        ``save_folder/<prefix + name + suffix>/`` and the summary document in
        ``save_folder`` itself
    prefix -- an optional string prepended to the output directory
    rescale -- if true, data sets come from ``../data/1d_data_rescaled/`` and
        the matching unscaled file is loaded to compute mean/std statistics
    data_folder -- data set directory used when ``rescale`` is false
        (``../data/1d_data/`` when None)
    skip_kernel_evaluation -- forwarded unchanged to the ``gpml`` helpers
    unit -- forwarded to ``translation.translate_additive_component``
    all_depths -- if true, one model per ``max_level`` in ``range(10)`` is
        processed instead of only the best model
    """

    if not isinstance(folders, list):
        folders = [folders
                   ]  # Backward compatibility with specifying one folder
    #### Quick fix to axis scaling
    #### TODO - Ultimately this and the shunt below should be removed / made elegant
    if rescale:
        data_sets = list(exp.gen_all_datasets("../data/1d_data_rescaled/"))
    else:
        if data_folder is None:
            data_sets = list(exp.gen_all_datasets("../data/1d_data/"))
        else:
            data_sets = list(exp.gen_all_datasets(data_folder))
    # Each entry is unpacked as (directory, data set name without extension).
    for r, file in data_sets:
        # Collect every existing result file for this data set.
        results_files = []
        for folder in folders:
            results_file = os.path.join(folder, file + "_result.txt")
            if os.path.isfile(results_file):
                results_files.append(results_file)
        # Is the experiment complete
        if len(results_files) > 0:
            # Find best kernel and produce plots
            datafile = os.path.join(r, file + ".mat")
            data = gpml.my_load_mat(datafile)
            X = data[0]
            y = data[1]
            # NOTE(review): M is not referenced again in this function.
            M = y.shape[1]
            D = data[2]
            # Hard-coded index of the single output column that is analysed.
            iiii = 1
            y = y[:, iiii]
            assert D == 1
            if rescale:
                # Load unscaled data to remove scaling later
                unscaled_file = os.path.join('../data/1d_data/',
                                             re.sub('-s$', '', file) + '.mat')
                # NOTE(review): this rebinds `data`, so the data[5] lookup
                # further down reads from the unscaled file in this branch.
                data = gpml.load_mat(unscaled_file)
                (X_unscaled, y_unscaled) = (data[0], data[1])
                (X_mean, X_scale) = (X_unscaled.mean(), X_unscaled.std())
                (y_mean, y_scale) = (y_unscaled.mean(), y_unscaled.std())
            else:
                (X_mean, X_scale, y_mean, y_scale) = (0, 1, 0, 1)
            # NOTE(review): X_mean/X_scale/y_mean/y_scale are not used below
            # in this function.

            if all_depths:
                # A quick version for now TODO - write correct code
                models = [
                    exp.parse_results(results_files, max_level=depth)[0]
                    for depth in range(10)
                ]
                suffices = [
                    '-depth-%d' % (depth + 1) for depth in range(len(models))
                ]
                # NOTE(review): scl1/scl2 are only assigned in the else branch
                # below but are used unconditionally in the model loop, so
                # this branch looks like it fails with a NameError - confirm.
            else:
                models = [exp.parse_results(results_files)[0]]
                # Output suffix: entry `iiii` of data[5] with spaces removed,
                # falling back to the index itself if that lookup fails.
                # NOTE(review): bare except hides every error type here.
                try:
                    suffices = ['-' + str(data[5][iiii]).replace(" ", "")]
                    #suffices = ['-'+str(data[5][iiii][0][0]).replace(" ","")] uncomment this if you test given dataset(house, stock and so on)
                except:
                    suffices = ['-' + str(iiii)]
                #suffices = ['-'+str(iiii)]
                best_depth = exp.parse_results(results_files)[1]
                # NOTE(review): absolute, machine-specific path; it also drops
                # the first two characters of the first results file path
                # (presumably a leading "./" or ".." - confirm).
                params_filename = '/home/heechan/gpss-research-srkl' + results_files[
                    0][2:] + 'lvl_' + str(best_depth) + '_0.mat1.mat'
                # Scale parameters stored for column `iiii` in the .mat file.
                scale_params = scipy.io.loadmat(params_filename)['scale']
                scl1 = scale_params[0][iiii][0][0]
                scl2 = scale_params[0][iiii][1][0]

            for (model, suffix) in zip(models, suffices):
                model = model.simplified().canonical()
                # Wrap the kernel as kernel * Const(sf=scl2) + Const(sf=scl1).
                model.kernel = model.kernel * ff.ConstKernel(
                    sf=scl2) + ff.ConstKernel(sf=scl1)
                # Split into additive components, then simplify/canonicalise
                # the sum of those components and take its operands.
                kernel_components = model.kernel.break_into_summands()
                kernel_components = ff.SumKernel(
                    kernel_components).simplified().canonical().operands
                print model.pretty_print()
                fig_folder = os.path.join(save_folder,
                                          (prefix + file + suffix))
                if not os.path.exists(fig_folder):
                    os.makedirs(fig_folder)
                # First ask GPML to order the components
                print 'Determining order of components'
                (component_order, mae_data) = gpml.order_by_mae(
                    model,
                    kernel_components,
                    X,
                    y,
                    D,
                    os.path.join(fig_folder, file + suffix),
                    skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Plotting decomposition and computing basic stats'
                component_data = gpml.component_stats(
                    model,
                    kernel_components,
                    X,
                    y,
                    D,
                    os.path.join(fig_folder, file + suffix),
                    component_order,
                    skip_kernel_evaluation=skip_kernel_evaluation)
                print 'Computing model checking stats'
                checking_stats = gpml.checking_stats(
                    model,
                    kernel_components,
                    X,
                    y,
                    D,
                    os.path.join(fig_folder, file + suffix),
                    component_order,
                    make_plots=True,
                    skip_kernel_evaluation=skip_kernel_evaluation)
                # Now the kernels have been evaluated we can translate the relevant ones
                # Merge all statistics into one dict (this aliases and
                # therefore mutates mae_data), then flatten array entries.
                evaluation_data = mae_data
                evaluation_data.update(component_data)
                evaluation_data.update(checking_stats)
                evaluation_data['vars'] = evaluation_data['vars'].ravel()
                evaluation_data['cum_vars'] = evaluation_data[
                    'cum_vars'].ravel()
                evaluation_data['cum_resid_vars'] = evaluation_data[
                    'cum_resid_vars'].ravel()
                evaluation_data['MAEs'] = evaluation_data['MAEs'].ravel()
                evaluation_data['MAE_reductions'] = evaluation_data[
                    'MAE_reductions'].ravel()
                evaluation_data['monotonic'] = evaluation_data[
                    'monotonic'].ravel()
                evaluation_data['acf_min_p'] = evaluation_data[
                    'acf_min_p'].ravel()
                evaluation_data['acf_min_loc_p'] = evaluation_data[
                    'acf_min_loc_p'].ravel()
                evaluation_data['pxx_max_p'] = evaluation_data[
                    'pxx_max_p'].ravel()
                evaluation_data['pxx_max_loc_p'] = evaluation_data[
                    'pxx_max_loc_p'].ravel()
                evaluation_data['qq_d_max_p'] = evaluation_data[
                    'qq_d_max_p'].ravel()
                evaluation_data['qq_d_min_p'] = evaluation_data[
                    'qq_d_min_p'].ravel()
                # Components are numbered from 1; keep going for as long as a
                # matching '<name>_<i>.fig' file exists in fig_folder.
                i = 1
                short_descriptions = []
                while os.path.isfile(
                        os.path.join(fig_folder,
                                     '%s_%d.fig' % (file + suffix, i))):
                    # Describe this component
                    (summary, sentences, extrap_sentences
                     ) = translation.translate_additive_component(
                         kernel_components[component_order[i - 1]], X,
                         evaluation_data['monotonic'][i - 1],
                         evaluation_data['gradients'][i - 1], unit)
                    short_descriptions.append(summary)
                    paragraph = '.\n'.join(sentences) + '.'
                    extrap_paragraph = '.\n'.join(extrap_sentences) + '.'
                    # Three .tex snippets per component: full description,
                    # extrapolation description, and one-line summary.
                    with open(
                            os.path.join(
                                fig_folder,
                                '%s_%d_description.tex' % (file + suffix, i)),
                            'w') as description_file:
                        description_file.write(paragraph)
                    with open(
                            os.path.join(
                                fig_folder, '%s_%d_extrap_description.tex' %
                                (file + suffix, i)), 'w') as description_file:
                        description_file.write(extrap_paragraph)
                    with open(
                            os.path.join(
                                fig_folder, '%s_%d_short_description.tex' %
                                (file + suffix, i)), 'w') as description_file:
                        description_file.write(summary + '.')
                    i += 1
                # Produce the summary LaTeX document
                # (i - 1 is the number of components described above.)
                print 'Producing LaTeX document'
                latex_summary = translation.produce_summary_document(
                    file + suffix, i - 1, evaluation_data, short_descriptions)
                with open(
                        os.path.join(save_folder, '%s.tex' % (file + suffix)),
                        'w') as latex_file:
                    latex_file.write(latex_summary)
                print 'Saving to ' + (os.path.join(save_folder, '%s.tex' %
                                                   (file + suffix)))
        else:
            print "Cannnot find results for %s" % file