Ejemplo n.º 1
0
def generate_pinched_twin_bumps_quasi_mirrored_gp(d):
    """Build the "top" and "bottom" reference arcs on a grid of d points.

    Five anchor abscissae on [0, 1] are paired with two sets of anchor
    ordinates (one positive, one negative, both pinned to 0 at the two
    ends).  For each set, gaussian_process.sample_trajectory_1D is called
    with the anchors, a kernel closure and a d-point evaluation grid, and
    the "samples" entry of its result is kept as the arc.

    d -- number of evaluation points on [0, 1]; must be greater than 2.

    Returns the tuple
        (x_star, top_arc, bottom_arc, obs_noise_stddev, kernel_stddev)
    where x_star is the evaluation grid and the last two entries are the
    hard-coded noise / kernel parameters used here.
    """
    # Asking for two points or fewer does not make much sense.
    assert d > 2

    # Hard-coded parameters; they are handed back to the caller as well.
    obs_noise_stddev = 0.00001
    kernel_stddev = 10.0

    anchor_x = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
    x_star = np.linspace(0.0, 1.0, d)

    def kernel(x1, x2):
        return gaussian_process.square_distance_kernel_1D(
            x1, x2, kernel_stddev)

    def draw_arc(anchor_y):
        # Only the "samples" entry of the result is used.
        outcome = gaussian_process.sample_trajectory_1D(
            anchor_x, anchor_y, kernel, x_star, obs_noise_stddev)
        return outcome["samples"]

    # Top arc first, then the bottom arc (some kind of projection of the
    # top one through x=0.5); the call order is kept in case the sampler
    # consumes random state.
    top_arc = draw_arc(np.array([0.0, 1.0, 0.5, 0.25, 0.0]))
    bottom_arc = draw_arc(np.array([0.0, -0.25, -0.5, -1.0, 0.0]))

    # NOTE: an earlier draft sketched per-arc sampler closures
    # (arc_sampler / top_arc_sampler / bottom_arc_sampler) and suggested
    # also funnelling the loglikelihood through; for now only the samples
    # and the parameters are returned.  Might want to return more useful
    # stuff.
    return (x_star, top_arc, bottom_arc, obs_noise_stddev, kernel_stddev)
def generate_pinched_twin_bumps_quasi_mirrored_gp(d):
    """Return two arcs evaluated on d evenly spaced points of [0, 1].

    Each arc is the "samples" entry of a call to
    gaussian_process.sample_trajectory_1D, fed with five anchor points,
    a kernel closure (kernel_stddev = 10.0) and a very small observation
    noise (obs_noise_stddev = 0.00001).

    d -- size of the evaluation grid; the function asserts d > 2.

    Returns (x_star, top_arc, bottom_arc, obs_noise_stddev, kernel_stddev).
    """
    # A dimension of 2 or less doesn't make much sense here.
    assert d > 2

    x = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
    x_star = np.linspace(0.0, 1.0, d)
    obs_noise_stddev = 0.00001
    kernel_stddev = 10.0

    def kernel(x1, x2):
        return gaussian_process.square_distance_kernel_1D(
            x1, x2, kernel_stddev)

    # Anchor ordinates: the top arc first, then the bottom arc (some kind
    # of projection of the top one through x=0.5).
    anchor_ordinates = (
        [0.0, 1.0, 0.5, 0.25, 0.0],
        [0.0, -0.25, -0.5, -1.0, 0.0],
    )

    # Evaluated strictly in order (top, then bottom), one call per arc.
    top_arc, bottom_arc = [
        gaussian_process.sample_trajectory_1D(
            x, np.array(ordinates), kernel, x_star,
            obs_noise_stddev)["samples"]
        for ordinates in anchor_ordinates
    ]

    # Ideas left open by the original author: expose per-arc sampler
    # closures and funnel the loglikelihood through as well, instead of
    # returning just the samples.  Might want to return more useful stuff.
    return (x_star, top_arc, bottom_arc, obs_noise_stddev, kernel_stddev)
Ejemplo n.º 3
0
def main(argv):
    """Generate the train/test sample sets and write them to output_dir.

    Options are parsed from sys.argv (the argv parameter is kept for
    interface compatibility but is not consulted):

       --n_train     number of training samples
       --n_test      number of test samples
       --d           dimension of the samples. Should be higher than 2
                     and preferably 10 or more.
       --output_dir  directory in which we'll write the results
                     (created if it does not exist)
       -v            accepted, currently has no effect
       -h, --help    print the usage message and exit

    Files written in output_dir: train_samples.pkl,
    train_samples_extra.pkl, test_samples.pkl, test_samples_extra.pkl;
    the path of overview.png is handed to plot_the_overview.

    Raises AssertionError when one of the four required options is
    missing (or evaluates to a falsy value such as 0).
    """

    import getopt
    import cPickle

    def dump_pickle(obj, filename):
        # Binary mode + context manager: the handle is closed (and thus
        # flushed) deterministically instead of being leaked.
        with open(filename, "wb") as handle:
            cPickle.dump(obj, handle)

    try:
        # "help" must be registered here, otherwise --help is rejected by
        # getopt before the option loop below can handle it.
        opts, args = getopt.getopt(
            sys.argv[1:], "hv",
            ["help", "n_train=", "n_test=", "d=", "output_dir="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    output_dir = None

    for o, a in opts:
        if o == "-v":
            # Verbosity flag is accepted but nothing reads it (yet).
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        # Plain equality: `o in ("--n_train")` would be a substring test
        # against a string, not membership in a tuple.
        elif o == "--n_train":
            n_train = int(a)
        elif o == "--n_test":
            n_test = int(a)
        elif o == "--d":
            d = int(a)
        elif o == "--output_dir":
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train, "missing or zero --n_train"
    assert n_test, "missing or zero --n_test"
    assert d, "missing or zero --d"
    assert output_dir, "missing --output_dir"

    # These points are used to define the arcs from which the samples are drawn.
    base_number_of_points = 8
    (base_x, top_arc, bottom_arc, twin_bumps_obs_noise_stddev,
     twin_bumps_kernel_stddev
     ) = generate_pinched_twin_bumps_quasi_mirrored_gp(base_number_of_points)

    # These values are hardcoded to yield something that looks good.
    # We're not really interested in varying those with parameters.
    kernel_stddev = 0.15
    obs_noise_stddev = 0.2

    def kernel(x1, x2):
        return gaussian_process.square_distance_kernel_1D(
            x1, x2, kernel_stddev)

    samples_x = np.linspace(0.0, 1.0, d)

    N = n_train + n_test
    samples = np.zeros((N, d))

    # Track from which arc you get the sample (1 -> top arc, 0 -> bottom arc).
    cluster_index = np.array(np.random.uniform(0, 1, size=N) < 0.5, dtype=int)

    # Not the most efficient way to do this because it
    # recomputes certain matrices instead of caching them,
    # but that's not important.
    for n in range(N):
        # cluster_index[n] is 0 or 1, so this selects exactly one arc.
        R = gaussian_process.sample_trajectory_1D(
            base_x,
            top_arc * cluster_index[n] + bottom_arc * (1 - cluster_index[n]),
            kernel, samples_x, obs_noise_stddev)
        samples[n, :] = R['samples']
        # Overwritten on every iteration; only the values from the last
        # sample are kept and stored in extra_props below.
        f_star_mean = R['f_star_mean']
        f_star_cov = R['f_star_cov']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print("Creating directory %s" % output_dir)

    extra_props = {
        'base_x': base_x,
        'top_arc': top_arc,
        'bottom_arc': bottom_arc,
        'samples_x': samples_x,
        'f_star_mean': f_star_mean,
        'f_star_cov': f_star_cov,
        # Placeholder; the actual count is supplied per split via conj below.
        'n': None,
        'd': d,
        'kernel_stddev': kernel_stddev,
        'obs_noise_stddev': obs_noise_stddev,
        'base_number_of_points': base_number_of_points,
        'twin_bumps_obs_noise_stddev': twin_bumps_obs_noise_stddev,
        'twin_bumps_kernel_stddev': twin_bumps_kernel_stddev,
    }

    # Training split: the first n_train rows.
    train_samples = samples[0:n_train, :]
    train_cluster_index = cluster_index[0:n_train]
    train_samples_filename = os.path.join(output_dir, "train_samples.pkl")
    train_samples_extra_filename = os.path.join(output_dir,
                                                "train_samples_extra.pkl")

    dump_pickle(train_samples, train_samples_filename)
    dump_pickle(
        conj(conj(extra_props, ('n', n_train)),
             ('cluster_indices', train_cluster_index)),
        train_samples_extra_filename)
    print("wrote " + train_samples_filename)
    print("wrote " + train_samples_extra_filename)

    # Test split: the remaining n_test rows.
    test_samples = samples[n_train:(n_train + n_test), :]
    test_cluster_index = cluster_index[n_train:(n_train + n_test)]
    test_samples_filename = os.path.join(output_dir, "test_samples.pkl")
    test_samples_extra_filename = os.path.join(output_dir,
                                               "test_samples_extra.pkl")

    dump_pickle(test_samples, test_samples_filename)
    dump_pickle(
        conj(conj(extra_props, ('n', n_test)),
             ('cluster_indices', test_cluster_index)),
        test_samples_extra_filename)
    print("wrote " + test_samples_filename)
    print("wrote " + test_samples_extra_filename)

    # NOTE(review): the two slices below are short or empty when
    # n_train < 20 -- confirm plot_the_overview copes with that.
    output_image_file = os.path.join(output_dir, "overview.png")
    plot_the_overview(base_x, top_arc, bottom_arc, samples_x,
                      train_samples[0:10, :], train_samples[10:20, :],
                      output_image_file)
    print("wrote " + output_image_file)
def main(argv):
    """Generate the train/test sample sets and write them to output_dir.

    Options are parsed from sys.argv (the argv parameter is kept for
    interface compatibility but is not consulted):

       --n_train     number of training samples
       --n_test      number of test samples
       --d           dimension of the samples. Should be higher than 2
                     and preferably 10 or more.
       --output_dir  directory in which we'll write the results
                     (created if it does not exist)
       -v            accepted, currently has no effect
       -h, --help    print the usage message and exit

    Files written in output_dir: train_samples.pkl,
    train_samples_extra.pkl, test_samples.pkl, test_samples_extra.pkl;
    the path of overview.png is handed to plot_the_overview.

    Raises AssertionError when one of the four required options is
    missing (or evaluates to a falsy value such as 0).
    """

    import getopt
    import cPickle

    def dump_pickle(obj, filename):
        # Binary mode + context manager: the handle is closed (and thus
        # flushed) deterministically instead of being leaked.
        with open(filename, "wb") as handle:
            cPickle.dump(obj, handle)

    try:
        # "help" must be registered here, otherwise --help is rejected by
        # getopt before the option loop below can handle it.
        opts, args = getopt.getopt(
            sys.argv[1:], "hv",
            ["help", "n_train=", "n_test=", "d=", "output_dir="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    output_dir = None

    for o, a in opts:
        if o == "-v":
            # Verbosity flag is accepted but nothing reads it (yet).
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        # Plain equality: `o in ("--n_train")` would be a substring test
        # against a string, not membership in a tuple.
        elif o == "--n_train":
            n_train = int(a)
        elif o == "--n_test":
            n_test = int(a)
        elif o == "--d":
            d = int(a)
        elif o == "--output_dir":
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train, "missing or zero --n_train"
    assert n_test, "missing or zero --n_test"
    assert d, "missing or zero --d"
    assert output_dir, "missing --output_dir"

    # These points are used to define the arcs from which the samples are drawn.
    base_number_of_points = 8
    (base_x, top_arc, bottom_arc,
     twin_bumps_obs_noise_stddev,
     twin_bumps_kernel_stddev) = generate_pinched_twin_bumps_quasi_mirrored_gp(base_number_of_points)

    # These values are hardcoded to yield something that looks good.
    # We're not really interested in varying those with parameters.
    kernel_stddev = 0.15
    obs_noise_stddev = 0.2

    def kernel(x1, x2):
        return gaussian_process.square_distance_kernel_1D(x1, x2, kernel_stddev)

    samples_x = np.linspace(0.0, 1.0, d)

    N = n_train + n_test
    samples = np.zeros((N, d))

    # Track from which arc you get the sample (1 -> top arc, 0 -> bottom arc).
    cluster_index = np.array(np.random.uniform(0, 1, size=N) < 0.5, dtype=int)

    # Not the most efficient way to do this because it
    # recomputes certain matrices instead of caching them,
    # but that's not important.
    for n in range(N):
        # cluster_index[n] is 0 or 1, so this selects exactly one arc.
        R = gaussian_process.sample_trajectory_1D(
            base_x,
            top_arc * cluster_index[n] + bottom_arc * (1 - cluster_index[n]),
            kernel,
            samples_x,
            obs_noise_stddev)
        samples[n, :] = R['samples']
        # Overwritten on every iteration; only the values from the last
        # sample are kept and stored in extra_props below.
        f_star_mean = R['f_star_mean']
        f_star_cov = R['f_star_cov']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print("Creating directory %s" % output_dir)

    extra_props = {'base_x': base_x,
                   'top_arc': top_arc,
                   'bottom_arc': bottom_arc,
                   'samples_x': samples_x,
                   'f_star_mean': f_star_mean,
                   'f_star_cov': f_star_cov,
                   # Placeholder; the actual count is supplied per split
                   # via conj below.
                   'n': None,
                   'd': d,
                   'kernel_stddev': kernel_stddev,
                   'obs_noise_stddev': obs_noise_stddev,
                   'base_number_of_points': base_number_of_points,
                   'twin_bumps_obs_noise_stddev': twin_bumps_obs_noise_stddev,
                   'twin_bumps_kernel_stddev': twin_bumps_kernel_stddev}

    # Training split: the first n_train rows.
    train_samples = samples[0:n_train, :]
    train_cluster_index = cluster_index[0:n_train]
    train_samples_filename = os.path.join(output_dir, "train_samples.pkl")
    train_samples_extra_filename = os.path.join(output_dir, "train_samples_extra.pkl")

    dump_pickle(train_samples, train_samples_filename)
    dump_pickle(conj(conj(extra_props,
                          ('n', n_train)),
                     ('cluster_indices', train_cluster_index)),
                train_samples_extra_filename)
    print("wrote " + train_samples_filename)
    print("wrote " + train_samples_extra_filename)

    # Test split: the remaining n_test rows.
    test_samples = samples[n_train:(n_train + n_test), :]
    test_cluster_index = cluster_index[n_train:(n_train + n_test)]
    test_samples_filename = os.path.join(output_dir, "test_samples.pkl")
    test_samples_extra_filename = os.path.join(output_dir, "test_samples_extra.pkl")

    dump_pickle(test_samples, test_samples_filename)
    dump_pickle(conj(conj(extra_props,
                          ('n', n_test)),
                     ('cluster_indices', test_cluster_index)),
                test_samples_extra_filename)
    print("wrote " + test_samples_filename)
    print("wrote " + test_samples_extra_filename)

    # NOTE(review): the two slices below are short or empty when
    # n_train < 20 -- confirm plot_the_overview copes with that.
    output_image_file = os.path.join(output_dir, "overview.png")
    plot_the_overview(base_x, top_arc, bottom_arc, samples_x,
                      train_samples[0:10, :], train_samples[10:20, :],
                      output_image_file)
    print("wrote " + output_image_file)