예제 #1
0
def main(argv):
    """
       n_train
       n_test
       d is the dimension of the samples. Should be higher than 2 and preferable 10 or more..
       output_dir is the directory in which we'll write the results
    """

    import getopt
    import cPickle

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hv", ["n_train=", "n_test=", "d=", "output_dir="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    output_dir = None

    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("--n_train"):
            n_train = int(a)
        elif o in ("--n_test"):
            n_test = int(a)
        elif o in ("--d"):
            d = int(a)
        elif o in ("--output_dir"):
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train
    assert n_test
    assert d
    assert output_dir

    # These points are used to define the arcs from which the samples are drawn.
    base_number_of_points = 8
    (base_x, top_arc, bottom_arc, twin_bumps_obs_noise_stddev,
     twin_bumps_kernel_stddev
     ) = generate_pinched_twin_bumps_quasi_mirrored_gp(base_number_of_points)

    # These values are hardcoded to yield something that looks good.
    # We're not really interested in varying those with parameters.
    kernel_stddev = 0.15
    obs_noise_stddev = 0.2

    def kernel(x1, x2):
        return gaussian_process.square_distance_kernel_1D(
            x1, x2, kernel_stddev)

    samples_x = np.linspace(0.0, 1.0, d)

    N = n_train + n_test
    samples = np.zeros((N, d))

    # Track from which arc you get the sample.
    cluster_index = np.array(np.random.uniform(0, 1, size=N) < 0.5, dtype=int)

    # Not the most efficient way to do this because it
    # recomputes certain matrices instead of caching them,
    # but that's not important.
    for n in range(N):
        R = gaussian_process.sample_trajectory_1D(
            base_x,
            top_arc * cluster_index[n] + bottom_arc * (1 - cluster_index[n]),
            kernel, samples_x, obs_noise_stddev)
        samples[n, :] = R['samples']
        # always the same f_star_mean and f_star_cov
        f_star_mean = R['f_star_mean']
        f_star_cov = R['f_star_cov']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print "Creating directory %s" % output_dir,

    extra_props = {
        'base_x': base_x,
        'top_arc': top_arc,
        'bottom_arc': bottom_arc,
        'samples_x': samples_x,
        'f_star_mean': f_star_mean,
        'f_star_cov': f_star_cov,
        'n': None,
        'd': d,
        'kernel_stddev': kernel_stddev,
        'obs_noise_stddev': obs_noise_stddev,
        'base_number_of_points': base_number_of_points,
        'twin_bumps_obs_noise_stddev': twin_bumps_obs_noise_stddev,
        'twin_bumps_kernel_stddev': twin_bumps_kernel_stddev
    }

    #print type(extra_props)
    #print type(conj(extra_props, ('n', n_train)))
    #print type(dict(extra_props.items() + [('lupi',"chien")]))
    #quit()

    train_samples = samples[0:n_train, :]
    train_cluster_index = cluster_index[0:n_train]
    train_samples_filename = os.path.join(output_dir, "train_samples.pkl")
    train_samples_extra_filename = os.path.join(output_dir,
                                                "train_samples_extra.pkl")

    cPickle.dump(train_samples, open(train_samples_filename, "w"))
    cPickle.dump(
        conj(conj(extra_props, ('n', n_train)),
             ('cluster_indices', train_cluster_index)),
        open(train_samples_extra_filename, "w"))
    print "wrote " + train_samples_filename
    print "wrote " + train_samples_extra_filename

    test_samples = samples[n_train:(n_train + n_test), :]
    test_cluster_index = cluster_index[n_train:(n_train + n_test)]
    test_samples_filename = os.path.join(output_dir, "test_samples.pkl")
    test_samples_extra_filename = os.path.join(output_dir,
                                               "test_samples_extra.pkl")

    cPickle.dump(test_samples, open(test_samples_filename, "w"))
    cPickle.dump(
        conj(conj(extra_props, ('n', n_test)),
             ('cluster_indices', test_cluster_index)),
        open(test_samples_extra_filename, "w"))
    print "wrote " + test_samples_filename
    print "wrote " + test_samples_extra_filename

    output_image_file = os.path.join(output_dir, "overview.png")
    plot_the_overview(base_x, top_arc, bottom_arc, samples_x,
                      train_samples[0:10, :], train_samples[10:20, :],
                      output_image_file)
    print "wrote " + output_image_file
def main(argv):
    """
       n_train
       n_test
       d is the dimension of the samples. Should be higher than 2 and preferable 10 or more.
       mixing_prop controls how much of the vector v_t we mix in with the proposal for v_{t+1}
       leading_eigenvalue
       n_components
       output_dir is the directory in which we'll write the results
    """

    import getopt
    import cPickle

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hv", ["d=", "n_train=", "n_test=", "mixing_prop=", "leading_eigenvalue=", "n_components=", "output_dir="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    mixing_prop = 0.5
    leading_eigenvalue = 1.0
    output_dir = None

    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("--n_train"):
            n_train = int(a)
        elif o in ("--n_test"):
            n_test = int(a)
        elif o in ("--d"):
            d = int(a)
        elif o in ("--mixing_prop"):
            mixing_prop = float(a)
        elif o in ("--leading_eigenvalue"):
            leading_eigenvalue = float(a)
        elif o in ("--n_components"):
            n_components = int(a)
        elif o in ("--output_dir"):
            output_dir = a
        else:
            assert False, "unhandled option"
 
    assert n_train
    assert n_test
    assert d
    assert n_components
    assert output_dir

    start_time = time.time()

    (component_means, component_covariances) = sample_manifold_components(d, n_components, leading_eigenvalue, mixing_prop)
    (samples, component_indices) = sample_from_mixture(component_means, component_covariances, n_train + n_test)

    end_time = time.time()
    computational_cost_in_seconds = int(end_time - start_time)
    print "Sampling took %d seconds." % computational_cost_in_seconds

    print component_means


    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print "Creating directory %s" % output_dir,

    extra_props = {'component_means':component_means,
                   'component_covariances':component_covariances,
                   #'n_train':n_train,
                   #'n_test':n_test,
                   'd':d,
                   'leading_eigenvalue':leading_eigenvalue,
                   'mixing_prop':mixing_prop,
                   'n_components':n_components,
                   'computational_cost_in_seconds':computational_cost_in_seconds}


    train_samples = samples[0:n_train,:]
    train_component_indices = component_indices[0:n_train]
    train_samples_filename = os.path.join(output_dir, "train_samples.pkl")
    train_samples_extra_filename = os.path.join(output_dir, "train_samples_extra.pkl")

    cPickle.dump(train_samples, open(train_samples_filename, "w"))
    cPickle.dump(conj(conj(extra_props,
                           ('n', n_train)),
                      ('component_indices', train_component_indices)),
                 open(train_samples_extra_filename, "w"))
    print "wrote " + train_samples_filename
    print "wrote " + train_samples_extra_filename


    test_samples = samples[n_train:(n_train + n_test),:]
    test_component_indices= component_indices[n_train:(n_train + n_test)]
    test_samples_filename  = os.path.join(output_dir, "test_samples.pkl")
    test_samples_extra_filename  = os.path.join(output_dir, "test_samples_extra.pkl")

    cPickle.dump(test_samples, open(test_samples_filename, "w"))
    cPickle.dump(conj(conj(extra_props,
                           ('n', n_test)),
                      ('component_indices', test_component_indices)),
                 open(test_samples_extra_filename, "w"))
    print "wrote " + test_samples_filename
    print "wrote " + test_samples_extra_filename


    for i in range(0,d-1):

        output_image_file = os.path.join(output_dir,"overview_dimensions_%d_and_%d.png" % (i,i+1))
        plot_the_overview(samples, i, i+1, output_image_file)
        print "wrote " + output_image_file
def main(argv):
    """
       n_train
       n_test
       d is the dimension of the samples. Should be higher than 2 and preferable 10 or more.
       mixing_prop controls how much of the vector v_t we mix in with the proposal for v_{t+1}
       leading_eigenvalue
       n_components
       output_dir is the directory in which we'll write the results
    """

    import getopt
    import cPickle

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hv", [
            "d=", "n_train=", "n_test=", "mixing_prop=", "leading_eigenvalue=",
            "n_components=", "output_dir="
        ])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    mixing_prop = 0.5
    leading_eigenvalue = 1.0
    output_dir = None

    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("--n_train"):
            n_train = int(a)
        elif o in ("--n_test"):
            n_test = int(a)
        elif o in ("--d"):
            d = int(a)
        elif o in ("--mixing_prop"):
            mixing_prop = float(a)
        elif o in ("--leading_eigenvalue"):
            leading_eigenvalue = float(a)
        elif o in ("--n_components"):
            n_components = int(a)
        elif o in ("--output_dir"):
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train
    assert n_test
    assert d
    assert n_components
    assert output_dir

    start_time = time.time()

    (component_means, component_covariances) = sample_manifold_components(
        d, n_components, leading_eigenvalue, mixing_prop)
    (samples, component_indices) = sample_from_mixture(component_means,
                                                       component_covariances,
                                                       n_train + n_test)

    end_time = time.time()
    computational_cost_in_seconds = int(end_time - start_time)
    print "Sampling took %d seconds." % computational_cost_in_seconds

    print component_means

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print "Creating directory %s" % output_dir,

    extra_props = {
        'component_means': component_means,
        'component_covariances': component_covariances,
        #'n_train':n_train,
        #'n_test':n_test,
        'd': d,
        'leading_eigenvalue': leading_eigenvalue,
        'mixing_prop': mixing_prop,
        'n_components': n_components,
        'computational_cost_in_seconds': computational_cost_in_seconds
    }

    train_samples = samples[0:n_train, :]
    train_component_indices = component_indices[0:n_train]
    train_samples_filename = os.path.join(output_dir, "train_samples.pkl")
    train_samples_extra_filename = os.path.join(output_dir,
                                                "train_samples_extra.pkl")

    cPickle.dump(train_samples, open(train_samples_filename, "w"))
    cPickle.dump(
        conj(conj(extra_props, ('n', n_train)),
             ('component_indices', train_component_indices)),
        open(train_samples_extra_filename, "w"))
    print "wrote " + train_samples_filename
    print "wrote " + train_samples_extra_filename

    test_samples = samples[n_train:(n_train + n_test), :]
    test_component_indices = component_indices[n_train:(n_train + n_test)]
    test_samples_filename = os.path.join(output_dir, "test_samples.pkl")
    test_samples_extra_filename = os.path.join(output_dir,
                                               "test_samples_extra.pkl")

    cPickle.dump(test_samples, open(test_samples_filename, "w"))
    cPickle.dump(
        conj(conj(extra_props, ('n', n_test)),
             ('component_indices', test_component_indices)),
        open(test_samples_extra_filename, "w"))
    print "wrote " + test_samples_filename
    print "wrote " + test_samples_extra_filename

    for i in range(0, d - 1):

        output_image_file = os.path.join(
            output_dir, "overview_dimensions_%d_and_%d.png" % (i, i + 1))
        plot_the_overview(samples, i, i + 1, output_image_file)
        print "wrote " + output_image_file
def main(argv):
    """
       n_train
       n_test
       d is the dimension of the samples. Should be higher than 2 and preferable 10 or more..
       output_dir is the directory in which we'll write the results
    """

    import getopt
    import cPickle

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hv", ["n_train=", "n_test=", "d=", "output_dir="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    output_dir = None

    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("--n_train"):
            n_train = int(a)
        elif o in ("--n_test"):
            n_test = int(a)
        elif o in ("--d"):
            d = int(a)
        elif o in ("--output_dir"):
            output_dir = a
        else:
            assert False, "unhandled option"
 
    assert n_train
    assert n_test
    assert d
    assert output_dir

    # These points are used to define the arcs from which the samples are drawn.
    base_number_of_points = 8
    (base_x, top_arc, bottom_arc, 
     twin_bumps_obs_noise_stddev,
     twin_bumps_kernel_stddev) = generate_pinched_twin_bumps_quasi_mirrored_gp(base_number_of_points)

    # These values are hardcoded to yield something that looks good.
    # We're not really interested in varying those with parameters.
    kernel_stddev = 0.15
    obs_noise_stddev = 0.2
    def kernel(x1,x2):
        return gaussian_process.square_distance_kernel_1D(x1,x2,kernel_stddev)

    samples_x = np.linspace(0.0, 1.0, d)

    N = n_train + n_test
    samples = np.zeros((N, d))

    # Track from which arc you get the sample.
    cluster_index = np.array(np.random.uniform(0,1,size=N) < 0.5, dtype=int)

    # Not the most efficient way to do this because it
    # recomputes certain matrices instead of caching them,
    # but that's not important.
    for n in range(N):
        R = gaussian_process.sample_trajectory_1D(
            base_x,
            top_arc * cluster_index[n] + bottom_arc * (1 - cluster_index[n]),
            kernel, 
            samples_x,
            obs_noise_stddev)
        samples[n,:] = R['samples']
        # always the same f_star_mean and f_star_cov
        f_star_mean = R['f_star_mean']
        f_star_cov  = R['f_star_cov']


    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print "Creating directory %s" % output_dir,

    extra_props = {'base_x':base_x,
                   'top_arc':top_arc,
                   'bottom_arc':bottom_arc,
                   'samples_x':samples_x,
                   'f_star_mean':f_star_mean,
                   'f_star_cov':f_star_cov,
                   'n':None,
                   'd':d,
                   'kernel_stddev':kernel_stddev,
                   'obs_noise_stddev':obs_noise_stddev,
                   'base_number_of_points':base_number_of_points,
                   'twin_bumps_obs_noise_stddev':twin_bumps_obs_noise_stddev,
                   'twin_bumps_kernel_stddev':twin_bumps_kernel_stddev}

    #print type(extra_props)
    #print type(conj(extra_props, ('n', n_train)))
    #print type(dict(extra_props.items() + [('lupi',"chien")]))
    #quit()

    train_samples = samples[0:n_train,:]
    train_cluster_index = cluster_index[0:n_train]
    train_samples_filename = os.path.join(output_dir, "train_samples.pkl")
    train_samples_extra_filename = os.path.join(output_dir, "train_samples_extra.pkl")

    cPickle.dump(train_samples, open(train_samples_filename, "w"))
    cPickle.dump(conj(conj(extra_props,
                           ('n', n_train)),
                      ('cluster_indices', train_cluster_index)),
                 open(train_samples_extra_filename, "w"))
    print "wrote " + train_samples_filename
    print "wrote " + train_samples_extra_filename


    test_samples = samples[n_train:(n_train + n_test),:]
    test_cluster_index = cluster_index[n_train:(n_train + n_test)]
    test_samples_filename  = os.path.join(output_dir, "test_samples.pkl")
    test_samples_extra_filename  = os.path.join(output_dir, "test_samples_extra.pkl")

    cPickle.dump(test_samples, open(test_samples_filename, "w"))
    cPickle.dump(conj(conj(extra_props,
                           ('n', n_test)),
                      ('cluster_indices', test_cluster_index)),
                 open(test_samples_extra_filename, "w"))
    print "wrote " + test_samples_filename
    print "wrote " + test_samples_extra_filename

    output_image_file = os.path.join(output_dir,"overview.png")
    plot_the_overview(base_x, top_arc, bottom_arc, samples_x, train_samples[0:10,:], train_samples[10:20,:], output_image_file)
    print "wrote " + output_image_file