def main(argv):
    """Generate the twin-bumps Gaussian-process dataset and write it to disk.

    Options are parsed from ``sys.argv[1:]``; the ``argv`` parameter is kept
    for interface compatibility but is not consulted.

    Options:
        --n_train     number of training samples (required, non-zero)
        --n_test      number of test samples (required, non-zero)
        --d           the dimension of the samples. Should be higher than 2
                      and preferably 10 or more.
        --output_dir  the directory in which we'll write the results
                      (created when it does not exist yet)
        -v            accepted; currently has no effect
        -h, --help    print the usage message and exit

    Files written into output_dir:
        train_samples.pkl, train_samples_extra.pkl,
        test_samples.pkl, test_samples_extra.pkl, overview.png

    Raises:
        AssertionError: when a required option is missing or zero.
    """
    import getopt
    import cPickle

    try:
        # "help" has to be registered here, otherwise getopt rejects --help
        # before the option loop below ever sees it.
        opts, _ = getopt.getopt(
            sys.argv[1:], "hv",
            ["help", "n_train=", "n_test=", "d=", "output_dir="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    output_dir = None

    for o, a in opts:
        if o == "-v":
            # Accepted for command-line compatibility; nothing below reads
            # a verbosity flag.
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        # Plain equality: `o in ("--n_train")` would be a substring test,
        # because the parentheses do not make a tuple.
        elif o == "--n_train":
            n_train = int(a)
        elif o == "--n_test":
            n_test = int(a)
        elif o == "--d":
            d = int(a)
        elif o == "--output_dir":
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train, "--n_train is required and must be non-zero"
    assert n_test, "--n_test is required and must be non-zero"
    assert d, "--d is required and must be non-zero"
    assert output_dir, "--output_dir is required"

    def write_pickle(obj, filename):
        # The context manager closes (and therefore flushes) the file even
        # when dump() raises; binary mode keeps the bytes platform-neutral.
        with open(filename, "wb") as f:
            cPickle.dump(obj, f)
        print("wrote " + filename)

    # These points are used to define the arcs from which the samples are drawn.
    base_number_of_points = 8
    (base_x, top_arc, bottom_arc,
     twin_bumps_obs_noise_stddev,
     twin_bumps_kernel_stddev
     ) = generate_pinched_twin_bumps_quasi_mirrored_gp(base_number_of_points)

    # These values are hardcoded to yield something that looks good.
    # We're not really interested in varying those with parameters.
    kernel_stddev = 0.15
    obs_noise_stddev = 0.2

    def kernel(x1, x2):
        return gaussian_process.square_distance_kernel_1D(
            x1, x2, kernel_stddev)

    samples_x = np.linspace(0.0, 1.0, d)
    N = n_train + n_test
    samples = np.zeros((N, d))

    # Track from which arc you get the sample (1 selects top_arc,
    # 0 selects bottom_arc in the loop below).
    cluster_index = np.array(np.random.uniform(0, 1, size=N) < 0.5,
                             dtype=int)

    # Not the most efficient way to do this because it
    # recomputes certain matrices instead of caching them,
    # but that's not important.
    for n in range(N):
        R = gaussian_process.sample_trajectory_1D(
            base_x,
            top_arc * cluster_index[n] + bottom_arc * (1 - cluster_index[n]),
            kernel,
            samples_x,
            obs_noise_stddev)
        samples[n, :] = R['samples']

    # The values from the last iteration are the ones stored; the original
    # author notes that f_star_mean and f_star_cov are always the same.
    f_star_mean = R['f_star_mean']
    f_star_cov = R['f_star_cov']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        # No trailing comma: the message ends its own line instead of
        # running into the "wrote ..." messages that follow.
        print("Creating directory %s" % output_dir)

    # 'n' is a placeholder here; the train/test-specific value is supplied
    # through conj() when each "extra" file is written.
    extra_props = {
        'base_x': base_x,
        'top_arc': top_arc,
        'bottom_arc': bottom_arc,
        'samples_x': samples_x,
        'f_star_mean': f_star_mean,
        'f_star_cov': f_star_cov,
        'n': None,
        'd': d,
        'kernel_stddev': kernel_stddev,
        'obs_noise_stddev': obs_noise_stddev,
        'base_number_of_points': base_number_of_points,
        'twin_bumps_obs_noise_stddev': twin_bumps_obs_noise_stddev,
        'twin_bumps_kernel_stddev': twin_bumps_kernel_stddev,
    }

    # NOTE(review): conj() is defined elsewhere in this file; it presumably
    # returns the dictionary extended with the given (key, value) pair.
    train_samples = samples[0:n_train, :]
    train_cluster_index = cluster_index[0:n_train]
    write_pickle(train_samples,
                 os.path.join(output_dir, "train_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_train)),
             ('cluster_indices', train_cluster_index)),
        os.path.join(output_dir, "train_samples_extra.pkl"))

    test_samples = samples[n_train:(n_train + n_test), :]
    test_cluster_index = cluster_index[n_train:(n_train + n_test)]
    write_pickle(test_samples,
                 os.path.join(output_dir, "test_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_test)),
             ('cluster_indices', test_cluster_index)),
        os.path.join(output_dir, "test_samples_extra.pkl"))

    # The overview plot receives the first two groups of ten training samples.
    output_image_file = os.path.join(output_dir, "overview.png")
    plot_the_overview(base_x, top_arc, bottom_arc,
                      samples_x,
                      train_samples[0:10, :], train_samples[10:20, :],
                      output_image_file)
    print("wrote " + output_image_file)
def main(argv):
    """Sample a dataset from a mixture of manifold components and save it.

    Options are parsed from ``sys.argv[1:]``; the ``argv`` parameter is kept
    for interface compatibility but is not consulted.

    Options:
        --n_train             number of training samples (required, non-zero)
        --n_test              number of test samples (required, non-zero)
        --d                   the dimension of the samples. Should be higher
                              than 2 and preferably 10 or more.
        --mixing_prop         controls how much of the vector v_t we mix in
                              with the proposal for v_{t+1} (default 0.5)
        --leading_eigenvalue  (default 1.0)
        --n_components        number of mixture components
                              (required, non-zero)
        --output_dir          the directory in which we'll write the results
                              (created when it does not exist yet)
        -v                    accepted; currently has no effect
        -h, --help            print the usage message and exit

    Files written into output_dir:
        train_samples.pkl, train_samples_extra.pkl,
        test_samples.pkl, test_samples_extra.pkl, and one
        overview_dimensions_<i>_and_<i+1>.png per pair of consecutive
        dimensions.

    Raises:
        AssertionError: when a required option is missing or zero.
    """
    import getopt
    import cPickle

    try:
        # "help" has to be registered here, otherwise getopt rejects --help
        # before the option loop below ever sees it.
        opts, _ = getopt.getopt(
            sys.argv[1:], "hv",
            ["help", "d=", "n_train=", "n_test=", "mixing_prop=",
             "leading_eigenvalue=", "n_components=", "output_dir="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    mixing_prop = 0.5
    leading_eigenvalue = 1.0
    # Must start out bound: otherwise omitting --n_components makes the
    # assertion below die with UnboundLocalError instead of a clear message.
    n_components = None
    output_dir = None

    for o, a in opts:
        if o == "-v":
            # Accepted for command-line compatibility; nothing below reads
            # a verbosity flag.
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        # Plain equality: `o in ("--n_train")` would be a substring test,
        # because the parentheses do not make a tuple.
        elif o == "--n_train":
            n_train = int(a)
        elif o == "--n_test":
            n_test = int(a)
        elif o == "--d":
            d = int(a)
        elif o == "--mixing_prop":
            mixing_prop = float(a)
        elif o == "--leading_eigenvalue":
            leading_eigenvalue = float(a)
        elif o == "--n_components":
            n_components = int(a)
        elif o == "--output_dir":
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train, "--n_train is required and must be non-zero"
    assert n_test, "--n_test is required and must be non-zero"
    assert d, "--d is required and must be non-zero"
    assert n_components, "--n_components is required and must be non-zero"
    assert output_dir, "--output_dir is required"

    def write_pickle(obj, filename):
        # The context manager closes (and therefore flushes) the file even
        # when dump() raises; binary mode keeps the bytes platform-neutral.
        with open(filename, "wb") as f:
            cPickle.dump(obj, f)
        print("wrote " + filename)

    # Time the two sampling stages together; the whole-second duration is
    # stored alongside the data.
    start_time = time.time()
    (component_means, component_covariances) = sample_manifold_components(
        d, n_components, leading_eigenvalue, mixing_prop)
    (samples, component_indices) = sample_from_mixture(
        component_means, component_covariances, n_train + n_test)
    end_time = time.time()
    computational_cost_in_seconds = int(end_time - start_time)
    print("Sampling took %d seconds." % computational_cost_in_seconds)

    print(component_means)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        # No trailing comma: the message ends its own line instead of
        # running into the "wrote ..." messages that follow.
        print("Creating directory %s" % output_dir)

    # The train/test-specific 'n' and 'component_indices' entries are
    # supplied through conj() when each "extra" file is written.
    extra_props = {
        'component_means': component_means,
        'component_covariances': component_covariances,
        'd': d,
        'leading_eigenvalue': leading_eigenvalue,
        'mixing_prop': mixing_prop,
        'n_components': n_components,
        'computational_cost_in_seconds': computational_cost_in_seconds,
    }

    # NOTE(review): conj() is defined elsewhere in this file; it presumably
    # returns the dictionary extended with the given (key, value) pair.
    train_samples = samples[0:n_train, :]
    train_component_indices = component_indices[0:n_train]
    write_pickle(train_samples,
                 os.path.join(output_dir, "train_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_train)),
             ('component_indices', train_component_indices)),
        os.path.join(output_dir, "train_samples_extra.pkl"))

    test_samples = samples[n_train:(n_train + n_test), :]
    test_component_indices = component_indices[n_train:(n_train + n_test)]
    write_pickle(test_samples,
                 os.path.join(output_dir, "test_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_test)),
             ('component_indices', test_component_indices)),
        os.path.join(output_dir, "test_samples_extra.pkl"))

    # One overview image per pair of consecutive dimensions, drawn from
    # the full (train + test) sample array.
    for i in range(0, d - 1):
        output_image_file = os.path.join(
            output_dir, "overview_dimensions_%d_and_%d.png" % (i, i + 1))
        plot_the_overview(samples, i, i + 1, output_image_file)
        print("wrote " + output_image_file)
def main(argv):
    """Sample a dataset from a mixture of manifold components and save it.

    Options are parsed from ``sys.argv[1:]``; the ``argv`` parameter is kept
    for interface compatibility but is not consulted.

    Options:
        --n_train             number of training samples (required, non-zero)
        --n_test              number of test samples (required, non-zero)
        --d                   the dimension of the samples. Should be higher
                              than 2 and preferably 10 or more.
        --mixing_prop         controls how much of the vector v_t we mix in
                              with the proposal for v_{t+1} (default 0.5)
        --leading_eigenvalue  (default 1.0)
        --n_components        number of mixture components
                              (required, non-zero)
        --output_dir          the directory in which we'll write the results
                              (created when it does not exist yet)
        -v                    accepted; currently has no effect
        -h, --help            print the usage message and exit

    Files written into output_dir:
        train_samples.pkl, train_samples_extra.pkl,
        test_samples.pkl, test_samples_extra.pkl, and one
        overview_dimensions_<i>_and_<i+1>.png per pair of consecutive
        dimensions.

    Raises:
        AssertionError: when a required option is missing or zero.
    """
    import getopt
    import cPickle

    long_options = [
        # "help" has to be registered, otherwise getopt rejects --help
        # before the option loop below ever sees it.
        "help",
        "d=", "n_train=", "n_test=", "mixing_prop=",
        "leading_eigenvalue=", "n_components=", "output_dir=",
    ]
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hv", long_options)
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    mixing_prop = 0.5
    leading_eigenvalue = 1.0
    # Must start out bound: otherwise omitting --n_components makes the
    # assertion below die with UnboundLocalError instead of a clear message.
    n_components = None
    output_dir = None

    for o, a in opts:
        if o == "-v":
            # Accepted for command-line compatibility; nothing below reads
            # a verbosity flag.
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        # Plain equality: `o in ("--n_train")` would be a substring test,
        # because the parentheses do not make a tuple.
        elif o == "--n_train":
            n_train = int(a)
        elif o == "--n_test":
            n_test = int(a)
        elif o == "--d":
            d = int(a)
        elif o == "--mixing_prop":
            mixing_prop = float(a)
        elif o == "--leading_eigenvalue":
            leading_eigenvalue = float(a)
        elif o == "--n_components":
            n_components = int(a)
        elif o == "--output_dir":
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train, "--n_train is required and must be non-zero"
    assert n_test, "--n_test is required and must be non-zero"
    assert d, "--d is required and must be non-zero"
    assert n_components, "--n_components is required and must be non-zero"
    assert output_dir, "--output_dir is required"

    def write_pickle(obj, filename):
        # The context manager closes (and therefore flushes) the file even
        # when dump() raises; binary mode keeps the bytes platform-neutral.
        with open(filename, "wb") as f:
            cPickle.dump(obj, f)
        print("wrote " + filename)

    # Time the two sampling stages together; the whole-second duration is
    # stored alongside the data.
    start_time = time.time()
    (component_means, component_covariances) = sample_manifold_components(
        d, n_components, leading_eigenvalue, mixing_prop)
    (samples, component_indices) = sample_from_mixture(
        component_means, component_covariances, n_train + n_test)
    end_time = time.time()
    computational_cost_in_seconds = int(end_time - start_time)
    print("Sampling took %d seconds." % computational_cost_in_seconds)

    print(component_means)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        # No trailing comma: the message ends its own line instead of
        # running into the "wrote ..." messages that follow.
        print("Creating directory %s" % output_dir)

    # The train/test-specific 'n' and 'component_indices' entries are
    # supplied through conj() when each "extra" file is written.
    extra_props = {
        'component_means': component_means,
        'component_covariances': component_covariances,
        'd': d,
        'leading_eigenvalue': leading_eigenvalue,
        'mixing_prop': mixing_prop,
        'n_components': n_components,
        'computational_cost_in_seconds': computational_cost_in_seconds,
    }

    # NOTE(review): conj() is defined elsewhere in this file; it presumably
    # returns the dictionary extended with the given (key, value) pair.
    train_samples = samples[0:n_train, :]
    train_component_indices = component_indices[0:n_train]
    write_pickle(train_samples,
                 os.path.join(output_dir, "train_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_train)),
             ('component_indices', train_component_indices)),
        os.path.join(output_dir, "train_samples_extra.pkl"))

    test_samples = samples[n_train:(n_train + n_test), :]
    test_component_indices = component_indices[n_train:(n_train + n_test)]
    write_pickle(test_samples,
                 os.path.join(output_dir, "test_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_test)),
             ('component_indices', test_component_indices)),
        os.path.join(output_dir, "test_samples_extra.pkl"))

    # One overview image per pair of consecutive dimensions, drawn from
    # the full (train + test) sample array.
    for i in range(0, d - 1):
        output_image_file = os.path.join(
            output_dir, "overview_dimensions_%d_and_%d.png" % (i, i + 1))
        plot_the_overview(samples, i, i + 1, output_image_file)
        print("wrote " + output_image_file)
def main(argv):
    """Generate the twin-bumps Gaussian-process dataset and write it to disk.

    Options are parsed from ``sys.argv[1:]``; the ``argv`` parameter is kept
    for interface compatibility but is not consulted.

    Options:
        --n_train     number of training samples (required, non-zero)
        --n_test      number of test samples (required, non-zero)
        --d           the dimension of the samples. Should be higher than 2
                      and preferably 10 or more.
        --output_dir  the directory in which we'll write the results
                      (created when it does not exist yet)
        -v            accepted; currently has no effect
        -h, --help    print the usage message and exit

    Files written into output_dir:
        train_samples.pkl, train_samples_extra.pkl,
        test_samples.pkl, test_samples_extra.pkl, overview.png

    Raises:
        AssertionError: when a required option is missing or zero.
    """
    import getopt
    import cPickle

    long_options = [
        # "help" has to be registered, otherwise getopt rejects --help
        # before the option loop below ever sees it.
        "help",
        "n_train=", "n_test=", "d=", "output_dir=",
    ]
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hv", long_options)
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_train = None
    n_test = None
    d = None
    output_dir = None

    for o, a in opts:
        if o == "-v":
            # Accepted for command-line compatibility; nothing below reads
            # a verbosity flag.
            pass
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        # Plain equality: `o in ("--n_train")` would be a substring test,
        # because the parentheses do not make a tuple.
        elif o == "--n_train":
            n_train = int(a)
        elif o == "--n_test":
            n_test = int(a)
        elif o == "--d":
            d = int(a)
        elif o == "--output_dir":
            output_dir = a
        else:
            assert False, "unhandled option"

    assert n_train, "--n_train is required and must be non-zero"
    assert n_test, "--n_test is required and must be non-zero"
    assert d, "--d is required and must be non-zero"
    assert output_dir, "--output_dir is required"

    def write_pickle(obj, filename):
        # The context manager closes (and therefore flushes) the file even
        # when dump() raises; binary mode keeps the bytes platform-neutral.
        with open(filename, "wb") as f:
            cPickle.dump(obj, f)
        print("wrote " + filename)

    # These points are used to define the arcs from which the samples are drawn.
    base_number_of_points = 8
    (base_x, top_arc, bottom_arc,
     twin_bumps_obs_noise_stddev,
     twin_bumps_kernel_stddev
     ) = generate_pinched_twin_bumps_quasi_mirrored_gp(base_number_of_points)

    # These values are hardcoded to yield something that looks good.
    # We're not really interested in varying those with parameters.
    kernel_stddev = 0.15
    obs_noise_stddev = 0.2

    def kernel(x1, x2):
        return gaussian_process.square_distance_kernel_1D(
            x1, x2, kernel_stddev)

    samples_x = np.linspace(0.0, 1.0, d)
    N = n_train + n_test
    samples = np.zeros((N, d))

    # Track from which arc you get the sample (1 selects top_arc,
    # 0 selects bottom_arc in the loop below).
    cluster_index = np.array(np.random.uniform(0, 1, size=N) < 0.5,
                             dtype=int)

    # Not the most efficient way to do this because it
    # recomputes certain matrices instead of caching them,
    # but that's not important.
    for n in range(N):
        R = gaussian_process.sample_trajectory_1D(
            base_x,
            top_arc * cluster_index[n] + bottom_arc * (1 - cluster_index[n]),
            kernel,
            samples_x,
            obs_noise_stddev)
        samples[n, :] = R['samples']

    # The values from the last iteration are the ones stored; the original
    # author notes that f_star_mean and f_star_cov are always the same.
    f_star_mean = R['f_star_mean']
    f_star_cov = R['f_star_cov']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        # No trailing comma: the message ends its own line instead of
        # running into the "wrote ..." messages that follow.
        print("Creating directory %s" % output_dir)

    # 'n' is a placeholder here; the train/test-specific value is supplied
    # through conj() when each "extra" file is written.
    extra_props = {
        'base_x': base_x,
        'top_arc': top_arc,
        'bottom_arc': bottom_arc,
        'samples_x': samples_x,
        'f_star_mean': f_star_mean,
        'f_star_cov': f_star_cov,
        'n': None,
        'd': d,
        'kernel_stddev': kernel_stddev,
        'obs_noise_stddev': obs_noise_stddev,
        'base_number_of_points': base_number_of_points,
        'twin_bumps_obs_noise_stddev': twin_bumps_obs_noise_stddev,
        'twin_bumps_kernel_stddev': twin_bumps_kernel_stddev,
    }

    # NOTE(review): conj() is defined elsewhere in this file; it presumably
    # returns the dictionary extended with the given (key, value) pair.
    train_samples = samples[0:n_train, :]
    train_cluster_index = cluster_index[0:n_train]
    write_pickle(train_samples,
                 os.path.join(output_dir, "train_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_train)),
             ('cluster_indices', train_cluster_index)),
        os.path.join(output_dir, "train_samples_extra.pkl"))

    test_samples = samples[n_train:(n_train + n_test), :]
    test_cluster_index = cluster_index[n_train:(n_train + n_test)]
    write_pickle(test_samples,
                 os.path.join(output_dir, "test_samples.pkl"))
    write_pickle(
        conj(conj(extra_props, ('n', n_test)),
             ('cluster_indices', test_cluster_index)),
        os.path.join(output_dir, "test_samples_extra.pkl"))

    # The overview plot receives the first two groups of ten training samples.
    output_image_file = os.path.join(output_dir, "overview.png")
    plot_the_overview(base_x, top_arc, bottom_arc,
                      samples_x,
                      train_samples[0:10, :], train_samples[10:20, :],
                      output_image_file)
    print("wrote " + output_image_file)