import sys
import os
import numpy as np

# Toy 2D dataset: N points along a sine curve.
N = 1000
X = np.zeros((N, 2))
X[:, 0] = np.linspace(-1, 1, N)
X[:, 1] = np.sin(2 * np.pi * np.linspace(-1, 1, N))
print X.shape

from dae_untied_weights import DAE_untied_weights

mydae = DAE_untied_weights(n_inputs=2,
                           n_hiddens=32,
                           act_func=['tanh', 'tanh'],
                           want_plus_x=False)

# Train with a decreasing schedule of corruption noise stddevs,
# optimizing each stage with BFGS.
mydae.fit_with_decreasing_noise(X,
                                [1.0, 0.5, 0.2, 0.1, 0.05, 0.01, 0.001],
                                {'method': 'fmin_bfgs',
                                 'maxiter': 5000})

clean_data = X

# Non-interactive backend so plots can be written to disk.
import matplotlib
matplotlib.use('Agg')
import pylab


def plot_grid_reconstruction_grid(mydae,
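
# ----------------------------------------------------------------------------
# Quick sanity-check plot (a minimal sketch, not the plot_grid_reconstruction_grid
# helper whose signature begins above): writes the clean training curve to disk
# using the Agg backend configured above. The helper name and output file name
# are made-up placeholders for illustration.
def plot_clean_data(data, output_file='sine_clean_data.png'):
    pylab.figure()
    pylab.scatter(data[:, 0], data[:, 1], s=2)
    pylab.xlabel('x')
    pylab.ylabel('sin(2 * pi * x)')
    pylab.savefig(output_file, dpi=100)
    pylab.close()

plot_clean_data(clean_data)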
import numpy as np
import cPickle

mnist_dataset = cPickle.load(open('/data/lisa/data/mnist/mnist.pkl', 'rb'))
mnist_train = mnist_dataset[0]
mnist_train_data, mnist_train_labels = mnist_train

from dae_untied_weights import DAE_untied_weights

mydae = DAE_untied_weights(n_inputs=784,
                           n_hiddens=1024,
                           act_func=['tanh', 'sigmoid'])

#mydae.fit_with_decreasing_noise(mnist_train_data[0:2000,:],
#                                [0.1, 0.05, 0.01, 0.001],
#                                {'method' : 'fmin_cg',
#                                 'maxiter' : 500,
#                                 'gtol':0.001})

mydae.fit_with_decreasing_noise(mnist_train_data[0:2000, :],
                                [0.1, 0.05, 0.01],
                                {'method': 'fmin_l_bfgs_b',
                                 'maxiter': 500,
                                 'm': 25})

print mydae.s

pkl_file = "/u/alaingui/umontreal/denoising_autoencoder/mcmc_pof/trained_models/mydae_2013_02_08.pkl"
cPickle.dump(
    {
def main(argv):
    """
    maxiter
    lbfgs_rank
    act_func
    noise_stddevs
    train_samples_pickle
    output_dir is the directory in which we'll write the results
    """

    import getopt
    import cPickle
    import json
    # sys, os and time are also used below (exit codes, file paths, timing).
    import sys
    import os
    import time

    try:
        opts, args = getopt.getopt(argv[1:], "hv",
                                   ["n_hiddens=", "maxiter=", "lbfgs_rank=",
                                    "act_func=", "noise_stddevs=",
                                    "train_samples_pickle=",
                                    "valid_samples_pickle=",
                                    "test_samples_pickle=", "output_dir=",
                                    "save_hidden_units=",
                                    "resume_dae_pickle=", "want_constant_s=",
                                    "loss_function_desc="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    n_hiddens = None
    maxiter = 50
    lbfgs_rank = None
    act_func = None
    noise_stddevs = None
    alt_valid_noise_stddevs = None
    train_samples_pickle = None
    valid_samples_pickle = None
    test_samples_pickle = None
    output_dir = None
    # used for multilayers
    save_hidden_units = False
    resume_dae_pickle = None
    want_constant_s = None
    loss_function_desc = None

    verbose = False

    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("--n_hiddens"):
            n_hiddens = int(a)
        elif o in ("--maxiter"):
            maxiter = int(a)
        elif o in ("--lbfgs_rank"):
            lbfgs_rank = int(a)
        elif o in ("--act_func"):
            act_func = json.loads(a)
            assert type(act_func) == list
            assert len(act_func) == 2
        elif o in ("--noise_stddevs"):
            noise_stddevs = json.loads(a)
            #assert type(noise_stddevs) in [list, float, int]
            #if type(noise_stddevs) in [float, int]:
            #    noise_stddevs = [noise_stddevs]
            assert type(noise_stddevs) in [dict]
            assert noise_stddevs.has_key('train')
        elif o in ("--train_samples_pickle"):
            train_samples_pickle = a
        elif o in ("--valid_samples_pickle"):
            valid_samples_pickle = a
        elif o in ("--test_samples_pickle"):
            test_samples_pickle = a
        elif o in ("--output_dir"):
            output_dir = a
        elif o in ("--save_hidden_units"):
            save_hidden_units = ((a == "True") or (a == "true") or (a == "1"))
        elif o in ("--resume_dae_pickle"):
            resume_dae_pickle = a
        elif o in ("--want_constant_s"):
            want_constant_s = ((a == "True") or (a == "true") or (a == "1"))
        elif o in ("--loss_function_desc"):
            loss_function_desc = a
        else:
            assert False, "unhandled option"

    assert os.path.exists(train_samples_pickle)
    train_samples = cPickle.load(open(train_samples_pickle, 'rb'))
    _, n_inputs = train_samples.shape

    if valid_samples_pickle:
        # it's alright to omit the validation set,
        # but if it's specified as argument, it has to be
        # a legitimate pickle
        assert os.path.exists(valid_samples_pickle)
        valid_samples = cPickle.load(open(valid_samples_pickle, 'rb'))
        assert train_samples.shape[1] == valid_samples.shape[1]
    else:
        valid_samples = None

    from dae_untied_weights import DAE_untied_weights

    if resume_dae_pickle is not None:
        # resume training from a previously pickled DAE
        assert os.path.exists(resume_dae_pickle)
        mydae = DAE_untied_weights(dae_pickle_file=resume_dae_pickle)
        if n_hiddens is not None:
            assert mydae.n_hiddens == n_hiddens
        assert mydae.n_inputs == n_inputs
    else:
        mydae = DAE_untied_weights(n_inputs=n_inputs,
                                   n_hiddens=n_hiddens,
                                   act_func=act_func,
                                   want_constant_s=want_constant_s,
                                   loss_function_desc=loss_function_desc)

    #mydae.fit_with_decreasing_noise(mnist_train_data[0:2000,:],
    #                                [0.1, 0.05, 0.01, 0.001],
    #                                {'method' : 'fmin_cg',
    #                                 'maxiter' : 500,
    #                                 'gtol':0.001})

    start_time = time.time()

    #(train_model_losses, valid_model_losses, post_valid_model_losses, post_alt_valid_model_losses) = \
    #    mydae.fit_with_decreasing_noise(train_samples,
    #                                    noise_stddevs,
    #                                    {'method' : 'fmin_l_bfgs_b',
    #                                     'maxiter' : maxiter,
    #                                     'm' : lbfgs_rank},
    #                                    X_valid=valid_samples)
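
    # The actual training entry point. noise_stddevs is the dict parsed from
    # the --noise_stddevs option above; the option parsing only enforces that
    # it contains a 'train' key, so e.g. (hypothetical values)
    # {"train": [0.1, 0.05, 0.01]} would be a valid argument.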
    best_q_mean_losses = mydae.fit_with_stddevs_sequence(train_samples,
                                                         valid_samples,
                                                         noise_stddevs,
                                                         {'method': 'fmin_l_bfgs_b',
                                                          'maxiter': maxiter,
                                                          'm': lbfgs_rank})
    #train_model_losses = best_q_mean_losses['train']

    end_time = time.time()
    computational_cost_in_seconds = int(end_time - start_time)
    print "Training took %d seconds." % (computational_cost_in_seconds,)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print "Creating directory %s" % (output_dir,)

    trained_model_pickle_file = os.path.join(output_dir, "trained_dae.pkl")
    mydae.save_pickle(trained_model_pickle_file)
    print "Wrote trained DAE in %s" % (trained_model_pickle_file,)

    if test_samples_pickle:
        assert os.path.exists(test_samples_pickle)
        test_samples = cPickle.load(open(test_samples_pickle, 'rb'))
        assert train_samples.shape[1] == test_samples.shape[1]
    else:
        test_samples = None

    extra_pickle_file = os.path.join(output_dir, "extra_details.pkl")
    extra_json_file = os.path.join(output_dir, "extra_details.json")

    extra_details = {'n_inputs': n_inputs,
                     'n_hiddens': n_hiddens,
                     'maxiter': maxiter,
                     'lbfgs_rank': lbfgs_rank,
                     'act_func': act_func,
                     'noise_stddevs': noise_stddevs,
                     'train_samples_pickle': train_samples_pickle,
                     'valid_samples_pickle': valid_samples_pickle,
                     'test_samples_pickle': test_samples_pickle,
                     'train_model_losses': best_q_mean_losses['train'],
                     'model_losses': best_q_mean_losses,
                     #'model_losses':model_losses,
                     #'train_model_losses':train_model_losses,
                     #'valid_model_losses':valid_model_losses,
                     #'post_valid_model_losses':post_valid_model_losses,
                     #'post_alt_valid_model_losses':post_alt_valid_model_losses,
                     'computational_cost_in_seconds': computational_cost_in_seconds}

    if save_hidden_units:
        # Dump the hidden-unit representations of each available split
        # (used for multilayers, i.e. as inputs for training a next layer).
        for (samples, prefix) in [(train_samples, 'train'),
                                  (valid_samples, 'valid'),
                                  (test_samples, 'test')]:
            if samples is not None:
                hidden_units = mydae.encode(samples)
                hidden_units_pickle_file = os.path.join(
                    output_dir, "%s_hidden_units.pkl" % (prefix,))
                extra_details['%s_hidden_units_pickle' % (prefix,)] = hidden_units_pickle_file
                cPickle.dump(hidden_units, open(hidden_units_pickle_file, "w"))
                print "Wrote %s" % (hidden_units_pickle_file,)
                del hidden_units
                del hidden_units_pickle_file

    cPickle.dump(extra_details, open(extra_pickle_file, "w"))
    json.dump(extra_details, open(extra_json_file, "w"),
              sort_keys=True, indent=4, separators=(',', ': '))
    print "Wrote %s" % (extra_pickle_file,)
    print "Wrote %s" % (extra_json_file,)
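
# ----------------------------------------------------------------------------
# Example usage (a minimal sketch, not part of the original script): drive
# main() programmatically. Every path and hyperparameter value below is a
# made-up placeholder; only the option names and the JSON encoding of
# --act_func (a 2-element list) and --noise_stddevs (a dict with at least a
# 'train' key) follow what the option parsing in main() enforces.
def example_invocation():
    import json
    argv = ["train_dae.py",
            "--n_hiddens=256",
            "--maxiter=100",
            "--lbfgs_rank=25",
            "--act_func=%s" % json.dumps(["tanh", "sigmoid"]),
            "--noise_stddevs=%s" % json.dumps({"train": [0.1, 0.05, 0.01]}),
            "--train_samples_pickle=/path/to/train_samples.pkl",
            "--output_dir=/path/to/output_dir"]
    main(argv)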