def main(args):
    # Create a new database
    db = sql.db('%s?table=%s' % (args.database, args.table_name))

    # Create a jobman state
    state = DD()

    # Loop over the search space and schedule jobs
    config_generator = gen_configurations(args.configurations)
    for i, params in enumerate(config_generator):
        # Complete parameters dictionary and add to the state
        state.parameters = params
        state.parameters['model'] = args.model
        state.parameters['dataset'] = args.dataset
        state.parameters['nb_epoch'] = args.n_epochs
        state.parameters['batch_size'] = args.batch_size

        # Insert the job into the database
        if args.model == 'ADIOS':
            state.parameters['labels_order'] = args.labels_order
            state.parameters['n_label_splits'] = args.n_label_splits
            # renamed loop variable to avoid shadowing the enumerate index above
            for split in xrange(args.n_label_splits):
                state.parameters['label_split'] = split + 1
                sql.insert_job(train_adios, flatten(state), db)
        else:  # args.model == 'MLP'
            sql.insert_job(train_mlp, flatten(state), db)

    # Create a view for the new database table
    db.createView(args.table_name + '_view')
def jobman_insert_random(n_jobs):
    JOBDB = 'postgres://*****:*****@opter.iro.umontreal.ca/dauphiya_db/saddle_mnist_ae'
    EXPERIMENT_PATH = "ilya_experiment.jobman_entrypoint"
    jobs = []
    for _ in range(n_jobs):
        job = DD()
        job.learning_rate = 10.**numpy.random.uniform(-2, 0)
        job.momentum = 10.**numpy.random.uniform(-2, 0)
        job.batch_size = 200
        job.tag = "ilya_fixed"
        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))
    if answer == "y":
        numpy.random.shuffle(jobs)
        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 'postgres://*****:*****@opter.iro.umontreal.ca/dauphiya_db/saddle_mnist_ae' ." % len(jobs)
def load_experiments_jobman(experiment_module, jobargs):
    """
    Load jobs from experiment onto postgresql database table.
    """
    dbdescr = get_desc(jobargs)
    db = api0.open_db(dbdescr)

    experiment = imp.load_source("module.name", experiment_module)
    for i, items in enumerate(experiment.generator):
        hyperparams = experiment.default_hyperparams
        state = DD()
        set_hyper_parameters(hyperparams, **dict((k, v) for k, v in items))
        state.hyperparams = translate(hyperparams, "knex")
        state["out&path"] = path.abspath(jobargs["out_path"])
        state["experiment&module"] = path.abspath(experiment_module)
        state["dbdescr"] = dbdescr
        sql.insert_job(run_experiment_jobman, flatten(state), db)

    db.createView("%s" % jobargs["table"])
def results_extractor(train_obj):
    channels = train_obj.model.monitor.channels
    best_index = numpy.argmin(channels['valid_y_nll'].val_record)

    return DD(
        best_epoch=best_index,
        best_epoch_time=channels['valid_y_misclass'].time_record[best_index],
        valid_y_misclass_array=[i.item() for i in channels['valid_y_misclass'].val_record],
        test_y_misclass_array=[i.item() for i in channels['test_y_misclass'].val_record],
        train_y_misclass_array=[i.item() for i in channels['train_y_misclass'].val_record],
        valid_y_misclass=channels['valid_y_misclass'].val_record[best_index].item(),
        test_y_misclass=channels['test_y_misclass'].val_record[best_index].item(),
        train_y_misclass=channels['train_y_misclass'].val_record[best_index].item(),
    )
#
# You can also submit the jobs on the command line like this example:
# jobman sqlschedules <tablepath> Experimentsbatchpretrain.finetuning dat="MNIST" depth=1 tie=True "n_hid={{100,250,500,750,1000}}" act=tanh "sup_lr={{.01,.001,.0001,.00001}}" seed=1 nbepochs_sup=1000 batchsize=10
#

from jobman.tools import DD, flatten
from jobman import sql
from jobman.parse import filemerge
from Experimentsbatchpretrain import *
import numpy

db = sql.db('postgres://[email protected]/glorotxa_db/pretrainexpe')

state = DD()

state.curridata = DD(filemerge('Curridata.conf'))

state.depth = 3
state.tie = True
state.n_hid = 1000  # nb of units per layer
state.act = 'tanh'
state.sup_lr = 0.01
state.unsup_lr = 0.001
state.noise = 0.25
state.seed = 1
# note: the two comments below were swapped in the original
state.nbepochs_unsup = 30  # maximal number of unsupervised updates per layer
state.nbepochs_sup = 1000  # maximal number of supervised updates
state.batchsize = 10
if iterable:
    raise NotImplementedError(
        ('Current implementation does not support running multiple '
         'models in one yaml string. Please change the yaml template '
         'and parameters to contain only one single model.'))
else:
    # print "Executing the model."
    train_obj.main_loop()

    # This line will call a function defined by the user and pass train_obj
    # to it.
    state.results = jobman.tools.resolve(state.extract_results)(train_obj)
    return channel.COMPLETE


if __name__ == '__main__':
    state = DD()

    c1 = 8
    kernel_c1 = 5
    pool_c1 = 4
    c2 = 8
    kernel_c2 = 6
    pool_c2 = 2
    lamda = 0.1
    gamma = 0.0
    decay = 0.000
    save_best_path = './result/test_tree_cnn_thresh_depth2_%d_%d_%d_%d_%d.pkl' % (
        c1, c2, lamda * 10, gamma * 10, decay * 1000)
def load_experiments(args):
    dataset_name = args.dataset_name

    # Load the database and table.
    db = sql.db(
        "postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" % {
            "user": args.user,
            "host": args.host,
            "port": args.port,
            "database": args.database,
            "table": args.table,
        })

    # Don't worry about this yet.
    input_handler = InputHandler()

    # For generating models, we use a special set of jobman generators, made
    # for convenience.
    for items in jg.nested_generator(
            jg.float_generator("learning_rate", 3, 0.01, 0.0001, log_scale=True),
            jg.list_generator("nhid", [50, 100, 200, 300]),
    ):
        logger.info("Adding RBM experiment across hyperparameters %s" % (items, ))
        state = DD()

        # Load experiment hyperparams from experiment
        experiment_hyperparams = experiment.default_hyperparams()

        # Set them with values in our loop.
        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            assert split_keys[-1] in entry, (
                "Key not found in hyperparams: %s" % split_keys[-1])
            entry[split_keys[-1]] = value

        # Set the dataset name
        experiment_hyperparams["dataset_name"] = dataset_name

        # Get the input dim and variance map. Don't worry about variance maps
        # right now, they aren't used here.
        input_dim, variance_map_file = input_handler.get_input_params(
            args, experiment_hyperparams)
        logger.info("%s\n%s\n" % (input_dim, variance_map_file))

        # Set the input dimensionality by the data
        experiment_hyperparams["nvis"] = input_dim

        # Set the minimum learning rate relative to the initial learning rate.
        experiment_hyperparams["min_lr"] = experiment_hyperparams["learning_rate"] / 10

        # Make a unique hash for experiments. Remember that lists, dicts, and
        # other data types may not be hashable, so you may need to do some
        # special processing. In this case we convert the lists to tuples.
        h = abs(hash(frozenset(
            flatten(experiment_hyperparams).keys() +
            [tuple(v) if isinstance(v, list) else v
             for v in flatten(experiment_hyperparams).values()])))

        # Save path for the experiments. In this case we are sharing a
        # directory in my export directory so IT can blame me.
        save_path = serial.preprocess(
            "/export/mialab/users/dhjelm/pylearn2_outs/rbm_demo/%d" % h)

        # We save file params separately as they aren't model specific.
        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams
        user = path.expandvars("$USER")
        state.created_by = user

        # Finally we add the experiment to the table.
        sql.insert_job(experiment.experiment, flatten(state), db)

    # A view can be used when querying the database using psql. May not be
    # needed in future.
    db.createView("%s_view" % args.table)
def load_experiments(args):
    dataset_name = args.dataset_name
    db = sql.db(
        "postgres://%(user)s@%(host)s:"
        "%(port)d/%(database)s?table=%(table)s" % {
            "user": args.user,
            "host": args.host,
            "port": args.port,
            "database": args.database,
            "table": args.table,
        })
    input_handler = InputHandler()

    for items in jg.nested_generator(
            jg.list_generator(
                "encoder.layer_depths",
                [[3, 5, 5, 5, 3], [5, 5, 5, 5, 5], [2, 4, 4, 2]]),
            jg.list_generator("variance_normalize", [False, 2]),
            jg.float_generator("weight_decay.coeff", 4, 0.1, 0.0001,
                               log_scale=True),
            jg.list_generator("prior.__builder__", [
                "nice.pylearn2.models.nice.StandardNormal",
                "nice.pylearn2.models.nice.StandardLogistic"
            ])):

        logger.info("Adding NICE experiment across hyperparameters %s"
                    % (items, ))
        state = DD()

        experiment_hyperparams = nice_experiment.default_hyperparams()

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            assert split_keys[-1] in entry,\
                ("Key not found in hyperparams: %s, "
                 "found: %s" % (split_keys[-1], entry.keys()))
            entry[split_keys[-1]] = value
        experiment_hyperparams["dataset_name"] = dataset_name

        input_dim, variance_map_file = input_handler.get_input_params(
            args, experiment_hyperparams)
        logger.info("%s\n%s\n" % (input_dim, variance_map_file))
        experiment_hyperparams["nvis"] = input_dim
        experiment_hyperparams["encoder"]["nvis"] = input_dim

        h = abs(hash(frozenset(
            flatten(experiment_hyperparams).keys() +
            [tuple(v) if isinstance(v, list) else v
             for v in flatten(experiment_hyperparams).values()])))

        user = path.expandvars("$USER")
        save_path = serial.preprocess(
            "/export/mialab/users/%s/pylearn2_outs/%d" % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams

        sql.insert_job(nice_experiment.experiment, flatten(state), db)

    db.createView("%s_view" % args.table)
from jobman.tools import DD, flatten
from jobman import api0, sql
import numpy
from pylearn2.scripts.jobman import experiment

rng = numpy.random.RandomState(4312987)

if __name__ == '__main__':
    db = api0.open_db('postgres://gershwin.iro.umontreal.ca/desjagui_db/aistats12_mnist_500_1000_diag')

    state = DD()
    state.yaml_template = """
    !obj:pylearn2.scripts.train.Train {
        "max_epochs": 250,
        "save_freq": 10,
        "save_path": "dbm",
        # dataset below is now (temporarily) obsolete
        "dataset": &data !obj:pylearn2.datasets.mnist.MNIST {
            "which_set": 'train',
            "shuffle": True,
            "one_hot": True,
            "binarize": %(binarize)i,
        },
        "model": &model !obj:DBM.dbm.DBM {
            "seed" : 123141,
            "n_u": [784, %(nu1)i, %(nu2)i],
            "lr": %(lr)f,
            "flags": {
                'enable_natural': %(enable_natural)i,
def results_extractor(train_obj):
    channels = train_obj.model.monitor.channels
    train_y_misclass = channels['y_misclass'].val_record[-1]
    train_y_nll = channels['y_nll'].val_record[-1]

    return DD(train_y_misclass=train_y_misclass,
              train_y_nll=train_y_nll)
from jobman.tools import DD, flatten
from jobman import sql

from DARPAscript import NLPSDAE

db = sql.db('postgres://[email protected]/glorotxa_db/opentablegpu')  # you should change this line to match the database you need

state = DD()

state.act = ['tanh']
state.depth = 1
state.n_hid = [5000]
state.noise = ['binomial_NLP']
state.weight_regularization_type = 'l2'
state.weight_regularization_coeff = [0.0, 0.0]
state.activation_regularization_type = 'l1'

# Number of pretraining epochs, one-per-layer
state.nepochs = [30]

# Different validation runs
# - 100 training examples (x20 different samples of 100 training examples)
# - 1000 training examples (x10 different samples of 1000 training examples)
# - 10000 training examples (x1 different sample of 10000 training examples)
# (because of jobman, the keys have to be strings, not ints)
# NOTE: Probably you don't want to make trainsize larger than 10K,
# because it will be too large for CPU memory.
state.validation_runs_for_each_trainingsize = {"100": 20, "1000": 10, "10000": 1}
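# A quick illustration (not part of the original script) of why those keys are
# strings: jobman's flatten() joins nested keys into '.'-separated strings, so
# every key along the way has to be a string. Roughly:
from jobman.tools import DD, flatten

print flatten(DD(validation_runs_for_each_trainingsize={"100": 20, "1000": 10}))
# prints something like:
# {'validation_runs_for_each_trainingsize.100': 20,
#  'validation_runs_for_each_trainingsize.1000': 10}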
from jobman import api0, sql
from jobman.tools import DD, flatten

# Experiment function
from mlp_jobman import experiment

# Database
TABLE_NAME = 'mlp_dumi'
db = api0.open_db(
    'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db?table=' + TABLE_NAME)

# Default values
state = DD()
state.learning_rate = 0.01
state.L1_reg = 0.00
state.L2_reg = 0.0001
state.n_iter = 50
state.batch_size = 20
state.n_hidden = 10

# Hyperparameter exploration
for n_hidden in 20, 30:
    state.n_hidden = n_hidden

    # Explore L1 regularization w/o L2
    state.L2_reg = 0.
    for L1_reg in 0., 1e-6, 1e-5, 1e-4:
        state.L1_reg = L1_reg

        # Insert job
        sql.insert_job(experiment, flatten(state), db)
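# For reference, a minimal sketch of the contract that mlp_jobman.experiment
# (imported above) has to satisfy for the sql.insert_job() calls: jobman calls
# it with the job state and a channel, and expects channel.COMPLETE back when
# the job finishes. Everything inside the body is an assumption here; the real
# MLP training is omitted.
def experiment(state, channel):
    # Hyperparameters chosen by the loops above are available on the state.
    learning_rate = state.learning_rate
    n_hidden = state.n_hidden
    L1_reg, L2_reg = state.L1_reg, state.L2_reg
    # ... build and train the MLP here (omitted) ...
    state.validation_error = 0.0  # placeholder for the metric the real code would store
    return channel.COMPLETE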
from jobman.tools import DD, flatten
from jobman import api0, sql
import numpy
from pylearn2.scripts.jobman import experiment

rng = numpy.random.RandomState(4312987)

if __name__ == '__main__':
    db = api0.open_db(
        'postgres://gershwin.iro.umontreal.ca/desjagui_db/aistats12_mnist_nat')

    state = DD()
    state.yaml_template = """
    !obj:pylearn2.scripts.train.Train {
        "max_epochs": 250,
        "save_freq": 10,
        "save_path": "dbm",
        # dataset below is now (temporarily) obsolete
        "dataset": &data !obj:pylearn2.datasets.mnist.MNIST {
            "which_set": 'train',
            "shuffle": True,
            "one_hot": True,
            "binarize": %(binarize)i,
        },
        "model": &model !obj:DBM.dbm.DBM {
            "seed" : 123141,
            "n_u": [784, %(nu1)i, %(nu2)i],
            "lr": %(lr)f,
            "flags": {
def load_experiments(args):
    dataset_name = args.dataset_name
    db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s"
                % {"user": args.user,
                   "host": args.host,
                   "port": args.port,
                   "database": args.database,
                   "table": args.table,
                   })

    logger.info("Getting dataset info for %s" % dataset_name)
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name)
    mask_file = path.join(data_path, "mask.npy")
    mask = np.load(mask_file)
    input_dim = (mask == 1).sum()
    if input_dim % 2 == 1:
        input_dim -= 1
    mri = MRI.MRI_Standard(which_set="full",
                           dataset_name=dataset_name,
                           unit_normalize=True,
                           even_input=True,
                           apply_mask=True)
    variance_map_file = path.join(data_path, "variance_map.npy")
    mri_nifti.save_variance_map(mri, variance_map_file)

    for items in jg.nested_generator(jg.hidden_generator("nhid1", 1),
                                     jg.hidden_generator("nhid2", 1),
                                     ):
        state = DD()

        experiment_hyperparams = mlp_experiment.default_hyperparams(input_dim)

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            entry[split_keys[-1]] = value

        experiment_hyperparams["dataset_name"] = dataset_name

        h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +
                               flatten(experiment_hyperparams).values())))

        user = path.expandvars("$USER")
        save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%d"
                                      % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams
        state.pid = 0

        sql.insert_job(
            mlp_experiment.experiment,
            flatten(state),
            db
        )

    db.createView("%s_view" % args.table)
if iterable:
    raise NotImplementedError(
        ('Current implementation does not support running multiple '
         'models in one yaml string. Please change the yaml template '
         'and parameters to contain only one single model.'))
else:
    # print "Executing the model."
    train_obj.main_loop()

    # This line will call a function defined by the user and pass train_obj
    # to it.
    state.results = jobman.tools.resolve(state.extract_results)(train_obj)
    return channel.COMPLETE


if __name__ == '__main__':
    state = DD()

    state.yaml_template = '''
    !obj:pylearn2.train.Train {
        dataset: !obj:Database_hand.Database_latent {
            dataset_path: &train %(trainfile)s,
            N: %(N)i,
            n: %(n)i,
            which_set: 'train',
            which_label: 'uvd'
        },
        "model": !obj:myLayer_tree_faster.MLPLayer {
            "batch_size": %(batch_size)d,
            "latent_space": !obj:pylearn2.space.VectorSpace {
                dim: 1
print "jobman loaded" # Experiment function from test_exp import experiment print "open database" # Database TABLE_NAME = 'mlp_dumi' db = api0.open_db('postgres://ift6266h13@gershwin/ift6266h13_sandbox_db?table='+TABLE_NAME) print "database loaded" # Default values state = DD() state.learning_rate = 0.01 state.L1_reg = 0.00 state.L2_reg = 0.0001 state.n_iter = 50 state.batch_size = 20 state.n_hidden = 10 # Hyperparameter exploration for n_hidden in 20, 30: print "h_hidden =",h_hidden state.n_hidden = n_hidden # Explore L1 regularization w/o L2 state.L2_reg = 0.
# it can be handy to record a version number (such as the mercurial version
# hash) as part of the result of a job. That way, when analyzing things much
# later, you can see right away what version of your code produced a given
# result. If your version number changes every time you add a job or tweak an
# analysis function (i.e. list_all), then you will see many different version
# numbers among your experiments, even though the file with the function being
# tested didn't change at all. This can be confusing.

from jobman.examples.def_addition import addition_example

# here we build a list of dictionaries, each of which specifies a setting of
# parameters corresponding to one job.
#
# We will pass this list to mydriver.main, so that it can potentially insert
# them, or trim down the database to match the current list, and other sorts of
# things.

state = DD()
jobs = []
for first in 0, 2, 4, 6, 8, 10:
    state.first = first
    for second in 1, 3, 5, 7, 9:
        state.second = second
        jobs.append(dict(flatten(state)))

#
# mydriver.main defines a few commands that are generally useful (insert,
# clear_db, summary, etc.) but if you want to analyse your results you can add
# additional commands.
#
# For example, if you type 'python additional_driver.py list_all', then this
# little function will run, because its function name is 'list_all'.
#
# For help on the meaning of the various elements of `kwargs`, see <WRITEME>.
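# A hedged, purely local sanity check (not part of the original driver): run
# the experiment function once on the first parameter setting before inserting
# anything. It assumes addition_example follows the usual jobman contract of
# taking (state, channel) and returning channel.COMPLETE, hence the tiny stub
# channel below.
class _StubChannel(object):
    COMPLETE = 'complete'

if jobs:
    trial = DD(jobs[0])
    addition_example(trial, _StubChannel())
    print trial  # for the addition example, the state should now carry its result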
def create_jobman_jobs():
    # Database operations
    TABLE_NAME = "arcade_multi_prmlp_cv_binary_8x8_40k"

    db = api0.open_db(
        'postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME)

    ri = numpy.random.random_integers

    # Default values
    state = DD()
    state.dataset = \
        "/home/gulcehre/dataset/pentomino/experiment_data/pento64x64_40k_seed_23112222.npy"
    state.no_of_folds = 5
    state.exid = 0
    state.n_hiddens = [100, 200, 300]
    state.n_hidden_layers = 3
    state.learning_rate = 0.001
    state.l1_reg = 1e-5
    state.l2_reg = 1e-3
    state.n_epochs = 2
    state.batch_size = 120
    state.save_exp_data = True
    state.no_of_patches = 64  # fixed: was "self.no_of_patches", but there is no self in this function
    state.cost_type = "crossentropy"
    state.n_in = 8 * 8
    state.n_out = 1
    state.best_valid_error = 0.0
    state.best_test_error = 0.0
    state.valid_obj_path_error = 0.0
    state.test_obj_path_error = 0.0

    l1_reg_values = [0., 1e-6, 1e-5, 1e-4]
    l2_reg_values = [0., 1e-5, 1e-4]
    learning_rates = numpy.logspace(numpy.log10(0.0001), numpy.log10(1), 36)
    num_hiddens = numpy.logspace(numpy.log10(256), numpy.log10(2048), 24)

    for i in xrange(NO_OF_TRIALS):
        state.exid = i
        state.n_hidden_layers = ri(4)
        n_hiddens = []
        # inner loop variable renamed to avoid shadowing the trial index i
        for j in xrange(state.n_hidden_layers):
            n_hiddens.append(int(num_hiddens[ri(num_hiddens.shape[0]) - 1]))
        state.n_hiddens = n_hiddens
        state.learning_rate = learning_rates[ri(learning_rates.shape[0]) - 1]
        state.l1_reg = l1_reg_values[ri(len(l1_reg_values)) - 1]
        state.l2_reg = l2_reg_values[ri(len(l2_reg_values)) - 1]
        sql.insert_job(experiment, flatten(state), db)

    db.createView(TABLE_NAME + "_view")
from jobman.tools import DD, flatten
from jobman import api0, sql
import numpy
from pylearn2.scripts.jobman import experiment

rng = numpy.random.RandomState(4312987)

if __name__ == '__main__':
    db = api0.open_db('postgres://opter.iro.umontreal.ca/desjagui_db/nips2013_modes_rbm_exp1_1')

    state = DD()
    state.yaml_template = """
    !obj:pylearn2.scripts.train.Train {
        "dataset": &data !obj:deep_tempering.data.test_modes.OnlineModesNIPSDLUFL {},
        "model": &model !obj:deep_tempering.rbm.RBM {
            "seed" : %(seed)i,
            "init_from" : "",
            "batch_size" : &batch_size %(batch_size)i,
            "n_v" : 784,
            "n_h" : &n_h %(nh1)i,
            "neg_sample_steps" : 1,
            "flags": {
                "ml_vbias": 0,
            },
            "lr_spec" : {
                'type': 'linear',
                'start': %(lr_start)f,
                'end': %(lr_end)f,
            },
#!/usr/bin/python

from jobman.tools import DD, flatten
from jobman import sql

#from DARPAscript import NLPSDAE
from DARPAscript_simplified import NLPSDAE

#db = sql.db('postgres://[email protected]/glorotxa_db/opentablegpu')  # you should change this line to match the database you need
#db = sql.db('postgres://[email protected]/ift6266h10_sandbox_db/opentablegpu')
db = sql.db('postgres://*****:*****@gershwin.iro.umontreal.ca/ift6266h10_sandbox_db/opentablegpu')

state = DD()

state.act = ['tanh']
state.depth = 1
state.noise = ['gaussian']
state.weight_regularization_type = 'l2'
state.weight_regularization_coeff = [0.0, 0.0]
state.activation_regularization_type = 'l1'

# Number of pretraining epochs, one-per-layer
state.nepochs = [128]

# Different validation runs
# - 100 training examples (x20 different samples of 100 training examples)
# - 1000 training examples (x10 different samples of 1000 training examples)
# - 10000 training examples (x1 different sample of 10000 training examples)
# (because of jobman, the keys have to be strings, not ints)
def jobman_insert_random(n_jobs):
    JOBDB = 'postgres://*****:*****@opter.iro.umontreal.ca/devries_db/lvq_mnist'
    EXPERIMENT_PATH = "lvq_mnist.jobman_entrypoint"
    jobs = []
    for _ in range(n_jobs):
        job = DD()
        job.n_hiddens = numpy.random.randint(1000, high=3000)
        job.n_out = numpy.random.randint(100, high=500)
        job.noise_std = numpy.random.uniform(low=0.0, high=0.8)
        job.learning_rate = 10.**numpy.random.uniform(-2, 0)
        job.momentum = 10.**numpy.random.uniform(-2, 0)
        job.gamma = numpy.random.uniform(low=1.0, high=3.0)
        job.tag = "lvq_mnist"
        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))
    if answer == "y":
        numpy.random.shuffle(jobs)
        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 'postgres://*****:*****@opter.iro.umontreal.ca/devries_db/lvq_mnist' ." % len(jobs)
def results_extractor(train_obj):
    """
    Default results extractor that does nothing. Good for tutorials.
    """
    return DD()
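# How an extractor like the one above is typically wired up: the training
# snippets in this collection do
#     state.results = jobman.tools.resolve(state.extract_results)(train_obj)
# so the job state only stores a dotted path to the extractor. A minimal
# sketch; the module path below is hypothetical.
from jobman.tools import DD

state = DD()
state.extract_results = "my_package.extractors.results_extractor"  # hypothetical dotted path
# jobman later resolves this string into the callable above and applies it to
# the finished Train object.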
numpy.random.seed(state.seed)

datatrain = (PATH_DATA + NAME_DATA + '_1.pkl', PATH_DATA + NAME_LABEL + '_1.pkl')
datatrainsave = PATH_SAVE + '/train.libsvm'
datatest = (PATH_DATA + NAME_DATATEST + '_1.pkl', PATH_DATA + NAME_LABELTEST + '_1.pkl')
datatestsave = PATH_SAVE + '/test.libsvm'

depthbegin = 0

# monitor best performance for reconstruction and classification
state.bestrec = []
state.bestrecepoch = []
state.besterr = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])
state.besterrepoch = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])

if MODEL_RELOAD != None:
    oldstate = expand(DD(filemerge(MODEL_RELOAD + '../current.conf')))
    DEPTH = oldstate.depth + DEPTH
    depthbegin = oldstate.depth
    ACT = oldstate.act + ACT
    N_HID = oldstate.n_hid + N_HID
    NOISE = oldstate.noise + NOISE
    ACTIVATION_REGULARIZATION_COEFF = oldstate.activation_regularization_coeff + ACTIVATION_REGULARIZATION_COEFF
    WEIGHT_REGULARIZATION_COEFF = oldstate.weight_regularization_coeff[:-1] + WEIGHT_REGULARIZATION_COEFF
    NEPOCHS = oldstate.nepochs + NEPOCHS
    LR = oldstate.lr + LR
    NOISE_LVL = oldstate.noise_lvl + NOISE_LVL
    EPOCHSTEST = oldstate.epochstest + EPOCHSTEST
    state.bestrec = oldstate.bestrec
    state.bestrecepoch = oldstate.bestrecepoch  # was oldstate.bestrec, presumably a copy-paste slip
    del oldstate
def load_experiments(args):
    dataset_name = args.dataset_name
    db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s"
                % {"user": args.user,
                   "host": args.host,
                   "port": args.port,
                   "database": args.database,
                   "table": args.table,
                   })

    logger.info("Getting dataset info for %s%s"
                % (dataset_name, ", transposed" if args.transposed else ""))
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + args.dataset_name)

    if args.transposed:
        logger.info("Data in transpose...")
        mri = MRI.MRI_Transposed(dataset_name=args.dataset_name,
                                 unit_normalize=True,
                                 even_input=True,
                                 apply_mask=True)
        input_dim = mri.X.shape[1]
        variance_map_file = path.join(data_path, "transposed_variance_map.npy")
    else:
        mask_file = path.join(data_path, "mask.npy")
        mask = np.load(mask_file)
        input_dim = (mask == 1).sum()
        if input_dim % 2 == 1:
            input_dim -= 1
        mri = MRI.MRI_Standard(which_set="full",
                               dataset_name=args.dataset_name,
                               unit_normalize=True,
                               even_input=True,
                               apply_mask=True)
        variance_map_file = path.join(data_path, "variance_map.npy")

    mri_nifti.save_variance_map(mri, variance_map_file)

    for items in nested_generator(layer_depth_generator("encoder.layer_depths",
                                                        xrange(4, 6), 5),
                                  hidden_generator("encoder.nhid", 4),
                                  float_generator("weight_decay.coeffs.z", 3, 0.1, 0.001,
                                                  log_scale=True)):
        # logger.info("Adding NICE experiment with hyperparameters %s" % (items, ))
        state = DD()

        experiment_hyperparams = nice_experiment.default_hyperparams(input_dim)

        if args.transposed:
            experiment_hyperparams["data_class"] = "MRI_Transposed"
        if args.logistic:
            experiment_hyperparams["prior"]["__builder__"] =\
                "nice.pylearn2.models.nice.StandardLogistic"

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            entry[split_keys[-1]] = value

        experiment_hyperparams["dataset_name"] = dataset_name

        h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +
                               [tuple(v) if isinstance(v, list) else v
                                for v in flatten(experiment_hyperparams).values()])))

        user = path.expandvars("$USER")
        save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%d"
                                      % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams

        sql.insert_job(
            nice_experiment.experiment,
            flatten(state),
            db
        )

    db.createView("%s_view" % args.table)
for all layer in the same graph and reconstruction error.
"""

import cPickle
import matplotlib
matplotlib.use('Agg')
import pylab
import numpy
from jobman.tools import DD, expand
from jobman.parse import filemerge

rec = {}
err = {}
epochvect = {}

state = expand(DD(filemerge('orig.conf')))

val_run_per_tr_siz = state.validation_runs_for_each_trainingsize
val_run = [int(trainsize) for trainsize in val_run_per_tr_siz]
val_run.sort()
epochstest = state.epochstest
nepochs = state.nepochs
depth = state.depth

for i in range(depth):
    err.update({i: {}})
    rec.update({i: []})
    epochvect.update({i: []})
    for j in val_run:
        err[i].update({j: []})
IMPORTANT: actually runs on sampled MNIST, contrary to nips2013_mnist_tdbn_exp9_1,
which used thresholding to binarize (due to a discrepancy in Pylearn2).
"""
from jobman.tools import DD, flatten
from jobman import api0, sql
import numpy
from pylearn2.scripts.jobman import experiment

rng = numpy.random.RandomState(4312987)

if __name__ == '__main__':
    db = api0.open_db('postgres://opter.iro.umontreal.ca/desjagui_db/icml14_dtneg_mnist_tdbn_exp1_1')

    state = DD()
    state.yaml_template = """
    !obj:pylearn2.train.Train {
        "dataset": &data !obj:pylearn2.datasets.mnist.MNIST {
            "which_set": 'train',
            "center": False,
            "one_hot": True,
            "binarize": 'sample',
        },
        "model": &model !obj:deep_tempering.tempered_dbn.TemperedDBN {
            "max_updates": 2000000,
            "flags": {
                'train_on_samples': 0,
                'order': 'swap_sample_learn',
                'use_dtneg': True,
from jobman.tools import DD, flatten
from jobman import api0, sql
import numpy
from pylearn2.scripts.jobman import experiment

rng = numpy.random.RandomState(4312987)

if __name__ == '__main__':
    db = api0.open_db('postgres://opter.iro.umontreal.ca/desjagui_db/nips2013_modes_tdbn_exp3_2')

    state = DD()
    state.yaml_template = """
    !obj:pylearn2.scripts.train.Train {
        "dataset": &data !obj:deep_tempering.data.test_modes.OnlineModesNIPSDLUFL {},
        "model": &model !obj:deep_tempering.tempered_dbn.TemperedDBN {
            "rbm1": &rbm1 !obj:deep_tempering.rbm.RBM {
                "seed" : %(seed1)i,
                "batch_size" : &batch_size %(batch_size)i,
                "n_v" : 784,
                "n_h" : &nh1 %(nh1)i,
                "gibbs_vhv": True,
                "neg_sample_steps" : 1,
                "flags": {
                    'ml_vbias': 0,
                },
                "lr_spec" : {
                    'type': 'linear',
                    'start': %(lr_start)f,
                    'end': %(lr_end)f,
# import types of architectures we want to use
import RNN

if __name__ == '__main__':

    TABLE_NAME = 'rnn001'
    #db = sql.db('postgres://*****:*****@gershwin/pascanur_db/' + TABLE_NAME)
    db = sql.postgres_serial(user='******',
                             password='******',
                             host='colosse1',
                             port=5432,
                             database='pascanur_db',
                             table_prefix=TABLE_NAME)

    state = DD()

    ## DEFAULT VALUES ##
    state['configfile'] = '/home/rpascanu/workspace/RNN_theano/rnn001/RNN.ini'
    state['jobman.experiment'] = 'RNN.jobman'
    n_jobs = 0

    for n_jobs in xrange(512):
        n = numpy.random.rand()
        #if n > 0.5:
        state['opt_alg'] = 'sgd_qn'
        _lambda = -(numpy.random.randint(3) + 4)
        state['mylambda'] = 10**_lambda
        start_lr = numpy.random.randint(3) + 1 + abs(_lambda)
        t0 = 10**start_lr
        state['t0'] = t0
# -*- coding: utf-8 -*-
import sys, os
from jobman.tools import DD
from jobman import parse
import argparse
import matplotlib.pyplot as plt
import numpy

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('files', type=str, nargs='*')
args = parser.parse_args()

files = []
for fname in args.files:
    d = DD(parse.filemerge(fname))
    if d['jobman.status'] == 5:
        continue
    if not hasattr(d, 'best_test_combined'):
        continue
    if numpy.isnan(d.best_test_combined).any():
        continue
    if d.valid_diff > 0.474:
        continue
    d.filename = fname
    #idx = numpy.argmin(d.valid_err_list)
    #d.best_valid = d.valid_err_list[idx]
    files.append(d)
channels = train_obj.model.monitor.channels
train_cost = channels['sgd_cost(ExhaustiveSGD[X])']
best_epoch = numpy.argmin(train_cost.val_record)
best_rec_error = train_cost.val_record[best_epoch]
batch_num = train_cost.batch_record[best_epoch]
return dict(
    best_epoch=best_epoch,
    train_rec_error=best_rec_error,
    batch_num=batch_num)


if __name__ == '__main__':
    db = api0.open_db('sqlite:///test.db?table=test_jobman_pylearn2')

    state = DD()
    state.yaml_template = '''
    !obj:pylearn2.train.Train {
        "dataset": !obj:pylearn2.datasets.npy_npz.NpyDataset &dataset {
            "file" : "%(file)s"
        },
        "model": !obj:pylearn2.autoencoder.ContractiveAutoencoder {
            "nvis" : %(nvis)d,
            "nhid" : %(nhid)d,
            "irange" : 0.05,
            "act_enc": "sigmoid", #for some reason only sigmoid function works
            "act_dec": "sigmoid",
        },
        "algorithm": !obj:pylearn2.training_algorithms.sgd.ExhaustiveSGD {
            "learning_rate" : %(learning_rate)f,