import os

from jobman.tools import DD
from jobman import parse


def find_conf_files(cwd, fname='current.conf', recurse=True):
    """
    This generator iterates from the given directory and finds all job
    configuration files, recursively if specified. Job config files are read
    and a dict is yielded with the proper key/value pairs.

    @param cwd: directory to start iterating from
    @param fname: name of the job config file to look for and parse
    @param recurse: enable recursive search of job config files
    """
    for jobid in os.listdir(cwd):
        e = os.path.join(cwd, jobid)
        if os.path.isdir(e) and recurse:
            # Recurse into subdirectories and re-yield their results.
            for sub in find_conf_files(e, fname, recurse):
                yield sub
        try:
            jobdd = DD(parse.filemerge(os.path.join(e, fname)))
        except IOError:
            print "WARNING: %s file not found. Skipping it" % os.path.join(e, fname)
            continue
        try:
            jobid = int(jobid)
        except ValueError:
            jobid = None
        yield (jobid, jobdd)
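
# A minimal usage sketch (hypothetical, not part of the original module): walk an
# experiment root and print the status recorded in each job's conf file. The path
# below is only the example directory used in the docstrings of this codebase.
if __name__ == '__main__':
    for jobid, jobdd in find_conf_files('/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050'):
        # jobid is an int when the folder name is numeric, otherwise None;
        # jobdd is a DD holding the merged key/value pairs of the conf file.
        print jobid, jobdd.get('jobman.status', None)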
for all layers in the same graph, and reconstruction error.
"""
import cPickle
import matplotlib
matplotlib.use('Agg')
import pylab
import numpy

from jobman.tools import DD, expand
from jobman.parse import filemerge

rec = {}
err = {}
epochvect = {}

state = expand(DD(filemerge('orig.conf')))
val_run_per_tr_siz = state.validation_runs_for_each_trainingsize
val_run = [int(trainsize) for trainsize in val_run_per_tr_siz]
val_run.sort()
epochstest = state.epochstest
nepochs = state.nepochs
depth = state.depth

# One entry per layer: err[layer][trainsize] accumulates validation errors,
# rec[layer] the reconstruction errors and epochvect[layer] the epoch numbers.
for i in range(depth):
    err[i] = {}
    rec[i] = []
    epochvect[i] = []
    for j in val_run:
        err[i][j] = []
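
# A sketch of the plotting these containers are set up for (assuming they are later
# filled with one curve per layer and per training size; the output filename is
# hypothetical): draw every error curve on the same figure.
for i in range(depth):
    for j in val_run:
        pylab.plot(epochvect[i], err[i][j], label='layer %d, trainsize %d' % (i, j))
pylab.xlabel('epoch')
pylab.ylabel('validation error')
pylab.legend(loc='best')
pylab.savefig('err_curves.png')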
import os

from jobman.parse import filemerge


def get_dir_by_key_name(dirs, keys, sort_inside_groups=True):
    '''
    Returns a 3-tuple.
    The first element is the number of key values that were found in the
    directories.
    The second is a list of all the key values found.
    The third is a list of lists of directories: it contains nb_key_values
    lists, and each of those lists contains the folder names.

    Parameters
    ----------
    dirs : str or list of str
        Directories that correspond to the table path directory inside the
        experiment root directory.
    keys : str or list of str
        Experiment parameters used to group the jobs.
    sort_inside_groups : bool
        Sort each group so that the jobs appear in the same order with respect
        to the other parameters in every group.

    Examples
    --------
    get_dir_by_key_name(['/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050'], 'nb_groups')
    '''
    if isinstance(keys, str):
        keys = [keys]

    nb_key_values = 0
    dir_list = []
    nb_dir_per_group = []
    key_values = []

    for base_dir in dirs:
        for expdir in os.listdir(base_dir):
            confdir = os.path.join(base_dir, expdir)
            conf = os.path.join(confdir, 'current.conf')
            # No conf file here, go to next dir.
            if not os.path.isfile(conf):
                continue
            params = filemerge(conf)

            # Get the key values in the conf file.
            kval = ()
            for key in keys:
                kval += params.get(key, None),

            # Check if we have this key value already.
            new_key = -1
            for i in range(len(key_values)):
                if kval == key_values[i]:
                    new_key = i

            # Update the dir list accordingly.
            if new_key == -1:
                key_values.append(kval)
                nb_dir_per_group.append(1)
                dir_list.append([])
                dir_list[nb_key_values].append([confdir, expdir])
                nb_key_values = nb_key_values + 1
            else:
                nb_dir_per_group[new_key] = nb_dir_per_group[new_key] + 1
                dir_list[new_key].append([confdir, expdir])

    if not sort_inside_groups or nb_key_values == 1:
        return (nb_key_values, key_values, dir_list)

    # Check that we have the same number of elements in each group;
    # otherwise some experiments have failed.
    for i in range(nb_key_values):
        for j in range(nb_key_values):
            if nb_dir_per_group[i] != nb_dir_per_group[j]:
                raise EnvironmentError(
                    'Not all experiments were found; they might have crashed. '
                    'This is not supported yet')

    # Reorder the groups based on the first one: sort all the other lists so
    # that the same conf parameter values always show up in the same order in
    # each group. Done the slow, lazy way as this code is not time critical.
    for i in range(len(dir_list[0])):
        conf = os.path.join(dir_list[0][i][0], 'orig.conf')
        original_params = filemerge(conf)
        for j in range(1, nb_key_values):
            for k in range(nb_dir_per_group[0]):
                # Parse each group until we match the exact dictionary (except
                # for our grouping keys), then move it within the group so it
                # has the same index as in group 0.
                conf = os.path.join(dir_list[j][k][0], 'orig.conf')
                current_params = filemerge(conf)
                for key in keys:
                    current_params[key] = original_params[key]
                if current_params == original_params:
                    temp = dir_list[j].pop(k)
                    dir_list[j].insert(i, temp)
                    break

    return (nb_key_values, key_values, dir_list)
def get_dir_by_key_value(dirs, keys=['seed=0']):
    '''
    Returns a list containing the names of the matching folders. Each element
    in the list is a list containing the full path and the id of the
    experiment as a string.

    Parameters
    ----------
    dirs : str or list of str
        Directories that correspond to the table path directory inside the
        experiment root directory.
    keys : str or list of str
        Strings of the form key=value that select which experiments to keep.

    Examples
    --------
    get_dir_by_key_value(['/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050'], ['seed=0'])
    '''
    if isinstance(dirs, str):
        dirs = (dirs,)
    if isinstance(keys, str):
        keys = [keys]

    good_dir = []

    # Gather results.
    for base_dir in dirs:
        for expdir in os.listdir(base_dir):
            confdir = os.path.join(base_dir, expdir)
            conf = os.path.join(confdir, 'current.conf')
            if not os.path.isfile(conf):
                continue
            params = filemerge(conf)

            # skip counts the keys that have not been matched yet; the
            # experiment is kept only when it reaches zero.
            skip = len(keys)
            for k in keys:
                keys_to_match = {}
                subkeys = k.split(':')
                for t in subkeys:
                    if t.find('=') != -1:
                        keys_to_match.update(jparse(t))
                    else:
                        if len(subkeys) != 1:
                            raise ValueError(
                                'key1:key2 syntax requires keyval pairs')
                        kval = params.get(t)
                        skip -= 1
                for fkey, fval in keys_to_match.iteritems():
                    kval = params.get(fkey, None)
                    if kval == fval:
                        skip -= 1
                        break
            if not skip:
                good_dir.append([confdir, expdir])
    return good_dir
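
# A short usage sketch (hypothetical; the directory is the one used in the
# docstrings above). Group all jobs under one table directory by their
# 'nb_groups' value and print the members of each group.
# get_dir_by_key_value(base, ['seed=0']) selects by an exact key=value instead.
if __name__ == '__main__':
    base = ['/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050']
    nb_key_values, key_values, dir_list = get_dir_by_key_name(base, 'nb_groups')
    for kval, group in zip(key_values, dir_list):
        print kval, [expdir for confdir, expdir in group]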
numpy.random.seed(state.seed)

datatrain = (PATH_DATA + NAME_DATA + '_1.pkl.gz', PATH_DATA + NAME_LABEL + '_1.pkl.gz')
datatrainsave = PATH_SAVE + '/train.libsvm'
datatest = (PATH_DATA + NAME_DATATEST + '_1.pkl.gz', PATH_DATA + NAME_LABELTEST + '_1.pkl.gz')
datatestsave = PATH_SAVE + '/test.libsvm'

depthbegin = 0

# Monitor best performance for reconstruction and classification.
state.bestrec = []
state.bestrecepoch = []
state.besterr = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])
state.besterrepoch = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])

if MODEL_RELOAD is not None:
    # Resume on top of a previously trained model: the reloaded per-layer
    # settings come first, followed by the new ones.
    oldstate = expand(DD(filemerge(MODEL_RELOAD + '../current.conf')))
    DEPTH = oldstate.depth + DEPTH
    depthbegin = oldstate.depth
    ACT = oldstate.act + ACT
    N_HID = oldstate.n_hid + N_HID
    NOISE = oldstate.noise + NOISE
    ACTIVATION_REGULARIZATION_COEFF = \
        oldstate.activation_regularization_coeff + ACTIVATION_REGULARIZATION_COEFF
    WEIGHT_REGULARIZATION_COEFF = \
        oldstate.weight_regularization_coeff[:-1] + WEIGHT_REGULARIZATION_COEFF
    NEPOCHS = oldstate.nepochs + NEPOCHS
    LR = oldstate.lr + LR
    NOISE_LVL = oldstate.noise_lvl + NOISE_LVL
    EPOCHSTEST = oldstate.epochstest + EPOCHSTEST
    state.bestrec = oldstate.bestrec
    state.bestrecepoch = oldstate.bestrecepoch
    del oldstate
numpy.random.seed(state.seed)

datatrain = (PATH_DATA + NAME_DATA + '_1.pkl', PATH_DATA + NAME_LABEL + '_1.pkl')
datatrainsave = PATH_SAVE + '/train.libsvm'
datatest = (PATH_DATA + NAME_DATATEST + '_1.pkl', PATH_DATA + NAME_LABELTEST + '_1.pkl')
datatestsave = PATH_SAVE + '/test.libsvm'

depthbegin = 0

# Monitor best performance for reconstruction and classification.
state.bestrec = []
state.bestrecepoch = []
state.besterr = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])
state.besterrepoch = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])

if MODEL_RELOAD is not None:
    # Resume on top of a previously trained model: the reloaded per-layer
    # settings come first, followed by the new ones.
    oldstate = expand(DD(filemerge(MODEL_RELOAD + '../current.conf')))
    DEPTH = oldstate.depth + DEPTH
    depthbegin = oldstate.depth
    ACT = oldstate.act + ACT
    N_HID = oldstate.n_hid + N_HID
    NOISE = oldstate.noise + NOISE
    ACTIVATION_REGULARIZATION_COEFF = \
        oldstate.activation_regularization_coeff + ACTIVATION_REGULARIZATION_COEFF
    WEIGHT_REGULARIZATION_COEFF = \
        oldstate.weight_regularization_coeff[:-1] + WEIGHT_REGULARIZATION_COEFF
    NEPOCHS = oldstate.nepochs + NEPOCHS
    LR = oldstate.lr + LR
    NOISE_LVL = oldstate.noise_lvl + NOISE_LVL
    EPOCHSTEST = oldstate.epochstest + EPOCHSTEST
    state.bestrec = oldstate.bestrec
    state.bestrecepoch = oldstate.bestrecepoch
    del oldstate
# -*- coding: utf-8 -*-
import sys, os
from jobman.tools import DD
from jobman import parse
import argparse
import matplotlib.pyplot as plt
import numpy

parser = argparse.ArgumentParser(description='Filter jobman result files.')
parser.add_argument('files', type=str, nargs='*')
args = parser.parse_args()

# Parse the given conf/result files and keep only the usable ones.
files = []
for fname in args.files:
    d = DD(parse.filemerge(fname))
    if d['jobman.status'] == 5:
        continue
    if not hasattr(d, 'best_test_combined'):
        continue
    if numpy.isnan(d.best_test_combined).any():
        continue
    if d.valid_diff > 0.474:
        continue
    d.filename = fname
    #idx = numpy.argmin(d.valid_err_list)
    #d.best_valid = d.valid_err_list[idx]
    files.append(d)
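
# A hedged sketch of what one might do with the filtered list: rank the surviving
# jobs by their mean combined test error and save a simple diagnostic scatter plot.
# The output filename is hypothetical; the fields come from the filter above.
files.sort(key=lambda d: numpy.mean(d.best_test_combined))
for d in files[:10]:
    print d.filename, numpy.mean(d.best_test_combined), d.valid_diff

plt.scatter([d.valid_diff for d in files],
            [numpy.mean(d.best_test_combined) for d in files])
plt.xlabel('valid_diff')
plt.ylabel('mean best_test_combined')
plt.savefig('filtered_jobs.png')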
#
# You can also submit the jobs on the command line like this example:
# jobman sqlschedules <tablepath> Experimentsbatchpretrain.finetuning dat="MNIST" depth=1 tie=True "n_hid={{100,250,500,750,1000}}" act=tanh "sup_lr={{.01,.001,.0001,.00001}}" seed=1 nbepochs_sup=1000 batchsize=10
#
from jobman.tools import DD, flatten
from jobman import sql
from jobman.parse import filemerge
from Experimentsbatchpretrain import *
import numpy

db = sql.db('postgres://[email protected]/glorotxa_db/pretrainexpe')

state = DD()
state.curridata = DD(filemerge('Curridata.conf'))

state.depth = 3
state.tie = True
state.n_hid = 1000  # number of units per layer
state.act = 'tanh'
state.sup_lr = 0.01
state.unsup_lr = 0.001
state.noise = 0.25
state.seed = 1
state.nbepochs_unsup = 30  # maximal number of unsupervised updates per layer
state.nbepochs_sup = 1000  # maximal number of supervised updates
state.batchsize = 10
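
# One way to actually schedule jobs from this script (a sketch, assuming jobman's
# sql.insert_job and that 'finetuning' is the experiment function exported by
# Experimentsbatchpretrain, as in the command-line example above). The swept values
# mirror the n_hid and sup_lr ranges from that example.
for n_hid in [100, 250, 500, 750, 1000]:
    for sup_lr in [.01, .001, .0001, .00001]:
        state.n_hid = n_hid
        state.sup_lr = sup_lr
        sql.insert_job(finetuning, flatten(state), db)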