Example #1
import os

from jobman.tools import DD
from jobman import parse


def find_conf_files(cwd, fname='current.conf', recurse=True):
    """
    This generator iterates from the given directory and finds all job
    configuration files, recursively if specified. Each job config file is
    read, and a dict with the proper key/value pairs is yielded.

    @param cwd: directory to start iterating from
    @param fname: name of the job config file to look for and parse
    @param recurse: enable recursive search of job config files
    """
    for jobid in os.listdir(cwd):
        e = os.path.join(cwd, jobid)

        if os.path.isdir(e) and recurse:
            # Re-yield the results of the recursive call; invoking the
            # generator without iterating it would silently discard them.
            for sub in find_conf_files(e, fname, recurse):
                yield sub

        try:
            jobdd = DD(parse.filemerge(os.path.join(e, fname)))
        except IOError:
            print("WARNING: %s file not found. Skipping it" % os.path.join(e, fname))
            continue

        try:
            jobid = int(jobid)
        except ValueError:
            jobid = None
        yield (jobid, jobdd)
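
A minimal usage sketch for the generator above; the jobs/ directory layout (numbered subdirectories, each holding a current.conf) is an assumption for illustration:

# Hypothetical layout: jobs/<jobid>/current.conf
for jobid, jobdd in find_conf_files('jobs'):
    print("job %s has %d config entries" % (jobid, len(jobdd)))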
Example #3
for all layers in the same graph, and reconstruction error. """

import cPickle
import matplotlib
matplotlib.use('Agg')
import pylab
import numpy

from jobman.tools import DD, expand
from jobman.parse import filemerge

rec = {}
err = {}
epochvect = {}

state = expand(DD(filemerge('orig.conf')))
val_run_per_tr_siz = state.validation_runs_for_each_trainingsize
val_run = [int(trainsize) for trainsize in val_run_per_tr_siz]
val_run.sort()
epochstest = state.epochstest
nepochs = state.nepochs

depth = state.depth

for i in range(depth):
    err[i] = {}
    rec[i] = []
    epochvect[i] = []
    for j in val_run:
        err[i][j] = []
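
The excerpt stops after the per-layer containers are initialized. A hedged sketch of the plotting step they presumably feed (the filling of rec and epochvect happens in the part of the script not shown, and the output file name is an assumption):

for i in range(depth):
    # rec[i] and epochvect[i] are assumed to have been filled by the
    # omitted part of the script.
    pylab.plot(epochvect[i], rec[i], label='layer %d' % i)
pylab.xlabel('epoch')
pylab.ylabel('reconstruction error')
pylab.legend()
pylab.savefig('reconstruction.png')  # Agg backend: render to file only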
Example #4
import os

from jobman.parse import filemerge


def get_dir_by_key_name(dirs, keys, sort_inside_groups=True):
    '''
    Returns a 3-tuple.
    The first element is the number of key values that were found in the
    directories, the second is a list of all the key values found, and the
    third is a list of lists of directories: it contains nb_key_values lists,
    each holding the folder names of one group.

    Parameters
    ----------
    dirs : str or list of str
        Directories that correspond to the table path directory inside the
        experiment root directory.
    keys : str or list of str
        Experiment parameters used to group the jobs.
    sort_inside_groups : bool
        Sort each group so that the jobs in each group share the same values
        for the other parameters.

    Examples
    --------

        get_dir_by_key_name(['/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050'], 'nb_groups')
    '''

    if isinstance(dirs, str):
        dirs = [dirs]
    if isinstance(keys, str):
        keys = [keys]

    nb_key_values = 0
    dir_list = []
    nb_dir_per_group = []
    key_values = []

    for base_dir in dirs:
        for expdir in os.listdir(base_dir):

            confdir = os.path.join(base_dir, expdir)
            conf = os.path.join(confdir, 'current.conf')

            # No conf file here, go to next dir.
            if not os.path.isfile(conf):
                continue

            params = filemerge(conf)

            # Get the key values from the conf file.
            kval = ()
            for key in keys:
                kval += (params.get(key, None),)

            new_key = -1
            # Check if we have this key value already.
            for i in range(len(key_values)):
                if kval == key_values[i]:
                    new_key = i

            # Update dir list accordingly.
            if new_key == -1:
                key_values.append(kval)
                nb_dir_per_group.append(1)
                dir_list.append([])
                dir_list[nb_key_values].append([confdir, expdir])
                nb_key_values = nb_key_values + 1
            else:
                nb_dir_per_group[new_key] = nb_dir_per_group[new_key] + 1
                dir_list[new_key].append([confdir, expdir])

    if not sort_inside_groups or nb_key_values == 1:
        return (nb_key_values, key_values, dir_list)

    # Check that we have the same number of elements in each group;
    # a mismatch means some experiments have failed.
    for i in range(nb_key_values):
        for j in range(nb_key_values):
            if nb_dir_per_group[i] != nb_dir_per_group[j]:
                raise EnvironmentError(
                    'Not all experiments were found; they might have '
                    'crashed. This is not supported yet.')

    # Regroup the list based on the first key.
    # Sort all the other lists so that the same conf parameter values always
    # show up in the same order in each group.
    # Do it the slow, lazy way, as this code is not time critical.
    for i in range(len(dir_list[0])):
        conf = os.path.join(dir_list[0][i][0], 'orig.conf')
        original_params = filemerge(conf)

        for j in range(1, nb_key_values):
            for k in range(nb_dir_per_group[0]):
                # Parse each group until we match the exact dictionary
                # (except for our grouping keys), then move the entry within
                # the group so it has the same index as in group 0.
                conf = os.path.join(dir_list[j][k][0], 'orig.conf')
                current_params = filemerge(conf)
                # Neutralize every grouping key before comparing the
                # remaining parameters.
                for key in keys:
                    current_params[key] = original_params[key]
                if current_params == original_params:
                    temp = dir_list[j].pop(k)
                    dir_list[j].insert(i, temp)

    return (nb_key_values, key_values, dir_list)
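
A usage sketch for the function above, reusing the path from its docstring example; the unpacking names are illustrative:

nb_groups, group_keys, groups = get_dir_by_key_name(
    ['/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050'], 'nb_groups')
for kval, group in zip(group_keys, groups):
    print('nb_groups=%s: %d jobs' % (kval, len(group)))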
Example #5
import os

from jobman.parse import filemerge


def get_dir_by_key_value(dirs, keys=['seed=0']):
    '''
    Returns a list containing the names of the matching folders. Each element
    is a list containing the full path and the id of the experiment as a
    string.

    Parameters
    ----------
    dirs : str or list of str
        Directories that correspond to the table path directory inside the
        experiment root directory.
    keys : str or list of str
        Strings of the format key=value that select the experiments we want.

    Examples
    --------

       get_dir_by_key_value(['/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050'], ['seed=0'])
    '''
    if isinstance(dirs, str):
        dirs = (dirs,)

    good_dir = []

    # Gather results.
    for base_dir in dirs:
        for expdir in os.listdir(base_dir):

            confdir = os.path.join(base_dir, expdir)
            conf = os.path.join(confdir, 'current.conf')
            if not os.path.isfile(conf):
                continue

            params = filemerge(conf)

            # Countdown of filters left to satisfy; a directory is kept only
            # when every entry in keys has been matched.
            skip = len(keys)

            for k in keys:
                keys_to_match = {}
                subkeys = k.split(':')
                for t in subkeys:
                    if t.find('=') != -1:
                        # jparse turns a 'key=value' string into a dict; it
                        # is defined elsewhere in the original module.
                        keys_to_match.update(jparse(t))
                    else:
                        if len(subkeys) != 1:
                            raise ValueError(
                                'key1:key2 syntax requires keyval pairs')
                        kval = params.get(t)
                        skip -= 1

                for fkey, fval in keys_to_match.iteritems():
                    kval = params.get(fkey, None)
                    if kval == fval:
                        skip -= 1
                        break

            if not skip:
                good_dir.append([confdir, expdir])

    return good_dir
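
A usage sketch for the selector above, again with the docstring's path; note that a bare string is accepted for dirs:

for confdir, expdir in get_dir_by_key_value(
        '/data/lisa/exp/mullerx/exp/dae/mullerx_db/ms_0050', ['seed=0']):
    print(confdir)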
Example #6
    numpy.random.seed(state.seed)
    datatrain = (PATH_DATA+NAME_DATA+'_1.pkl.gz',PATH_DATA+NAME_LABEL+'_1.pkl.gz')
    datatrainsave = PATH_SAVE+'/train.libsvm'
    datatest = (PATH_DATA+NAME_DATATEST+'_1.pkl.gz',PATH_DATA+NAME_LABELTEST+'_1.pkl.gz')
    datatestsave = PATH_SAVE+'/test.libsvm'

    depthbegin = 0

    # Monitor best performance for reconstruction and classification.
    state.bestrec = []
    state.bestrecepoch = []
    state.besterr = dict([(repr(trainsize), []) for trainsize in VALIDATION_TRAININGSIZE])
    state.besterrepoch = dict([(repr(trainsize), []) for trainsize in VALIDATION_TRAININGSIZE])

    if MODEL_RELOAD is not None:
        oldstate = expand(DD(filemerge(MODEL_RELOAD+'../current.conf')))
        DEPTH = oldstate.depth + DEPTH
        depthbegin = oldstate.depth
        ACT = oldstate.act + ACT
        N_HID = oldstate.n_hid + N_HID
        NOISE = oldstate.noise + NOISE
        ACTIVATION_REGULARIZATION_COEFF = oldstate.activation_regularization_coeff + ACTIVATION_REGULARIZATION_COEFF
        WEIGHT_REGULARIZATION_COEFF = oldstate.weight_regularization_coeff[:-1] + WEIGHT_REGULARIZATION_COEFF
        NEPOCHS = oldstate.nepochs + NEPOCHS
        LR = oldstate.lr + LR
        NOISE_LVL = oldstate.noise_lvl + NOISE_LVL
        EPOCHSTEST = oldstate.epochstest + EPOCHSTEST
        state.bestrec = oldstate.bestrec
        state.bestrecepoch = oldstate.bestrecepoch
        del oldstate
Example #7
    numpy.random.seed(state.seed)
    datatrain = (PATH_DATA+NAME_DATA+'_1.pkl',PATH_DATA+NAME_LABEL+'_1.pkl')
    datatrainsave = PATH_SAVE+'/train.libsvm'
    datatest = (PATH_DATA+NAME_DATATEST+'_1.pkl',PATH_DATA+NAME_LABELTEST+'_1.pkl')
    datatestsave = PATH_SAVE+'/test.libsvm'

    depthbegin = 0

    # Monitor best performance for reconstruction and classification.
    state.bestrec = []
    state.bestrecepoch = []
    state.besterr = dict([(repr(trainsize), []) for trainsize in VALIDATION_TRAININGSIZE])
    state.besterrepoch = dict([(repr(trainsize), []) for trainsize in VALIDATION_TRAININGSIZE])

    if MODEL_RELOAD is not None:
        oldstate = expand(DD(filemerge(MODEL_RELOAD+'../current.conf')))
        DEPTH = oldstate.depth + DEPTH
        depthbegin = oldstate.depth
        ACT = oldstate.act + ACT
        N_HID = oldstate.n_hid + N_HID
        NOISE = oldstate.noise + NOISE
        ACTIVATION_REGULARIZATION_COEFF = oldstate.activation_regularization_coeff + ACTIVATION_REGULARIZATION_COEFF
        WEIGHT_REGULARIZATION_COEFF = oldstate.weight_regularization_coeff[:-1] + WEIGHT_REGULARIZATION_COEFF
        NEPOCHS = oldstate.nepochs + NEPOCHS
        LR = oldstate.lr + LR
        NOISE_LVL = oldstate.noise_lvl + NOISE_LVL
        EPOCHSTEST = oldstate.epochstest + EPOCHSTEST
        state.bestrec = oldstate.bestrec
        state.bestrecepoch = oldstate.bestrecepoch
        del oldstate
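
The reload logic in the two snippets above relies on every hyperparameter being a per-layer list, so extending a previously trained stack is plain list concatenation. A toy illustration with made-up values:

old_n_hid = [500, 500]           # two layers from the reloaded model
added_n_hid = [1000]             # one new layer to train on top
N_HID = old_n_hid + added_n_hid  # -> [500, 500, 1000]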
Example #8
# -*- coding: utf-8 -*-
import sys, os
from jobman.tools import DD
from jobman import parse
import argparse
import matplotlib.pyplot as plt
import numpy

parser = argparse.ArgumentParser(description='Filter jobman result files.')
parser.add_argument('files', type=str, nargs='*')
args = parser.parse_args()

files = []

for fname in args.files:
    d = DD(parse.filemerge(fname))
    
    if d['jobman.status'] == 5:
        continue
    if not hasattr(d,'best_test_combined'):
        continue
    if numpy.isnan(d.best_test_combined).any():
        continue
    if d.valid_diff > 0.474:
        continue

    d.filename = fname
    #idx = numpy.argmin(d.valid_err_list)
    #d.best_valid = d.valid_err_list[idx]

    files.append(d)    
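
The script ends after the filtering loop; a hedged sketch of how the surviving DD objects could be plotted with the matplotlib import already at the top (the choice of axes and the output file name are assumptions):

vals = [d.valid_diff for d in files]
tests = [numpy.mean(d.best_test_combined) for d in files]
plt.scatter(vals, tests)
plt.xlabel('valid_diff')
plt.ylabel('mean best_test_combined')
plt.savefig('filtered_results.png')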
Example #10
#
# You can also submit the jobs on the command line like this example:
# jobman sqlschedules <tablepath> Experimentsbatchpretrain.finetuning dat="MNIST" depth=1 tie=True "n_hid={{100,250,500,750,1000}}" act=tanh "sup_lr={{.01,.001,.0001,.00001}}" seed=1 nbepochs_sup=1000 batchsize=10
#


from jobman.tools import DD, flatten
from jobman import sql
from jobman.parse import filemerge
from Experimentsbatchpretrain import *
import numpy

db = sql.db('postgres://[email protected]/glorotxa_db/pretrainexpe')

state = DD()
state.curridata = DD(filemerge('Curridata.conf'))

state.depth = 3
state.tie = True
state.n_hid = 1000  # number of units per layer
state.act = 'tanh'

state.sup_lr = 0.01
state.unsup_lr = 0.001
state.noise = 0.25

state.seed = 1

state.nbepochs_unsup = 30  # maximal number of unsupervised updates per layer
state.nbepochs_sup = 1000  # maximal number of supervised updates
state.batchsize = 10
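
The snippet stops after filling in state. Scripts like this one normally finish by inserting the job into the table; a sketch under the assumption that finetuning is the experiment function named in the command-line comment at the top (it comes in through the star import of Experimentsbatchpretrain):

# flatten() turns the nested DD into the flat dict jobman stores per job.
sql.insert_job(finetuning, flatten(state), db)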