Example #1
def get_gendec_parameters(params, opt):
    params.net.gendec = DD()

    # Type of model to use (e.g., ed = simple decoder)
    params.net.gendec.model = opt.generation_decoder.model

    # Recurrent unit to use: {lstm}
    params.net.gendec.unit = opt.generation_decoder.unit

    # Number of layers in decoder
    params.net.gendec.nL = opt.generation_decoder.num_layers

    # Dropout
    params.net.gendec.dpt = opt.generation_decoder.dropout

    # Coefficient by which to multiply output activations before softmax
    # Higher makes distribution over tokens more peaky
    # Lower flattens the distribution
    params.net.gendec.dt = opt.generation_decoder.output_temperature

    # Size of hidden state of recurrent unit in decoder
    params.net.gendec.hSize = opt.hidden_size

    # Size of embeddings in the decoder
    params.net.gendec.iSize = opt.embed_size

    # Where to attach context vector
    # (out = concatenate with recurrent unit output)
    # (inp = concatenate with word embedding)
    params.net.gendec.ctx = opt.generation_decoder.context
Example #2
def get_class_parameters(params, opt):
    params.net.classdec = DD()

    # Number of input features to the classifier
    # Should be the same as the hidden size of the encoder
    params.net.classdec.hSize = opt.hidden_size

    # Dropout in the classifier
    params.net.classdec.dpt = opt.classification_decoder.dropout
Example #3
def read_config(file_):
    config = DD()
    print file_
    for k, v in file_.iteritems():
        if v == "True" or v == "T" or v == "true":
            config[k] = True
        elif v == "False" or v == "F" or v == "false":
            config[k] = False
        elif type(v) == dict:
            config[k] = read_config(v)
        else:
            config[k] = v

    return config
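A minimal usage sketch (assuming DD is jobman's attribute-style dict and the snippet runs under Python 2, as the print statement and iteritems suggest); the input dict and its keys are purely illustrative:

# Hypothetical nested raw config; only the "True"/"T"/"true" and
# "False"/"F"/"false" strings are converted, everything else is kept as-is.
raw = {
    "use_dropout": "True",
    "verbose": "F",
    "optimizer": {"type": "adam", "momentum": "0.9"},
}
cfg = read_config(raw)
assert cfg.use_dropout is True
assert cfg.verbose is False
assert cfg.optimizer.type == "adam"     # nested dict became a nested DD
assert cfg.optimizer.momentum == "0.9"  # untouched: not a True/False string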
Example #4
def produit_cartesien_jobs(val_dict):
    job_list = [DD()]
    all_keys = val_dict.keys()

    for key in all_keys:
        possible_values = val_dict[key]
        new_job_list = []
        for val in possible_values:
            for job in job_list:
                to_insert = job.copy()
                to_insert.update({key: val})
                new_job_list.append(to_insert)
        job_list = new_job_list

    return job_list
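The function enumerates the Cartesian product of the value lists, yielding one job per combination. A small sketch of the expected behaviour with a made-up grid:

# Hypothetical grid: 2 learning rates x 2 batch sizes -> 4 jobs.
grid = {"lr": [0.01, 0.1], "batch_size": [50, 100]}
jobs = produit_cartesien_jobs(grid)
assert len(jobs) == 4
assert {"lr": 0.01, "batch_size": 50} in [dict(j) for j in jobs]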
Example #5
config = DD({
    'module_name':
    'Laura_Two_Layers',
    'model':
    DD({'rand_seed': None}),  # end mlp
    'log':
    DD({
        # 'experiment_name'       : 'AE1214_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_maskout',
        'experiment_name':
        'AE0306_Scale_Warp_Blocks_2Layers_finetune_2049_1000_180',
        'description': '',
        'save_outputs': True,
        'save_learning_rule': True,
        'save_model': True,
        'save_epoch_error': True,
        'save_to_database_name': 'Laura13.db'
    }),  # end log
    'learning_rule':
    DD({
        'max_col_norm':
        1,
        'L1_lambda':
        None,
        'L2_lambda':
        None,
        'cost':
        'mse',
        'stopping_criteria':
        DD({
            'max_epoch': 100,
            'epoch_look_back': 5,
            'cost': 'mse',
            'percent_decrease': 0.05
        })  # end stopping_criteria
    }),  # end learning_rule
    'learning_method':
    DD({
        'type': 'SGD',
        # 'type'                  : 'AdaGrad',
        # 'type'                  : 'AdaDelta',

        # for SGD and AdaGrad
        'learning_rate': (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
        'momentum': (1e-2, 1e-1, 0.5, 0.9),

        # for AdaDelta
        'rho': 0.95,
        'eps': 1e-6,
    }),  # end learning_method

    #===========================[ Dataset ]===========================#
    'dataset':
    DD({

        # 'type'                  : 'Laura_Blocks',
        'type':
        'Laura_Warp_Blocks',
        # 'type'                  : 'TransFactor_Blocks',
        'num_blocks':
        20,
        'feature_size':
        2049,
        'train_valid_test_ratio': [8, 1, 1],
        'dataset_noise':
        DD({
            'type': None
            # 'type'              : 'BlackOut',
            # 'type'              : 'MaskOut',
            # 'type'              : 'Gaussian',
        }),
        'preprocessor':
        DD({
            # 'type' : None,
            'type': 'Scale',
            # 'type' : 'GCN',
            # 'type' : 'LogGCN',
            # 'type' : 'Standardize',

            # for Scale
            'global_max': 89,
            'global_min': -23,
            'buffer': 0.5,
            'scale_range': [-1, 1],
        }),
        'batch_size': (50, 100, 150, 200),
        'num_batches':
        None,
        'iter_class':
        'SequentialSubsetIterator',
        'rng':
        None
    }),  # end dataset

    # #============================[ Layers ]===========================#
    'hidden1':
    DD({
        'name': 'hidden1',

        # 'model'                 : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067',
        # 'model'                 : 'AE1112_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_clean_20141112_2145_06823495',
        # 'model'                 : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671',
        # 'model'                 : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280',
        # 'model'                 : 'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837',
        # 'model'                 : 'AE0302_Scale_Warp_Blocks_2049_300_Clean_No_Pretrain_20150302_2336_46071497',
        'model':
        'AE0302_Scale_Warp_Blocks_2049_1000_Clean_No_Pretrain_20150302_1234_10065582',
        'dropout_below': None,
        # 'dropout_below'         : (0.1, 0.2, 0.3, 0.4, 0.5),
        # 'dropout_below'         : 0.1,
    }),  # end hidden_layer
    'hidden2':
    DD({
        'name': 'hidden2',

        # 'model'                 : 'AE1001_Warp_Blocks_500_120_tanh_tanh_gpu_clean_20141003_0113_02206401',
        # 'model'                 : 'AE1115_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141119_1327_11490503',
        # 'model'                 : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279',
        # 'model'                 : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114',
        # 'model'                 : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132',
        # 'model'                 : 'AE0302_Scale_Warp_Blocks_300_180_Clean_No_Pretrain_20150304_0436_47181007',
        'model':
        'AE0302_Scale_Warp_Blocks_1000_180_Clean_20150304_0511_52424408',
        'dropout_below': None,
    })
})  # end autoencoder
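Nested DD configs like the one above are typically flattened to dotted keys before being logged or stored. A sketch assuming jobman's flatten helper (it is imported in a later example as from jobman import DD, flatten, api0, sql):

# Sketch: flatten the nested config into dotted keys.
from jobman import flatten

flat = flatten(config)
# e.g. flat['learning_rule.stopping_criteria.max_epoch'] == 100
#      flat['dataset.preprocessor.type'] == 'Scale'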
Example #6
config = DD({

        'module_name'                   : 'Laura_Three_Layers',

        'fine_tuning_only'              : False,

        'model' : DD({
                'rand_seed'             : None
                }), # end mlp

        'log' : DD({

                # 'experiment_name'       : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean',
                # 'experiment_name'       : 'AE0919_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy',

                # 'experiment_name'       : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_clean',
                # 'experiment_name'       : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_noisy',

                # 'experiment_name'       : 'AE0917_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean',
                # 'experiment_name'       : 'AE0919_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy',

                # 'experiment_name'       : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_noisy',
                # 'experiment_name'       : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_clean',

                'experiment_name'       : 'AE1213_Scale_Laura_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_maskout',

                'description'           : '',
                'save_outputs'          : True,
                'save_learning_rule'    : True,
                'save_model'            : True,
                'save_epoch_error'      : True,
                'save_to_database_name' : 'Laura12.db'
                }), # end log


        'learning_rule' : DD({
                'max_col_norm'          : 1,
                'L1_lambda'             : None,
                'L2_lambda'             : None,
                'cost'                  : 'mse',
                'stopping_criteria'     : DD({
                                            'max_epoch'         : 100,
                                            'epoch_look_back'   : 5,
                                            'cost'              : 'mse',
                                            'percent_decrease'  : 0.05
                                            }) # end stopping_criteria
                }), # end learning_rule


        'learning_method' : DD({
                'type'                  : 'SGD',
                # 'type'                  : 'AdaGrad',
                # 'type'                  : 'AdaDelta',

                # for SGD and AdaGrad
                'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
                # 'learning_rate'         : 0.001,
                'momentum'              : (1e-2, 1e-1, 0.5, 0.9),
                # 'momentum'              : 0.1,
                # 'momentum'              : 0.5,

                # for AdaDelta
                'rho'                   : 0.95,
                'eps'                   : 1e-6,
                }), # end learning_method

        #===========================[ Dataset ]===========================#
        'dataset' : DD({

                # 'type'                  : 'Laura_Blocks',
                'type'                  : 'Laura_Warp_Blocks',

                'num_blocks'            : 20,
                'feature_size'          : 2049,
                'train_valid_test_ratio': [8, 1, 1],

                'dataset_noise'         : DD({
                                            'type'              : None
                                            # 'type'              : 'BlackOut',
                                            # 'type'              : 'MaskOut',
                                            # 'type'              : 'Gaussian',
                                            }),

                'preprocessor'          : DD({
                                            # 'type' : None,
                                            'type' : 'Scale',
                                            # 'type' : 'GCN',
                                            # 'type' : 'LogGCN',
                                            # 'type' : 'Standardize',

                                            # for Scale
                                            'global_max' : 89,
                                            'global_min' : -23,
                                            'buffer'     : 0.9,
                                            'scale_range': [-1, 1],
                                            }),

                'batch_size'            : (50, 100, 150, 200),
                # 'batch_size'            : 50,
                'num_batches'           : None,
                'iter_class'            : 'SequentialSubsetIterator',
                'rng'                   : None
                }), # end dataset

        # #============================[ Layers ]===========================#

        'hidden1' : DD({
                'name'                  : 'hidden1',
                # 'model'                 : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067',
                # 'model'                 : 'AE0916_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20140916_1705_29139505',

                # 'model'                 :'AE0912_Blocks_2049_500_tanh_tanh_gpu_clean_20140914_1242_27372903',
                # 'model'                 : 'AE0915_Blocks_2049_500_tanh_tanh_gpu_Dropout_20140915_1900_37160748',

                # 'model'                 : 'AE1002_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20141001_0321_33382955',
                # 'model'                 : 'AE0930_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140930_1345_29800576',

                # 'model'                 : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_clean_continue_20141110_1235_21624029',
                # 'model'                 : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_batchout_continue_20141111_0957_22484008',
                # 'model'                 : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671',
                # 'model'                 : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280',
                'model'                 : 'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837',
                'dropout_below'         : None,
                # 'dropout_below'         : (0.1, 0.2, 0.3, 0.4, 0.5),
                # 'dropout_below'         : 0.1,

                }), # end hidden_layer

        'hidden2' : DD({
                'name'                  : 'hidden2',
                # 'model'                 : 'AE0914_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20140915_0400_30113212',
                # 'model'                 : 'AE0918_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_1125_23612485',

                # 'model'                 : 'AE0916_Blocks_500_180_tanh_tanh_gpu_clean_20140916_2255_06553688',
                # 'model'                 : 'AE0918_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_0920_42738052',

                # 'model'                 : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20141001_2158_16765065',
                # 'model'                 : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20141002_0348_53679208',

                # 'model'                 : 'AE1110_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141111_2157_47387660',
                # 'model'                 : 'AE1111_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_batchout_continue_20141112_0844_45882544',
                # 'model'                 : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279',
                # 'model'                 : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114',
                'model'                 : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132',

                'dropout_below'         : None,

                }), # end hidden_layer

        'hidden3' : DD({
                'name'                  : 'hidden3',
                # 'model'                 : 'AE0915_Warp_Blocks_180_120_tanh_gpu_dropout_clean_20140916_1028_26875210',
                # 'model'                 : 'AE0918_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1649_54631649',

                # 'model'                 : 'AE0914_Blocks_180_120_tanh_tanh_gpu_clean_20140918_0119_40376829',
                # 'model'                 : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1345_22865393',

                # 'model'                 : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1711_48207269',
                # 'model'                 : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1457_08966968',
                # 'model'                 : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_clean_20141002_1713_16791523',

                # 'model'                 : 'AE1120_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141122_0044_09351031',
                # 'model'                 : 'AE1121_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_batchout_20141122_0348_49379314',
                # 'model'                 : 'AE1127_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_gaussian_20141201_0345_39835964',
                # 'model'                 : 'AE1201_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141204_0137_07827194',
                'model'                 : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout_20141213_1608_33432934',

                'dropout_below'         : None,

                }), # end hidden_layer


        }) # end autoencoder
Example #7
config = DD({

        'module_name'                   : 'Laura_Continue',

        'model' : DD({
                'rand_seed'             : None
                }), # end mlp

        'log' : DD({

                'experiment_name'       : 'AE0302_Scale_Warp_Blocks_2049_500_Clean_AdaGrad_20150304_0512_00344145_continue',



                'description'           : '',
                'save_outputs'          : True,
                'save_learning_rule'    : True,
                'save_model'            : True,
                'save_epoch_error'      : True,
                'save_to_database_name' : 'Laura13.db'
                }), # end log


        'learning_rule' : DD({
                'max_col_norm'          : 1,
                'L1_lambda'             : None,
                'L2_lambda'             : None,
                'cost'                  : 'mse',
                'stopping_criteria'     : DD({
                                            'max_epoch'         : 100,
                                            'epoch_look_back'   : 5,
                                            'cost'              : 'mse',
                                            'percent_decrease'  : 0.05
                                            }) # end stopping_criteria
                }), # end learning_rule


        'learning_method' : DD({
                # 'type'                  : 'SGD',
                'type'                  : 'AdaGrad',
                # 'type'                  : 'AdaDelta',

                # for SGD and AdaGrad
                'learning_rate'         : 0.01,
                'momentum'              : 0.01,

                # for AdaDelta
                'rho'                   : 0.95,
                'eps'                   : 1e-6,
                }), # end learning_method

        #===========================[ Dataset ]===========================#
        'dataset' : DD({
                # 'type'                  : 'Laura_Warp_Blocks_500',
                # 'type'                  : 'Laura_Blocks_500',
                # 'type'                  : 'Laura_Blocks',
                'type'                  : 'Laura_Warp_Blocks',
                # 'type'                  : 'Mnist_Blocks',
                # 'type'                  : 'Laura_Scale_Warp_Blocks_500_Tanh',
                # 'type'                  : 'Laura_Warp_Blocks_500_Tanh_Noisy_MaskOut',
                # 'type'                  : 'Laura_Warp_Blocks_500_Tanh_Noisy_Gaussian',

                'num_blocks'            : 20,
                'feature_size'          : 2049,
                'train_valid_test_ratio': [8, 1, 1],

                'dataset_noise'         : DD({
                                            # 'type'              : 'BlackOut',
                                            # 'type'              : 'MaskOut',
                                            # 'type'              : 'Gaussian',
                                            'type'              : None
                                            }),

                'preprocessor'          : DD({
                                            # 'type' : None,
                                            'type' : 'Scale',
                                            # 'type' : 'GCN',
                                            # 'type' : 'LogGCN',
                                            # 'type' : 'Standardize',

                                            # for Scale
                                            'global_max' : 89,
                                            'global_min' : -23,
                                            'buffer'     : 0.5,
                                            'scale_range': [-1, 1],
                                            }),

                'batch_size'            : 100,
                'num_batches'           : None,
                'iter_class'            : 'SequentialSubsetIterator',
                'rng'                   : None
                }), # end dataset

        # #============================[ Layers ]===========================#
        'fine_tuning_only'              : True,
        'hidden1' : DD({
                'name'                  : 'hidden1',
                'model'                 : 'AE0302_Scale_Warp_Blocks_2049_500_Clean_AdaGrad_20150304_0512_00344145',
                }), # end hidden_layer


        }) # end autoencoder
Example #8
config = DD({
    'model':
    'attention',
    'random_seed':
    1234,
    # ERASE everything under save_model_path
    'erase_history':
    True,
    'attention':
    DD({
        'reload_': False,
        'save_model_dir': exp_path + 'res_obj10+beam6/',
        'from_dir': '',
        'dataset': 'youtube2text',
        'video_feature': 'resnet152',
        'dim_word': 300,  # 474
        'ctx_dim': -1,  # auto set 
        'dim': 512,  # lstm dim # 536
        'n_layers_out': 1,  # for predicting next word        
        'n_layers_init': 0,
        'encoder_dim': 300,
        'prev2out': True,
        'ctx2out': True,
        'patience': 20,
        'max_epochs': 500,
        'decay_c': 1e-4,
        'alpha_entropy_r': 0.,
        'alpha_c': 0.70602,
        'lrate': 2e-5,
        'selector': True,
        'n_words': 14021,
        'maxlen': 30,  # max length of the description
        'optimizer': 'adadelta',
        'clip_c': 5.,
        'batch_size': 64,  # for trees use 25
        'valid_batch_size': 200,
        # in the unit of minibatches
        'dispFreq': 10,
        'validFreq': 2000,
        'saveFreq': -1,  # this is disabled, now use sampleFreq instead
        'sampleFreq': 100,
        # blue, meteor, or both
        'metric': 'everything',  # set to perplexity on DVS
        'use_dropout': True,
        'K': 28,  # 26 when compare
        'OutOf': None,  # used to be 240, for motionfeature use 26
        'verbose': True,
        'debug': False,
    }),
})
Example #9
import numpy as np
from jobman import DD

import theano
import theano.tensor as TT
import memnet.train_model_adam_gru_soft

n_trials = 64
lr_min = 8e-5
lr_max = 1e-2
batches = [100, 200, 400, 800]
renormalization_scale = [1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0]
mem_nels = [200, 220, 230, 240, 250, 260, 290, 300]
mem_sizes = [20, 24, 28, 30, 32]
std_min = 0.01
std_max = 0.05

state = DD()

state.lr = 6e-6
state.batch_size = 200
state.sub_mb_size = 25
state.std = 0.01
state.max_iters = 20000
state.n_hids = 200
state.mem_nel = 200
state.mem_size = 28

np.random.seed(3)

ri = np.random.random_integers
learning_rates = np.logspace(np.log10(lr_min), np.log10(lr_max), 100)
stds = np.random.uniform(std_min, std_max, 100)
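The arrays above define a random-search space, but the snippet stops before sampling from it. A sketch of one way the n_trials states might be drawn, reusing ri (numpy's inclusive random_integers) from above; the sampling itself is an assumption, only the arrays come from the snippet:

# Hypothetical sampling loop over the search space defined above.
trial_states = []
for _ in range(n_trials):
    trial = DD()
    trial.lr = float(learning_rates[ri(0, len(learning_rates) - 1)])
    trial.std = float(stds[ri(0, len(stds) - 1)])
    trial.batch_size = batches[ri(0, len(batches) - 1)]
    trial.renormalization_scale = renormalization_scale[ri(0, len(renormalization_scale) - 1)]
    trial.mem_nel = mem_nels[ri(0, len(mem_nels) - 1)]
    trial.mem_size = mem_sizes[ri(0, len(mem_sizes) - 1)]
    trial_states.append(trial)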
Example #10
def jobman_insert_random(
        n_jobs, table_name="emotiw_mlp_audio_sigm_fixed_pool2_mixed5_nrelu"):

    JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name
    EXPERIMENT_PATH = "experiment_cg_2layer_sigm_hyper2_fixed2_pool2_save_mixed5_nrelu.jobman_entrypoint"
    nlr = 45
    learning_rates = numpy.logspace(numpy.log10(0.0008), numpy.log10(0.09),
                                    nlr)
    max_col_norms = [1.9835, 1.8256, 1.2124, 0.98791]
    jobs = []

    for _ in range(n_jobs):

        job = DD()
        id_lr = numpy.random.random_integers(0, nlr - 1)
        rnd_maxcn = numpy.random.random_integers(0, len(max_col_norms) - 1)
        job.n_hiddens = numpy.random.randint(100, 540)
        job.n_layers = 2
        job.learning_rate = learning_rates[id_lr]
        job.momentum = 10.**numpy.random.uniform(-1, -0)
        job.hidden_dropout = numpy.random.uniform(low=0.1, high=0.9)
        job.rmsprop = 1
        job.rbm_epochs = 12
        job.rho = 0.96
        job.validerror = 0.0
        job.loss = 0.0
        job.epoch = 0
        job.epoch_time = 0
        job.use_nesterov = 1
        job.trainerror = 0.0
        job.features = "full.pca"
        job.max_col_norm = max_col_norms[rnd_maxcn]
        job.example_dropout = numpy.random.randint(60, 200)
        job.tag = "sigm_norm_const_fixed_pool2"

        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))

    if answer == "y":
        numpy.random.shuffle(jobs)

        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (
            len(jobs), JOBDB)
Example #11
def jobman_insert_random(n_jobs, table_name="emotiw_mlp_audio_fixed_pool2_mixed_grbmx2"):

    JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name
    EXPERIMENT_PATH = "experiment_cg_2layer_sigm_hyper2_fixed2_pool2_save_mixed_grbmx2.jobman_entrypoint"
    nlr = 45
    learning_rates = numpy.logspace(numpy.log10(0.0008), numpy.log10(0.09), nlr)
    max_col_norms = [1.9835, 1.8256, 1.2124, 0.98791]
    jobs = []

    for _ in range(n_jobs):

        job = DD()
        id_lr = numpy.random.random_integers(0, nlr-1)
        rnd_maxcn = numpy.random.random_integers(0, len(max_col_norms)-1)
        job.n_hiddens = numpy.random.random_integers(2,5) * 100 + 2 * numpy.random.random_integers(0,15)
        job.n_layers = 2
        job.learning_rate = learning_rates[id_lr]
        job.momentum = 10.**numpy.random.uniform(-1, -0)
        job.hidden_dropout = numpy.random.uniform(low=0.1, high=0.2)
        job.layer_dropout = 0
        job.topN_pooling = 1
        job.no_final_dropout = 1
        job.l2 = numpy.random.random_integers(1, 20) * 1e-3
        job.rmsprop = 1
        job.normalize_acts = 0
        job.enable_standardization = 0
        job.response_normalize = 0
        job.rbm_epochs = 15
        job.rho = 0.94
        job.validerror = 0.0
        job.loss = 0.0
        job.epoch = 0
        job.epoch_time = 0
        job.use_nesterov = 1
        job.trainerror = 0.0
        job.features = "full.pca"
        job.max_col_norm = max_col_norms[rnd_maxcn]
        job.example_dropout = numpy.random.randint(60, 200)
        job.tag = "relu_nlayers_dbn"

        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))

    if answer == "y":
        numpy.random.shuffle(jobs)

        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (len(jobs), JOBDB)
Example #12
def get_parameters(opt, exp="class"):
    params = DD()
    params.net = DD()
    params.net.enc = DD()
    get_encoder_parameters(params, opt)
    params.net.gendec = DD()
    params.net.classdec = DD()

    if exp == "class":
        get_class_parameters(params, opt)
    elif exp == "gen":
        get_gendec_parameters(params, opt)
    else:
        get_class_parameters(params, opt)
        params.net.enc.pt = "none"
    params.train = DD()
    params.train.static = DD()
    params.train.dynamic = DD()
    params.eval = DD()
    params.data = DD()

    # Which experiment to run: {class = classification; gen = generation}
    params.exp = opt.experiment

    # Task to run: {emotion, motivation}
    params.task = opt.task

    if params.exp == "class":
        # % of development set stories to keep as training data
        params.train.static.tr = opt.train_ratio

        # granularity of labels
        # motivation: {maslow, reiss}
        # emotion: {plutchik, plutchik16}
        params.granularity = opt.granularity

        # Labeling type (default = majority)
        params.data.label = opt.label

    if params.exp == "gen":
        # Number of positive examples per negative example
        params.train.static.pnr = opt.pos_neg_ratio

        # Loss to use during training
        params.train.static.wcrit = "nll"

        # Prune useless words in motivation sequences such as
        # "to be" in "to be famous"
        params.data.pruned = opt.pruned

    # Max norm at which to clip gradients
    params.train.static.gc = opt.grad_clip

    # Random seed
    params.train.static.seed = opt.random_seed

    # learning rate
    params.train.dynamic.lr = opt.learning_rate

    # batch size
    params.train.dynamic.bs = opt.batch_size

    # optimizer to use {adam, rmsprop, etc.}
    # Only Adam is actually implemented
    params.train.dynamic.optim = opt.optimizer

    # Default parameters for the CNN model from
    # 2014 Yoon Kim paper
    if params.net.enc.model == "cnn+stock":
        params.net.enc.ks = "3,4,5"
        params.net.enc.kn = 100
        params.net.classdec.dpt = 0.5
        params.train.dynamic.lr = 0.001
        params.train.dynamic.bs = 64
        params.data.shuffle = False
        params.train.static.l2 = 3
        params.net.enc.iSize = 128
        params.net.classdec.hSize = 300

    meta = DD()
    meta.iterations = opt.iterations
    meta.epochs = opt.epochs
    meta.mark = opt.mark

    return params, meta
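A hypothetical caller, to show that the returned DD supports both attribute and item access (opt is assumed to be an argparse-style namespace carrying the fields read above):

params, meta = get_parameters(opt, exp="class")
# The same nested value is reachable both ways.
assert params.train.dynamic.lr == params["train"]["dynamic"]["lr"]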
Example #13
default_config = DD({
    # available options: mnist, curves
    'dataset': 'mnist',
    'seed': 123,
    #-------------------------------------
    # layerwise pretraining
    # number of pretraining epochs, layerwise
    'pretraining_epochs': 1,
    'pretrain_lr': 0.1,
    'top_layer_pretrain_lr': 0.001,
    # CD-k
    'k': 1,
    # not used
    'weight_decay': 0.00002,
    #--------------------------------------
    # global pretraining
    # number of global pretraining epochs
    # this only makes sense when sgd is used
    # originally 5000
    'global_pretraining_epochs': 1,
    'global_pretrain_lr': 0.02,
    'global_pretraining_batch_size': 3000,
    # or mse
    'reconstruction_cost_type': 'cross_entropy',
    # preconditioner for lcg. jacobi
    'preconditioner': 'martens',
    # hf or sgd
    'global_pretraining_optimization': 'hf',
    #---------------------------------------
    # fine tuning
    # originally 1000
    'training_epochs': 1,
    # standard or russ
    'supervised_training_type': 'russ',
    'finetune_lr': 0.1,
    #-----------------------------------------
    # minibatch size for both layerwise pretraining and finetuning
    # note that if global pretraining is sgd, then this batch_size
    # is used as well.
    'batch_size': 20,
})
Example #14
config = DD({
    'model': 'attention',
    'random_seed': 1234,
    # ERASE everything under save_model_path
    'erase_history': True,
    'attention': DD({
        'reload_': False,
        'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/',
        'from_dir': '',
        'dataset': 'youtube2text',  # other options: 'lsmdc', 'mvad', 'ysvd'
        'video_feature': 'googlenet',
        'dim_word':468, # 474
        'ctx_dim':-1,# auto set 
        'dim':3518, # lstm dim # 536
        'n_layers_out':1, # for predicting next word        
        'n_layers_init':0, 
        'encoder_dim': 300,
        'prev2out':True, 
        'ctx2out':True, 
        'patience':20,
        'max_epochs':500, 
        'decay_c':1e-4,
        'alpha_entropy_r': 0.,
        'alpha_c':0.70602,
        'lrate':0.01,
        'selector':True,
        'n_words':20000, 
        'maxlen':30, # max length of the description
        'optimizer':'adadelta',
        'clip_c': 10.,
        'batch_size': 64, # for trees use 25
        # 'batch_size': 2, # for trees use 25
        'valid_batch_size':200,
        # 'valid_batch_size':2,
        # in the unit of minibatches
        'dispFreq':200,
        'validFreq':2000,
        'saveFreq':-1, # this is disabled, now use sampleFreq instead
        'sampleFreq':100,
        # blue, meteor, or both
        'metric': 'everything', # set to perplexity on DVS
        'use_dropout':True,
        'K':28, # 26 when compare
        'OutOf':None, # used to be 240, for motionfeature use 26
        'verbose': True,
        'debug': False,
        'dec':'standard',
        'encoder':None,
        'mode':'train',
        'proc':'nostd',
        'data_dir':'',
        'feats_dir':''
        }),
    'iLSTM': DD({
        'reload_': False,
        'save_model_dir': exp_path + 'attention_mod/',
        'dec':'standard',
        'valid_batch_size':200,
        'dataset': 'youtube2text',
        'encoder': None,
        'max_epochs':500,
        'from_dir': '',
        }),
    'attention_mod': DD({
        'reload_': False,
        'save_model_dir': exp_path + 'attention_mod/',
        'dec':'multi-stdist'
        }),
    'mtle': DD({
        'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/',
        'reload_': False,
        'from_dir': '',
        'dec':'multi-stdist',
        'dim_word':468, # 474
        'encoder':None,
        'encoder_dim': 300,
        'batch_size': 64, #64 for trees use 25
        'valid_batch_size':200,
        'dataset': 'vtt',
        'dim':3518, # lstm dim # 536
        'video_feature': 'googlenet',
        'validFreq': 2000,
        'max_epochs': 500,
        'mode':'train',
        'proc':'nostd',
        'K':28, # 26 when compare
        'lrate':0.0001,
        'data_dir':'',
        'dispFreq':10,
        'feats_dir':'',
        'cost_type':'v1'
        }),
    'fcoupled': DD({
        'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/',
        'reload_': False,
        'dec':'multi-random',
        'encoder':None,
        'encoder_dim': 300,
        'batch_size': 64, # for trees use 25
        'dataset': 'youtube2text',
        'dim':3518, # lstm dim # 536
        'from_dir': '',
        'valid_batch_size':200,
        'max_epochs':500,
        'video_feature': 'googlenet',
        }),
    'const': DD({
        'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/',
        'reload_': False,
        'dec':'multi-random',
        'encoder':None,
        'encoder_dim': 300,
        'batch_size': 64, # for trees use 25
        'dataset': 'youtube2text',
        'dim':3518, # lstm dim # 536
        'from_dir': '',
        }),
    'const2': DD({
        'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/',
        'reload_': False,
        'dec':'multi-random',
        'encoder':None,
        'encoder_dim': 300,
        'batch_size': 64, # for trees use 25
        'dataset': 'youtube2text'
        }),
    'LSTM': DD({
        'reload_': False,
        'save_model_dir': exp_path + 'attention_mod/',
        'dec':'standard',
        'valid_batch_size':200,
        'dataset': 'youtube2text',
        'encoder': 'lstm_uni',
        'max_epochs':500,
        'from_dir': '',
        }),
    'lstmdd': DD({
        'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/',
        'reload_': False,
        'from_dir': '',
        'dec':'multi-stdi',
        'dim_word':468, # 474
        'encoder':None,
        'encoder_dim': 300,
        'batch_size': 64, #64 for trees use 25
        'valid_batch_size':200,
        'dataset': 'vtt',
        'dim':3518, # lstm dim # 536
        'video_feature': 'googlenet',
        'validFreq': 2000,
        'max_epochs': 500,
        'mode':'train',
        'proc':'nostd',
        'K':28, # 26 when compare
        'lrate':0.0001,
        'data_dir':'',
        'dispFreq':10,
        'feats_dir':'',
        'cost_type':'v1'

    }),
    'gru': DD({
        'reload_': False,
        'save_model_dir': exp_path + 'gru_model2/',
        'from_dir': '',
        'dataset': 'youtube2text',  # other options: 'lsmdc', 'mvad', 'ysvd'
        'video_feature': 'googlenet',
        'dim_word':468, # 474
        'ctx_dim':-1,# auto set
        'dim':3518, # lstm dim # 536
        'n_layers_out':1, # for predicting next word
        'n_layers_init':0,
        'encoder_dim': 300,
        'prev2out':True,
        'ctx2out':True,
        'patience':20,
        'max_epochs':500,
        'decay_c':1e-4,
        'alpha_entropy_r': 0.,
        'alpha_c':0.70602,
        'lrate':0.01,
        'selector':True,
        'n_words':20000,
        'maxlen':30, # max length of the description
        'optimizer':'adadelta',
        'clip_c': 10.,
        'batch_size': 64, # for trees use 25
        # 'batch_size': 2, # for trees use 25
        'valid_batch_size':200,
        # 'valid_batch_size':2,
        # in the unit of minibatches
        'dispFreq':10,
        'validFreq':2000,
        'saveFreq':-1, # this is disabled, now use sampleFreq instead
        'sampleFreq':100,
        # blue, meteor, or both
        'metric': 'everything', # set to perplexity on DVS
        'use_dropout':True,
        'K':28, # 26 when compare
        'OutOf':None, # used to be 240, for motionfeature use 26
        'verbose': True,
        'debug': False,
        'dec':'standard',
        'encoder':None,
        'mode':'train',
        'proc':'nostd'
        }),
    'fc': DD({
        'reload_': False,
        'save_model_dir': exp_path + 'attention_mod/',
        'dec':'standard',
        'dataset': 'youtube2text',
        'encoder': None,
        'from_dir': '',
        }),
    'ic': DD({
        'reload_': False,
        'save_model_dir': exp_path + 'attention_mod/',
        'dec':'standard',
        'dataset': 'youtube2text',
        'encoder': None,
        'from_dir': '',
        }),
    'const_w': DD({
        'save_model_dir': exp_path + 'const_w/',
        'reload_': False,
        'dec':'multi-stdist',
        'encoder':None,
        'encoder_dim': 300,
        'batch_size': 64, # for trees use 25
        'dataset': 'youtube2text',
        'video_feature': 'googlenet',
        }),


})
Example #15
options = DD({

         ### Loop
        'text_only': True,
        'do_eval': False,
        'save_model_dir': save_dir, #+'LSMDC/',
        'load_model_from':  None, #'/Tmp/ballasn/LSMDC/model/baseline_textonly_h_320_wemb_data_10/LSMDC/', #None, #'/Tmp/ballasn/LSMDC/model/baseline_textonyl_h_320/LSMDC/',
        'load_options_from': None,
        'erase': False,
        'max_epoch': 300,
        'dispFreq': 10,
        'estimate_population_statistics': False,
        'debug': True,

        ### Dataset
        'data_path' : "/Tmp/ballasn/LSMDC/LSMDC2016.pkl",
         ### Full None, 10%=>9511, 50%=>47559, 100%=>95118
        'max_train_example': 9511,
        'input_dim':  4096+1024, # 1024 gnet, 4096 C3D
        'features_type' : "Fuse", # 2D/3D/Fuse
        'features_path' : "/Tmp/ballasn/LSMDC/feat/", #"/data/lisatmp4/ballasn/datasets/LSMDC2016/LSMDC_googlenetfeatures.pkl",
        'n_subframes': 15,
        'batch_size': 24,
        'max_n_epoch': 1000,

        ### Vocabulary
        'train_emb': True, # Use only words present > 50 times in the training sets for the output vocabulary
        'use_out_vocab': True, # Use only words present > 50 times in the training sets for the output vocabulary
        'reduce_vocabulary': False, # Use only words present > 3 times in the training sets
        'n_words': 26818,
        'dim_word': 512,
        'hdims': 320,
        'use_dropout': True,
        'use_residual': False,
        'use_zoneout': True,
        'use_bn': True,
        'initial_gamma': 0.1,
        'initial_beta': 0.,
        'use_popstats': False, ### required to be false


        # Model: standard, momentum, adagrad, rmsprop
        'memory_update_rule': 'standard',
        'lstm_alpha': 0.95,

        ### Optimization
        'ita': 0.001,
        'optimizer': 'adam',
        'lr': 0.001,
        'clip_c': 10.,
        'patience': 5,
        'valid_freq': -1,
        })
Example #16
model_config = DD({
    # MLP
    'mlp':
    DD({
        'model_class':
        'mlp',
        'train_class':
        'sgd',
        #'config_id'                     : 'GaussianNoise1000cifar200epoch',
        #'config_id'                     : 'Clean100cifar200epoch',
        #'config_id'                     : 'Clean100cifar200epochPreproc',
        #'config_id'                     : 'GaussianNoise1000cifar200epochPreproc',
        #'config_id'                     : 'GaussNoise2k-2kCifar200epochPreproc',
        #'config_id'                     : 'Noisy200-2kCifar200epochPreproc1',
        'config_id':
        'Clean200-200Cifar200epochPreproc1',

        # TODO: cached should always be True!
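        # NOTE: several keys below ('cached', 'dataset', 'input_space_id',
        # 'nvis', 'channel_array', 'monitoring_dataset', 'random_seed',
        # 'batch_size', 'learning_rate', 'init_momentum',
        # 'train_iteration_mode') are repeated further down in this dict
        # literal; Python keeps only the later occurrence of each key.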
        'cached':
        True,

        # dataset can be mnist or svhn or cifar10
        'dataset':
        'svhn',
        'input_space_id':
        None,
        'nvis':
        None,

        # Channel and dataset monitoring
        # mca : mean classification average of a minibatch
        #'channel_array'                 : ['mca'],
        'channel_array':
        None,

        # valid or test or both
        'monitoring_dataset': ['valid'],
        'random_seed':
        251,
        'batch_size':
        200,
        'learning_rate': ((1e-4, 1.0), float),
        'init_momentum': ((0.5, 0.99), float),

        # for mnist
        #train_iteration_mode'          : 'random_uniform',
        # for svhn
        'train_iteration_mode':
        'sequential',

        #<training modes>
        #sequential
        #shuffled_sequential
        #random_slice
        #random_uniform
        #batchwise_shuffled_sequential

        # TODO: cached should always be True!
        'cached':
        True,

        # dataset can be mnist or svhn or cifar10
        'dataset':
        'cifar10',
        'input_space_id':
        None,
        'nvis':
        None,

        # Channel and dataset monitoring
        # mca : mean classification average of a minibatch
        #'channel_array'                 : ['mca'],
        'channel_array':
        None,

        # valid or test or both
        'monitoring_dataset': ['valid'],
        'random_seed':
        251,
        'batch_size':
        200,
        'learning_rate': ((1e-4, 1.0), float),
        'init_momentum': ((0.5, 0.99), float),

        # for mnist
        #train_iteration_mode'          : 'random_uniform',
        # for svhn
        'train_iteration_mode':
        'sequential',

        #<training modes>
        #sequential
        #shuffled_sequential
        #random_slice
        #random_uniform
        #batchwise_shuffled_sequential

        # Momentum and exponential decay
        'ext_array':
        DD({
            'exp_decay':
            DD({
                'ext_class': 'exponentialdecayoverepoch',
                'decay_factor': ((0.85, 0.999), float),
                'min_lr_scale': ((1e-3, 1e-1), float),
            }),
            'moment_adj':
            DD({
                'ext_class': 'momentumadjustor',
                'final_momentum': 0.9,
                'start_epoch': 1,
                'saturate_epoch': ((20, 50), int),
            }),
        }),

        # Termination criteria
        'term_array':
        DD({
            # Max number of training epochs
            'epoch_count':
            DD({
                'term_class': 'epochcounter',
                'max_epochs': 100,
            }),
            # Early stopping on validation set
            # If after max_epochs, we don't see significant improvement
            # on validation cost, we stop the training.
            'early_stopping':
            DD({
                'term_class': 'monitorbased',
                'proportional_decrease': 1e-4,
                'max_epochs': 20,
                'channel_name': 'valid_softmax2_nll',
                'save_best_channel': 'valid_softmax2_nll',
            })
        }),
        'layers':
        DD({
            # IMPORTANT: For each layer, only add hyperparams that are different than
            # the default hyperparams from layer_config

            # NOTE: always start the name of your hidden layers with hidden and your
            # output layers with output in order for the hidden layers
            # to be found first before the output layers when going
            # through the layers DD dictionary.

            # NOTE: the supported activation functions are:
            # tanh, sigmoid, rectifiedlinear, softmax

            #                First hidden layer
            #                 'hidden1' : DD({
            #                     'layer_class'           : 'rectifiedlinear',
            #                     #'dim'                   : ((100, 2000), int),
            #                     'dim'                   : 200,
            #                     'max_col_norm'          : ((0.1, 8.), float),
            #                     #'weight_decay'          : ((0.1, 7.), float),
            #                     'sparse_init'           : 15
            #                 }),

            #                 First hidden layer
            'hidden1':
            DD({
                'layer_class': 'tanh',
                #'dim'                   : ((100, 2000), int),
                'dim': 200,
                'max_col_norm': ((0.1, 5.), float)
                #'weight_decay'          : ((1., 9.), float),
            }),
            'hidden2':
            DD({
                'layer_class': 'tanh',
                #'dim'                   : ((100, 2000), int),
                'dim': 200,
                'max_col_norm': ((0.1, 5.), float)
                #'weight_decay'          : ((1., 9.), float),
            }),

            #                 'hidden1' : DD({
            #                     'layer_class'           : 'gaussianRELU',
            #                     #'dim'                   : ((100, 2000), int),
            #                     'dim'                   : 2000,
            #                     'max_col_norm'          : ((0.1, 5.), float),
            #                     'adjust_threshold_factor'   : ((0.0001, 1), float),
            #                     'desired_active_rate'   : 0.1,
            #                     'noise_std'             : ((0.1, 10), float),
            #
            #                     #'weight_decay'          : ((1., 9.), float),
            #
            #                     'sparse_init'           : 15
            #                 }),
            #
            #                'hidden2' : DD({
            #                    'layer_class'           : 'gaussianRELU',
            #                    #'dim'                   : ((100, 2000), int),
            #                    'dim'                   : 2000,
            #                    'max_col_norm'          : ((0.1, 5.), float),
            #                    'adjust_threshold_factor'   : ((0.0001, 1), float),
            #                    'desired_active_rate'   : 0.1,
            #                    'noise_std'             : ((0.1, 10), float),
            #
            #                    #'weight_decay'          : ((1., 9.), float),
            #
            #                    'sparse_init'           : 15
            #                }),

            #First hidden layer

            #                 'hidden1' : DD({
            #                     'layer_class'           : 'noisyRELU',
            #                     'sparse_init'           : 15,
            #                     'dim'                   : 3000,
            #                     'max_col_norm'          : ((0.1, 5.), float),
            #                     'noise_factor'          : ((0.0001, 1.), float),
            #                     'adjust_threshold_factor'   : ((0.0001, 1), float),
            #                     'desired_active_rate'   : 0.1
            #                     }),
            #
            #Second hidden layer
            #                 'hidden2' : DD({
            #                     'layer_class'           : 'tanh',
            #                     #'dim'                   : ((100, 2000), int),
            #                     'dim'                   : 100,
            #                     'max_col_norm'          : ((0.1, 5.), float)
            #                     #'weight_decay'          : ((1., 9.), float),
            #
            #                 }),

            # Last (output) layer
            # The fun model only takes 1 output.
            'output1':
            DD({
                'layer_class': 'softmax',
                'dim': 10,
                'irange': 0.05
                #'sparse_init'           : 15
            })
        }),
    }),
})
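Entries such as 'learning_rate': ((1e-4, 1.0), float) pair a search range with a cast. The sampler that consumes them is not shown in this example; a plausible reading, written here only as an assumption, is a uniform draw followed by the cast:

import numpy

def sample_hyperparam(spec):
    # spec is either a fixed value or a ((low, high), cast) search range.
    if (isinstance(spec, tuple) and len(spec) == 2
            and isinstance(spec[0], tuple) and callable(spec[1])):
        (low, high), cast = spec
        return cast(numpy.random.uniform(low, high))
    return spec

lr = sample_hyperparam(((1e-4, 1.0), float))   # a float in [1e-4, 1.0)
saturate = sample_hyperparam(((20, 50), int))  # an int in [20, 50)
batch = sample_hyperparam(200)                 # fixed values pass through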
Example #17
config = DD({
    'module_name':
    'NN',
    'model':
    DD({'rand_seed': None}),  # end mlp
    'log':
    DD({
        'experiment_name': 'mlp_dropout',
        'description': '',
        'save_outputs': False,
        'save_learning_rule': False,
        'save_model': False,
        'save_epoch_error': False,
        'save_to_database_name': "mnist_model.db"
    }),  # end log
    'learning_method':
    DD({
        # 'type'                  : 'SGD',
        # 'type'                  : 'AdaGrad',
        'type': 'AdaDelta',

        ###[ For SGD and AdaGrad ]###
        # 'learning_rate'         : 0.001,
        # 'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
        'learning_rate': 0.1,
        'momentum': 0.5,
        # 'momentum'              : 0.,
        # 'momentum'              : (1e-2, 1e-1, 0.5, 0.9),

        # For AdaDelta
        # 'rho'                   : ((0.90, 0.99), float),
        'rho': 0.95,
        # 'eps'                   : (1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7),
        'eps': 1e-6,
    }),
    'learning_rule':
    DD({
        'max_col_norm':
        None,
        'L1_lambda':
        None,
        'L2_lambda':
        0.0001,
        'cost':
        'entropy',
        'stopping_criteria':
        DD({
            'max_epoch': 10,
            'epoch_look_back': 5,
            'cost': 'error',
            'percent_decrease': 0.05
        })  # end stopping_criteria
    }),  # end learning_rule
    'dataset':
    DD({
        'type':
        'Mnist',
        'train_valid_test_ratio': [5, 1, 1],
        'feature_size':
        784,
        'target_size':
        10,
        'dataset_noise':
        DD({
            # 'type'              : 'BlackOut',
            # 'type'              : 'MaskOut',
            # 'type'              : 'Gaussian',
            'type': None
        }),
        'preprocessor':
        DD({
            'type': None,
            # 'type' : 'Scale',
            # 'type' : 'GCN',
            # 'type' : 'LogGCN',
            # 'type' : 'Standardize',

            # for Scale
            'global_max': 4.0,
            'global_min': 0.,
            'buffer': 0.,
            'scale_range': [0., 1.],
        }),
        'batch_size': (50, 100, 150, 200),
        # 'batch_size'            : 20,
        'num_batches':
        None,
        'iter_class':
        'SequentialSubsetIterator',
        'rng':
        None
    }),  # end dataset

    #============================[ Layers ]===========================#
    'hidden1':
    DD({
        'name':
        'hidden1',
        'type':
        'Tanh',
        'dim':
        500,

        # 'dropout_below'         : (0.05, 0.1, 0.15, 0.2)
        # 'dropout_below'         : (0, 0.5),
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            'ratio': 0.5,

            # for Gaussian
            'std': 0.1,
            'mean': 0,
        })
    }),  # end hidden_layer
    'output':
    DD({
        'name':
        'output',
        'type':
        'Sigmoid',
        'dim':
        10,

        # 'dropout_below'         : 0.5,
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            'ratio': 0.5,

            # for Gaussian
            'std': 0.1,
            'mean': 0,
        })
    })  # end output_layer
})
Example #18
def jobman_insert_random(n_jobs, table_name="emotiw_mlp_audio_tanh"):
    JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name

    EXPERIMENT_PATH = "experiment_cg.jobman_entrypoint"
    nlr = 50
    learning_rates = numpy.logspace(numpy.log10(0.001), numpy.log10(0.3), nlr)

    jobs = []
    for _ in range(n_jobs):
        job = DD()
        id_lr = numpy.random.random_integers(0, nlr-1)
        job.n_hiddens = numpy.random.randint(100, 800)
        job.n_layers = numpy.random.randint(1, 4)
        job.learning_rate = learning_rates[id_lr]
        job.momentum = 10.**numpy.random.uniform(-1, -0)
        job.rmsprop = numpy.random.binomial(1, 0.5)
        job.validerror = 0.0
        job.loss = 0.0
        job.epoch = 0
        job.epoch_time = 0
        job.trainerror = 0.0
        job.features = "full.pca"
        job.max_col_norm = 1.8456
        job.example_dropout = numpy.random.randint(16, 200)
        job.rbm_learning_rate = 10.**numpy.random.uniform(-3, -0)
        job.rbm_epochs = 0 #numpy.random.randint(8, 100)
        job.tag = "tanh_norm_const"

        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))
    if answer == "y":
        numpy.random.shuffle(jobs)

        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (len(jobs), JOBDB)
Example #19
import warnings

import numpy as np

from jobman import DD, flatten, api0, sql

import theano
import theano.tensor as TT
import train_model_adam
import sys

sys.path.append("../codes/")

from core.nan_guard import NanGuardMode

state = DD()

state.lr = 3e-3
state.batch_size = 160
state.sub_mb_size = 160
state.std = 0.05
state.max_iters = 40000
state.n_hids = 240
state.mem_nel = 150
state.mem_size = 28
state.renormalization_scale = 5.0
state.bowout = True
state.use_ff_controller = False
state.std = 0.01
state.bow_size = 80
state.n_reading_steps = 1
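The snippet stops after building the state. With the imports at its top, a typical next step is to register the state in a jobman database for later dispatch; this is a sketch only, and both the table URL and the entrypoint string are assumed placeholders, not values from the source:

# Sketch: URL and entrypoint below are placeholders.
TABLE_URL = 'postgresql://user@host/db_name?table=memnet_adam'
state.update({sql.EXPERIMENT: 'train_model_adam.jobman_entrypoint'})

db = sql.db(TABLE_URL)
sql.insert_dict(flatten(state), db)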
Example #20
default_config = DD({
    # theano profiling, 0 not printing
    'profile': 1,
    # specify the correct data path
    # cifar10.npz
    # curves.npz
    # mnist_6k_1k_1k.npz
    'data': '/scratch/yaoli/Exp_scratch/data/mnist_6k_1k_1k.npz',
    'verbose': 3,
    'device': 'gpu',
    # batch for computing gradient
    # 50000 for mnist, 40000 for cifar, 20000 for curves
    # gbs=mbs=ebs=cbs=200 when sgd
    'gbs': 60000,
    # batch for computing the metric, 10000 for non-sgd
    'mbs': 10000,
    # batch for evaluating the model
    # and doing line search, 10000 for non-sgd
    'ebs': 10000,
    # number of samples to consider at any time
    # 250
    'cbs': 250,
    # daa, mlp
    'model': 'daa',
    #'sgd' 'krylov' 'natNCG', 'natSGD_jacobi'
    'algo': 'natSGD_jacobi',  #'krylov',
    # Gf for park metric, amari otherwise
    'type': 'Gf',
    # keep it under 1000, but bigger for sgd
    'loopIters': 1000,
    # 1 is catching NaN
    'gotNaN': 0,
    'seed': 312,
    # there must not be any space between numbers below, otherwise
    # jobman raises an error
    # mlp [1000,1000,1000],
    # cifar deep [2000,1000,1000],
    # to compare:
    #------------------
    #mnist(mlp): [500,500,2000]
    #mnist(ae):[1000,500,250,30]
    #cifar(mlp): 1000, 10000
    #curves(ae):[400,200,100,50,25,5]
    'hids': '[1000,500,250,30]',
    # stop LCG till this difference is reached
    'mrtol': 1e-4,
    # damping factor for the matrix, should be fixed for natNCG
    'mreg': 45,
    # damping factor for preconditioning
    'jreg': .02,
    # NCG restart
    'resetFreq': 40,
    # max iterations of LCG
    'miters': numpy.int32(20),
    # sgd:0.03, other 0.9, 1 or 2
    'lr': 1,
    # weight initialization formula .. not very useful to change it right now
    # xavier or small
    'init': 'xavier',
    # error cost for deep autoencoder (note Dumi and I think Martens used cross entropy for MNIST)
    'daacost': 'cross',
    'l2norm': 1e-5,
    # numbers of linear search
    'lsIters': 80,
    # how often to check the validation score; keep it low unless using SGD.
    'checkFreq': 5,
    # the size of the Krylov space
    'krylovDim': 15,
    # lbfgs steps
    'lbfgsIters': 10,
    # natNCG uses 0
    'adaptivedamp': 1,
})
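
Since default_config is itself a DD, a per-run configuration can be derived without touching the defaults; a short sketch, with the overridden values chosen purely for illustration:

run_config = DD(default_config.copy())
run_config.update({
    'algo': 'krylov',          # one of the alternatives listed in the comment above
    'hids': '[500,500,2000]',  # keep the no-spaces format jobman expects
    'seed': 777,
})
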
Example #21
0
def jobman_insert_random(n_jobs, table_name="emotiw_mlp_audio_tanh"):
    JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name

    EXPERIMENT_PATH = "experiment_cg.jobman_entrypoint"
    nlr = 50
    learning_rates = numpy.logspace(numpy.log10(0.001), numpy.log10(0.3), nlr)

    jobs = []
    for _ in range(n_jobs):
        job = DD()
        id_lr = numpy.random.random_integers(0, nlr - 1)
        job.n_hiddens = numpy.random.randint(100, 800)
        job.n_layers = numpy.random.randint(1, 4)
        job.learning_rate = learning_rates[id_lr]
        job.momentum = 10.**numpy.random.uniform(-1, -0)
        job.rmsprop = numpy.random.binomial(1, 0.5)
        job.validerror = 0.0
        job.loss = 0.0
        job.epoch = 0
        job.epoch_time = 0
        job.trainerror = 0.0
        job.features = "full.pca"
        job.max_col_norm = 1.8456
        job.example_dropout = numpy.random.randint(16, 200)
        job.rbm_learning_rate = 10.**numpy.random.uniform(-3, -0)
        job.rbm_epochs = 0  #numpy.random.randint(8, 100)
        job.tag = "tanh_norm_const"

        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))
    if answer == "y":
        numpy.random.shuffle(jobs)

        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (
            len(jobs), JOBDB)
def jobman_insert_random(n_jobs, table_name="emotiw_mlp_audio_sigm_fixed_pool2_mixed_norbm3"):

    JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name
    EXPERIMENT_PATH = "experiment_cg_2layer_sigm_hyper2_fixed2_pool2_save_mixed_norbm3.jobman_entrypoint"
    nlr = 45
    learning_rates = numpy.logspace(numpy.log10(0.0008), numpy.log10(0.1), nlr)
    max_col_norms = [1.8256, 1.5679, 1.2124, 0.98791]
    rhos = [0.96, 0.92, 0.88]
    jobs = []

    for _ in range(n_jobs):

        job = DD()
        id_lr = numpy.random.random_integers(0, nlr-1)
        rnd_maxcn = numpy.random.random_integers(0, len(max_col_norms)-1)
        rnd_rho = numpy.random.random_integers(0, len(rhos)-1)
        job.n_hiddens = numpy.random.randint(80, 500)
        job.n_layers = numpy.random.random_integers(1, 2)
        job.learning_rate = learning_rates[id_lr]
        job.momentum = 10.**numpy.random.uniform(-1, -0)
        job.rmsprop = 1
        job.rho = rhos[rnd_rho]
        job.validerror = 0.0
        job.loss = 0.0
        job.seed = 1938471
        job.rbm_epochs = 0
        job.epoch = 0
        job.epoch_time = 0
        job.use_nesterov = 1
        job.trainerror = 0.0
        job.features = "full.pca"
        job.max_col_norm = max_col_norms[rnd_maxcn]
        job.example_dropout = numpy.random.randint(60, 200)
        job.tag = "sigm_norm_const_fixed_pool2_norbm3"

        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))

    if answer == "y":
        numpy.random.shuffle(jobs)

        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (len(jobs), JOBDB)
Example #23
0
config = DD({
    'model':
    'attention',
    'random_seed':
    1234,
    # ERASE everything under save_model_path
    'erase_history':
    True,
    'attention':
    DD({
        'reload_': False,
        'verbose': True,
        'debug': False,
        'save_model_dir': RAB_EXP_PATH + 'save_dir/',
        'from_dir': RAB_EXP_PATH + 'from_dir/',
        # dataset
        'dataset': 'youtube2text',
        'video_feature': 'googlenet',
        'K': 28,  # 26 when comparing
        'OutOf': None,
        # network
        'dim_word': 512,  #468, # 474
        'tu_dim': 512,
        'mu_dim': 512,  # 1024,
        'vu_dim': 512,  # 1024,
        'ctx_dim': -1,  # auto set
        'n_layers_out': 1,  # for predicting next word
        'n_layers_init': 0,
        'prev2out': True,
        'ctx2out': True,
        'selector': True,
        'n_words': 20000,
        'maxlen': 30,  # max length of the description
        'use_dropout': True,
        'isGlobal': False,
        # training
        'patience': 20,
        'max_epochs': 500,
        'decay_c': 1e-4,
        'alpha_entropy_r': 0.,
        'alpha_c': 0.70602,
        'lrate': 0.0001,
        'optimizer': 'adadelta',
        'clip_c': 10.,
        'batch_size': 256,  # for trees use 25
        'valid_batch_size': 200,
        'dispFreq': 10,
        'validFreq': 500,
        'saveFreq': -1,  # this is disabled, now use sampleFreq instead
        'sampleFreq': 100,
        # bleu, meteor, or both
        'metric': 'everything',  # set to perplexity on DVS
    }),
})
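
Because the attention block is a nested DD, individual fields can be overridden with dotted attribute access before the config reaches the training code; a brief sketch with illustrative values (the final call is hypothetical and commented out):

config.attention.batch_size = 64
config.attention.lrate = 2e-4
config.attention.max_epochs = 100

# train_attention_model(**config.attention)   # hypothetical training entry point
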
Example #24
0
import theano
import cPickle as pkl
import warnings

import argparse
import numpy as np

from jobman import DD, flatten, api0, sql

import theano.tensor as TT
import train_model_adam
from train_model_adam import search_model_adam

state = DD()
parser = argparse.ArgumentParser("Parameters for the single soft model.")

parser.add_argument("--task_id", default=1, type=int)
parser.add_argument("--reload_model", default=1, type=int)
parser.add_argument("--save_path", default=".", type=str)
parser.add_argument("--seed", default=".", type=str)

args = parser.parse_args()
state.reload_model = args.reload_model
state.task_id = args.task_id
state.save_path = args.save_path

state.lr = 8.2 * 1e-3
state.batch_size = 160
state.sub_mb_size = 160
state.max_iters = 90000
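
cPickle is imported above but unused in the excerpt; one natural use, sketched here with an illustrative filename, is dumping the resolved state DD next to the model outputs so the run can be reproduced:

import os

config_path = os.path.join(state.save_path, "state_task%d.pkl" % state.task_id)
with open(config_path, "wb") as f:
    # DD is a dict subclass, so it pickles cleanly as a plain dict
    pkl.dump(dict(state), f)
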
Example #25
0
def pento(n_trials):
    ri = numpy.random.random_integers

    state = DD()
    with open("mnist_powerup_temp.yaml") as ymtmp:
        state.yaml_string = ymtmp.read()

    state.powerup_nunits = 240
    state.powerup_npieces = 5
    state.W_lr_scale = 0.04
    state.p_lr_scale = 0.01
    state.lr_rate = 0.1
    state.l2_pen = 1e-5
    state.l2_pen2 = 0.0000
    state.init_mom = 0.5
    state.final_mom = 0.5
    state.decay_factor = 0.5
    state.max_col_norm = 1.9365
    state.max_col_norm2 = 1.8365

    state.save_path = "./"

    n_pieces = [2, 3, 4, 5, 6, 8, 10, 12]
    n_units = [200, 240, 280, 320, 420]
    learning_rates = numpy.logspace(numpy.log10(0.001), numpy.log10(1.0), 30)
    learning_rate_scalers = numpy.logspace(numpy.log10(0.01), numpy.log10(1), 30)
    l2_pen = numpy.logspace(numpy.log10(1e-6), numpy.log10(8 * 1e-4), 100)
    max_col_norms = [1.7365, 1.8365, 1.9365, 2.1365, 2.2365, 2.4365]

    ind = 0
    TABLE_NAME = "powerup_mnist_1layer_finer"
    db = api0.open_db("postgresql://[email protected]/gulcehrc_db?table=" + TABLE_NAME)

    for i in xrange(n_trials):

        state.lr_rate = learning_rates[ri(learning_rates.shape[0]) - 1]
        state.powerup_nunits = n_units[ri(len(n_units)) - 1]
        state.powerup_npieces = n_pieces[ri(len(n_pieces)) - 1]
        state.W_lr_scale = learning_rate_scalers[ri(len(learning_rate_scalers)) - 1]
        state.p_lr_scale = learning_rate_scalers[ri(len(learning_rate_scalers)) - 1]
        state.l2_pen = l2_pen[ri(l2_pen.shape[0]) - 1]
        state.init_mom = numpy.random.uniform(low=0.3, high=0.6)
        state.final_mom = numpy.random.uniform(low=state.init_mom + 0.1, high=0.9)
        state.decay_factor = numpy.random.uniform(low=0.01, high=0.05)
        state.max_col_norm = max_col_norms[ri(len(max_col_norms)) - 1]

        alphabet = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUWXYZ0123456789")
        numpy.random.shuffle(alphabet)
        state.save_path = "./"
        state.save_path += "".join(alphabet[:7]) + "_"
        sql.insert_job(experiment, flatten(state), db)
        ind += 1

    db.createView(TABLE_NAME + "_view")
    print "{} jobs submitted".format(ind)
Example #26
0
config = DD({
    'model': 'DeepOrderlessNADE',
    'load_trained': DD({
        # action: 0 standard train, 1 load trained model and evaluate, 2 continue training
        'action': 0,
        'from_path': best_2h_model,
        'epoch': 3999, 
        }),
    'random_seed': 1234,
    'save_model_path': exp_path + '/nade_k_nips14_release_final/test_h2/',
    'dataset': DD({
        'signature': 'MNIST_binary_russ',
        }),
    'DeepOrderlessNADE': DD({
        'n_in': None,
        'n_out': None,
        'n_hidden': 500,
        'n_layers': 2,
        'hidden_act': 'tanh',
        'tied_weights': False,
        # only for the first step of mean field
        'use_mask': False,
        # use data mean to initialize the mean field
        'init_mean_field': True,
        # do not average the cost over k steps; only take the cost from the last step
        'cost_from_last': False,
        # 1: 0.01 Gaussian, 2: formula
        'init_weights': 1,
        # centering v
        'center_v': False,
        'train': DD({
            # validate once every 'valid_freq' epochs
            'valid_freq': 250,
            # compute valid and test LL over this many of orderings
            'n_orderings': 5,
            'n_epochs': 1000,
            'minibatch_size': 100,
            # 0 for momentum, 1 for adadelta
            'sgd_type': 1,
            'momentum': 0.9,
            'lr': 0.001,
            # 0.0012279827881 for 2h model
            # 0.0 for 1h model
            'l2': 0.0012279827881,
            # number of mean field steps
            'k': 5,
            'verbose': True,
            'fine_tune': DD({
                'activate': True,
                'n_epochs': 3000,
                })
            })
        })
    })
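
The 'action' comment above implies a three-way dispatch; a minimal sketch of how a runner might branch on it. run_training and evaluate_model are hypothetical placeholders, not names from the original code:

def run_training(cfg, reload_path=None):
    pass  # placeholder for the actual training loop


def evaluate_model(path, epoch):
    pass  # placeholder for evaluation of a saved model


action = config.load_trained.action
if action == 0:
    # standard training from scratch
    run_training(config)
elif action == 1:
    # load a trained model and only evaluate it
    evaluate_model(config.load_trained.from_path, config.load_trained.epoch)
elif action == 2:
    # continue training from the saved model
    run_training(config, reload_path=config.load_trained.from_path)
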
Example #27
0
    "motivation": (True, True),
    "sentence": (True, True),
    "reiss": (False, False),
    "maslow": (False, False),
    "plutchik": (False, False),
    "plutchik16": (False, False),
    "entity": (True, False)
}

splits = ["train", "dev", "test"]

for experiment in ["emotion", "motivation"]:
    print("Making {} data for {} class of models".format(
        experiment, sys.argv[1]))

    opt = DD()
    opt.data = DD()
    opt.data.pruned = True

    # Make a memory model (EntNet, NPN) data loader for generation
    if sys.argv[1] == "memory":
        # Make save name
        name = "processed/{}/{}_{}_data_loader.pth".format(
            experiment, "gen_memory", "-".join(splits))
        print(name)

        # Initialize data loader and load vocabs and raw data
        data_loader = data.MemoryGenModelDataLoader()
        data_loader.load_vocabs(vocab_paths, vocab_text)
        data_loader.load_data(opt,
                              splits,
Example #28
0
def get_encoder_parameters(params, opt):
    params.net.enc = DD()

    # Whether to encode the entity context
    # For LSTM and CNN, this adds a separate encoder
    # for entity-specific context lines.
    # For REN and NPN, this does nothing since those models
    # represent entities using memory slots, so the var is
    # overwritten below
    params.net.enc.ctx = opt.encoder.ctx

    # Type of encoder to use -- {lstm, cnn, ren, npn}
    params.net.enc.model = opt.encoder.model

    # Hidden sizes of encoder
    # Self-explanatory for LSTMs
    params.net.enc.hSize = opt.hidden_size

    # Input embedding size
    params.net.enc.iSize = opt.embed_size

    # Dropout probability for any nodes in the encoder
    params.net.enc.dpt = opt.encoder.dropout

    # Type of pretrained embeddings for the encoder
    # Either glove or none (feel free to add other types)
    params.net.enc.pt = opt.encoder.pt

    # Initialization to use
    # d = default xavier glorot initialization
    # Gets overwritten for REN or NPN initialization
    params.net.enc.init = opt.encoder.init

    if params.net.enc.model in ['gru', 'lstm', 'rnn']:
        # num layers in rnn
        params.net.enc.nL = opt.encoder.rnn.num_layers

        # make encoder bidirectional
        params.net.enc.bid = opt.encoder.rnn.bid

    elif params.net.enc.model in ["ren", "npn"]:
        # Set context to be true automatically
        params.net.enc.ctx = True

        # tie entity cells to story entities
        params.net.enc.tied = opt.encoder.ren.tied

        # how many entity slots
        params.net.enc.ents = opt.encoder.ren.num_slots

        # activation function for entity update
        # (P = PReLU or I = Identity)
        params.net.enc.act = opt.encoder.ren.activation

        # Size of entity hidden cells
        params.net.enc.eSize = opt.encoder.ren.entity_size

        # how to initialize parameters
        # format is gauss+{}+{}.format(mean, std)
        # n = the default PyTorch initialization
        params.net.enc.init = opt.encoder.ren.init

        # entity_act update function options:
        # k = key projection (REN-style),
        # v = value projection (REN-style),
        # c = context projection (REN-style)
        params.net.enc.afunc = opt.encoder.ren.application_function

        # use action and affect labels to supervise entity selection
        params.net.enc.sup = opt.encoder.ren.supervise

        # lock keys to glove init
        params.net.enc.lk = opt.encoder.ren.lock_keys

        # use glove embeddings for pretrained entities
        params.net.enc.entpt = opt.encoder.ren.entpt

        if params.net.enc.model == "npn":
            # Number of actions
            params.net.enc.na = opt.encoder.npn.actions

            # Size of action embeddings
            params.net.enc.aSize = opt.encoder.npn.action_size

            # Number of MLP layers for selecting actions
            params.net.enc.aNL = opt.encoder.npn.action_num_layers

            # Dropout for action selector MLP
            params.net.enc.adpt = opt.encoder.npn.action_dropout

            # Activation functions between layers of action selector MLP
            params.net.enc.aI = opt.encoder.npn.action_init

            # number of layers of projections for entity selection
            params.net.enc.eNL = opt.encoder.npn.entity_num_layers

            # dropout probability in preprocess layers
            params.net.enc.edpt = opt.encoder.npn.entity_dropout

            # use recurrent attention (See Bosselut et al., 2018)
            params.net.enc.rec = opt.encoder.npn.entity_recurrent_attention

            # Sum entity selection or just scale
            params.net.enc.eRed = opt.encoder.npn.entity_reduce

            # If it's using an NPN, you need to activate action contribution
            # to entity update if you mistakenly haven't done so in
            # params.net.enc.afunc
            if "a" not in params.net.enc.afunc:
                params.net.enc.afunc = "a" + params.net.enc.afunc
        else:
            # EntNet update rule
            params.net.enc.afunc = "nkvc"

            # Initialize encoder with REN initialization
            params.net.enc.init = opt.encoder.ren.init

            # No projection layers between encoder and entity selection
            params.net.enc.eNL = 0

    elif params.net.enc.model == "cnn":
        # Size of kernels (different sizes separated by commas)
        params.net.enc.ks = opt.encoder.cnn.kernel_sizes

        # Number of kernel functions
        params.net.enc.kn = opt.encoder.cnn.kernel_num
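
To make the expected input concrete, here is a hypothetical minimal opt for the LSTM branch of this function; the values are illustrative, and the REN/NPN/CNN branches would need their own sub-blocks:

from jobman import DD

opt = DD()
opt.hidden_size = 100
opt.embed_size = 300

opt.encoder = DD()
opt.encoder.model = 'lstm'
opt.encoder.ctx = True
opt.encoder.dropout = 0.5
opt.encoder.pt = 'glove'
opt.encoder.init = 'd'

opt.encoder.rnn = DD()
opt.encoder.rnn.num_layers = 1
opt.encoder.rnn.bid = True

params = DD()
params.net = DD()
get_encoder_parameters(params, opt)
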
Example #29
0
config = DD({
    'module_name':
    'I2R_AE',
    'model':
    DD({'rand_seed': None}),  # end mlp
    'log':
    DD({
        'experiment_name': 'i2r0217_i2r_clean_clean',
        'description': '',
        'save_outputs': True,
        'save_learning_rule': False,
        'save_model': True,
        'save_epoch_error': True,
        'save_to_database_name': "i2r.db"
    }),  # end log
    'learning_method':
    DD({
        'type': 'SGD',
        # 'type'                  : 'AdaGrad',
        # 'type'                  : 'AdaDelta',

        ###[ For SGD and AdaGrad ]###
        # 'learning_rate'         : 0.5,
        # 'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
        'learning_rate': ((1e-2, 0.5), float),
        # 'learning_rate'         : 0.0305287335067987,
        # 'learning_rate'         : 0.01,
        'momentum': 0.9,
        # 'momentum'              : 0.,
        # 'momentum'              : (1e-2, 1e-1, 0.5, 0.9),

        # For AdaDelta
        'rho': ((0.90, 0.99), float),
        'eps': (1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7),
    }),
    'learning_rule':
    DD({
        'max_col_norm':
        1,
        'L1_lambda':
        None,
        'L2_lambda':
        0.0001,
        'cost':
        'mse',
        'stopping_criteria':
        DD({
            'max_epoch': 100,
            'epoch_look_back': 5,
            'cost': 'error',
            'percent_decrease': 0.05
        })  # end stopping_criteria
    }),  # end learning_rule
    'dataset':
    DD({

        # 'type'                  : 'I2R_Posterior_Blocks_ClnDNN_NoisyFeat',
        # 'type'                  : 'I2R_Posterior_Blocks_ClnDNN_CleanFeat',
        # 'type'                  : 'I2R_Posterior_NoisyFeat_Sample',
        'type':
        'I2R_Posterior_Gaussian_Noisy_Sample',
        # 'type'                  : 'Mnist',
        'train_valid_test_ratio': [5, 1, 1],
        'feature_size':
        1998,
        'target_size':
        1998,
        'dataset_noise':
        DD({
            # 'type'              : 'BlackOut',
            # 'type'              : 'MaskOut',
            # 'type'              : 'Gaussian',
            'type': None,

            # for Gaussian
            # 'std'       :((0.15, 0.4), float),
            'std': 0.5,
        }),
        'preprocessor':
        DD({
            'type': None,
            # 'type' : 'Scale',
            # 'type' : 'GCN',
            # 'type' : 'LogGCN',
            # 'type' : 'Standardize',
            # 'type'  : 'Log',

            # for Scale
            'global_max': 1.0,
            'global_min': 0,
            'buffer': 0.,
            'scale_range': [0.5, 1.],
        }),

        # 'batch_size'            : (50, 100, 150, 200),
        'batch_size':
        100,
        'num_batches':
        None,
        'iter_class':
        'SequentialSubsetIterator',
        'rng':
        None
    }),  # end dataset

    #============================[ Layers ]===========================#
    'hidden1':
    DD({
        'name':
        'hidden1',
        'type':
        'RELU',
        'dim':
        3000,

        # 'dropout_below'         : (0.05, 0.1, 0.15, 0.2)
        'dropout_below':
        0.5,
        # 'dropout_below'         : None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            'ratio': 0.5,

            # for Gaussian
            'std': 0.1,
            'mean': 0,
        })
    }),  # end hidden_layer
    'hidden2':
    DD({
        'name':
        'hidden2',
        'type':
        'RELU',
        'dim':
        1000,

        # 'dropout_below'         : (0.05, 0.1, 0.15, 0.2)
        # 'dropout_below'         : (0, 0.5),
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            'ratio': 0.5,

            # for Gaussian
            'std': 0.1,
            'mean': 0,
        })
    }),  # end hidden_layer
    'output':
    DD({
        'name':
        'output',
        'type':
        'Sigmoid',
        'dim':
        1998,
        # 'dim'                   : 1848,

        # 'dropout_below'         : 0.5,
        # 'dropout_below'         : (0, 0.5),
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            'ratio': 0.5,

            # for Gaussian
            'std': 0.1,
            'mean': 0,
        })
    })  # end output_layer
})
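
Some fields above are fixed values, some are tuples of discrete choices (e.g. 'eps'), and some are ((low, high), float) range specs (e.g. 'learning_rate'); a hedged sketch of turning one spec into a concrete sample. This mirrors how the specs read, not necessarily the exact sampler the original framework uses:

import random


def sample_value(spec):
    # ((low, high), float): uniform draw from the continuous range
    if (isinstance(spec, tuple) and len(spec) == 2
            and spec[1] is float and isinstance(spec[0], tuple)):
        low, high = spec[0]
        return random.uniform(low, high)
    # plain tuple: pick one of the discrete choices
    if isinstance(spec, tuple):
        return random.choice(spec)
    # anything else is already concrete
    return spec


print(sample_value(((1e-2, 0.5), float)))      # continuous draw
print(sample_value((1e-1, 1e-2, 1e-3, 1e-4)))  # discrete choice
print(sample_value(0.9))                       # fixed value
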
Example #30
0
def jobman_insert_random(n_jobs):
    JOBDB = 'postgres://[email protected]/dauphiya_db/emotiw_mlp_audio'
    EXPERIMENT_PATH = "experiment.jobman_entrypoint"

    jobs = []
    for _ in range(n_jobs):
        job = DD()

        job.n_hiddens = numpy.random.randint(8, 512)
        job.n_layers = numpy.random.randint(1, 4)
        job.learning_rate = 10.**numpy.random.uniform(-3, -0)
        job.momentum = 10.**numpy.random.uniform(-1, -0)
        job.features = ["minimal.pca", "full.pca"][numpy.random.binomial(1, 0.5)]
        job.example_dropout = numpy.random.randint(16, 200)
        job.rbm_learning_rate = 10.**numpy.random.uniform(-3, -0)
        job.rbm_epochs = numpy.random.randint(8, 100)
        job.tag = "pretrain"

        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))
    if answer == "y":
        numpy.random.shuffle(jobs)

        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --repeat_jobs=%d jobman sql -n 1 'postgres://[email protected]/dauphiya_db/emotiw_mlp_audio' ." % len(jobs)
def tfd(n_trials):
    ri = numpy.random.random_integers

    state = DD()
    with open('tfd_powerup_temp.yaml') as ymtmp:
        state.yaml_string = ymtmp.read()

    state.powerup_nunits = 240
    state.powerup_npieces = 5
    state.W_lr_scale = 0.04
    state.p_lr_scale = 0.01
    state.lr_rate = 0.1
    state.l2_pen = 1e-5
    state.l2_pen2 = 0.0000
    state.init_mom = 0.5
    state.final_mom = 0.5
    state.decay_factor = 0.5
    state.max_col_norm = 1.9365
    state.max_col_norm2 = 1.8365
    state.batch_size = 128

    state.save_path = './'

    n_pieces = [2, 3, 4, 5, 6]
    n_units = [200, 240, 320, 360, 420, 480, 540]
    learning_rates = numpy.logspace(numpy.log10(0.002), numpy.log10(1.0), 40)
    learning_rate_scalers = numpy.logspace(numpy.log10(0.04), numpy.log10(1), 30)
    l2_pen = numpy.logspace(numpy.log10(1e-6), numpy.log10(8*1e-3), 90)
    max_col_norms = [1.8365, 1.9365, 2.1365, 2.2365, 2.3486]
    batch_sizes = [128, 100, 80]

    ind = 0
    TABLE_NAME = "powerup_tfd_1layer_finer_large_fixed2"
    db = api0.open_db('postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME)

    for i in xrange(n_trials):
        state.lr_rate = learning_rates[ri(learning_rates.shape[0]) - 1]
        state.powerup_nunits = n_units[ri(len(n_units)) - 1]
        state.powerup_npieces = n_pieces[ri(len(n_pieces)) - 1]
        state.W_lr_scale = numpy.random.uniform(low=0.02, high=1.0)#learning_rate_scalers[ri(len(learning_rate_scalers)) - 1]
        state.p_lr_scale = numpy.random.uniform(low=0.02, high=1.0)#learning_rate_scalers[ri(len(learning_rate_scalers)) - 1]
        state.l2_pen = l2_pen[ri(l2_pen.shape[0]) - 1]
        state.init_mom = numpy.random.uniform(low=0.3, high=0.6)
        state.final_mom = numpy.random.uniform(low=state.init_mom + 0.1, high=0.9)
        state.decay_factor = numpy.random.uniform(low=0.01, high=0.05)
        state.max_col_norm = max_col_norms[ri(len(max_col_norms)) - 1]
        state.batch_size = batch_sizes[ri(len(batch_sizes)) - 1]

        alphabet = list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
        state.save_path = './'
        state.save_path += ''.join(alphabet[:7]) + '_'
        sql.insert_job(experiment, flatten(state), db)
        ind += 1

    db.createView(TABLE_NAME + '_view')
    print "{} jobs submitted".format(ind)
def tfd(n_trials):
    ri = numpy.random.random_integers

    state = DD()
    with open('mnist_powerup_temp_l2.yaml') as ymtmp:
        state.yaml_string = ymtmp.read()

    state.powerup_nunits = 240
    state.powerup_npieces = 5

    state.powerup_nunits2 = 240
    state.powerup_npieces2 = 5

    state.W_lr_scale = 0.04
    state.p_lr_scale = 0.01
    state.lr_rate = 0.1
    state.init_mom = 0.5
    state.final_mom = 0.5
    state.decay_factor = 0.5
    state.max_col_norm = 1.9365

    state.save_path = './'

    n_pieces = [2, 3, 4, 5]
    n_units = [200, 240, 320, 360, 420, 480]

    learning_rates = numpy.logspace(numpy.log10(0.09), numpy.log10(1.2), 60)
    learning_rate_scalers = numpy.logspace(numpy.log10(0.1), numpy.log10(1), 50)
    decay_factors =  numpy.logspace(numpy.log10(0.001), numpy.log10(0.06), 40)
    max_col_norms = [1.8365, 1.9365, 2.1365, 2.2365, 2.3486]

    ind = 0
    TABLE_NAME = "powerup_mnist_finest_large_2l"
    db = api0.open_db('postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME)

    for i in xrange(n_trials):
        state.lr_rate = learning_rates[ri(learning_rates.shape[0]) - 1]

        state.powerup_nunits = n_units[ri(len(n_units)) - 1]
        state.powerup_npieces = n_pieces[ri(len(n_pieces)) - 1]

        state.powerup_nunits2 = state.powerup_nunits
        state.powerup_npieces2 = state.powerup_npieces

        state.W_lr_scale = numpy.random.uniform(low=0.09, high=1.0)
        state.p_lr_scale = numpy.random.uniform(low=0.09, high=1.0)

        state.init_mom = numpy.random.uniform(low=0.3, high=0.6)
        state.final_mom = numpy.random.uniform(low=state.init_mom + 0.1, high=0.9)
        state.decay_factor = decay_factors[ri(len(decay_factors)) - 1]
        state.max_col_norm = max_col_norms[ri(len(max_col_norms)) - 1]

        alphabet = list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
        state.save_path = './'
        state.save_path += ''.join(alphabet[:7]) + '_'
        sql.insert_job(experiment, flatten(state), db)
        ind += 1

    db.createView(TABLE_NAME + '_view')
    print "{} jobs submitted".format(ind)
Example #33
0
model_config = DD({

        ############################[AE_Testing]##########################
        ##################################################################

        'AE_Testing' : DD({

            'model' : DD({
                    'rand_seed'             : None
                    }), # end mlp

            'log' : DD({
                    # 'experiment_name'         : 'AE_Testing_Mnist_784_500',
                    'experiment_name'       : 'AE_Mnist_784_100',
                    'description'           : '',
                    'save_outputs'          : False,
                    'save_learning_rule'    : False,
                    'save_model'            : False,
                    'save_epoch_error'      : False,
                    'save_to_database_name' : 'Database_Name.db'
                    }), # end log


            'learning_rule' : DD({
                    'max_col_norm'          : (1, 10, 50),
                    'L1_lambda'             : None,
                    'L2_lambda'             : None,
                    'cost'                  : 'mse',
                    'stopping_criteria'     : DD({
                                                'max_epoch'         : 100,
                                                'epoch_look_back'   : 5,
                                                'cost'              : 'mse',
                                                'percent_decrease'  : 0.05
                                                }) # end stopping_criteria
                    }), # end learning_rule


            'learning_method' : DD({
                    'type'                  : 'SGD',
                    # 'type'                  : 'AdaGrad',
                    # 'type'                  : 'AdaDelta',

                    'learning_rate'         : 0.9,
                    'momentum'              : 0.01,
                    }), # end learning_method


            'dataset' : DD({

                    'type'                  : 'Mnist',
                    'train_valid_test_ratio': [8, 1, 1],
                    'feature_size'          : 784,
                    # 'preprocessor'          : None,
                    # 'preprocessor'          : 'Scale',
                    # 'preprocessor'          : 'GCN',
                    # 'preprocessor'          : 'LogGCN',
                    'dataset_noise'         : DD({
                                                'type'              : None
                                                # 'type'              : 'BlackOut',
                                                # 'type'              : 'MaskOut',
                                                # 'type'              : 'Gaussian',
                                                }),

                    'preprocessor'          : DD({
                                                'type' : None,
                                                # 'type' : 'Scale',
                                                # 'type' : 'GCN',
                                                # 'type' : 'LogGCN',
                                                # 'type' : 'Standardize',

                                                # for Scale
                                                'global_max' : 89,
                                                'global_min' : -23,
                                                'buffer'     : 0.5,
                                                'scale_range': [-1, 1],
                                                }),
                    'batch_size'            : 100,
                    'num_batches'           : None,
                    'iter_class'            : 'SequentialSubsetIterator',
                    'rng'                   : None
                    }), # end dataset

            #============================[ Layers ]===========================#
            'hidden1' : DD({
                    'name'                  : 'hidden1',
                    'type'                  : 'SoftRELU',
                    'dim'                   : 100,
                    # 'dropout_below'         : (0.05, 0.1, 0.15, 0.2)
                    'dropout_below'         : 0.5,

                    'layer_noise'           : DD({
                                                # 'type'      : None,
                                                # 'type'      : 'BlackOut',
                                                # 'type'      : 'Gaussian',
                                                'type'      : 'MaskOut',
                                                # 'type'      : 'BatchOut',

                                                # for BlackOut, MaskOut and BatchOut
                                                'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
                                                # 'ratio'     : 0.05,

                                                # for Gaussian
                                                # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
                                                'std'       : (0.005, 0.01, 0.02, 0.03, 0.04),
                                                # 'std'       : 0.001,
                                                'mean'      : 0,
                                                })
                    }), # end hidden_layer

            'h1_mirror' : DD({
                    'name'                  : 'h1_mirror',
                    'type'                  : 'Sigmoid',
                    # 'dim'                   : 2049, # dim = input.dim
                    'dropout_below'         : None,

                    'layer_noise'           : DD({
                                                # 'type'      : None,
                                                # 'type'      : 'BlackOut',
                                                # 'type'      : 'Gaussian',
                                                'type'      : 'MaskOut',
                                                # 'type'      : 'BatchOut',

                                                # for BlackOut, MaskOut and BatchOut
                                                'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
                                                # 'ratio'     : 0.05,

                                                # for Gaussian
                                                # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
                                                'std'       : (0.005, 0.01, 0.02, 0.03, 0.04),
                                                # 'std'       : 0.001,
                                                'mean'      : 0,
                                                })
                    }) # end output_layer
            }), # end autoencoder


        #############################[Mapping]############################
        ##################################################################

        'Laura_Mapping' : DD({

            'model' : DD({
                    'rand_seed'             : None
                    }), # end mlp

            'log' : DD({
                    'experiment_name'       : 'AE1001_Warp_Laura_Blocks_GCN_Mapping', #helios

                    'description'           : '',
                    'save_outputs'          : True,
                    'save_learning_rule'    : True,
                    'save_model'            : True,
                    'save_epoch_error'      : True,
                    'save_to_database_name' : 'Laura.db'
                    }), # end log


            'learning_rule' : DD({
                    'max_col_norm'          : (1, 10, 50),
                    # 'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
                    'learning_rate'         : ((1e-8, 1e-3), float),
                    'momentum'              : (1e-3, 1e-2, 1e-1, 0.5, 0.9),
                    'momentum_type'         : 'normal',
                    'L1_lambda'             : None,
                    'L2_lambda'             : None,
                    'cost'                  : 'entropy',
                    'stopping_criteria'     : DD({
                                                'max_epoch'         : 100,
                                                'epoch_look_back'   : 10,
                                                'cost'              : 'entropy',
                                                'percent_decrease'  : 0.05
                                                }) # end stopping_criteria
                    }), # end learning_rule


            'learning_method' : DD({
                    'type'                  : 'SGD',
                    # 'type'                  : 'AdaGrad',
                    # 'type'                  : 'AdaDelta',

                    'learning_rate'         : 0.9,
                    'momentum'              : 0.01,
                    }), # end learning_method

            #===========================[ Dataset ]===========================#
            'dataset' : DD({
                    # 'type'                  : 'Laura_Blocks_GCN_Mapping',
                    'type'                  : 'Laura_Warp_Blocks_GCN_Mapping',

                    'feature_size'          : 2049,
                    'target_size'           : 1,
                    'train_valid_test_ratio': [8, 1, 1],

                    'preprocessor'          : 'GCN',

                    'batch_size'            : (50, 100, 150, 200),
                    'num_batches'           : None,
                    'iter_class'            : 'SequentialSubsetIterator',
                    'rng'                   : None
                    }), # end dataset

            #============================[ Layers ]===========================#
            'num_layers' : 1,

            'hidden1' : DD({
                    'name'                  : 'hidden1',
                    'type'                  : 'Tanh',
                    'dim'                   : 1000,
                    'dropout_below'         : None,


                    }), # end hidden_layer

            'hidden2' : DD({
                    'name'                  : 'hidden2',
                    'type'                  : 'Tanh',
                    'dim'                   : 500,
                    'dropout_below'         : None,
                    }), # end hidden_layer

            'output' : DD({
                    'name'                  : 'output',
                    'type'                  : 'Linear',
                    'dim'                   : 1,
                    'dropout_below'         : None,
                    }), # end hidden_layer

            }), # end Laura_Mapping

        #############################[Laura]##############################
        ##################################################################

        'Laura' : DD({

            'model' : DD({
                    # 'rand_seed'             : 4520,
                    'rand_seed'             : None,
                    # 'rand_seed'             : 2137
                    }), # end mlp

            'log' : DD({
                    # 'experiment_name'       : 'testing_blackout',
                    # 'experiment_name'       : 'AE0910_Warp_Blocks_2049_500_tanh_gpu_blockout_more_no_filter_latest',
                    # 'experiment_name'       : 'AE0829_Warp_Standardize_GCN_Blocks_2049_500_tanh_gpu',
                    # 'experiment_name'       : 'AE0912_Blocks_2049_500_tanh_gpu_clean',
                    # 'experiment_name'       : 'AE0829_Standardize_GCN_Blocks_2049_500_tanh_gpu',
                    # 'experiment_name'       : 'AE0901_Warp_Blocks_500_180_tanh_gpu',

                    # 'experiment_name'       : 'AE1016_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
                    # 'experiment_name'       : 'AE1018_Warp_Blocks_2049_500_tanh_tanh_gpu_blackout', #helios

                    # 'experiment_name'       : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
                    # 'experiment_name'       : 'AE0918_Blocks_180_120_tanh_tanh_gpu_clean', #helios

                    # 'experiment_name'       : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_dropout',
                    # 'experiment_name'       : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_clean',

                    # 'experiment_name'       : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
                    # 'experiment_name'       : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout', #helios

                    'experiment_name'       : 'AE1216_Transfactor_blocks_150_50small',

                    'description'           : 'scale_buffer=0.9',
                    'save_outputs'          : True,
                    'save_learning_rule'    : True,
                    'save_model'            : True,
                    'save_epoch_error'      : True,
                    # 'save_to_database_name' : 'Laura12.db'
                    'save_to_database_name' : 'transfactor.db',
                    }), # end log


            'learning_rule' : DD({
                    'max_col_norm'          : 1,
                    'L1_lambda'             : None,
                    'L2_lambda'             : None,
                    'cost'                  : 'mse',
                    'stopping_criteria'     : DD({
                                                'max_epoch'         : 100,
                                                'epoch_look_back'   : 5,
                                                'cost'              : 'mse',
                                                'percent_decrease'  : 0.05
                                                }) # end stopping_criteria
                    }), # end learning_rule


            'learning_method' : DD({
                    'type'                  : 'SGD',
                    # 'type'                  : 'AdaGrad',
                    # 'type'                  : 'AdaDelta',

                    ###[ For SGD and AdaGrad ]###
                    # 'learning_rate'         : 0.001,
                    'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),

                    # 'momentum'              : 0.5,
                    # 'momentum'              : 0.,
                    'momentum'              : (1e-2, 1e-1, 0.5, 0.9),

                    ###[ For AdaDelta ]###
                    'rho'                   : 0.95,
                    'eps'                   : 1e-6,
                    }), # end learning_method

            #===========================[ Dataset ]===========================#
            'dataset' : DD({
                    # 'type'                  : 'Laura_Blocks',
                    # 'type'                  : 'Laura_Warp_Blocks',

                    # 'type'                  : 'Laura_Warp_Blocks_500_Tanh',
                    # 'type'                 : 'Laura_Warp_Blocks_180_Tanh_Dropout',
                    # 'type'                  : 'Laura_Cut_Warp_Blocks_300',
                    # 'type'                  : 'Laura_Blocks_180_Tanh_Tanh',
                    # 'type'                  : 'Laura_Blocks_180_Tanh_Tanh_Dropout',
                    # 'type'                  : 'Laura_Blocks_500_Tanh_Sigmoid',
                    # 'type'                  : 'Laura_Blocks_500',

                    # 'type'                  : 'Laura_Warp_Standardize_Blocks',
                    # 'type'                  : 'Laura_Standardize_Blocks',

                    # 'type'                  : 'Laura_Scale_Warp_Blocks_500_Tanh',
                    # 'type'                  : 'Laura_Scale_Warp_Blocks_180_Tanh_Dropout',

                    # 'type'                  : 'Laura_Warp_Blocks_180_Tanh_Blackout',

                    # 'type'                  : 'Mnist',

                    # 'type'                  : 'Laura_Warp_Blocks_180_Tanh_Noisy_MaskOut',
                    # 'type'                  : 'TransFactor_AE',
                    'type'                  : 'TransFactor_Blocks150',

                    'feature_size'          : 150,
                    'train_valid_test_ratio': [8, 1, 1],

                    'dataset_noise'         : DD({
                                                'type'              : None
                                                # 'type'              : 'BlackOut',
                                                # 'type'              : 'MaskOut',
                                                # 'type'              : 'Gaussian',
                                                }),

                    'preprocessor'          : DD({
                                                'type' : None,
                                                # 'type' : 'Scale',
                                                # 'type' : 'GCN',
                                                # 'type' : 'LogGCN',
                                                # 'type' : 'Standardize',

                                                # for Scale
                                                # 'global_max' : 89,
                                                # 'global_min' : -23,
                                                'global_max' : 4.0,
                                                'global_min' : 0.,
                                                'buffer'     : 0.9,
                                                'scale_range': [-1, 1],
                                                }),
                    # 'batch_size'            : 50,
                    'batch_size'            : (50, 100, 150, 200),
                    'num_batches'           : None,
                    'iter_class'            : 'SequentialSubsetIterator',
                    'rng'                   : None
                    }), # end dataset

            #============================[ Layers ]===========================#
            'num_layers' : 1,

            'hidden1' : DD({
                    'name'                  : 'hidden1',
                    'type'                  : 'Tanh',
                    # 'type'                  : 'SoftRELU',
                    'dim'                   : 50,

                    'dropout_below'         : None,
                    # 'dropout_below'         : (0.3, 0.4, 0.5),
                    # 'dropout_below'         : 0.5,

                    'layer_noise'           : DD({
                                                # 'type'      : None,
                                                'type'      : 'BlackOut',
                                                # 'type'      : 'Gaussian',
                                                # 'type'      : 'MaskOut',
                                                # 'type'      : 'BatchOut',

                                                # for BlackOut, MaskOut and BatchOut
                                                # 'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
                                                'ratio'     : 0.5,

                                                # for Gaussian
                                                # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
                                                'std'       : (0.005, 0.01, 0.02, 0.03, 0.04),
                                                # 'std'       : 0.001,
                                                'mean'      : 0,
                                                })

                    }), # end hidden_layer

            # 'hidden2' : DD({
            #         'name'                  : 'hidden2',
            #         'type'                  : 'RELU',
            #         'dim'                   : 100,
            #         'dropout_below'         : None,
            #
            #         'blackout_below'        : None
            #         }), # end hidden_layer
            #
            # 'h2_mirror' : DD({
            #         'name'                  : 'h2_mirror',
            #         'type'                  : 'RELU',
            #         # 'dim'                   : 2049, # dim = input.dim
            #         'dropout_below'         : None,
            #
            #         'blackout_below'        : None
            #         }), # end output_layer

            'h1_mirror' : DD({
                    'name'                  : 'h1_mirror',
                    'type'                  : 'Tanh',
                    # 'dim'                   : 2049, # dim = input.dim

                    'dropout_below'         : None,
                    # 'dropout_below'         : 0.5,

                    }) # end output_layer

            }), # end autoencoder



    ########################[Laura_Two_Layers]########################
    ##################################################################

    'Laura_Two_Layers' : DD({
        'model' : DD({
                'rand_seed'             : None
                }), # end mlp

        'log' : DD({
                # 'experiment_name'       : 'AE1214_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_maskout',
                'experiment_name'       : 'Transfactor1215_500_50_Two_Layers_Finetune_small',

                'description'           : '',
                'save_outputs'          : True,
                'save_learning_rule'    : True,
                'save_model'            : True,
                'save_epoch_error'      : True,
                # 'save_to_database_name' : 'Laura12.db'
                'save_to_database_name' : 'transfactor.db',
                }), # end log


        'learning_rule' : DD({
                'max_col_norm'          : 1,
                'L1_lambda'             : None,
                'L2_lambda'             : None,
                'cost'                  : 'mse',
                'stopping_criteria'     : DD({
                                            'max_epoch'         : 100,
                                            'epoch_look_back'   : 5,
                                            'cost'              : 'mse',
                                            'percent_decrease'  : 0.05
                                            }) # end stopping_criteria
                }), # end learning_rule


        'learning_method' : DD({
                'type'                  : 'SGD',
                # 'type'                  : 'AdaGrad',
                # 'type'                  : 'AdaDelta',

                # for SGD and AdaGrad
                'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
                'momentum'              : (1e-2, 1e-1, 0.5, 0.9),

                # for AdaDelta
                'rho'                   : 0.95,
                'eps'                   : 1e-6,
                }), # end learning_method

        #===========================[ Dataset ]===========================#
        'dataset' : DD({

                # 'type'                  : 'Laura_Blocks',
                # 'type'                  : 'Laura_Warp_Blocks',
                'type'                  : 'TransFactor_Blocks',

                'feature_size'          : 500,
                'train_valid_test_ratio': [8, 1, 1],

                'dataset_noise'         : DD({
                                            'type'              : None
                                            # 'type'              : 'BlackOut',
                                            # 'type'              : 'MaskOut',
                                            # 'type'              : 'Gaussian',
                                            }),

                'preprocessor'          : DD({
                                            'type' : None,
                                            # 'type' : 'Scale',
                                            # 'type' : 'GCN',
                                            # 'type' : 'LogGCN',
                                            # 'type' : 'Standardize',

                                            # for Scale
                                            # 'global_max' : 89,
                                            # 'global_min' : -23,
                                            'global_max' : 4.0,
                                            'global_min' : 0.,
                                            'buffer'     : 0.9,
                                            'scale_range': [-1, 1],
                                            }),

                'batch_size'            : (50, 100, 150, 200),
                'num_batches'           : None,
                'iter_class'            : 'SequentialSubsetIterator',
                'rng'                   : None
                }), # end dataset

        # #============================[ Layers ]===========================#

        'hidden1' : DD({
                'name'                  : 'hidden1',

                # 'model'                 : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067',
                # 'model'                 : 'AE1112_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_clean_20141112_2145_06823495',
                # 'model'                 : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671',
                # 'model'                 : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280',
                # 'model'                 : 'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837',
                'model'                 : 'AE1216_Transfactor_blocks_500_150small_20141215_1748_06646265',
                'dropout_below'         : None,
                # 'dropout_below'         : (0.1, 0.2, 0.3, 0.4, 0.5),
                # 'dropout_below'         : 0.1,
                }), # end hidden_layer

        'hidden2' : DD({
                'name'                  : 'hidden2',

                # 'model'                 : 'AE1001_Warp_Blocks_500_120_tanh_tanh_gpu_clean_20141003_0113_02206401',
                # 'model'                 : 'AE1115_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141119_1327_11490503',
                # 'model'                 : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279',
                # 'model'                 : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114',
                # 'model'                 : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132',
                'model'                 : 'AE1216_Transfactor_blocks_150_50small_20141215_2028_14707382',
                'dropout_below'         : None,
                })
        }), # end autoencoder

    ########################[Laura_Three_Layers]########################
    ####################################################################

    'Laura_Three_Layers' : DD({
        'fine_tuning_only'              : False,

        'model' : DD({
                'rand_seed'             : None
                }), # end mlp

        'log' : DD({

                # 'experiment_name'       : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean',
                # 'experiment_name'       : 'AE0919_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy',

                # 'experiment_name'       : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_clean',
                # 'experiment_name'       : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_noisy',

                # 'experiment_name'       : 'AE0917_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean',
                # 'experiment_name'       : 'AE0919_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy',

                # 'experiment_name'       : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_noisy',
                # 'experiment_name'       : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_clean',

                'experiment_name'       : 'AE1213_Scale_Laura_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_maskout',

                'description'           : '',
                'save_outputs'          : True,
                'save_learning_rule'    : True,
                'save_model'            : True,
                'save_epoch_error'      : True,
                'save_to_database_name' : 'Laura12.db'
                }), # end log


        'learning_rule' : DD({
                'max_col_norm'          : 1,
                'L1_lambda'             : None,
                'L2_lambda'             : None,
                'cost'                  : 'mse',
                'stopping_criteria'     : DD({
                                            'max_epoch'         : 100,
                                            'epoch_look_back'   : 5,
                                            'cost'              : 'mse',
                                            'percent_decrease'  : 0.05
                                            }) # end stopping_criteria
                }), # end learning_rule


        'learning_method' : DD({
                'type'                  : 'SGD',
                # 'type'                  : 'AdaGrad',
                # 'type'                  : 'AdaDelta',

                # for SGD and AdaGrad
                'learning_rate'         : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),
                # 'learning_rate'         : 0.001,
                'momentum'              : (1e-2, 1e-1, 0.5, 0.9),
                # 'momentum'              : 0.1,
                # 'momentum'              : 0.5,

                # for AdaDelta
                'rho'                   : 0.95,
                'eps'                   : 1e-6,
                }), # end learning_method

        #===========================[ Dataset ]===========================#
        'dataset' : DD({

                # 'type'                  : 'Laura_Blocks',
                'type'                  : 'Laura_Warp_Blocks',

                'feature_size'          : 2049,
                'train_valid_test_ratio': [8, 1, 1],

                'dataset_noise'         : DD({
                                            'type'              : None
                                            # 'type'              : 'BlackOut',
                                            # 'type'              : 'MaskOut',
                                            # 'type'              : 'Gaussian',
                                            }),

                'preprocessor'          : DD({
                                            # 'type' : None,
                                            'type' : 'Scale',
                                            # 'type' : 'GCN',
                                            # 'type' : 'LogGCN',
                                            # 'type' : 'Standardize',

                                            # for Scale
                                            'global_max' : 89,
                                            'global_min' : -23,
                                            'buffer'     : 0.9,
                                            'scale_range': [-1, 1],
                                            }),

                'batch_size'            : (50, 100, 150, 200),
                # 'batch_size'            : 50,
                'num_batches'           : None,
                'iter_class'            : 'SequentialSubsetIterator',
                'rng'                   : None
                }), # end dataset

        # #============================[ Layers ]===========================#

        'hidden1' : DD({
                'name'                  : 'hidden1',
                # 'model'                 : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067',
                # 'model'                 : 'AE0916_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20140916_1705_29139505',

                # 'model'                 :'AE0912_Blocks_2049_500_tanh_tanh_gpu_clean_20140914_1242_27372903',
                # 'model'                 : 'AE0915_Blocks_2049_500_tanh_tanh_gpu_Dropout_20140915_1900_37160748',

                # 'model'                 : 'AE1002_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20141001_0321_33382955',
                # 'model'                 : 'AE0930_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140930_1345_29800576',

                # 'model'                 : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_clean_continue_20141110_1235_21624029',
                # 'model'                 : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_batchout_continue_20141111_0957_22484008',
                # 'model'                 : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671',
                # 'model'                 : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280',
                'model'                 : 'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837',
                'dropout_below'         : None,
                # 'dropout_below'         : (0.1, 0.2, 0.3, 0.4, 0.5),
                # 'dropout_below'         : 0.1,

                }), # end hidden_layer

        'hidden2' : DD({
                'name'                  : 'hidden2',
                # 'model'                 : 'AE0914_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20140915_0400_30113212',
                # 'model'                 : 'AE0918_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_1125_23612485',

                # 'model'                 : 'AE0916_Blocks_500_180_tanh_tanh_gpu_clean_20140916_2255_06553688',
                # 'model'                 : 'AE0918_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_0920_42738052',

                # 'model'                 : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20141001_2158_16765065',
                # 'model'                 : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20141002_0348_53679208',

                # 'model'                 : 'AE1110_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141111_2157_47387660',
                # 'model'                 : 'AE1111_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_batchout_continue_20141112_0844_45882544',
                # 'model'                 : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279',
                # 'model'                 : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114',
                'model'                 : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132',

                'dropout_below'         : None,

                }), # end hidden_layer

        'hidden3' : DD({
                'name'                  : 'hidden3',
                # 'model'                 : 'AE0915_Warp_Blocks_180_120_tanh_gpu_dropout_clean_20140916_1028_26875210',
                # 'model'                 : 'AE0918_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1649_54631649',

                # 'model'                 : 'AE0914_Blocks_180_120_tanh_tanh_gpu_clean_20140918_0119_40376829',
                # 'model'                 : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1345_22865393',

                # 'model'                 : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1711_48207269',
                # 'model'                 : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1457_08966968',
                # 'model'                 : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_clean_20141002_1713_16791523',

                # 'model'                 : 'AE1120_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141122_0044_09351031',
                # 'model'                 : 'AE1121_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_batchout_20141122_0348_49379314',
                # 'model'                 : 'AE1127_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_gaussian_20141201_0345_39835964',
                # 'model'                 : 'AE1201_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141204_0137_07827194',
                'model'                 : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout_20141213_1608_33432934',

                'dropout_below'         : None,

                }), # end hidden_layer


        }), # end autoencoder

    #####################[Two_Layers_No_Transpose]######################
    ####################################################################

    'Laura_Two_Layers_No_Transpose' : DD({

        'model' : DD({
                'rand_seed'             : 4520
                }), # end mlp

        'log' : DD({
                'experiment_name'       : 'AE1107_No_Transpose_Scale_Warp_Blocks_2049_500_gpu_adagrad_dropout',
                'description'           : '',
                'save_outputs'          : True,
                'save_learning_rule'    : True,
                'save_model'            : True,
                'save_epoch_error'      : True,
                'save_to_database_name' : 'Laura5.db'
                }), # end log


        'learning_rule' : DD({
                'max_col_norm'          : 1,
                'L1_lambda'             : None,
                'L2_lambda'             : None,
                'cost'                  : 'mse',
                'stopping_criteria'     : DD({
                                            'max_epoch'         : 100,
                                            'epoch_look_back'   : 5,
                                            'cost'              : 'mse',
                                            'percent_decrease'  : 0.05
                                            }) # end stopping_criteria
                }), # end learning_rule


        'learning_method' : DD({
                # 'type'                  : 'SGD',
                'type'                  : 'AdaGrad',
                # 'type'                  : 'AdaDelta',

                # for SGD and AdaGrad
                'learning_rate'         : 0.9,
                'momentum'              : 0.01,

                # for AdaDelta
                'rho'                   : 0.95,
                'eps'                   : 1e-6,
                }), # end learning_method

        #===========================[ Dataset ]===========================#
        'dataset' : DD({
                # 'type'                  : 'Laura_Warp_Blocks_180',
                # 'type'                  : 'Laura_Cut_Warp_Blocks_300',
                # 'type'                  : 'Laura_Blocks_500',
                # 'type'                  : 'Laura_Blocks',
                'type'                  : 'Laura_Warp_Blocks',
                'feature_size'          : 2049,
                'train_valid_test_ratio': [8, 1, 1],

                'dataset_noise'         : DD({
                                            # 'type'              : 'BlackOut',
                                            # 'type'              : 'MaskOut',
                                            # 'type'              : 'Gaussian',
                                            'type'              : None
                                            }),

                'preprocessor'          : DD({
                                            # 'type' : None,
                                            'type' : 'Scale',
                                            # 'type' : 'GCN',
                                            # 'type' : 'LogGCN',
                                            # 'type' : 'Standardize',

                                            # for Scale
                                            'global_max' : 89,
                                            'global_min' : -23,
                                            'buffer'     : 0.5,
                                            'scale_range': [-1, 1],
                                            }),

                'batch_size'            : (50, 100, 150, 200),
                'num_batches'           : None,
                'iter_class'            : 'SequentialSubsetIterator',
                'rng'                   : None
                }), # end dataset

        # #============================[ Layers ]===========================#
        'num_layers' : 1,

        'hidden1' : DD({
                'name'                  : 'hidden1',
                'type'                  : 'Tanh',
                'dim'                   : 500,
                'dropout_below'         : 0.5,
                'layer_noise'           : None,
                # 'layer_noise'           : 'BlackOut',
                # 'layer_noise'           : 'Gaussian',
                # 'layer_noise'           : 'MaskOut',
                # 'layer_noise'           : 'BatchOut',
                }), # end hidden_layer


        'h1_mirror' : DD({
                'name'                  : 'h1_mirror',
                'type'                  : 'Tanh',
                # 'dim'                   : 2049, # dim = input.dim
                'dropout_below'         : 0.5,
                'layer_noise'           : None,
                # 'layer_noise'           : 'BlackOut',
                # 'layer_noise'           : 'Gaussian',
                # 'layer_noise'           : 'MaskOut',
                # 'layer_noise'           : 'BatchOut',
                }) # end output_layer


        }), # end autoencoder

    }) # end model_config
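Note on the tuple-valued settings above: entries such as 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5) or 'batch_size' : (50, 100, 150, 200) appear to list candidate values for a hyperparameter search rather than a single setting. A minimal sketch of expanding such a config into one concrete job per combination follows; the helper name expand_grid and the tuple-means-grid convention are assumptions for illustration, not part of the original source.

import itertools

def expand_grid(config):
    # Assumed convention: tuple-valued entries enumerate candidate values,
    # everything else is a fixed setting shared by all jobs.
    grid_keys = sorted(k for k, v in config.items() if isinstance(v, tuple))
    fixed = dict((k, v) for k, v in config.items() if k not in grid_keys)
    for combo in itertools.product(*(config[k] for k in grid_keys)):
        job = dict(fixed)
        job.update(zip(grid_keys, combo))
        yield job

# Usage sketch:
# for job in expand_grid({'learning_rate': (1e-3, 1e-2),
#                         'batch_size': (50, 100),
#                         'cost': 'mse'}):
#     print job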
Example #34
0
config = DD({
    'module_name':
    'Two_Layers_No_Transpose',
    'model':
    DD({'rand_seed': 4520}),  # end mlp
    'log':
    DD({
        'experiment_name':
        'AE1107_No_Transpose_Scale_Warp_Blocks_2049_500_gpu_adagrad_dropout',
        'description': '',
        'save_outputs': True,
        'save_learning_rule': True,
        'save_model': True,
        'save_epoch_error': True,
        'save_to_database_name': 'Laura5.db'
    }),  # end log
    'learning_rule':
    DD({
        'max_col_norm':
        1,
        'L1_lambda':
        None,
        'L2_lambda':
        None,
        'cost':
        'mse',
        'stopping_criteria':
        DD({
            'max_epoch': 100,
            'epoch_look_back': 5,
            'cost': 'mse',
            'percent_decrease': 0.05
        })  # end stopping_criteria
    }),  # end learning_rule
    'learning_method':
    DD({
        # 'type'                  : 'SGD',
        'type': 'AdaGrad',
        # 'type'                  : 'AdaDelta',

        # for SGD and AdaGrad
        'learning_rate': 0.9,
        'momentum': 0.01,

        # for AdaDelta
        'rho': 0.95,
        'eps': 1e-6,
    }),  # end learning_method

    #===========================[ Dataset ]===========================#
    'dataset':
    DD({
        # 'type'                  : 'Laura_Warp_Blocks_180',
        # 'type'                  : 'Laura_Cut_Warp_Blocks_300',
        # 'type'                  : 'Laura_Blocks_500',
        # 'type'                  : 'Laura_Blocks',
        'type':
        'Laura_Warp_Blocks',
        'feature_size':
        2049,
        'train_valid_test_ratio': [8, 1, 1],
        'dataset_noise':
        DD({
            # 'type'              : 'BlackOut',
            # 'type'              : 'MaskOut',
            # 'type'              : 'Gaussian',
            'type': None
        }),
        'preprocessor':
        DD({
            # 'type' : None,
            'type': 'Scale',
            # 'type' : 'GCN',
            # 'type' : 'LogGCN',
            # 'type' : 'Standardize',

            # for Scale
            'global_max': 89,
            'global_min': -23,
            'buffer': 0.5,
            'scale_range': [-1, 1],
        }),
        'batch_size': (50, 100, 150, 200),
        'num_batches':
        None,
        'iter_class':
        'SequentialSubsetIterator',
        'rng':
        None
    }),  # end dataset

    # #============================[ Layers ]===========================#
    'num_layers':
    1,
    'hidden1':
    DD({
        'name': 'hidden1',
        'type': 'Tanh',
        'dim': 500,
        'dropout_below': 0.5,
        'layer_noise': None,
        # 'layer_noise'           : 'BlackOut',
        # 'layer_noise'           : 'Gaussian',
        # 'layer_noise'           : 'MaskOut',
        # 'layer_noise'           : 'BatchOut',
    }),  # end hidden_layer
    'h1_mirror':
    DD({
        'name': 'h1_mirror',
        'type': 'Tanh',
        # 'dim'                   : 2049, # dim = input.dim
        'dropout_below': 0.5,
        'layer_noise': None,
        # 'layer_noise'           : 'BlackOut',
        # 'layer_noise'           : 'Gaussian',
        # 'layer_noise'           : 'MaskOut',
        # 'layer_noise'           : 'BatchOut',
    })  # end output_layer
})  # end autoencoder
Example #35
0
config = DD({
    'module_name':
    'Laura_No_Transpose',
    'model':
    DD({
        # 'rand_seed'             : 4520,
        'rand_seed': None,
        # 'rand_seed'             : 2137
    }),  # end mlp
    'log':
    DD({
        # 'experiment_name'       : 'testing_blackout',
        # 'experiment_name'       : 'AE0910_Warp_Blocks_2049_500_tanh_gpu_blockout_more_no_filter_latest',
        # 'experiment_name'       : 'AE0829_Warp_Standardize_GCN_Blocks_2049_500_tanh_gpu',
        # 'experiment_name'       : 'AE0912_Blocks_2049_500_tanh_gpu_clean',
        # 'experiment_name'       : 'AE0829_Standardize_GCN_Blocks_2049_500_tanh_gpu',
        # 'experiment_name'       : 'AE0901_Warp_Blocks_500_180_tanh_gpu',

        # 'experiment_name'       : 'AE1016_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
        # 'experiment_name'       : 'AE1018_Warp_Blocks_2049_500_tanh_tanh_gpu_blackout', #helios

        # 'experiment_name'       : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
        # 'experiment_name'       : 'AE0918_Blocks_180_120_tanh_tanh_gpu_clean', #helios

        # 'experiment_name'       : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_dropout',
        # 'experiment_name'       : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_clean',

        # 'experiment_name'       : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios
        # 'experiment_name'       : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout', #helios

        # 'experiment_name'       : 'AE1216_Transfactor_blocks_150_50small',
        'experiment_name':
        'AE0306_Warp_Blocks_2049_120_Scale_No_Transpose_Clean',
        'description': 'scale_buffer=0.5',
        'save_outputs': True,
        'save_learning_rule': True,
        'save_model': True,
        'save_epoch_error': True,
        'save_to_database_name': 'Laura13.db'
        # 'save_to_database_name' : 'transfactor.db',
    }),  # end log
    'learning_rule':
    DD({
        'max_col_norm':
        1,
        'L1_lambda':
        None,
        'L2_lambda':
        None,
        'cost':
        'mse',
        'stopping_criteria':
        DD({
            'max_epoch': 100,
            'epoch_look_back': 5,
            'cost': 'mse',
            'percent_decrease': 0.05
        })  # end stopping_criteria
    }),  # end learning_rule
    'learning_method':
    DD({
        'type': 'SGD',
        # 'type'                  : 'AdaGrad',
        # 'type'                  : 'AdaDelta',

        ###[ For SGD and AdaGrad ]###
        # 'learning_rate'         : 0.001,
        'learning_rate': (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5),

        # 'momentum'              : 0.5,
        # 'momentum'              : 0.,
        'momentum': (1e-2, 1e-1, 0.5, 0.9),

        ###[ For AdaDelta ]###
        'rho': 0.95,
        'eps': 1e-6,
    }),  # end learning_method

    #===========================[ Dataset ]===========================#
    'dataset':
    DD({
        # 'type'                  : 'Laura_Blocks',
        'type':
        'Laura_Warp_Blocks',

        # 'type'                  : 'Laura_Warp_Blocks_500_Tanh',
        # 'type'                 : 'Laura_Warp_Blocks_180_Tanh_Dropout',
        # 'type'                  : 'Laura_Cut_Warp_Blocks_300',
        # 'type'                  : 'Laura_Blocks_180_Tanh_Tanh',
        # 'type'                  : 'Laura_Blocks_180_Tanh_Tanh_Dropout',
        # 'type'                  : 'Laura_Blocks_500_Tanh_Sigmoid',
        # 'type'                  : 'Laura_Blocks_500',

        # 'type'                  : 'Laura_Warp_Standardize_Blocks',
        # 'type'                  : 'Laura_Standardize_Blocks',

        # 'type'                  : 'Laura_Scale_Warp_Blocks_500_Tanh',
        # 'type'                  : 'Laura_Scale_Warp_Blocks_180_Tanh_Dropout',

        # 'type'                  : 'Laura_Warp_Blocks_180_Tanh_Blackout',

        # 'type'                  : 'Mnist',

        # 'type'                  : 'Laura_Warp_Blocks_180_Tanh_Noisy_MaskOut',
        'num_blocks':
        20,
        'feature_size':
        2049,
        'train_valid_test_ratio': [8, 1, 1],
        'dataset_noise':
        DD({
            'type': None
            # 'type'              : 'BlackOut',
            # 'type'              : 'MaskOut',
            # 'type'              : 'Gaussian',
        }),
        'preprocessor':
        DD({
            # 'type' : None,
            'type': 'Scale',
            # 'type' : 'GCN',
            # 'type' : 'LogGCN',
            # 'type' : 'Standardize',

            # for Scale
            'global_max': 89,
            'global_min': -23,
            'buffer': 0.5,
            'scale_range': [-1, 1],
        }),
        # 'batch_size'            : 50,
        'batch_size': (50, 100, 150, 200),
        'num_batches':
        None,
        'iter_class':
        'SequentialSubsetIterator',
        'rng':
        None
    }),  # end dataset

    #============================[ Layers ]===========================#
    'num_layers':
    3,
    'hidden1':
    DD({
        'name':
        'hidden1',
        'type':
        'Tanh',
        # 'type'                  : 'SoftRELU',
        'dim':
        500,
        'dropout_below':
        None,
        # 'dropout_below'         : (0.3, 0.4, 0.5),
        # 'dropout_below'         : 0.5,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            # 'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
            'ratio': 0.5,

            # for Gaussian
            # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
            'std': (0.005, 0.01, 0.02, 0.03, 0.04),
            # 'std'       : 0.001,
            'mean': 0,
        })
    }),  # end hidden_layer
    'hidden2':
    DD({
        'name':
        'hidden2',
        'type':
        'Tanh',
        'dim':
        180,
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            # 'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
            'ratio': 0.5,

            # for Gaussian
            # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
            'std': (0.005, 0.01, 0.02, 0.03, 0.04),
            # 'std'       : 0.001,
            'mean': 0,
        })
    }),  # end hidden_layer
    'hidden3':
    DD({
        'name':
        'hidden3',
        'type':
        'Tanh',
        'dim':
        120,
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            # 'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
            'ratio': 0.5,

            # for Gaussian
            # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
            'std': (0.005, 0.01, 0.02, 0.03, 0.04),
            # 'std'       : 0.001,
            'mean': 0,
        })
    }),  # end hidden_layer
    'h3_mirror':
    DD({
        'name':
        'h3_mirror',
        'type':
        'Tanh',
        'dim':
        180,
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            # 'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
            'ratio': 0.5,

            # for Gaussian
            # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
            'std': (0.005, 0.01, 0.02, 0.03, 0.04),
            # 'std'       : 0.001,
            'mean': 0,
        })
    }),  # end output_layer
    'h2_mirror':
    DD({
        'name':
        'h2_mirror',
        'type':
        'Tanh',
        'dim':
        500,
        'dropout_below':
        None,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            # 'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
            'ratio': 0.5,

            # for Gaussian
            # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
            'std': (0.005, 0.01, 0.02, 0.03, 0.04),
            # 'std'       : 0.001,
            'mean': 0,
        })
    }),  # end output_layer
    'h1_mirror':
    DD({
        'name':
        'h1_mirror',
        'type':
        'Tanh',
        'dim':
        2049,
        'dropout_below':
        None,
        # 'dropout_below'         : 0.5,
        'layer_noise':
        DD({
            'type': None,
            # 'type'      : 'BlackOut',
            # 'type'      : 'Gaussian',
            # 'type'      : 'MaskOut',
            # 'type'      : 'BatchOut',

            # for BlackOut, MaskOut and BatchOut
            # 'ratio'     : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
            'ratio': 0.5,

            # for Gaussian
            # 'std'       : (0.001, 0.005, 0.01, 0.015, 0.02),
            'std': (0.005, 0.01, 0.02, 0.03, 0.04),
            # 'std'       : 0.001,
            'mean': 0,
        })
    })  # end output_layer
})  # end Laura
Example #36
0
import numpy

import jobman
import jobman.sql
# DD is jobman's attribute-accessible dict; the import path below is the
# usual one for the Theano/LISA jobman package (assumed, not shown in the
# original snippet).
from jobman.tools import DD


def jobman_insert_random(n_jobs):
    """Sample n_jobs random hyperparameter settings and insert them into
    the jobman postgres database for later dispatching."""
    JOBDB = 'postgres://[email protected]/dauphiya_db/emotiw_mlp_audio'
    EXPERIMENT_PATH = "experiment.jobman_entrypoint"

    jobs = []
    for _ in range(n_jobs):
        job = DD()

        # Hidden-layer sizes and counts are sampled uniformly; learning
        # rates and momentum are sampled log-uniformly.
        job.n_hiddens = numpy.random.randint(8, 512)
        job.n_layers = numpy.random.randint(1, 4)
        job.learning_rate = 10. ** numpy.random.uniform(-3, 0)
        job.momentum = 10. ** numpy.random.uniform(-1, 0)
        job.features = ["minimal.pca",
                        "full.pca"][numpy.random.binomial(1, 0.5)]
        job.example_dropout = numpy.random.randint(16, 200)
        job.rbm_learning_rate = 10. ** numpy.random.uniform(-3, 0)
        job.rbm_epochs = numpy.random.randint(8, 100)
        job.tag = "pretrain"

        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))
    if answer == "y":
        numpy.random.shuffle(jobs)

        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print ("To run: jobdispatch --condor --repeat_jobs=%d jobman sql -n 1 "
               "'postgres://[email protected]/dauphiya_db/emotiw_mlp_audio' ."
               % len(jobs))
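The snippet above only defines the insertion helper; a minimal, hypothetical driver for calling it from the command line (not part of the original source) might look like the following, run e.g. as python insert_jobs.py 50:

import sys

if __name__ == "__main__":
    # Hypothetical entry point: take the number of random jobs to insert
    # from the command line, defaulting to 10.
    n_jobs = int(sys.argv[1]) if len(sys.argv) > 1 else 10
    jobman_insert_random(n_jobs)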