def get_gendec_parameters(params, opt):
    params.net.gendec = DD()
    # Type of model to use (e.g., ed = simple decoder)
    params.net.gendec.model = opt.generation_decoder.model
    # Recurrent unit to use: {lstm}
    params.net.gendec.unit = opt.generation_decoder.unit
    # Number of layers in decoder
    params.net.gendec.nL = opt.generation_decoder.num_layers
    # Dropout
    params.net.gendec.dpt = opt.generation_decoder.dropout
    # Coefficient by which to multiply output activations before softmax
    # Higher makes the distribution over tokens more peaky
    # Lower flattens the distribution
    params.net.gendec.dt = opt.generation_decoder.output_temperature
    # Size of hidden state of recurrent unit in decoder
    params.net.gendec.hSize = opt.hidden_size
    # Size of embeddings in the decoder
    params.net.gendec.iSize = opt.embed_size
    # Where to attach context vector
    # (out = concatenate with recurrent unit output)
    # (inp = concatenate with word embedding)
    params.net.gendec.ctx = opt.generation_decoder.context
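# A minimal sketch (not part of the project code) of what the output
# temperature coefficient dt above does: logits are multiplied by dt before
# the softmax, so dt > 1 sharpens the token distribution and dt < 1
# flattens it. The function name and the example logits are made up.
import numpy as np

def softmax_with_temperature(logits, dt):
    scaled = np.asarray(logits, dtype=float) * dt
    e = np.exp(scaled - scaled.max())
    return e / e.sum()

logits = [2.0, 1.0, 0.1]
print(softmax_with_temperature(logits, 0.5))  # flatter distribution
print(softmax_with_temperature(logits, 2.0))  # peakier distribution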
def get_class_parameters(params, opt):
    params.net.classdec = DD()
    # Number of input features to the classifier
    # Should be the same as the hidden size of the encoder
    params.net.classdec.hSize = opt.hidden_size
    # Dropout in the classifier
    params.net.classdec.dpt = opt.classification_decoder.dropout
def read_config(file_):
    config = DD()
    print file_
    for k, v in file_.iteritems():
        if v == "True" or v == "T" or v == "true":
            config[k] = True
        elif v == "False" or v == "F" or v == "false":
            config[k] = False
        elif type(v) == dict:
            config[k] = read_config(v)
        else:
            config[k] = v
    return config
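# Hypothetical usage of read_config (the input dict below is made up):
# string booleans are converted to Python booleans and nested dicts are
# converted recursively into nested DDs with attribute access.
from jobman import DD

raw = {
    "use_dropout": "True",
    "verbose": "F",
    "dataset": {"shuffle": "true", "batch_size": 128},
}
config = read_config(raw)
# config.use_dropout        -> True
# config.verbose            -> False
# config.dataset.batch_size -> 128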
def produit_cartesien_jobs(val_dict):
    job_list = [DD()]
    all_keys = val_dict.keys()
    for key in all_keys:
        possible_values = val_dict[key]
        new_job_list = []
        for val in possible_values:
            for job in job_list:
                to_insert = job.copy()
                to_insert.update({key: val})
                new_job_list.append(to_insert)
        job_list = new_job_list
    return job_list
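# Hypothetical example of produit_cartesien_jobs: every combination of the
# listed values becomes one job DD, so the grid below yields 2 x 3 = 6 jobs.
# The parameter names and values are illustrative only.
grid = {
    "learning_rate": [1e-3, 1e-2],
    "batch_size": [50, 100, 150],
}
jobs = produit_cartesien_jobs(grid)
print len(jobs)  # 6
print jobs[0]    # e.g. {'batch_size': 50, 'learning_rate': 0.001}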
config = DD({ 'module_name': 'Laura_Two_Layers', 'model': DD({'rand_seed': None}), # end mlp 'log': DD({ # 'experiment_name' : 'AE1214_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_maskout', 'experiment_name': 'AE0306_Scale_Warp_Blocks_2Layers_finetune_2049_1000_180', 'description': '', 'save_outputs': True, 'save_learning_rule': True, 'save_model': True, 'save_epoch_error': True, 'save_to_database_name': 'Laura13.db' }), # end log 'learning_rule': DD({ 'max_col_norm': 1, 'L1_lambda': None, 'L2_lambda': None, 'cost': 'mse', 'stopping_criteria': DD({ 'max_epoch': 100, 'epoch_look_back': 5, 'cost': 'mse', 'percent_decrease': 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method': DD({ 'type': 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', # for SGD and AdaGrad 'learning_rate': (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), 'momentum': (1e-2, 1e-1, 0.5, 0.9), # for AdaDelta 'rho': 0.95, 'eps': 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset': DD({ # 'type' : 'Laura_Blocks', 'type': 'Laura_Warp_Blocks', # 'type' : 'TransFactor_Blocks', 'num_blocks': 20, 'feature_size': 2049, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise': DD({ 'type': None # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', }), 'preprocessor': DD({ # 'type' : None, 'type': 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max': 89, 'global_min': -23, 'buffer': 0.5, 'scale_range': [-1, 1], }), 'batch_size': (50, 100, 150, 200), 'num_batches': None, 'iter_class': 'SequentialSubsetIterator', 'rng': None }), # end dataset # #============================[ Layers ]===========================# 'hidden1': DD({ 'name': 'hidden1', # 'model' : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067', # 'model' : 'AE1112_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_clean_20141112_2145_06823495', # 'model' : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671', # 'model' : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280', # 'model' : 'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837', # 'model' : 'AE0302_Scale_Warp_Blocks_2049_300_Clean_No_Pretrain_20150302_2336_46071497', 'model': 'AE0302_Scale_Warp_Blocks_2049_1000_Clean_No_Pretrain_20150302_1234_10065582', 'dropout_below': None, # 'dropout_below' : (0.1, 0.2, 0.3, 0.4, 0.5), # 'dropout_below' : 0.1, }), # end hidden_layer 'hidden2': DD({ 'name': 'hidden2', # 'model' : 'AE1001_Warp_Blocks_500_120_tanh_tanh_gpu_clean_20141003_0113_02206401', # 'model' : 'AE1115_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141119_1327_11490503', # 'model' : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279', # 'model' : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114', # 'model' : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132', # 'model' : 'AE0302_Scale_Warp_Blocks_300_180_Clean_No_Pretrain_20150304_0436_47181007', 'model': 'AE0302_Scale_Warp_Blocks_1000_180_Clean_20150304_0511_52424408', 'dropout_below': None, }) }) # end autoencoder
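# A sketch (not from the original scripts) of how the tuple-valued entries
# above, e.g. learning_method.learning_rate, learning_method.momentum and
# dataset.batch_size, could be expanded into concrete trials with the
# produit_cartesien_jobs helper defined earlier. This assumes tuples denote
# candidate sets to search over; the framework's own expansion code may differ.
search_space = {
    "learning_rate": config.learning_method.learning_rate,
    "momentum": config.learning_method.momentum,
    "batch_size": config.dataset.batch_size,
}
for job in produit_cartesien_jobs(search_space):
    # each job is one concrete combination, e.g.
    # {'learning_rate': 1e-05, 'momentum': 0.01, 'batch_size': 50}
    print job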
config = DD({ 'module_name' : 'Laura_Three_Layers', 'fine_tuning_only' : False, 'model' : DD({ 'rand_seed' : None }), # end mlp 'log' : DD({ # 'experiment_name' : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean', # 'experiment_name' : 'AE0919_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy', # 'experiment_name' : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_clean', # 'experiment_name' : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_noisy', # 'experiment_name' : 'AE0917_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean', # 'experiment_name' : 'AE0919_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy', # 'experiment_name' : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_noisy', # 'experiment_name' : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_clean', 'experiment_name' : 'AE1213_Scale_Laura_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_maskout', 'description' : '', 'save_outputs' : True, 'save_learning_rule' : True, 'save_model' : True, 'save_epoch_error' : True, 'save_to_database_name' : 'Laura12.db' }), # end log 'learning_rule' : DD({ 'max_col_norm' : 1, 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'mse', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 5, 'cost' : 'mse', 'percent_decrease' : 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method' : DD({ 'type' : 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', # for SGD and AdaGrad 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), # 'learning_rate' : 0.001, 'momentum' : (1e-2, 1e-1, 0.5, 0.9), # 'momentum' : 0.1, # 'momentum' : 0.5, # for AdaDelta 'rho' : 0.95, 'eps' : 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset' : DD({ # 'type' : 'Laura_Blocks', 'type' : 'Laura_Warp_Blocks', 'num_blocks' : 20, 'feature_size' : 2049, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise' : DD({ 'type' : None # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', }), 'preprocessor' : DD({ # 'type' : None, 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max' : 89, 'global_min' : -23, 'buffer' : 0.9, 'scale_range': [-1, 1], }), 'batch_size' : (50, 100, 150, 200), # 'batch_size' : 50, 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset # #============================[ Layers ]===========================# 'hidden1' : DD({ 'name' : 'hidden1', # 'model' : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067', # 'model' : 'AE0916_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20140916_1705_29139505', # 'model' :'AE0912_Blocks_2049_500_tanh_tanh_gpu_clean_20140914_1242_27372903', # 'model' : 'AE0915_Blocks_2049_500_tanh_tanh_gpu_Dropout_20140915_1900_37160748', # 'model' : 'AE1002_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20141001_0321_33382955', # 'model' : 'AE0930_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140930_1345_29800576', # 'model' : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_clean_continue_20141110_1235_21624029', # 'model' : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_batchout_continue_20141111_0957_22484008', # 'model' : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671', # 'model' : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280', 'model' : 
'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837', 'dropout_below' : None, # 'dropout_below' : (0.1, 0.2, 0.3, 0.4, 0.5), # 'dropout_below' : 0.1, }), # end hidden_layer 'hidden2' : DD({ 'name' : 'hidden2', # 'model' : 'AE0914_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20140915_0400_30113212', # 'model' : 'AE0918_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_1125_23612485', # 'model' : 'AE0916_Blocks_500_180_tanh_tanh_gpu_clean_20140916_2255_06553688', # 'model' : 'AE0918_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_0920_42738052', # 'model' : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20141001_2158_16765065', # 'model' : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20141002_0348_53679208', # 'model' : 'AE1110_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141111_2157_47387660', # 'model' : 'AE1111_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_batchout_continue_20141112_0844_45882544', # 'model' : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279', # 'model' : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114', 'model' : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132', 'dropout_below' : None, }), # end hidden_layer 'hidden3' : DD({ 'name' : 'hidden3', # 'model' : 'AE0915_Warp_Blocks_180_120_tanh_gpu_dropout_clean_20140916_1028_26875210', # 'model' : 'AE0918_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1649_54631649', # 'model' : 'AE0914_Blocks_180_120_tanh_tanh_gpu_clean_20140918_0119_40376829', # 'model' : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1345_22865393', # 'model' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1711_48207269', # 'model' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1457_08966968', # 'model' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_clean_20141002_1713_16791523', # 'model' : 'AE1120_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141122_0044_09351031', # 'model' : 'AE1121_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_batchout_20141122_0348_49379314', # 'model' : 'AE1127_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_gaussian_20141201_0345_39835964', # 'model' : 'AE1201_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141204_0137_07827194', 'model' : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout_20141213_1608_33432934', 'dropout_below' : None, }), # end hidden_layer }) # end autoencoder
config = DD({ 'module_name' : 'Laura_Continue', 'model' : DD({ 'rand_seed' : None }), # end mlp 'log' : DD({ 'experiment_name' : 'AE0302_Scale_Warp_Blocks_2049_500_Clean_AdaGrad_20150304_0512_00344145_continue', 'description' : '', 'save_outputs' : True, 'save_learning_rule' : True, 'save_model' : True, 'save_epoch_error' : True, 'save_to_database_name' : 'Laura13.db' }), # end log 'learning_rule' : DD({ 'max_col_norm' : 1, 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'mse', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 5, 'cost' : 'mse', 'percent_decrease' : 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method' : DD({ # 'type' : 'SGD', 'type' : 'AdaGrad', # 'type' : 'AdaDelta', # for SGD and AdaGrad 'learning_rate' : 0.01, 'momentum' : 0.01, # for AdaDelta 'rho' : 0.95, 'eps' : 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset' : DD({ # 'type' : 'Laura_Warp_Blocks_500', # 'type' : 'Laura_Blocks_500', # 'type' : 'Laura_Blocks', 'type' : 'Laura_Warp_Blocks', # 'type' : 'Mnist_Blocks', # 'type' : 'Laura_Scale_Warp_Blocks_500_Tanh', # 'type' : 'Laura_Warp_Blocks_500_Tanh_Noisy_MaskOut', # 'type' : 'Laura_Warp_Blocks_500_Tanh_Noisy_Gaussian', 'num_blocks' : 20, 'feature_size' : 2049, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise' : DD({ # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', 'type' : None }), 'preprocessor' : DD({ # 'type' : None, 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max' : 89, 'global_min' : -23, 'buffer' : 0.5, 'scale_range': [-1, 1], }), 'batch_size' : 100, 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset # #============================[ Layers ]===========================# 'fine_tuning_only' : True, 'hidden1' : DD({ 'name' : 'hidden1', 'model' : 'AE0302_Scale_Warp_Blocks_2049_500_Clean_AdaGrad_20150304_0512_00344145', }), # end hidden_layer }) # end autoencoder
config = DD({ 'model': 'attention', 'random_seed': 1234, # ERASE everything under save_model_path 'erase_history': True, 'attention': DD({ 'reload_': False, 'save_model_dir': exp_path + 'res_obj10+beam6/', 'from_dir': '', 'dataset': 'youtube2text', 'video_feature': 'resnet152', 'dim_word': 300, # 474 'ctx_dim': -1, # auto set 'dim': 512, # lstm dim # 536 'n_layers_out': 1, # for predicting next word 'n_layers_init': 0, 'encoder_dim': 300, 'prev2out': True, 'ctx2out': True, 'patience': 20, 'max_epochs': 500, 'decay_c': 1e-4, 'alpha_entropy_r': 0., 'alpha_c': 0.70602, 'lrate': 2e-5, 'selector': True, 'n_words': 14021, 'maxlen': 30, # max length of the descprition 'optimizer': 'adadelta', 'clip_c': 5., 'batch_size': 64, # for trees use 25 'valid_batch_size': 200, # in the unit of minibatches 'dispFreq': 10, 'validFreq': 2000, 'saveFreq': -1, # this is disabled, now use sampleFreq instead 'sampleFreq': 100, # blue, meteor, or both 'metric': 'everything', # set to perplexity on DVS 'use_dropout': True, 'K': 28, # 26 when compare 'OutOf': None, # used to be 240, for motionfeature use 26 'verbose': True, 'debug': False, }), })
import numpy as np

import theano
import theano.tensor as TT

from jobman import DD

import memnet.train_model_adam_gru_soft

n_trials = 64
lr_min = 8e-5
lr_max = 1e-2
batches = [100, 200, 400, 800]
renormalization_scale = [1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0]
mem_nels = [200, 220, 230, 240, 250, 260, 290, 300]
mem_sizes = [20, 24, 28, 30, 32]
std_min = 0.01
std_max = 0.05

state = DD()
state.lr = 6e-6
state.batch_size = 200
state.sub_mb_size = 25
state.std = 0.01
state.max_iters = 20000
state.n_hids = 200
state.mem_nel = 200
state.mem_size = 28

np.random.seed(3)
ri = np.random.random_integers
learning_rates = np.logspace(np.log10(lr_min), np.log10(lr_max), 100)
stds = np.random.uniform(std_min, std_max, 100)
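# A sketch (assumed, not taken from the original script) of how the candidate
# lists above could be used to draw one random trial: overwrite the defaults
# in `state` with values sampled from the precomputed arrays and lists before
# launching training.
for trial in range(n_trials):
    state.lr = learning_rates[ri(0, len(learning_rates) - 1)]
    state.std = stds[ri(0, len(stds) - 1)]
    state.batch_size = batches[ri(0, len(batches) - 1)]
    state.renormalization_scale = renormalization_scale[ri(0, len(renormalization_scale) - 1)]
    state.mem_nel = mem_nels[ri(0, len(mem_nels) - 1)]
    state.mem_size = mem_sizes[ri(0, len(mem_sizes) - 1)]
    # memnet.train_model_adam_gru_soft would be launched with this state here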
def jobman_insert_random( n_jobs, table_name="emotiw_mlp_audio_sigm_fixed_pool2_mixed5_nrelu"): JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name EXPERIMENT_PATH = "experiment_cg_2layer_sigm_hyper2_fixed2_pool2_save_mixed5_nrelu.jobman_entrypoint" nlr = 45 learning_rates = numpy.logspace(numpy.log10(0.0008), numpy.log10(0.09), nlr) max_col_norms = [1.9835, 1.8256, 1.2124, 0.98791] jobs = [] for _ in range(n_jobs): job = DD() id_lr = numpy.random.random_integers(0, nlr - 1) rnd_maxcn = numpy.random.random_integers(0, len(max_col_norms) - 1) job.n_hiddens = numpy.random.randint(100, 540) job.n_layers = 2 job.learning_rate = learning_rates[id_lr] job.momentum = 10.**numpy.random.uniform(-1, -0) job.hidden_dropout = numpy.random.uniform(low=0.1, high=0.9) job.rmsprop = 1 job.rbm_epochs = 12 job.rho = 0.96 job.validerror = 0.0 job.loss = 0.0 job.epoch = 0 job.epoch_time = 0 job.use_nesterov = 1 job.trainerror = 0.0 job.features = "full.pca" job.max_col_norm = max_col_norms[rnd_maxcn] job.example_dropout = numpy.random.randint(60, 200) job.tag = "sigm_norm_const_fixed_pool2" jobs.append(job) print job answer = raw_input("Submit %d jobs?[y/N] " % len(jobs)) if answer == "y": numpy.random.shuffle(jobs) db = jobman.sql.db(JOBDB) for job in jobs: job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) jobman.sql.insert_dict(job, db) print "inserted %d jobs" % len(jobs) print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % ( len(jobs), JOBDB)
def jobman_insert_random(n_jobs, table_name="emotiw_mlp_audio_fixed_pool2_mixed_grbmx2"): JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name EXPERIMENT_PATH = "experiment_cg_2layer_sigm_hyper2_fixed2_pool2_save_mixed_grbmx2.jobman_entrypoint" nlr = 45 learning_rates = numpy.logspace(numpy.log10(0.0008), numpy.log10(0.09), nlr) max_col_norms = [1.9835, 1.8256, 1.2124, 0.98791] jobs = [] for _ in range(n_jobs): job = DD() id_lr = numpy.random.random_integers(0, nlr-1) rnd_maxcn = numpy.random.random_integers(0, len(max_col_norms)-1) job.n_hiddens = numpy.random.random_integers(2,5) * 100 + 2 * numpy.random.random_integers(0,15) job.n_layers = 2 job.learning_rate = learning_rates[id_lr] job.momentum = 10.**numpy.random.uniform(-1, -0) job.hidden_dropout = numpy.random.uniform(low=0.1, high=0.2) job.layer_dropout = 0 job.topN_pooling = 1 job.no_final_dropout = 1 job.l2 = numpy.random.random_integers(1, 20) * 1e-3 job.rmsprop = 1 job.normalize_acts = 0 job.enable_standardization = 0 job.response_normalize = 0 job.rbm_epochs = 15 job.rho = 0.94 job.validerror = 0.0 job.loss = 0.0 job.epoch = 0 job.epoch_time = 0 job.use_nesterov = 1 job.trainerror = 0.0 job.features = "full.pca" job.max_col_norm = max_col_norms[rnd_maxcn] job.example_dropout = numpy.random.randint(60, 200) job.tag = "relu_nlayers_dbn" jobs.append(job) print job answer = raw_input("Submit %d jobs?[y/N] " % len(jobs)) if answer == "y": numpy.random.shuffle(jobs) db = jobman.sql.db(JOBDB) for job in jobs: job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) jobman.sql.insert_dict(job, db) print "inserted %d jobs" % len(jobs) print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (len(jobs), JOBDB)
def get_parameters(opt, exp="class"): params = DD() params.net = DD() params.net.enc = DD() get_encoder_parameters(params, opt) params.net.gendec = DD() params.net.classdec = DD() if exp == "class": get_class_parameters(params, opt) elif exp == "gen": get_gendec_parameters(params, opt) else: get_class_parameters(params, opt) params.net.enc.pt = "none" params.train = DD() params.train.static = DD() params.train.dynamic = DD() params.eval = DD() params.data = DD() # Which experiment to run: {class = classification; gen = generation} params.exp = opt.experiment # Task to run: {emotion, motivation} params.task = opt.task if params.exp == "class": # % of development set stories to keep as training data params.train.static.tr = opt.train_ratio # granularity of labels # motivation: {maslow, reiss} # emotion: {plutchik, plutchik16} params.granularity = opt.granularity # Labeling type (default = majority) params.data.label = opt.label if params.exp == "gen": # Number of positive examples per negative example params.train.static.pnr = opt.pos_neg_ratio # Loss to use during training params.train.static.wcrit = "nll" # Prune useless words in motivation sequences such as # "to be" in "to be famous" params.data.pruned = opt.pruned # Max norm at which to clip gradients params.train.static.gc = opt.grad_clip # Random seed params.train.static.seed = opt.random_seed # learning rate params.train.dynamic.lr = opt.learning_rate # batch size params.train.dynamic.bs = opt.batch_size # optimizer to use {adam, rmsprop, etc.} # Only Adam is actually implemented params.train.dynamic.optim = opt.optimizer # Default parameters for the CNN model from # 2014 Yoon Kim paper if params.net.enc.model == "cnn+stock": params.net.enc.ks = "3,4,5" params.net.enc.kn = 100 params.net.classdec.dpt = 0.5 params.train.dynamic.lr = 0.001 params.train.dynamic.bs = 64 params.data.shuffle = False params.train.static.l2 = 3 params.net.enc.iSize = 128 params.net.classdec.hSize = 300 meta = DD() meta.iterations = opt.iterations meta.epochs = opt.epochs meta.mark = opt.mark return params, meta
default_config = DD({ # available options: mnist, curves 'dataset': 'mnist', 'seed': 123, #------------------------------------- # layerwise pretraining # number of pretraining epochs, layerwise 'pretraining_epochs': 1, 'pretrain_lr': 0.1, 'top_layer_pretrain_lr': 0.001, # CD-k 'k': 1, # not used 'weight_decay': 0.00002, #-------------------------------------- # global pretraining # number of global pretraining epochs # this only makes sense when sgd is used # originally 5000 'global_pretraining_epochs': 1, 'global_pretrain_lr': 0.02, 'global_pretraining_batch_size': 3000, # or mse 'reconstruction_cost_type': 'cross_entropy', # preconditioner for lcg. jacobi 'preconditioner': 'martens', # hf or sgd 'global_pretraining_optimization': 'hf', #--------------------------------------- # fine tuning # originally 1000 'training_epochs': 1, # standard or russ 'supervised_training_type': 'russ', 'finetune_lr': 0.1, #----------------------------------------- # minibatch size for both layerwise pretraining and finetuning # note that if global pretraining is sgd, then this batch_size # is used as well. 'batch_size': 20, })
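# Illustrative definitions (not taken from the training code) of the two
# reconstruction costs selectable above via 'reconstruction_cost_type':
# cross-entropy treats inputs and reconstructions as values in [0, 1],
# while mse is plain squared error, both averaged over the minibatch.
import numpy as np

def reconstruction_cost(x, x_hat, cost_type="cross_entropy", eps=1e-8):
    x = np.asarray(x, dtype=float)
    x_hat = np.clip(np.asarray(x_hat, dtype=float), eps, 1.0 - eps)
    if cost_type == "cross_entropy":
        return -np.mean(np.sum(x * np.log(x_hat) + (1 - x) * np.log(1 - x_hat), axis=1))
    return np.mean(np.sum((x - x_hat) ** 2, axis=1))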
config = DD({ 'model': 'attention', 'random_seed': 1234, # ERASE everything under save_model_path 'erase_history': True, 'attention': DD({ 'reload_': False, 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 'from_dir': '', 'dataset': 'youtube2text',#'youtube2text',#'lsmdc',mvad. 'ysvd' 'video_feature': 'googlenet', 'dim_word':468, # 474 'ctx_dim':-1,# auto set 'dim':3518, # lstm dim # 536 'n_layers_out':1, # for predicting next word 'n_layers_init':0, 'encoder_dim': 300, 'prev2out':True, 'ctx2out':True, 'patience':20, 'max_epochs':500, 'decay_c':1e-4, 'alpha_entropy_r': 0., 'alpha_c':0.70602, 'lrate':0.01, 'selector':True, 'n_words':20000, 'maxlen':30, # max length of the descprition 'optimizer':'adadelta', 'clip_c': 10., 'batch_size': 64, # for trees use 25 # 'batch_size': 2, # for trees use 25 'valid_batch_size':200, # 'valid_batch_size':2, # in the unit of minibatches 'dispFreq':200, 'validFreq':2000, 'saveFreq':-1, # this is disabled, now use sampleFreq instead 'sampleFreq':100, # blue, meteor, or both 'metric': 'everything', # set to perplexity on DVS 'use_dropout':True, 'K':28, # 26 when compare 'OutOf':None, # used to be 240, for motionfeature use 26 'verbose': True, 'debug': False, 'dec':'standard', 'encoder':None, 'mode':'train', 'proc':'nostd', 'data_dir':'', 'feats_dir':'' }), 'iLSTM': DD({ 'reload_': False, 'save_model_dir': exp_path + 'attention_mod/', 'dec':'standard', 'valid_batch_size':200, 'dataset': 'youtube2text', 'encoder': None, 'max_epochs':500, 'from_dir': '', }), 'attention_mod': DD({ 'reload_': False, 'save_model_dir': exp_path + 'attention_mod/', 'dec':'multi-stdist' }), 'mtle': DD({ 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 'reload_': False, 'from_dir': '', 'dec':'multi-stdist', 'dim_word':468, # 474 'encoder':None, 'encoder_dim': 300, 'batch_size': 64, #64 for trees use 25 'valid_batch_size':200, 'dataset': 'vtt', 'dim':3518, # lstm dim # 536 'video_feature': 'googlenet', 'validFreq': 2000, 'max_epochs': 500, 'mode':'train', 'proc':'nostd', 'K':28, # 26 when compare 'lrate':0.0001, 'data_dir':'', 'dispFreq':10, 'feats_dir':'', 'cost_type':'v1' }), 'fcoupled': DD({ 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 'reload_': False, 'dec':'multi-random', 'encoder':None, 'encoder_dim': 300, 'batch_size': 64, # for trees use 25 'dataset': 'youtube2text', 'dim':3518, # lstm dim # 536 'from_dir': '', 'valid_batch_size':200, 'max_epochs':500, 'video_feature': 'googlenet', }), 'const': DD({ 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 'reload_': False, 'dec':'multi-random', 'encoder':None, 'encoder_dim': 300, 'batch_size': 64, # for trees use 25 'dataset': 'youtube2text', 'dim':3518, # lstm dim # 536 'from_dir': '', }), 'const2': DD({ 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 'reload_': False, 'dec':'multi-random', 'encoder':None, 'encoder_dim': 300, 'batch_size': 64, # for trees use 25 'dataset': 'youtube2text' }), 'LSTM': DD({ 'reload_': False, 'save_model_dir': exp_path + 'attention_mod/', 'dec':'standard', 'valid_batch_size':200, 'dataset': 'youtube2text', 'encoder': 'lstm_uni', 'max_epochs':500, 'from_dir': '', }), 'lstmdd': DD({ 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 'reload_': False, 'from_dir': '', 'dec':'multi-stdi', 'dim_word':468, # 474 'encoder':None, 'encoder_dim': 300, 'batch_size': 64, #64 for trees use 25 'valid_batch_size':200, 'dataset': 'vtt', 'dim':3518, # lstm dim # 536 'video_feature': 'googlenet', 'validFreq': 2000, 'max_epochs': 500, 'mode':'train', 
'proc':'nostd', 'K':28, # 26 when compare 'lrate':0.0001, 'data_dir':'', 'dispFreq':10, 'feats_dir':'', 'cost_type':'v1' }), 'gru': DD({ 'reload_': False, 'save_model_dir': exp_path + 'gru_model2/', 'from_dir': '', 'dataset': 'youtube2text',#'youtube2text',#'lsmdc',mvad. 'ysvd' 'video_feature': 'googlenet', 'dim_word':468, # 474 'ctx_dim':-1,# auto set 'dim':3518, # lstm dim # 536 'n_layers_out':1, # for predicting next word 'n_layers_init':0, 'encoder_dim': 300, 'prev2out':True, 'ctx2out':True, 'patience':20, 'max_epochs':500, 'decay_c':1e-4, 'alpha_entropy_r': 0., 'alpha_c':0.70602, 'lrate':0.01, 'selector':True, 'n_words':20000, 'maxlen':30, # max length of the descprition 'optimizer':'adadelta', 'clip_c': 10., 'batch_size': 64, # for trees use 25 # 'batch_size': 2, # for trees use 25 'valid_batch_size':200, # 'valid_batch_size':2, # in the unit of minibatches 'dispFreq':10, 'validFreq':2000, 'saveFreq':-1, # this is disabled, now use sampleFreq instead 'sampleFreq':100, # blue, meteor, or both 'metric': 'everything', # set to perplexity on DVS 'use_dropout':True, 'K':28, # 26 when compare 'OutOf':None, # used to be 240, for motionfeature use 26 'verbose': True, 'debug': False, 'dec':'standard', 'encoder':None, 'mode':'train', 'proc':'nostd' }), 'fc': DD({ 'reload_': False, 'save_model_dir': exp_path + 'attention_mod/', 'dec':'standard', 'dataset': 'youtube2text', 'encoder': None, 'from_dir': '', }), 'ic': DD({ 'reload_': False, 'save_model_dir': exp_path + 'attention_mod/', 'dec':'standard', 'dataset': 'youtube2text', 'encoder': None, 'from_dir': '', }), 'const_w': DD({ 'save_model_dir': exp_path + 'const_w/', 'reload_': False, 'dec':'multi-stdist', 'encoder':None, 'encoder_dim': 300, 'batch_size': 64, # for trees use 25 'dataset': 'youtube2text', 'video_feature': 'googlenet', }), })
options = DD({ ### Loop 'text_only': True, 'do_eval': False, 'save_model_dir': save_dir, #+'LSMDC/', 'load_model_from': None, #'/Tmp/ballasn/LSMDC/model/baseline_textonly_h_320_wemb_data_10/LSMDC/', #None, #'/Tmp/ballasn/LSMDC/model/baseline_textonyl_h_320/LSMDC/', 'load_options_from': None, 'erase': False, 'max_epoch': 300, 'dispFreq': 10, 'estimate_population_statistics': False, 'debug': True, ### Dataset 'data_path' : "/Tmp/ballasn/LSMDC/LSMDC2016.pkl", ### Full None, 10%=>9511, 50%=>47559, 100%=>95118 'max_train_example': 9511, 'input_dim': 4096+1024, # 1024 gnet, 4096 C3D 'features_type' : "Fuse", # 2D/3D/Fuse 'features_path' : "/Tmp/ballasn/LSMDC/feat/", #"/data/lisatmp4/ballasn/datasets/LSMDC2016/LSMDC_googlenetfeatures.pkl", 'n_subframes': 15, 'batch_size': 24, 'max_n_epoch': 1000, ### Vocabulary 'train_emb': True, # Use only word present > 50 times in the training sets for the output vocabulary 'use_out_vocab': True, # Use only word present > 50 times in the training sets for the output vocabulary 'reduce_vocabulary': False, # Use only word present > 3 times in the training sets 'n_words': 26818, 'dim_word': 512, 'hdims': 320, 'use_dropout': True, 'use_residual': False, 'use_zoneout': True, 'use_bn': True, 'initial_gamma': 0.1, 'initial_beta': 0., 'use_popstats': False, ### required to be false # Model: standard, momentum, adagrad, rmsprop 'memory_update_rule': 'standard', 'lstm_alpha': 0.95, ### Optimization 'ita': 0.001, 'optimizer': 'adam', 'lr': 0.001, 'clip_c': 10., 'patience': 5, 'valid_freq': -1, })
model_config = DD({ # MLP 'mlp': DD({ 'model_class': 'mlp', 'train_class': 'sgd', #'config_id' : 'GaussianNoise1000cifar200epoch', #'config_id' : 'Clean100cifar200epoch', #'config_id' : 'Clean100cifar200epochPreproc', #'config_id' : 'GaussianNoise1000cifar200epochPreproc', #'config_id' : 'GaussNoise2k-2kCifar200epochPreproc', #'config_id' : 'Noisy200-2kCifar200epochPreproc1', 'config_id': 'Clean200-200Cifar200epochPreproc1', # TODO: cached should always be True! 'cached': True, # dataset can be mnist or svhn or cifar10 'dataset': 'svhn', 'input_space_id': None, 'nvis': None, # Channel and dataset monitoring # mca : mean classification average of a minibatch #'channel_array' : ['mca'], 'channel_array': None, # valid or test or both 'monitoring_dataset': ['valid'], 'random_seed': 251, 'batch_size': 200, 'learning_rate': ((1e-4, 1.0), float), 'init_momentum': ((0.5, 0.99), float), # for mnist #train_iteration_mode' : 'random_uniform', # for svhn 'train_iteration_mode': 'sequential', #<training modes> #sequential #shuffled_sequential #random_slice #random_uniform #batchwise_shuffled_sequential # TODO: cached should always be True! 'cached': True, # dataset can be mnist or svhn or cifar10 'dataset': 'cifar10', 'input_space_id': None, 'nvis': None, # Channel and dataset monitoring # mca : mean classification average of a minibatch #'channel_array' : ['mca'], 'channel_array': None, # valid or test or both 'monitoring_dataset': ['valid'], 'random_seed': 251, 'batch_size': 200, 'learning_rate': ((1e-4, 1.0), float), 'init_momentum': ((0.5, 0.99), float), # for mnist #train_iteration_mode' : 'random_uniform', # for svhn 'train_iteration_mode': 'sequential', #<training modes> #sequential #shuffled_sequential #random_slice #random_uniform #batchwise_shuffled_sequential # Momentum and exponential decay 'ext_array': DD({ 'exp_decay': DD({ 'ext_class': 'exponentialdecayoverepoch', 'decay_factor': ((0.85, 0.999), float), 'min_lr_scale': ((1e-3, 1e-1), float), }), 'moment_adj': DD({ 'ext_class': 'momentumadjustor', 'final_momentum': 0.9, 'start_epoch': 1, 'saturate_epoch': ((20, 50), int), }), }), # Termination criteria 'term_array': DD({ # Max number of training epochs 'epoch_count': DD({ 'term_class': 'epochcounter', 'max_epochs': 100, }), # Early stopping on validation set # If after max_epochs, we don't see significant improvement # on validation cost, we stop the training. 'early_stopping': DD({ 'term_class': 'monitorbased', 'proportional_decrease': 1e-4, 'max_epochs': 20, 'channel_name': 'valid_softmax2_nll', 'save_best_channel': 'valid_softmax2_nll', }) }), 'layers': DD({ # IMPORTANT: For each layer, only add hyperparams that are different than # the default hyperparams from layer_config # NOTE: always start the name of your hidden layers with hidden and your # output layers with output in order for the hidden layers # to be found first before the output layers when going # through the layers DD dictionary. 
# NOTE: the supported activation functions are: # tanh, sigmoid, rectifiedlinear, softmax # First hidden layer # 'hidden1' : DD({ # 'layer_class' : 'rectifiedlinear', # #'dim' : ((100, 2000), int), # 'dim' : 200, # 'max_col_norm' : ((0.1, 8.), float), # #'weight_decay' : ((0.1, 7.), float), # 'sparse_init' : 15 # }), # First hidden layer 'hidden1': DD({ 'layer_class': 'tanh', #'dim' : ((100, 2000), int), 'dim': 200, 'max_col_norm': ((0.1, 5.), float) #'weight_decay' : ((1., 9.), float), }), 'hidden2': DD({ 'layer_class': 'tanh', #'dim' : ((100, 2000), int), 'dim': 200, 'max_col_norm': ((0.1, 5.), float) #'weight_decay' : ((1., 9.), float), }), # 'hidden1' : DD({ # 'layer_class' : 'gaussianRELU', # #'dim' : ((100, 2000), int), # 'dim' : 2000, # 'max_col_norm' : ((0.1, 5.), float), # 'adjust_threshold_factor' : ((0.0001, 1), float), # 'desired_active_rate' : 0.1, # 'noise_std' : ((0.1, 10), float), # # #'weight_decay' : ((1., 9.), float), # # 'sparse_init' : 15 # }), # # 'hidden2' : DD({ # 'layer_class' : 'gaussianRELU', # #'dim' : ((100, 2000), int), # 'dim' : 2000, # 'max_col_norm' : ((0.1, 5.), float), # 'adjust_threshold_factor' : ((0.0001, 1), float), # 'desired_active_rate' : 0.1, # 'noise_std' : ((0.1, 10), float), # # #'weight_decay' : ((1., 9.), float), # # 'sparse_init' : 15 # }), #First hidden layer # 'hidden1' : DD({ # 'layer_class' : 'noisyRELU', # 'sparse_init' : 15, # 'dim' : 3000, # 'max_col_norm' : ((0.1, 5.), float), # 'noise_factor' : ((0.0001, 1.), float), # 'adjust_threshold_factor' : ((0.0001, 1), float), # 'desired_active_rate' : 0.1 # }), # #Second hidden layer # 'hidden2' : DD({ # 'layer_class' : 'tanh', # #'dim' : ((100, 2000), int), # 'dim' : 100, # 'max_col_norm' : ((0.1, 5.), float) # #'weight_decay' : ((1., 9.), float), # # }), # Last (output) layer # The fun model only takes 1 output. 'output1': DD({ 'layer_class': 'softmax', 'dim': 10, 'irange': 0.05 #'sparse_init' : 15 }) }), }), })
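# A sketch of how the ((low, high), type) hyperparameter specs used in this
# config, e.g. 'learning_rate': ((1e-4, 1.0), float) or
# 'saturate_epoch': ((20, 50), int), might be resolved into concrete values.
# The real sampler lives in the hyperparameter-search driver; this assumes a
# plain uniform draw within the range, cast to the given type, and leaves
# non-range entries untouched.
import random

def resolve_hyperparam(spec):
    if isinstance(spec, tuple) and len(spec) == 2 and isinstance(spec[0], tuple):
        (low, high), cast = spec
        return cast(random.uniform(low, high))
    return spec

learning_rate = resolve_hyperparam(((1e-4, 1.0), float))
saturate_epoch = resolve_hyperparam(((20, 50), int))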
config = DD({ 'module_name': 'NN', 'model': DD({'rand_seed': None}), # end mlp 'log': DD({ 'experiment_name': 'mlp_dropout', 'description': '', 'save_outputs': False, 'save_learning_rule': False, 'save_model': False, 'save_epoch_error': False, 'save_to_database_name': "mnist_model.db" }), # end log 'learning_method': DD({ # 'type' : 'SGD', # 'type' : 'AdaGrad', 'type': 'AdaDelta', ###[ For SGD and AdaGrad ]### # 'learning_rate' : 0.001, # 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), 'learning_rate': 0.1, 'momentum': 0.5, # 'momentum' : 0., # 'momentum' : (1e-2, 1e-1, 0.5, 0.9), # For AdaDelta # 'rho' : ((0.90, 0.99), float), 'rho': 0.95, # 'eps' : (1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7), 'eps': 1e-6, }), 'learning_rule': DD({ 'max_col_norm': None, 'L1_lambda': None, 'L2_lambda': 0.0001, 'cost': 'entropy', 'stopping_criteria': DD({ 'max_epoch': 10, 'epoch_look_back': 5, 'cost': 'error', 'percent_decrease': 0.05 }) # end stopping_criteria }), # end learning_rule 'dataset': DD({ 'type': 'Mnist', 'train_valid_test_ratio': [5, 1, 1], 'feature_size': 784, 'target_size': 10, 'dataset_noise': DD({ # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', 'type': None }), 'preprocessor': DD({ 'type': None, # 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max': 4.0, 'global_min': 0., 'buffer': 0., 'scale_range': [0., 1.], }), 'batch_size': (50, 100, 150, 200), # 'batch_size' : 20, 'num_batches': None, 'iter_class': 'SequentialSubsetIterator', 'rng': None }), # end dataset #============================[ Layers ]===========================# 'hidden1': DD({ 'name': 'hidden1', 'type': 'Tanh', 'dim': 500, # 'dropout_below' : (0.05, 0.1, 0.15, 0.2) # 'dropout_below' : (0, 0.5), 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut 'ratio': 0.5, # for Gaussian 'std': 0.1, 'mean': 0, }) }), # end hidden_layer 'output': DD({ 'name': 'output', 'type': 'Sigmoid', 'dim': 10, # 'dropout_below' : 0.5, 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut 'ratio': 0.5, # for Gaussian 'std': 0.1, 'mean': 0, }) }) # end output_layer })
def jobman_insert_random(n_jobs, table_name="emotiw_mlp_audio_tanh"): JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name EXPERIMENT_PATH = "experiment_cg.jobman_entrypoint" nlr = 50 learning_rates = numpy.logspace(numpy.log10(0.001), numpy.log10(0.3), nlr) jobs = [] for _ in range(n_jobs): job = DD() id_lr = numpy.random.random_integers(0, nlr-1) job.n_hiddens = numpy.random.randint(100, 800) job.n_layers = numpy.random.randint(1, 4) job.learning_rate = learning_rates[id_lr] job.momentum = 10.**numpy.random.uniform(-1, -0) job.rmsprop = numpy.random.binomial(1, 0.5) job.validerror = 0.0 job.loss = 0.0 job.epoch = 0 job.epoch_time = 0 job.trainerror = 0.0 job.features = "full.pca" job.max_col_norm = 1.8456 job.example_dropout = numpy.random.randint(16, 200) job.rbm_learning_rate = 10.**numpy.random.uniform(-3, -0) job.rbm_epochs = 0 #numpy.random.randint(8, 100) job.tag = "tanh_norm_const" jobs.append(job) print job answer = raw_input("Submit %d jobs?[y/N] " % len(jobs)) if answer == "y": numpy.random.shuffle(jobs) db = jobman.sql.db(JOBDB) for job in jobs: job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) jobman.sql.insert_dict(job, db) print "inserted %d jobs" % len(jobs) print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (len(jobs), JOBDB)
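# Side note (illustrative, not part of the insertion script above): indexing
# a logspace grid at a random position, as done for learning_rate, and
# drawing 10**uniform(...), as done for momentum, are two ways of sampling
# roughly log-uniformly, the usual choice for rates spanning several orders
# of magnitude.
import numpy

nlr = 50
grid = numpy.logspace(numpy.log10(0.001), numpy.log10(0.3), nlr)
lr_from_grid = grid[numpy.random.randint(0, nlr)]                    # discrete log grid
lr_continuous = 10.0 ** numpy.random.uniform(-3, numpy.log10(0.3))   # continuous equivalent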
import warnings

import numpy as np

from jobman import DD, flatten, api0, sql

import theano
import theano.tensor as TT

import train_model_adam

import sys
sys.path.append("../codes/")
from core.nan_guard import NanGuardMode

state = DD()

state.lr = 3e-3
state.batch_size = 160
state.sub_mb_size = 160
state.max_iters = 40000
state.n_hids = 240
state.mem_nel = 150
state.mem_size = 28
state.renormalization_scale = 5.0
state.bowout = True
state.use_ff_controller = False
state.std = 0.01
state.bow_size = 80
state.n_reading_steps = 1
default_config = DD({ # theano profiling, 0 not printing 'profile': 1, # specify the correct data path # cifar10.npz # curves.npz # mnist_6k_1k_1k.npz 'data': '/scratch/yaoli/Exp_scratch/data/mnist_6k_1k_1k.npz', 'verbose': 3, 'device': 'gpu', # batch for computing gradient # 50000 for mnist, 40000 for cifar, 20000 for curves # gbs=mbs=ebs=cbs=200 when sgd 'gbs': 60000, # batch for computing the metric, 10000 for non-sgd 'mbs': 10000, # batch for evaluating the model # and doing line search, 10000 for non-sgd 'ebs': 10000, # number of samples to consider at any time # 250 'cbs': 250, # daa, mlp 'model': 'daa', #'sgd' 'krylov' 'natNCG', 'natSGD_jacobi' 'algo': 'natSGD_jacobi', #'krylov', # Gf for park metric, amari otherwise 'type': 'Gf', # keep it under 1000, but bigger for sgd 'loopIters': 1000, # 1 is catching NaN 'gotNaN': 0, 'seed': 312, # there must not be any space between numbers below, otherwise # jobman raise an error # mlp [1000,1000,1000], # cifar deep [2000,1000,1000], # to compare: #------------------ #mnist(mlp): [500,500,2000] #mnist(ae):[1000,500,250,30] #cifar(mlp): 1000, 10000 #curves(ae):[400,200,100,50,25,5] 'hids': '[1000,500,250,30]', # stop LCG till this difference is reached 'mrtol': 1e-4, # damping factor for the matrix, should be fixed for natNCG 'mreg': 45, # damping factor for preconditioning 'jreg': .02, # NCG restart 'resetFreq': 40, # max iterations of LCG 'miters': numpy.int32(20), # sgd:0.03, other 0.9, 1 or 2 'lr': 1, # weight initialization formula .. not very useful to change it right now # xavier or small 'init': 'xavier', # error cost for deep autoencoder (note Dumi and I think Martens used cross entropy for MNIST) 'daacost': 'cross', 'l2norm': 1e-5, # numbers of linear search 'lsIters': 80, # checking the validation score, keep it low, unless SGD. 'checkFreq': 5, # the size krylov space 'krylovDim': 15, # lbfgs steps 'lbfgsIters': 10, # natNCG uses 0 'adaptivedamp': 1, })
def jobman_insert_random(n_jobs, table_name="emotiw_mlp_audio_sigm_fixed_pool2_mixed_norbm3"): JOBDB = 'postgresql://[email protected]/gulcehrc_db?table=' + table_name EXPERIMENT_PATH = "experiment_cg_2layer_sigm_hyper2_fixed2_pool2_save_mixed_norbm3.jobman_entrypoint" nlr = 45 learning_rates = numpy.logspace(numpy.log10(0.0008), numpy.log10(0.1), nlr) max_col_norms = [1.8256, 1.5679, 1.2124, 0.98791] rhos = [0.96, 0.92, 0.88] jobs = [] for _ in range(n_jobs): job = DD() id_lr = numpy.random.random_integers(0, nlr-1) rnd_maxcn = numpy.random.random_integers(0, len(max_col_norms)-1) rnd_rho = numpy.random.random_integers(0, len(rhos)-1) job.n_hiddens = numpy.random.randint(80, 500) job.n_layers = numpy.random.random_integers(1, 2) job.learning_rate = learning_rates[id_lr] job.momentum = 10.**numpy.random.uniform(-1, -0) job.rmsprop = 1 job.rho = rhos[rnd_rho] job.validerror = 0.0 job.loss = 0.0 job.seed = 1938471 job.rbm_epochs = 0 job.epoch = 0 job.epoch_time = 0 job.use_nesterov = 1 job.trainerror = 0.0 job.features = "full.pca" job.max_col_norm = max_col_norms[rnd_maxcn] job.example_dropout = numpy.random.randint(60, 200) job.tag = "sigm_norm_const_fixed_pool2_norbm3" jobs.append(job) print job answer = raw_input("Submit %d jobs?[y/N] " % len(jobs)) if answer == "y": numpy.random.shuffle(jobs) db = jobman.sql.db(JOBDB) for job in jobs: job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) jobman.sql.insert_dict(job, db) print "inserted %d jobs" % len(jobs) print "To run: jobdispatch --condor --mem=3G --gpu --env=THEANO_FLAGS='floatX=float32, device=gpu' --repeat_jobs=%d jobman sql -n 1 '%s' ." % (len(jobs), JOBDB)
config = DD({ 'model': 'attention', 'random_seed': 1234, # ERASE everything under save_model_path 'erase_history': True, 'attention': DD({ 'reload_': False, 'verbose': True, 'debug': False, 'save_model_dir': RAB_EXP_PATH + 'save_dir/', 'from_dir': RAB_EXP_PATH + 'from_dir/', # dataset 'dataset': 'youtube2text', 'video_feature': 'googlenet', 'K': 28, # 26 when compare 'OutOf': None, # network 'dim_word': 512, #468, # 474 'tu_dim': 512, 'mu_dim': 512, # 1024, 'vu_dim': 512, # 1024, 'ctx_dim': -1, # auto set 'n_layers_out': 1, # for predicting next word 'n_layers_init': 0, 'prev2out': True, 'ctx2out': True, 'selector': True, 'n_words': 20000, 'maxlen': 30, # max length of the descprition 'use_dropout': True, 'isGlobal': False, # training 'patience': 20, 'max_epochs': 500, 'decay_c': 1e-4, 'alpha_entropy_r': 0., 'alpha_c': 0.70602, 'lrate': 0.0001, 'optimizer': 'adadelta', 'clip_c': 10., 'batch_size': 256, # for trees use 25 'valid_batch_size': 200, 'dispFreq': 10, 'validFreq': 500, 'saveFreq': -1, # this is disabled, now use sampleFreq instead 'sampleFreq': 100, # blue, meteor, or both 'metric': 'everything', # set to perplexity on DVS }), })
import cPickle as pkl
import warnings
import argparse

import numpy as np

from jobman import DD, flatten, api0, sql

import theano
import theano.tensor as TT

import train_model_adam
from train_model_adam import search_model_adam

state = DD()

parser = argparse.ArgumentParser("Parameters for the single soft model.")
parser.add_argument("--task_id", default=1, type=int)
parser.add_argument("--reload_model", default=1, type=int)
parser.add_argument("--save_path", default=".", type=str)
parser.add_argument("--seed", default=".", type=str)
args = parser.parse_args()

state.reload_model = args.reload_model
state.task_id = args.task_id
state.save_path = args.save_path

state.lr = 8.2 * 1e-3
state.batch_size = 160
state.sub_mb_size = 160
state.max_iters = 90000
def pento(n_trials):
    ri = numpy.random.random_integers
    state = DD()
    with open("mnist_powerup_temp.yaml") as ymtmp:
        state.yaml_string = ymtmp.read()

    state.powerup_nunits = 240
    state.powerup_npieces = 5
    state.W_lr_scale = 0.04
    state.p_lr_scale = 0.01
    state.lr_rate = 0.1
    state.l2_pen = 1e-5
    state.l2_pen2 = 0.0000
    state.init_mom = 0.5
    state.final_mom = 0.5
    state.decay_factor = 0.5
    state.max_col_norm = 1.9365
    state.max_col_norm2 = 1.8365
    state.save_path = "./"

    n_pieces = [2, 3, 4, 5, 6, 8, 10, 12]
    n_units = [200, 240, 280, 320, 420]
    learning_rates = numpy.logspace(numpy.log10(0.001), numpy.log10(1.0), 30)
    learning_rate_scalers = numpy.logspace(numpy.log10(0.01), numpy.log10(1), 30)
    l2_pen = numpy.logspace(numpy.log10(1e-6), numpy.log10(8 * 1e-4), 100)
    max_col_norms = [1.7365, 1.8365, 1.9365, 2.1365, 2.2365, 2.4365]

    ind = 0
    TABLE_NAME = "powerup_mnist_1layer_finer"
    db = api0.open_db("postgresql://[email protected]/gulcehrc_db?table=" + TABLE_NAME)

    for i in xrange(n_trials):
        state.lr_rate = learning_rates[ri(learning_rates.shape[0]) - 1]
        state.powerup_nunits = n_units[ri(len(n_units)) - 1]
        state.powerup_npieces = n_pieces[ri(len(n_pieces)) - 1]
        state.W_lr_scale = learning_rate_scalers[ri(len(learning_rate_scalers)) - 1]
        state.p_lr_scale = learning_rate_scalers[ri(len(learning_rate_scalers)) - 1]
        state.l2_pen = l2_pen[ri(l2_pen.shape[0]) - 1]
        state.init_mom = numpy.random.uniform(low=0.3, high=0.6)
        # sample final_mom above init_mom but below 0.9
        state.final_mom = numpy.random.uniform(low=state.init_mom + 0.1, high=0.9)
        state.decay_factor = numpy.random.uniform(low=0.01, high=0.05)
        state.max_col_norm = max_col_norms[ri(len(max_col_norms)) - 1]

        alphabet = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUWXYZ0123456789")
        numpy.random.shuffle(alphabet)
        state.save_path = "./"
        state.save_path += "".join(alphabet[:7]) + "_"

        sql.insert_job(experiment, flatten(state), db)
        ind += 1

    db.createView(TABLE_NAME + "_view")
    print "{} jobs submitted".format(ind)
config = DD({ 'model': 'DeepOrderlessNADE', 'load_trained': DD({ # action: 0 standard train, 1 load trained model and evaluate, 2 continue training 'action': 0, 'from_path': best_2h_model, 'epoch': 3999, }), 'random_seed': 1234, 'save_model_path': exp_path + '/nade_k_nips14_release_final/test_h2/', 'dataset': DD({ 'signature': 'MNIST_binary_russ', }), 'DeepOrderlessNADE': DD({ 'n_in': None, 'n_out': None, 'n_hidden': 500, 'n_layers': 2, 'hidden_act': 'tanh', 'tied_weights': False, # only for the first step of mean field 'use_mask': False, # use data mean to intialize the mean field 'init_mean_field': True, # not avg cost over k steps but only take the cost from the last step 'cost_from_last': False, # 1:0.01 gaussian,2: formula 'init_weights': 1, # centering v 'center_v': False, 'train': DD({ # valid once every 'valid_freq' epochs 'valid_freq': 250, # compute valid and test LL over this many of orderings 'n_orderings': 5, 'n_epochs': 1000, 'minibatch_size': 100, # 0 for momentum, 1 for adadelta 'sgd_type': 1, 'momentum': 0.9, 'lr': 0.001, # 0.0012279827881 for 2h model # 0.0 for 1h model 'l2': 0.0012279827881, # number of mean field steps 'k': 5, 'verbose': True, 'fine_tune': DD({ 'activate': True, 'n_epochs': 3000, }) }) }) })
"motivation": (True, True), "sentence": (True, True), "reiss": (False, False), "maslow": (False, False), "plutchik": (False, False), "plutchik16": (False, False), "entity": (True, False) } splits = ["train", "dev", "test"] for experiment in ["emotion", "motivation"]: print("Making {} data for {} class of models".format( experiment, sys.argv[1])) opt = DD() opt.data = DD() opt.data.pruned = True # Make a memory model (EntNet, NPN) data loader for generation if sys.argv[1] == "memory": # Make save name name = "processed/{}/{}_{}_data_loader.pth".format( experiment, "gen_memory", "-".join(splits)) print(name) # Initialize data loader and load vocabs and raw data data_loader = data.MemoryGenModelDataLoader() data_loader.load_vocabs(vocab_paths, vocab_text) data_loader.load_data(opt, splits,
def get_encoder_parameters(params, opt): params.net.enc = DD() # Whether to encode the entity context # For LSTM and CNN, this adds a separate encoder # for entity-specific context lines. # For REN and NPN, this does nothing since those models # represent entities using memory slots, so the var is # overwritten below params.net.enc.ctx = opt.encoder.ctx # Type of encoder to us -- {lstm, cnn, ren, npn} params.net.enc.model = opt.encoder.model # Hidden sizes of encoder # Self-explanatory for LSTMs params.net.enc.hSize = opt.hidden_size # Input embedding size params.net.enc.iSize = opt.embed_size # Dropout probability for any nodes in the encoder params.net.enc.dpt = opt.encoder.dropout # Type of pretrained embeddings for the encoder # Either glove or none (feel free to add other types) params.net.enc.pt = opt.encoder.pt # Initialization to use # d = default xavier glorot initialization # Gets overwritten for REN or NPN initialization params.net.enc.init = opt.encoder.init if params.net.enc.model in ['gru', 'lstm', 'rnn']: # num layers in rnn params.net.enc.nL = opt.encoder.rnn.num_layers # make encoder bidirectional params.net.enc.bid = opt.encoder.rnn.bid elif params.net.enc.model in ["ren", "npn"]: # Set context to be true automatically params.net.enc.ctx = True # tie entity cells to story entities params.net.enc.tied = opt.encoder.ren.tied # how many entity slots params.net.enc.ents = opt.encoder.ren.num_slots # activation function for entity update # (P = PReLU or I = Identity) params.net.enc.act = opt.encoder.ren.activation # Size of entity hidden cells params.net.enc.eSize = opt.encoder.ren.entity_size # how to intialize parameters # format is gauss+{}+{}.format(mean, std) # n = the default initialization pytorch params.net.enc.init = opt.encoder.ren.init # entity_act update function options: # k = key projection (REN-style), # v = value projection (REN-style), # c = context projection (REN-style)} params.net.enc.afunc = opt.encoder.ren.application_function # use action and affect labels to supervise entitiy selection params.net.enc.sup = opt.encoder.ren.supervise # lock keys to glove init params.net.enc.lk = opt.encoder.ren.lock_keys # use glove embeddings for pretrained entities params.net.enc.entpt = opt.encoder.ren.entpt if params.net.enc.model == "npn": # Number of actions params.net.enc.na = opt.encoder.npn.actions # Size of action embeddings params.net.enc.aSize = opt.encoder.npn.action_size # Number of MLP layers for selecting actions params.net.enc.aNL = opt.encoder.npn.action_num_layers # Dropout for action selector MLP params.net.enc.adpt = opt.encoder.npn.action_dropout # Activation functions between layers of action selector MLP params.net.enc.aI = opt.encoder.npn.action_init # number of layers of projections for entity selection params.net.enc.eNL = opt.encoder.npn.entity_num_layers # dropout probability in preprocess layers params.net.enc.edpt = opt.encoder.npn.entity_dropout # use recurrent attention (See Bosselut et al., 2018) params.net.enc.rec = opt.encoder.npn.entity_recurrent_attention # Sum entity selection or just scale params.net.enc.eRed = opt.encoder.npn.entity_reduce # If it's using an NPN, you need to activate action contribution # to entity update if you mistakenly haven't done so in # params.net.enc.afunc if "a" not in params.net.enc.afunc: params.net.enc.afunc = "a" + params.net.enc.afunc else: # EntNet update rule params.net.enc.afunc = "nkvc" # Initialize encoder with REN initialization params.net.enc.init = opt.encoder.ren.init # No projection layers 
between encoder and entity section params.net.enc.eNL = 0 elif params.net.enc.model == "cnn": # Size of kernels (different sizes separated by commas) params.net.enc.ks = opt.encoder.cnn.kernel_sizes # Number of kernel functions params.net.enc.kn = opt.encoder.cnn.kernel_num
config = DD({ 'module_name': 'I2R_AE', 'model': DD({'rand_seed': None}), # end mlp 'log': DD({ 'experiment_name': 'i2r0217_i2r_clean_clean', 'description': '', 'save_outputs': True, 'save_learning_rule': False, 'save_model': True, 'save_epoch_error': True, 'save_to_database_name': "i2r.db" }), # end log 'learning_method': DD({ 'type': 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', ###[ For SGD and AdaGrad ]### # 'learning_rate' : 0.5, # 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), 'learning_rate': ((1e-2, 0.5), float), # 'learning_rate' : 0.0305287335067987, # 'learning_rate' : 0.01, 'momentum': 0.9, # 'momentum' : 0., # 'momentum' : (1e-2, 1e-1, 0.5, 0.9), # For AdaDelta 'rho': ((0.90, 0.99), float), 'eps': (1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7), }), 'learning_rule': DD({ 'max_col_norm': 1, 'L1_lambda': None, 'L2_lambda': 0.0001, 'cost': 'mse', 'stopping_criteria': DD({ 'max_epoch': 100, 'epoch_look_back': 5, 'cost': 'error', 'percent_decrease': 0.05 }) # end stopping_criteria }), # end learning_rule 'dataset': DD({ # 'type' : 'I2R_Posterior_Blocks_ClnDNN_NoisyFeat', # 'type' : 'I2R_Posterior_Blocks_ClnDNN_CleanFeat', # 'type' : 'I2R_Posterior_NoisyFeat_Sample', 'type': 'I2R_Posterior_Gaussian_Noisy_Sample', # 'type' : 'Mnist', 'train_valid_test_ratio': [5, 1, 1], 'feature_size': 1998, 'target_size': 1998, 'dataset_noise': DD({ # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', 'type': None, # for Gaussian # 'std' :((0.15, 0.4), float), 'std': 0.5, }), 'preprocessor': DD({ 'type': None, # 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # 'type' : 'Log', # for Scale 'global_max': 1.0, 'global_min': 0, 'buffer': 0., 'scale_range': [0.5, 1.], }), # 'batch_size' : (50, 100, 150, 200), 'batch_size': 100, 'num_batches': None, 'iter_class': 'SequentialSubsetIterator', 'rng': None }), # end dataset #============================[ Layers ]===========================# 'hidden1': DD({ 'name': 'hidden1', 'type': 'RELU', 'dim': 3000, # 'dropout_below' : (0.05, 0.1, 0.15, 0.2) 'dropout_below': 0.5, # 'dropout_below' : None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut 'ratio': 0.5, # for Gaussian 'std': 0.1, 'mean': 0, }) }), # end hidden_layer 'hidden2': DD({ 'name': 'hidden2', 'type': 'RELU', 'dim': 1000, # 'dropout_below' : (0.05, 0.1, 0.15, 0.2) # 'dropout_below' : (0, 0.5), 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut 'ratio': 0.5, # for Gaussian 'std': 0.1, 'mean': 0, }) }), # end hidden_layer 'output': DD({ 'name': 'output', 'type': 'Sigmoid', 'dim': 1998, # 'dim' : 1848, # 'dropout_below' : 0.5, # 'dropout_below' : (0, 0.5), 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut 'ratio': 0.5, # for Gaussian 'std': 0.1, 'mean': 0, }) }) # end output_layer })
def jobman_insert_random(n_jobs):
    JOBDB = 'postgres://[email protected]/dauphiya_db/emotiw_mlp_audio'
    EXPERIMENT_PATH = "experiment.jobman_entrypoint"
    jobs = []
    for _ in range(n_jobs):
        job = DD()
        job.n_hiddens = numpy.random.randint(8, 512)
        job.n_layers = numpy.random.randint(1, 4)
        job.learning_rate = 10.**numpy.random.uniform(-3, -0)
        job.momentum = 10.**numpy.random.uniform(-1, -0)
        job.features = ["minimal.pca", "full.pca"][numpy.random.binomial(1, 0.5)]
        job.example_dropout = numpy.random.randint(16, 200)
        job.rbm_learning_rate = 10.**numpy.random.uniform(-3, -0)
        job.rbm_epochs = numpy.random.randint(8, 100)
        job.tag = "pretrain"
        jobs.append(job)
        print job

    answer = raw_input("Submit %d jobs?[y/N] " % len(jobs))
    if answer == "y":
        numpy.random.shuffle(jobs)
        db = jobman.sql.db(JOBDB)
        for job in jobs:
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)

        print "inserted %d jobs" % len(jobs)
        print "To run: jobdispatch --condor --repeat_jobs=%d jobman sql -n 1 'postgres://[email protected]/dauphiya_db/emotiw_mlp_audio' ." % len(jobs)
def tfd(n_trials): ri = numpy.random.random_integers state = DD() with open('tfd_powerup_temp.yaml') as ymtmp: state.yaml_string = ymtmp.read() state.powerup_nunits = 240 state.powerup_npieces = 5 state.W_lr_scale = 0.04 state.p_lr_scale = 0.01 state.lr_rate = 0.1 state.l2_pen = 1e-5 state.l2_pen2 = 0.0000 state.init_mom = 0.5 state.final_mom = 0.5 state.decay_factor = 0.5 state.max_col_norm = 1.9365 state.max_col_norm2 = 1.8365 state.batch_size = 128 state.save_path = './' n_pieces = [2, 3, 4, 5, 6] n_units = [200, 240, 320, 360, 420, 480, 540] learning_rates = numpy.logspace(numpy.log10(0.002), numpy.log10(1.0), 40) learning_rate_scalers = numpy.logspace(numpy.log10(0.04), numpy.log10(1), 30) l2_pen = numpy.logspace(numpy.log10(1e-6), numpy.log10(8*1e-3), 90) max_col_norms = [1.8365, 1.9365, 2.1365, 2.2365, 2.3486] batch_sizes = [128, 100, 80] ind = 0 TABLE_NAME = "powerup_tfd_1layer_finer_large_fixed2" db = api0.open_db('postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME) for i in xrange(n_trials): state.lr_rate = learning_rates[ri(learning_rates.shape[0]) - 1] state.powerup_nunits = n_units[ri(len(n_units)) - 1] state.powerup_npieces = n_pieces[ri(len(n_pieces) - 1)] state.W_lr_scale = numpy.random.uniform(low=0.02, high=1.0)#learning_rate_scalers[ri(len(learning_rate_scalers)) - 1] state.p_lr_scale = numpy.random.uniform(low=0.02, high=1.0)#learning_rate_scalers[ri(len(learning_rate_scalers)) - 1] state.l2_pen = l2_pen[ri(l2_pen.shape[0]) - 1] state.init_mom = numpy.random.uniform(low=0.3, high=0.6) state.final_mom = numpy.random.uniform(low=state.init_mom + 0.1, high=0.9) state.decay_factor = numpy.random.uniform(low=0.01, high=0.05) state.max_col_norm = max_col_norms[ri(len(max_col_norms)) - 1] state.batch_size = batch_sizes[ri(len(batch_sizes)) - 1] alphabet = list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUWXYZ0123456789') state.save_path = './' state.save_path += ''.join(alphabet[:7]) + '_' sql.insert_job(experiment, flatten(state), db) ind += 1 db.createView(TABLE_NAME + '_view') print "{} jobs submitted".format(ind)
import numpy
from jobman import api0, sql
from jobman.tools import DD, flatten
# `experiment` must be importable here, as in the TFD script above.


def tfd(n_trials):
    """Insert n_trials random hyperparameter settings for the 2-layer MNIST powerup model.

    Despite the name (kept from the TFD script above), this variant reads the
    MNIST 2-layer template and writes to the powerup_mnist table.
    """
    ri = numpy.random.random_integers

    state = DD()
    with open('mnist_powerup_temp_l2.yaml') as ymtmp:
        state.yaml_string = ymtmp.read()

    # Defaults; resampled per trial below.
    state.powerup_nunits = 240
    state.powerup_npieces = 5
    state.powerup_nunits2 = 240
    state.powerup_npieces2 = 5
    state.W_lr_scale = 0.04
    state.p_lr_scale = 0.01
    state.lr_rate = 0.1
    state.init_mom = 0.5
    state.final_mom = 0.5
    state.decay_factor = 0.5
    state.max_col_norm = 1.9365
    state.save_path = './'

    # Search space.
    n_pieces = [2, 3, 4, 5]
    n_units = [200, 240, 320, 360, 420, 480]
    learning_rates = numpy.logspace(numpy.log10(0.09), numpy.log10(1.2), 60)
    learning_rate_scalers = numpy.logspace(numpy.log10(0.1), numpy.log10(1), 50)  # unused below; kept from the 1-layer script
    decay_factors = numpy.logspace(numpy.log10(0.001), numpy.log10(0.06), 40)
    max_col_norms = [1.8365, 1.9365, 2.1365, 2.2365, 2.3486]

    ind = 0
    TABLE_NAME = "powerup_mnist_finest_large_2l"
    db = api0.open_db('postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME)

    for i in xrange(n_trials):
        state.lr_rate = learning_rates[ri(learning_rates.shape[0]) - 1]
        state.powerup_nunits = n_units[ri(len(n_units)) - 1]
        # was n_pieces[ri(len(n_pieces) - 1)], which could never sample index 0
        state.powerup_npieces = n_pieces[ri(len(n_pieces)) - 1]
        # Both layers share the same width and number of pieces.
        state.powerup_nunits2 = state.powerup_nunits
        state.powerup_npieces2 = state.powerup_npieces
        state.W_lr_scale = numpy.random.uniform(low=0.09, high=1.0)
        state.p_lr_scale = numpy.random.uniform(low=0.09, high=1.0)
        state.init_mom = numpy.random.uniform(low=0.3, high=0.6)
        state.final_mom = numpy.random.uniform(low=state.init_mom + 0.1, high=0.9)
        state.decay_factor = decay_factors[ri(len(decay_factors)) - 1]
        state.max_col_norm = max_col_norms[ri(len(max_col_norms)) - 1]

        # Random 7-character prefix for the save directory (see the TFD script above).
        alphabet = list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
        numpy.random.shuffle(alphabet)
        state.save_path = './'
        state.save_path += ''.join(alphabet[:7]) + '_'

        sql.insert_job(experiment, flatten(state), db)
        ind += 1

    db.createView(TABLE_NAME + '_view')
    print "{} jobs submitted".format(ind)
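# Both insertion scripts store flatten(state): jobman.tools.flatten turns the
# nested DD into a flat dict with dot-joined keys, and those keys become the
# columns of the Postgres table named by TABLE_NAME. A tiny illustration with
# made-up values:

from jobman.tools import DD, flatten

example = DD({'lr_rate': 0.1, 'model': DD({'nunits': 240})})
print flatten(example)   # {'lr_rate': 0.1, 'model.nunits': 240}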
model_config = DD({ ############################[AE_Testing]########################## ################################################################## 'AE_Testing' : DD({ 'model' : DD({ 'rand_seed' : None }), # end mlp 'log' : DD({ # 'experiment_name' : 'AE_Testing_Mnist_784_500', 'experiment_name' : 'AE_Mnist_784_100', 'description' : '', 'save_outputs' : False, 'save_learning_rule' : False, 'save_model' : False, 'save_epoch_error' : False, 'save_to_database_name' : 'Database_Name.db' }), # end log 'learning_rule' : DD({ 'max_col_norm' : (1, 10, 50), 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'mse', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 5, 'cost' : 'mse', 'percent_decrease' : 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method' : DD({ 'type' : 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', 'learning_rate' : 0.9, 'momentum' : 0.01, }), # end learning_method 'dataset' : DD({ 'type' : 'Mnist', 'train_valid_test_ratio': [8, 1, 1], 'feature_size' : 784, # 'preprocessor' : None, # 'preprocessor' : 'Scale', # 'preprocessor' : 'GCN', # 'preprocessor' : 'LogGCN', 'dataset_noise' : DD({ 'type' : None # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', }), 'preprocessor' : DD({ 'type' : None, # 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max' : 89, 'global_min' : -23, 'buffer' : 0.5, 'scale_range': [-1, 1], }), 'batch_size' : 100, 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset #============================[ Layers ]===========================# 'hidden1' : DD({ 'name' : 'hidden1', 'type' : 'SoftRELU', 'dim' : 100, # 'dropout_below' : (0.05, 0.1, 0.15, 0.2) 'dropout_below' : 0.5, 'layer_noise' : DD({ # 'type' : None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), # 'ratio' : 0.05, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std' : (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean' : 0, }) }), # end hidden_layer 'h1_mirror' : DD({ 'name' : 'h1_mirror', 'type' : 'Sigmoid', # 'dim' : 2049, # dim = input.dim 'dropout_below' : None, 'layer_noise' : DD({ # 'type' : None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), # 'ratio' : 0.05, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std' : (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean' : 0, }) }) # end output_layer }), # end autoencoder #############################[Mapping]############################ ################################################################## 'Laura_Mapping' : DD({ 'model' : DD({ 'rand_seed' : None }), # end mlp 'log' : DD({ 'experiment_name' : 'AE1001_Warp_Laura_Blocks_GCN_Mapping', #helios 'description' : '', 'save_outputs' : True, 'save_learning_rule' : True, 'save_model' : True, 'save_epoch_error' : True, 'save_to_database_name' : 'Laura.db' }), # end log 'learning_rule' : DD({ 'max_col_norm' : (1, 10, 50), # 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), 'learning_rate' : ((1e-8, 1e-3), float), 'momentum' : (1e-3, 1e-2, 1e-1, 0.5, 0.9), 'momentum_type' : 'normal', 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'entropy', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 10, 'cost' : 'entropy', 'percent_decrease' : 0.05 }) # end 
stopping_criteria }), # end learning_rule 'learning_method' : DD({ 'type' : 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', 'learning_rate' : 0.9, 'momentum' : 0.01, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset' : DD({ # 'type' : 'Laura_Blocks_GCN_Mapping', 'type' : 'Laura_Warp_Blocks_GCN_Mapping', 'feature_size' : 2049, 'target_size' : 1, 'train_valid_test_ratio': [8, 1, 1], 'preprocessor' : 'GCN', 'batch_size' : (50, 100, 150, 200), 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset #============================[ Layers ]===========================# 'num_layers' : 1, 'hidden1' : DD({ 'name' : 'hidden1', 'type' : 'Tanh', 'dim' : 1000, 'dropout_below' : None, }), # end hidden_layer 'hidden2' : DD({ 'name' : 'hidden2', 'type' : 'Tanh', 'dim' : 500, 'dropout_below' : None, }), # end hidden_layer 'output' : DD({ 'name' : 'output', 'type' : 'Linear', 'dim' : 1, 'dropout_below' : None, }), # end hidden_layer }), # end Laura_Mapping #############################[Laura]############################## ################################################################## 'Laura' : DD({ 'model' : DD({ # 'rand_seed' : 4520, 'rand_seed' : None, # 'rand_seed' : 2137 }), # end mlp 'log' : DD({ # 'experiment_name' : 'testing_blackout', # 'experiment_name' : 'AE0910_Warp_Blocks_2049_500_tanh_gpu_blockout_more_no_filter_latest', # 'experiment_name' : 'AE0829_Warp_Standardize_GCN_Blocks_2049_500_tanh_gpu', # 'experiment_name' : 'AE0912_Blocks_2049_500_tanh_gpu_clean', # 'experiment_name' : 'AE0829_Standardize_GCN_Blocks_2049_500_tanh_gpu', # 'experiment_name' : 'AE0901_Warp_Blocks_500_180_tanh_gpu', # 'experiment_name' : 'AE1016_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios # 'experiment_name' : 'AE1018_Warp_Blocks_2049_500_tanh_tanh_gpu_blackout', #helios # 'experiment_name' : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout', #helios # 'experiment_name' : 'AE0918_Blocks_180_120_tanh_tanh_gpu_clean', #helios # 'experiment_name' : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_dropout', # 'experiment_name' : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_clean', # 'experiment_name' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios # 'experiment_name' : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout', #helios 'experiment_name' : 'AE1216_Transfactor_blocks_150_50small', 'description' : 'scale_buffer=0.9', 'save_outputs' : True, 'save_learning_rule' : True, 'save_model' : True, 'save_epoch_error' : True, # 'save_to_database_name' : 'Laura12.db' 'save_to_database_name' : 'transfactor.db', }), # end log 'learning_rule' : DD({ 'max_col_norm' : 1, 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'mse', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 5, 'cost' : 'mse', 'percent_decrease' : 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method' : DD({ 'type' : 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', ###[ For SGD and AdaGrad ]### # 'learning_rate' : 0.001, 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), # 'momentum' : 0.5, # 'momentum' : 0., 'momentum' : (1e-2, 1e-1, 0.5, 0.9), ###[ For AdaDelta ]### 'rho' : 0.95, 'eps' : 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset' : DD({ # 'type' : 'Laura_Blocks', # 'type' : 'Laura_Warp_Blocks', # 'type' : 'Laura_Warp_Blocks_500_Tanh', # 'type' : 'Laura_Warp_Blocks_180_Tanh_Dropout', # 'type' : 'Laura_Cut_Warp_Blocks_300', # 
'type' : 'Laura_Blocks_180_Tanh_Tanh', # 'type' : 'Laura_Blocks_180_Tanh_Tanh_Dropout', # 'type' : 'Laura_Blocks_500_Tanh_Sigmoid', # 'type' : 'Laura_Blocks_500', # 'type' : 'Laura_Warp_Standardize_Blocks', # 'type' : 'Laura_Standardize_Blocks', # 'type' : 'Laura_Scale_Warp_Blocks_500_Tanh', # 'type' : 'Laura_Scale_Warp_Blocks_180_Tanh_Dropout', # 'type' : 'Laura_Warp_Blocks_180_Tanh_Blackout', # 'type' : 'Mnist', # 'type' : 'Laura_Warp_Blocks_180_Tanh_Noisy_MaskOut', # 'type' : 'TransFactor_AE', 'type' : 'TransFactor_Blocks150', 'feature_size' : 150, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise' : DD({ 'type' : None # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', }), 'preprocessor' : DD({ 'type' : None, # 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale # 'global_max' : 89, # 'global_min' : -23, 'global_max' : 4.0, 'global_min' : 0., 'buffer' : 0.9, 'scale_range': [-1, 1], }), # 'batch_size' : 50, 'batch_size' : (50, 100, 150, 200), 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset #============================[ Layers ]===========================# 'num_layers' : 1, 'hidden1' : DD({ 'name' : 'hidden1', 'type' : 'Tanh', # 'type' : 'SoftRELU', 'dim' : 50, 'dropout_below' : None, # 'dropout_below' : (0.3, 0.4, 0.5), # 'dropout_below' : 0.5, 'layer_noise' : DD({ # 'type' : None, 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut # 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), 'ratio' : 0.5, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std' : (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean' : 0, }) }), # end hidden_layer # 'hidden2' : DD({ # 'name' : 'hidden2', # 'type' : 'RELU', # 'dim' : 100, # 'dropout_below' : None, # # 'blackout_below' : None # }), # end hidden_layer # # 'h2_mirror' : DD({ # 'name' : 'h2_mirror', # 'type' : 'RELU', # # 'dim' : 2049, # dim = input.dim # 'dropout_below' : None, # # 'blackout_below' : None # }), # end output_layer 'h1_mirror' : DD({ 'name' : 'h1_mirror', 'type' : 'Tanh', # 'dim' : 2049, # dim = input.dim 'dropout_below' : None, # 'dropout_below' : 0.5, }) # end output_layer }), # end autoencoder ########################[Laura_Two_Layers]######################## ################################################################## 'Laura_Two_Layers' : DD({ 'model' : DD({ 'rand_seed' : None }), # end mlp 'log' : DD({ # 'experiment_name' : 'AE1214_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_maskout', 'experiment_name' : 'Transfactor1215_500_50_Two_Layers_Finetune_small', 'description' : '', 'save_outputs' : True, 'save_learning_rule' : True, 'save_model' : True, 'save_epoch_error' : True, # 'save_to_database_name' : 'Laura12.db' 'save_to_database_name' : 'transfactor.db', }), # end log 'learning_rule' : DD({ 'max_col_norm' : 1, 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'mse', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 5, 'cost' : 'mse', 'percent_decrease' : 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method' : DD({ 'type' : 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', # for SGD and AdaGrad 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), 'momentum' : (1e-2, 1e-1, 0.5, 0.9), # for AdaDelta 'rho' : 0.95, 'eps' : 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset' : DD({ # 'type' : 'Laura_Blocks', # 'type' : 
'Laura_Warp_Blocks', 'type' : 'TransFactor_Blocks', 'feature_size' : 500, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise' : DD({ 'type' : None # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', }), 'preprocessor' : DD({ 'type' : None, # 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale # 'global_max' : 89, # 'global_min' : -23, 'global_max' : 4.0, 'global_min' : 0., 'buffer' : 0.9, 'scale_range': [-1, 1], }), 'batch_size' : (50, 100, 150, 200), 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset # #============================[ Layers ]===========================# 'hidden1' : DD({ 'name' : 'hidden1', # 'model' : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067', # 'model' : 'AE1112_Scale_Warp_Blocks_2Layers_finetune_2049_180_tanh_tanh_gpu_clean_20141112_2145_06823495', # 'model' : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671', # 'model' : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280', # 'model' : 'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837', 'model' : 'AE1216_Transfactor_blocks_500_150small_20141215_1748_06646265', 'dropout_below' : None, # 'dropout_below' : (0.1, 0.2, 0.3, 0.4, 0.5), # 'dropout_below' : 0.1, }), # end hidden_layer 'hidden2' : DD({ 'name' : 'hidden2', # 'model' : 'AE1001_Warp_Blocks_500_120_tanh_tanh_gpu_clean_20141003_0113_02206401', # 'model' : 'AE1115_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141119_1327_11490503', # 'model' : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279', # 'model' : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114', # 'model' : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132', 'model' : 'AE1216_Transfactor_blocks_150_50small_20141215_2028_14707382', 'dropout_below' : None, }) }), # end autoencoder ########################[Laura_Three_Layers]######################## #################################################################### 'Laura_Three_Layers' : DD({ 'fine_tuning_only' : False, 'model' : DD({ 'rand_seed' : None }), # end mlp 'log' : DD({ # 'experiment_name' : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean', # 'experiment_name' : 'AE0919_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy', # 'experiment_name' : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_clean', # 'experiment_name' : 'AE0917_Blocks_3layers_finetune_2049_120_tanh_sigmoid_gpu_noisy', # 'experiment_name' : 'AE0917_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_clean', # 'experiment_name' : 'AE0919_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_noisy', # 'experiment_name' : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_noisy', # 'experiment_name' : 'AE1002_Scale_Warp_Blocks_3Layers_finetune_2049_120_tanh_tanh_gpu_clean', 'experiment_name' : 'AE1213_Scale_Laura_Warp_Blocks_3layers_finetune_2049_120_tanh_tanh_gpu_maskout', 'description' : '', 'save_outputs' : True, 'save_learning_rule' : True, 'save_model' : True, 'save_epoch_error' : True, 'save_to_database_name' : 'Laura12.db' }), # end log 'learning_rule' : DD({ 'max_col_norm' : 1, 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'mse', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 5, 'cost' : 'mse', 'percent_decrease' : 0.05 }) # end stopping_criteria }), # end learning_rule 
'learning_method' : DD({ 'type' : 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', # for SGD and AdaGrad 'learning_rate' : (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), # 'learning_rate' : 0.001, 'momentum' : (1e-2, 1e-1, 0.5, 0.9), # 'momentum' : 0.1, # 'momentum' : 0.5, # for AdaDelta 'rho' : 0.95, 'eps' : 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset' : DD({ # 'type' : 'Laura_Blocks', 'type' : 'Laura_Warp_Blocks', 'feature_size' : 2049, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise' : DD({ 'type' : None # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', }), 'preprocessor' : DD({ # 'type' : None, 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max' : 89, 'global_min' : -23, 'buffer' : 0.9, 'scale_range': [-1, 1], }), 'batch_size' : (50, 100, 150, 200), # 'batch_size' : 50, 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset # #============================[ Layers ]===========================# 'hidden1' : DD({ 'name' : 'hidden1', # 'model' : 'AE0911_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140912_2337_04263067', # 'model' : 'AE0916_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20140916_1705_29139505', # 'model' :'AE0912_Blocks_2049_500_tanh_tanh_gpu_clean_20140914_1242_27372903', # 'model' : 'AE0915_Blocks_2049_500_tanh_tanh_gpu_Dropout_20140915_1900_37160748', # 'model' : 'AE1002_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_dropout_20141001_0321_33382955', # 'model' : 'AE0930_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_clean_20140930_1345_29800576', # 'model' : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_clean_continue_20141110_1235_21624029', # 'model' : 'AE1110_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_batchout_continue_20141111_0957_22484008', # 'model' : 'AE1121_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_gaussian_continue_20141126_1543_50554671', # 'model' : 'AE1122_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141128_1421_47179280', 'model' : 'AE1210_Scale_Warp_Blocks_2049_500_tanh_tanh_gpu_sgd_maskout_20141210_1728_15311837', 'dropout_below' : None, # 'dropout_below' : (0.1, 0.2, 0.3, 0.4, 0.5), # 'dropout_below' : 0.1, }), # end hidden_layer 'hidden2' : DD({ 'name' : 'hidden2', # 'model' : 'AE0914_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20140915_0400_30113212', # 'model' : 'AE0918_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_1125_23612485', # 'model' : 'AE0916_Blocks_500_180_tanh_tanh_gpu_clean_20140916_2255_06553688', # 'model' : 'AE0918_Blocks_500_180_tanh_tanh_gpu_dropout_20140918_0920_42738052', # 'model' : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_dropout_20141001_2158_16765065', # 'model' : 'AE1001_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_clean_20141002_0348_53679208', # 'model' : 'AE1110_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141111_2157_47387660', # 'model' : 'AE1111_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_batchout_continue_20141112_0844_45882544', # 'model' : 'AE1127_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_gaussian_20141127_1313_31905279', # 'model' : 'AE1201_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_clean_20141202_2352_57643114', 'model' : 'AE1210_Scale_Warp_Blocks_500_180_tanh_tanh_gpu_sgd_maskout_20141212_2056_15976132', 'dropout_below' : None, }), # end hidden_layer 'hidden3' : DD({ 'name' : 'hidden3', # 'model' : 'AE0915_Warp_Blocks_180_120_tanh_gpu_dropout_clean_20140916_1028_26875210', # 'model' : 
'AE0918_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1649_54631649', # 'model' : 'AE0914_Blocks_180_120_tanh_tanh_gpu_clean_20140918_0119_40376829', # 'model' : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout_20140919_1345_22865393', # 'model' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1711_48207269', # 'model' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout_20141002_1457_08966968', # 'model' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_clean_20141002_1713_16791523', # 'model' : 'AE1120_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141122_0044_09351031', # 'model' : 'AE1121_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_batchout_20141122_0348_49379314', # 'model' : 'AE1127_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_gaussian_20141201_0345_39835964', # 'model' : 'AE1201_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_clean_20141204_0137_07827194', 'model' : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout_20141213_1608_33432934', 'dropout_below' : None, }), # end hidden_layer }), # end autoencoder #####################[Two_Layers_No_Transpose]###################### #################################################################### 'Laura_Two_Layers_No_Transpose' : DD({ 'model' : DD({ 'rand_seed' : 4520 }), # end mlp 'log' : DD({ 'experiment_name' : 'AE1107_No_Transpose_Scale_Warp_Blocks_2049_500_gpu_adagrad_dropout', 'description' : '', 'save_outputs' : True, 'save_learning_rule' : True, 'save_model' : True, 'save_epoch_error' : True, 'save_to_database_name' : 'Laura5.db' }), # end log 'learning_rule' : DD({ 'max_col_norm' : 1, 'L1_lambda' : None, 'L2_lambda' : None, 'cost' : 'mse', 'stopping_criteria' : DD({ 'max_epoch' : 100, 'epoch_look_back' : 5, 'cost' : 'mse', 'percent_decrease' : 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method' : DD({ # 'type' : 'SGD', 'type' : 'AdaGrad', # 'type' : 'AdaDelta', # for SGD and AdaGrad 'learning_rate' : 0.9, 'momentum' : 0.01, # for AdaDelta 'rho' : 0.95, 'eps' : 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset' : DD({ # 'type' : 'Laura_Warp_Blocks_180', # 'type' : 'Laura_Cut_Warp_Blocks_300', # 'type' : 'Laura_Blocks_500', # 'type' : 'Laura_Blocks', 'type' : 'Laura_Warp_Blocks', 'feature_size' : 2049, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise' : DD({ # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', 'type' : None }), 'preprocessor' : DD({ # 'type' : None, 'type' : 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max' : 89, 'global_min' : -23, 'buffer' : 0.5, 'scale_range': [-1, 1], }), 'batch_size' : (50, 100, 150, 200), 'num_batches' : None, 'iter_class' : 'SequentialSubsetIterator', 'rng' : None }), # end dataset # #============================[ Layers ]===========================# 'num_layers' : 1, 'hidden1' : DD({ 'name' : 'hidden1', 'type' : 'Tanh', 'dim' : 500, 'dropout_below' : 0.5, 'layer_noise' : None, # 'layer_noise' : 'BlackOut', # 'layer_noise' : 'Gaussian', # 'layer_noise' : 'MaskOut', # 'layer_noise' : 'BatchOut', }), # end hidden_layer 'h1_mirror' : DD({ 'name' : 'h1_mirror', 'type' : 'Tanh', # 'dim' : 2049, # dim = input.dim 'dropout_below' : 0.5, 'layer_noise' : None, # 'layer_noise' : 'BlackOut', # 'layer_noise' : 'Gaussian', # 'layer_noise' : 'MaskOut', # 'layer_noise' : 'BatchOut', }) # end output_layer }), # end autoencoder }) # end model_config
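# model_config keys each model by name ('AE_Testing', 'Laura_Mapping', 'Laura',
# 'Laura_Two_Layers', 'Laura_Three_Layers', 'Laura_Two_Layers_No_Transpose'), so a
# launcher can pull out one block of settings at a time. The accessor below is a
# sketch of that lookup only; the actual driver script is not part of this file.


def get_model_config(model_name):
    """Return the DD of hyperparameters registered under model_name."""
    return model_config[model_name]

# e.g. laura3 = get_model_config('Laura_Three_Layers'); laura3.hidden1.model names the pretrained first block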
config = DD({ 'module_name': 'Two_Layers_No_Transpose', 'model': DD({'rand_seed': 4520}), # end mlp 'log': DD({ 'experiment_name': 'AE1107_No_Transpose_Scale_Warp_Blocks_2049_500_gpu_adagrad_dropout', 'description': '', 'save_outputs': True, 'save_learning_rule': True, 'save_model': True, 'save_epoch_error': True, 'save_to_database_name': 'Laura5.db' }), # end log 'learning_rule': DD({ 'max_col_norm': 1, 'L1_lambda': None, 'L2_lambda': None, 'cost': 'mse', 'stopping_criteria': DD({ 'max_epoch': 100, 'epoch_look_back': 5, 'cost': 'mse', 'percent_decrease': 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method': DD({ # 'type' : 'SGD', 'type': 'AdaGrad', # 'type' : 'AdaDelta', # for SGD and AdaGrad 'learning_rate': 0.9, 'momentum': 0.01, # for AdaDelta 'rho': 0.95, 'eps': 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset': DD({ # 'type' : 'Laura_Warp_Blocks_180', # 'type' : 'Laura_Cut_Warp_Blocks_300', # 'type' : 'Laura_Blocks_500', # 'type' : 'Laura_Blocks', 'type': 'Laura_Warp_Blocks', 'feature_size': 2049, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise': DD({ # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', 'type': None }), 'preprocessor': DD({ # 'type' : None, 'type': 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max': 89, 'global_min': -23, 'buffer': 0.5, 'scale_range': [-1, 1], }), 'batch_size': (50, 100, 150, 200), 'num_batches': None, 'iter_class': 'SequentialSubsetIterator', 'rng': None }), # end dataset # #============================[ Layers ]===========================# 'num_layers': 1, 'hidden1': DD({ 'name': 'hidden1', 'type': 'Tanh', 'dim': 500, 'dropout_below': 0.5, 'layer_noise': None, # 'layer_noise' : 'BlackOut', # 'layer_noise' : 'Gaussian', # 'layer_noise' : 'MaskOut', # 'layer_noise' : 'BatchOut', }), # end hidden_layer 'h1_mirror': DD({ 'name': 'h1_mirror', 'type': 'Tanh', # 'dim' : 2049, # dim = input.dim 'dropout_below': 0.5, 'layer_noise': None, # 'layer_noise' : 'BlackOut', # 'layer_noise' : 'Gaussian', # 'layer_noise' : 'MaskOut', # 'layer_noise' : 'BatchOut', }) # end output_layer }), # end autoencoder
config = DD({ 'module_name': 'Laura_No_Transpose', 'model': DD({ # 'rand_seed' : 4520, 'rand_seed': None, # 'rand_seed' : 2137 }), # end mlp 'log': DD({ # 'experiment_name' : 'testing_blackout', # 'experiment_name' : 'AE0910_Warp_Blocks_2049_500_tanh_gpu_blockout_more_no_filter_latest', # 'experiment_name' : 'AE0829_Warp_Standardize_GCN_Blocks_2049_500_tanh_gpu', # 'experiment_name' : 'AE0912_Blocks_2049_500_tanh_gpu_clean', # 'experiment_name' : 'AE0829_Standardize_GCN_Blocks_2049_500_tanh_gpu', # 'experiment_name' : 'AE0901_Warp_Blocks_500_180_tanh_gpu', # 'experiment_name' : 'AE1016_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios # 'experiment_name' : 'AE1018_Warp_Blocks_2049_500_tanh_tanh_gpu_blackout', #helios # 'experiment_name' : 'AE0919_Blocks_180_120_tanh_tanh_gpu_dropout', #helios # 'experiment_name' : 'AE0918_Blocks_180_120_tanh_tanh_gpu_clean', #helios # 'experiment_name' : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_dropout', # 'experiment_name' : 'AE0916_Blocks_180_120_tanh_tanh_gpu_output_sig_clean', # 'experiment_name' : 'AE1001_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_dropout', #helios # 'experiment_name' : 'AE1210_Scale_Warp_Blocks_180_120_tanh_tanh_gpu_sgd_maskout', #helios # 'experiment_name' : 'AE1216_Transfactor_blocks_150_50small', 'experiment_name': 'AE0306_Warp_Blocks_2049_120_Scale_No_Transpose_Clean', 'description': 'scale_buffer=0.5', 'save_outputs': True, 'save_learning_rule': True, 'save_model': True, 'save_epoch_error': True, 'save_to_database_name': 'Laura13.db' # 'save_to_database_name' : 'transfactor.db', }), # end log 'learning_rule': DD({ 'max_col_norm': 1, 'L1_lambda': None, 'L2_lambda': None, 'cost': 'mse', 'stopping_criteria': DD({ 'max_epoch': 100, 'epoch_look_back': 5, 'cost': 'mse', 'percent_decrease': 0.05 }) # end stopping_criteria }), # end learning_rule 'learning_method': DD({ 'type': 'SGD', # 'type' : 'AdaGrad', # 'type' : 'AdaDelta', ###[ For SGD and AdaGrad ]### # 'learning_rate' : 0.001, 'learning_rate': (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5), # 'momentum' : 0.5, # 'momentum' : 0., 'momentum': (1e-2, 1e-1, 0.5, 0.9), ###[ For AdaDelta ]### 'rho': 0.95, 'eps': 1e-6, }), # end learning_method #===========================[ Dataset ]===========================# 'dataset': DD({ # 'type' : 'Laura_Blocks', 'type': 'Laura_Warp_Blocks', # 'type' : 'Laura_Warp_Blocks_500_Tanh', # 'type' : 'Laura_Warp_Blocks_180_Tanh_Dropout', # 'type' : 'Laura_Cut_Warp_Blocks_300', # 'type' : 'Laura_Blocks_180_Tanh_Tanh', # 'type' : 'Laura_Blocks_180_Tanh_Tanh_Dropout', # 'type' : 'Laura_Blocks_500_Tanh_Sigmoid', # 'type' : 'Laura_Blocks_500', # 'type' : 'Laura_Warp_Standardize_Blocks', # 'type' : 'Laura_Standardize_Blocks', # 'type' : 'Laura_Scale_Warp_Blocks_500_Tanh', # 'type' : 'Laura_Scale_Warp_Blocks_180_Tanh_Dropout', # 'type' : 'Laura_Warp_Blocks_180_Tanh_Blackout', # 'type' : 'Mnist', # 'type' : 'Laura_Warp_Blocks_180_Tanh_Noisy_MaskOut', 'num_blocks': 20, 'feature_size': 2049, 'train_valid_test_ratio': [8, 1, 1], 'dataset_noise': DD({ 'type': None # 'type' : 'BlackOut', # 'type' : 'MaskOut', # 'type' : 'Gaussian', }), 'preprocessor': DD({ # 'type' : None, 'type': 'Scale', # 'type' : 'GCN', # 'type' : 'LogGCN', # 'type' : 'Standardize', # for Scale 'global_max': 89, 'global_min': -23, 'buffer': 0.5, 'scale_range': [-1, 1], }), # 'batch_size' : 50, 'batch_size': (50, 100, 150, 200), 'num_batches': None, 'iter_class': 'SequentialSubsetIterator', 'rng': None }), # end dataset #============================[ Layers ]===========================# 'num_layers': 
3, 'hidden1': DD({ 'name': 'hidden1', 'type': 'Tanh', # 'type' : 'SoftRELU', 'dim': 500, 'dropout_below': None, # 'dropout_below' : (0.3, 0.4, 0.5), # 'dropout_below' : 0.5, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut # 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), 'ratio': 0.5, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std': (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean': 0, }) }), # end hidden_layer 'hidden2': DD({ 'name': 'hidden2', 'type': 'Tanh', 'dim': 180, 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut # 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), 'ratio': 0.5, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std': (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean': 0, }) }), # end hidden_layer 'hidden3': DD({ 'name': 'hidden3', 'type': 'Tanh', 'dim': 120, 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut # 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), 'ratio': 0.5, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std': (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean': 0, }) }), # end hidden_layer 'h3_mirror': DD({ 'name': 'h3_mirror', 'type': 'Tanh', 'dim': 180, 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut # 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), 'ratio': 0.5, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std': (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean': 0, }) }), # end output_layer 'h2_mirror': DD({ 'name': 'h2_mirror', 'type': 'Tanh', 'dim': 500, 'dropout_below': None, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut # 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), 'ratio': 0.5, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std': (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean': 0, }) }), # end output_layer 'h1_mirror': DD({ 'name': 'h1_mirror', 'type': 'Tanh', 'dim': 2049, 'dropout_below': None, # 'dropout_below' : 0.5, 'layer_noise': DD({ 'type': None, # 'type' : 'BlackOut', # 'type' : 'Gaussian', # 'type' : 'MaskOut', # 'type' : 'BatchOut', # for BlackOut, MaskOut and BatchOut # 'ratio' : (0.05, 0.1, 0.2, 0.3, 0.4, 0.5), 'ratio': 0.5, # for Gaussian # 'std' : (0.001, 0.005, 0.01, 0.015, 0.02), 'std': (0.005, 0.01, 0.02, 0.03, 0.04), # 'std' : 0.001, 'mean': 0, }) }) # end output_layer }) # end Laura
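# The 'Laura_No_Transpose' config above spells the symmetric decoder out by hand:
# the encoder goes 2049 -> 500 -> 180 -> 120 and the mirror layers walk back
# 120 -> 180 -> 500 -> 2049. The helper below is a hypothetical sketch (not used
# by this codebase) that derives those mirror dimensions from the encoder spec.


def mirror_dims(input_dim, hidden_dims):
    """Output dimension of each *_mirror layer for a symmetric autoencoder."""
    # e.g. mirror_dims(2049, [500, 180, 120]) -> [180, 500, 2049]
    return list(reversed([input_dim] + hidden_dims[:-1]))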