def pento(n_trials): ri = numpy.random.random_integers state = DD() with open('mnist_powerup_temp.yaml') as ymtmp: state.yaml_string = ymtmp.read() state.powerup_nunits = 240 state.powerup_npieces = 5 state.W_lr_scale = 0.04 state.p_lr_scale = 0.01 state.lr_rate = 0.1 state.l2_pen = 1e-5 state.l2_pen2 = 0.0000 state.init_mom = 0.5 state.final_mom = 0.5 state.decay_factor = 0.5 state.max_col_norm = 1.9365 state.max_col_norm2 = 1.8365 state.batch_size = 128 state.save_path = './' n_pieces = [2, 3, 4, 5, 6, 8, 10, 12, 14, 16] n_units = [200, 240, 280, 320, 360, 420, 480] batch_sizes = [128, 256, 512] learning_rates = numpy.logspace(numpy.log10(0.001), numpy.log10(1.0), 30) learning_rate_scalers = numpy.logspace(numpy.log10(0.01), numpy.log10(1), 30) l2_pen = numpy.logspace(numpy.log10(1e-6), numpy.log10(8*1e-3), 100) max_col_norms = [1.7365, 1.8365, 1.9365, 2.1365, 2.2365, 2.4365] ind = 0 TABLE_NAME = "powerup_mnist_1layer_fixed" db = api0.open_db('postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME) for i in xrange(n_trials): state.lr_rate = learning_rates[ri(learning_rates.shape[0]) - 1] state.powerup_nunits = n_units[ri(len(n_units)) - 1] state.powerup_npieces = n_pieces[ri(len(n_pieces)) - 1] state.W_lr_scale = learning_rate_scalers[ri(len(learning_rate_scalers)) - 1] state.p_lr_scale = learning_rate_scalers[ri(len(learning_rate_scalers)) - 1] state.batch_size = batch_sizes[ri(len(batch_sizes)) - 1] state.l2_pen = l2_pen[ri(l2_pen.shape[0]) - 1] state.init_mom = numpy.random.uniform(low=0.3, high=0.6) state.final_mom = numpy.random.uniform(low=state.init_mom + 0.1, high=0.9) state.decay_factor = numpy.random.uniform(low=0.01, high=0.05) state.max_col_norm = max_col_norms[ri(len(max_col_norms)) - 1] alphabet = list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUWXYZ0123456789') numpy.random.shuffle(alphabet) state.save_path = './' state.save_path += ''.join(alphabet[:7]) + '_' sql.insert_job(experiment, flatten(state), db) ind += 1 
db.createView(TABLE_NAME + '_view') print "{} jobs submitted".format(ind)
import memnet.train_model_adam_gru_soft

# Random-search setup for the Adam-trained GRU soft-attention memory model.
n_trials = 64

# Log-uniform bounds for the learning-rate grid.
lr_min = 8e-5
lr_max = 1e-2

# Candidate values for the discrete hyper-parameters.
batches = [100, 200, 400, 800]
renormalization_scale = [1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0]
mem_nels = [200, 220, 230, 240, 250, 260, 290, 300]
mem_sizes = [20, 24, 28, 30, 32]

# Uniform bounds for the weight-init standard deviation.
std_min = 0.01
std_max = 0.05

# Baseline configuration; searched fields are overwritten per trial.
state = DD()
state.lr = 6e-6
state.batch_size = 200
state.sub_mb_size = 25
state.std = 0.01
state.max_iters = 20000
state.n_hids = 200
state.mem_nel = 200
state.mem_size = 28

# Fixed seed so the sampled grids below are reproducible.
np.random.seed(3)
ri = np.random.random_integers

learning_rates = np.logspace(np.log10(lr_min), np.log10(lr_max), 100)
stds = np.random.uniform(std_min, std_max, 100)

# Change the table name every time you launch a new search.
TABLE_NAME = "adam_grusoft_model_search_v0"
# Configuration for a single soft-model run, populated from the command line.
state = DD()

# BUGFIX: the original passed the description text as the first positional
# argument of ArgumentParser, which argparse interprets as `prog` (the
# program name shown in usage/help), not as the description.
parser = argparse.ArgumentParser(
    description="Parameters for the single soft model.")
parser.add_argument("--task_id", default=1, type=int)
parser.add_argument("--reload_model", default=1, type=int)
parser.add_argument("--save_path", default=".", type=str)
# NOTE(review): --seed is parsed as a str with default "." — this looks
# copy-pasted from --save_path; confirm whether an integer seed was intended.
parser.add_argument("--seed", default=".", type=str)
args = parser.parse_args()

state.reload_model = args.reload_model
state.task_id = args.task_id
state.save_path = args.save_path

# Fixed (non-searched) hyper-parameters for this run.
state.lr = 8.2 * 1e-3
state.batch_size = 160
state.sub_mb_size = 160
state.max_iters = 90000
state.n_hids = 180
state.mem_nel = 120
state.mem_size = 28
state.renormalization_scale = 5.0
state.use_ff_controller = True
state.seed = args.seed
state.use_gru_inp = True
state.use_bow_inp = False
state.std = 0.034
state.bow_size = 100
state.bow_weight_start = 0.64