def main(_):
    """Configure the `core.th` hub for a TIMIT run and activate it.

    Sets folder/file names, model size, trainer options and summary names
    on `core.th`, then calls `core.activate()`.

    Args:
        _: Unused.
    """
    task_banner = '{} on TIMIT task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    hub.prefix = '{}_'.format(date_string())
    summary_stem = model_name
    hub.suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    # Reference table (Layers / state_size / #params):
    #   L1  2x44: 10189   3x30: 10225   4x23: 10444
    #   L2  2x29: 10146   3x20: 9945
    #   L3  2x23: 9938    3x16: 9785
    hub.num_layers = 1
    hub.unit_size = 3
    hub.num_units = 30
    # Setting truncate_grad to False works better
    hub.truncate_grad = False

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 1000
    hub.batch_size = 1
    hub.optimizer = 'adam'
    hub.learning_rate = 0.0008
    hub.validation_per_round = 4

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    hub.mark = '{}({}x{})'.format(model_name, hub.unit_size, hub.num_units)
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a TIMIT run of a GDU model.

    The GDU layout string is picked from a per-layer-count table and the
    total state size is derived from it (sum over '+'-separated groups of
    the product of each group's 'x'-separated factors). Finally
    `core.activate()` is called.

    Args:
        _: Unused.
    """
    console.start('{} on TIMIT task'.format(model_name.upper()))
    hub = core.th

    # -- 1. folder/file names and device ------------------------------------
    hub.job_dir += '/{:02d}_{}'.format(id, model_name)
    hub.prefix = '{}_'.format(date_string())
    summary_stem = model_name
    hub.suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    # For SxN GDU, params # = 2x(14+SxN)x(SxN)+25xSxN+25
    #                       = (53+2xSxN)xSxN+25
    # Denote SxN as x: # = 2*x^2 + 53*x + 25, x = (sqrt(2609+8*#)-53)/4
    #   5000: 38; 10000: 58.6
    #   2 layers: 37; 3 layers: 29
    hub.num_layers = 1
    layer2config = {
        1: '15x2+5x5+3x1',
        2: '15x2+5x1+2x1',
        3: '15x1+12x1+2x1',
    }
    # The layout comes solely from the table; the former hard-coded
    # assignment that was overwritten right here has been dropped.
    hub.gdu_string = layer2config[hub.num_layers]
    hub.state_size = sum(
        np.prod([int(factor) for factor in group.split('x')])
        for group in hub.gdu_string.split('+'))

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 1000
    hub.batch_size = 1
    hub.optimizer = 'adam'
    hub.learning_rate = 0.003
    hub.validation_per_round = 4

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    hub.mark = '{}({})'.format(model_name, hub.gdu_string)
    hub.gather_summ_name = hub.prefix + summary_stem + hub.suffix + '.sum'
    core.activate()
def main(_):
    """Configure the `core.th` hub for a CIFAR-10 run and activate it.

    Sets a 50-layer, width-100 model with tanh activation, Adam training
    settings and summary/export options, then calls `core.activate()`.

    Args:
        _: Unused.
    """
    task_banner = '{} on CIFAR-10 task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup (nothing to configure) ---------------------------

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = '_t00'
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.centralize_data = True
    hub.num_layers = 50
    hub.layer_width = 100
    hub.spatial_activation = 'tanh'
    hub.bias_initializer = -5.0

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 200
    hub.batch_size = 128
    hub.validation_per_round = 1
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.0004
    hub.patience = 5
    hub.early_stop = False
    hub.validate_train_set = True
    hub.val_decimals = 6

    # -- 4. summary and note setup ------------------------------------------
    hub.export_tensors_upon_validation = True
    # Gate export stays off: the `export_gates` flag is not set here.
    hub.train = True
    hub.save_model = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    mark_template = '{}({}x{}-{})'
    hub.mark = mark_template.format(
        model_name, hub.layer_width, hub.num_layers, hub.spatial_activation)
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for an sCIFAR-10 GDU run and activate it.

    The mark is `GDU.mark()` extended with the dropout and clipping values
    set below.

    Args:
        _: Unused.
    """
    task_banner = '{} on sCIFAR-10 task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup (nothing to configure) ---------------------------

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.gdu_string = '5x60'
    hub.use_reset_gate = True
    # sog_v1 is much faster
    hub.sog_version = 1
    hub.dropout = 0.1
    hub.output_dropout = 0.2

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 10000
    hub.batch_size = 128
    hub.validation_per_round = 10
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001
    hub.clip_threshold = 1.0
    hub.clip_method = 'value'

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.save_model = True
    hub.overwrite = False

    # -- 5. other stuff and activate ----------------------------------------
    hub.mark = GDU.mark()
    regularization_tag = '_rdp{}odp{}gc{}'.format(
        hub.dropout, hub.output_dropout, hub.clip_threshold)
    hub.mark += regularization_tag
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a TO-task GamRHN run and activate it.

    The sequence length and bit count set in section 0 are appended to both
    the mark and the gathered summary file name.

    Args:
        _: Unused.
    """
    task_banner = '{} on TO task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup --------------------------------------------------
    hub.sequence_length = 100
    hub.bits = 3

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.visible_gpu_id = 0
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = ''

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.gam_config = '6x10'
    hub.head_size = 10
    hub.hyper_kernel = 'gru'
    hub.state_size = 60
    hub.num_layers = 1

    # -- 3. trainer setup ---------------------------------------------------
    hub.max_iterations = 50000
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001

    # -- 4. summary and note setup ------------------------------------------
    hub.export_tensors_upon_validation = True

    # -- 5. other stuff and activate ----------------------------------------
    task_tag = '_{}bits_L{}'.format(hub.bits, hub.sequence_length)
    hub.mark = GamRHN.mark() + task_tag
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, task_tag, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a TIMIT run and activate it.

    The state size is looked up from a table keyed by the number of layers,
    so changing `num_layers` below is enough to switch configurations.

    Args:
        _: Unused.

    Raises:
        KeyError: If `num_layers` has no entry in the size table.
    """
    console.start('{} on TIMIT task'.format(model_name.upper()))
    hub = core.th

    # -- 1. folder/file names and device ------------------------------------
    hub.job_dir += '/{:02d}_{}'.format(id, model_name)
    hub.prefix = '{}_'.format(date_string())
    summary_stem = model_name
    hub.suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    # Reference table (Layers / state_size / #params):
    #   1   33   10156
    #   2   21   10378
    #   3   17   10752
    hub.num_layers = 1
    layer2size = {1: 33, 2: 21, 3: 17, 4: 14}
    # Single source of truth for the size; the redundant literal assignment
    # that used to precede this lookup was overwritten immediately.
    hub.state_size = layer2size[hub.num_layers]

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 1000
    hub.batch_size = 1
    hub.optimizer = 'adam'
    hub.learning_rate = 0.003
    hub.validation_per_round = 4

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    hub.mark = '{}({})'.format(model_name, hub.state_size)
    hub.gather_summ_name = hub.prefix + summary_stem + hub.suffix + '.sum'
    core.activate()
def main(_):
    """Configure the `core.th` hub for a TO-task FastSlow run and activate it.

    The fast-cell size is looked up by the number of fast layers and the
    slow-cell size is set equal to it.

    Args:
        _: Unused.
    """
    task_banner = '{} on TO task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup --------------------------------------------------
    hub.sequence_length = 100
    hub.bits = 3

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.visible_gpu_id = 0
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = ''

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.fast_layers = 2
    fast_size_by_layers = {2: 41, 3: 45, 4: 47, 5: 49, 6: 50}
    hub.fast_size = fast_size_by_layers[hub.fast_layers]
    hub.slow_size = hub.fast_size
    hub.hyper_kernel = 'lstm'
    hub.forget_bias_initializer = 2.0

    # -- 3. trainer setup ---------------------------------------------------
    hub.max_iterations = 50000
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001

    # -- 4. summary and note setup ------------------------------------------
    hub.export_tensors_upon_validation = True

    # -- 5. other stuff and activate ----------------------------------------
    task_tag = '_{}bits_L{}'.format(hub.bits, hub.sequence_length)
    hub.mark = FastSlow.mark() + task_tag
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, task_tag, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for an MNIST run and activate it.

    Uses a '200-100' architecture string with ReLU and batch norm, Adam
    training with early stopping enabled, then calls `core.activate()`.

    Args:
        _: Unused.
    """
    task_banner = '{} on MNIST task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup (nothing to configure) ---------------------------

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = '_t00'
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.spatial_activation = 'relu'
    hub.use_batchnorm = True
    hub.archi_string = '200-100'

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 1000
    hub.batch_size = 128
    hub.print_cycle = 20
    hub.validation_per_round = 2
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.003
    hub.patience = 2
    hub.early_stop = True

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.save_model = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    hub.mark = '{}({})'.format(model_name, hub.archi_string)
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a CIFAR-10 run and activate it.

    Unlike scripts that store prefix/suffix on the hub, this one keeps them
    as locals and bakes the prefix directly into the mark. It finishes with
    `core.activate(True)`.

    Args:
        _: Unused.
    """
    task_banner = '{} on CIFAR-10 task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup (nothing to configure) ---------------------------

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    date_prefix = '{}_'.format(date_string())
    name_suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.dropout = 0.2

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 1000
    hub.batch_size = 64
    hub.validation_per_round = 5
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001
    hub.patience = 5

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.save_model = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    # `num_layers` is not assigned in this script; whatever value the hub
    # already holds is what ends up in the mark.
    model_tag = '{}({}){}'.format(model_name, hub.num_layers, name_suffix)
    hub.mark = date_prefix + model_tag
    hub.gather_summ_name = ''.join(
        [date_prefix, summary_stem, name_suffix, '.sum'])
    core.activate(True)
def main(_):
    """Configure the `core.th` hub for a TO-task run and activate it.

    Prefix and suffix are kept as locals; the prefix is baked into the mark
    and the sequence/bit tag is appended to both mark and summary name.

    Args:
        _: Unused.
    """
    task_banner = '{} on TO task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup --------------------------------------------------
    hub.sequence_length = 200
    hub.bits = 3

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.visible_gpu_id = 0
    date_prefix = '{}_'.format(date_string())
    name_suffix = ''

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.state_size = 67
    hub.forget_bias_initializer = 2.0

    # -- 3. trainer setup ---------------------------------------------------
    hub.max_iterations = 10000
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001

    # -- 4. summary and note setup ------------------------------------------
    hub.export_tensors_upon_validation = True

    # -- 5. other stuff and activate ----------------------------------------
    task_tag = '_{}bits_L{}'.format(hub.bits, hub.sequence_length)
    task_tag = task_tag + name_suffix
    model_tag = '{}({})'.format(model_name, hub.state_size)
    hub.mark = date_prefix + model_tag + task_tag
    hub.gather_summ_name = ''.join(
        [date_prefix, summary_stem, task_tag, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for an AP-task run and activate it.

    The model is sized through `gdu_string`, so the mark is built from that
    string, the only size this script actually configures.

    Args:
        _: Unused.
    """
    console.start('{} on AP task'.format(model_name.upper()))
    hub = core.th

    # -- 0. data set setup --------------------------------------------------
    hub.sequence_length = 200
    hub.terminal_threshold = 0.002

    # -- 1. folder/file names and device ------------------------------------
    hub.job_dir += '/{:02d}_{}'.format(id, model_name)
    summary_stem = model_name
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.gdu_string = '10x10'

    # -- 3. trainer setup ---------------------------------------------------
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.01

    # -- 4. summary and note setup ------------------------------------------
    hub.export_tensors_upon_validation = True

    # -- 5. other stuff and activate ----------------------------------------
    task_tag = '_T{}'.format(hub.sequence_length)
    # Previously the mark formatted `state_size`, which is never assigned in
    # this script; label the run with the layout that was configured above.
    hub.mark = '{}({})'.format(model_name, hub.gdu_string) + task_tag
    hub.gather_summ_name = (
        hub.prefix + summary_stem + task_tag + hub.suffix + '.sum')
    core.activate()
def main(_):
    """Configure the `core.th` hub for an mERG run and activate it.

    The prefix is a local that is baked into the mark, while the suffix is
    stored on the hub. The `multiple` setting is appended as `_m<multiple>`
    to both the mark and the summary name.

    Args:
        _: Unused.
    """
    task_banner = '{} on mERG task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup --------------------------------------------------
    hub.multiple = 10

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    date_prefix = '{}_'.format(date_string())
    hub.suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.state_size = 100

    # -- 3. trainer setup ---------------------------------------------------
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.0003

    # -- 5. other stuff and activate ----------------------------------------
    task_tag = '_m{}'.format(hub.multiple)
    model_tag = '{}({})'.format(model_name, hub.state_size)
    hub.mark = date_prefix + model_tag + task_tag
    hub.gather_summ_name = ''.join(
        [date_prefix, summary_stem, task_tag, hub.suffix, '.sum'])
    core.activate()
def default_summ_name(self):
    """Return the default summary name, `<date_string()>_<script name>`.

    The script name is whatever `re_find_single` extracts for the pattern
    `s<digits>_<word characters>` that is immediately followed by a literal
    `.py`.

    Returns:
        The date string and the extracted script name joined by '_'.
    """
    # The dot is escaped so that only a real '.py' extension satisfies the
    # lookahead; an unescaped '.' would accept any character before 'py'.
    script_name = re_find_single(r's\d+_\w+(?=\.py)')
    return '{}_{}'.format(date_string(), script_name)
def main(_):
    """Configure the `core.th` hub for an FI-2010 run and activate it.

    Combines a bilinear architecture string with GAM-RHN settings; the mark
    is the architecture string joined with `GamRHN.mark()`.

    Args:
        _: Unused.
    """
    task_banner = '{} on FI-2010 task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    hub.prefix = '{}_'.format(date_string())
    summary_stem = model_name
    hub.visible_gpu_id = 0
    hub.suffix = ''

    # -- 1. dataset setup ---------------------------------------------------
    hub.volume_only = True

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.conveyor_length = 15
    hub.conveyor_input_shape = [20]
    # Settings above are common; settings below are model specific.

    # Bilinear part
    hub.archi_string = '20x60+15x80+10x100+5x120+5x4'
    hub.max_norm = 2.5

    # GAM-RHN part
    hub.gam_config = '2x50'
    hub.head_size = 15
    hub.hyper_kernel = 'gru'
    hub.state_size = 200
    hub.num_layers = 8
    hub.dropout = 0.2
    hub.gam_dropout = 0.3
    hub.rhn_dropout = 0.5
    hub.output_dropout = 0.5

    # -- 3. trainer setup ---------------------------------------------------
    hub.batch_size = 64
    hub.sub_seq_len = 5000
    hub.num_steps = 10
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001
    hub.clip_threshold = 1.0
    hub.clip_method = 'value'
    hub.validation_per_round = 2
    hub.lives = 5
    hub.lr_decay = 0.4

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    hub.mark = '{}_{}'.format(hub.archi_string, GamRHN.mark())
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a pMNIST GamRHN run and activate it.

    The mark is `GamRHN.mark()` plus a permutation tag ('_P' or '_NP') and
    the three dropout rates set below.

    Args:
        _: Unused.
    """
    task_banner = '{} on pMNIST task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup --------------------------------------------------
    hub.permute = True

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.visible_gpu_id = 0
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = ''

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.gam_config = '4x60'
    hub.head_size = 60
    hub.hyper_kernel = 'gru'
    hub.state_size = 120
    hub.num_layers = 1
    hub.gam_dropout = 0.2
    hub.rhn_dropout = 0.2
    hub.output_dropout = 0.1

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 10000
    hub.batch_size = 128
    hub.validation_per_round = 10
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.0008
    hub.clip_threshold = 1.0
    hub.clip_method = 'value'

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.save_model = True
    hub.overwrite = False

    # -- 5. other stuff and activate ----------------------------------------
    if hub.permute:
        permute_flag = 'P'
    else:
        permute_flag = 'NP'
    permute_tag = '_{}'.format(permute_flag)
    hub.mark = GamRHN.mark() + permute_tag
    dropout_tag = '_g{}r{}o{}'.format(
        hub.gam_dropout, hub.rhn_dropout, hub.output_dropout)
    hub.mark += dropout_tag
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, permute_tag, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a TEXT8 GamRHN run and activate it.

    When the hub's `dynamic_evaluation` flag is already set, training is
    switched off and the dynamic-evaluation (`de_*`) options are filled in
    instead.

    Args:
        _: Unused.
    """
    task_banner = '{} on TEXT8 task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = ''
    hub.visible_gpu_id = 0
    hub.allow_growth = True

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.hidden_dim = 50
    hub.gam_config = '3x750'
    hub.head_size = 300
    hub.hyper_kernel = 'gru'
    hub.state_size = 750
    hub.num_layers = 10
    hub.gam_dropout = 0.4
    hub.rhn_dropout = 0.3
    hub.input_dropout = 0.2
    hub.output_dropout = 0.2

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 10000
    hub.batch_size = 100
    hub.num_steps = 100
    hub.patience = 5
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.0001
    hub.clip_threshold = 1.0
    hub.clip_method = 'value'
    hub.validation_per_round = 2

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.overwrite = False
    hub.val_num_steps = 1000
    hub.val_batch_size = 20

    # -- 5. dynamic evaluation ----------------------------------------------
    # NOTE(review): the extent of this branch was reconstructed from the
    # section layout (everything up to the next section header) -- confirm.
    if hub.dynamic_evaluation:
        hub.train = False
        hub.de_max_batches = 450
        hub.de_val_pct = 0.1
        hub.de_eval_val_set = True
        hub.train_stats_exists = False
        # eta and lambda are given as strings, not floats
        hub.de_eta = '0.00001'
        hub.de_lambda = '0.0075'

    # -- 6. other stuff and activate ----------------------------------------
    hub.mark = GamRHN.mark()
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a pMNIST GDU run and activate it.

    The GDU string is assembled as `<unit_size>x<num_units>x<delta>`, and the
    mark records it together with the permutation tag and the main training
    hyper-parameters.

    Args:
        _: Unused.
    """
    task_banner = '{} on pMNIST task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup --------------------------------------------------
    hub.permute = True

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    hub.prefix = '{}_'.format(date_string())
    summary_stem = model_name
    hub.suffix = ''
    hub.visible_gpu_id = 0

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.num_layers = 1
    hub.state_size = 256
    hub.unit_size = 5
    # NOTE(review): 256 // 5 is 51, so unit_size * num_units is 255, not the
    # 256 stored in state_size -- confirm the truncation is intended.
    hub.num_units = hub.state_size // hub.unit_size
    hub.delta = 1.0
    hub.rec_dropout = 0.4
    hub.output_dropout = 0.2
    hub.gdu_string = '{}x{}x{}'.format(
        hub.unit_size, hub.num_units, hub.delta)

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 100
    hub.batch_size = 100
    hub.validation_per_round = 10
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001
    hub.clip_threshold = 1.0
    hub.clip_method = 'value'

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.save_model = True
    hub.overwrite = True

    # -- 5. other stuff and activate ----------------------------------------
    if hub.permute:
        permute_flag = 'P'
    else:
        permute_flag = 'NP'
    permute_tag = '_{}'.format(permute_flag)
    # `use_reset_gate` is not assigned in this script; the hub's current
    # value decides whether '-r' appears in the mark.
    if hub.use_reset_gate:
        reset_tag = '-r'
    else:
        reset_tag = ''
    model_tag = '{}({}{})'.format(model_name, hub.gdu_string, reset_tag)
    hub.mark = model_tag + permute_tag
    hyper_tag = '_bs{}lr{}rd{}od{}gc{}'.format(
        hub.batch_size, hub.learning_rate, hub.rec_dropout,
        hub.output_dropout, hub.clip_threshold)
    hub.mark += hyper_tag
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, permute_tag, hub.suffix, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a CIFAR-10 run and activate it.

    The fully-connected layer sizes are parsed from `developer_code`
    ('-'-separated integers) and echoed back into the mark.

    Args:
        _: Unused.
    """
    task_banner = '{} on CIFAR-10 task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup (nothing to configure) ---------------------------

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.prefix = '{}_'.format(date_string())
    hub.visible_gpu_id = 1

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.spatial_activation = 'relu'
    hub.developer_code = '1024-512'
    hub.fc_dims = list(map(int, hub.developer_code.split('-')))
    hub.use_batchnorm = False
    hub.dropout = 0.0
    hub.centralize_data = True

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 1
    hub.batch_size = 64
    hub.validation_per_round = 5
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.001
    hub.patience = 5
    hub.lives = 1
    hub.lr_decay = 0.6
    hub.clip_threshold = 10.0
    hub.reset_optimizer_after_resurrection = False
    hub.summary = True

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.save_model = True
    hub.overwrite = True
    hub.print_cycle = 20

    # -- 5. other stuff and activate ----------------------------------------
    name_tail = ''
    dims_text = '-'.join(map(str, hub.fc_dims))
    hub.mark = '{}({})'.format(model_name, dims_text) + name_tail
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, name_tail, '.sum'])
    core.activate()
def main(_):
    """Configure the `core.th` hub for a cPTB GamRHN run and activate it.

    Sets the GAM/RHN sizes and dropout rates, Adam training with value
    clipping, and validation batch settings, then calls `core.activate()`.

    Args:
        _: Unused.
    """
    task_banner = '{} on cPTB task'.format(model_name.upper())
    console.start(task_banner)
    hub = core.th

    # -- 0. data set setup (nothing to configure) ---------------------------

    # -- 1. folder/file names and device ------------------------------------
    job_subdir = '/{:02d}_{}'.format(id, model_name)
    hub.job_dir += job_subdir
    summary_stem = model_name
    hub.prefix = '{}_'.format(date_string())
    hub.suffix = ''
    hub.visible_gpu_id = 0
    hub.allow_growth = True

    # -- 2. model setup -----------------------------------------------------
    hub.model = model
    hub.hidden_dim = 150
    hub.gam_config = '3x600'
    hub.head_size = 300
    hub.hyper_kernel = 'gru'
    hub.state_size = 600
    hub.num_layers = 5
    hub.gam_dropout = 0.4
    hub.rhn_dropout = 0.3
    hub.input_dropout = 0.3
    hub.output_dropout = 0.2

    # -- 3. trainer setup ---------------------------------------------------
    hub.epoch = 1000
    hub.batch_size = 128
    hub.num_steps = 100
    hub.patience = 15
    hub.optimizer = tf.train.AdamOptimizer
    hub.learning_rate = 0.0004
    hub.clip_threshold = 1.0
    hub.clip_method = 'value'
    hub.validation_per_round = 2

    # -- 4. summary and note setup ------------------------------------------
    hub.train = True
    hub.overwrite = False
    hub.val_num_steps = 1000
    hub.val_batch_size = 20

    # -- 5. other stuff and activate ----------------------------------------
    hub.mark = GamRHN.mark()
    hub.gather_summ_name = ''.join(
        [hub.prefix, summary_stem, hub.suffix, '.sum'])
    core.activate()