    batch_generator_classes=BatchGenerator, vocabulary=vocabulary)
add_metrics = ['bpc', 'perplexity', 'accuracy']
NUM_EXERCISES = 10
NUM_UNROLLINGS = 4
OPT_INF_NAME = 'COLD'
OPT_INF_RESTORE_PUPIL_PATHS = [(OPT_INF_NAME, None)]
env.build_pupil(
    batch_size=32,
    num_layers=1,
    num_nodes=[100],
    num_output_layers=1,
    num_output_nodes=[],
    vocabulary_size=vocabulary_size,
    embedding_size=150,
    num_unrollings=NUM_UNROLLINGS,
    init_parameter=3.,
    num_gpus=1,
    regime='training_with_meta_optimizer',
    additional_metrics=add_metrics,
    going_to_limit_memory=True)
env.build_optimizer(
    regime='train',
    # regime='inference',
    num_optimizer_unrollings=10,
    num_exercises=NUM_EXERCISES,
    res_size=2000,
    permute=False,
    optimizer_for_opt_type='adam',
hp_names = get_hp_names_from_conf_file(parameter_set_file_name)
for_plotting = get_optimizer_evaluation_results(save_path, hp_names, AVERAGING_NUMBER)
best = get_best(for_plotting, 'optimizer')
metric_res = best['adam_prep']['loss']
best_on_valid = metric_res['validation']
print(' ' * 2 + 'loss' + ':', best_on_valid[1])
print_hps(hp_names, best_on_valid[0], 4)
best_conf = dict(list(zip(hp_names, best_on_valid[0])))
env.build_pupil(
    batch_size=BATCH_SIZE,
    **LSTM_SIZE,
    regime='training_with_meta_optimizer',
    additional_metrics=add_metrics,
    going_to_limit_memory=True,
)
env.build_optimizer(
    **OPTIMIZER_PARAMETERS,
    optimizer_init_parameter=best_conf['optimizer_init_parameter'],
)
stop_specs = 20000
learning_rate = dict(
    type='exponential_decay',
    period=4000,
    {'placeholder': 'dropout', 'value': 1.}]
add_metrics = ['bpc', 'perplexity', 'accuracy']
# tf.set_random_seed(1)
NUM_UNROLLINGS = 30
BATCH_SIZE = 32
env.build_pupil(
    batch_size=BATCH_SIZE,
    num_layers=2,
    num_nodes=[250, 250],
    num_output_layers=1,
    num_output_nodes=[],
    vocabulary_size=vocabulary_size,
    embedding_size=150,
    num_unrollings=NUM_UNROLLINGS,
    init_parameter=3.,
    # character_positions_in_vocabulary=cpiv,
    num_gpus=1,
    additional_metrics=add_metrics,
    going_to_limit_memory=True,
    optimizer='adam'
)
print('building is finished')
stop_specs = dict(
    type='while_progress',
    max_no_progress_points=10,
    changing_parameter_name='learning_rate',
    path_to_target_metric_storage=('default_1', 'loss')
)
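# Hypothetical usage sketch, not from the original script: how the pupil and
# stop_specs built above might be fed into a training run. The keyword
# arguments allow_growth, result_types and additions_to_feed_dict are modeled
# on the env.train(...) call visible in another excerpt below; `stop`,
# `save_path` and `learning_rate` are assumptions suggested by commented-out
# lines elsewhere, and all values are illustrative.
learning_rate = dict(
    type='adaptive_change',
    max_no_progress_points=10,
    decay=.5,
    init=4e-3,                      # assumed initial value, for illustration only
    path_to_target_metric_storage=('default_1', 'loss'))
env.train(
    allow_growth=True,
    save_path='lstm_adam_example',  # assumed output directory
    result_types=['loss', 'bpc', 'perplexity', 'accuracy'],
    additions_to_feed_dict=[{'placeholder': 'dropout', 'value': .9}],
    stop=stop_specs,                # assumed keyword, seen commented out in a later excerpt
    learning_rate=learning_rate,
)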
)
kwargs_for_model_building = dict(
    rnn_type=config['rnn_type'],
    embed_inputs=config['embed_inputs'],
    rnn_map=rnn_map,
    num_out_nodes=[],
    voc_size=vocabulary_size,
    emb_size=256,
    init_parameter=3.,
    num_gpus=1,
    metrics=metrics,
    optimizer=config['optimizer'],
    dropout_rate=0.1,
    randomize_state_stddev=config["randomize_state_stddev"])
env.build_pupil(**kwargs_for_model_building)
BATCH_SIZE = 32
restore_path = None if config['restore_path'] is None else os.path.expanduser(
    config['restore_path'])
if config['train']:
    learning_rate = dict(
        type='adaptive_change',
        max_no_progress_points=10,
        decay=.5,
        init=4e-4,
        path_to_target_metric_storage=('valid', 'loss'))
    stop_specs = config['stop_specs']
    if isinstance(stop_specs, dict):
        stop_specs['changing_parameter_name'] = "learning_rate"
        stop_specs['path_to_target_metric_storage'] = ["valid", "loss"]
        stop_specs['type'] = "while_progress"
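# Hypothetical sketch, not part of the original script: a minimal `config`
# dict holding the keys the excerpt above reads. All values are illustrative
# assumptions; the real config is loaded elsewhere.
config = dict(
    rnn_type='lstm',
    embed_inputs=True,
    optimizer='adam',
    randomize_state_stddev=0.,
    restore_path=None,        # or a user path, expanded with os.path.expanduser above
    train=True,
    stop_specs=dict(max_no_progress_points=10),  # normalized further by the code above
)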
hp_names = get_hp_names_from_conf_file(parameter_set_file_name)
for_plotting = get_optimizer_evaluation_results(save_path, hp_names, AVERAGING_NUMBER)
best = get_best(for_plotting, 'optimizer')
metric_res = best['adam_prep']['loss']
best_on_valid = metric_res['validation']
print(' ' * 2 + 'loss' + ':', best_on_valid[1])
print_hps(hp_names, best_on_valid[0], 4)
best_conf = dict(list(zip(hp_names, best_on_valid[0])))
env.build_pupil(
    batch_size=BATCH_SIZE,
    **MLP_SIZE,
    regime='training_with_meta_optimizer',
    additional_metrics=add_metrics,
)
env.build_optimizer(
    **OPTIMIZER_PARAMETERS,
    clip_norm=best_conf['clip_norm'],
    optimizer_init_parameter=best_conf['optimizer_init_parameter'],
    pupil_learning_rate=best_conf['pupil_learning_rate'],
)
stop_specs = 20000
learning_rate = dict(
    type='exponential_decay',
    period=4000,
train_add_feed = [{'placeholder': 'dropout', 'value': .9}]
if 'momentum' in hps:
    train_add_feed.append({
        'placeholder': 'momentum',
        'value': hps['momentum']
    })
valid_add_feed = [{'placeholder': 'dropout', 'value': 1.}]
add_metrics = ['bpc', 'perplexity', 'accuracy']
VALID_SIZE = 1000
BATCH_SIZE = 32
env.build_pupil(
    batch_size=BATCH_SIZE,
    num_layers=2,
    num_hidden_nodes=[1000],
    input_shape=[3072],
    num_classes=10,
    init_parameter=hps['init_parameter'],
    additional_metrics=add_metrics,
    optimizer=opt)
print('building is finished')
stop_specs = dict(
    type='while_progress_no_changing_parameter',
    max_no_progress_points=40,
    path_to_target_metric_storage=('valid', 'loss'))
for run_num in range(num_runs):
    path = save_path % run_num
    learning_rate = dict(
        type='fixed',
        value=hps['learning_rate'],
    )
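# Hypothetical sketch, not from the original script: the shape of the names the
# excerpt above relies on but does not define here. The keys of `hps` are taken
# from the lookups above; every value (and the save_path pattern) is an
# illustrative assumption.
num_runs = 5
save_path = 'mlp_fixed_lr/run_%s'   # assumed pattern; formatted with run_num above
hps = dict(
    learning_rate=1e-3,
    init_parameter=3.,
    momentum=.9,                    # optional; only fed if present (see the check above)
)
opt = 'sgd'                         # 'sgd' and 'adam' appear as optimizer names in other excerpts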
valid_add_feed = [
    # {'placeholder': 'sampling_prob', 'value': 1.},
    {
        'placeholder': 'dropout',
        'value': 1.
    }
]
add_metrics = ['bpc', 'perplexity', 'accuracy']
tf.set_random_seed(1)
BATCH_SIZE = 32
env.build_pupil(
    batch_size=BATCH_SIZE,
    num_layers=2,
    num_hidden_nodes=[1000],
    input_shape=[784],
    num_classes=10,
    init_parameter=3.,
    additional_metrics=add_metrics,
    optimizer='sgd')
print('building is finished')
stop_specs = dict(
    type='while_progress',
    max_no_progress_points=10,
    changing_parameter_name='learning_rate',
    path_to_target_metric_storage=('valid', 'loss'))
learning_rate = dict(
    type='adaptive_change',
    max_no_progress_points=10,
    decay=.5,
    init=4.,
    path_to_target_metric_storage=('valid', 'loss'))
env.train(
data_dir = os.path.join(*(['..'] * ROOT_HEIGHT + ['datasets', 'mnist']))
env = Environment(
    pupil_class=Mlp,
    meta_optimizer_class=ChiTerm,
    batch_generator_classes=MnistBatchGenerator,
)
add_metrics = ['bpc', 'perplexity', 'accuracy']
BATCH_SIZE = 32
env.build_pupil(
    batch_size=BATCH_SIZE,
    num_layers=2,
    num_hidden_nodes=[1000],
    input_shape=[784],
    num_classes=10,
    init_parameter=3.,
    additional_metrics=add_metrics,
    regime='training_with_meta_optimizer',
)
env.build_optimizer(
    regime='inference',
    additional_metrics=add_metrics,
    chi_application='exp',
)
print('building is finished')
add_feed = [{
    'placeholder': 'dropout',
    'value': .9
add_metrics = ['bpc', 'perplexity', 'accuracy']
train_add_feed = [{'placeholder': 'dropout', 'value': .9}]
valid_add_feed = [{'placeholder': 'dropout', 'value': 1.}]
dataset_name = 'valid'
tf.set_random_seed(1)
BATCH_SIZE = 32
NUM_UNROLLINGS = 10
env.build_pupil(
    batch_size=BATCH_SIZE,
    num_layers=1,
    num_nodes=[100],
    num_output_layers=1,
    num_output_nodes=[],
    vocabulary_size=vocabulary_size,
    embedding_size=150,
    num_unrollings=10,
    num_gpus=1,
    init_parameter=2.,
    regime='autonomous_training',
    additional_metrics=add_metrics,
    going_to_limit_memory=True,
    optimizer='sgd')
tf.set_random_seed(1)
env.train(
    allow_growth=True,
    # save_path='debug_grid_search',
    result_types=['loss', 'bpc', 'perplexity', 'accuracy'],
    additions_to_feed_dict=train_add_feed,
    # pupil_restore_paths=['debug_empty_meta_optimizer/not_learning_issue_es20_nn20/checkpoints/0'],
    # stop=stop_specs,
        'value': 1.
    }
]
add_metrics = ['bpc', 'perplexity', 'accuracy']
tf.set_random_seed(1)
env.build_pupil(
    batch_size=32,
    num_layers=1,
    num_nodes=[100],
    num_output_layers=1,
    num_output_nodes=[],
    vocabulary_size=vocabulary_size,
    embedding_size=150,
    num_unrollings=10,
    init_parameter=3.,
    # character_positions_in_vocabulary=cpiv,
    num_gpus=1,
    additional_metrics=add_metrics,
    going_to_limit_memory=True,
    optimizer='sgd')
print('building is finished')
stop_specs = dict(
    type='while_progress',
    max_no_progress_points=10,
    changing_parameter_name='learning_rate',
    path_to_target_metric_storage=('default_1', 'loss'))
learning_rate = dict(
    type='adaptive_change',
    max_no_progress_points=10,
    }
]
add_metrics = ['bpc', 'perplexity', 'accuracy']
tf.set_random_seed(1)
NUM_UNROLLINGS = 100
BATCH_SIZE = 32
env.build_pupil(
    batch_size=BATCH_SIZE,
    num_layers=2,
    num_nodes=[1500, 1500],
    num_output_layers=1,
    num_output_nodes=[],
    vocabulary_size=vocabulary_size,
    embedding_size=500,
    num_unrollings=NUM_UNROLLINGS,
    init_parameter=1.,
    num_gpus=1,
    additional_metrics=add_metrics,
    going_to_limit_memory=True,
    optimizer='sgd')
print('building is finished')
stop_specs = dict(
    type='while_progress',
    max_no_progress_points=10,
    changing_parameter_name='learning_rate',
    path_to_target_metric_storage=('default_1', 'loss'))
learning_rate = dict(
    type='adaptive_change',
    max_no_progress_points=10,
    decay=.5,
    # dict(
    #     module_name='word_enc_dec',
    #     num_nodes=[320, 600],
    #     input_idx=0,
    #     output_idx=1,
    # )
    # ]
)
env.build_pupil(
    rnn_type='lstm',
    embed_inputs=True,
    rnn_map=rnn_map,
    num_out_nodes=[],
    voc_size=vocabulary_size,
    emb_size=256,
    init_parameter=3.,
    num_gpus=1,
    metrics=metrics,
    optimizer='adam',
    # regime='inference',
    # backward_connections=True,
    # matrix_dim_adjustment=True,
)
learning_rate = dict(
    type='adaptive_change',
    max_no_progress_points=1,
    decay=.5,
    init=2e-3,
    # init=1e-3,
    path_to_target_metric_storage=('default_1', 'loss')
)
BATCH_SIZE = 32
hp_names = get_hp_names_from_conf_file(parameter_set_file_name)
for_plotting = get_pupil_evaluation_results(eval_save_path, hp_names)
best = get_best(for_plotting, 'pupil')
print(best)
env.build_pupil(
    batch_size=BATCH_SIZE,
    num_layers=1,
    num_nodes=[100],
    num_output_layers=1,
    num_output_nodes=[],
    vocabulary_size=vocabulary_size,
    embedding_size=150,
    num_unrollings=10,
    num_gpus=1,
    regime='inference',
    additional_metrics=add_metrics,
    going_to_limit_memory=True,
    optimizer=opt
)
for dataset_name, dataset_res in best.items():
    print('dataset:', dataset_name)
    for metric, b in dataset_res.items():
        print(' ' * 2 + metric + ':', b[1])
        print_hps(hp_names, b[0], 4)
        best_conf = dict(list(zip(hp_names, b[0])))
        training_path = os.path.join('..', 'text8_finish_std', base, metric + '_best', 'test', 'training')
# voc_name = 'text8_voc.txt'
with open(voc_name, 'r') as f:
    vocabulary = list(f.read())
vocabulary_size = len(vocabulary)
env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary)
valid_add_feed = [{'placeholder': 'dropout', 'value': 1.}]
env.build_pupil(
    num_layers=2,
    num_nodes=[1500, 1500],
    num_output_layers=1,
    num_output_nodes=[],
    vocabulary_size=vocabulary_size,
    embedding_size=500,
    num_gpus=1,
    regime='inference',
    going_to_limit_memory=True,
)
# env.build_pupil(
#     num_layers=1,
#     num_nodes=[100],
#     num_output_layers=1,
#     num_output_nodes=[],
#     vocabulary_size=vocabulary_size,
#     embedding_size=150,
#     num_gpus=1,