# Invoke env.train_optimizer once; all configuration is passed as keyword
# arguments. This variant uses literal hyper-parameters (stop=4000,
# opt_inf_stop=500, batch_size=32) and an inline learning-rate spec.
# NOTE(review): the meaning of each keyword is defined by train_optimizer,
# which is not visible from this call site -- confirm against its signature.
env.train_optimizer(
    allow_growth=True,
    save_path=save_path,
    result_types=['loss', 'bpc', 'perplexity', 'accuracy'],
    additions_to_feed_dict=train_opt_add_feed,
    # Disabled argument kept for reference; this call passes no
    # pupil_restore_paths. It must stay on its own line: as a trailing
    # comment inside a one-line call it would comment out every argument
    # that follows it, including the closing parenthesis.
    # pupil_restore_paths=['debug_empty_meta_optimizer/not_learning_issue_es20_nn20/checkpoints/0'],
    reset_period=1,
    num_exercises=NUM_EXERCISES,
    stop=4000,
    train_dataset_texts=[train_text],
    # Arguments sharing the opt_inf_ prefix.
    opt_inf_is_performed=True,
    opt_inf_stop=500,
    opt_inf_pupil_restore_paths=OPT_INF_RESTORE_PUPIL_PATHS,
    opt_inf_additions_to_feed_dict=opt_inf_add_feed,
    opt_inf_validation_dataset_texts=[valid_text],
    opt_inf_train_dataset_texts=[train_text],
    validation_additions_to_feed_dict=valid_add_feed,
    vocabulary=vocabulary,
    batch_size=32,
    batch_gen_init_is_random=True,
    num_unrollings=NUM_UNROLLINGS,
    learning_rate={
        'type': 'exponential_decay',
        'init': 3e-4,
        'decay': .1,
        'period': 3500,
    },
    results_collect_interval=100,
    opt_inf_results_collect_interval=1,
    permute=False,
    summary=True,
    add_graph_to_summary=True,
)
# Build the save path from `base` and invoke env.train_optimizer once; all
# configuration is passed as keyword arguments. This variant names its
# datasets via train_datasets / opt_inf_*_datasets and forwards `data_dir`
# through train_batch_kwargs and valid_batch_kwargs.
# NOTE(review): the meaning of each keyword is defined by train_optimizer,
# which is not visible from this call site -- confirm against its signature.
training_path = os.path.join(base, 'loss_best', 'test', 'training')
env.train_optimizer(
    allow_growth=True,
    save_path=training_path,
    result_types=['loss', 'bpc', 'perplexity', 'accuracy'],
    additions_to_feed_dict=train_opt_add_feed,
    # Disabled argument kept for reference; this call passes no
    # pupil_restore_paths. It must stay on its own line: as a trailing
    # comment inside a one-line call it would comment out every argument
    # that follows it, including the closing parenthesis.
    # pupil_restore_paths=['debug_empty_meta_optimizer/not_learning_issue_es20_nn20/checkpoints/0'],
    reset_period=RESET_PERIOD,
    num_exercises=NUM_EXERCISES,
    stop=stop_specs,
    train_datasets=[('train', 'train')],
    # Arguments sharing the opt_inf_ prefix.
    opt_inf_is_performed=True,
    opt_inf_stop=OPTIMIZER_RANGE,
    opt_inf_pupil_restore_paths=OPT_INF_RESTORE_PUPIL_PATHS,
    opt_inf_additions_to_feed_dict=opt_inf_add_feed,
    opt_inf_validation_datasets=[['validation', 'valid']],
    opt_inf_train_datasets=[['train', 'train']],
    validation_additions_to_feed_dict=valid_add_feed,
    batch_size=BATCH_SIZE,
    batch_gen_init_is_random=True,
    learning_rate=learning_rate,
    results_collect_interval=2000,
    opt_inf_results_collect_interval=10,
    permute=False,
    summary=True,
    add_graph_to_summary=True,
    one_batch_gen=True,
    # Both keyword dicts carry the same data_dir value.
    train_batch_kwargs=dict(data_dir=data_dir),
    valid_batch_kwargs=dict(data_dir=data_dir),
)
# Build the save path from `base` and invoke env.train_optimizer once; all
# configuration is passed as keyword arguments. This variant passes raw
# texts (train_text / valid_text) together with `vocabulary`, and supplies
# a single entry in pupil_restore_paths.
# NOTE(review): the meaning of each keyword is defined by train_optimizer,
# which is not visible from this call site -- confirm against its signature.
training_path = os.path.join(base, 'loss_best', 'test', 'training')
env.train_optimizer(
    allow_growth=True,
    save_path=training_path,
    result_types=['loss', 'bpc', 'perplexity', 'accuracy'],
    additions_to_feed_dict=train_opt_add_feed,
    pupil_restore_paths=[the_only_pupil_restore_path],
    # Disabled alternative to the active pupil_restore_paths above, kept
    # for reference. It must stay on its own line: as a trailing comment
    # inside a one-line call it would comment out every argument that
    # follows it, including the closing parenthesis.
    # pupil_restore_paths=['debug_empty_meta_optimizer/not_learning_issue_es20_nn20/checkpoints/0'],
    reset_period=RESET_PERIOD,
    num_exercises=NUM_EXERCISES,
    stop=stop_specs,
    train_dataset_texts=[train_text],
    # Arguments sharing the opt_inf_ prefix.
    opt_inf_is_performed=True,
    opt_inf_stop=OPT_INF_STOP,
    opt_inf_pupil_restore_paths=OPT_INF_RESTORE_PUPIL_PATHS,
    opt_inf_additions_to_feed_dict=opt_inf_add_feed,
    opt_inf_validation_dataset_texts=[valid_text],
    opt_inf_train_dataset_texts=[train_text],
    validation_additions_to_feed_dict=valid_add_feed,
    vocabulary=vocabulary,
    batch_size=BATCH_SIZE,
    batch_gen_init_is_random=True,
    num_unrollings=NUM_UNROLLINGS,
    learning_rate=learning_rate,
    results_collect_interval=2000,
    opt_inf_results_collect_interval=10,
    permute=False,
    summary=True,
    add_graph_to_summary=True,
)
# Invoke env.train_optimizer once; all configuration is passed as keyword
# arguments. `step` is interpolated into the save path, into the single
# pupil_restore_paths entry, and into the (name, path) pair given as
# opt_inf_pupil_restore_paths; the two restore paths are the same string.
# NOTE(review): the meaning of each keyword is defined by train_optimizer,
# which is not visible from this call site -- confirm against its signature.
env.train_optimizer(
    allow_growth=True,
    save_path='res_net_relu/from_%s' % step,
    result_types=['loss', 'bpc', 'perplexity', 'accuracy'],
    additions_to_feed_dict=train_opt_add_feed,
    pupil_restore_paths=['lstm/test_res_net_1000_emb150_nl1_nn100_bs32_nu10/checkpoints/%s' % step],
    # Disabled alternative to the active pupil_restore_paths above, kept
    # for reference. It must stay on its own line: as a trailing comment
    # inside a one-line call it would comment out every argument that
    # follows it, including the closing parenthesis.
    # pupil_restore_paths=['debug_empty_meta_optimizer/not_learning_issue_es20_nn20/checkpoints/0'],
    reset_period=1,
    stop=41,
    train_dataset_texts=[train_text],
    # Arguments sharing the opt_inf_ prefix.
    opt_inf_is_performed=True,
    opt_inf_stop=10,
    opt_inf_pupil_restore_paths=[
        ('prelearn%s' % step, 'lstm/test_res_net_1000_emb150_nl1_nn100_bs32_nu10/checkpoints/%s' % step)
    ],
    opt_inf_additions_to_feed_dict=opt_inf_add_feed,
    opt_inf_validation_dataset_texts=[valid_text],
    opt_inf_train_dataset_texts=[train_text],
    validation_additions_to_feed_dict=valid_add_feed,
    vocabulary=vocabulary,
    batch_size=32,
    batch_gen_init_is_random=False,
    num_unrollings=4,
    learning_rate={'type': 'exponential_decay', 'init': .002, 'decay': .5, 'period': 400},
    results_collect_interval=10,
    opt_inf_results_collect_interval=1,
    permute=False,
    summary=True,
    add_graph_to_summary=True,
)