f.write("std: %s\n" % trainer._training_data._std) f.write("max: %s\n" % trainer._training_data._max) f.write("min: %s\n" % trainer._training_data._min) f.write("cap: %s\n" % (trainer._training_data._mean+2*trainer._training_data._std+100)) total = np.sum(trainer._training_data._y_count[1:]) for i, j in enumerate(trainer._training_data._y_count): if i > 0: f.write("%02d %05d %0.5f %s\n" % (i, j, j/total, params['id_to_keyword'][i - 1])) else: f.write("%02d %05d %0.5f %s\n" % (i, j, j/total, '')) def train_iteration_done(trainer, epoch, index, iteration_count, loss_value, training_done, run_results, params): save_y_count(trainer, live_replacement_count_filename) _default_train_iteration_done(trainer, epoch, index, iteration_count, loss_value, training_done, run_results, params) #print(training_data.next_batch(10)) trainer = Trainer(inference=model.inference, batch_size=utils.get_dict_value(params, 'batch_size', 128), loss=losses.softmax_xentropy , model_output_location=utils.get_dict_value(params, 'output_location') , name=utils.get_dict_value(params, 'model_name') , training_data=training_data, train_iteration_done=train_iteration_done, params=params) trainer.run(restore_latest_ckpt=True, save_network=True, save_ckpt=True, mini_batches_between_checkpoint=utils.get_dict_value(params, 'mini_batches_between_checkpoint', 1000), additional_nodes_to_evaluate=['encoded_sentence'] ,on_checkpoint_saved=on_checkpoint_saved)
import os
import shutil

# NOTE: Trainer, losses, utils, data and model come from the surrounding
# project; their imports are elided in the original listing.
param_file = 'params.py'
params = utils.load_param_file(param_file)
os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)

# copy the parameter file next to the model output for reproducibility
files_to_copy = [param_file]
for file in files_to_copy:
    shutil.copyfile(
        file,
        os.path.join(utils.get_dict_value(params, 'output_location'), file))

# the training set is sharded into 29 (features, scores) .npy file pairs
files = []
for i in range(29):
    files.append(['features_%03d.npy' % i, 'scores_%03d.npy' % i])
training_data = data.PPTDesignerData(params=params, files=files)

trainer = Trainer(inference=model.inference,
                  batch_size=utils.get_dict_value(params, 'batch_size', 128),
                  loss=losses.l2_loss,
                  model_output_location=utils.get_dict_value(
                      params, 'output_location'),
                  name=utils.get_dict_value(params, 'model_name'),
                  training_data=training_data,
                  params=params)
trainer.run(restore_latest_ckpt=False, save_network=True, save_ckpt=True,
            mini_batches_between_checkpoint=utils.get_dict_value(
                params, 'mini_batches_between_checkpoint', 1000))
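# For a quick smoke test without the real shards, matching .npy pairs can be
# fabricated with numpy; the shapes used here (1000 rows, 128 feature
# columns, one score per row) are placeholders, not the real dataset's
# dimensions:
import numpy as np

for i in range(29):
    np.save('features_%03d.npy' % i,
            np.random.rand(1000, 128).astype(np.float32))
    np.save('scores_%03d.npy' % i,
            np.random.rand(1000, 1).astype(np.float32))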
print("NEW LEARNING RATE %s" % new_lr) trainer.set_learning_rate(new_lr) params['eval_results'] = [run_results['tpp']] trainer.last_epoch = epoch return framework.trainer._default_train_iteration_done( trainer, epoch, index, iteration_count, loss_value, training_done, run_results, params) #print(training_data.next_batch(10)) trainer = Trainer(inference=model.inference, batch_size=utils.get_dict_value(params, 'batch_size', 128), loss=model.loss, model_output_location=utils.get_dict_value( params, 'output_location'), name=utils.get_dict_value(params, 'model_name'), training_data=data, train_iteration=Trainer._rnn_train_iteration, train_iteration_done=train_iteration_done, optimizer=model.optimizer, params=params) trainer.run(restore_latest_ckpt=False, save_network=True, save_ckpt=True, mini_batches_between_checkpoint=utils.get_dict_value( params, 'mini_batches_between_checkpoint', 1000), additional_nodes_to_evaluate=['tpp'], on_checkpoint_saved=on_checkpoint_saved)
def train_iteration_done(trainer, iteration, loss_value, done, run_results):
    # (the def line is inferred; the original listing starts mid-function)
    # print([v.name for v in tf.all_variables()])
    print([iteration, loss_value])
    # stop training early once the loss is low enough
    if loss_value < 0.15:
        done = True
    return done

# training code:
# 1 - generate some fake data,
# 2 - create a trainer with the data,
# 3 - run the trainer,
# 4 - and save the model
fake_data = training_data.generate_fake_1d_training_data(['x', 'y0'])
trainer = Trainer(inference=inference,
                  batch_size=128,
                  model_output_location=OUTPUT_DIR,
                  name=MODEL_NAME,
                  training_data=fake_data,
                  train_iteration_done=train_iteration_done)
trainer.run(restore_latest_ckpt=False, save_network=True)
trainer.save(output_dir=OUTPUT_DIR,
             pb_filename=PB_FILENAME,
             ckpt_filename=CKPT_FILENAME)
print("Training done")

# test evaluation code
e = Evaluator.load(model_dir=OUTPUT_DIR,
                   pb_filename=PB_FILENAME,
                   ckpt_filename=CKPT_FILENAME)
print(e.eval({'x': 10}, 'ybar'))
def inference(x):
    # (the def line is inferred; the original listing starts mid-function)
    # build a degree-2 polynomial of x and expose its output as 'ybar'
    poly, _ = polynomial(x, 2, name='p')
    poly, _ = rename_nodes(poly, ['ybar'])
    return poly

def train_iteration_done(trainer, iteration, loss_value, done, run_results):
    # debug dump of the variables as we train
    if iteration % 100 == 0:
        a0 = [v for v in tf.all_variables() if v.name == 'p/a0:0']
        a1 = [v for v in tf.all_variables() if v.name == 'p/a1:0']
        a2 = [v for v in tf.all_variables() if v.name == 'p/a2:0']
        logging.info([trainer._training_data.current_epoch(), iteration,
                      loss_value, a0[0].eval(), a1[0].eval(), a2[0].eval()])
    #
    # example of how to exit on condition:
    # if loss_value < 1:
    #     done = True
    return done

logging.basicConfig(level=logging.INFO)

# training code: generate some fake data, create a trainer with the data,
# run the trainer, and save the model
fake_data = training_data.generate_fake_1d_training_data(['x', 'y0'])
trainer = Trainer(inference=inference,
                  model_output_location=OUTPUT_DIR,
                  name='quadratic',
                  training_data=fake_data,
                  train_iteration_done=train_iteration_done,
                  batch_size=16)
trainer.run(num_epochs=5000, restore_latest_ckpt=False, save_network=False)
trainer.save(output_dir=OUTPUT_DIR,
             pb_filename=PB_FILENAME,
             ckpt_filename=CKPT_FILENAME)

# test evaluation code
e = Evaluator.load(model_dir=OUTPUT_DIR,
                   pb_filename=PB_FILENAME,
                   ckpt_filename=CKPT_FILENAME)
logging.info(e.eval({'x': 10}, 'ybar'))
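# `polynomial` and `rename_nodes` come from the surrounding framework. For
# orientation only, a minimal TF1-style sketch of what polynomial(x, 2,
# name='p') plausibly builds (trainable scalars p/a0, p/a1, p/a2, matching
# the variable names the callback above reads back); this is an assumption,
# not the framework's implementation:
import tensorflow as tf

def polynomial(x, degree, name):
    # y = a0 + a1*x + ... + a_degree * x**degree with trainable coefficients
    with tf.variable_scope(name):
        coeffs = [tf.get_variable('a%d' % d, shape=[], dtype=tf.float32)
                  for d in range(degree + 1)]
    y = tf.add_n([coeffs[d] * (x ** d) for d in range(degree + 1)])
    return y, coeffs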