def test_inference(self):
    """Checks that the trained agent's policy can act for a batch of one.

    Runs two consecutive policy steps, threading the returned policy state
    from the first step into the second, and verifies the action shape is
    `[batch_size]` both times.
    """
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        self._time_step_spec,
        self._action_spec,
        self._network,
        tf.compat.v1.train.AdamOptimizer(),
        num_outer_dims=2)
    test_trainer = trainer.Trainer(
        root_dir=self.get_temp_dir(), agent=agent)

    batch_size = 1
    # Random (spec-conforming) observation with a leading batch dimension.
    time_step = tensor_spec.sample_spec_nest(
        self._time_step_spec, outer_dims=(batch_size,))

    policy = test_trainer._agent.policy
    state = policy.get_initial_state(batch_size)
    # Two steps: the second consumes the state produced by the first.
    for _ in range(2):
        step = policy.action(time_step, state)
        self.assertAllEqual([batch_size], step.action.shape)
        state = step.state
def train_eval(agent_name='behavioral_cloning',
               num_iterations=100,
               batch_size=64,
               train_sequence_length=1):
    """Train for LLVM inliner.

    Builds the requested agent from the global config, trains it on the
    tfrecord dataset found at `FLAGS.data_path`, and saves the resulting
    policies under `FLAGS.root_dir`.

    Args:
      agent_name: registered name of the agent to construct.
      num_iterations: number of training iterations to run.
      batch_size: training batch size.
      train_sequence_length: length of training sequences.
    """
    # Normalize the user-supplied output directory (expands `~`).
    root_dir = os.path.normpath(os.path.expanduser(FLAGS.root_dir))

    # Build the agent, trainer, and policy saver.
    time_step_spec, action_spec = config.create_signature_specs(config.CONFIG)
    tf_agent = agent_creators.create_agent(agent_name, time_step_spec,
                                           action_spec)
    llvm_trainer = trainer.Trainer(root_dir=root_dir, agent=tf_agent)
    saver = policy_saver.PolicySaver(policy_dict={
        'saved_policy': tf_agent.policy,
        'saved_collect_policy': tf_agent.collect_policy,
    })

    # Train on the recorded dataset.
    tfrecord_iterator_fn = data_reader.create_tfrecord_iterator_fn(
        agent_name=agent_name,
        config=config.CONFIG,
        batch_size=batch_size,
        train_sequence_length=train_sequence_length)
    dataset_iter = tfrecord_iterator_fn(FLAGS.data_path)
    llvm_trainer.train(dataset_iter, num_iterations)

    # Save final policy.
    saver.save(root_dir)
def test_trainer_initialization(self):
    """A freshly constructed Trainer starts with its global step at zero."""
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        self._time_step_spec,
        self._action_spec,
        self._network,
        tf.compat.v1.train.AdamOptimizer(),
        num_outer_dims=2)
    test_trainer = trainer.Trainer(
        root_dir=self.get_temp_dir(), agent=agent)
    self.assertEqual(0, test_trainer._global_step.numpy())
def test_training(self):
    """Training for N iterations advances the global step by exactly N."""
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        self._time_step_spec,
        self._action_spec,
        self._network,
        tf.compat.v1.train.AdamOptimizer(),
        num_outer_dims=2)
    test_trainer = trainer.Trainer(
        root_dir=self.get_temp_dir(), agent=agent)

    # Global step starts at 0, and each train iteration increments it.
    self.assertEqual(0, test_trainer._global_step.numpy())
    data_iter = _create_test_data(batch_size=3, sequence_length=3)
    test_trainer.train(data_iter, num_iterations=10)
    self.assertEqual(10, test_trainer._global_step.numpy())
def test_training(self):
    """Training logs each monitor_dict entry once per iteration.

    Patches `tf.summary.scalar` and verifies the 'test' scalar is emitted
    once for each of the 10 training iterations, and that the global step
    advances accordingly.
    """
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        self._time_step_spec,
        self._action_spec,
        self._network,
        tf.compat.v1.train.AdamOptimizer(),
        num_outer_dims=2)
    test_trainer = trainer.Trainer(
        root_dir=self.get_temp_dir(), agent=agent)
    self.assertEqual(0, test_trainer._global_step.numpy())

    data_iter = _create_test_data(batch_size=3, sequence_length=3)
    monitor_dict = {'test': 1}
    with mock.patch.object(
        tf.summary, 'scalar', autospec=True) as scalar_mock:
        test_trainer.train(data_iter, monitor_dict, num_iterations=10)

    # One 'test' scalar summary per training iteration.
    test_calls = [
        c for c in scalar_mock.mock_calls if c[2]['name'] == 'test'
    ]
    self.assertEqual(10, len(test_calls))
    self.assertEqual(10, test_trainer._global_step.numpy())
def train_eval(agent_name='ppo',
               warmstart_policy_dir=None,
               num_policy_iterations=0,
               num_iterations=100,
               batch_size=64,
               train_sequence_length=1,
               deploy_policy_name='saved_policy'):
    """Train for LLVM inliner.

    Iteratively saves the current policy, collects fresh data by compiling
    modules with that policy, and trains on the collected data.

    Args:
      agent_name: registered name of the agent to construct.
      warmstart_policy_dir: optional directory of a policy to warm-start from.
      num_policy_iterations: number of collect/train cycles to run.
      num_iterations: training iterations per policy iteration.
      batch_size: training batch size.
      train_sequence_length: length of training sequences.
      deploy_policy_name: which saved policy to deploy for data collection.
    """
    # Consistency fix: normalize the root directory the same way the
    # behavioral-cloning train_eval does, so '~' and redundant separators
    # in FLAGS.root_dir are handled instead of being used verbatim.
    root_dir = os.path.normpath(os.path.expanduser(FLAGS.root_dir))

    # Initialize trainer and policy saver.
    time_step_spec, action_spec = config.create_signature_specs(config.CONFIG)
    tf_agent = agent_creators.create_agent(agent_name, time_step_spec,
                                           action_spec)
    llvm_trainer = trainer.Trainer(root_dir=root_dir, agent=tf_agent)
    policy_dict = {
        'saved_policy': tf_agent.policy,
        'saved_collect_policy': tf_agent.collect_policy,
    }
    saver = policy_saver.PolicySaver(policy_dict=policy_dict)

    # Optionally warm-start from a previously trained policy.
    if warmstart_policy_dir:
        warmstart_policy = policy_loader.load(warmstart_policy_dir)
        tf_agent.policy.update(
            policy=warmstart_policy,
            tau=1.0,
            tau_non_trainable=None,
            sort_variables_by_name=False)

    # Enumerate the modules (bitcode + command line) available for collection.
    with open(os.path.join(FLAGS.data_path, 'module_paths'), 'r') as f:
        module_paths = [
            os.path.join(FLAGS.data_path, name.rstrip('\n')) for name in f
        ]
    file_paths = [(path + '.bc', path + '.cmd') for path in module_paths]

    runner = inlining_runner.InliningRunner(
        clang_path=FLAGS.clang_path, llvm_size_path=FLAGS.llvm_size_path)

    sequence_example_iterator_fn = (
        data_reader.create_sequence_example_iterator_fn(
            agent_name=agent_name,
            config=config.CONFIG,
            batch_size=batch_size,
            train_sequence_length=train_sequence_length))

    data_collector = local_data_collector.LocalDataCollector(
        file_paths=file_paths,
        num_workers=FLAGS.num_workers,
        num_modules=FLAGS.num_modules,
        runner=runner.collect_data,
        parser=sequence_example_iterator_fn)

    for policy_iteration in range(num_policy_iterations):
        # Save the current policy, then collect data by deploying it.
        policy_path = os.path.join(root_dir, 'policy', str(policy_iteration))
        saver.save(policy_path)

        dataset_iter = data_collector.collect_data(
            policy_path=os.path.join(policy_path, deploy_policy_name))
        llvm_trainer.train(dataset_iter, num_iterations)

        data_collector.on_dataset_consumed(dataset_iter)

    # Save final policy.
    saver.save(root_dir)