Example #1
    def test_inference(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            self._network,
            tf.compat.v1.train.AdamOptimizer(),
            num_outer_dims=2)
        test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                       agent=test_agent)

        inference_batch_size = 1
        random_time_step = tensor_spec.sample_spec_nest(
            self._time_step_spec, outer_dims=(inference_batch_size, ))

        initial_policy_state = test_trainer._agent.policy.get_initial_state(
            inference_batch_size)

        action_outputs = test_trainer._agent.policy.action(
            random_time_step, initial_policy_state)
        self.assertAllEqual([inference_batch_size],
                            action_outputs.action.shape)

        action_outputs = test_trainer._agent.policy.action(
            random_time_step, action_outputs.state)
        self.assertAllEqual([inference_batch_size],
                            action_outputs.action.shape)
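The three fixtures this test relies on (self._time_step_spec, self._action_spec, self._network) are created in the test class's setUp, which the listing omits. A minimal sketch of what that setup might look like, assuming TF-Agents' tensor_spec helpers and a small QNetwork (names and shapes here are illustrative, not taken from the source):

import tensorflow as tf
from tf_agents.networks import q_network
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import time_step as ts


class TrainerTest(tf.test.TestCase):

    def setUp(self):
        super().setUp()
        # Observation: one float feature; action: a binary decision.
        observation_spec = tensor_spec.TensorSpec((1,), tf.float32, name='obs')
        self._time_step_spec = ts.time_step_spec(observation_spec)
        self._action_spec = tensor_spec.BoundedTensorSpec(
            (), tf.int64, minimum=0, maximum=1, name='action')
        # A small Q-network mapping observations to per-action logits.
        self._network = q_network.QNetwork(
            observation_spec,
            self._action_spec,
            fc_layer_params=(10,))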
Example #2
def train_eval(agent_name='behavioral_cloning',
               num_iterations=100,
               batch_size=64,
               train_sequence_length=1):
    """Train for LLVM inliner."""
    root_dir = os.path.expanduser(FLAGS.root_dir)
    root_dir = os.path.normpath(root_dir)

    # Initialize trainer and policy saver.
    time_step_spec, action_spec = config.create_signature_specs(config.CONFIG)
    tf_agent = agent_creators.create_agent(agent_name, time_step_spec,
                                           action_spec)
    llvm_trainer = trainer.Trainer(root_dir=root_dir, agent=tf_agent)
    policy_dict = {
        'saved_policy': tf_agent.policy,
        'saved_collect_policy': tf_agent.collect_policy,
    }
    saver = policy_saver.PolicySaver(policy_dict=policy_dict)

    tfrecord_iterator_fn = data_reader.create_tfrecord_iterator_fn(
        agent_name=agent_name,
        config=config.CONFIG,
        batch_size=batch_size,
        train_sequence_length=train_sequence_length)

    # Train.
    dataset_iter = tfrecord_iterator_fn(FLAGS.data_path)
    llvm_trainer.train(dataset_iter, num_iterations)

    # Save final policy.
    saver.save(root_dir)
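FLAGS.root_dir and FLAGS.data_path are absl flags defined elsewhere in the module. A minimal sketch of how this entry point might be wired up, assuming the standard absl pattern (the flag definitions below are illustrative, not copied from the source):

from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string('root_dir', None, 'Root directory for checkpoints and policies.')
flags.DEFINE_string('data_path', None, 'Path to the training .tfrecord data.')


def main(_):
    train_eval(agent_name='behavioral_cloning', num_iterations=100)


if __name__ == '__main__':
    app.run(main)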
Example #3
    def test_trainer_initialization(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            self._network,
            tf.compat.v1.train.AdamOptimizer(),
            num_outer_dims=2)
        test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                       agent=test_agent)
        self.assertEqual(0, test_trainer._global_step.numpy())
Example #4
    def test_training(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            self._network,
            tf.compat.v1.train.AdamOptimizer(),
            num_outer_dims=2)
        test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                       agent=test_agent)
        self.assertEqual(0, test_trainer._global_step.numpy())

        dataset_iter = _create_test_data(batch_size=3, sequence_length=3)
        test_trainer.train(dataset_iter, num_iterations=10)
        self.assertEqual(10, test_trainer._global_step.numpy())
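_create_test_data is a module-level helper the listing does not show. A plausible sketch, assuming it returns an infinite iterator of constant Trajectory batches shaped [batch_size, sequence_length, ...] to match the fixture specs above (the real helper may differ):

import tensorflow as tf
from tf_agents.trajectories import trajectory


def _create_test_data(batch_size, sequence_length):
    # One constant batch of trajectories matching the fixture specs.
    test_trajectory = trajectory.Trajectory(
        step_type=tf.zeros([batch_size, sequence_length], tf.int32),
        observation=tf.ones([batch_size, sequence_length, 1], tf.float32),
        action=tf.zeros([batch_size, sequence_length], tf.int64),
        policy_info=(),
        next_step_type=tf.zeros([batch_size, sequence_length], tf.int32),
        reward=tf.zeros([batch_size, sequence_length], tf.float32),
        discount=tf.ones([batch_size, sequence_length], tf.float32))

    while True:  # train() pulls one batch per iteration from this iterator.
        yield test_trajectory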
Example #5
    def test_training(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            self._network,
            tf.compat.v1.train.AdamOptimizer(),
            num_outer_dims=2)
        test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                       agent=test_agent)
        self.assertEqual(0, test_trainer._global_step.numpy())

        dataset_iter = _create_test_data(batch_size=3, sequence_length=3)
        monitor_dict = {'test': 1}

        with mock.patch.object(tf.summary, 'scalar',
                               autospec=True) as mock_scalar_summary:
            test_trainer.train(dataset_iter, monitor_dict, num_iterations=10)
            self.assertEqual(
                10,
                sum(1 for c in mock_scalar_summary.mock_calls
                    if c[2]['name'] == 'test'))
            self.assertEqual(10, test_trainer._global_step.numpy())
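The mock here exercises the monitoring path: each entry in mock_calls is a (name, args, kwargs) triple, so the generator counts how many tf.summary.scalar calls carried name='test', asserting the monitor_dict entry is logged once per training iteration while the global step still advances to 10.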
Example #6
def train_eval(agent_name='ppo',
               warmstart_policy_dir=None,
               num_policy_iterations=0,
               num_iterations=100,
               batch_size=64,
               train_sequence_length=1,
               deploy_policy_name='saved_policy'):
    """Train for LLVM inliner."""
    root_dir = FLAGS.root_dir

    # Initialize trainer and policy saver.
    time_step_spec, action_spec = config.create_signature_specs(config.CONFIG)
    tf_agent = agent_creators.create_agent(agent_name, time_step_spec,
                                           action_spec)
    llvm_trainer = trainer.Trainer(root_dir=root_dir, agent=tf_agent)
    policy_dict = {
        'saved_policy': tf_agent.policy,
        'saved_collect_policy': tf_agent.collect_policy,
    }
    saver = policy_saver.PolicySaver(policy_dict=policy_dict)

    if warmstart_policy_dir:
        warmstart_policy = policy_loader.load(warmstart_policy_dir)
        tf_agent.policy.update(policy=warmstart_policy,
                               tau=1.0,
                               tau_non_trainable=None,
                               sort_variables_by_name=False)

    with open(os.path.join(FLAGS.data_path, 'module_paths'), 'r') as f:
        module_paths = [
            os.path.join(FLAGS.data_path, name.rstrip('\n')) for name in f
        ]
        file_paths = [(path + '.bc', path + '.cmd') for path in module_paths]

    runner = inlining_runner.InliningRunner(
        clang_path=FLAGS.clang_path, llvm_size_path=FLAGS.llvm_size_path)

    sequence_example_iterator_fn = (
        data_reader.create_sequence_example_iterator_fn(
            agent_name=agent_name,
            config=config.CONFIG,
            batch_size=batch_size,
            train_sequence_length=train_sequence_length))

    data_collector = local_data_collector.LocalDataCollector(
        file_paths=file_paths,
        num_workers=FLAGS.num_workers,
        num_modules=FLAGS.num_modules,
        runner=runner.collect_data,
        parser=sequence_example_iterator_fn)

    for policy_iteration in range(num_policy_iterations):
        policy_path = os.path.join(root_dir, 'policy', str(policy_iteration))
        saver.save(policy_path)

        dataset_iter = data_collector.collect_data(
            policy_path=os.path.join(policy_path, deploy_policy_name))
        llvm_trainer.train(dataset_iter, num_iterations)

        data_collector.on_dataset_consumed(dataset_iter)

    # Save final policy.
    saver.save(root_dir)
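Compared with Example #2, this variant interleaves training with on-policy data collection: each policy iteration saves the current policy under root_dir/policy/<iteration>, deploys the copy named by deploy_policy_name to compile the sampled modules, trains on the collected sequence examples, and then releases the consumed dataset; the final policy is saved to root_dir itself.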