def test_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke test: a continuous-action BC model built from a mocked Unity
    environment can run one inference step without raising."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            # Route environment communication through the fake communicator.
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())

            # Two observations of six vector features each.
            vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                                   [3, 4, 5, 3, 4, 5]])
            fetches = [model.sample_action, model.policy]
            sess.run(fetches, feed_dict={
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: vector_obs,
            })
            env.close()
def test_visual_cc_bc_model():
    """Smoke test: a continuous-action BC model with two visual inputs can
    run one inference step without raising."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            brain_params = make_brain_parameters(
                discrete_action=False, visual_inputs=2)
            model = BehavioralCloningModel(brain_params)
            sess.run(tf.global_variables_initializer())

            # Two observations: six vector features plus two 40x30 RGB frames.
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
            }
            sess.run([model.sample_action, model.policy], feed_dict=feed_dict)
def test_dc_bc_model():
    """Smoke test: a discrete-action BC model can run one inference step
    (with a fully-open action mask) without raising."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            brain_params = make_brain_parameters(
                discrete_action=True, visual_inputs=0)
            model = BehavioralCloningModel(brain_params)
            sess.run(tf.global_variables_initializer())

            # Two observations; dropout disabled (keep prob 1.0) and every
            # discrete action allowed by the mask.
            feed_dict = {
                model.batch_size: 2,
                model.dropout_rate: 1.0,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.action_masks: np.ones([2, 2]),
            }
            sess.run([model.sample_action, model.action_probs],
                     feed_dict=feed_dict)
def __init__(self, seed, brain, trainer_parameters, load):
    """
    :param seed: Random seed.
    :param brain: Assigned Brain object.
    :param trainer_parameters: Defined training parameters.
    :param load: Whether a pre-trained model will be loaded or a new one created.
    """
    super(BCPolicy, self).__init__(seed, brain, trainer_parameters)
    self.last_loss = 0

    # Fix: the original entered `self.graph.as_default()` twice in
    # immediately nested identical `with` blocks; entering it once is
    # sufficient and behaviorally equivalent.
    with self.graph.as_default():
        self.model = BehavioralCloningModel(
            h_size=int(trainer_parameters["hidden_units"]),
            lr=float(trainer_parameters["learning_rate"]),
            n_layers=int(trainer_parameters["num_layers"]),
            m_size=self.m_size,
            normalize=False,
            use_recurrent=trainer_parameters["use_recurrent"],
            brain=brain,
            seed=seed,
        )
        if load:
            self._load_graph()
        else:
            self._initialize_graph()

    # Tensors fetched on every evaluate() / update() call.
    self.inference_dict = {
        "action": self.model.sample_action,
        "action_probs": self.model.action_probs
    }
    self.update_dict = {
        "policy_loss": self.model.loss,
        "update_batch": self.model.update,
    }
    if self.use_recurrent:
        self.inference_dict["memory_out"] = self.model.memory_out

    self.evaluate_rate = 1.0
    self.update_rate = 0.5
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke test: a discrete-action BC model with two visual inputs, built
    from a mocked Unity environment, can run one inference step."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            # Route environment communication through the fake communicator.
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=2)
            env = UnityEnvironment(" ")
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())

            # Two observations: six vector features, two 40x30 RGB frames,
            # dropout disabled, and every discrete action unmasked.
            feed_dict = {
                model.batch_size: 2,
                model.dropout_rate: 1.0,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
                model.action_masks: np.ones([2, 2]),
            }
            sess.run([model.sample_action, model.action_probs],
                     feed_dict=feed_dict)
            env.close()
def __init__(self, seed, brain, trainer_parameters, load):
    """
    :param seed: Random seed.
    :param brain: Assigned Brain object.
    :param trainer_parameters: Defined training parameters.
    :param load: Whether a pre-trained model will be loaded or a new one created.
    """
    super(BCPolicy, self).__init__(seed, brain, trainer_parameters)

    # Fix: the original entered `self.graph.as_default()` twice in
    # immediately nested identical `with` blocks; entering it once is
    # sufficient and behaviorally equivalent.
    with self.graph.as_default():
        self.model = BehavioralCloningModel(
            h_size=int(trainer_parameters['hidden_units']),
            lr=float(trainer_parameters['learning_rate']),
            n_layers=int(trainer_parameters['num_layers']),
            m_size=self.m_size,
            normalize=False,
            use_recurrent=trainer_parameters['use_recurrent'],
            brain=brain,
            seed=seed)
        if load:
            self._load_graph()
        else:
            self._initialize_graph()

    # Tensors fetched on every evaluate() / update() call.
    self.inference_dict = {'action': self.model.sample_action}
    self.update_dict = {
        'policy_loss': self.model.loss,
        'update_batch': self.model.update
    }
    if self.use_recurrent:
        self.inference_dict['memory_out'] = self.model.memory_out

    self.evaluate_rate = 1.0
    self.update_rate = 0.5