# Example #1
def test_action_generator():
    """Check that BehaviorSpec builds well-formed empty and random actions
    for both continuous and discrete action spaces."""
    batch = 4

    # --- Continuous action space ---
    n_actions = 30
    spec = BehaviorSpec(
        observation_shapes=[(5, )],
        action_type=ActionType.CONTINUOUS,
        action_shape=n_actions,
    )
    empty = spec.create_empty_action(batch)
    assert np.array_equal(empty,
                          np.zeros((batch, n_actions), dtype=np.float32))

    sampled = spec.create_random_action(batch)
    assert sampled.dtype == np.float32
    assert sampled.shape == (batch, n_actions)
    # Continuous samples are expected to lie in [-1, 1].
    assert np.min(sampled) >= -1
    assert np.max(sampled) <= 1

    # --- Discrete action space ---
    branches = (10, 20, 30)
    spec = BehaviorSpec(
        observation_shapes=[(5, )],
        action_type=ActionType.DISCRETE,
        action_shape=branches,
    )
    empty = spec.create_empty_action(batch)
    assert np.array_equal(empty,
                          np.zeros((batch, len(branches)), dtype=np.int32))

    sampled = spec.create_random_action(batch)
    assert sampled.dtype == np.int32
    assert sampled.shape == (batch, len(branches))
    assert np.min(sampled) >= 0
    # Every sampled index must stay below its branch's size.
    for branch_idx, branch_size in enumerate(branches):
        assert np.max(sampled[:, branch_idx]) < branch_size
# Example #2
def create_agent_buffer(behavior_spec: BehaviorSpec,
                        number: int,
                        reward: float = 0.0) -> AgentBuffer:
    """Build an AgentBuffer containing ``number`` identical experience steps.

    Observations and the action are drawn once from ``behavior_spec`` and
    repeated for every step; ``done`` is always 0 and ``masks`` always 1.
    """
    buffer = AgentBuffer()
    # Sample one set of current/next observations up front; every step in
    # the buffer reuses these same arrays.
    current_obs = [
        np.random.normal(size=shape)
        for shape in behavior_spec.observation_shapes
    ]
    next_obs = [
        np.random.normal(size=shape)
        for shape in behavior_spec.observation_shapes
    ]
    action = behavior_spec.create_random_action(1)[0, :]

    for _ in range(number):
        split_curr = SplitObservations.from_observations(current_obs)
        split_next = SplitObservations.from_observations(next_obs)
        for idx, visual in enumerate(split_curr.visual_observations):
            buffer[f"visual_obs{idx}"].append(visual)
            buffer[f"next_visual_obs{idx}"].append(
                split_next.visual_observations[idx])
        buffer["vector_obs"].append(split_curr.vector_observations)
        buffer["next_vector_in"].append(split_next.vector_observations)
        buffer["actions"].append(action)
        buffer["done"].append(np.zeros(1, dtype=np.float32))
        buffer["reward"].append(np.full(1, reward, dtype=np.float32))
        buffer["masks"].append(np.ones(1, dtype=np.float32))
    return buffer