import time
import typing
from copy import copy

from tqdm import tqdm

# Project-local names are assumed to be importable from this package:
# Environment, UCTNode, Critic, Actor, plot_learning, and the config dicts
# env_cfg, critic_cfg, actor_cfg, training_cfg.


def mcts(simulation_time: float, env: Environment, root_node: typing.Optional[UCTNode] = None) -> UCTNode:
    """Run Monte Carlo Tree Search from the environment's current state for
    `simulation_time` seconds and return the (expanded) root node."""
    start_time = time.time()
    # Reuse a previously built subtree if one is supplied; otherwise start a
    # fresh tree rooted at the environment's current state.
    if root_node is None:
        root_node = UCTNode(
            state=env.get_state(),
            active_player=env.get_active_player(),
            action=None,
            parent=None,
            num_actions=env.get_num_actions(),
            valid_actions=env.get_valid_actions(),
        )
    while time.time() - start_time < simulation_time:
        # Selection: descend the tree until a leaf or a terminal state is reached.
        leaf_node, winner = root_node.select(env)
        if winner is not None:
            # Terminal state reached during selection: backpropagate the
            # result directly, no expansion or rollout needed.
            leaf_node.backup(winner)
            continue
        # Expansion, simulation (random rollout), and backpropagation.
        leaf_node.expand()
        winner = leaf_node.simulate(env)
        leaf_node.backup(winner)
    return root_node
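# Illustrative sketch only, not part of the project's API: the select() call
# above typically descends the tree by repeatedly picking the child that
# maximizes a UCT score of the form below. The function name `uct_score` and
# the exploration constant `c` are hypothetical, chosen for illustration.
import math


def uct_score(total_value: float, visits: int, parent_visits: int, c: float = 1.41) -> float:
    """UCT = average value (exploitation) + confidence bound (exploration)."""
    if visits == 0:
        # Unvisited children are preferred unconditionally.
        return float("inf")
    return total_value / visits + c * math.sqrt(math.log(parent_visits) / visits)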
def main():
    """
    Sets the parameters for the Environment, Critic, and Actor according to
    the imported config file.

    Creates an environment where a predefined number of episodes can be
    performed. Instantiates an actor to keep track of the policy, and a
    critic to keep track of the value at each state.

    Runs a predefined number of episodes, creating a new board for each
    episode. For each episode, the actor and the critic are updated according
    to the Actor-Critic model. Finally, epsilon is set to zero, and the
    environment plays a game with the updated policy.
    """
    env = Environment(env_cfg)
    granularity = env_cfg["granularity"]
    critic = Critic(critic_cfg, granularity)
    actor = Actor(actor_cfg)

    episodes = training_cfg["number_of_episodes"]
    visualize_episodes = training_cfg["visualize_episodes"]
    steps_per_episode = []

    for episode in tqdm(range(episodes), desc=f"Playing {episodes} episodes", colour='#39ff14'):
        env.new_simulation()
        path = []
        positions = []
        # Reset eligibility traces at the start of each episode.
        critic.reset_eli_dict()
        actor.reset_eli_dict()

        while not env.reached_top() and not env.reached_max_steps():
            env.update_steps()
            # Copy the state so the stored value is not mutated when the
            # environment advances.
            current_state = copy(env.get_state())
            legal_actions = env.get_actions()
            action = actor.get_action(state=current_state, legal_actions=legal_actions)
            path.append((str(current_state), str(action)))

            reward = env.perform_action(action=action)
            td_err = critic.compute_td_err(
                current_state=current_state, next_state=env.get_state(), reward=reward
            )

            # Previous states on the path are updated as well during the call
            # to train() by eligibility traces.
            critic.train(state=current_state, td_error=td_err)
            critic.update_eligs()

            # Update actor beliefs on SAPs (state-action pairs) for all pairs
            # seen thus far in the episode, most recent first.
            for i, sap in enumerate(reversed(path)):
                actor.update_eli_dict(state=str(sap[0]), action=str(sap[1]), i=i)
                actor.update_policy_dict(state=str(sap[0]), action=str(sap[1]), td_err=td_err)

            positions.append(env.get_position())

        print("steps used in this episode", env.steps)
        if episode in visualize_episodes:
            env.visualize_landscape(positions)
        steps_per_episode.append(env.steps)

    plot_learning(steps_per_episode)

    # Enable history tracking to visualize the final simulation.
    env.new_simulation()
    print(f"Actor final epsilon: {actor.epsilon}")
    actor.epsilon = 0  # Set exploration to 0: act greedily on the learned policy.
    print("Attempting final simulation to show you how smart I am now")
    while not env.reached_top() and not env.reached_max_steps():
        current_state = env.get_state()
        legal_actions = env.get_actions()
        action = actor.get_action(current_state, legal_actions)
        env.perform_action(action)
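# Assumed entry point: run the training loop when this module is executed
# directly (adjust if the project invokes main() through a different runner).
if __name__ == "__main__":
    main()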