Example #1
    def learn(self):
        # build the DDPG agent from the hyperparameter constants
        agent = DDPGAgent(
            env=self.env,
            replay_memory_size=REPLAY_MEMORY_SIZE,
            learning_rate=LEARNING_RATE,
            batch_size=MINIBATCH_SIZE,
            gamma=GAMMA,
            tau=TAU
        )

        stats = {'scores': [], 'avg': [], 'min': [], 'max': []}
        for ep in tqdm(range(1, self.episodes + 1), ascii=True, unit='episodes'):

            print(self.epsilon)
            action_stats = [0, 0]  # [policy actions, random actions]
            current_state = self.env.reset()
            current_state = self.convert_gray(current_state)  # preprocess the observation to grayscale

            done = False
            score = 0
            steps = 0

            while not done:
                steps += 1

                if np.random.random() > self.epsilon:
                    # exploit: let the DDPG actor pick the action
                    action_stats[0] += 1
                    action = agent.get_action(current_state)
                else:
                    # explore: sample a random action, capping action[2] and doubling action[1]
                    action_stats[1] += 1
                    action = self.env.action_space.sample()
                    action[2] = min(action[2], 0.2)
                    action[1] = action[1] * 2

                new_state, reward, done, _ = self.env.step(action)
                if ep % self.results_every_n_episodes == 0:
                    self.env.render()

                score += reward

                new_state = self.convert_gray(new_state)

                # store the transition in the replay buffer
                agent.memory.push(current_state, action, reward, new_state)

                # run a training update every 64 environment steps
                if steps % 64 == 0:
                    agent.update()

                current_state = new_state

                # decay epsilon towards a floor of 0.1
                if self.epsilon > 0.1:
                    self.epsilon -= self.epsilon_decay_value

                # end the episode early once the score turns negative
                if score < 0:
                    break

            print(action_stats)
            print(score)
            stats['scores'].append(score)
        self.env.close()
        return agent.actor
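
Inside the episode loop, agent.update() is where the actor and critic actually learn from replayed minibatches; in DDPG that step conventionally finishes with a Polyak (soft) update of the target networks controlled by the tau value passed to the constructor. The function below is only a minimal sketch of that soft update, assuming PyTorch networks; the function name and the way it would be called are assumptions, not part of the example.

import torch

def soft_update(online_net, target_net, tau):
    # Polyak averaging: target <- tau * online + (1 - tau) * target, parameter-wise
    with torch.no_grad():
        for param, target_param in zip(online_net.parameters(), target_net.parameters()):
            target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)

# hypothetical use at the end of agent.update():
# soft_update(agent.actor, agent.actor_target, TAU)
# soft_update(agent.critic, agent.critic_target, TAU)

A small tau (e.g. 0.001) keeps the target networks changing slowly, which is what keeps the critic's bootstrapped targets stable enough to learn from.
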
Example #2
		
		## extract data
		state = env_data["observation"]
		goal = env_data["desired_goal"]

		## logging rewards
		episode_reward = 0

		for step in range(STEPS_PER_EPISODE):
			## normalize state and goal
			# state = normalizer(state, 5.0)
			# goal = normalizer(goal, 5.0)

			## get action from behavioural policy
			action = agent.get_action(state, goal)
			if action is not None:
				action = noise.get_action(action, step)
			else:
				action = env.action_space.sample()

			time.sleep(0.002)
			next_state, reward, _, _ = env.step(action)
			
			env.render()

			episode_reward += reward

			## store transition - Standard Experience Replay
			state_rep = np.concatenate((state, goal), axis=0)
			next_state_rep = np.concatenate((next_state["observation"], goal), axis=0)
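
The snippet above passes the policy's action through noise.get_action(action, step), and the loop that follows does the same with an ou_noise object; neither helper is shown. In DDPG the usual choice is Ornstein-Uhlenbeck exploration noise, and the class below is only a minimal sketch of what such a helper might look like; its name, constructor arguments, and defaults are assumptions rather than code taken from the examples.

import numpy as np

class OUNoise:
    # Ornstein-Uhlenbeck process: temporally correlated noise for continuous actions
    def __init__(self, action_space, mu=0.0, theta=0.15, sigma=0.3):
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.low = action_space.low
        self.high = action_space.high
        self.dim = action_space.shape[0]
        self.reset()

    def reset(self):
        # restart the process at its long-run mean (typically once per episode)
        self.state = np.ones(self.dim) * self.mu

    def get_action(self, action, t=0):
        # evolve the noise one step and add it to the action, keeping it within bounds
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(self.dim)
        self.state = self.state + dx
        return np.clip(action + self.state, self.low, self.high)

In the module-level training loop below, ou_noise.reset() would restart this process at the start of every episode so that exploration noise does not drift across episode boundaries.
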
batch_size = 128
rewards = []
avg_rewards = []

for episode in range(50):
    state = env.reset()
    #state = state['observation']
    #print("state = ", state)
    ou_noise.reset()
    episode_reward = 0

    for step in range(500):
        # render only the last few episodes
        if episode >= 45:
            env.render()
        #action = agent.get_action(state, ou_noise)
        action = agent.get_action(state, ou_noise)
        #print("action = ", action)
        new_state, reward, done, _ = env.step(action)
        #print("new state =", new_state)
        #new_state = new_state['observation']
        #print("new state =", new_state)
        # store the transition in the replay buffer
        agent.memory.push(state, action, reward, new_state, done)

        # train once the buffer holds at least one full minibatch
        if len(agent.memory) > batch_size:
            agent.train(batch_size)

        state = new_state
        episode_reward += reward

        if done:
            if episode == 0: