def learn(self):
    """Train a DDPG agent on ``self.env`` with epsilon-greedy exploration.

    Runs ``self.episodes`` episodes. Each step either queries the agent's
    policy or samples a random action (with brake clamped and steering
    doubled — env-specific shaping, presumably CarRacing; TODO confirm),
    pushes the transition into replay memory, and updates the networks
    every 64 steps. Epsilon decays per step down to a floor of 0.1, and an
    episode is cut short as soon as its cumulative reward goes negative.

    Returns:
        The trained actor network of the DDPG agent.
    """
    learner = DDPGAgent(
        env=self.env,
        replay_memory_size=REPLAY_MEMORY_SIZE,
        learning_rate=LEARNING_RATE,
        batch_size=MINIBATCH_SIZE,
        gamma=GAMMA,
        tau=TAU,
    )
    # NOTE(review): only 'scores' is ever appended below; 'avg'/'min'/'max'
    # stay empty — possibly filled by a caller, or dead. Verify.
    stats = {'scores': [], 'avg': [], 'min': [], 'max': []}
    for episode in tqdm(range(1, self.episodes + 1), ascii=True, unit='episodes'):
        print(self.epsilon)
        # [policy picks, random picks] for this episode
        pick_counts = [0, 0]
        state = self.convert_gray(self.env.reset())
        done = False
        total_reward = 0
        step_count = 0
        while not done:
            step_count += 1
            if np.random.random() <= self.epsilon:
                # Explore: random action with env-specific shaping.
                pick_counts[1] += 1
                action = self.env.action_space.sample()
                action[2] = 0.2 if action[2] > 0.2 else action[2]
                action[1] *= 2
            else:
                # Exploit the current policy.
                pick_counts[0] += 1
                action = learner.get_action(state)
            next_state, reward, done, _ = self.env.step(action)
            # Render only on reporting episodes to keep training fast.
            if episode % self.results_every_n_episodes == 0:
                self.env.render()
            total_reward += reward
            next_state = self.convert_gray(next_state)
            learner.memory.push(state, action, reward, next_state)
            # Batch the network updates instead of updating every step.
            if step_count % 64 == 0:
                learner.update()
            state = next_state
            # Per-step epsilon decay with a soft floor at 0.1 (the last
            # decrement may dip slightly below it).
            if self.epsilon > 0.1:
                self.epsilon -= self.epsilon_decay_value
            # Abort hopeless episodes early.
            if total_reward < 0:
                break
        print(pick_counts)
        print(total_reward)
        stats['scores'].append(total_reward)
    self.env.close()
    return learner.actor
## extract data state = env_data["observation"] goal = env_data["desired_goal"] ## logging rewards episode_reward = 0 for step in range(STEPS_PER_EPISODE): ## normalize state and goal # state = normalizer(state, 5.0) # goal = normalizer(goal, 5.0) ## get action from behavioural policy action = agent.get_action(state, goal) if action is not None: action = noise.get_action(action, step) else: action = env.action_space.sample() time.sleep(0.002) next_state, reward, _, _ = env.step(action) env.render() episode_reward += reward ## store transition - Standard Experience Replay state_rep = np.concatenate((state, goal), axis=0) next_state_rep = np.concatenate((next_state["observation"], goal), axis=0)
# Minibatch size used for each agent.train() call once the replay buffer
# holds enough samples.
batch_size = 128
rewards = []       # per-episode returns (appended later, off-screen — verify)
avg_rewards = []   # running averages (appended later, off-screen — verify)
for episode in range(50):
    state = env.reset()
    # Dict-observation unwrapping is disabled; env presumably returns a flat
    # array here — confirm against the env in use.
    #state = state['observation']
    #print("state = ", state)
    # Reset the Ornstein-Uhlenbeck exploration noise at each episode start.
    ou_noise.reset()
    episode_reward = 0
    for step in range(500):
        # Only render the last 5 episodes to keep training fast.
        if episode >= 45:
            env.render()
        #action = agent.get_action(state, ou_noise)
        action = agent.get_action(state, ou_noise)
        #print("action = ", action)
        new_state, reward, done, _ = env.step(action)
        #print("new state =", new_state)
        #new_state = new_state['observation']
        #print("new state =", new_state)
        agent.memory.push(state, action, reward, new_state, done)
        # Train only once the buffer can fill a minibatch.
        if len(agent.memory) > batch_size:
            agent.train(batch_size)
        state = new_state
        episode_reward += reward
        if done:
            # NOTE(review): fragment is truncated here — the body of this
            # branch continues beyond the visible source.
            if episode == 0: