import gym


class AntAgent:
    def __init__(self, render=False, model=None):
        # Create an environment
        self.environment = gym.make('MountainCarContinuous-v0')
        # Reset environment when an agent is initialized
        self.current_observation = self.reset_environment()
        self.render = render
        self.model = model
        self.buffer = ReplayBuffer()

    def reset_environment(self):
        current_observation = self.environment.reset()
        return current_observation

    def get_action(self, current_observation):
        """Fetch an action according to the model policy."""
        if self.model is None:
            # Without a model, fall back to uniform random actions
            action = self.environment.action_space.sample()
        else:
            action = self.model.predict(current_observation)
        return action

    def get_transitions(self, action):
        """Take one step in the environment and return the observations."""
        next_observation, reward, done, _ = self.environment.step(action)
        if self.render:
            self.environment.render()
        return next_observation, reward, done

    def run_episode(self, num_episodes=1):
        """Run `num_episodes` episodes using the `model` policy."""
        for episode in range(num_episodes):
            self.current_observation = self.reset_environment()
            episode_id = self.buffer.create_episode()
            done = False
            while not done:
                # Build a fresh transition dict each step so transitions
                # already stored in the buffer are not mutated later
                transition = dict()
                transition['current_observation'] = self.current_observation
                transition['action'] = self.get_action(self.current_observation)
                transition['next_observation'], transition['reward'], done = \
                    self.get_transitions(transition['action'])
                self.buffer.add_sample(episode_id, transition)
                # Advance to the next state before choosing the next action
                self.current_observation = transition['next_observation']
            self.buffer.add_episode(episode_id)

    def learn(self, step=0, restore=False):
        """Train the SAC model using transitions in the replay buffer."""
        if self.model is None:
            raise Exception("This agent has no brain! Add a model that implements fit() to train.")
        # Sample an array of transitions from the replay buffer.
        transition_matrices = self.buffer.fetch_sample()
        # After the first call, restore the model from its checkpoint.
        if step != 0:
            restore = True
        # Fit the SAC model.
        self.model.fit(transition_matrices, restore=restore, global_step=step)
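# The agent above assumes a ReplayBuffer exposing create_episode(),
# add_sample(), add_episode(), and fetch_sample(). A minimal sketch of one
# compatible implementation follows; the internal layout (per-episode
# transition lists, flattened into NumPy arrays on sampling) is an
# assumption, not the original author's code.
import random

import numpy as np


class ReplayBuffer:
    def __init__(self, max_episodes=1000):
        self.max_episodes = max_episodes
        self.episodes = {}      # finished episodes, keyed by id
        self.in_progress = {}   # episodes still being filled
        self.next_episode_id = 0

    def create_episode(self):
        """Open a new episode and return its id."""
        episode_id = self.next_episode_id
        self.next_episode_id += 1
        self.in_progress[episode_id] = []
        return episode_id

    def add_sample(self, episode_id, transition):
        """Append one transition dict to an in-progress episode."""
        # Copy so later mutation of the caller's dict cannot corrupt the buffer
        self.in_progress[episode_id].append(dict(transition))

    def add_episode(self, episode_id):
        """Mark an episode as finished, evicting the oldest one if full."""
        self.episodes[episode_id] = self.in_progress.pop(episode_id)
        if len(self.episodes) > self.max_episodes:
            del self.episodes[min(self.episodes)]

    def fetch_sample(self, num_samples=256):
        """Return a random minibatch of transitions as stacked arrays."""
        all_transitions = [t for ep in self.episodes.values() for t in ep]
        batch = random.sample(all_transitions,
                              min(num_samples, len(all_transitions)))
        keys = ('current_observation', 'action', 'reward', 'next_observation')
        return tuple(np.array([t[k] for t in batch]) for k in keys)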
# Store transition in replay buffer
replay.store(current_state, action, reward, next_state, end)

# Update current state
current_state = next_state

step += 1
global_step += 1

# Train on every step (step % 1 == 0) once the random warm-up phase is over
if (step % 1 == 0) and (global_step > args.start_steps):
    for epoch in range(args.epochs):
        # Randomly sample a minibatch of transitions from the replay buffer
        current_states, actions, rewards, next_states, ends = replay.fetch_sample(
            num_samples=args.batch_size)

        # Perform a single step of gradient descent on the Q and policy networks
        critic1_loss, critic2_loss, actor_loss, alpha_loss = sac.train(
            current_states, actions, rewards, next_states, ends)

        if args.verbose:
            print(episode, global_step, epoch,
                  critic1_loss.numpy(), critic2_loss.numpy(),
                  actor_loss.numpy(), episode_reward)

        with writer.as_default():
            tf.summary.scalar("actor_loss", actor_loss, sac.epoch_step)
            tf.summary.scalar("critic1_loss", critic1_loss, sac.epoch_step)
            tf.summary.scalar("critic2_loss", critic2_loss, sac.epoch_step)
            tf.summary.scalar("alpha_loss", alpha_loss, sac.epoch_step)
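# The loop above assumes a `writer` created before training starts. A minimal
# setup sketch using TensorFlow 2's summary API; the "logs/sac" directory
# name is an assumption, not from the original code:
import tensorflow as tf

writer = tf.summary.create_file_writer("logs/sac")
# Scalars logged inside `with writer.as_default():` can then be inspected
# with TensorBoard, e.g. `tensorboard --logdir logs/sac`.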