import torch
import torch.nn.functional as F
from torch.autograd import Variable

# Project-local helpers assumed to be importable from the surrounding repo:
# Episode, ModelInput, resnet_input_transform, gpuify.


class A3CAgent:
    """ Base class for all actor-critic agents. """

    def __init__(self, model, args, rank, gpu_id=-1):
        self.gpu_id = gpu_id
        self._model = None
        self.model = model(args)
        self._episode = Episode(args, gpu_id, rank)
        self.eps_len = 0
        self.values = []
        self.log_probs = []
        self.rewards = []
        self.entropies = []
        self.done = True
        self.info = None
        self.reward = 0
        self.hidden = None
        self.actions = []
        # Initialized here so they exist before the first call to action().
        self.success = False
        self.max_length = False
        self.verbose = args.verbose
        self.max_episode_length = args.max_episode_length
        self.hidden_state_sz = args.hidden_state_sz
        self.action_space = args.action_space

    def sync_with_shared(self, shared_model):
        """ Sync with the shared model. """
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.model.load_state_dict(shared_model.state_dict())
        else:
            self.model.load_state_dict(shared_model.state_dict())

    def eval_at_state(self):
        model_input = ModelInput()
        model_input.state = self.preprocess_frame(
            self.episode.state_for_agent())
        model_input.hidden = self.hidden
        return self.model.forward(model_input)

    @property
    def state(self):
        return self.preprocess_frame(self.episode.state_for_agent())

    @property
    def episode(self):
        """ Return the current episode. """
        return self._episode

    @property
    def environment(self):
        """ Return the current environment. """
        return self.episode.environment

    @property
    def model(self):
        """ Return the model. """
        return self._model

    def print_info(self):
        """ Print the actions taken so far. """
        for action in self.actions:
            print(action)

    @model.setter
    def model(self, model_to_set):
        self._model = model_to_set
        if self.gpu_id >= 0 and self._model is not None:
            with torch.cuda.device(self.gpu_id):
                self._model = self.model.cuda()

    def _increment_episode_length(self):
        self.eps_len += 1
        # End the episode once it reaches the maximum length.
        if self.eps_len >= self.max_episode_length and not self.done:
            self.max_length = True
            self.done = True
        else:
            self.max_length = False

    def action(self, training=True):
        """ Take one step in the environment: sample an action from the
        policy in training mode, or act greedily in evaluation mode. """
        if training:
            self.model.train()
        else:
            self.model.eval()

        model_output = self.eval_at_state()
        self.hidden = model_output.hidden

        # Convert the policy logits into a probability distribution.
        prob = F.softmax(model_output.policy, dim=1)
        if training:
            # Sample an action from the policy.
            action = prob.multinomial(1).data
        else:
            # Take the highest-probability action.
            action = prob.argmax(dim=1, keepdim=True).data

        log_prob = F.log_softmax(model_output.policy, dim=1)
        entropy = -(log_prob * prob).sum(1)
        log_prob = log_prob.gather(1, Variable(action))

        self.reward, self.done, self.info = self.episode.step(action[0, 0])

        # Store the rollout statistics needed for the A3C loss.
        self.entropies.append(entropy)
        self.values.append(model_output.value)
        self.log_probs.append(log_prob)
        self.rewards.append(self.reward)
        self.actions.append(action)
        self._increment_episode_length()

        # Record whether the episode has succeeded.
        self.success = self.episode.success

        return model_output.value, prob, action

    def reset_hidden(self):
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.hidden = (
                    torch.zeros(1, self.hidden_state_sz).cuda(),
                    torch.zeros(1, self.hidden_state_sz).cuda(),
                )
        else:
            self.hidden = (
                torch.zeros(1, self.hidden_state_sz),
                torch.zeros(1, self.hidden_state_sz),
            )

    def repackage_hidden(self):
        # Detach the recurrent state so gradients do not flow across rollouts.
        self.hidden = (self.hidden[0].detach(), self.hidden[1].detach())

    def clear_actions(self):
        """ Clear the information stored by the agent. """
        self.values = []
        self.log_probs = []
        self.rewards = []
        self.entropies = []
        self.actions = []
        self.reward = 0

    def preprocess_frame(self, frame):
        """ Preprocess the current frame for input into the model. """
        frame = resnet_input_transform(frame, 84)
        state = torch.Tensor(frame)
        return gpuify(state.unsqueeze(0), self.gpu_id)

    def exit(self):
        self.episode.environment.controller.stop()

    def reset_episode(self):
        """ Reset the episode. """
        return self._episode.reset()