Python FCSoftmaxPolicy 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: modules

클래스/타입: FCSoftmaxPolicy

hotexamples.com에서의 예제들: 5

Python FCSoftmaxPolicy - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python modules.FCSoftmaxPolicy의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

compute_policy(3)

FCSoftmaxPolicy(2)

자주 사용되는 메소드들

compute_policy (3)

FCSoftmaxPolicy (2)

예제 #1

파일 보기

파일: a3c_model.py 프로젝트: xueweili/a3c-starcraft

    def __init__(self, n_actions):
        """Create the head and the policy / value outputs built on top of it.

        Args:
            n_actions: forwarded to ``FCSoftmaxPolicy`` as its second
                argument (presumably the number of discrete actions --
                confirm against ``FCSoftmaxPolicy``).
        """
        super(A3CMLP, self).__init__()

        self.head = IndependentHead()
        # Both outputs are constructed from the head's output width.
        feature_width = self.head.n_output_channels
        self.pi = FCSoftmaxPolicy(feature_width, n_actions)
        self.v = FCVFunction(feature_width)
        self.reset_state()

예제 #2

파일 보기

파일: a3c_model.py 프로젝트: marvin521/a3c-deepmind-pytorch

    def __init__(self, n_actions):
        """Create the head, the policy / value outputs and the LSTM cell.

        Args:
            n_actions: forwarded to ``FCSoftmaxPolicy`` as its second
                argument (presumably the number of discrete actions --
                confirm against ``FCSoftmaxPolicy``).
        """
        super(A3CLSTM, self).__init__()

        self.head = NIPSDQNHead()
        # Every layer below is sized from the head's output width; the LSTM
        # cell uses it for both its input size and its hidden size.
        feature_width = self.head.n_output_channels
        self.pi = FCSoftmaxPolicy(feature_width, n_actions)
        self.v = FCVFunction(feature_width)
        self.lstm = nn.LSTMCell(feature_width, feature_width)
        self.reset_state()

예제 #3

파일 보기

파일: a3c_model.py 프로젝트: xueweili/a3c-starcraft

class A3CLSTM(nn.Module, A3CModel):
    """Model that runs a head, then an LSTM cell, then two outputs.

    The LSTM hidden and cell states are stored on the instance as
    ``self.h`` / ``self.c``. They are created lazily on the first
    ``pi_and_v`` call after ``reset_state``.
    """

    def __init__(self, n_actions):
        """Create the head, the policy / value outputs and the LSTM cell.

        Args:
            n_actions: forwarded to ``FCSoftmaxPolicy`` as its second
                argument.
        """
        super(A3CLSTM, self).__init__()

        self.head = IndependentHead()
        # The LSTM cell uses the head's output width for both its input
        # size and its hidden size.
        feature_width = self.head.n_output_channels
        self.pi = FCSoftmaxPolicy(feature_width, n_actions)
        self.v = FCVFunction(feature_width)
        self.lstm = nn.LSTMCell(feature_width, feature_width)
        self.reset_state()

    def reset_state(self):
        """Flag the recurrent state as uninitialised.

        The next ``pi_and_v`` call will replace ``self.h`` / ``self.c``
        with zeros.
        """
        self.un_init = True

    def pi_and_v(self, state, keep_same_state=False):
        """Run one step and return ``(policy, value)`` outputs.

        Args:
            state: input passed to the head; its first dimension is used
                as the batch size of the recurrent state.
            keep_same_state: when true, the stored ``self.h`` / ``self.c``
                are left untouched by this call.

        Returns:
            A tuple ``(self.pi.compute_policy(h), self.v(h))`` where ``h``
            is the hidden state produced by this step.
        """
        if self.un_init:
            n_rows = state.size()[0]
            width = self.head.n_output_channels
            initial_h = Variable(torch.zeros(n_rows, width))
            initial_c = Variable(torch.zeros(n_rows, width))
            self.h = initial_h
            self.c = initial_c
            self.un_init = False

        features = self.head(state)
        next_h, next_c = self.lstm(features, (self.h, self.c))
        if not keep_same_state:
            self.h = next_h
            self.c = next_c

        # The outputs always use the freshly computed hidden state, even
        # when it is not stored back on the instance.
        policy = self.pi.compute_policy(next_h)
        value = self.v(next_h)
        return policy, value

    def unchain_backward(self):
        """Detach the stored recurrent state in place, if one exists."""
        if not self.un_init:
            self.h.detach_()
            self.c.detach_()

예제 #4

파일 보기

파일: a3c_model.py 프로젝트: xueweili/a3c-starcraft

class PG(nn.Module, A3CModel):
    """Model with a head and a policy output only.

    It keeps no recurrent state, so ``reset_state`` and
    ``unchain_backward`` do nothing.
    """

    def __init__(self, n_actions):
        """Create the head and the policy output built on top of it.

        Args:
            n_actions: forwarded to ``FCSoftmaxPolicy`` as its second
                argument.
        """
        super(PG, self).__init__()

        self.head = IndependentHead()
        feature_width = self.head.n_output_channels
        self.pi = FCSoftmaxPolicy(feature_width, n_actions)
        self.reset_state()

    def reset_state(self):
        """Do nothing; there is no state to reset."""
        return None

    def pi_and_v(self, state):
        """Return the policy output for ``state``.

        Despite the method name, only the result of
        ``self.pi.compute_policy`` is returned -- not a (policy, value)
        pair.
        """
        return self.pi.compute_policy(self.head(state))

    def unchain_backward(self):
        """Do nothing; there is no state to detach."""
        return None

예제 #5

파일 보기

파일: a3c_model.py 프로젝트: xueweili/a3c-starcraft

class A3CLSTM_commnet(nn.Module, A3CModel):
    """Model that runs a ``CommnetHead``, then an LSTM cell, then two outputs.

    The LSTM hidden and cell states are stored on the instance as
    ``self.h`` / ``self.c``. They are created lazily on the first
    ``pi_and_v`` call after ``reset_state``.
    """

    def __init__(self, n_actions):
        """Create the head, the policy / value outputs and the LSTM cell.

        Args:
            n_actions: forwarded to ``FCSoftmaxPolicy`` as its second
                argument.
        """
        super(A3CLSTM_commnet, self).__init__()

        # Original author's note: "n * 128" (presumably the shape of the
        # head output -- confirm against CommnetHead).
        self.head = CommnetHead()
        # The LSTM cell uses the head's output width for both its input
        # size and its hidden size.
        feature_width = self.head.n_output_channels
        self.pi = FCSoftmaxPolicy(feature_width, n_actions)
        self.v = FCVFunction(feature_width)
        self.lstm = nn.LSTMCell(feature_width, feature_width)
        self.reset_state()

    def reset_state(self):
        """Flag the recurrent state as uninitialised.

        The next ``pi_and_v`` call will replace ``self.h`` / ``self.c``
        with zeros.
        """
        self.un_init = True

    def pi_and_v(self, state, keep_same_state=False):
        """Run one step and return ``(policy, value)`` outputs.

        Args:
            state: input passed to the head; its first dimension is used
                as the batch size of the recurrent state.
            keep_same_state: when true, the stored ``self.h`` / ``self.c``
                are left untouched by this call.

        Returns:
            A tuple ``(self.pi.compute_policy(h), self.v(h))`` where ``h``
            is the hidden state produced by this step.
        """
        if self.un_init:
            # Original author's note: the leading dimension is the number
            # of agents (not verifiable from this class alone).
            n_rows = state.size()[0]
            width = self.head.n_output_channels
            initial_h = Variable(torch.zeros(n_rows, width))
            initial_c = Variable(torch.zeros(n_rows, width))
            self.h = initial_h
            self.c = initial_c
            self.un_init = False

        features = self.head(state)
        next_h, next_c = self.lstm(features, (self.h, self.c))
        if not keep_same_state:
            self.h = next_h
            self.c = next_c

        # The outputs always use the freshly computed hidden state, even
        # when it is not stored back on the instance.
        policy = self.pi.compute_policy(next_h)
        value = self.v(next_h)
        return policy, value

    def unchain_backward(self):
        """Detach the stored recurrent state in place, if one exists."""
        if not self.un_init:
            self.h.detach_()
            self.c.detach_()