def __init__(self, obs_num, n_actions):
    """Create the head, policy and value function and hand them to the parent.

    Args:
        obs_num: Forwarded unchanged to ``A3C_HEAD`` as its first argument.
        n_actions: Forwarded to ``A3C_HEAD`` as its second argument, used
            for both arguments of ``FCSoftmaxPolicy`` and as the single
            argument of ``FCVFunction``.
    """
    self.head = A3C_HEAD(obs_num, n_actions)

    # NOTE(review): the policy and value layers are sized with n_actions,
    # which presumably means the head emits n_actions features -- confirm
    # against the A3C_HEAD definition.
    head_width = n_actions
    self.pi = policy.FCSoftmaxPolicy(head_width, n_actions)
    self.v = v_function.FCVFunction(head_width)

    super().__init__(self.head, self.pi, self.v)
def __init__(self, n_actions):
    """Create the head, policy, value function and LSTM for this model.

    Args:
        n_actions: Passed as the second argument of ``FCSoftmaxPolicy``.
    """
    self.head = links.NIPSDQNHead()

    # Every downstream component is sized from the head's reported
    # channel count, so read it once.
    feature_dim = self.head.n_output_channels

    self.pi = policy.FCSoftmaxPolicy(feature_dim, n_actions)
    self.v = v_function.FCVFunction(feature_dim)
    # The LSTM receives the same value for both constructor arguments.
    self.lstm = L.LSTM(feature_dim, feature_dim)

    # The parent receives the components as head, lstm, pi, v -- note this
    # differs from the order in which they were created above.
    super().__init__(self.head, self.lstm, self.pi, self.v)
def __init__(self, n_input, n_actions, n_hidden):
    """Create a ``QFunction`` head plus policy and value function.

    Args:
        n_input: Forwarded to ``QFunction`` as its first argument.
        n_actions: Passed as the second argument of ``FCSoftmaxPolicy``.
        n_hidden: Forwarded to ``QFunction`` as its second argument.
    """
    self.head = QFunction(n_input, n_hidden)

    # Policy and value function are both sized from the head's reported
    # channel count.
    head_channels = self.head.n_output_channels
    self.pi = policy.FCSoftmaxPolicy(head_channels, n_actions)
    self.v = v_function.FCVFunction(head_channels)

    super().__init__(self.head, self.pi, self.v)
def __init__(self, n_actions):
    """Create an ``ICLRACERHead`` plus policy and value function.

    The head is constructed with ``guided_relu`` as its ``activation``.

    Args:
        n_actions: Passed as the second argument of ``FCSoftmaxPolicy``.
    """
    self.head = ICLRACERHead(activation=guided_relu)

    # Policy and value function are both sized from the head's reported
    # channel count.
    out_channels = self.head.n_output_channels
    self.pi = policy.FCSoftmaxPolicy(out_channels, n_actions)
    self.v = v_function.FCVFunction(out_channels)

    super().__init__(self.head, self.pi, self.v)
def __init__(self, trial, n_actions, width=None, height=None):
    """Create a ``MyHead`` plus policy and value function.

    Args:
        trial: Forwarded to ``MyHead`` as its first positional argument.
        n_actions: Passed as the second argument of ``FCSoftmaxPolicy``.
        width: Forwarded to ``MyHead`` as the ``width`` keyword
            (default ``None``).
        height: Forwarded to ``MyHead`` as the ``height`` keyword
            (default ``None``).
    """
    self.head = MyHead(trial, width=width, height=height)

    # Policy and value function are both sized from the head's reported
    # channel count.
    n_features = self.head.n_output_channels
    self.pi = policy.FCSoftmaxPolicy(n_features, n_actions)
    self.v = v_function.FCVFunction(n_features)

    super().__init__(self.head, self.pi, self.v)