def __init__(self, net, dim_actions):
    """Store the backbone and build the action and critic heads on top of it.

    Args:
        net: Backbone object; it must expose an ``output_size`` attribute,
            which is used as the input size of both heads.
        dim_actions: Size passed to ``CategoricalNet`` as its second
            argument (presumably the number of discrete actions -- confirm
            against ``CategoricalNet``).
    """
    super().__init__()
    self.net = net
    self.dim_actions = dim_actions

    # Both heads are sized from the backbone's reported output width.
    feature_size = self.net.output_size
    self.action_distribution = CategoricalNet(feature_size, self.dim_actions)
    self.critic = CriticHead(feature_size)
def __init__(self, net, dim_actions, *, supervise_stop=False):
    """Store the backbone and build the action head(s) and the critic head.

    Previously ``supervise_stop`` was hard-coded to ``False`` right before
    being tested, which made the split-head branch unreachable. It is now a
    keyword-only option whose default keeps the old behaviour.

    Args:
        net: Backbone object; it must expose an ``output_size`` attribute,
            which is used as the input size of every head.
        dim_actions: Total action count used to size the categorical
            head(s).
        supervise_stop: When true, build two heads instead of one:
            ``non_stop_action_distribution`` with ``dim_actions - 1``
            outputs and ``stop_action_distribution`` with 2 outputs
            (presumably a stop / don't-stop decision -- confirm). In that
            mode ``action_distribution`` is NOT created.
            NOTE(review): the rest of the class is not visible here; verify
            that its other methods handle the split heads before enabling.
    """
    super().__init__()
    self.net = net
    self.dim_actions = dim_actions
    self.supervise_stop = supervise_stop

    feature_size = self.net.output_size
    if self.supervise_stop:
        # Separate heads: one over every action except one, one binary.
        self.non_stop_action_distribution = CategoricalNet(
            feature_size, self.dim_actions - 1
        )
        self.stop_action_distribution = CategoricalNet(feature_size, 2)
    else:
        # Single head over the full action set (the default).
        self.action_distribution = CategoricalNet(
            feature_size, self.dim_actions
        )
    self.critic = CriticHead(feature_size)
def __init__(
    self,
    observation_space,
    action_space,
    goal_sensor_uuid,
    hidden_size=512,
):
    """Build the backbone ``Net`` and a categorical action head on top of it.

    Args:
        observation_space: Forwarded unchanged to ``Net``.
        action_space: Object exposing ``n``; ``n`` is stored as
            ``dim_actions`` and used to size the action head.
        goal_sensor_uuid: Stored on the instance and forwarded to ``Net``.
        hidden_size: Forwarded to ``Net``; defaults to 512.
    """
    super().__init__()
    self.dim_actions = action_space.n
    self.goal_sensor_uuid = goal_sensor_uuid

    # Collect the backbone's keyword arguments, then construct it.
    net_kwargs = {
        "observation_space": observation_space,
        "hidden_size": hidden_size,
        "goal_sensor_uuid": goal_sensor_uuid,
    }
    self.net = Net(**net_kwargs)

    # The action head is sized from the backbone's reported output width.
    self.action_distribution = CategoricalNet(
        self.net.output_size, self.dim_actions
    )