from torch import nn

def initialize_action_model(self, layers, data):
    # Dueling variant: build the base MLP, drop its final output layer,
    # and replace it with separate value / advantage streams.
    a_s = data.get_action_state_size()
    base = create_nn_model(layers, *a_s)[:-1]
    # base[-2] is the last Linear layer left in the base network;
    # its width feeds both streams of the dueling head.
    stream_input_size = base[-2].out_features
    dueling_head = DuelingDQNModule(a_s=a_s,
                                    stream_input_size=stream_input_size)
    return nn.Sequential(base, dueling_head)
def initialize_action_model(self, layers, data):
    # Non-dueling baseline for comparison: a single MLP mapping
    # state features to one Q-value per action.
    return create_nn_model(layers, *data.get_action_state_size())
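create_nn_model itself is not defined in this section. Judging only from the call sites above, it appears to take a list of hidden-layer sizes, an output size, an input size, and an optional trailing flag. A minimal sketch under those assumptions (the use_bn name for the flag is a guess, not the library's actual parameter):

from torch import nn

def create_nn_model(layers, output_size, input_size, use_bn=False):
    # Hypothetical reconstruction: Linear (+ optional BatchNorm1d) + ReLU
    # blocks, finished by a Linear output layer. Dropping that final layer
    # with [:-1] leaves a Linear at position [-2], which is what the
    # dueling initializer above relies on.
    sizes = [input_size] + list(layers)
    modules = []
    for in_f, out_f in zip(sizes[:-1], sizes[1:]):
        modules.append(nn.Linear(in_f, out_f))
        if use_bn:
            modules.append(nn.BatchNorm1d(out_f))
        modules.append(nn.ReLU())
    modules.append(nn.Linear(sizes[-1], output_size))
    return nn.Sequential(*modules)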
class DuelingDQNModule(nn.Module):
    def __init__(self, a_s, stream_input_size):
        super().__init__()
        # Value stream: estimates V(s), a single scalar per state.
        self.val = create_nn_model([stream_input_size], 1, stream_input_size)
        # Advantage stream: estimates A(s, a), one value per action.
        self.adv = create_nn_model([stream_input_size], a_s[0],
                                   stream_input_size)
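The extract omits the module's forward pass. In the standard dueling aggregation the two streams are recombined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), subtracting the mean advantage so the value/advantage decomposition is identifiable. A sketch of what the missing method inside DuelingDQNModule might look like:

def forward(self, xi):
    val = self.val(xi)   # V(s): shape (batch, 1)
    adv = self.adv(xi)   # A(s, a): shape (batch, n_actions)
    # Broadcast V across the action dimension and center the advantages.
    return val + adv - adv.mean(dim=1, keepdim=True)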
def initialize_critic_model(self, layers, data):
    """Instead of mapping state -> action values, map state + action ->
    a single expected reward."""
    return create_nn_model(layers, 1, sum(data.get_action_state_size()),
                           True)
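Because the critic's input width is sum(data.get_action_state_size()), the caller presumably concatenates state and action along the feature dimension before the forward pass. A usage sketch with made-up shapes (critic_model stands for the network returned above):

import torch

state = torch.randn(32, 8)    # hypothetical batch: 32 states, 8 features each
action = torch.randn(32, 2)   # matching batch of 2-dim actions
# One expected-reward estimate per (state, action) pair.
q_value = critic_model(torch.cat([state, action], dim=1))  # shape: (32, 1)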