import torch
from torch import nn


def initialize_action_model(self, layers, data):
    # Build the shared feature extractor, dropping the final linear layer so the
    # dueling head can replace it with separate value and advantage streams.
    a_s = data.get_action_state_size()
    base = create_nn_model(layers, *a_s)[:-1]
    # The last module of `base` is an activation, so [-2] is the last hidden
    # Linear layer; its width is what both streams consume.
    stream_input_size = base[-2].out_features
    dueling_head = DuelingDQNModule(a_s=a_s, stream_input_size=stream_input_size)
    return nn.Sequential(base, dueling_head)
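# Usage sketch (hypothetical names, not from the original): the composed dueling
# model above maps a batch of states straight to per-action Q-values; the
# value/advantage split is internal to DuelingDQNModule, e.g.
#   model = self.initialize_action_model([64, 64], data)
#   q_values = model(states)  # shape: (batch, n_actions)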
def initialize_action_model(self, layers, data):
    # Plain (non-dueling) variant: a single network mapping state -> per-action Q-values.
    return create_nn_model(layers, *data.get_action_state_size())
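# `create_nn_model` is not shown in this section. Below is a minimal sketch of
# what it is assumed to look like, reconstructed from its call sites here:
# `layers` is a list of hidden-layer widths, followed by the output size, the
# input size, and an optional trailing flag (accepted for signature
# compatibility; its meaning in the original helper is not shown).
def create_nn_model(layers, n_out, n_in, flag=False):
    sizes = [n_in] + list(layers)
    modules = []
    # Hidden stack: Linear -> ReLU for each configured width.
    for in_f, out_f in zip(sizes, sizes[1:]):
        modules += [nn.Linear(in_f, out_f), nn.ReLU()]
    # Final projection to the requested output size.
    modules.append(nn.Linear(sizes[-1], n_out))
    return nn.Sequential(*modules)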
class DuelingDQNModule(nn.Module):
    def __init__(self, a_s, stream_input_size):
        super().__init__()
        # Two parallel streams: a scalar state-value V(s) and one advantage
        # A(s, a) per action (a_s[0] is the action count).
        self.val = create_nn_model([stream_input_size], 1, stream_input_size)
        self.adv = create_nn_model([stream_input_size], a_s[0], stream_input_size)
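    # A minimal sketch of the missing forward pass, assuming the standard dueling
    # aggregation from Wang et al. (2016): Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
    # Subtracting the mean advantage keeps the two streams identifiable.
    def forward(self, x):
        val = self.val(x)                                 # shape: (batch, 1)
        adv = self.adv(x)                                 # shape: (batch, n_actions)
        return val + adv - adv.mean(dim=1, keepdim=True)  # shape: (batch, n_actions)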
def initialize_critic_model(self, layers, data):
    """
    Instead of state -> action, the critic maps state + action -> a single
    expected reward, so its input size is the state size plus the action size
    and its output size is 1.
    """
    return create_nn_model(layers, 1, sum(data.get_action_state_size()), True)
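# Usage sketch (illustrative sizes and names, not from the original): the critic
# consumes the state and action concatenated along the feature dimension and
# returns one expected-reward estimate per sample.
state_size, action_size = 24, 4
critic = create_nn_model([64, 64], 1, state_size + action_size, True)
state = torch.randn(32, state_size)                  # batch of 32 state vectors
action = torch.randn(32, action_size)                # batch of 32 action vectors
q_value = critic(torch.cat([state, action], dim=1))  # shape: (32, 1)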