def forward(self, input):
    """Run the wrapped model on ``input`` and package its outputs.

    ``self.data_parallel`` is called with ``input.action.float_features``
    followed by ``input.state.float_features`` and must return exactly six
    values, which are forwarded into an ``rlt.MemoryNetworkOutput``.

    Returns:
        An ``rlt.MemoryNetworkOutput`` whose ``next_lstm_hidden`` and
        ``next_lstm_cell`` fields are elements 0 and 1 of the sixth
        returned value.
    """
    outputs = self.data_parallel(
        input.action.float_features, input.state.float_features
    )
    # Tuple-unpacking keeps the strict "exactly six values" contract.
    mus, sigmas, logpi, rewards, not_terminals, hidden_and_cell = outputs

    # NOTE(review): the value unpacked as "not_terminals" is stored in the
    # ``terminal`` field -- confirm the intended polarity against the model.
    fields = {
        "mus": mus,
        "sigmas": sigmas,
        "logpi": logpi,
        "reward": rewards,
        "terminal": not_terminals,
        "next_lstm_hidden": hidden_and_cell[0],
        "next_lstm_cell": hidden_and_cell[1],
    }
    return rlt.MemoryNetworkOutput(**fields)
def forward(self, input):
    """Run the wrapped model on ``input`` and package its outputs.

    ``self.data_parallel`` is called with ``input.action.float_features``
    followed by ``input.state.float_features`` and must return exactly
    seven values, which are forwarded into an ``rlt.MemoryNetworkOutput``.

    Returns:
        An ``rlt.MemoryNetworkOutput`` where ``all_steps_lstm_hidden`` is
        the sixth returned value, and ``next_lstm_hidden`` /
        ``next_lstm_cell`` are elements 0 and 1 of the seventh.
    """
    model_out = self.data_parallel(
        input.action.float_features, input.state.float_features
    )
    # Tuple-unpacking keeps the strict "exactly seven values" contract.
    (
        mus,
        sigmas,
        logpi,
        rewards,
        not_terminals,
        hidden_per_step,
        final_hidden_and_cell,
    ) = model_out

    # Index (rather than unpack) the final pair, as the original does, so
    # that only positions 0 and 1 are required to exist.
    final_hidden = final_hidden_and_cell[0]
    final_cell = final_hidden_and_cell[1]

    # NOTE(review): the value unpacked as "not_terminals" is stored in the
    # ``terminal`` field -- confirm the intended polarity against the model.
    return rlt.MemoryNetworkOutput(
        mus=mus,
        sigmas=sigmas,
        logpi=logpi,
        reward=rewards,
        terminal=not_terminals,
        next_lstm_hidden=final_hidden,
        next_lstm_cell=final_cell,
        all_steps_lstm_hidden=hidden_per_step,
    )