def backup_on_minibatch(self, q_network, backup_minibatch):
    """Run one training step of ``q_network`` on a minibatch of backups.

    Each item of ``backup_minibatch`` is unpacked as
    ``(state, action, target)``.  The network's own predictions for the
    batch are used as the training labels, except that in every row the
    slot chosen by ``action_index(action)`` is overwritten with ``target``;
    all other slots keep the value the network just predicted.

    Args:
        q_network: Object providing ``predict_on_batch(X)`` and
            ``train_on_batch(X, Y)``.
        backup_minibatch: Iterable of ``(state, action, target)`` triples.
            It is traversed exactly once, so a one-shot iterable works.

    Returns:
        None.  The value returned by ``train_on_batch`` is discarded.

    NOTE(review): the label slot is selected with the caller-supplied
    action, while only element 0 of ``construct_features``' result is used
    here.  ``predict_value_by_network`` indexes with the action returned by
    ``construct_features`` instead -- confirm the two always agree.
    """
    feature_rows = []
    updates = []
    for state, action, target in backup_minibatch:
        # Only the first element of construct_features' result (the
        # feature vector) is needed to build the input batch.
        feature_rows.append(self.delegate.construct_features(state, action)[0])
        updates.append((action, target))

    X = np.array(feature_rows)
    Y = q_network.predict_on_batch(X)

    # Internal sanity check: one prediction row per minibatch item, so the
    # zip below cannot silently drop an update.
    assert len(Y) == len(updates)
    for predicted_row, (action, target) in zip(Y, updates):
        predicted_row[action_index(action)] = target

    q_network.train_on_batch(X, Y)
def backup_on_minibatch(self, backup_minibatch):
    """Run one training step of the delegate's model on a minibatch.

    Each item of ``backup_minibatch`` is unpacked as
    ``(state, action, target)``.  The model's own predictions for the batch
    are used as the training labels, except that in every row the slot
    chosen by ``action_index(action)`` is overwritten with ``target``; all
    other slots keep the value the model just predicted.

    After training, the value returned by ``train_on_batch`` is appended to
    ``self.delegate.loss_history`` and ``self.delegate.prediction_cache``
    is reset to ``(None, None)``.

    Args:
        backup_minibatch: Iterable of ``(state, action, target)`` triples.
            It is traversed exactly once, so a one-shot iterable works.

    Returns:
        None.

    NOTE(review): the label slot is selected with the caller-supplied
    action, while only element 0 of ``construct_features``' result is used
    here -- confirm that matches how values are indexed at prediction time.
    """
    feature_rows = []
    updates = []
    for state, action, target in backup_minibatch:
        # Only the first element of construct_features' result (the
        # feature vector) is needed to build the input batch.
        feature_rows.append(self.delegate.construct_features(state, action)[0])
        updates.append((action, target))

    X = np.array(feature_rows)
    Y = self.delegate.model.predict_on_batch(X)

    # Internal sanity check: one prediction row per minibatch item, so the
    # zip below cannot silently drop an update.
    assert len(Y) == len(updates)
    for predicted_row, (action, target) in zip(Y, updates):
        predicted_row[action_index(action)] = target

    loss = self.delegate.model.train_on_batch(X, Y)
    self.delegate.loss_history.append(loss)
    # Presumably cached predictions are stale once the model has been
    # trained, hence the reset -- verify against the cache's readers.
    self.delegate.prediction_cache = (None, None)
def predict_value_by_network(self, network, state, action):
    """Return ``network``'s predicted value for ``action`` in ``state``.

    The delegate turns ``(state, action)`` into a feature vector and an
    action; the feature vector is sent through ``network`` as a batch of
    one, and the entry of the resulting row at ``action_index`` of the
    delegate-returned action is the result.

    Args:
        network: Object providing ``predict_on_batch(X)``.
        state: State forwarded to ``self.delegate.construct_features``.
        action: Action forwarded to ``self.delegate.construct_features``.

    Returns:
        The selected element of the prediction row after ``tolist()``.
    """
    features, resolved_action = self.delegate.construct_features(state, action)
    # The network works on batches, so wrap the single sample and take the
    # only row of the output.
    single_item_batch = np.array([features])
    predicted_row = network.predict_on_batch(single_item_batch)[0]
    return predicted_row.tolist()[action_index(resolved_action)]