class ApproxActionValueFunction(DeepSarsaApproxActionValueFunction):
    """Action-value function that forwards its work to an MLP delegate.

    The delegate is an MLPOneLayerActionRecordScaledScalarFeaturesValueFunction
    created in setup(); feature construction, prediction, saving and loading
    are all passed straight through to it.
    """

    def __init__(self, handicappers=None):
        """Remember the handicappers to hand to the delegate in setup()."""
        # NOTE(review): super() is anchored at DeepSarsaApproxActionValueFunction,
        # so attribute lookup starts *after* that class in the MRO and its own
        # __init__ is not invoked by this call -- confirm this is intended.
        super(DeepSarsaApproxActionValueFunction, self).__init__()
        self._handicappers = handicappers

    def setup(self):
        """Create the delegate value function and run its setup()."""
        self.delegate = MLPOneLayerActionRecordScaledScalarFeaturesValueFunction(
            NB_UNIT,
            blind_structure,
            self._handicappers,
        )
        self.delegate.setup()

    def construct_features(self, state, action):
        """Return the delegate's features for the (state, action) pair."""
        features = self.delegate.construct_features(state, action)
        return features

    def approx_predict_value(self, features):
        """Return the delegate's predicted value for the given features."""
        prediction = self.delegate.approx_predict_value(features)
        return prediction

    def backup_on_minibatch(self, backup_minibatch):
        """Train the delegate's model on one minibatch of backup targets.

        backup_minibatch yields (state, action, target) triples and is
        iterated twice.  The training targets start out as the model's
        current predictions; for every sample only the entry selected by
        action_index(action) is replaced with the sample's target.  The loss
        returned by training is appended to the delegate's loss_history and
        the delegate's prediction_cache is reset to (None, None).
        """
        feature_rows = []
        for state, action, _target in backup_minibatch:
            # Only the first element of the delegate's feature output is used.
            feature_rows.append(
                self.delegate.construct_features(state, action)[0])
        inputs = np.array(feature_rows)

        updates = [(action, target)
                   for _state, action, target in backup_minibatch]

        outputs = self.delegate.model.predict_on_batch(inputs)
        assert len(outputs) == len(updates)

        # Overwrite just the taken action's slot in each predicted row.
        for row, (action, target) in zip(outputs, updates):
            row[action_index(action)] = target

        batch_loss = self.delegate.model.train_on_batch(inputs, outputs)
        self.delegate.loss_history.append(batch_loss)
        # Drop whatever the delegate had cached now that the model changed.
        self.delegate.prediction_cache = (None, None)

    def save(self, save_dir_path):
        """Ask the delegate to save itself under save_dir_path."""
        self.delegate.save(save_dir_path)

    def load(self, load_dir_path):
        """Ask the delegate to load itself from load_dir_path."""
        self.delegate.load(load_dir_path)
class ApproxActionValueFunction(SarsaApproxActionValueFunction):
    """Action-value function that forwards its work to an MLP delegate.

    The delegate is an MLPOneLayerActionRecordScaledScalarFeaturesValueFunction
    created in setup(); feature construction, prediction, backup, saving and
    loading are all passed straight through to it.
    """

    def __init__(self, handicappers=None):
        """Remember the handicappers to hand to the delegate in setup()."""
        # NOTE(review): super() is anchored at SarsaApproxActionValueFunction,
        # so attribute lookup starts *after* that class in the MRO and its own
        # __init__ is not invoked by this call -- confirm this is intended.
        super(SarsaApproxActionValueFunction, self).__init__()
        self._handicappers = handicappers

    def setup(self):
        """Create the delegate value function and run its setup()."""
        self.delegate = MLPOneLayerActionRecordScaledScalarFeaturesValueFunction(
            NB_UNIT,
            blind_structure,
            self._handicappers,
        )
        self.delegate.setup()

    def construct_features(self, state, action):
        """Return the delegate's features for the (state, action) pair."""
        features = self.delegate.construct_features(state, action)
        return features

    def approx_predict_value(self, features):
        """Return the delegate's predicted value for the given features."""
        prediction = self.delegate.approx_predict_value(features)
        return prediction

    def approx_backup(self, features, backup_target, alpha):
        """Pass a single backup (features, target, alpha) to the delegate."""
        self.delegate.approx_backup(features, backup_target, alpha)

    def save(self, save_dir_path):
        """Ask the delegate to save itself under save_dir_path."""
        self.delegate.save(save_dir_path)

    def load(self, load_dir_path):
        """Ask the delegate to load itself from load_dir_path."""
        self.delegate.load(load_dir_path)