def setup_mixins(policy, obs_space, action_space, config):
    """Attach the PPO helper mixins to ``policy`` (copied from PPO).

    Initializes the KL-coefficient, entropy-coefficient schedule, and
    learning-rate schedule mixins from ``config``, then installs a
    dummy value function so inherited PPO code paths can run.

    Args:
        policy: Policy object the mixins are initialized onto (mutated
            in place).
        obs_space: Observation space (unused here).
        action_space: Action space (unused here).
        config: Trainer config dict; ``lr``, ``lr_schedule``,
            ``entropy_coeff`` and ``entropy_coeff_schedule`` are read.
    """
    KLCoeffMixin.__init__(policy, config)
    entropy_coeff = config["entropy_coeff"]
    entropy_coeff_schedule = config["entropy_coeff_schedule"]
    EntropyCoeffSchedule.__init__(policy, entropy_coeff,
                                  entropy_coeff_schedule)
    LearningRateSchedule.__init__(policy, config["lr"],
                                  config["lr_schedule"])
    # HACK: install a no-op value function (all zeros, one entry per
    # observation in the batch) so some of the inherited PPO code runs.
    obs_ph = policy.get_placeholder(SampleBatch.CUR_OBS)
    policy.value_function = tf.zeros(tf.shape(obs_ph)[0])
def setup_mixins(policy, obs_space, action_space, config):
    """Attach the value-network, PPO, and imitation mixins to ``policy``.

    Initializes, in order: the value network mixin, the KL-coefficient
    mixin, the entropy-coefficient schedule, the learning-rate schedule,
    and the imitation learning-rate schedule (parameterized from
    ``config["model"]["custom_options"]``).

    Args:
        policy: Policy object the mixins are initialized onto (mutated
            in place).
        obs_space: Observation space, forwarded to ``ValueNetworkMixin``.
        action_space: Action space, forwarded to ``ValueNetworkMixin``.
        config: Trainer config dict; reads ``lr``, ``lr_schedule``,
            ``entropy_coeff``, ``entropy_coeff_schedule`` and the
            ``num_imitation_iters`` / ``imitation_weight`` custom options.
    """
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
    KLCoeffMixin.__init__(policy, config)
    EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"],
                                  config["entropy_coeff_schedule"])
    LearningRateSchedule.__init__(policy, config["lr"],
                                  config["lr_schedule"])
    custom_opts = config["model"]["custom_options"]
    ImitationLearningRateSchedule.__init__(
        policy,
        custom_opts["num_imitation_iters"],
        custom_opts["imitation_weight"],
        config)
def setup_mixins(policy, obs_space, action_space, config):
    """Attach the PPO and imitation mixins to ``policy``.

    Initializes the KL-coefficient mixin, the entropy-coefficient and
    learning-rate schedules, and the imitation learning-rate schedule,
    then installs a dummy value function so inherited PPO code can run.

    Args:
        policy: Policy object the mixins are initialized onto (mutated
            in place).
        obs_space: Observation space (unused here).
        action_space: Action space (unused here).
        config: Trainer config dict; reads ``lr``, ``lr_schedule``,
            ``entropy_coeff``, ``entropy_coeff_schedule`` and the
            ``num_imitation_iters`` / ``imitation_weight`` custom options.
    """
    KLCoeffMixin.__init__(policy, config)
    entropy_coeff = config["entropy_coeff"]
    entropy_coeff_schedule = config["entropy_coeff_schedule"]
    EntropyCoeffSchedule.__init__(policy, entropy_coeff,
                                  entropy_coeff_schedule)
    LearningRateSchedule.__init__(policy, config["lr"],
                                  config["lr_schedule"])
    custom_opts = config["model"]["custom_options"]
    ImitationLearningRateSchedule.__init__(
        policy,
        custom_opts["num_imitation_iters"],
        custom_opts["imitation_weight"],
        config)
    # HACK: install a no-op value function (all zeros, one entry per
    # observation in the batch) so some of the inherited PPO code runs.
    obs_ph = policy.get_placeholder(SampleBatch.CUR_OBS)
    policy.value_function = tf.zeros(tf.shape(obs_ph)[0])
def setup_mixins(policy, obs_space, action_space, config):
    """Attach the PPO helper mixins to ``policy`` (copied from PPO).

    Initializes the KL-coefficient mixin and the entropy-coefficient
    and learning-rate schedules from ``config``.

    Args:
        policy: Policy object the mixins are initialized onto (mutated
            in place).
        obs_space: Observation space (unused here).
        action_space: Action space (unused here).
        config: Trainer config dict; ``lr``, ``lr_schedule``,
            ``entropy_coeff`` and ``entropy_coeff_schedule`` are read.
    """
    KLCoeffMixin.__init__(policy, config)
    entropy_coeff = config["entropy_coeff"]
    entropy_coeff_schedule = config["entropy_coeff_schedule"]
    EntropyCoeffSchedule.__init__(policy, entropy_coeff,
                                  entropy_coeff_schedule)
    LearningRateSchedule.__init__(policy, config["lr"],
                                  config["lr_schedule"])
def setup_mixins(policy, obs_space, action_space, config):
    """Attach the LR-schedule, KL, and value-network mixins to ``policy``.

    Args:
        policy: Policy object the mixins are initialized onto (mutated
            in place).
        obs_space: Observation space (unused here).
        action_space: Action space (unused here).
        config: Trainer config dict; ``lr``, ``lr_schedule`` and the
            KL-coefficient settings are read.

    NOTE(review): this variant calls ``ValueNetworkMixin.__init__`` with
    only ``policy`` — presumably a different mixin definition than the
    four-argument form used elsewhere; verify against the local class.
    """
    LearningRateSchedule.__init__(policy, config["lr"],
                                  config["lr_schedule"])
    KLCoeffMixin.__init__(policy, config)
    ValueNetworkMixin.__init__(policy)