Example #1
    def build_graph(self, weights):
        # Build graph
        sg_global_step = graph.GlobalStep()
        sg_update_step = graph.GlobalStep()
        sg_weights = weights

        if dppo_config.config.use_linear_schedule:
            if dppo_config.config.schedule_step == 'update':
                sg_schedule_step = sg_update_step
            elif dppo_config.config.schedule_step == 'environment':
                sg_schedule_step = sg_global_step
            else:
                assert False, 'Valid options for the schedule step are: update OR environment. ' \
                              'You provided the following option: {}'.format(dppo_config.config.schedule_step)
            sg_learning_rate = lr_schedule.Linear(sg_schedule_step, dppo_config.config)
        else:
            sg_learning_rate = dppo_config.config.initial_learning_rate

        sg_optimizer = optimizer.AdamOptimizer(sg_learning_rate, epsilon=dppo_config.config.optimizer.epsilon)
        sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)
        sg_average_reward = graph.LinearMovingAverage(dppo_config.config.avg_in_num_batches)
        sg_initialize = graph.Initialize()

        # Get/set ops on the weights for updating the policy
        sg_get_weights_flatten = graph.GetVariablesFlatten(sg_weights)
        sg_set_weights_flatten = graph.SetVariablesFlatten(sg_weights)

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_upd_step = self.Op(sg_update_step.n)
        self.op_score = self.Op(sg_average_reward.average)

        self.op_inc_global_step = self.Ops(sg_global_step.increment, increment=sg_global_step.ph_increment)
        self.op_inc_global_step_and_average_reward = self.Ops(sg_global_step.increment,
                                                              sg_average_reward.add,
                                                              increment=sg_global_step.ph_increment,
                                                              reward_sum=sg_average_reward.ph_sum,
                                                              reward_weight=sg_average_reward.ph_count)

        self.op_get_weights = self.Op(sg_weights)
        self.op_get_weights_signed = self.Ops(sg_weights, sg_update_step.n)

        self.op_apply_gradients = self.Ops(sg_gradients.apply, sg_update_step.increment,
                                           gradients=sg_gradients.ph_gradients,
                                           increment=sg_update_step.ph_increment)

        self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
        self.op_set_weights_flatten = self.Op(sg_set_weights_flatten, value=sg_set_weights_flatten.ph_value)

        # Gradient combining routines
        self.op_submit_gradients = self.Call(graph.get_gradients_apply_routine(dppo_config.config))

        self.op_initialize = self.Op(sg_initialize)
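
The schedule branch above anneals the learning rate with lr_schedule.Linear, driven by whichever step counter the config selects. A rough sketch of what such a linear schedule computes (the parameter names here are illustrative assumptions, not necessarily the library's):

    def linear_learning_rate(step, initial_learning_rate, max_global_step):
        # Anneal linearly from the initial value at step 0 down to zero.
        progress = min(step, max_global_step) / float(max_global_step)
        return initial_learning_rate * (1.0 - progress)

    # Halfway through training the rate has decayed to half its initial value.
    assert abs(linear_learning_rate(5 * 10**7, 7e-4, 10**8) - 3.5e-4) < 1e-12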
Example #2
    def build_graph(self):
        sg_weights = _ManagerNetwork().weights

        sg_global_step = graph.GlobalStep()
        sg_learning_rate = fun_graph.LearningRate(sg_global_step)

        sg_optimizer = optimizer.RMSPropOptimizer(
            learning_rate=sg_learning_rate,
            decay=cfg.RMSProp.decay,
            momentum=0.0,
            epsilon=cfg.RMSProp.epsilon)

        sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_get_weights = self.Op(sg_weights)
        self.op_apply_gradients = self.Ops(
            sg_gradients.apply,
            sg_global_step.increment,
            gradients=sg_gradients.ph_gradients,
            increment=sg_global_step.ph_increment)
        self.op_initialize = self.Op(sg_initialize)
Example #3
    def build_graph(self):
        # Build graph

        sg_policy_net = PolicyNet()

        sg_n_iter = trpo_graph.NIter()

        sg_global_step = graph.GlobalStep()

        sg_value_net = ValueNet()

        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_inc_step = self.Op(sg_global_step.increment, increment=sg_global_step.ph_increment)
        self.op_initialize = self.Op(sg_initialize)

        self.call_wait_for_iteration = self.Call(self.wait_for_iteration)
        self.call_send_experience = self.Call(self.send_experience)
        self.call_receive_weights = self.Call(self.receive_weights)

        self.op_turn_collect_on = sg_n_iter.op_turn_collect_on
        self.op_turn_collect_off = sg_n_iter.op_turn_collect_off
        self.op_n_iter_value = sg_n_iter.op_n_iter_value
        self.op_n_iter = sg_n_iter.op_n_iter
        self.op_next_iter = sg_n_iter.op_next_iter

        self.policy = sg_policy_net
        self.value = sg_value_net
Example #4
    def build_graph(self):
        # Build graph
        sg_global_step = graph.GlobalStep()
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_initialize = self.Op(sg_initialize)
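
Example #4 is the minimal shape all of these graphs share: a global step counter plus an initialize op. A plain-Python stand-in for the GlobalStep semantics assumed throughout (an illustration of the pattern, not relaax code):

    class GlobalStepSketch:
        def __init__(self):
            self.n = 0  # current step count, read via op_n_step

        def increment(self, increment):
            # Workers feed ph_increment with the number of environment
            # steps their latest rollout consumed.
            self.n += increment

    step = GlobalStepSketch()
    step.increment(increment=20)  # a 20-step rollout is reported
    step.increment(increment=20)
    assert step.n == 40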
Example #5
    def build_graph(self):
        # Build graph
        sg_global_step = graph.GlobalStep()
        sg_network = Network()
        self.actor = sg_network.actor
        self.critic = sg_network.critic

        if da3c_config.config.optimizer == 'Adam':
            sg_actor_optimizer = optimizer.AdamOptimizer(
                da3c_config.config.initial_learning_rate)
            sg_critic_optimizer = optimizer.AdamOptimizer(
                da3c_config.config.initial_learning_rate)
        else:
            sg_learning_rate = da3c_graph.LearningRate(
                sg_global_step, da3c_config.config.initial_learning_rate)
            sg_actor_optimizer = optimizer.RMSPropOptimizer(
                learning_rate=sg_learning_rate,
                decay=da3c_config.config.RMSProp.decay,
                momentum=0.0,
                epsilon=da3c_config.config.RMSProp.epsilon)
            sg_critic_optimizer = optimizer.RMSPropOptimizer(
                learning_rate=sg_learning_rate,
                decay=da3c_config.config.RMSProp.decay,
                momentum=0.0,
                epsilon=da3c_config.config.RMSProp.epsilon)
        sg_actor_gradients = optimizer.Gradients(self.actor.weights,
                                                 optimizer=sg_actor_optimizer)
        sg_critic_gradients = optimizer.Gradients(
            self.critic.weights, optimizer=sg_critic_optimizer)

        if da3c_config.config.use_icm:
            sg_icm_optimizer = optimizer.AdamOptimizer(
                da3c_config.config.icm.lr)
            sg_icm_weights = icm_model.ICM().weights
            sg_icm_gradients = optimizer.Gradients(sg_icm_weights,
                                                   optimizer=sg_icm_optimizer)

            # Expose ICM public API
            self.op_icm_get_weights = self.Op(sg_icm_weights)
            self.op_icm_apply_gradients = self.Op(
                sg_icm_gradients.apply,
                gradients=sg_icm_gradients.ph_gradients)

        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_check_weights = self.Ops(self.actor.weights.check,
                                         self.critic.weights.check)
        self.op_get_weights = self.Ops(self.actor.weights, self.critic.weights)
        self.op_apply_gradients = self.Ops(
            sg_actor_gradients.apply,
            sg_critic_gradients.apply,
            sg_global_step.increment,
            gradients=(sg_actor_gradients.ph_gradients,
                       sg_critic_gradients.ph_gradients),
            increment=sg_global_step.ph_increment)
        self.op_initialize = self.Op(sg_initialize)
Example #6
    def build_graph(self):
        sg_global_step = graph.GlobalStep()
        sg_network = Network()

        sg_get_weights_flatten = graph.GetVariablesFlatten(sg_network.weights)
        sg_set_weights_flatten = graph.SetVariablesFlatten(sg_network.weights)

        if config.use_linear_schedule:
            sg_learning_rate = lr_schedule.Linear(sg_global_step, config)
        else:
            sg_learning_rate = config.initial_learning_rate

        if config.optimizer == 'Adam':
            sg_optimizer = optimizer.AdamOptimizer(sg_learning_rate)
        elif config.optimizer == 'RMSProp':
            sg_optimizer = optimizer.RMSPropOptimizer(
                learning_rate=sg_learning_rate,
                decay=config.RMSProp.decay,
                epsilon=config.RMSProp.epsilon)
        else:
            assert False, 'There are 2 valid options for optimizers: Adam | RMSProp'

        sg_gradients_apply = optimizer.Gradients(sg_network.weights,
                                                 optimizer=sg_optimizer)

        sg_average_reward = graph.LinearMovingAverage(
            config.avg_in_num_batches)
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_score = self.Op(sg_average_reward.average)

        self.op_get_weights_signed = self.Ops(sg_network.weights,
                                              sg_global_step.n)
        self.op_assign_weights = self.Op(sg_network.weights.assign,
                                         weights=sg_network.weights.ph_weights)

        self.op_apply_gradients = self.Ops(
            sg_gradients_apply.apply,
            sg_global_step.increment,
            gradients=sg_gradients_apply.ph_gradients,
            increment=sg_global_step.ph_increment)
        self.op_add_rewards_to_model_score_routine = self.Ops(
            sg_average_reward.add,
            reward_sum=sg_average_reward.ph_sum,
            reward_weight=sg_average_reward.ph_count)

        self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
        self.op_set_weights_flatten = self.Op(
            sg_set_weights_flatten, value=sg_set_weights_flatten.ph_value)

        # Gradient combining routines
        self.op_submit_gradients = self.Call(
            graph.get_gradients_apply_routine(config))

        self.op_initialize = self.Op(sg_initialize)
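
op_score above reads a reward average that graph.LinearMovingAverage maintains over the last avg_in_num_batches batches, each fed as a (sum, count) pair through ph_sum and ph_count. A sketch of those assumed semantics (the real class's internals may differ):

    from collections import deque

    class LinearMovingAverageSketch:
        def __init__(self, avg_in_num_batches):
            self.sums = deque(maxlen=avg_in_num_batches)
            self.counts = deque(maxlen=avg_in_num_batches)

        def add(self, reward_sum, reward_weight):
            # Mirrors feeding ph_sum / ph_count in the example above.
            self.sums.append(reward_sum)
            self.counts.append(reward_weight)

        @property
        def average(self):
            total = sum(self.counts)
            return sum(self.sums) / total if total else 0.0

    score = LinearMovingAverageSketch(avg_in_num_batches=2)
    score.add(reward_sum=10.0, reward_weight=5)  # 5 rewards summing to 10
    score.add(reward_sum=30.0, reward_weight=5)
    assert score.average == 4.0  # (10 + 30) / (5 + 5)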
Example #7
    def build_graph(self):
        # Build graph
        sg_global_step = graph.GlobalStep()
        sg_weights = Network().weights
        sg_optimizer = optimizer.AdamOptimizer(pg_config.config.learning_rate)
        sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_get_weights = self.Op(sg_weights)
        self.op_apply_gradients = self.Ops(sg_gradients.apply,
                                           sg_global_step.increment, gradients=sg_gradients.ph_gradients,
                                           increment=sg_global_step.ph_increment)
        self.op_initialize = self.Op(sg_initialize)
Example #8
    def build_graph(self):
        sg_global_step = graph.GlobalStep()
        sg_network = Network()

        if config.optimizer == 'Adam':
            sg_optimizer = optimizer.AdamOptimizer(
                config.initial_learning_rate)
        elif config.optimizer == 'RMSProp':
            param = {}
            if hasattr(config, 'RMSProp'):
                if hasattr(config.RMSProp, "decay"):
                    param["decay"] = config.RMSProp.decay
                if hasattr(config.RMSProp, "epsilon"):
                    param["epsilon"] = config.RMSProp.epsilon

            sg_optimizer = optimizer.RMSPropOptimizer(
                config.initial_learning_rate, **param)
        else:
            raise NotImplementedError

        sg_gradients_apply = optimizer.Gradients(sg_network.weights,
                                                 optimizer=sg_optimizer)

        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)

        self.op_get_weights = self.Op(sg_network.weights)
        self.op_assign_weights = self.Op(sg_network.weights.assign,
                                         weights=sg_network.weights.ph_weights)

        self.op_apply_gradients = self.Ops(
            sg_gradients_apply.apply,
            sg_global_step.increment,
            gradients=sg_gradients_apply.ph_gradients,
            n_steps=sg_global_step.ph_increment)

        self.op_initialize = self.Op(sg_initialize)
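
The hasattr chain above forwards only those RMSProp hyperparameters the config actually defines, leaving the rest to the optimizer's defaults. The same pattern, slightly more compact (the helper name is hypothetical, not library API):

    def optional_params(cfg, section, fields):
        # Collect the subset of `fields` that cfg.<section> defines.
        sub = getattr(cfg, section, None)
        if sub is None:
            return {}
        return {f: getattr(sub, f) for f in fields if hasattr(sub, f)}

    # e.g. yields {'decay': 0.99} when the config defines RMSProp.decay
    # but leaves epsilon to the optimizer's default:
    # param = optional_params(config, 'RMSProp', ('decay', 'epsilon'))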
Example #9
    def build_graph(self):
        # Build graph
        sg_policy_net = PolicyNet()

        sg_n_iter = trpo_graph.NIter()

        sg_global_step = graph.GlobalStep()

        sg_value_net = ValueNet()

        sg_average_reward = graph.LinearMovingAverage(
            trpo_config.config.avg_in_num_batches)
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_inc_step = self.Op(sg_global_step.increment,
                                   increment=sg_global_step.ph_increment)

        self.op_score = self.Op(sg_average_reward.average)
        self.op_add_reward_to_model_score_routine = self.Ops(
            sg_average_reward.add,
            reward_sum=sg_average_reward.ph_sum,
            reward_weight=sg_average_reward.ph_count)

        self.call_wait_for_iteration = self.Call(self.wait_for_iteration)
        self.call_send_experience = self.Call(self.send_experience)
        self.call_receive_weights = self.Call(self.receive_weights)

        self.op_turn_collect_on = sg_n_iter.op_turn_collect_on
        self.op_turn_collect_off = sg_n_iter.op_turn_collect_off
        self.op_n_iter_value = sg_n_iter.op_n_iter_value
        self.op_n_iter = sg_n_iter.op_n_iter
        self.op_next_iter = sg_n_iter.op_next_iter

        self.policy = sg_policy_net
        self.value = sg_value_net
        self.op_initialize = self.Op(sg_initialize)
Example #10
    def build_graph(self):
        sg_weights = _WorkerNetwork().weights

        sg_global_step = graph.GlobalStep()
        sg_learning_rate = fun_graph.LearningRate(sg_global_step)

        sg_optimizer = optimizer.RMSPropOptimizer(
            learning_rate=sg_learning_rate,
            decay=cfg.RMSProp.decay,
            momentum=0.0,
            epsilon=cfg.RMSProp.epsilon)

        sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_get_weights = self.Op(sg_weights)
        self.op_apply_gradients = self.Ops(
            sg_gradients.apply,
            sg_global_step.increment,
            gradients=sg_gradients.ph_gradients,
            increment=sg_global_step.ph_increment)
        self.op_initialize = self.Op(sg_initialize)
Example #11
    def build_graph(self):
        # Build graph
        sg_global_step = graph.GlobalStep()
        sg_network = Network()
        sg_weights = sg_network.weights

        if da3c_config.config.use_linear_schedule:
            sg_learning_rate = lr_schedule.Linear(sg_global_step,
                                                  da3c_config.config)
        else:
            sg_learning_rate = da3c_config.config.initial_learning_rate

        if da3c_config.config.optimizer == 'Adam':
            sg_optimizer = optimizer.AdamOptimizer(sg_learning_rate)
        else:
            sg_optimizer = optimizer.RMSPropOptimizer(
                learning_rate=sg_learning_rate,
                decay=da3c_config.config.RMSProp.decay,
                momentum=0.0,
                epsilon=da3c_config.config.RMSProp.epsilon)
        sg_gradients = optimizer.Gradients(sg_weights, optimizer=sg_optimizer)

        if da3c_config.config.use_icm:
            sg_icm_optimizer = optimizer.AdamOptimizer(
                da3c_config.config.icm.lr)
            sg_icm_weights = icm_model.ICM().weights
            sg_icm_gradients = optimizer.Gradients(sg_icm_weights,
                                                   optimizer=sg_icm_optimizer)

            # Expose ICM public API
            self.op_icm_get_weights = self.Op(sg_icm_weights)
            self.op_icm_apply_gradients = self.Op(
                sg_icm_gradients.apply,
                gradients=sg_icm_gradients.ph_gradients)

        sg_average_reward = graph.LinearMovingAverage(
            da3c_config.config.avg_in_num_batches)
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_n_step = self.Op(sg_global_step.n)
        self.op_score = self.Op(sg_average_reward.average)

        self.op_check_weights = self.Op(sg_weights.check)
        self.op_get_weights = self.Ops(sg_weights, sg_global_step.n)

        self.op_apply_gradients = self.Ops(
            sg_gradients.apply,
            sg_global_step.increment,
            gradients=sg_gradients.ph_gradients,
            increment=sg_global_step.ph_increment)
        self.op_add_rewards_to_model_score_routine = self.Ops(
            sg_average_reward.add,
            reward_sum=sg_average_reward.ph_sum,
            reward_weight=sg_average_reward.ph_count)

        # Select the gradient-applying method: fifo (by default), averaging, or delay compensation
        sg_get_weights_flatten = graph.GetVariablesFlatten(sg_weights)
        sg_set_weights_flatten = graph.SetVariablesFlatten(sg_weights)
        self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
        self.op_set_weights_flatten = self.Op(
            sg_set_weights_flatten, value=sg_set_weights_flatten.ph_value)

        self.op_submit_gradients = self.Call(
            graph.get_gradients_apply_routine(da3c_config.config))

        self.op_initialize = self.Op(sg_initialize)
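
The flattened get/set pair above exists so the gradient-combining routines (averaging, delay compensation) can treat every weight as one long vector. A NumPy sketch of the assumed flatten/unflatten behavior:

    import numpy as np

    def get_variables_flatten(variables):
        # Concatenate all variables into a single 1-D vector.
        return np.concatenate([v.ravel() for v in variables])

    def set_variables_flatten(variables, value):
        # Scatter a flat vector back into the per-variable shapes.
        offset = 0
        for v in variables:
            v[...] = value[offset:offset + v.size].reshape(v.shape)
            offset += v.size

    weights = [np.zeros((2, 3)), np.zeros(4)]
    assert get_variables_flatten(weights).shape == (10,)
    set_variables_flatten(weights, np.arange(10.0))
    assert weights[1][0] == 6.0  # elements 6..9 fill the second variable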
Example #12
    def build_graph(self):
        # Build graph
        sg_global_step = graph.GlobalStep()
        sg_episode_cnt = graph.GlobalStep()

        sg_actor_weights = ActorNetwork().weights
        sg_critic_weights = CriticNetwork().weights

        sg_actor_target_weights = ActorNetwork().weights
        sg_critic_target_weights = CriticNetwork().weights

        sg_get_weights_flatten = \
            graph.GetVariablesFlatten(graph.Variables(sg_actor_weights, sg_critic_weights))
        sg_set_weights_flatten = \
            graph.SetVariablesFlatten(graph.Variables(sg_actor_weights, sg_critic_weights))

        # reassign weights from actor & critic to the target networks
        sg_init_actor_target_weights = \
            graph.AssignWeights(sg_actor_target_weights, sg_actor_weights).op
        sg_init_critic_target_weights = \
            graph.AssignWeights(sg_critic_target_weights, sg_critic_weights).op

        sg_update_actor_target_weights = \
            graph.AssignWeights(sg_actor_target_weights, sg_actor_weights, cfg.config.tau).op
        sg_update_critic_target_weights = \
            graph.AssignWeights(sg_critic_target_weights, sg_critic_weights, cfg.config.tau).op

        sg_actor_optimizer = optimizer.AdamOptimizer(cfg.config.actor_learning_rate)
        sg_critic_optimizer = optimizer.AdamOptimizer(cfg.config.critic_learning_rate)

        sg_actor_gradients = optimizer.Gradients(sg_actor_weights, optimizer=sg_actor_optimizer)
        sg_critic_gradients = optimizer.Gradients(sg_critic_weights, optimizer=sg_critic_optimizer)

        sg_average_reward = graph.LinearMovingAverage(cfg.config.avg_in_num_batches)
        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_get_weights_signed = self.Ops(sg_actor_weights, sg_actor_target_weights,
                                              sg_critic_weights, sg_critic_target_weights, sg_global_step.n)

        self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
        self.op_set_weights_flatten = self.Op(sg_set_weights_flatten, value=sg_set_weights_flatten.ph_value)

        self.op_init_target_weights = self.Ops(sg_init_actor_target_weights,
                                               sg_init_critic_target_weights)

        self.op_update_target_weights = self.Ops(sg_update_actor_target_weights,
                                                 sg_update_critic_target_weights)

        self.op_apply_gradients = self.Ops(sg_actor_gradients.apply, sg_critic_gradients.apply,
                                           sg_global_step.increment,
                                           gradients=(sg_actor_gradients.ph_gradients,
                                                      sg_critic_gradients.ph_gradients),
                                           increment=sg_global_step.ph_increment)
        self.op_add_rewards_to_model_score_routine = self.Ops(sg_average_reward.add,
                                                              reward_sum=sg_average_reward.ph_sum,
                                                              reward_weight=sg_average_reward.ph_count)
        self.op_score = self.Op(sg_average_reward.average)

        self.op_n_step = self.Op(sg_global_step.n)
        self.op_inc_step = self.Op(sg_global_step.increment, increment=sg_global_step.ph_increment)

        self.op_get_episode_cnt = self.Op(sg_episode_cnt.n)
        self.op_inc_episode_cnt = self.Op(sg_episode_cnt.increment, increment=sg_episode_cnt.ph_increment)

        self.op_submit_gradients = self.Call(graph.get_gradients_apply_routine(cfg.config))
        self.op_initialize = self.Op(sg_initialize)
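
AssignWeights with a tau argument is assumed here to perform the usual DDPG soft (Polyak) target update, and a plain copy when tau is omitted, as in op_init_target_weights. A NumPy sketch of that rule:

    import numpy as np

    def assign_weights(target, source, tau=1.0):
        # target <- tau * source + (1 - tau) * target; tau=1.0 is a hard copy.
        for t, s in zip(target, source):
            t[...] = tau * s + (1.0 - tau) * t

    actor, actor_target = [np.ones(3)], [np.zeros(3)]
    assign_weights(actor_target, actor)  # init: hard copy
    assign_weights(actor_target, [np.full(3, 2.0)], tau=0.01)
    assert np.allclose(actor_target[0], 1.01)  # 0.01 * 2 + 0.99 * 1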
Example #13
    def build_graph(self):
        # Build graph
        sg_global_step = graph.GlobalStep()
        sg_episode_cnt = graph.GlobalStep()

        sg_actor_weights = ActorNetwork().weights
        sg_critic_weights = CriticNetwork().weights

        sg_actor_target_weights = ActorNetwork().weights
        sg_critic_target_weights = CriticNetwork().weights

        # reassign weights from actor & critic to the target networks
        sg_init_actor_target_weights = \
            graph.AssignWeights(sg_actor_target_weights, sg_actor_weights).op
        sg_init_critic_target_weights = \
            graph.AssignWeights(sg_critic_target_weights, sg_critic_weights).op

        sg_update_actor_target_weights = \
            graph.AssignWeights(sg_actor_target_weights, sg_actor_weights, cfg.config.tau).op
        sg_update_critic_target_weights = \
            graph.AssignWeights(sg_critic_target_weights, sg_critic_weights, cfg.config.tau).op

        sg_actor_optimizer = optimizer.AdamOptimizer(
            cfg.config.actor_learning_rate)
        sg_critic_optimizer = optimizer.AdamOptimizer(
            cfg.config.critic_learning_rate)

        sg_actor_gradients = optimizer.Gradients(sg_actor_weights,
                                                 optimizer=sg_actor_optimizer)
        sg_critic_gradients = optimizer.Gradients(
            sg_critic_weights, optimizer=sg_critic_optimizer)

        sg_initialize = graph.Initialize()

        # Expose public API
        self.op_get_weights = self.Ops(sg_actor_weights,
                                       sg_actor_target_weights,
                                       sg_critic_weights,
                                       sg_critic_target_weights)

        self.op_init_target_weights = self.Ops(sg_init_actor_target_weights,
                                               sg_init_critic_target_weights)

        self.op_update_target_weights = self.Ops(
            sg_update_actor_target_weights, sg_update_critic_target_weights)

        self.op_apply_actor_gradients = self.Ops(
            sg_actor_gradients.apply,
            sg_global_step.increment,
            gradients=sg_actor_gradients.ph_gradients,
            increment=sg_global_step.ph_increment)
        self.op_apply_critic_gradients = self.Op(
            sg_critic_gradients.apply,
            gradients=sg_critic_gradients.ph_gradients)

        self.op_n_step = self.Op(sg_global_step.n)
        self.op_inc_step = self.Op(sg_global_step.increment,
                                   increment=sg_global_step.ph_increment)

        self.op_get_episode_cnt = self.Op(sg_episode_cnt.n)
        self.op_inc_episode_cnt = self.Op(
            sg_episode_cnt.increment, increment=sg_episode_cnt.ph_increment)

        self.op_initialize = self.Op(sg_initialize)