Exemple #1
0
    def _init_model(self):
        """init model from parameters"""
        self.env, env_continuous, num_states, num_actions = get_env_info(
            self.env_id)
        tf.keras.backend.set_floatx('float64')
        # seeding
        np.random.seed(self.seed)
        tf.random.set_seed(self.seed)
        self.env.seed(self.seed)

        if env_continuous:
            self.policy_net = Policy(num_states, num_actions)  # current policy
        else:
            self.policy_net = DiscretePolicy(num_states, num_actions)

        self.running_state = ZFilter((num_states, ), clip=5)

        if self.model_path:
            print("Loading Saved Model {}_reinforce_tf2.p".format(self.env_id))
            self.running_state = pickle.load(
                open(
                    '{}/{}_reinforce_tf2.p'.format(self.model_path,
                                                   self.env_id), "rb"))
            self.policy_net.load_weights("{}/{}_reinforce_tf2".format(
                self.model_path, self.env_id))

        self.collector = MemoryCollector(self.env,
                                         self.policy_net,
                                         render=self.render,
                                         running_state=self.running_state,
                                         num_process=self.num_process)

        self.optimizer_p = optim.Adam(lr=self.lr_p, clipnorm=20)
    def _init_model(self):
        """init model from parameters"""
        self.env, env_continuous, num_states, num_actions = get_env_info(
            self.env_id
        )

        tf.keras.backend.set_floatx("float64")

        # seeding
        np.random.seed(self.seed)
        tf.random.set_seed(self.seed)
        self.env.seed(self.seed)

        if env_continuous:
            self.policy_net = Policy(num_states, num_actions)
        else:
            self.policy_net = DiscretePolicy(num_states, num_actions)

        self.value_net = Value(num_states, l2_reg=1e-3)
        self.running_state = ZFilter((num_states,), clip=5)

        if self.model_path:
            print("Loading Saved Model {}_trpo_tf2.p".format(self.env_id))
            self.running_state = pickle.load(
                open(
                    "{}/{}_trpo_tf2.p".format(self.model_path, self.env_id),
                    "rb",
                )
            )
            self.policy_net.load_weights(
                "{}/{}_trpo_tf2_p".format(self.model_path, self.env_id)
            )
            self.value_net.load_weights(
                "{}/{}_trpo_tf2_v".format(self.model_path, self.env_id)
            )

        self.collector = MemoryCollector(
            self.env,
            self.policy_net,
            render=self.render,
            running_state=self.running_state,
            num_process=self.num_process,
        )

        self.optimizer_v = optim.Adam(lr=self.lr_v)