def _init_model(self):
    """Build the environment, policy, state filter, collector and optimizer.

    Reads ``self.env_id``, ``self.seed``, ``self.model_path``,
    ``self.render``, ``self.num_process`` and ``self.lr_p``.

    Sets ``self.env``, ``self.policy_net``, ``self.running_state``,
    ``self.collector`` and ``self.optimizer_p``.

    When ``self.model_path`` is truthy, the running-state filter and the
    policy weights are restored from ``<model_path>/<env_id>_reinforce_tf2``
    (the filter from the ``.p`` pickle next to it).
    """
    self.env, env_continuous, num_states, num_actions = get_env_info(
        self.env_id)

    # Switch the Keras default float type to float64 before any network
    # is constructed.
    tf.keras.backend.set_floatx('float64')

    # Seed every source of randomness used here with the same seed.
    np.random.seed(self.seed)
    tf.random.set_seed(self.seed)
    self.env.seed(self.seed)

    # The policy class depends on whether get_env_info reported the
    # environment as continuous.
    if env_continuous:
        self.policy_net = Policy(num_states, num_actions)
    else:
        self.policy_net = DiscretePolicy(num_states, num_actions)

    self.running_state = ZFilter((num_states, ), clip=5)

    if self.model_path:
        print("Loading Saved Model {}_reinforce_tf2.p".format(self.env_id))
        checkpoint_prefix = "{}/{}_reinforce_tf2".format(
            self.model_path, self.env_id)
        # NOTE: pickle.load can execute arbitrary code contained in the
        # file; only point model_path at checkpoints you trust.
        # The context manager guarantees the file handle is closed even if
        # unpickling raises.
        with open(checkpoint_prefix + ".p", "rb") as state_file:
            self.running_state = pickle.load(state_file)
        self.policy_net.load_weights(checkpoint_prefix)

    # Created after the optional restore so the collector receives the
    # loaded running_state and the policy with restored weights.
    self.collector = MemoryCollector(self.env,
                                     self.policy_net,
                                     render=self.render,
                                     running_state=self.running_state,
                                     num_process=self.num_process)

    self.optimizer_p = optim.Adam(lr=self.lr_p, clipnorm=20)
def _init_model(self):
    """Build the environment, networks, state filter, collector and optimizer.

    Reads ``self.env_id``, ``self.seed``, ``self.model_path``,
    ``self.render``, ``self.num_process`` and ``self.lr_v``.

    Sets ``self.env``, ``self.policy_net``, ``self.value_net``,
    ``self.running_state``, ``self.collector`` and ``self.optimizer_v``.

    When ``self.model_path`` is truthy, the running-state filter is restored
    from ``<model_path>/<env_id>_trpo_tf2.p`` and the policy / value weights
    from the ``_p`` / ``_v`` checkpoints sharing the same prefix.
    """
    self.env, env_continuous, num_states, num_actions = get_env_info(
        self.env_id
    )

    # Switch the Keras default float type to float64 before any network
    # is constructed.
    tf.keras.backend.set_floatx("float64")

    # Seed every source of randomness used here with the same seed.
    np.random.seed(self.seed)
    tf.random.set_seed(self.seed)
    self.env.seed(self.seed)

    # The policy class depends on whether get_env_info reported the
    # environment as continuous.
    if env_continuous:
        self.policy_net = Policy(num_states, num_actions)
    else:
        self.policy_net = DiscretePolicy(num_states, num_actions)

    self.value_net = Value(num_states, l2_reg=1e-3)
    self.running_state = ZFilter((num_states,), clip=5)

    if self.model_path:
        print("Loading Saved Model {}_trpo_tf2.p".format(self.env_id))
        checkpoint_prefix = "{}/{}_trpo_tf2".format(
            self.model_path, self.env_id
        )
        # NOTE: pickle.load can execute arbitrary code contained in the
        # file; only point model_path at checkpoints you trust.
        # The context manager guarantees the file handle is closed even if
        # unpickling raises.
        with open(checkpoint_prefix + ".p", "rb") as state_file:
            self.running_state = pickle.load(state_file)
        self.policy_net.load_weights(checkpoint_prefix + "_p")
        self.value_net.load_weights(checkpoint_prefix + "_v")

    # Created after the optional restore so the collector receives the
    # loaded running_state and the policy with restored weights.
    self.collector = MemoryCollector(
        self.env,
        self.policy_net,
        render=self.render,
        running_state=self.running_state,
        num_process=self.num_process,
    )

    self.optimizer_v = optim.Adam(lr=self.lr_v)