Python TRPO.sim Examples

Programming Language: Python

Namespace/Package Name: trpo

Class/Type: TRPO

Method/Function: sim

Examples at hotexamples.com: 2

Python TRPO.sim - 2 examples found. These are the top-rated real-world Python examples of trpo.TRPO.sim, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

TRPO(14)

train(6)

sim(2)

_update_policy(1)

_update_value(1)

backward(1)

get_value(1)

sample_action(1)

update(1)

Example #1

Show file

class LEARNER():
    """Train a TRPO agent on the Gym 'Pendulum-v0' environment.

    The learner builds the environment and the agent, repeatedly calls the
    agent's ``train`` method, periodically calls its ``sim`` method, and
    writes the collected logs and network values to ``.mat`` files once the
    step budget in ``args.total_train_step`` has been exceeded.
    """

    def __init__(self, args, sess, simulator):
        """Create the environment and the TRPO agent.

        Args:
            args: Run configuration. ``learn`` reads ``total_train_step``
                from it, and the whole object is forwarded to ``TRPO`` and
                stored in the output files.
            sess: Session handle forwarded unchanged to ``TRPO``
                (presumably a TensorFlow session -- confirm with caller).
            simulator: Ignored. The parameter is kept so existing callers
                keep working, but a fresh 'Pendulum-v0' environment is
                always created here.
        """
        self.args = args
        self.sess = sess

        # Construct simulation environment. The ``simulator`` argument is
        # deliberately not stored: it was always overwritten by this call.
        self.simulator = gym.make('Pendulum-v0')

        # Define learning agent (TRPO)
        self.agent = TRPO(self.args, self.simulator, self.sess)

    def learn(self):
        """Run training iterations until the step budget is exceeded.

        Every iteration calls ``self.agent.train()``, whose result must
        provide the keys ``"Total Step"`` and ``"Num episode"``. Every 20th
        iteration additionally calls ``self.agent.sim()``. When the
        accumulated step count exceeds ``self.args.total_train_step``, two
        files are written with ``savemat`` and the loop ends:

        * ``data_<stamp>.mat`` with ``data`` (all training logs) and ``args``
        * ``weights_<stamp>.mat`` with ``policy_weights`` and ``args``

        Both files share one ``<stamp>`` (``%y-%m-%d-%H-%M``) so the outputs
        of a single run can always be paired by name.
        """
        train_index = 0
        total_episode = 0  # accumulated for bookkeeping; not written out
        total_steps = 0
        all_logs = []
        while True:
            # Train the TRPO agent
            train_index += 1
            train_log = self.agent.train()
            total_steps += train_log["Total Step"]
            total_episode += train_log["Num episode"]

            all_logs.append(train_log)

            # Simulate system w/ new parameters
            if train_index % 20 == 0:
                self.agent.sim()

            if total_steps > self.args.total_train_step:
                nn_weights = {
                    'policy_network': self.agent.get_value(),
                    'advantage_network': self.agent.gae.get_value(),
                }
                # Format the timestamp once: calling now() per file could
                # straddle a minute boundary and give the two files of the
                # same run different stamps.
                stamp = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")
                savemat(
                    'data_' + stamp + '.mat',
                    dict(data=all_logs, args=self.args))
                savemat(
                    'weights_' + stamp + '.mat',
                    dict(policy_weights=nn_weights, args=self.args))
                break

Example #2

Show file

File: learn.py Project: wsqwsq/Safe-MPC-RL

class LEARNER():
    """Train a TRPO agent on a re-bounded Gym 'Pendulum-v0' environment.

    The constructor widens the pendulum's torque and speed limits and
    rebuilds the matching action/observation spaces; ``learn`` then loops
    over training iterations and saves the logs when the step budget in
    ``args.total_train_step`` has been exceeded.
    """

    def __init__(self, args, sess, simulator):
        """Build the modified environment and the TRPO agent.

        Args:
            args: Run configuration; forwarded to ``TRPO`` and read by
                ``learn`` (``total_train_step``).
            sess: Session handle forwarded unchanged to ``TRPO``.
            simulator: Stored momentarily, then replaced by a freshly
                created 'Pendulum-v0' environment, so the passed value
                has no lasting effect.
        """
        self.args = args
        self.sess = sess
        self.simulator = simulator

        # Construct simulation environment
        env = gym.make('Pendulum-v0')
        self.simulator = env

        # Override the limits on the unwrapped environment and rebuild the
        # spaces so that they agree with the new limits.
        core = env.unwrapped
        core.max_torque = 15.
        core.max_speed = 60.
        core.action_space = spaces.Box(
            low=-core.max_torque,
            high=core.max_torque,
            shape=(1,))
        high = np.array([1., 1., core.max_speed])
        core.observation_space = spaces.Box(low=-high, high=high)

        # Define learning agent (TRPO)
        self.agent = TRPO(self.args, self.simulator, self.sess)

    def learn(self):
        """Train until the accumulated step count exceeds the budget.

        Each iteration calls ``self.agent.train()`` and reads the keys
        ``"Total Step"`` and ``"Num episode"`` from its result. On every
        5th iteration ``self.agent.sim()`` is called and the iteration
        number is printed. Once the step total exceeds
        ``self.args.total_train_step``, all logs plus ``args`` are written
        to ``data4_<%y-%m-%d-%H-%M>.mat`` and the method returns.
        """
        iteration = 0
        episodes_seen = 0
        steps_seen = 0
        history = []
        while True:
            # Train the TRPO agent
            iteration += 1
            log = self.agent.train()
            steps_seen += log["Total Step"]
            episodes_seen += log["Num episode"]
            history.append(log)

            # Simulate system w/ new parameters
            if iteration % 5 == 0:
                self.agent.sim()
                print(iteration)

            if steps_seen > self.args.total_train_step:
                stamp = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")
                out_name = 'data4_' + stamp + '.mat'
                savemat(out_name, dict(data=history, args=self.args))
                return