예제 #1
0
        saver = tf.train.Saver()
        saver.save(self.sess, path)

    def load(self, path):
        """Restore saved variables from ``path`` into this object's session.

        A fresh ``tf.train.Saver`` is built on every call and its
        ``restore`` method is applied to ``self.sess``.

        Args:
            path: Checkpoint location, handed unchanged to ``Saver.restore``.
        """
        tf.train.Saver().restore(self.sess, path)


# --- Reacher environment configuration -------------------------------------
# The first four values are forwarded to the Reacher constructor below.
NUM_JOINTS = 2
LINK_LENGTH = [200, 140]  # presumably one length per joint -- TODO confirm units
# NOTE(review): "JOING" looks like a typo for "JOINT"; the name is kept as-is
# because other code may reference it.
INI_JOING_ANGLES = [0.1, 0.1]
SCREEN_SIZE = 1000

# Flags handed to the environment at run time: SPARSE_REWARD goes to
# env.step(), SCREEN_SHOT goes to both env.reset() and env.step().
SPARSE_REWARD = False
SCREEN_SHOT = False
DETERMINISTIC = False

# Build the environment with a fixed target position and rendering enabled,
# then create the PPO agent with its default constructor arguments.
env = Reacher(
    screen_size=SCREEN_SIZE,
    num_joints=NUM_JOINTS,
    link_lengths=LINK_LENGTH,
    ini_joint_angles=INI_JOING_ANGLES,
    target_pos=[369, 430],
    render=True,
)
ppo = PPO()

if args.train:
    all_ep_r = []

    for ep in range(EP_MAX):
        s = env.reset(SCREEN_SHOT)
        s = s / 100.
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0
        for t in range(EP_LEN):  # in one episode
            # env.render()
            a = ppo.choose_action(s)
            s_, r, done, distance2goal = env.step(a, SPARSE_REWARD,
                                                  SCREEN_SHOT)