# NOTE(review): this chunk begins inside PPO.save() — the two lines below and
# the load() method belong to the PPO class in the full file and should carry
# the class/method indentation when spliced back. Formatting reconstructed
# from a one-line (syntactically invalid) paste; no tokens changed.
saver = tf.train.Saver()
saver.save(self.sess, path)

def load(self, path):
    """Restore the session's model variables from a TF checkpoint at `path`."""
    saver = tf.train.Saver()
    saver.restore(self.sess, path)

# --- Reacher environment / training configuration -------------------------
NUM_JOINTS = 2                    # planar arm with two revolute joints
LINK_LENGTH = [200, 140]          # per-link lengths; presumably pixels — TODO confirm
INI_JOING_ANGLES = [0.1, 0.1]     # initial joint angles; NOTE: "JOING" typo kept —
                                  # the name may be referenced elsewhere in the file
SCREEN_SIZE = 1000                # render-window size passed to Reacher
SPARSE_REWARD = False             # forwarded to env.step(); dense reward when False
SCREEN_SHOT = False               # forwarded to env.reset()/env.step() — presumably
                                  # switches to pixel observations; TODO confirm
DETERMINISTIC = False

env = Reacher(screen_size=SCREEN_SIZE, num_joints=NUM_JOINTS,
              link_lengths=LINK_LENGTH, ini_joint_angles=INI_JOING_ANGLES,
              target_pos=[369, 430], render=True)
ppo = PPO()

if args.train:
    all_ep_r = []                 # per-episode returns, collected for later plotting/inspection
    for ep in range(EP_MAX):
        s = env.reset(SCREEN_SHOT)
        s = s / 100.              # scale raw observation — assumes position-like units; TODO confirm
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0
        for t in range(EP_LEN):   # in one episode
            # env.render()
            a = ppo.choose_action(s)
            s_, r, done, distance2goal = env.step(a, SPARSE_REWARD, SCREEN_SHOT)