Example #1
import logging
import time

import numpy as np

from mover import Mover

if __name__ == "__main__":
    logger = logging.getLogger("learner")
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler("logfile.txt")
    formatter = logging.Formatter(
        "%(levelname)s:%(thread)d:%(filename)s:%(funcName)s:%(asctime)s::%(message)s"
    )
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    mov = Mover("127.0.0.1", 10000)
    mov.init_client()
    print('ori' + str(mov.get_orientation()))
    print('pos' + str(mov.get_position()))
    mov.stop_simulation()
    # mov.stop_simulation()

    # perform ten random repositioning steps
    for _ in range(10):
        rand_pos = np.random.uniform(-0.2, +0.2, (2, ))
        time.sleep(0.5)
        mov.start_simulation()
        mov.set_position_and_rotation(rand_pos,
                                      np.random.uniform(-np.pi, +np.pi))
        # mov.set_target_vel(np.zeros(2))
        print('ori' + str(mov.get_orientation()))
        print('pos' + str(mov.get_position()))
        time.sleep(1)
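
The `mover` module itself is not shown in either example. As a rough, hypothetical sketch of the interface both snippets assume (method and attribute names are taken from the calls above; the remote-API plumbing is omitted), a stub might look like:

import numpy as np


class Mover(object):
    """Hypothetical stub of the interface used by the examples above."""

    def __init__(self, server_ip, server_port):
        self.server_ip = server_ip
        self.server_port = server_port
        self.start_pos = np.zeros(2)       # position recorded when the simulation starts
        self.start_orientation = 0.0       # yaw recorded when the simulation starts

    def init_client(self):
        """Connect to the remote simulation server."""

    def start_simulation(self):
        """Start (or resume) the simulation."""

    def stop_simulation(self):
        """Stop the simulation."""

    def get_position(self):
        """Return a (1, n) array whose first columns hold the x, y position."""

    def get_orientation(self):
        """Return a (1, n) array whose last column is the yaw angle."""

    def get_joint_pos(self):
        """Return joint positions as a (1, n) array."""

    def get_joint_vel(self):
        """Return joint velocities as a (1, n) array."""

    def set_position_and_rotation(self, pos, yaw):
        """Teleport the robot to pos with heading yaw."""

    def set_target_vel(self, vel):
        """Send target velocities to the robot."""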
Example #2
import logging
import time

import numpy as np

from mover import Mover


class MoverTurnEnv(object):
    def __init__(self, args):
        self.delta_theta = None
        # subprocess.Popen(
        #    [args['vrep_exec_path'], '-h', '-gREMOTEAPISERVERSERVICE_' + str(args['server_port']) + '_FALSE_TRUE',
        #     args['vrep_scene_file']])
        self.mover = Mover(args['server_ip'], args['server_port'])
        self.mover.init_client()
        self.per_step_reward = args['per_step_reward']
        self.final_reward = args['final_reward']
        self.tolerance = args['tolerance']
        self.server_ip, self.server_port = args['server_ip'], args['server_port']
        self.spawn_radius = args['spawn_radius']
        self.begin_pos = None
        self.goal = 0

    def set_goal(self, delta_theta):
        self.delta_theta = delta_theta

    def reset_goal(self, delta_theta):
        self.delta_theta = delta_theta
        goal = self.mover.get_orientation()[0, -1].item() + self.delta_theta
        if goal > np.pi:
            goal -= 2 * np.pi
        elif goal < -np.pi:
            goal += 2 * np.pi
        self.goal = goal

    def start(self):
        # logging.getLogger("learner").info("ENV::START:goal:%f" % self.delta_theta)
        state = np.hstack((self.mover.get_joint_pos(), self.mover.get_joint_vel()))
        begin_angle = self.mover.start_orientation
        self.begin_pos = self.mover.start_pos
        self.goal = begin_angle + self.delta_theta
        if self.goal > np.pi:
            self.goal -= 2 * np.pi
        elif self.goal < -np.pi:
            self.goal += 2 * np.pi
        return state, np.asarray(self.delta_theta).reshape((1, -1))

    def _is_terminal(self, current):
        # shortest angular distance between the current heading and the goal
        diff = np.absolute(current - self.goal)
        if diff > np.pi:
            diff = 2 * np.pi - diff
        return diff < self.tolerance

    def get_state(self):
        return np.hstack((self.mover.get_joint_pos(), self.mover.get_joint_vel()))

    def step(self, action):
        # logging.getLogger("learner").info("stepping")
        self.mover.set_target_vel(action)
        new_state = np.hstack((self.mover.get_joint_pos(), self.mover.get_joint_vel()))
        new_orient = self.mover.get_orientation()
        new_goal = self.goal - new_orient[0, -1].item()
        if new_goal > np.pi:
            new_goal -= 2 * np.pi
        elif new_goal < -np.pi:
            new_goal += 2 * np.pi
        new_goal = np.asarray(new_goal).reshape((1, -1))
        pos = self.mover.get_position()
        # if out of bounds, end with negative reward
        if abs(pos[0, 0]) > 12 or abs(pos[0, 1]) > 12:
            return new_state, new_goal, -10, True
        # logging.getLogger("learner").info("goal:%0.4f, cur:%0.4f" % (self.goal, new_orient[0, -1].item()))
        if abs(new_goal) < self.tolerance:
            disp_reward = -np.linalg.norm(self.mover.get_position()[0, 1:2] - self.begin_pos)
            vel_reward = -np.linalg.norm(new_state[0, 23:])
            return new_state, new_goal, (vel_reward + disp_reward + self.final_reward), True
        else:
            return new_state, new_goal, self.per_step_reward, False

    def reset(self):
        logging.getLogger("learner").info("ENV::RESET")
        self.mover.set_target_vel(np.zeros(2))
        self.mover.stop_simulation()
        time.sleep(0.5)
        self.mover.start_simulation()
        spawn_rad = np.random.uniform(-self.spawn_radius, +self.spawn_radius)
        angle = np.random.uniform(-np.pi, np.pi)
        self.mover.set_position_and_rotation(spawn_rad * np.asarray((np.cos(angle), np.sin(angle))),
                                             np.random.uniform(-np.pi, +np.pi))
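
For context, a minimal driver loop for MoverTurnEnv could look like the sketch below. The keys of args mirror the ones read in __init__; the numeric values, the server address, and the random-action policy are placeholders standing in for a real configuration and learner:

import numpy as np

if __name__ == "__main__":
    args = {
        'server_ip': '127.0.0.1',
        'server_port': 10000,
        'per_step_reward': -0.01,   # placeholder reward settings
        'final_reward': 10.0,
        'tolerance': 0.05,
        'spawn_radius': 1.0,
    }
    env = MoverTurnEnv(args)

    env.reset()                                    # stop, restart, and respawn the robot
    env.set_goal(np.random.uniform(-np.pi, np.pi)) # pick a random turn target
    state, goal = env.start()

    done = False
    while not done:
        # a random policy stands in for a learned controller
        action = np.random.uniform(-1.0, 1.0, (2,))
        state, goal, reward, done = env.step(action)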