Example #1
def enjoy(comm, env, policy, action_bound):

    if env.index == 0:
        env.reset_world()

    env.reset_pose()

    env.generate_goal_point()
    step = 1
    terminal = False

    obs = env.get_laser_observation()
    obs_stack = deque([obs, obs, obs])
    goal = np.asarray(env.get_local_goal())
    speed = np.asarray(env.get_self_speed())
    state = [obs_stack, goal, speed]

    while not rospy.is_shutdown():
        state_list = comm.gather(state, root=0)

        # generate actions at rank==0
        mean, scaled_action = generate_action_no_sampling(
            env=env,
            state_list=state_list,
            policy=policy,
            action_bound=action_bound)

        # execute actions
        real_action = comm.scatter(scaled_action, root=0)
        if terminal:
            real_action[0] = 0
        print(real_action)
        env.control_vel(real_action)
        # rate.sleep()
        rospy.sleep(0.001)
        # get information
        r, terminal, result = env.get_reward_and_terminate(step)
        step += 1

        # get next state
        s_next = env.get_laser_observation()
        left = obs_stack.popleft()
        obs_stack.append(s_next)
        goal_next = np.asarray(env.get_local_goal())
        speed_next = np.asarray(env.get_self_speed())
        state_next = [obs_stack, goal_next, speed_next]

        state = state_next
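
The loop above is built around an MPI gather/scatter round trip: every rank sends its local state to rank 0, rank 0 evaluates the policy for the whole batch, and each rank gets back exactly its own action. Below is a minimal, self-contained sketch of that pattern with mpi4py; compute_actions is a hypothetical stand-in for generate_action_no_sampling, not the real policy call.

# Minimal sketch of the gather/scatter pattern used in the examples (mpi4py).
# compute_actions is a placeholder, not the real policy call.
from mpi4py import MPI

def compute_actions(state_list):
    # pretend policy: one [v, w] command per gathered state
    return [[0.3, 0.0] for _ in state_list]

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

local_state = {"rank": rank}          # per-robot state placeholder

# every rank calls gather; only root ends up with the full list
state_list = comm.gather(local_state, root=0)

actions = compute_actions(state_list) if rank == 0 else None

# scatter hands each rank exactly one element of root's list
my_action = comm.scatter(actions, root=0)
print("rank", rank, "received action", my_action)

Run it with, e.g., mpiexec -n 4 python mpi_sketch.py (the script name is arbitrary); each rank prints its own [0.3, 0.0] command.
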
Example #2
    def cbComputeAction(self, event):
        # block until both the laser scan and the sub-goal have arrived
        while self.scan is None or self.sub_goal.x is None:
            pass
        # ************************************ Input ************************************
        obs = self.get_laser_observation()
        obs_stack = deque([obs, obs, obs])  # the stack is rebuilt from the current scan on every callback
        self.state = [
            self.pose.pose.position.x, self.pose.pose.position.y, self.psi
        ]  # x, y, theta
        self.goal = np.asarray(self.get_local_goal())
        self.speed = np.asarray([self.vel.x, self.vel_angular],
                                dtype='float64')

        obs_state_list = [[obs_stack, self.goal, self.speed]]
        # self.control_pose(state)

        # ************************************ Output ************************************
        _, scaled_action = generate_action_no_sampling(self.env,
                                                       obs_state_list,
                                                       self.policy,
                                                       self.action_bound)
        action = scaled_action[0]
        action[0] = 0.3 * action[0]  # scale the linear velocity down to the 0.3 cmd_vel maximum
        self.control_vel(action)
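
Because cbComputeAction receives a timer event, it is presumably registered as a periodic rospy.Timer callback on the agent class. A minimal sketch of that wiring follows; the 0.1 s period, the NavAgent class name, and the node name are assumptions, not taken from the original.

# Hypothetical wiring for the callback above; period, class name and node name are assumed.
import rospy

class NavAgent(object):
    def __init__(self):
        self.scan = None
        self.sub_goal = None
        # rospy calls cbComputeAction every 0.1 s and passes a TimerEvent as `event`
        self.timer = rospy.Timer(rospy.Duration(0.1), self.cbComputeAction)

    def cbComputeAction(self, event):
        pass  # body as in the example above

if __name__ == "__main__":
    rospy.init_node("nav_agent")   # a node must be initialized before timers start
    agent = NavAgent()
    rospy.spin()
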
Example #3
def run(comm, env, policy, policy_path, action_bound, optimizer):

    # rate = rospy.Rate(5)
    buff = []
    global_update = 0
    global_step = 0
    mpiId = comm.Get_rank()
    MAX_EPISODES = 500
    numTest = 100

    if env.index == 0:
        env.reset_world()
    env.store_resetPose()


    for id in range(MAX_EPISODES):
        env.generate_goal_point()
        env.reset_pose()  # must be called after generate_goal_point
        print("new goal:", env.goal_point, "and rank:", env.mpi_rank)

        terminal = False
        ep_reward = 0
        step = 0
        result = ""

        obs = env.get_laser_observation()
        if not LASER_NORM:
            obs = (obs + 0.5) * 3.5
        obs_stack = deque([obs, obs, obs])
        goal = np.asarray(env.get_local_goal())
        speed = np.asarray(env.get_self_speed())
        state = [obs_stack, goal, speed]

        startPose = [env.first_pose.position.x, env.first_pose.position.y]
        startTime = time.time()
        goalPose = env.goal_point
        deltaDistance = 0.0
        nowPos = env.get_self_state()
        lastPos = list(nowPos)
        reachFlag = 0

        while ((not terminal) and not rospy.is_shutdown()):
            state_list = comm.gather(state, root=0)


            # generate actions at rank==0
            mean, scaled_action = generate_action_no_sampling(
                env=env,
                state_list=state_list,
                policy=policy,
                action_bound=action_bound)

            # execute actions
            real_action = comm.scatter(scaled_action, root=0)
            env.control_vel(real_action)


            # rate.sleep()
            rospy.sleep(0.001)

            # get information
            r, terminal, result = env.get_reward_and_terminate(step)
            ep_reward += r
            global_step += 1

            if step > 5000:
                terminal = True
                reachFlag = 2
            
            if result == "Reach Goal":
                reachFlag = 1
                print("reach goal:", env.goal_point)

            if result == "Crashed":
                print("crashed, goal:", env.goal_point)
                obs = env.get_laser_observation()
                if not LASER_NORM:
                    obs = (obs + 0.5) * 3.5
                obs_stack = deque([obs, obs, obs])


            # get next state
            s_next = env.get_laser_observation()
            if not LASER_NORM:
                s_next = (s_next + 0.5) * 3.5
            left = obs_stack.popleft()
            obs_stack.append(s_next)
            goal_next = np.asarray(env.get_local_goal())
            speed_next = np.asarray(env.get_self_speed())
            state_next = [obs_stack, goal_next, speed_next]


            # add transitions to buff and update policy
            r_list = comm.gather(r, root=0)
            state = state_next

            # calculate deltaDistance
            nowPos = env.get_self_state()
            tempDeltaDis = np.sqrt((nowPos[0] - lastPos[0])**2 + (nowPos[1]-lastPos[1])**2)
            deltaDistance += tempDeltaDis
            step += 1
            lastPos = nowPos


        endTime = time.time()
        deltaTime = (endTime - startTime)
        stateInfo = "scenarioId: %d, start:(%4f,%4f), goal: (%4f,%4f), state: %d, time: %4f, distance: %4f"\
            %(mpiId,startPose[0],startPose[1],goalPose[0],goalPose[1],reachFlag,deltaTime,deltaDistance)
        if 10 < id <= 1000:
            print(stateInfo)
            logger_cal.info(stateInfo)
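
When LASER_NORM is disabled, this example maps the observation back to metres with (obs + 0.5) * 3.5, which implies that get_laser_observation normalizes ranges as range / 3.5 - 0.5 for a sensor with a 3.5 m maximum range. A small sketch of that forward/inverse pair follows; the helper names are illustrative assumptions, not from the original.

# Illustrative helpers inferred from the (obs + 0.5) * 3.5 un-normalization above.
import numpy as np

LASER_MAX_RANGE = 3.5  # metres, implied by the 3.5 factor in the examples

def normalize_scan(ranges):
    # map [0, 3.5] m into [-0.5, 0.5], the assumed output of get_laser_observation
    return np.asarray(ranges) / LASER_MAX_RANGE - 0.5

def denormalize_scan(obs):
    # inverse mapping, identical to the (obs + 0.5) * 3.5 lines in the example
    return (np.asarray(obs) + 0.5) * LASER_MAX_RANGE

raw = np.array([0.0, 1.75, 3.5])
assert np.allclose(denormalize_scan(normalize_scan(raw)), raw)
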
Example #4
# ************************************ Input ************************************
# agent: goal, start position
input_goal = [-25.00,
              -0.00]  # [-25, 0] is expressed in the coordinate system of the circle world
input_start_position = [0.5, 0.0, np.pi]  # x, y, theta

env.goal_point = input_goal
goal = np.asarray(
    env.get_local_goal())  # transform to the robot-based coordinate system
speed = np.asarray(env.get_self_speed())
state = [obs_stack, goal, speed]  # obs_stack: deque of the three most recent laser scans, built as in the examples above

env.control_pose(input_start_position)

# ************************************ Output ************************************
# agent: postion(x,y,theta),velocity(v,angular)

_, scaled_action = generate_action_no_sampling(env=env,
                                               state_list=[state],
                                               policy=policy,
                                               action_bound=action_bound)
action = scaled_action[0]
print('velocity : ', action[0], 'velocity_angular : ', action[1])

env.control_vel(
    action)  # invoke the ROS publisher to send the velocity command
rospy.sleep(0.01)
# the following outputs depend on the ROS callback functions.
print('position: x, y, theta', env.get_self_state())
print('speed of agent: v, angular ', env.get_self_speed())
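
Example #4 executes a single inference/control step. To drive the robot all the way to the goal, the same block can simply be repeated until the environment reports a terminal state. The sketch below continues from the variables defined above and reuses only calls that already appear in these examples; MAX_STEPS is an assumed safety limit.

# Sketch: repeat the inference/control step until the episode terminates.
# Continues from the example above; MAX_STEPS is an assumption.
MAX_STEPS = 1000
step = 1
terminal = False
result = ""

while not terminal and not rospy.is_shutdown() and step <= MAX_STEPS:
    obs_stack.popleft()
    obs_stack.append(env.get_laser_observation())
    state = [obs_stack,
             np.asarray(env.get_local_goal()),
             np.asarray(env.get_self_speed())]

    _, scaled_action = generate_action_no_sampling(env=env,
                                                   state_list=[state],
                                                   policy=policy,
                                                   action_bound=action_bound)
    env.control_vel(scaled_action[0])
    rospy.sleep(0.01)
    r, terminal, result = env.get_reward_and_terminate(step)
    step += 1

env.control_vel([0, 0])   # stop the robot once the loop exits
print('result:', result)
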
Example #5
def enjoy(env, policy, action_bound, respawn_goal):

    # if env.index == 0:
    #     env.reset_world()

    #env.reset_pose()

    env.generate_goal_point()
    step = 1
    terminal = False

    obs = env.get_laser_observation()
    #print("obs",obs)
    obs_stack = deque([obs, obs, obs])
    #print("get obs_stack")
    # goals = raw_input("Press Enter to continue...")
    goals = respawn_goal.getPosition()
    env.goal_point = [(goals[0]), (goals[1])]
    goal = np.asarray(env.get_local_goal())
    # print("goal",goal)
    # print("goal.shape",goal.shape)
    # print("goal.shape[0]",goal.shape[0])
    speed = np.asarray(env.get_self_speed())
    #print("speed",speed)
    state = [obs_stack, goal, speed]

    while not rospy.is_shutdown():
        # get next state
        s_next = env.get_laser_observation()
        left = obs_stack.popleft()
        obs_stack.append(s_next)

        if terminal:
            real_action = [0, 0]
            env.control_vel(real_action)
            # goals = raw_input("Press Enter to continue...")
            # goals = goals.split(" ")
            goals = respawn_goal.getPosition()
            env.goal_point = [(goals[0]), (goals[1])]

        goal_next = np.asarray(env.get_local_goal())
        #print("goal_next",goal_next)
        speed_next = np.asarray(env.get_self_speed())
        state_next = [obs_stack, goal_next, speed_next]

        state = state_next

        #state_list = comm.gather(state, root=0)
        state_list = state

        # generate actions at rank==0
        mean, scaled_action = generate_action_no_sampling(
            env=env,
            state_list=state_list,
            policy=policy,
            action_bound=action_bound)
        # execute actions
        real_action = copy.deepcopy(scaled_action[0])
        #real_action[1]=real_action[1]
        #real_action = comm.scatter(scaled_action, root=0)

        print("real_action", real_action)
        env.control_vel(real_action)
        # rate.sleep()
        rospy.sleep(0.05)
        # get information
        r, terminal, result = env.get_reward_and_terminate(step)
        step += 1
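
Example #5 pulls each new goal from respawn_goal.getPosition() and assigns it to env.goal_point. Only that getPosition() interface is visible in the example; below is a clearly hypothetical stand-in that cycles through a fixed list of waypoints, useful for dry-running the loop without the real goal-respawn node.

# Hypothetical stand-in for respawn_goal; only getPosition() is taken from the example.
import itertools

class FixedGoalProvider(object):
    def __init__(self, waypoints):
        self._cycle = itertools.cycle(waypoints)

    def getPosition(self):
        # return the next (x, y) goal, matching how the example reads goals[0] and goals[1]
        return next(self._cycle)

respawn_goal = FixedGoalProvider([(2.0, 0.0), (0.0, 2.0), (-2.0, 0.0)])
print(respawn_goal.getPosition())   # -> (2.0, 0.0)
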
Example #6
def run(comm, env, policy, policy_path, action_bound, optimizer):

    # rate = rospy.Rate(5)
    buff = []
    global_update = 0
    global_step = 0

    if env.index == 0:
        env.reset_world()
    env.store_resetPose()

    for id in range(MAX_EPISODES):
        env.generate_goal_point()
        print("new goal:", env.goal_point, "and rnak:", env.mpi_rank)
        terminal = False
        next_ep = False
        ep_reward = 0
        step = 1

        env.reset_pose()

        obs = env.get_laser_observation()
        if not LASER_NORM:
            obs = (obs + 0.5) * 3.5
        obs_stack = deque([obs, obs, obs])
        goal = np.asarray(env.get_local_goal())
        speed = np.asarray(env.get_self_speed())
        state = [obs_stack, goal, speed]

        while not next_ep and not rospy.is_shutdown():
            state_list = comm.gather(state, root=0)

            # generate actions at rank==0
            mean, scaled_action = generate_action_no_sampling(
                env=env,
                state_list=state_list,
                policy=policy,
                action_bound=action_bound)

            # execute actions
            real_action = comm.scatter(scaled_action, root=0)
            env.control_vel(real_action)

            # rate.sleep()
            rospy.sleep(0.001)

            # get information
            r, terminal, result = env.get_reward_and_terminate(step)
            ep_reward += r
            global_step += 1

            if result == "Reach Goal":
                env.generate_goal_point()
                print("new goal:", env.goal_point)
            if terminal:
                env.reset_pose()
                obs = env.get_laser_observation()
                if not LASER_NORM:
                    obs = (obs + 0.5) * 3.5
                obs_stack = deque([obs, obs, obs])

            # get next state
            s_next = env.get_laser_observation()
            if not LASER_NORM:
                s_next = (s_next + 0.5) * 3.5
            left = obs_stack.popleft()
            obs_stack.append(s_next)
            goal_next = np.asarray(env.get_local_goal())
            speed_next = np.asarray(env.get_self_speed())
            state_next = [obs_stack, goal_next, speed_next]

            # add transitions to buff and update policy
            r_list = comm.gather(r, root=0)
            step += 1
            state = state_next
Example #7
def enjoy(comm, env, policy, action_bound):

    if env.index == 0:
        env.reset_world()

    for id in range(MAX_EPISODES):

        # block until a sub-goal has been published
        while not rospy.is_shutdown():
            get_goal = env.is_sub_goal
            if get_goal:
                break

        step = 1
        terminal = False
        ep_reward = 0
        ep_v = 0
        ep_w = 0

        obs = env.get_laser_observation()
        obs_stack = deque([obs, obs, obs])
        goal = np.asarray(env.get_local_goal())
        speed = np.asarray(env.get_self_speed())
        state = [obs_stack, goal, speed]

        while not terminal and not rospy.is_shutdown():
            state_list = comm.gather(state, root=0)

            # generate actions at rank==0
            mean, scaled_action = generate_action_no_sampling(
                env=env,
                state_list=state_list,
                policy=policy,
                action_bound=action_bound)

            # execute actions
            real_action = comm.scatter(scaled_action, root=0)
            if terminal:
                real_action[0] = 0
                real_action[1] = 0

            env.control_vel(real_action)
            # rate.sleep()
            rospy.sleep(0.001)
            # get information
            r, terminal, result = env.get_reward_and_terminate(step)
            step += 1

            ep_reward += r
            ep_v += real_action[0]
            ep_w += real_action[1]

            # get next state
            s_next = env.get_laser_observation()
            left = obs_stack.popleft()
            obs_stack.append(s_next)
            goal_next = np.asarray(env.get_local_goal())
            speed_next = np.asarray(env.get_self_speed())
            state_next = [obs_stack, goal_next, speed_next]

            if result == "Crashed" or result == "Time out":
                env.set_gazebo_pose(0, 0, 3.14)

            state = state_next

        env.control_vel([0, 0])
        env.is_sub_goal = False

        logger.info('Goal (%05.1f, %05.1f), Episode %05d, step %03d, Action [%1.3f, %1.3f], %s' % \
                        (env.goal_point[0], env.goal_point[1], id + 1, step, ep_v/step, ep_w/step, result))
        logger_cal.info(ep_reward)
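
All of the examples keep the three most recent laser scans in a deque: it is seeded with three copies of the first observation and then updated once per control step with a popleft/append pair. The self-contained sketch below reproduces that frame-stacking pattern with dummy scans; it uses deque's maxlen option, a slight variant of the explicit popleft used above, and the helper names and 512-beam scan size are illustrative assumptions.

# Self-contained sketch of the 3-frame laser stacking used throughout the examples.
from collections import deque
import numpy as np

def make_stack(first_scan, depth=3):
    # seed the history with copies of the first scan, as the examples do
    return deque([first_scan] * depth, maxlen=depth)

def push_scan(obs_stack, new_scan):
    # with maxlen set, append alone drops the oldest frame
    obs_stack.append(new_scan)
    return obs_stack

scan0 = np.zeros(512)                  # dummy 512-beam scan
stack = make_stack(scan0)
for t in range(1, 4):
    push_scan(stack, np.full(512, float(t)))
print([frame[0] for frame in stack])   # -> [1.0, 2.0, 3.0]
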