Example #1
    def __init__(self):
        # Get the vehicle name, which comes in as HOSTNAME
        self.vehicle = os.getenv('HOSTNAME')

        # Use our env launcher
        self.env = launch_env()

        # Subscribes to the output of the lane_controller_node and IK node
        self.action_sub = rospy.Subscriber('/{}/lane_controller_node/car_cmd'.format(
            self.vehicle), Twist2DStamped, self._action_cb)
        self.ik_action_sub = rospy.Subscriber('/{}/wheels_driver_node/wheels_cmd'.format(
            self.vehicle), WheelsCmdStamped, self._ik_action_cb)
        
        # Placeholder for the action
        self.action = np.array([0, 0])

        # Publishes onto the corrected image topic,
        # since the image out of the simulator is already rectified
        self.cam_pub = rospy.Publisher('/{}/corrected_image/compressed'.format(
            self.vehicle), CompressedImage, queue_size=10)
        
        # Publisher for camera info - needed for the ground_projection
        self.cam_info_pub = rospy.Publisher('/{}/camera_node/camera_info'.format(
            self.vehicle), CameraInfo, queue_size=1)

        # Initializes the node
        rospy.init_node('ROSAgent')

        # 10Hz ROS Cycle - TODO: What is this number?
        self.r = rospy.Rate(10)
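
The two callbacks registered above are defined elsewhere in the class; a minimal sketch of what they might look like (the duckietown_msgs field names v/omega and vel_left/vel_right are assumptions of this sketch):

    def _action_cb(self, msg):
        # Keep the latest high-level command (linear and angular velocity)
        # so the simulator can be stepped with it
        self.action = np.array([msg.v, msg.omega])

    def _ik_action_cb(self, msg):
        # Keep the latest low-level wheel command from the inverse-kinematics node
        self.action = np.array([msg.vel_left, msg.vel_right])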
Example #2
    def __init__(self):
        super().__init__("duckietown_agent")

        #        self.node = rclpy.create_node("duckietown_agent")

        # Get the vehicle name, which comes in as HOSTNAME
        self.vehicle = os.getenv('HOSTNAME')

        # Use our env launcher
        self.env = launch_env()

        # Subscribes to the output of the lane_controller_node and IK node
        self.action_sub = self.create_subscription(
            Twist2DStamped,
            '/{}/lane_controller_node/car_cmd'.format(self.vehicle),
            self._action_cb, 10)
        self.ik_action_sub = self.create_subscription(
            WheelsCmdStamped,
            '/{}/wheels_driver_node/wheels_cmd'.format(self.vehicle),
            self._ik_action_cb, 10)

        # Placeholder for the action
        self.action = np.array([0, 0])

        # Publishes onto the corrected image topic,
        # since the image out of the simulator is already rectified
        self.cam_pub = self.create_publisher(
            CompressedImage,
            '/{}/corrected_image/compressed'.format(self.vehicle), 10)

        # Publisher for camera info - needed for the ground_projection
        self.cam_info_pub = self.create_publisher(
            CameraInfo, '/{}/camera_node/camera_info'.format(self.vehicle), 10)

        # Initialize timer for continuous publication of camera images and info
        timer_period = 0.1
        self.timer = self.create_timer(timer_period, self.timer_callback)
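
The timer_callback wired up above is not part of this snippet; one possible shape for it (a sketch that assumes the latest simulator frame is kept in self.latest_obs as an RGB NumPy array and that OpenCV is imported as cv2):

    def timer_callback(self):
        if self.latest_obs is None:
            return
        # Compress the frame to JPEG; OpenCV expects BGR, so flip the channels
        _, jpeg = cv2.imencode('.jpg', self.latest_obs[:, :, ::-1])
        img_msg = CompressedImage()
        img_msg.header.stamp = self.get_clock().now().to_msg()
        img_msg.format = 'jpeg'
        img_msg.data = jpeg.tobytes()
        self.cam_pub.publish(img_msg)
        # Publish (default) intrinsics so ground_projection has a camera_info topic
        self.cam_info_pub.publish(CameraInfo())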
Example #3
    # Cause tee's stdin to get a copy of our stdout/stderr (as well as that
    # of any child processes we spawn)
    os.dup2(tee.stdin.fileno(), sys.stdout.fileno())
    os.dup2(tee.stdin.fileno(), sys.stderr.fileno())

file_name = "{}_{}".format(
    policy_name,
    str(args.seed),
)

if not os.path.exists("./results"):
    os.makedirs("./results")
if args.save_models and not os.path.exists("./pytorch_models"):
    os.makedirs("./pytorch_models")

env = launch_env()

# Set seeds
seed(args.seed)

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")

replay_buffer = ReplayBuffer(args.replay_buffer_max_size)

# Evaluate untrained policy
evaluations = [evaluate_policy(env, policy)]
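
evaluate_policy comes from a helper module; a typical implementation (this sketch and its eval_episodes default are assumptions) averages the undiscounted return of the deterministic policy over a few rollouts:

def evaluate_policy(env, policy, eval_episodes=10):
    avg_reward = 0.0
    for _ in range(eval_episodes):
        obs, done = env.reset(), False
        while not done:
            # Act deterministically, without exploration noise
            action = policy.predict(np.array(obs))
            obs, reward, done, _ = env.step(action)
            avg_reward += reward
    return avg_reward / eval_episodes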
Example #4
def _train(args):   
    if not os.path.exists("./results"):
        os.makedirs("./results")
    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)
        
    # Launch the env with our helper function
    env = launch_env()
    print("Initialized environment")

    # Wrappers
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env) # to make the images from 160x120x3 into 3x160x120
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)
    print("Initialized Wrappers")
    
    device = "cpu" # torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Set seeds
    seed(args.seed)

    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    # Initialize policy
    policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")
    replay_buffer = utils.ReplayBuffer(args.replay_buffer_max_size)
    print("Initialized DDPG")
    
    # Evaluate untrained policy
    evaluations = [evaluate_policy(env, policy)]
   
    total_timesteps = 0
    timesteps_since_eval = 0
    episode_num = 0
    done = True
    episode_reward = None
    env_counter = 0
    reward = 0
    print("Starting training")
    while total_timesteps < args.max_timesteps:
        
        print("timestep: {} | reward: {}".format(total_timesteps, reward))
            
        if done:
            if total_timesteps != 0:
                print(("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (
                    total_timesteps, episode_num, episode_timesteps, episode_reward))
                policy.train(replay_buffer, episode_timesteps, args.batch_size, args.discount, args.tau)

                # Evaluate episode
                if timesteps_since_eval >= args.eval_freq:
                    timesteps_since_eval %= args.eval_freq
                    evaluations.append(evaluate_policy(env, policy))
                    print("rewards at time {}: {}".format(total_timesteps, evaluations[-1]))

                    if args.save_models:
                        policy.save(file_name, directory=args.model_dir)
                    np.savez("./results/{}.npz".format(file_name), evaluations)

            # Reset environment
            env_counter += 1
            obs = env.reset()
            done = False
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1

        # Select action randomly or according to policy
        if total_timesteps < args.start_timesteps:
            action = env.action_space.sample()
        else:
            action = policy.predict(np.array(obs))
            if args.expl_noise != 0:
                action = (action + np.random.normal(
                    0,
                    args.expl_noise,
                    size=env.action_space.shape[0])
                          ).clip(env.action_space.low, env.action_space.high)

        # Perform action
        new_obs, reward, done, _ = env.step(action)

        if episode_timesteps >= args.env_timesteps:
            done = True

        done_bool = 0 if episode_timesteps + 1 == args.env_timesteps else float(done)
        episode_reward += reward

        # Store data in replay buffer
        replay_buffer.add(obs, new_obs, action, reward, done_bool)

        obs = new_obs

        episode_timesteps += 1
        total_timesteps += 1
        timesteps_since_eval += 1
    
    print("Training done, about to save..")
    policy.save(filename='ddpg', directory=args.model_dir)
    print("Finished saving..should return now!")
Example #5
import time
import matplotlib.pyplot as plt

args = get_td3_args_test()

experiment = 2  #args.experiment
seed = args.seed
policy_name = "TD3"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

file_name = "{}_{}_{}_{}".format(policy_name, experiment, args.exp_label,
                                 args.seed)

# Launch the env with our helper function
env = launch_env(seed=111, map_name=args.map_name)

# Wrappers
env = wrappers.Monitor(env,
                       './videos/test/' + file_name + '/',
                       force=True,
                       video_callable=lambda x: True)
env = CropWrapper(env)
env = ResizeWrapper(env)
#env = FrameStack(env, args.frame_stack_k)
env = NormalizeWrapper(env)
env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
env = SteeringToWheelVelWrapper(env)
#env = SoftActionWrapper(env)
#env = ActionWrapper(env)
#env = DtRewardWrapper(env) # not during testing
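
The evaluation rollout itself is not shown; a sketch of loading the trained policy and running one episode (the TD3 constructor and a policy.load mirroring the policy.save(file_name, directory=...) call from training are assumptions):

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

policy = TD3(state_dim, action_dim, max_action)
policy.load(file_name, directory="./pytorch_models")

obs, done, episode_reward = env.reset(), False, 0.0
while not done:
    action = policy.predict(np.array(obs))
    obs, reward, done, _ = env.step(action)
    episode_reward += reward
print("episode reward: {}".format(episode_reward))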
Example #6
file_name = "{}_{}_{}_{}".format(
    policy_name,
    experiment,
    args.exp_label,
    str(args.seed),
)

if not os.path.exists("./results"):
    os.makedirs("./results")
if args.save_models and not os.path.exists("./pytorch_models"):
    os.makedirs("./pytorch_models")

map_names = [
    "loop_empty", "small_loop", "small_loop_cw", "straight_road", "udem1"
]
envs = []
for mn in map_names:
    env = launch_env(map_name=mn, seed=args.seed)
    env = ResizeWrapper(env)
    #env = FrameStack(env, k = args.frame_stack_k)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
    env = SteeringToWheelVelWrapper(env)
    #env = StableRewardWrapper(env)
    #env = ActionWrapper(env)
    #env = SoftActionWrapper(env)
    env = DtRewardWrapper(env)
    envs.append(env)

# Set seeds
seed(args.seed)

state_dim = envs[0].observation_space.shape
exp = Experiment("[duckietown] - td3")

file_name = "{}_{}_{}_{}".format(
    policy_name,
    experiment,
    args.exp_label,
    str(args.seed),
)

if not os.path.exists("./results"):
    os.makedirs("./results")
if args.save_models and not os.path.exists("./pytorch_models{}".format(args.epi)):
    os.makedirs("./pytorch_models{}".format(args.epi))

# Launch the env with our helper function
env = launch_env(map_name=args.map_name)

time_str = time.strftime("%Y-%m-%d %H:%M", time.localtime(time.time()))

# Wrappers
#env = wrappers.Monitor(env, './videos/train/' + time_str + '/', force=True)
#env = CropWrapper(env)
env = ResizeWrapper(env)
#env = FrameStack(env, k = args.frame_stack_k)
env = NormalizeWrapper(env)
env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
env = SteeringToWheelVelWrapper(env)
#env = StableRewardWrapper(env)
#env = ActionWrapper(env)
#env = SoftActionWrapper(env)
env = DtRewardWrapper(env)
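
How the per-map environments in envs are consumed is not part of this snippet; one simple pattern (a sketch, not the authors' loop) is to evaluate the policy on every map with the evaluate_policy(env, policy) helper used in the earlier examples:

def evaluate_on_all_maps(policy):
    # One evaluation pass per wrapped environment, keyed by map name
    return {mn: evaluate_policy(e, policy) for mn, e in zip(map_names, envs)}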