Code example #1
def get_observations(num_samples, seed):
    env = DuckietownEnv(
        map_name = 'map1',
        domain_rand = False,
        draw_bbox = False,
        max_steps = 2000,
        seed = seed
    )
    env = ResizeWrapper(env, shape=(64, 80, 3))
    env = NormalizeWrapper(env)

    samples = num_samples
    time_steps = 0
    obs_array = np.zeros(shape=(samples, 64, 80, 3))
    while time_steps < samples:
        # collect trajectories
        env.seed(seed)
        obs = env.reset()
        obs_array[time_steps] = obs
        time_steps += 1
        while True:
            action = env.action_space.sample()
            obs, reward, done, _ = env.step(action)
            rollout_done = done or time_steps >= samples
            if rollout_done:
                break

            obs_array[time_steps] = obs
            time_steps += 1

        seed += 1  # use a fresh seed for the next rollout
        print(time_steps)

    return obs_array
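A brief usage sketch (not from the original project; it assumes numpy and the get_observations function above are importable): collect observations under a random policy and save them for later use, e.g. to train an encoder.

import numpy as np

# Hypothetical usage: gather 1000 observations with a random policy and save them.
observations = get_observations(num_samples=1000, seed=0)
np.save('observations.npy', observations)
print(observations.shape)  # (1000, 64, 80, 3)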
Code example #2
File: task_etc.py Project: duckieT/ducksducks
    def __init__(self):
        from gym_duckietown.envs.duckietown_env import DuckietownEnv
        # map_name, frame_skip, distortion and max_steps come from the
        # enclosing scope in the original project; only values that are
        # not None are forwarded, so DuckietownEnv keeps its defaults otherwise.
        args = {
            'map_name': map_name,
            'frame_skip': frame_skip,
            'distortion': distortion,
            'max_steps': max_steps
        }
        self.env = DuckietownEnv(
            **{k: v for k, v in args.items() if v is not None})
Code example #3
File: env.py Project: lychanl/duck-driving-golem
    def creator():
        if args.env_name is None:
            env = DuckietownEnv(
                map_name=args.map_name,
                draw_curve=args.draw_curve,
                draw_bbox=args.draw_bbox,
                domain_rand=args.domain_rand,
                frame_skip=args.frame_skip,
                distortion=args.distortion,
            )
        else:
            env = gym.make(args.env_name)

        return DiscreteWrapper(env) if discrete else env
Code example #4
def train(args, agent_opts, train_opts, rewards=None):
    duckie.logger.disabled = True  # Disable log messages from gym_duckietown
    env = DuckietownEnv(
        seed = None,
        map_name = "4way_bordered",
        max_steps = 500000,
        draw_curve = False,
        draw_bbox = False,
        domain_rand = False,
        randomize_maps_on_reset = False,
        accept_start_angle_deg = 4,
        full_transparency = False,
        user_tile_start = None,
        num_tris_distractors = 12,
        enable_leds = False,
        navigation=True,
        num_past_navdirs = 3,
        num_past_positions = 3,
        num_past_actions = 2,
    )

    # Load Encoder
    encoder = BetaVAE_H(10, 3)
    loaded_model = torch.load(args.encoder_path)
    encoder.load_state_dict(loaded_model['model_states']['net'])
    env = ResizeWrapper(env, 64, 64)
    env = SwapDimensionsWrapper(env)
    env = ImageNormalizeWrapper(env)
    env = TorchifyWrapper(env, agent_opts.use_gpu)
    env = EncoderWrapper(env, encoder, agent_opts.use_gpu)
    #env = DtRewardWrapper(env)
    env = ActionWrapper(env)
    env = GymEnvironment(env)

    state_size = 31  # VAE bottleneck plus the additional information
    act_size = env.gym_env.action_space.shape[0]
    action_def = ContinuousDefinition(env.gym_env.action_space.shape,
                                      env.gym_env.action_space.high,
                                      env.gym_env.action_space.low)

    multihead_net = DuckieNetwork(state_size, act_size)

    agent = SACAgent(multihead_net, action_def, agent_opts)

    trainer = trn.Trainer(agent, env, train_opts)
    trainer.train()
Code example #5
def launch_env(id=None):
    env = None
    if id is None:
        # Launch the environment
        # from gym_duckietown.simulator import Simulator
        from gym_duckietown.envs.duckietown_env import DuckietownEnv
        # env = Simulator(
        env = DuckietownEnv(
            seed=123,  # random seed
            map_name="loop_empty",
            max_steps=500001,  # we don't want the gym to reset itself
            domain_rand=0,
            camera_width=640,
            camera_height=480,
            accept_start_angle_deg=4,  # start close to straight
            full_transparency=True,
            distortion=True,
        )
    else:
        env = gym.make(id)

    return env
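A minimal usage sketch (assuming gym-duckietown is installed and a display or virtual display is available): launch_env() returns a ready-to-step environment, so a short random rollout looks like this.

# Hypothetical usage of launch_env(): step the environment with random actions.
env = launch_env()
obs = env.reset()
for _ in range(100):
    obs, reward, done, info = env.step(env.action_space.sample())
    env.render()
    if done:
        obs = env.reset()
env.close()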
Code example #6
File: ttt_action.py Project: tpvt99/sbcs5478
from stable_baselines3.common.vec_env import VecNormalize, VecFrameStack, VecTransposeImage
from global_configuration import PROJECT_PATH
import json
import os.path as osp  # used below for building results_dir

# The two lines below are for the Windows 10 environment; comment them out on other OSes.
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

results_dir = osp.join(PROJECT_PATH, "results", "ppo", "2021-04-13_ppo",
                       "2021-04-13_15-49-19_ppo")

with open(osp.join(results_dir, "config.json"), 'r') as f:
    custom_params = json.load(f)

env = DuckietownEnv(map_name='map3',
                    domain_rand=False,
                    draw_bbox=False,
                    max_steps=1500,
                    seed=1)

env = RewardWrapper(env)
env = ResizeWrapper(env, shape=(64, 80, 3))

if custom_params['algo'] == 'dqn':
    env = DiscreteWrapper(env)

if custom_params['USING_VAE']:
    env = NormalizeWrapper(env)  # normalization is only needed when observations feed the VAE
    env = FinalLayerObservationWrapper(env, latent_dim=1028, map="map3")

# Step 3.b. Wrap in a vectorized environment so VecNormalize or VecFrameStack can be applied (optional); a hypothetical continuation is sketched below
env = make_vec_env(lambda: env, n_envs=1)
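The excerpt stops right after vectorization. A hypothetical continuation (not part of the original file) that applies the VecFrameStack, VecNormalize and VecTransposeImage wrappers imported at the top, gated by the same config flags seen in code example #9, could look like this.

# Hypothetical continuation: apply the vectorized wrappers imported above.
if custom_params.get('FRAME_STACK'):
    env = VecFrameStack(env, n_stack=custom_params['FRAME_STACK'])
if custom_params.get('USING_NORMALIZATION'):
    env = VecNormalize(env, norm_obs=True, norm_reward=True)
env = VecTransposeImage(env)  # channel-first observations for SB3 CNN policies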
Code example #7
        # (Excerpt begins mid-call: the observation-space Box is rebuilt here with a
        # channel-first (C, H, W) shape to match the transposed observations below.)
            self.observation_space.high[0, 0, 0],
            [obs_shape[2], obs_shape[0], obs_shape[1]],
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return observation.transpose(2, 0, 1)


class GTDenseRewardInfoWrapperDT(gym.Wrapper):
    def gt_reward(self):
        lane_pose = self.env.get_lane_pos2(self.env.cur_pos,
                                           self.env.cur_angle)
        dist = lane_pose.dist  # Distance to lane center. Left is negative, right is positive.
        angle = angleLimit(lane_pose.angle_rad)
        gt_reward = state2reward(dist, angle)
        return gt_reward

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        info['GT_reward'] = self.gt_reward()
        return observation, reward, done, info


#### Action wrappers
if __name__ == '__main__':

    sim = DTNoisyLaneFollowingRewardWrapper(DuckietownEnv(), 10)
    print('Environment loaded!')
    image = sim.render(mode='drone')

    input()  # keep the rendering window open until Enter is pressed
parser.add_argument('--env-name', default=None)
parser.add_argument('--map-name', default='udem1')
parser.add_argument('--distortion', default=False, action='store_true')
parser.add_argument('--draw-curve', action='store_true', help='draw the lane following curve')
parser.add_argument('--draw-bbox', action='store_true', help='draw collision detection bounding boxes')
parser.add_argument('--domain-rand', action='store_true', help='enable domain randomization')
parser.add_argument('--frame-skip', default=1, type=int, help='number of frames to skip')
args = parser.parse_args()

if args.env_name is None:

    env = DuckietownEnv(
        map_name="loop_empty",
        max_steps=500001,  # we don't want the gym to reset itself
        domain_rand=0,
        camera_width=640,
        camera_height=480,
        accept_start_angle_deg=1,  # start close to straight
        accept_start_dist=0.05,
        full_transparency=False,
        distortion=False)

else:
    env = gym.make(args.env_name)
env.reset()
env.render()

@env.unwrapped.window.event
def on_key_press(symbol, modifiers):
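    # (The excerpt ends here. Below is a hypothetical handler body in the style of
    # gym-duckietown's manual-control scripts; the pyglet key constants and the
    # sys import are assumptions, not taken from the source.)
    from pyglet.window import key
    import sys
    if symbol == key.BACKSPACE:
        env.reset()
        env.render()
    elif symbol == key.ESCAPE:
        env.close()
        sys.exit(0)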
Code example #9
if platform == 'win32':
    import os
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# On Linux with a GPU available, run training inside a virtual display
if platform == 'linux' and torch.cuda.is_available():
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(640, 480))
    display.start()

PROJECT_PATH = osp.abspath(osp.dirname(osp.dirname(__file__)))

# Step 1. Initialize the environment
env = DuckietownEnv(map_name='map3',
                    domain_rand=False,
                    draw_bbox=False,
                    max_steps=1500,
                    seed=2)

# Step 2. Check the custom environment. Must do it before any wrappers
#check_env(env)

# Step 3. Initialize some parameters
custom_params = {
    'USING_VAE': False,  # whether to use VAE
    'FRAME_STACK': 4,
    'USING_NORMALIZATION': True,
    'algo': 'ppo',
    'sac_parameters': {
        'buffer_size': int(1e5),
        'gradient_steps': 64,
Code example #10
File: duckie_test.py Project: Kuanta/Reinforcement
def test(args):
    duckie.logger.disabled = True  # Disable log messages from gym_duckietown
    env = DuckietownEnv(
        seed = None,
        map_name = "4way_bordered",
        max_steps = 500001,
        draw_curve = False,
        draw_bbox = False,
        domain_rand = False,
        randomize_maps_on_reset = False,
        accept_start_angle_deg = 4,
        full_transparency = True,
        user_tile_start = None,
        num_tris_distractors = 12,
        enable_leds = False,
    )

    # Load Encoder
    encoder = BetaVAE_H(10, 3)
    loaded_model = torch.load(args.encoder_path)
    encoder.load_state_dict(loaded_model['model_states']['net'])
    env = ResizeWrapper(env, 64, 64)
    env = SwapDimensionsWrapper(env)
    env = ImageNormalizeWrapper(env)
    env = TorchifyWrapper(env)
    env = EncoderWrapper(env, encoder)
    #env = ActionWrapper(env)
    env = GymEnvironment(env)

    state_size = 14
    act_size = env.gym_env.action_space.shape[0]
    action_def = ContinuousDefinition(env.gym_env.action_space.shape,
                                      env.gym_env.action_space.high,
                                      env.gym_env.action_space.low)

    multihead_net = DuckieNetwork(state_size, act_size)

    agent = SACAgent(multihead_net, action_def)

    agent.load_model(args.model_path)

    for i in range(args.n_episodes):
        total_reward = 0
        state = env.reset()
        state = torch.from_numpy(state).float().unsqueeze(0)
        while True:
            action = agent.act(state, evaluation=True)
            print(action)
            next_state, reward, done, _ = env.step(action)
            
            state = torch.from_numpy(next_state).float().unsqueeze(0)
            total_reward += reward
            env.render()
            if done:
                print("Total reward:{}".format(total_reward))
                break