Example #1
import datetime
import os

import rlsim_env
import distdeepq
from baselines import logger  # assumed source: the OpenAI baselines logger

# discrete_action() is a project-local helper (it builds the discrete action set for the
# environment) and is assumed to be defined or imported elsewhere in the original script.


def exp(env_name='straight_4lane_obs_cam',
        lr=1e-4,
        eps=0.0003125,
        max_timesteps=25e6,
        buffer_size=1e6,
        batch_size=32,
        exp_t1=1e6,
        exp_p1=0.1,
        exp_t2=25e6,
        exp_p2=0.01,
        train_freq=4,
        learning_starts=5e4,
        target_network_update_freq=1e4,
        gamma=0.99,
        num_cpu=50,
        dist_params={'Vmin': -10, 'Vmax': 10, 'nb_atoms': 51},
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        #action_res=None
        ):

    env = rlsim_env.make(env_name)
    # env = wrapper_car_racing(env)  # frame stack
    logger.configure(dir=os.path.join('.', datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f")))
    # logger.configure()

    n_action, action_map = discrete_action(env, env_name) # , action_res=action_res)

    model = distdeepq.models.cnn_to_dist_mlp(
        convs=convs, # [(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=hiddens, # [512],
        # n_action=n_action,
        dueling=False
    )
    act = distdeepq.learn(
        env,
        p_dist_func=model,
        lr=lr,  # 1e-4
        eps=eps,
        max_timesteps=int(max_timesteps), # 25M
        buffer_size=int(buffer_size), # 1M
        batch_size=int(batch_size),
        exp_t1=exp_t1,
        exp_p1=exp_p1,
        exp_t2=exp_t2,
        exp_p2=exp_p2,
        train_freq=train_freq,
        learning_starts=learning_starts, # 50000
        target_network_update_freq=target_network_update_freq, # 10000
        gamma=gamma,
        num_cpu=num_cpu,
        prioritized_replay=False,
        dist_params=dist_params,
        n_action=int(n_action),
        action_map=action_map
    )
    act.save("car_racing_model.pkl")
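
# A minimal entry point for the training function above -- a sketch only; the override
# values below are illustrative (a short smoke run), not taken from the original script.
if __name__ == '__main__':
    exp(max_timesteps=1e5, learning_starts=1e4)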
Example #2
import rlsim_env as rl
import numpy as np
from real_safe_pid import PID_safe

n_lane = 2
n_obs = 4
print('making environment')
env = rl.make("straight_lane", obs_num=n_obs, nr_lane=n_lane)
print('done')
pid = PID_safe(n_lane)

print('reset environment')
s = env.reset()
print('done')
pid.reset(pid.get_current_lane(s))
done = False
ep_num = 0
step_num = 0
reward = 0
successes = []
while True:
    step_num += 1
    if done:
        # Count an episode that survives (nearly) the full 500-step horizon as a success;
        # assigning the boolean directly keeps `success` defined even for failed episodes.
        success = step_num > 498
        successes.append(success)
        step_num = 0
        ep_num += 1
        s = env.reset()
        #pid.reset(pid.get_current_lane(s))
        done = False
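    # --- Sketch of the step the excerpt omits (not in the original code). ---
    # Assumes a Gym-style step() return and that PID_safe exposes a per-step control
    # method; `pid.control` is a hypothetical name used here for illustration.
    a = pid.control(s)
    s, r, done, _ = env.step(a)
    reward += r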
Example #3
    if len(action) == 1:
        action = action[0]

    angle_cmd = 4 * (action[0] - 0.5)

    acc = action[1]

    pedal_cmd = acc
    #break_cmd = 2*(0.5-acc) if acc<0.5 else 0
    break_cmd = 0

    return [angle_cmd, pedal_cmd, break_cmd]


# env = gym.make('Pendulum-v0')
env = rlsim_env.make('straight_4lane_cam')

print('updated')

seed = 0
np.random.seed(seed)
random.seed(seed)

obs_dim = env.observation_space.shape #.shape[0]
act_dim = 2 # env.action_space #.shape[0]

policy = Policy(obs_dim,
                act_dim,
                epochs=50,
                convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                hiddens=[256],
                lr=3e-4,
                clip_range=0.2,
                seed=seed)
val_func = Value(obs_dim,
                 epochs=100,
                 convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                 hiddens=[256],
                 lr=1e-3,
                 seed=seed)

episode_size = 8 #80
batch_size = 128
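
# A small, self-contained sketch (names are illustrative) of what the action conversion
# at the top of this example does to a raw 2-D policy output in [0, 1]:
raw = [0.75, 0.3]
angle_cmd = 4 * (raw[0] - 0.5)    # 0.5 -> straight (0.0), 0.75 -> +1.0
pedal_cmd = raw[1]                # throttle is passed through unchanged
break_cmd = 0                     # brake is held at 0 in the excerpt above
print([angle_cmd, pedal_cmd, break_cmd])   # -> [1.0, 0.3, 0]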
Example #4
    if len(action) == 1:
        action = action[0]

    angle_cmd = 4 * (action[0] - 0.5)

    acc = action[1]

    pedal_cmd = acc
    #break_cmd = 2*(0.5-acc) if acc<0.5 else 0
    break_cmd = 0

    return [angle_cmd, pedal_cmd, break_cmd]


env = rlsim_env.make('straight_4lane')
obs_dim = env.observation_space
act_dim = 2  #env.action_space

policy = Policy(obs_dim,
                act_dim,
                epochs=50,
                hdim=64,
                lr=3e-4,
                clip_range=0.2,
                seed=seed)
val_func = Value(obs_dim, epochs=100, hdim=64, lr=1e-3, seed=seed)

policy.restore_graph(0)
val_func.restore_graph(0)
Example #5
    # The discrete action index encodes a 3 (steering) x 3 (accel) grid, matching
    # act_dim = 9 below; the `steering` line is assumed here, since the original
    # excerpt starts after it was derived.
    steering = action // 3
    accel = action % 3

    if steering == 0:
        angle_cmd = -1.0
    elif steering == 1:
        angle_cmd = 0.0
    else:
        angle_cmd = 1.0

    pedal_cmd = 0.5 if accel == 1 else 0
    break_cmd = 0.5 if accel == 2 else 0

    return [angle_cmd, pedal_cmd, break_cmd]


env = rlsim_env.make('straight_4lane_obs')
obs_dim = env.observation_space
act_dim = 9  #env.action_space

mode = ['train', 'test']
cur_mode = 'train'

max_t = env.time_limit
agent = DQNAgent(obs_dim,
                 act_dim,
                 memory_mode='PER',
                 target_mode='DDQN',
                 policy_mode='argmax',
                 restore=False,
                 net_dir='q_learning2_iter_1000.ckpt'
                 )  # memory_mode='PER',target_mode='DDQN'
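
# A small, self-contained sketch (illustrative only) enumerating the 3 x 3 discrete
# action grid implied by the decoder at the top of this example: action // 3 selects
# steering (left / straight / right) and action % 3 selects coast / accelerate / brake.
for a in range(9):
    steering, accel = a // 3, a % 3
    angle_cmd = [-1.0, 0.0, 1.0][steering]
    pedal_cmd = 0.5 if accel == 1 else 0
    break_cmd = 0.5 if accel == 2 else 0
    print(a, [angle_cmd, pedal_cmd, break_cmd])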