Ejemplo n.º 1
0
def validation(env):
    """Run a trained SonicAgent greedily in *env* while recording video.

    Loads the final model weights from 'sonic_model_final.h5', then plays
    until the episode ends, printing the reward obtained for each action
    and rendering every frame.

    Args:
        env: environment id/spec accepted by make_env().
    """
    env = make_env(env)
    # force=True overwrites any previous recording left in ./video.
    env = Monitor(env, './video', force=True)
    sonic = SonicAgent(env, TIMESTEPS_PER_EPISODE * EPISODES, True)
    sonic.load_model('sonic_model_final.h5')
    obs = env.reset()
    while True:
        action = sonic.policy(obs)
        next_obs, reward, done, info = env.step(action)
        print("Para la accion #{} la recompensa es {}".format(action, reward))
        env.render()
        obs = next_obs
        if done:
            # Bug fix: the original assigned env.close() (which returns None)
            # to obs and kept looping, crashing on the next policy(None) call.
            env.close()
            break
Ejemplo n.º 2
0
def training(env):
    """Train a SonicAgent with experience replay over EPISODES episodes.

    Resumes from 'sonic_model_final.h5' when that file exists. A checkpoint
    is written every 50 episodes and the final model is saved at the end.

    Args:
        env: environment id/spec accepted by make_env().
    """
    total_reward = 0.0
    reward_per_episode = []
    env = make_env(env)

    sonic = SonicAgent(env, TIMESTEPS_PER_EPISODE * EPISODES)
    # Resume from a previously saved model if one exists.
    if Path('sonic_model_final.h5').is_file():
        sonic.load_model('sonic_model_final.h5')

    obs = env.reset()
    for episode in range(EPISODES):
        done = False
        print("Empieza Episodio #{}".format(episode + 1))
        # The loop condition already terminates the episode; the original's
        # extra `if done: break` at the bottom was redundant and is removed.
        while not done:
            action = sonic.policy(obs)
            next_obs, reward, done, info = env.step(action)
            sonic.save_memory(obs, action, reward, next_obs, done)
            # Learn only once enough transitions are buffered for a batch.
            if len(sonic.memory) > sonic.batch_size:
                sonic.replay_and_learn()
            total_reward += reward
            obs = next_obs

        # Periodic checkpoint (also fires on episode 0, as in the original).
        if episode % 50 == 0:
            sonic.save_model('models/' + str(episode) + '_sonic_model.h5')
        reward_per_episode.append(total_reward)
        # Refresh the target network once per episode.
        sonic.update_target_model()
        print("Episodio #{} finalizado con recompensa {}".format(episode + 1, total_reward))
        obs = env.reset()
        total_reward = 0.0

    env.close()
    sonic.save_model('sonic_model_final.h5')
Ejemplo n.º 3
0
if __name__ == "__main__":
    # Imports are local to the entry point so importing this module from
    # elsewhere does not pull in TensorFlow or spawn subprocess workers.
    import tensorflow as tf
    from a2c.A2C import A2C
    from utils.utils import make_env
    from env.subproc_vec_env import SubprocVecEnv

    # Four environments stepped in parallel worker processes for training.
    envs = SubprocVecEnv([make_env(i) for i in range(4)])
    # TF1-style explicit session, shared between training and testing.
    sess = tf.Session()
    a2c = A2C(sess,
              envs,
              num_iterations=1000,
              save_step=1000,
              checkpoint_dir="./a2c/checkpoints/")
    a2c.train()
    # Evaluate in a single environment after training.
    # NOTE(review): 100000 is presumably the number of test steps — confirm
    # against the A2C.test signature.
    env = SubprocVecEnv([make_env(0)])
    a2c.test(env, 100000)
    sess.close()
Ejemplo n.º 4
0
    n_timesteps = args.n_timesteps
else:
    n_timesteps = int(hyperparams['n_timesteps'])
del hyperparams['n_timesteps']

# Parse the optional 'normalize' entry: it may be a bool, or a string of
# VecNormalize kwargs (e.g. "dict(norm_obs=True)").
normalize = False
normalize_kwargs = {}
if 'normalize' in hyperparams.keys():
    normalize = hyperparams['normalize']
    if isinstance(normalize, str):
        # NOTE: eval on config text — safe only for trusted hyperparameter files.
        normalize_kwargs = eval(normalize)
        normalize = True
    # Consume the key so it is not forwarded to the algorithm constructor.
    del hyperparams['normalize']

if not args.teleop:
    env = DummyVecEnv([make_env(args.seed, vae=vae, teleop=args.teleop)])
else:
    env = make_env(args.seed,
                   vae=vae,
                   teleop=args.teleop,
                   n_stack=hyperparams.get('frame_stack', 1))()

if normalize:
    # Bug fix: the original tested hyperparams.get('normalize', False), but
    # that key was deleted above, so the DDPG warning was unreachable and
    # DDPG runs were normalized anyway.
    if args.algo in ['ddpg']:
        print("WARNING: normalization not supported yet for DDPG")
    else:
        print("Normalizing input and return")
        env = VecNormalize(env, **normalize_kwargs)

# Optional Frame-stacking
n_stack = 1
Ejemplo n.º 5
0
# Record the simulator settings so the run can be reproduced later.
# NOTE: eval(key) looks each name up in the *current* scope, so SIM_PARAMS
# must contain trusted variable names only.
for key in SIM_PARAMS:
    saved_hyperparams[key] = eval(key)
print(saved_hyperparams)
#########################################################

################# number of timesteps ###################
# Read n_timesteps from hyperparams; a value supplied on the
# command line (parser) takes precedence.
if args.n_timesteps > 0:
    n_timesteps = args.n_timesteps
else:
    n_timesteps = int(hyperparams["n_timesteps"])
del hyperparams["n_timesteps"]
#########################################################

env = DummyVecEnv([make_env(args.seed, vae=vae)])

##################### normalize #########################
# If 'normalize' exists in hyperparams, move its value into
# normalize_kwargs and set normalize to True.
normalize = False
normalize_kwargs = {}
if "normalize" in hyperparams.keys():
    normalize = hyperparams["normalize"]
    if isinstance(normalize, str):
        # The string form holds VecNormalize kwargs; eval is for trusted config only.
        normalize_kwargs = eval(normalize)
        normalize = True
    del hyperparams["normalize"]

# Normalize the input image
if normalize:
Ejemplo n.º 6
0
            hyperparams[key] = linear_schedule(initial_value)
        elif isinstance(hyperparams[key], float):
            hyperparams[key] = constfn(hyperparams[key])
        else:
            raise ValueError('Invalid valid for {}: {}'.format(key, hyperparams[key]))

# Prefer the timestep budget supplied on the command line; otherwise fall
# back to the value stored in the hyperparameters (lazily converted, so a
# bad config value only raises when it is actually needed).
n_timesteps = args.n_timesteps if args.n_timesteps > 0 else int(hyperparams['n_timesteps'])
# Consume the key so it is not forwarded to the algorithm constructor.
del hyperparams['n_timesteps']

# Connect to a local CARLA server and build the training environment.
with make_carla_client('localhost', 2000) as client:
    print("CarlaClient connected")

    env = DummyVecEnv([make_env(client, args.seed, vae=vae)])

    # Optional Frame-stacking
    n_stack = 1
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))
        # Consume the key so it is not forwarded to the algorithm constructor.
        del hyperparams['frame_stack']

    # Parse noise string for DDPG
    if args.algo == 'ddpg' and hyperparams.get('noise_type') is not None:
        noise_type = hyperparams['noise_type'].strip()
        noise_std = hyperparams['noise_std']
        # NOTE(review): assumes a flat (1-D) continuous action space — confirm.
        n_actions = env.action_space.shape[0]
        if 'adaptive-param' in noise_type:
Ejemplo n.º 7
0
del hyperparams['n_timesteps']

# 'normalize' may be a bool or a string of VecNormalize kwargs.
normalize = False
normalize_kwargs = {}
if 'normalize' in hyperparams.keys():
    normalize = hyperparams['normalize']
    if isinstance(normalize, str):
        # NOTE: eval on config text — safe only for trusted hyperparameter files.
        normalize_kwargs = eval(normalize)
        normalize = True
    # Consume the key so it is not forwarded to the algorithm constructor.
    del hyperparams['normalize']

if not args.teleop and not args.local_control:
    env = DummyVecEnv([
        make_env(
            args.seed,
            vae=vae,
            teleop=args.teleop,
            local_control=args.local_control,
        )
    ])
else:
    env = make_env(args.seed,
                   vae=vae,
                   teleop=args.teleop,
                   local_control=args.local_control,
                   n_stack=hyperparams.get('frame_stack', 1))()

if normalize:
    # Bug fix: hyperparams['normalize'] was deleted above, so the original
    # hyperparams.get('normalize', False) test could never be True and the
    # DDPG warning was dead code.
    if args.algo in ['ddpg']:
        print("WARNING: normalization not supported yet for DDPG")
    else:
        print("Normalizing input and return")
Ejemplo n.º 8
0
del hyperparams['n_timesteps']

# 'normalize' may be a bool or a string of VecNormalize kwargs.
normalize = False
normalize_kwargs = {}
if 'normalize' in hyperparams.keys():
    normalize = hyperparams['normalize']
    if isinstance(normalize, str):
        # NOTE: eval on config text — safe only for trusted hyperparameter files.
        normalize_kwargs = eval(normalize)
        normalize = True
    # Consume the key so it is not forwarded to the algorithm constructor.
    del hyperparams['normalize']

if 'policy_kwargs' in hyperparams.keys():
    # NOTE: eval turns the config string into a kwargs dict — trusted input only.
    hyperparams['policy_kwargs'] = eval(hyperparams['policy_kwargs'])

if not args.teleop:
    env = DummyVecEnv([make_env(args.level, args.seed, vae=vae, teleop=args.teleop, obs_res=(IMAGE_WIDTH, IMAGE_HEIGHT))])
else:
    env = make_env(args.level, args.seed, vae=vae, teleop=args.teleop,
                   n_stack=hyperparams.get('frame_stack', 1), obs_res=(IMAGE_WIDTH, IMAGE_HEIGHT))()

if normalize:
    # Bug fix: the 'normalize' key was deleted above, so the original
    # hyperparams.get('normalize', False) guard was always False and DDPG
    # runs were normalized despite the intended warning.
    if args.algo in ['ddpg']:
        print("WARNING: normalization not supported yet for DDPG")
    else:
        print("Normalizing input and return")
        env = VecNormalize(env, **normalize_kwargs)

# Optional Frame-stacking
n_stack = 1