예제 #1
0
def main(args=None):
    """Load a trained agent and run it in its environment until interrupted.

    Args:
        args: Command-line argument strings handed to ``parse_args``.
            When ``None``, ``sys.argv[1:]`` is used.

    Raises:
        ValueError: If ``args.type`` is not one of ``DDQN``, ``A2C``,
            ``A3C`` or ``DDPG``.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Build the gym environment once and read its action space from that
        # same instance, rather than creating a second one that is never
        # closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        if args.type == "DDPG":
            # Continuous action space
            action_space = gym_env.action_space
            action_dim = action_space.high.shape[0]
            act_range = action_space.high
        else:
            # Discrete action space
            action_dim = gym_env.action_space.n

    # Pick the algorithm and load its weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    else:
        # Fail here with a clear message instead of a NameError on `algo`.
        raise ValueError("Unknown algorithm type: {!r}".format(args.type))

    # Display agent
    old_state = env.reset()
    try:
        while True:
            env.render()
            a = algo.policy_action(old_state)
            old_state, _, done, _ = env.step(a)
            if done:
                # Start the next episode from the fresh observation; keeping
                # the terminal one would feed a stale state to the policy.
                old_state = env.reset()
    finally:
        # The loop only ends via an exception (e.g. Ctrl-C), so the close
        # has to live in `finally` to be reachable at all.
        env.env.close()
예제 #2
0
파일: run.py 프로젝트: romi/RL_NBV
import json
import tensorflow as tf
from DDQN.ddqn import DDQN
import os
import time

# Roll out a trained DDQN agent for one episode in the scanner environment
# and write the visited (theta, phi) poses to test/poses.json.

model_path = "test/models/mymodel.h5"

# Context manager so the parameter file handle is closed deterministically.
with open("params.json") as params_file:
    params = json.load(params_file)

# NOTE(review): `recons_env` is not imported in the visible part of this
# file; confirm it is brought into scope elsewhere.
env = recons_env.scanner_env(params)
state_dim = (84, 84, 3)
action_dim = 4

writer = tf.summary.create_file_writer("logs/test")
algo = DDQN(action_dim, state_dim, params["train"])
algo.load_weights(model_path)

# Reset once, right before the rollout. The earlier version reset twice and
# threw the first result away, and it rebound the imported `time` module to
# an unused integer counter.
state, done = env.reset(), False

# Record the pose after the reset, then one pose per step.
poses = [[env.theta, env.phi]]

while not done:
    a = algo.policy_action(state)
    state, _, done, _ = env.step(a)
    poses.append([env.theta, env.phi])

# Close the output file explicitly so the JSON is flushed to disk.
with open('test/poses.json', 'w') as poses_file:
    json.dump(poses, poses_file)
예제 #3
0
def main(args=None):
    """Load a trained agent and run it until interrupted, printing the length of each episode.

    Args:
        args: Command-line argument strings handed to ``parse_args``.
            When ``None``, ``sys.argv[1:]`` is used.

    Raises:
        ValueError: If ``args.type`` is not one of ``DDQN``, ``A2C``,
            ``A3C`` or ``DDPG``.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Environment Initialization (shared by continuous and standard setups)
    env_before = gym.make(args.env)
    env = Environment(env_before, args.consecutive_frames)
    state_dim = env.get_state_size()

    if args.type == "DDPG":
        # Continuous environments: read the bounds from the unwrapped env
        action_space = env_before.unwrapped.action_space
        action_dim = action_space.shape[0]
        act_range = action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard environments
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    # Pick the algorithm and load its weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range,
                    args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    else:
        # Fail here with a clear message instead of a NameError on `algo`.
        raise ValueError("Unknown algorithm type: {!r}".format(args.type))

    # Run the agent (env.render() is not called in this loop)
    old_state, steps = env.reset(), 0
    print('old state shape', old_state.shape)
    try:
        while True:
            a = algo.policy_action(old_state)
            if args.type == "DDPG":
                # Keep the action inside the symmetric bound given by
                # the action space's upper limit.
                a = np.clip(a, -act_range, act_range)
            old_state, _, done, _ = env.step(a)
            steps += 1
            if done:
                print('done')
                print('Solved in', steps, 'steps')
                old_state = env.reset()
                steps = 0
    finally:
        # The loop only ends via an exception (e.g. Ctrl-C), so the close
        # has to live in `finally` to be reachable at all.
        env.env.close()
예제 #4
0
def main(args=None):
    """Train the selected agent, optionally export stats to CSV, then display it.

    Args:
        args: Command-line argument strings handed to ``parse_args``.
            When ``None``, ``sys.argv[1:]`` is used.

    Raises:
        ValueError: If ``args.type`` is not one of ``DDQN``, ``A2C``,
            ``A3C`` or ``DDPG``.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Build the gym environment once and read its action space from that
        # same instance, rather than creating a second one that is never
        # closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        if args.type == "DDPG":
            # Continuous action space
            action_space = gym_env.action_space
            action_dim = action_space.high.shape[0]
            act_range = action_space.high
        else:
            # Discrete action space
            action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        # Fail here with a clear message instead of a NameError on `algo`.
        raise ValueError("Unknown algorithm type: {!r}".format(args.type))

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state = env.reset()
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, _, done, _ = env.step(a)
        if done:
            # Start the next episode from the fresh observation; keeping
            # the terminal one would feed a stale state to the policy.
            old_state = env.reset()