def main(args=None):
    """Load a trained agent and run it in its environment, rendering each step.

    The agent class is selected by ``args.type`` ("DDQN", "A2C", "A3C" or
    "DDPG") and its weights are loaded from the paths given on the command
    line. Episodes are replayed back to back until the process is interrupted.

    Args:
        args: Command-line arguments to hand to ``parse_args``. When ``None``,
            ``sys.argv[1:]`` is used.

    Raises:
        ValueError: If ``args.type`` is not one of the supported agent types.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    act_range = None
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Build the gym environment once and read its action space from that
        # same instance, rather than creating a second environment that is
        # never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        if args.type == "DDPG":
            # Continuous Environments Wrapper
            action_space = gym_env.action_space
            action_dim = action_space.high.shape[0]
            act_range = action_space.high
        else:
            # Standard Environments
            action_dim = gym_env.action_space.n

    try:
        # Pick algorithm and restore its weights
        if args.type == "DDQN":
            algo = DDQN(action_dim, state_dim, args)
            algo.load_weights(args.model_path)
        elif args.type == "A2C":
            algo = A2C(action_dim, state_dim, args.consecutive_frames)
            algo.load_weights(args.actor_path, args.critic_path)
        elif args.type == "A3C":
            algo = A3C(action_dim, state_dim, args.consecutive_frames,
                       is_atari=args.is_atari)
            algo.load_weights(args.actor_path, args.critic_path)
        elif args.type == "DDPG":
            algo = DDPG(action_dim, state_dim, act_range,
                        args.consecutive_frames)
            algo.load_weights(args.actor_path, args.critic_path)
        else:
            raise ValueError(
                "Unsupported algorithm type: {!r}".format(args.type))

        # Display agent
        old_state = env.reset()
        while True:
            env.render()
            a = algo.policy_action(old_state)
            old_state, _, done, _ = env.step(a)
            if done:
                # Start the next episode from the freshly reset observation,
                # not from the terminal state of the finished episode.
                old_state = env.reset()
    finally:
        # The display loop only ends through an exception (e.g. Ctrl-C), so
        # the environment is released here.
        env.env.close()
import json
import os
import time

import tensorflow as tf

from DDQN.ddqn import DDQN

# Roll out one episode with a trained DDQN agent in the scanner environment
# and record the (theta, phi) pose after every step.
#
# NOTE(review): `recons_env` is used below but no import for it is visible in
# this span - confirm it is imported elsewhere before running this script.

model_path = "test/models/mymodel.h5"

# Read the experiment configuration; the context manager closes the file.
with open("params.json") as params_file:
    params = json.load(params_file)

env = recons_env.scanner_env(params)
state_dim = (84, 84, 3)
action_dim = 4

writer = tf.summary.create_file_writer("logs/test")

algo = DDQN(action_dim, state_dim, params["train"])
algo.load_weights(model_path)

# Single reset right before the rollout; the earlier reset's result was
# discarded unused. The unused step counter named `time` is dropped so the
# imported `time` module is no longer rebound to an int.
state, done = env.reset(), False
poses = [[env.theta, env.phi]]

while not done:
    a = algo.policy_action(state)
    state, r, done, _ = env.step(a)
    poses.append([env.theta, env.phi])

# Write the trajectory; the context manager flushes and closes the file.
with open('test/poses.json', 'w') as poses_file:
    json.dump(poses, poses_file)
def main(args=None):
    """Train the agent selected by ``args.type`` and export its weights.

    Builds the environment, instantiates the requested agent ("DDQN", "A2C",
    "A3C" or "DDPG"), optionally restores pretrained weights, trains, writes
    the gathered statistics to ``<type>/logs.csv`` when requested and saves
    the weights under ``<type>/models/``.

    Args:
        args: Command-line arguments to hand to ``parse_args``. When ``None``,
            ``sys.argv[1:]`` is used.

    Raises:
        ValueError: If ``args.type`` is not one of the supported agent types.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    summary_writer = tf.summary.FileWriter(
        args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper: the action dimensions and bounds
        # are read from the unwrapped gym environment.
        env_before = gym.make(args.env)
        env_unwrapped = env_before.unwrapped
        env = Environment(env_before, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard Environments
        env_before = gym.make(args.env)
        env = Environment(env_before, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    try:
        # Pick algorithm to train
        print('args type: ', args.type)
        if args.type == "DDQN":
            algo = DDQN(action_dim, state_dim, args)
        elif args.type == "A2C":
            algo = A2C(action_dim, state_dim, args.consecutive_frames)
        elif args.type == "A3C":
            algo = A3C(action_dim, state_dim, args.consecutive_frames,
                       is_atari=args.is_atari)
        elif args.type == "DDPG":
            algo = DDPG(args, action_dim, state_dim, act_range,
                        args.consecutive_frames)
        else:
            raise ValueError(
                "Unsupported algorithm type: {!r}".format(args.type))

        # NOTE(review): load_weights receives a single path here, while the
        # actor-critic agents elsewhere in this file are loaded with
        # (actor_path, critic_path) - confirm this call matches the selected
        # agent's signature.
        if args.pretrain:
            print('pretrain')
            algo.load_weights(args.weights_path)

        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if args.gather_stats:
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.type + "/logs.csv",
                      header=['Episode', 'Mean', 'Stddev'],
                      float_format='%10.5f')

        # Save weights; exist_ok avoids the check-then-create race.
        exp_dir = '{}/models/'.format(args.type)
        os.makedirs(exp_dir, exist_ok=True)
        export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
            exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)
        algo.save_weights(export_path)
    finally:
        # Release the environment even when training or export fails.
        env.env.close()
def main(args=None):
    """Load a trained agent and run it in its environment without rendering.

    The agent class is selected by ``args.type`` ("DDQN", "A2C", "A3C" or
    "DDPG") and its weights are loaded from the paths given on the command
    line. Episodes are replayed back to back, printing the number of steps
    each one took, until the process is interrupted.

    Args:
        args: Command-line arguments to hand to ``parse_args``. When ``None``,
            ``sys.argv[1:]`` is used.

    Raises:
        ValueError: If ``args.type`` is not one of the supported agent types.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Environment Initialization
    env_before = gym.make(args.env)
    if args.type == "DDPG":
        # Continuous Environments Wrapper: the action dimensions and bounds
        # are read from the unwrapped gym environment.
        env_unwrapped = env_before.unwrapped
        env = Environment(env_before, args.consecutive_frames)
        state_dim = env.get_state_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard Environments
        env = Environment(env_before, args.consecutive_frames)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    try:
        # Pick algorithm and restore its weights
        if args.type == "DDQN":
            algo = DDQN(action_dim, state_dim, args)
            algo.load_weights(args.model_path)
        elif args.type == "A2C":
            algo = A2C(action_dim, state_dim, args.consecutive_frames)
            algo.load_weights(args.actor_path, args.critic_path)
        elif args.type == "A3C":
            algo = A3C(action_dim, state_dim, args.consecutive_frames,
                       is_atari=args.is_atari)
            algo.load_weights(args.actor_path, args.critic_path)
        elif args.type == "DDPG":
            algo = DDPG(args, action_dim, state_dim, act_range,
                        args.consecutive_frames)
            algo.load_weights(args.actor_path, args.critic_path)
        else:
            raise ValueError(
                "Unsupported algorithm type: {!r}".format(args.type))

        # Display agent
        old_state, steps = env.reset(), 0
        print('old state shape', old_state.shape)
        while True:
            a = algo.policy_action(old_state)
            if args.type == "DDPG":
                # Keep the continuous action inside the environment's bounds.
                a = np.clip(a, -act_range, act_range)
            old_state, r, done, _ = env.step(a)
            steps += 1
            if done:
                print('done')
                print('Solved in', steps, 'steps')
                old_state = env.reset()
                steps = 0
    finally:
        # The display loop only ends through an exception (e.g. Ctrl-C), so
        # the environment is released here.
        env.env.close()