def main(args=None):
    """Load a trained agent's weights and render it acting until interrupted.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.

    Raises:
        ValueError: If ``args.type`` is not one of DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space,
        # instead of instantiating a second copy that is never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    # Pick the algorithm and restore its trained weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    else:
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Display agent
    old_state = env.reset()
    try:
        while True:
            env.render()
            a = algo.policy_action(old_state)
            old_state, _, done, _ = env.step(a)
            if done:
                # Continue from the fresh episode's first observation rather
                # than from the terminal state of the previous episode.
                old_state = env.reset()
    finally:
        # The loop only ends through an exception (e.g. Ctrl-C); close the
        # underlying environment on the way out.
        env.env.close()
def main(args=None):
    """Train a DDQN agent on a Gym environment, then export stats and weights.

    This variant always trains DDQN on a standard (non-Atari) environment;
    ``args.type`` is only used to build output paths.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization (standard environments only)
    # Build the gym environment once and read the action space from that same
    # instance, instead of creating a second copy that is never closed.
    gym_env = gym.make(args.env)
    env = Environment(gym_env, args.consecutive_frames)
    env.reset()
    state_dim = env.get_state_size()
    action_dim = gym_env.action_space.n

    # Algorithm to train
    algo = DDQN(action_dim, state_dim, args)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
def main(args=None):
    """Train the selected agent, optionally tracking with wandb, and export results.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.

    Raises:
        ValueError: If ``args.type`` is not one of DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    if args.wandb:
        wandb.init(entity=args.wandb_id, project=args.wandb_project)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space,
        # instead of instantiating a second copy that is never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Train; the CSV export below labels the stats columns Episode/Mean/Stddev
    stats = algo.train(env, args)

    # Export results to CSV
    if args.gather_stats:
        # Nothing earlier in this function creates the output directory, so
        # make sure it exists before writing into it.
        if not os.path.exists(args.type):
            os.makedirs(args.type)
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
import json
import tensorflow as tf
from DDQN.ddqn import DDQN
import os
import time

# Roll out a trained DDQN policy in the scanner environment for one episode
# and dump the visited (theta, phi) poses to test/poses.json.

# NOTE(review): `recons_env` is used below but no import for it is visible
# here -- confirm where it is supposed to come from.

model_path = "test/models/mymodel.h5"

# Use a context manager so the parameter file is closed deterministically.
with open("params.json") as params_file:
    params = json.load(params_file)

env = recons_env.scanner_env(params)
state_dim = (84, 84, 3)
action_dim = 4

# Not referenced again in this script; kept because creating the writer has
# an on-disk side effect (log directory).
writer = tf.summary.create_file_writer("logs/test")

algo = DDQN(action_dim, state_dim, params["train"])
algo.load_weights(model_path)

# Single reset: the earlier duplicate reset was immediately overwritten, and
# its step counter rebound the imported `time` module to an unused integer.
state, done = env.reset(), False

# Record the starting pose, then one pose after every step.
poses = [[env.theta, env.phi]]
while not done:
    a = algo.policy_action(state)
    state, r, done, _ = env.step(a)
    poses.append([env.theta, env.phi])

# Close the output file explicitly so the JSON is flushed to disk.
with open('test/poses.json', 'w') as poses_file:
    json.dump(poses, poses_file)
def main(args=None):
    """Train the selected agent, optionally from pretrained weights, and export results.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.
    """
    # Parse arguments
    cli_tokens = sys.argv[1:] if args is None else args
    args = parse_args(cli_tokens)

    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment initialization
    if args.is_atari:
        # Atari environment wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous environments: action size and range are read from the
        # unwrapped gym environment.
        raw_env = gym.make(args.env)
        bare_env = raw_env.unwrapped
        env = Environment(raw_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = bare_env.action_space.shape[0]
        act_range = bare_env.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard environments: sizes come from the wrapper itself.
        raw_env = gym.make(args.env)
        env = Environment(raw_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    # Pick algorithm to train
    print('args type: ', args.type)
    algo_name = args.type
    if algo_name == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif algo_name == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif algo_name == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif algo_name == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range, args.consecutive_frames)

    # NOTE(review): treated as applying to every algorithm type; confirm
    # this matches the intended nesting of the original.
    if args.pretrain:
        print('pretrain')
        algo.load_weights(args.weights_path)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        stats_frame = pd.DataFrame(np.array(stats))
        stats_frame.to_csv(
            args.type + "/logs.csv",
            header=['Episode', 'Mean', 'Stddev'],
            float_format='%10.5f',
        )

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir,
        args.type,
        args.env,
        args.nb_episodes,
        args.batch_size,
    )
    algo.save_weights(export_path)
    env.env.close()
def main(args=None):
    """Train, or run inference with, the joint clustering/beamforming agents.

    A DDQN agent (clustering) is built and passed into a DDPG agent
    (beamforming). ``args.step`` selects between ``"train"`` (train, export
    stats, save both agents' weights) and ``"inference"`` (load weights and
    step the environment interactively). Any other value does nothing after
    the agents are constructed.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # The second size field is M2 (it previously repeated M1), so runs that
    # differ only in M2 get distinct log directories, matching the naming of
    # the models directory below.
    summary_writer = tf.summary.FileWriter(
        "{}/tensorboard_M1_{}_M2_{}_snr1_{}_snr2_{}".format(
            args.out_dir, args.M1, args.M2, args.snr_M1, args.snr_M2))

    # Initialize the wireless environment
    users_env = UsersEnvCluster(args.M1, args.M2, args.snr_M1, args.snr_M2, fixed_channel=False)
    print(users_env)

    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()

    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    act_range = 1
    act_min = 0

    # Initialize the DQN algorithm for the clustering optimization
    n_clusters = users_env.n_clusters
    algo_clustering = DDQN(n_clusters, state_dim, args)

    # Initialize the DDPG algorithm for the beamforming optimization
    algo = DDPG(action_dim, state_dim, act_range, act_min, args.consecutive_frames,
                algo_clustering, episode_length=args.episode_length)

    if args.step == "train":
        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if args.gather_stats:
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

        # Save weights and close environments
        exp_dir = '{}/models_M1_{}_M2_{}_snr1_{}_snr2_{}/'.format(
            args.out_dir, args.M1, args.M2, args.snr_M1, args.snr_M2)
        if not os.path.exists(exp_dir):
            os.makedirs(exp_dir)

        # Save DDPG
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDPG", args.nb_episodes, args.batch_size)
        algo.save_weights(export_path)

        # Save DDQN
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDQN", args.nb_episodes, args.batch_size)
        algo.ddqn_clustering.save_weights(export_path)

    elif args.step == "inference":
        print("Loading the DDPG networks (actor and critic) and the DDQN policy network ...")
        # Placeholders: replace with the paths of the saved .h5 weight files.
        path_actor = '<add the path of the .h5 file of the DDPG actor>'
        path_critic = '<add the path of the .h5 file of the DDPG critic>'
        path_ddqn = '<add the path of the .h5 file of the DDQN actor>'
        algo.load_weights(path_actor, path_critic, path_ddqn)

        # Example rollout: start from a randomly generated state stacked
        # under a row of zeros.
        s = np.random.rand(1, args.Nr)
        s_1 = np.zeros_like(s)
        s = np.vstack((s_1, s))

        while True:
            W = algo.policy_action(s)
            cluster_index = algo.ddqn_clustering.policy_action(s)
            # The environment takes the DDPG action and the cluster choice together.
            a_and_c = {'a': W, 'c': cluster_index}
            new_state, r, done, _ = env.step(a_and_c)
            print("RL min rate = {}".format(r))
            print("RL state = {}".format(np.log(1 + new_state)))
            s = new_state
            input('Press Enter to continue ...')
def main(args=None):
    """Load a trained agent and run it in its environment until interrupted.

    Rendering is disabled in this variant; the step count of every finished
    episode is printed instead.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.

    Raises:
        ValueError: If ``args.type`` is not one of DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Environment Initialization
    if args.type == "DDPG":
        # Continuous environments: action size and range are read from the
        # unwrapped gym environment.
        env_before = gym.make(args.env)
        env_unwrapped = env_before.unwrapped
        env = Environment(env_before, args.consecutive_frames)
        state_dim = env.get_state_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard environments: sizes come from the wrapper itself.
        env_before = gym.make(args.env)
        env = Environment(env_before, args.consecutive_frames)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    # Pick the algorithm and restore its trained weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    else:
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Run the agent
    old_state, step_count = env.reset(), 0
    print('old state shape', old_state.shape)
    try:
        while True:
            a = algo.policy_action(old_state)
            if args.type == "DDPG":
                # Clip to +/- the action space's upper bound.
                a = np.clip(a, -act_range, act_range)
            old_state, r, done, _ = env.step(a)
            step_count += 1
            if done:
                print('done')
                print('Solved in', step_count, 'steps')
                old_state = env.reset()
                step_count = 0
    finally:
        # The loop never breaks, so the close call that used to follow it was
        # unreachable; run it when the loop is left through an exception
        # (e.g. Ctrl-C).
        env.env.close()
def main(args=None):
    """Train the selected agent, export its stats, then render it acting forever.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.

    Raises:
        ValueError: If ``args.type`` is not one of DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space,
        # instead of instantiating a second copy that is never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Display agent
    old_state = env.reset()
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, _, done, _ = env.step(a)
        if done:
            # Continue from the fresh episode's first observation rather than
            # from the terminal state of the previous episode.
            old_state = env.reset()
def main(args=None):
    """Train the selected agent, then export its stats and weights.

    Args:
        args: Command-line tokens handed to ``parse_args``; ``sys.argv[1:]``
            is used when None.

    Raises:
        ValueError: If ``args.type`` is not one of DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space,
        # instead of instantiating a second copy that is never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)

    algo.save_weights(export_path)
    env.env.close()