def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick the algorithm and restore its weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            # Start the next episode from the reset observation
            old_state = env.reset()

    env.env.close()
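# Example invocation (hedged): the exact CLI flags are defined by parse_args(), which
# is not shown here; the flags below simply mirror the attributes read above
# (args.type, args.env, args.consecutive_frames, args.actor_path, args.critic_path),
# and the script name and .h5 paths are placeholders, not files from the original project.
#
#   python3 load_and_run.py --type DDPG --env LunarLanderContinuous-v2 \
#       --consecutive_frames 4 --actor_path models/actor.h5 --critic_path models/critic.h5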
def run(ddpg, env, steps=200):
    # Render one evaluation episode with the trained agent
    s = env.reset()
    for t in range(steps):
        env.render()
        a = ddpg.act(s)
        s, r, d, info = env.step(a)
        if t == steps - 1:
            d = True
        if d:
            break
    env.close()


env = gym.make('LunarLanderContinuous-v2')
ddpg = DDPG(in_dim=8, out_dim=2, p_alpha=1e-3, q_alpha=1e-3)
reward = train(env, ddpg, epochs=1000, episodes=1, steps=200, render=False, graph=True)
# print(ddpg.p_loss)
run(ddpg, env)

# Normalize each curve by its own maximum so reward and both losses share one scale
plt.plot(reward / np.max(reward), label="Reward")
plt.plot(np.array(ddpg.q_loss) / np.max(ddpg.q_loss), label="Q loss")
plt.plot(np.array(ddpg.p_loss) / np.max(ddpg.p_loss), label="P loss")
plt.legend()
plt.show()
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    # if args.gpu:
    #     os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    #     sess = get_session()
    #     set_session(sess)
    #     K.set_session(sess)
    # with tf.device('/gpu:0'):
    #     config = tf.ConfigProto()
    #     config.gpu_options.allow_growth = True
    #     sess = tf.Session(config=config)
    #     K.set_session(sess)
    #     set_session(sess)

    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # env = Environment(gym.make(args.env), args.consecutive_frames)
        # env.reset()
        # state_dim = env.get_state_size()
        # action_space = gym.make(args.env).action_space
        # action_dim = action_space.high.shape[0]
        # act_range = action_space.high
        env_before = gym.make(args.env)
        env_unwrapped = env_before.unwrapped
        # env_unwrapped.observation_space = env_unwrapped.observation_shape
        # state_dim = env_unwrapped.observation_space.shape
        # action_dim = env_unwrapped.action_space.n
        env = Environment(env_before, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        # action_space = env.action_space
        # action_dim = env.get_action_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard Environments
        # env = Environment(gym.make(args.env), args.consecutive_frames)
        # env.reset()
        # state_dim = env.get_state_size()
        # action_dim = gym.make(args.env).action_space.n
        # unreal
        env_before = gym.make(args.env)
        # env_unwrapped = env_before.unwrapped
        # env_unwrapped.observation_space = env_unwrapped.observation_shape
        # state_dim = env_unwrapped.observation_space.shape
        # action_dim = env_unwrapped.action_space.n
        env = Environment(env_before, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)
        # state_dim = (640, 380)
        # action_dim = 3

    # Pick algorithm to train
    print('args type: ', args.type)
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range, args.consecutive_frames)

    if args.pretrain:
        print('pretrain')
        algo.load_weights(args.weights_path)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type, args.env,
                                                      args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.env.close()
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    summary_writer = tf.summary.FileWriter("{}/tensorboard_M1_{}_M2_{}_snr1_{}_snr2_{}".format(
        args.out_dir, args.M1, args.M2, args.snr_M1, args.snr_M2))

    # Initialize the wireless environment
    users_env = UsersEnvCluster(args.M1, args.M2, args.snr_M1, args.snr_M2, fixed_channel=False)
    print(users_env)

    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()

    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    act_range = 1
    act_min = 0

    # Initialize the DDQN algorithm for the clustering optimization
    n_clusters = users_env.n_clusters
    algo_clustering = DDQN(n_clusters, state_dim, args)

    # Initialize the DDPG algorithm for the beamforming optimization
    algo = DDPG(action_dim, state_dim, act_range, act_min, args.consecutive_frames,
                algo_clustering, episode_length=args.episode_length)

    if args.step == "train":
        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if args.gather_stats:
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

        # Save weights and close environments
        exp_dir = '{}/models_M1_{}_M2_{}_snr1_{}_snr2_{}/'.format(args.out_dir, args.M1, args.M2,
                                                                  args.snr_M1, args.snr_M2)
        if not os.path.exists(exp_dir):
            os.makedirs(exp_dir)

        # Save DDPG
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDPG", args.nb_episodes, args.batch_size)
        algo.save_weights(export_path)

        # Save DDQN
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDQN", args.nb_episodes, args.batch_size)
        algo.ddqn_clustering.save_weights(export_path)

    elif args.step == "inference":
        print("Loading the DDPG networks (actor and critic) and the DDQN policy network ...")
        path_actor = '<add the path of the .h5 file of the DDPG actor>'
        path_critic = '<add the path of the .h5 file of the DDPG critic>'
        path_ddqn = '<add the path of the .h5 file of the DDQN actor>'
        algo.load_weights(path_actor, path_critic, path_ddqn)

        # As an example, run the learned policy during inference from a random initial state
        s = np.random.rand(1, args.Nr)
        s_1 = np.zeros_like(s)
        s = np.vstack((s_1, s))
        while True:
            W = algo.policy_action(s)
            cluster_index = algo.ddqn_clustering.policy_action(s)
            a_and_c = {'a': W, 'c': cluster_index}
            new_state, r, done, _ = env.step(a_and_c)
            print("RL min rate = {}".format(r))
            print("RL state = {}".format(np.log(1 + new_state)))
            s = new_state
            input('Press Enter to continue ...')
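# Example invocation (hedged): the flag names below are assumptions inferred from the
# attributes read above (args.M1, args.M2, args.snr_M1, args.snr_M2, args.step,
# args.out_dir); the actual names and defaults are defined by parse_args(), which is
# not shown, and the values are purely illustrative.
#
#   python3 main.py --M1 4 --M2 4 --snr_M1 10 --snr_M2 5 --step train --out_dir results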
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type, args.env,
                                                      args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.env.close()
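# A minimal entry-point sketch (an assumption, not part of the excerpt above): since
# main(args=None) already falls back to sys.argv[1:], a training script of this shape
# only needs to call it when run directly.
if __name__ == "__main__":
    main()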
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    # if args.gpu:
    #     os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    #     set_session(get_session())
    # with tf.device('/gpu:0'):
    #     config = tf.ConfigProto()
    #     config.gpu_options.allow_growth = True
    #     sess = tf.Session(config=config)
    #     K.set_session(sess)
    #     set_session(sess)

    # Environment Initialization
    # if args.is_atari:
    #     # Atari Environment Wrapper
    #     env = AtariEnvironment(args)
    #     state_dim = env.get_state_size()
    #     action_dim = env.get_action_size()
    if args.type == "DDPG":
        # Continuous Environments Wrapper
        env_before = gym.make(args.env)
        env_unwrapped = env_before.unwrapped
        env = Environment(env_before, args.consecutive_frames)
        # env.reset()
        state_dim = env.get_state_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # # Standard Environments
        # env_before = gym.make(args.env)
        # env = Environment(env_before, args.consecutive_frames)
        # env.reset()
        # state_dim = env.get_state_size()
        # action_dim = env.get_action_size()
        # action_dim = gym.make(args.env).action_space.n
        env_before = gym.make(args.env)
        # env_unwrapped = env_before.unwrapped
        # env_unwrapped.observation_space = env_unwrapped.observation_shape
        # state_dim = env_unwrapped.observation_space.shape
        # action_dim = env_unwrapped.action_space.n
        env = Environment(env_before, args.consecutive_frames)
        # env.reset()
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    print('old state shape', old_state.shape)
    while True:
        # env.render()
        a = algo.policy_action(old_state)
        if args.type == "DDPG":
            a = np.clip(a, -act_range, act_range)
        # print('a', a)
        # print(type(a))
        # print(a.shape)
        old_state, r, done, _ = env.step(a)
        time += 1
        # print('time ', time)
        if done:
            print('done')
            print('Solved in', time, 'steps')
            # break
            old_state = env.reset()
            time = 0
            # break

    env.env.close()
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            # Start the next episode from the reset observation
            old_state = env.reset()
"learning_rate": 0.01, "linear_hidden_units": [400, 300], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.01, "gradient_clipping_norm": 5 }, "batch_size": 32, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.2, # for O-H noise "action_noise_std": 0.2, # fIntel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHzor TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "clip_rewards": False } if __name__ == "__main__": # AGENTS = [DDPG] # trainer = Trainer(config, AGENTS) # trainer.run_games_for_agents() agent = DDPG(config) if config.test: agent.load_model(path=config.model_path + "/" + '0929-124226' + "/") Env = Multicast_Env(config, agent) Env.run()