예제 #1
0
def main(args=None):
    """Load a trained agent and render it acting in its environment.

    The loop runs until the process is interrupted; the underlying
    environment is closed on the way out.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Build the gym environment once and read its action space from the
        # same instance, instead of creating a second one that is never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        if args.type == "DDPG":
            # Continuous action space
            action_dim = gym_env.action_space.high.shape[0]
            act_range = gym_env.action_space.high
        else:
            # Discrete action space
            action_dim = gym_env.action_space.n

    # Pick the algorithm and restore its trained weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    else:
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Display agent
    old_state = env.reset()
    try:
        while True:
            env.render()
            a = algo.policy_action(old_state)
            old_state, _, done, _ = env.step(a)
            if done:
                # Start the next episode from the freshly reset observation,
                # not from the terminal state of the finished episode.
                old_state = env.reset()
    finally:
        # Reached on interruption or error; the loop itself never ends.
        env.env.close()
예제 #2
0
        for t in range(steps):

            env.render()
            a = ddpg.act(s)
            s, r, d, info = env.step(a)
            if t == steps - 1: d = True

            if d:
                break

    env.close()


# Train a DDPG agent on LunarLanderContinuous-v2, hand it to run(), then plot
# the reward and both loss curves, each divided by its own maximum.
env = gym.make('LunarLanderContinuous-v2')
ddpg = DDPG(in_dim=8, out_dim=2, p_alpha=1e-3, q_alpha=1e-3)

training_options = dict(epochs=1000, episodes=1, steps=200, render=False, graph=True)
reward = train(env, ddpg, **training_options)

run(ddpg, env)

for curve, label in (
    (reward, "Reward"),
    (np.array(ddpg.q_loss), "Q loss"),
    (np.array(ddpg.p_loss), "P loss"),
):
    plt.plot(curve / np.max(curve), label=label)
예제 #3
0
def main(args=None):
    """Train the selected agent, then export its statistics and weights.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env_before = gym.make(args.env)
        env_unwrapped = env_before.unwrapped
        env = Environment(env_before, args.consecutive_frames)
        env.reset()

        state_dim = env.get_state_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard Environments
        env_before = gym.make(args.env)
        env = Environment(env_before, args.consecutive_frames)
        env.reset()

        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    # Pick algorithm to train
    print('args type: ', args.type)
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range,
                    args.consecutive_frames)
    else:
        # Fail with a clear message instead of an unbound ``algo`` later on.
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    if args.pretrain:
        print('pretrain')
        algo.load_weights(args.weights_path)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(exp_dir, exist_ok=True)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type,
                                                      args.env,
                                                      args.nb_episodes,
                                                      args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
예제 #4
0
def main(args=None):
    """Train, or run inference with, the DDPG agent and its DDQN clustering policy.

    With ``args.step == "train"`` the agent is trained and its statistics and
    weights are written under ``args.out_dir``. With ``args.step ==
    "inference"`` the saved networks are loaded and stepped interactively.
    Any other value of ``args.step`` does nothing after setup.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # One tag identifies the run in both the tensorboard and the models
    # directory. The tensorboard name used to repeat M1 in place of M2, so
    # runs that differed only in M2 shared a log directory.
    run_tag = "M1_{}_M2_{}_snr1_{}_snr2_{}".format(args.M1, args.M2, args.snr_M1, args.snr_M2)
    summary_writer = tf.summary.FileWriter("{}/tensorboard_{}".format(args.out_dir, run_tag))

    # Initialize the wireless environment
    users_env = UsersEnvCluster(args.M1, args.M2, args.snr_M1, args.snr_M2, fixed_channel=False)
    print(users_env)

    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()

    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    act_range = 1
    act_min = 0

    # Initialize the DQN algorithm for the clustering optimization
    n_clusters = users_env.n_clusters
    algo_clustering = DDQN(n_clusters, state_dim, args)

    # Initialize the DDPG algorithm for the beamforming optimization
    algo = DDPG(action_dim, state_dim, act_range, act_min, args.consecutive_frames, algo_clustering, episode_length=args.episode_length)

    if args.step == "train":
        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if args.gather_stats:
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

        # Save weights and close environments
        exp_dir = '{}/models_{}/'.format(args.out_dir, run_tag)
        # exist_ok avoids the race between checking for the directory and creating it.
        os.makedirs(exp_dir, exist_ok=True)

        # Save DDPG
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDPG", args.nb_episodes, args.batch_size)
        algo.save_weights(export_path)

        # Save DDQN
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDQN", args.nb_episodes, args.batch_size)
        algo.ddqn_clustering.save_weights(export_path)

    elif args.step == "inference":
        print("Loading the DDPG networks (actor and critic) and the DDQN policy network ...")
        # Placeholders: replace with the real .h5 paths before running inference.
        path_actor = '<add the path of the .h5 file of the DDPG actor>'
        path_critic = '<add the path of the .h5 file of the DDPG critic>'
        path_ddqn = '<add the path of the .h5 file of the DDQN actor>'
        algo.load_weights(path_actor, path_critic, path_ddqn)

        # Start from a random state as an example: a zero frame stacked on
        # top of a random frame.
        s = np.random.rand(1, args.Nr)
        s_1 = np.zeros_like(s)
        s = np.vstack((s_1, s))

        while True:
            W = algo.policy_action(s)
            cluster_index = algo.ddqn_clustering.policy_action(s)
            a_and_c = {'a': W, 'c': cluster_index}
            new_state, r, done, _ = env.step(a_and_c)
            print("RL min rate = {}".format(r))
            print("RL state = {}".format(np.log(1 + new_state)))
            s = new_state
            input('Press Enter to continue ...')
예제 #5
0
def main(args=None):
    """Train the selected agent, then export its statistics and weights.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Build the gym environment once and read its action space from the
        # same instance, instead of creating a second one that is never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        if args.type == "DDPG":
            # Continuous action space
            action_dim = gym_env.action_space.high.shape[0]
            act_range = gym_env.action_space.high
        else:
            # Discrete action space
            action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        # Fail with a clear message instead of an unbound ``algo`` later on.
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(exp_dir, exist_ok=True)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir,
        args.type,
        args.env,
        args.nb_episodes,
        args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
예제 #6
0
def main(args=None):
    """Load trained weights for the selected agent and run it in its environment.

    The loop runs until the process is interrupted, reporting the length of
    each finished episode; the underlying environment is closed on the way
    out.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Environment Initialization
    if args.type == "DDPG":
        # Continuous Environments Wrapper
        env_before = gym.make(args.env)
        env_unwrapped = env_before.unwrapped
        env = Environment(env_before, args.consecutive_frames)

        state_dim = env.get_state_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard Environments
        env_before = gym.make(args.env)
        env = Environment(env_before, args.consecutive_frames)

        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    # Pick the algorithm and restore its trained weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range,
                    args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    else:
        # Fail with a clear message instead of an unbound ``algo`` later on.
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Run agent; ``steps`` counts the steps of the current episode.
    old_state, steps = env.reset(), 0
    print('old state shape', old_state.shape)
    try:
        while True:
            a = algo.policy_action(old_state)
            if args.type == "DDPG":
                # Keep the continuous action inside the environment's bounds.
                a = np.clip(a, -act_range, act_range)
            old_state, _, done, _ = env.step(a)
            steps += 1
            if done:
                print('done')
                print('Solved in', steps, 'steps')
                old_state = env.reset()
                steps = 0
    finally:
        # Reached on interruption or error; the loop itself never ends.
        env.env.close()
예제 #7
0
def main(args=None):
    """Train the selected agent, export its statistics, then render it acting.

    The display loop at the end runs until the process is interrupted.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Build the gym environment once and read its action space from the
        # same instance, instead of creating a second one that is never closed.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        if args.type == "DDPG":
            # Continuous action space
            action_dim = gym_env.action_space.high.shape[0]
            act_range = gym_env.action_space.high
        else:
            # Discrete action space
            action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        # Fail with a clear message instead of an unbound ``algo`` later on.
        raise ValueError("Unsupported algorithm type: {}".format(args.type))

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state = env.reset()
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, _, done, _ = env.step(a)
        if done:
            # Start the next episode from the freshly reset observation,
            # not from the terminal state of the finished episode.
            old_state = env.reset()
예제 #8
0
        "learning_rate": 0.01,
        "linear_hidden_units": [400, 300],
        "final_layer_activation": "None",
        "batch_norm": False,
        "buffer_size": 100000,
        "tau": 0.01,
        "gradient_clipping_norm": 5
    },
    "batch_size": 32,
    "discount_rate": 0.99,
    "mu": 0.0,  # for O-H noise
    "theta": 0.15,  # for O-H noise
    "sigma": 0.2,  # for O-H noise
    "action_noise_std":
    0.2,  # for TD3
    "action_noise_clipping_range": 0.5,  # for TD3
    "update_every_n_steps": 1,
    "learning_updates_per_learning_session": 1,
    "clip_rewards": False
}

if __name__ == "__main__":
    # Build the DDPG agent; in test mode restore a saved model first.
    agent = DDPG(config)
    if config.test:
        checkpoint_dir = config.model_path + "/" + '0929-124226' + "/"
        agent.load_model(path=checkpoint_dir)

    # Hand the agent to the multicast environment and start it.
    Env = Multicast_Env(config, agent)
    Env.run()