def main(w_count, success_count):
    env = MyEnv({})

    while True:
        # print(f'step {step}')
        # Select a random action
        # action_index = env.action_space.sample()
        a = np.array([0, 1, 2, 3])
        p = np.array([1, 1, 1, 1])
        """
        a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])
        p = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1])
        """
        p = p / np.sum(p)
        action_index = np.random.choice(a, p=p)

        # Execute one step of the environment
        observation, reward, done, _ = env.step(action_index)
        if args.render_mode == 'human':
            print(f'\naction is selected at {env.steps}')
            status_print(env, observation, reward, done)

        # Render the environment
        shot = env.render(mode=args.render_mode)

        # Pause with the Space key, for debugging
        pause_for_debug()

        # End-of-episode handling
        if done:
            # print('done')
            w_count, success_count = conunt_results(env, w_count,
                                                    success_count)
            break
    return w_count, success_count
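# A minimal driver sketch (not from the original source): it only illustrates how
# the main(w_count, success_count) above could be looped over several episodes,
# accumulating the counters it returns. N_EPISODES and the zero initial values
# are assumptions for illustration; the real module may drive it differently.
if __name__ == '__main__':
    N_EPISODES = 10
    w_count, success_count = 0, 0
    for _ in range(N_EPISODES):
        w_count, success_count = main(w_count, success_count)
    print(f'Successful episodes: {success_count} / {N_EPISODES}')
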
def main():
    # Initialize ray
    ray.init(ignore_reinit_error=True, log_to_driver=False)

    # Generate & Check environment
    env = MyEnv({})

    # Define trainer agent
    model_name = MODEL_NAME

    config = ppo.DEFAULT_CONFIG.copy()
    config['env_config'] = {}
    config['num_gpus'] = 0
    config['framework'] = 'tfe'
    config['eager_tracing'] = True

    agent = ppo.PPOTrainer(config=config, env=MyEnv)
    agent.restore(model_name)

    for idx in range(90):
        """ Initialization """
        observation = env.reset()
        frames = []
        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
            action_index = agent.compute_action(observation)

            # Execute one step of the environment
            observation, reward, done, _ = env.step(action_index)

            # Render the environment and record video frames
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Pause with the Space key, for debugging
            pause_for_debug()

            # Slow down rendering
            pygame.time.wait(10)

            # End-of-episode handling
            if done:
                status_print(env, observation, reward, done, fighter_0,
                             jammer_0)
                video_name = f'{ALGORITHM}_{env.mission_condition}-{idx}'
                make_video(video_name, frames)
                make_jason(env, video_name, fighter_0, jammer_0)
                break
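
# A hypothetical sketch (not the author's implementation) of what a helper like
# make_video might look like, assuming the frames collected above are RGB numpy
# arrays and that imageio with its ffmpeg backend is installed.
import imageio

def make_video_sketch(video_name, frames, fps=30):
    # Write the collected frames to an .mp4 file named after the episode.
    imageio.mimsave(video_name + '.mp4', frames, fps=fps)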
Example #3
        MIN_rewards = []
        MIN_serveratio = []
        MIN_incentives = []

        rewards = []

        # Number of trials (episodes)
        no_episodes = 50

        stats = plotting.EpisodeStats(episode_lengths=np.zeros(no_episodes),
                                      episode_rewards=np.zeros(no_episodes))

        T = 2000
        number_of_contents = 10
        myenv = MyEnv(density=density,
                      T=T,
                      number_of_contents=number_of_contents)

        if RL is False:
            RL = DeepQNetwork(myenv.no_actions,
                              myenv.observation_length,
                              learning_rate=0.001,
                              reward_decay=0.9,
                              e_greedy=0.9,
                              replace_target_iter=5000,
                              memory_size=2000,
                              batch_size=220
                              # output_graph=True
                              )

        print("No. vehicles:" + str(myenv.number_of_vehicles))
Example #4
    all_rewards_max = []
    all_powers_max = []
    all_services_max = []
    all_upload_max = []

    # Q = None;
    # myenv = MyEnv(density=density, T=100000)
    # print("learning:"+str(myenv.number_of_vehicles))
    #
    # Q, stats = qLearning(myenv, 1)

    cache += 1
    for iteration in range(iterations):
        myenv = MyEnv(density=density,
                      T=time,
                      number_of_contents=numbers[cache])
        #myenv.RSU_cache_size = caches[cache]
        #myenv.hit_energy_ratio = ws[cache]

        print("Testing:" + str(myenv.number_of_vehicles))
        # 1) Greedy Algorithm
        myenv.reset()
        start = 0

        myenv.i = start
        for i in range(start, myenv.number_of_vehicles):
            if (i not in myenv.available_contents_to_cache
                    and myenv.available[i] not in myenv.RSU_cache):
                myenv.step(1)
            else:
Example #5
def main():
    # Initialize ray
    ray.init(ignore_reinit_error=True, log_to_driver=False)

    # Generate & Check environment
    env = MyEnv({})

    # Define trainer agent
    model_name = MODEL_NAME

    config = ppo.DEFAULT_CONFIG.copy()
    config['env_config'] = {}
    config['num_workers'] = NUM_WORKERS
    config['num_gpus'] = 0
    config['framework'] = 'tfe'
    config['eager_tracing'] = True

    agent = ppo.PPOTrainer(config=config, env=MyEnv)
    agent.restore(model_name)

    success_history = []
    success_count = 0
    for idx in range(N_EVAL_EPISODES):
        """ Initialization """
        observation = env.reset()
        frames = []
        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
            action_index = agent.compute_action(observation)

            # Execute one step of the environment
            observation, reward, done, info = env.step(action_index)

            # Render the environment and record video frames
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Slow down rendering
            # pygame.time.wait(10)

            # End-of-episode handling
            if done:
                success_history.append(info['success'])
                if info['success'] > .5:
                    success_count += 1
                break

    n_success = success_count
    n_fail = N_EVAL_EPISODES - n_success
    if np.sum(success_history) != success_count:
        raise Exception('Something is wrong!')
    """ Summarize results """
    print('==================== Summary of the results ====================')
    print(
        f'Mission conditions = w1 : w2 : w3 = '
        f'{env.mission_probability[0]:.3f} : {env.mission_probability[1]:.3f} : {env.mission_probability[2]:.3f}'
    )
    print(f'   Model is < {MODEL_NAME} >')
    print(
        f'   Number of success missions: {round(n_success)} / {N_EVAL_EPISODES},  '
        f'   Number of failed missions {round(n_fail)} / {N_EVAL_EPISODES}')
Example #6
from gym.wrappers import Monitor
from stable_baselines.common.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines.common.vec_env import DummyVecEnv

from stable_baselines import DQN, DDPG
from stable_baselines.ddpg.policies import MlpPolicy
import numpy as np
#
import time
from myenv import MyEnv

log = 'env/'
env1 = Monitor(MyEnv(8), log, force=True)
env = DummyVecEnv([lambda: env1])
# env = gym.make('CartPole-v1')

# the noise objects for DDPG
n_actions = env.action_space.shape[-1]
param_noise = None
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=float(0.5) *
                                            np.ones(n_actions))

# model = DDPG(MlpPolicy, env, verbose=1, param_noise=param_noise, action_noise=action_noise)
model = DDPG(MlpPolicy, env, verbose=1, tensorboard_log='./log/')
model.learn(total_timesteps=10000)
model.save("ddpg_mountain_8")
# del model # remove to demonstrate saving and loading
# #
# model = DDPG.load("ddpg_mountain")
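
# A minimal follow-up sketch (not from the original source): load the model saved
# above and roll it out in the vectorized environment defined earlier. It assumes
# the standard stable_baselines DDPG.load / model.predict API.
model = DDPG.load("ddpg_mountain_8")
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs)          # deterministic policy action
    obs, rewards, dones, info = env.step(action)  # DummyVecEnv returns batched values
    env.render()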
Example #7
    all_rewards_random = []
    all_powers_random = []
    all_services_random = []

    all_rewards_most = []
    all_powers_most = []
    all_services_most = []

    all_rewards_static = []
    all_powers_static = []
    all_services_static = []

    for iteration in range(iterations):

        myenv = MyEnv(density=density)
        myenv.RSU_cache_size = 100
        # 1) Greedy Algorithm
        myenv.reset()
        for i in range(myenv.number_of_vehicles):
            myenv.step(1)

        # Store the best reward for one iteration
        all_rewards_greedy.append(myenv.total_reward)
        all_powers_greedy.append(myenv.total_energy)
        all_services_greedy.append(myenv.total_download /
                                   myenv.total_request_amount)

        # 2) Random Algorithm
        myenv.reset()
        actions = [
Example #8
if __name__ == "__main__":

    densities = [0.002]
    for density in densities:

        rewards = []

        # Number of trials (episodes)
        no_episodes = 4000

        stats = plotting.EpisodeStats(
            episode_lengths=np.zeros(no_episodes),
            episode_rewards=np.zeros(no_episodes))


        myenv = MyEnv(density=density)
        print(myenv.number_of_vehicles)
        RL = DeepQNetwork(myenv.no_actions, myenv.number_of_contents + 2,
                          learning_rate=0.01,
                          reward_decay=0.9,
                          e_greedy=0.9,
                          replace_target_iter=200,
                          memory_size=100,
                          # output_graph=True
                          )

        for e in range(no_episodes):

            # Reset the environment
            observation = myenv.reset()
Example #9
def main():
    # Initialize ray
    ray.init(ignore_reinit_error=True, log_to_driver=False)

    # Define trainer agent
    config = ppo.DEFAULT_CONFIG.copy()
    config['env_config'] = {}
    config['num_gpus'] = 0
    config['num_workers'] = NUM_WORKERS
    config['num_cpus_per_worker'] = 1
    config['framework'] = 'tfe'
    config['eager_tracing'] = True
    # config['model']['fcnet_hiddens'] = [64, 64, 64]
    print(pretty_print(config))
    trainer = ppo.PPOTrainer(config=config,
                             env=MyEnv,
                             logger_creator=custom_log_creator(
                                 os.path.expanduser("./" + PROJECT + "/logs"),
                                 TRIAL))

    logdir = trainer.logdir
    print(f'\n********************** logdir = {logdir}\n')

    # Check trainer agent
    policy = trainer.get_policy()
    policy.model.base_model.summary()

    # Define evaluator agent
    eval_env = MyEnv({})
    # obs = eval_env.reset()

    # Train agent
    max_episode = MAX_EPISODE
    eval_freq = EVAL_FREQ
    n_eval_episode = N_EVAL_EPISODE
    best_success_count = -100
    best_checkpoint_dir = os.path.join('./' + PROJECT + '/checkpoints/',
                                       TRIAL + '_best')
    success_history = []
    iteration_history = []
    for i in range(max_episode):
        print(f'{i}th iteration is starting.')
        # Training
        result = trainer.train()
        # print(pretty_print(result))

        # Evaluation
        if i % eval_freq == 0:
            print(
                f'\n--------------- Evaluation results at {i}th iteration ---------------'
            )
            print(pretty_print(result))
            total_return = 0
            success_count = 0
            info = {}
            return_list = []
            for j in range(n_eval_episode):
                # Test the trained agent
                obs = eval_env.reset()
                done = False
                episode_return = 0

                while not done:
                    action = trainer.compute_action(obs)
                    obs, reward, done, info = eval_env.step(action)
                    episode_return += reward

                # Accumulate the per-episode return and the overall return
                total_return += episode_return
                return_list.append(episode_return)
                if info['success'] > 0.5:
                    success_count += 1

            print(
                f'\niteration {i} success_count: {success_count} / {n_eval_episode}'
            )
            # print(f'return list: {return_list}')

            success_history.append(success_count / n_eval_episode)
            iteration_history.append(i)

            success_history_np = np.array(success_history)
            file_name = './' + PROJECT + '/learning_history/trial_' + str(ID)
            # np.savez('./learning_history', iteration_history, success_history)
            np.savez(file_name, iteration_history, success_history)

            if success_count >= best_success_count:
                best_checkpoint = trainer.save(
                    checkpoint_dir=best_checkpoint_dir)
                print(f'best checkpoint saved at {best_checkpoint}\n')
                best_success_count = success_count

            print(
                f'------------------------------------------------------------\n'
            )
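
# A short follow-up sketch (not from the original source): after training, the best
# checkpoint saved above can be restored for evaluation in the same way as the
# earlier examples, assuming the names from main() (config, eval_env,
# best_checkpoint) are still in scope.
eval_agent = ppo.PPOTrainer(config=config, env=MyEnv)
eval_agent.restore(best_checkpoint)
obs = eval_env.reset()
done = False
while not done:
    action = eval_agent.compute_action(obs)
    obs, reward, done, info = eval_env.step(action)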