def create_agents(self,
                      pre_trained,
                      path="backups/RL_agent6iter200000.zip"):

        dummy_agent_4 = DQN.load("backups/Lane_iter200000_lane4.zip")
        dummy_agent_6 = DQN.load("backups/RL_agent6iter200000.zip")

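        # Create one agent per partition: pre-trained agents are shallow copies
        # of the loaded models with exploration disabled (greedy behaviour),
        # otherwise a fresh DQN is trained from scratch on each partition.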
        for index, partition in enumerate(self.env.get_partitions()):
            self.partitions.append(partition)

            if pre_trained:
                if partition.num_lanes == 4:
                    agent = copy.copy(dummy_agent_4)
                else:
                    agent = copy.copy(dummy_agent_6)
                agent.env = partition
                agent.exploration_initial_eps = 0
                agent.exploration_final_eps = 0
            else:
                agent = DQN(MlpPolicy,
                            partition,
                            verbose=2,
                            gamma=0.9,
                            exploration_fraction=0.6,
                            exploration_final_eps=0)

            print("Agent added: ", index)
            self.agents.append(agent)
Example #2
    def run_test(self):
        env = CustomEnv(self.path_planner, self.behavior_planner, self.event)
        env = make_vec_env(lambda: env, n_envs=1)
        if (self.event == Scenario.LANE_CHANGE):
            model = DQN.load(MODEL_LOAD_PATH)
        if (self.event == Scenario.PEDESTRIAN):
            model = DQN.load(MODEL_LOAD_PATH)
        obs = env.reset()
        count = 0
        success = 0
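        # Evaluate for 500 episodes; the vectorized env created by make_vec_env
        # auto-resets whenever an episode ends, so env.reset() is only needed once.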
        while count < 500:
            done = False

            while not done:
                action, _ = model.predict(obs)

                print("Action taken:", RLDecision(action))
                obs, reward, done, info = env.step(action)
                # print("Reward",reward)
            count += 1
            if info[0]["success"]:
                success += 1
            print("Count ", count, "Success ", success, "Success Rate:",
                  success * 100 / float(count), "%")
        print("Success Rate ", success / count, success, count)
Example #3
def train_multiple(cfg, version, trained_model, double_agent=False):
    # double_agent means both agents were trained in the multi-agent environment
    if double_agent:
        gym_wrapper = MultiAgentCustomEnv(cfg)
        # model_trained = DQN.load("{0}models/{1}".format("./", trained_model), env=gym_wrapper)
        model_trained = DQN.load("{0}models/{1}".format(
            cfg["study_results"], trained_model),
                                 env=gym_wrapper)
    else:
        gym_wrapper = CustomEnv(cfg)
        # model_trained = DQN.load("{0}models/{1}".format("./", trained_model), env=gym_wrapper)
        model_trained = DQN.load("{0}models/{1}".format(
            cfg["study_results"], trained_model),
                                 env=gym_wrapper)

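    # Embed the previously trained model in the multi-agent environment;
    # single=True means the loaded agent was trained in the single-agent setting.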
    gym_wrapper = MultiAgentCustomEnv(cfg,
                                      model_trained,
                                      single=not double_agent)

    model = DQN(MlpPolicy,
                gym_wrapper,
                verbose=1,
                double_q=cfg["double-dqn"],
                prioritized_replay=cfg["prioritized"],
                policy_kwargs=dict(dueling=cfg["dueling"]),
                exploration_fraction=cfg["exploration_frac"],
                tensorboard_log=cfg["study_results"] +
                "tensorboard/experiments/")

    model.learn(total_timesteps=cfg["timesteps"],
                tb_log_name=cfg["experiment_name"])
    model.save("{0}models/{2}-v{1}".format(cfg["study_results"], version,
                                           cfg["experiment_name"]))
Example #4
def get_environment_figures(model,
                            *,
                            source_folder=TRAINED_MODEL_FOLDER_DOCKER,
                            vector=False):
    if not vector:
        try:
            model_to_load = source_folder + model
            trained_model = DQN.load(model_to_load, ENV_DISP)
        except Exception as e:
            try:
                source_folder = TRAINED_MODEL_FOLDER_LOCAL
                model_to_load = source_folder + model
                trained_model = DQN.load(model_to_load, ENV_DISP)
            except Exception as e:
                print("Failed to load model.")
                print(
                    "If the model is not inside the trained_model folder, "
                    "override source_folder to point at the desired folder."
                )
                print(str(e))
                os._exit(0)

        # Show the result of the training
        obs = ENV_DISP.reset()
        for episode in range(1):
            done = False
            while not done:
                action, _states = trained_model.predict(obs)
                obs, rewards, done, info = ENV_DISP.step(action)

            fig_xy = ENV_DISP.get_xy_plane_figure()
            fig_xz = ENV_DISP.get_xz_plane_figure()
            fig_3d = ENV_DISP.get_3d_figure()
            return fig_xy, fig_xz, fig_3d
    else:
        print("Vectorized env not implemented yet")
Example #5
def launchAgent():
    from stable_baselines import DQN
    import Reinforcement_AI.env.c_seperate_env as sep_env
    from queue import Queue
    from threading import Thread

    minimap_env = sep_env.MinimapEnv()
    allenv = sep_env.AllEnv()

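    # Two independent DQN agents: a CNN policy on the minimap environment and
    # an MLP policy on the AllEnv observations.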
    minimap_model = DQN(
        "CnnPolicy",  # policy
        minimap_env,  # environment
        double_q=True,  # Double Q enable
        prioritized_replay=True,  # Replay buffer enabled
        verbose=0  # log print
    )

    allenv_model = DQN(
        "MlpPolicy",
        allenv,
        double_q=True,
        prioritized_replay=True,
        verbose=0
    )

    for i in range(100):
        if i != 0:
            minimap_model = DQN.load("KR_minimap_" + str(i))
            allenv_model = DQN.load("KR_allenv_" + str(i))

        que = Queue()

        minimap_model.set_env(minimap_env)
        allenv_model.set_env(allenv)

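        # Train both agents concurrently: the AllEnv model learns in a worker
        # thread (its result is passed back through the queue) while the
        # minimap model learns in the main thread.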
        # minimap_thread = Thread(target=minimap_model.learn, args=[50000])
        # allenv_thread = Thread(target=allenv_model.learn, args=[50000])
        allenv_thread = Thread(target=lambda q, arg1: q.put(allenv_model.learn(arg1)), args=(que, 50000))
        # test = Pool(processes=1)

        # minimap_thread.start()
        allenv_thread.start()
        # test_result = test.apply_async(allenv_model.learn, (50000, None, 100, "DQN", True, None))
        minimap_model.learn(total_timesteps=50000)

        # allenv_model.learn(total_timesteps=50000)

        # minimap_thread.join()
        allenv_thread.join()

        allenv_model = que.get()
        # return_val = test_result.get()

        minimap_model.save("KR_minimap_" + str(i + 1))
        allenv_model.save("KR_allenv_" + str(i + 1))
Example #6
def pre_trained_model(env):
    model = DQN(
        CnnPolicy,
        env,
        #                tensorboard_log=graph_path,
        double_q=True,
        prioritized_replay=True,
        prioritized_replay_alpha=0.99,
        learning_starts=MAX_STEPS * 0.5,
        verbose=1)
    # DQN.load is a classmethod that returns a new model, so calling it on the
    # instance would discard the result; load the saved weights in place instead.
    model.load_parameters(pre_trained_path)
    return model
Example #7
def main(mode="train"):

    env = gym.make("balancebot-v0")
    if mode == "train":
        model = deepq(policy=LnMlpPolicy,
                      env=env,
                      double_q=True,
                      prioritized_replay=True,
                      learning_rate=1e-3,
                      buffer_size=100,
                      verbose=0,
                      tensorboard_log="dqn_balancebot_tensorboard")
        model.learn(
            total_timesteps=2000,
            callback=callback
        )
        print("Saving model to balance_dqn.pkl")
        model.save("balance_dqn.pkl")

        del model  # remove to demonstrate saving and loading

    if mode == "test":
        model = deepq.load("balance_dqn.pkl")

        obs = env.reset()
        done = False
        env.set_done(5000)
        while not done:
            action, _states = model.predict(obs)
            obs, rewards, done, info = env.step(action)
            # env.render()
            print(obs)
Example #8
def LunarLander_v2_DQN(): # TODO: this currently raises an error
    # Create environment
    env = gym.make('LunarLander-v2')

    # Instantiate the agent
    model = DQN('MlpPolicy', env, learning_rate=1e-3, prioritized_replay=True, verbose=1)
    # Train the agent
    model.learn(total_timesteps=100000)
    # Save the agent
    model.save("dqn_lunar")
    del model  # delete trained model to demonstrate loading

    # Load the trained agent
    model = DQN.load("dqn_lunar")

    # Evaluate the agent
    mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
    print(mean_reward, std_reward)

    # Enjoy trained agent
    obs = env.reset()
    for i in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
Example #9
def predict_with_DQN():
    simulation_start_time = time.time()
    model = DQN.load("deepq_jobshop")

    scores = []  # list of final scores after each episode
    episodes = 1  # 30
    max_periods = 8000  # 8000

    for episode in range(episodes):
        # Reset the game-state, done and score before every episode
        next_state = env.reset()
        score = 0

        for period in range(max_periods):  # predict for x periods
            action, _states = model.predict(next_state)
            next_state, reward, done, info = env.step(action)
            score += reward
        scores.append(score)

        print("Episode: {}/{}, score: {}".format(episode + 1, episodes, score))

        # print("Observation space at the end: " + str(next_state))
    print("Prediction finished after " +
          str(round(time.time() - simulation_start_time, 4)) + " seconds")
    print("Final average score over " + str(episodes) + " episodes: " +
          str(mean(scores)))
    return scores
Example #10
 def initialize(self, model_path: str):
     """
     Loads the model.

     :param model_path: (str) the path to the model
     """
     self.neural_network = DQN.load(model_path)
Example #11
def testLookAheadAgent(env, original_env, agent):
    agent_path = 'trained-agents/{0}'.format(agent)
    model = DQN.load(load_path=agent_path, env=env)

    state = env.reset()

    writeLineToFile('Step; State; Reward', csv_output_filename)

    time.sleep(5)  # give the flows time to start so active_flows is not empty

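    # Poll the running network once per second and let the agent act only on
    # flows classified as elephant flows.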
    step = 0
    while True:
        active_flows = original_env.getActiveFlows()

        for flow in active_flows:
            if original_env.isElephantFlow(flow):
                state = original_env.getState()
                action, _ = model.predict(state, deterministic=False)
                state, reward, done, info = env.step(action, flow)

                output_data_line = '{0}; {1}; {2}'.format(step, state, reward)
                writeLineToFile(output_data_line, csv_output_filename)
                step += 1

        print()
        time.sleep(1)
Example #12
def testModel(model_path, episodes, max_timesteps_per_episode, sqil=True):
    """
    runs a model "episodes" iterations and collects rewards for each episode
    each episode stops at max_timesteps_per_episode or earlyier
    """
    env = gym.make("cartpole_custom-v0")
    model = None
    if sqil:
        model = SQIL_DQN.load(model_path)
    else:
        model = DQN.load(model_path)

    rewards = np.zeros((episodes))
    for i in range(0, episodes):
        obs = env.reset()
        timestep = 0
        done = False
        while not done:
            timestep += 1
            action, _ = model.predict(obs)
            obs, reward, done, _ = env.step(action)
            rewards[i] += reward
            if timestep > max_timesteps_per_episode:
                break

    return rewards
Example #13
def main():
    num = len(sys.argv)
    if(num == 2):
        games = int(sys.argv[1])
    else:
        print("python ",sys.argv[0]," (number of games)")
        sys.exit(1)
    env = TetrisEnv()
    model = DQN.load("tetris_model")
    total_model_score = 0
    total_random_score = 0
    for _ in range(games):
        obs = env.reset()
        while True:
            action, _states = model.predict(obs)
            obs, rewards, dones, info = env.step(action)
            current_board = env.game.get_board()
            for board in env.game.board_list:
                if np.array_equal(np.array(current_board),np.array(board)):
                    obs, rewards, dones, info = env.step(3) #move down if repeated
                    break
            if(dones):
                total_model_score += env.game.score
                break
        #using random actions
        obs = env.reset()
        while True:
            action= env.action_space.sample()
            obs, rewards, dones, info = env.step(action)
            if(dones):
                total_random_score += env.game.score
                break
    print("Final avg score using model: ", total_model_score/games)
    print("Final avg score using random actions: ", total_random_score/games)
Example #14
def testAgent(env, original_env, agent, timesteps):
    num_steps = int(timesteps)

    agent_path = 'trained-agents/{0}'.format(agent)
    model = DQN.load(load_path=agent_path, env=env)

    state = env.reset()

    writeLineToFile('Step; State; Reward', csv_output_filename)

    for step in range(num_steps):
        print('Step ', step)
        print('State ', state)

        action, _ = model.predict(state, deterministic=False)
        print('Action: ', action)

        state, reward, done, info = env.step(action)
        extra_info = info[0]

        is_action_for_elephant_flow = extra_info['is_action_for_elephant_flow']
        flow_label = 'EF' if is_action_for_elephant_flow else 'MF'
        print('Flow: ', flow_label)

        output_data_line = '{0}; {1}; {2}; {3}'.format(step, state, reward,
                                                       flow_label)
        writeLineToFile(output_data_line, csv_output_filename)
        step += 1
Example #15
def setup():
    global e, model, obs

    p.size(800, 600)
    e = Env(width, height)
    model = DQN.load("ball_blast_dqn_25000")
    obs = e.reset()
Example #16
def main():
    env = make_env()
    max_screen_x = 0

    model = DQN.load(saved_model_file_path)

    obs = env.reset()

    fps = 60
    frames_per_timestep = 4
    speed_up_factor = 1.5
    wait_time = frames_per_timestep / fps / speed_up_factor
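    # Sleep between steps so playback runs at roughly speed_up_factor times
    # real time, given frames_per_timestep game frames per environment step.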
    while True:
        t1 = time.time()

        action, _states = model.predict(obs)

        t2 = time.time()
        
        t3 = wait_time - (t2 - t1)

        if t3 > 0:
            time.sleep(t3)
        
        obs, rewards, done, info = env.step(action)

        if info['screen_x'] > max_screen_x:
            max_screen_x = info['screen_x']
            logger.info("Max screen x: " + str(max_screen_x))
        if done:
            env.reset()
        else:
            env.render()
Example #17
def load_model(model_dir, model_type="PPO"):
    #policy = MlpPolicy
    if model_type == "PPO":
        model = PPO2.load(model_dir)
    elif model_type == "DQN":
        model = DQN.load(model_dir)
    return model
Example #18
class Agent:
    model = DQN.load("best_model.pkl")

    def __init__(self):
        self.name = "DQNAgent"

    def act(self, stateObs, actions):
        action, _states = self.model.predict(stateObs)
        return action
Example #19
def run(model_name, iteration, world, stage):
    world_stage = 'SuperMarioBros-{}-{}-v2'.format(world, stage)
    env = gym_super_mario_bros.make(world_stage)
    env = JoypadSpace(env, RIGHT_ONLY)
    env = WarpFrame(env)
    env = FrameStack(env, n_frames=4)
    env = EpisodicLifeEnv(env)
    # env = MaxAndSkipEnv(env)

    # Save a checkpoint every 5000 steps
    checkpoint_callback = CheckpointCallback(save_freq=5000,
                                             save_path='./logs/',
                                             name_prefix=model_name)

    eval_callback = EvalCallback(env,
                                 best_model_save_path='./logs/',
                                 log_path='./logs/',
                                 eval_freq=10000,
                                 deterministic=True,
                                 render=False)

    print("Compiling model...")
    steps = 10000

    if iteration > 0:
        model = DQN.load('models/{}'.format(model_name),
                         env=env,
                         verbose=1,
                         learning_starts=2500,
                         learning_rate=1e-4,
                         exploration_final_eps=0.01,
                         prioritized_replay=True,
                         prioritized_replay_alpha=0.6,
                         train_freq=4,
                         tensorboard_log="./mario_tensorboard/")
    else:
        model = DQN(CnnPolicy,
                    env,
                    verbose=1,
                    learning_starts=2500,
                    learning_rate=1e-4,
                    exploration_final_eps=0.01,
                    prioritized_replay=True,
                    prioritized_replay_alpha=0.6,
                    train_freq=4,
                    tensorboard_log="./mario_tensorboard/")

    print("Training starting...")
    with ProgressBarManager(steps) as progress_callback:
        model.learn(
            total_timesteps=steps,
            # , eval_callback, checkpoint_callback],
            callback=[progress_callback],
            tb_log_name=model_name)
    print("Finished training model on env...\n")
    model.save("models/{}".format(model_name))
Example #20
def loader(algo, env_name):
    if algo == 'dqn':
        return DQN.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'ppo2':
        return PPO2.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'a2c':
        return A2C.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'acer':
        return ACER.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'trpo':
        return TRPO.load("trained_agents/" + algo + "/" + env_name + ".pkl")
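
A minimal usage sketch (hypothetical environment name), assuming the .pkl agents referenced above exist under trained_agents/:

    import gym
    env = gym.make("CartPole-v1")
    model = loader("dqn", "CartPole-v1")
    obs = env.reset()
    action, _states = model.predict(obs)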
Example #21
    def predict(self,
                symbol,
                loadpath=None,
                sd=dt.datetime(2018, 1, 29),
                ed=dt.datetime(2019, 12, 18),
                fwd=False):
        # update data
        dp.pull(symbol, should_save=True)
        dp.pull('SPY', should_save=True)

        # load data and add phantom SPY trading day
        df = self._load_data([symbol], sd, ed)
        if fwd:
            lastspy = df.loc['SPY'].tail(1).copy()
            lastspy.index = lastspy.index.shift(1, freq='D')
            lastspy['Symbol'] = 'SPY'
            lastspy = lastspy.reset_index().set_index(['Symbol', 'Date'])
            df = df.append(lastspy).sort_index()

        # load model and predict for test range
        self.model = DQN.load(loadpath)
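        # Forward mode sweeps 101 hypothetical prices around the last adjusted
        # close (offsets from -0.5 to +0.5) and records the action the policy
        # would take at each level.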
        if fwd:
            chgs = np.linspace(-0.5, 0.5, num=101)
            pxs = chgs + df.loc[symbol].tail(1).copy().AdjClose.values[0]
            pxchgs = np.zeros((101, ))
            actions = np.zeros((101, ))
            for i, px in enumerate(pxs):
                last = df.loc[symbol].tail(1).copy()
                last.index = last.index.shift(1, freq='D')
                pxchgs[i] = px / last.AdjClose - 1
                last.AdjClose = px
                last.Close = px
                last['Symbol'] = symbol
                last = last.reset_index().set_index(['Symbol', 'Date'])
                df_tmp = df.append(last).sort_index()

                # predict
                df_met = self._get_indicators(symbol, df_tmp)
                ob = df_met.tail(1).drop(['Date', 'AdjClose'], axis=1)
                action, _ = self.model.predict(ob)
                actions[i] = action

            df_preds = pd.DataFrame({
                'Price': pxs,
                'Chg': pxchgs,
                'Action': actions
            })
            return df_preds
        else:
            df_met = self._get_indicators(symbol, df)
            ob = df_met.tail(1).drop(['Date', 'AdjClose'], axis=1)
            action, _ = self.model.predict(ob)
            return action
Example #22
def main(log_dir=None, name_results_root_folder="results"):
    args = parseArgs()
    time_steps = TIME_STEPS
    # if log_dir was not given, use a default one that contains the training start time.
    if log_dir is None:
        if args.restart_training:
            # find the latest training folder
            latest_log_dir = os.path.join(
                name_results_root_folder,
                sorted(os.listdir(name_results_root_folder))[-1])
            logdir = latest_log_dir
        else:
            default_log_dir = os.path.join(name_results_root_folder,
                                           "DQN_" + getTimeStr())
            os.makedirs(default_log_dir, exist_ok=True)
            logdir = default_log_dir
    else:
        logdir = log_dir
    reward_bound = REWARD_BOUND
    # get arena environments and custom callback
    env = Monitor(Arena2dEnvWrapper(0, True),
                  os.path.join(logdir, "arena_env0"))
    # env = Arena2dEnvWrapper(0, True)
    call_back = SaveOnBestTrainingRewardCallback(500, logdir, 1, reward_bound)
    # temporary model path: if training is interrupted from the keyboard, the current model parameters are saved there.
    path_temp_model = os.path.join(logdir, "DQN_TEMP")
    if not args.restart_training:
        model = DQN(MlpPolicy,
                    env,
                    gamma=GAMMA,
                    learning_rate=LEARNING_RATE,
                    buffer_size=BUFFER_SIZE,
                    target_network_update_freq=SYNC_TARGET_STEPS,
                    tensorboard_log=logdir,
                    verbose=1)
        reset_num_timesteps = True
    else:
        if os.path.exists(path_temp_model + ".zip"):
            print("continue training the model...")
            model = DQN.load(path_temp_model, env=env)
            reset_num_timesteps = False
        else:
            print(
                "Can't load the model with the path: {}, please check again!".
                format(path_temp_model))
            env.close()
            exit(-1)
    # try:
    model.learn(time_steps,
                log_interval=200,
                callback=call_back,
                reset_num_timesteps=reset_num_timesteps)
    model.save(os.path.join(logdir, "DQN_final"))
Example #23
def get_existing_model(model_path):

    print('--- Training from existing model', model_path, '---')

    # Load model
    model = DQN.load(model_path, tensorboard_log=TENSORBOARD_PATH)

    # Set environment
    env = QWOPEnv()  # SubprocVecEnv([lambda: QWOPEnv()])
    model.set_env(env)

    return model
Example #24
def launchAgent(model_name: str):
    """
    :param model_name: which model to run; must be HER, DDPG, PPO2, or any other value (falls back to DQN).
                        Currently intended to be set to PPO2.
    :return: the model after 1000 training cycles
    """
    import Reinforcement_AI.env.e_enhanced_image_env as image_env
    from stable_baselines import DQN, HER, DDPG, PPO2
    from stable_baselines.common import make_vec_env

    print("Current Env is " + model_name)

    if model_name == "HER":
        env = image_env.DetailedMiniMapEnv()
        model = HER("CnnPolicy", env=env, model_class=DQN)
    if model_name == "DDPG":
        env = image_env.DDPGImageEnv()
        model = DDPG(policy="CnnPolicy", env=env, normalize_observations=True)
    if model_name == "PPO2":
        env = make_vec_env(image_env.DetailedMiniMapEnv, n_envs=1)
        model = PPO2(policy="CnnPolicy", env=env, verbose=1)
    else:
        env = image_env.DetailedMiniMapEnv()
        model = DQN(
            "CnnPolicy",  # policy
            env=env,  # environment
            double_q=True,  # Double Q enable
            prioritized_replay=True,  # Replay buffer enabled
            verbose=0  # log print
        )

    for i in range(1000):
        if i != 0:
            if model_name == "HER":
                model = HER.load("detailedmap_HER_" + str(i), env)
            if model_name == "DDPG":
                model = DDPG.load("detailedmap_DDPG_" + str(i), env)
            if model_name == "PPO2":
                model = PPO2.load("detailedmap_PPO2_" + str(i), env)
            else:
                model = DQN.load("detailedmap_DQN_" + str(i), env)

        # print('model learn start')
        model.learn(total_timesteps=12500)  # minimum value at which FPS stays above 130
        print("this model is : detailedmap_" + model_name + "_" + str(i + 1))
        # print('model learn finished')

        # print('model save start')
        model.save("detailedmap_" + model_name + "_" + str(i + 1))
        # keep the model reference so it can be returned after the loop
        # print('model save end')

    return model
Example #25
def run_sonobuoy_training(
                    exp_name,exp_path,
                    basicdate,
                    model_type='PPO2',
                    n_eval_episodes=10,
                    training_intervals=100,
                    max_steps=10000,
                    reward_margin=10,
                    log_to_tb=False,
                    pelican_agent_filepath=False):

    # set up logging 
    if log_to_tb:
        writer = SummaryWriter(exp_path)
        tb_log_name = 'sonobuoy_training'
    else:
        writer = None
        tb_log_name = None

        
    env = gym.make('plark-env-v0', panther_agent_filepath='/data/agents/models/PPO2_20200429_073132_panther/')
    
    if pelican_agent_filepath:
        logger.info('Loading agent from file: ' + pelican_agent_filepath)

        if model_type.lower() == 'dqn':
            model = DQN.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'ppo2':
            model = PPO2.load(pelican_agent_filepath)
            model.set_env(DummyVecEnv([lambda: env]))
            
        elif model_type.lower() == 'a2c':
            model = A2C.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'acktr':
            model = ACKTR.load(pelican_agent_filepath)
            model.set_env(env)

    else:   
        # Instantiate the env and model
        model = PPO2('CnnPolicy', env)

    # Start training 
    train_agent(exp_path,model,env,training_intervals,max_steps,model_type,basicdate,writer,tb_log_name,reward_margin)
                
    # Evaluate
    mean_reward, n_steps = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes, deterministic=False, render=False, callback=None, reward_threshold=None, return_episode_rewards=False)
    logger.info('Evaluation finished')
    logger.info('Mean Reward is ' + str(mean_reward))
    logger.info('Number of steps is ' + str(n_steps))
Example #26
def run_illegal_move_training(
                    exp_name,exp_path,
                    basicdate,
                    model_type='PPO2',
                    n_eval_episodes=10,
                    training_intervals=100,
                    max_steps=10000,
                    reward_margin=10,
                    log_to_tb=False,
                    pelican_agent_filepath=False):
    
       # set up logging 
    if log_to_tb:
        writer = SummaryWriter(exp_path)
        tb_log_name = 'Illegal_move_prevention_training'
    else:
        writer = None
        tb_log_name = None
    
    if pelican_agent_filepath:
        logger.info('Loading agent from file: ' + pelican_agent_filepath)
        # env = plark_env_illegal_move.PlarkEnvIllegalMove( config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json')
        env = gym.make('plark-env-illegal-move-v0')

        if model_type.lower() == 'dqn':
            model = DQN.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'ppo2':
            model = PPO2.load(pelican_agent_filepath)
            model.set_env(DummyVecEnv([lambda: env]))
            
        elif model_type.lower() == 'a2c':
            model = A2C.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'acktr':
            model = ACKTR.load(pelican_agent_filepath)
            model.set_env(env)

    else:   
        # Instantiate the env and model
        env = gym.make('plark-env-illegal-move-v0')
        model = PPO2('CnnPolicy', env)

    # Start training 
    train_agent(exp_path,model,env,training_intervals,max_steps,model_type,basicdate,writer,tb_log_name,reward_margin)
                
    # Evaluate
    mean_reward, n_steps = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes, deterministic=False, render=False, callback=None, reward_threshold=None, return_episode_rewards=False)
    logger.info('Evaluation finished')
    logger.info('Mean Reward is ' + str(mean_reward))
    logger.info('Number of steps is ' + str(n_steps))
Example #27
def run():
    env = DummyVecEnv([lambda: DemoEnv()])
    model = DQN.load("deepq_DemoEnv", env)
    obs = env.reset()
    sum_rew = 0
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
        sum_rew += rewards[0]
        if dones[0]:
            print("Total reward: ", sum_rew)
            break
Example #28
    def __init__(self, path, deterministic=False, tau=0.05):
        self.turn_model = DQN.load(path + "model_turn")
        self.speed_model = DQN.load(path + "model_speed")
        dic = loadConfig(path + "parameters.json")

        self.tau = tau
        self.deterministic = deterministic

        self.turn_bins = np.linspace(
            -dic["max_turn"], dic["max_turn"], dic["turn_bins"]
        )
        self.speed_bins = np.linspace(
            dic["min_speed"], dic["max_speed"], dic["speed_bins"]
        )

        self.raycast_options = {
            "n_fish_bins": dic["num_bins_rays"],
            "n_wall_raycasts": dic["num_bins_rays"],
            "fov_angle_fish_bins": np.radians(dic["degreees"]),
            "fov_angle_wall_raycasts": np.radians(dic["degrees"]),
            "world_bounds": ([-50, -50], [50, 50]),
        }
Example #29
 def loadAgent(self, filepath, algorithm_type):
     try:
         if algorithm_type.lower() == 'dqn':
             self.model = DQN.load(filepath)
         elif algorithm_type.lower() == 'ppo2':
             self.model = PPO2.load(filepath)
         elif algorithm_type.lower() == 'a2c':
             self.model = A2C.load(filepath)
         elif algorithm_type.lower() == 'acktr':
             self.model = ACKTR.load(filepath)
     except:
         raise ValueError('Error loading pelican agent. File : "' +
                          filepath + '" does not exsist')
Example #30
def main():
    # create the environment
    env = gym.make("gym_balanceBot-v0")

    if not os.path.isfile("trained_model/dqn_balanceBot.zip"):
        # Instantiate the agent
        model = DQN('MlpPolicy',
                    env,
                    learning_rate=1e-3,
                    prioritized_replay=True,
                    verbose=1)

        # Train the agent
        model.learn(total_timesteps=int(2e5))
        # Save the agent
        model.save("trained_model/dqn_balanceBot")
        del model  # delete trained model to demonstrate loading

        # Load the trained agent
        model = DQN.load("trained_model/dqn_balanceBot")

        # Evaluate the agent
        mean_reward, std_reward = evaluate_policy(model,
                                                  model.get_env(),
                                                  n_eval_episodes=10)

    else:
        # Load the trained agent
        model = DQN.load("trained_model/dqn_balanceBot")

    # Enjoy trained agent
    obs = env.reset()
    for i in range(3000):
        action, states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
        sleep(1. / 240.)

    env.close()