Example No. 1
def train_A2C(start_time_tests    = [31*24*3600, 304*24*3600], 
              episode_length_test = 14*24*3600, 
              load                = False):
    '''Train (or load a pre-trained) A2C agent. The testing periods must be 
    specified here so that they are excluded from the training process. 
    
    Parameters
    ----------
    start_time_tests : list of integers
        Time in seconds from the beginning of the year that will be used 
        for testing. These periods should be excluded in the training 
        process. By default the first day of February and the first day of
        November are used. 
    episode_length_test : integer
        Number of seconds indicating the length of the testing periods. By
        default two weeks are reserved for testing. 
    load : boolean
        Boolean indicating whether the algorithm is loaded (True) or 
        needs to be trained (False)
     
    '''
    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append((start_time_test,start_time_test+episode_length_test))
    # Summer period (from June 21st till September 22nd). 
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173*24*3600, 266*24*3600))  
    
    env = BoptestGymEnvRewardWeightCost(url                   = url,
                                        actions               = ['oveHeaPumY_u'],
                                        observations          = {'reaTZon_y':(280.,310.)}, 
                                        random_start_time     = True,
                                        excluding_periods     = excluding_periods,
                                        max_episode_length    = 1*24*3600,
                                        warmup_period         = 3*3600,
                                        Ts                    = 900)
    
    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env)  
    
    model = A2C('MlpPolicy', env, verbose=1, gamma=0.99, seed=seed,
                tensorboard_log=os.path.join('results'))
    
    if not load: 
        model.learn(total_timesteps=int(1e5))
        # Save the agent
        model.save(os.path.join(utilities.get_root_path(), 'examples',
                                'agents', 'a2c_bestest_hydronic_heatpump'))
    else:
        # Load the trained agent
        model = A2C.load(os.path.join(utilities.get_root_path(), 'examples',
                                      'agents', 'a2c_bestest_hydronic_heatpump'))
    
    return env, model, start_time_tests
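A minimal usage sketch (not part of the original example): load the pre-trained agent and roll it out for one episode, assuming the wrapped environment follows the standard Gym reset/step API.

env, model, start_time_tests = train_A2C(load=True)
obs = env.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)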
Example No. 2
def evaluate(modelname, env):
    n_cores = 4
    obs = env.reset()
    model = A2C.load(modelname)
    wr = 0
    win = 0
    total_health_diff = 0
    loss = 0
    episodes = 0
    total_episodes = 100
    while episodes < total_episodes:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        #print(rewards[0])
        time.sleep(.04)
        # print(rewards[0])
        env.render(mode="human")
        for i in range(4):

            if dones[i]:
                if info[i]["p1_health"] < info[i]["p2_health"]:
                    loss += 1
                else:
                    win += 1
                total_health_diff += info[i]["p1_health"] - info[i]["p2_health"]
                wr = win / (win + loss)
                episodes += 1
    return wr, total_health_diff / total_episodes
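A hypothetical call (the environment factory and model path are placeholders): evaluate() assumes a vectorized env with 4 parallel environments whose info dicts expose p1_health and p2_health.

from stable_baselines.common.vec_env import SubprocVecEnv

env = SubprocVecEnv([make_fight_env for _ in range(4)])  # make_fight_env is a placeholder factory
win_rate, avg_health_diff = evaluate("a2c_fighter_model", env)
print("win rate: {:.2f}, avg health diff: {:.1f}".format(win_rate, avg_health_diff))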
Example No. 3
def _train(env_id, agent, model_params, total_steps, is_evaluation=False):
    if is_evaluation:  # evaluate_policy() must only take one environment
        envs = SubprocVecEnv([make_env(env_id)])
    else:
        envs = SubprocVecEnv([make_env(env_id) for _ in range(NUM_CPU)])
    envs = VecNormalize(
        envs)  # normalize the envs during training and evaluation

    # Load pretrained model during training.
    if not is_evaluation and os.path.exists(agent + '_' + env_id):
        if agent == 'ppo2':
            model = PPO2.load(agent + '_' + env_id)
        elif agent == 'a2c':
            model = A2C.load(agent + '_' + env_id)
    else:
        if agent == 'ppo2':
            model = PPO2(MlpLstmPolicy,
                         envs,
                         nminibatches=1,
                         verbose=1,
                         **model_params)
        elif agent == 'a2c':
            model = A2C(MlpLstmPolicy, envs, verbose=1, **model_params)

    model.learn(total_timesteps=total_steps)
    return envs, model
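A sketch of how _train might be called; the env id and model_params values are hypothetical, make_env and NUM_CPU are assumed to be defined in the surrounding module, and note that depending on the stable-baselines version save() may append a '.zip' extension.

model_params = {'gamma': 0.99, 'learning_rate': 7e-4}  # hypothetical hyperparameters
envs, model = _train('CartPole-v1', 'a2c', model_params, total_steps=100000)
model.save('a2c' + '_' + 'CartPole-v1')  # filename matching the pretrained-model check above
envs.close()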
Example No. 4
def main(mode="train"):

    env = gym.make("snakebot-v0")
    if mode == "train":
        model = ac(policy=MlpLnLstmPolicy,
                   env=env,
                   verbose=0,
                   tensorboard_log="a2c_snakebot_tensorboard")
        model.learn(total_timesteps=2000, callback=callback)
        print("Saving model to snake_dqn.pkl...")
        model.save("snake_a2c.pkl")
        print("done.")

        del model  # remove to demonstrate saving and loading

    if mode == "test":
        model = ac.load("snake_a2c.pkl")

        obs = env.reset()
        done = False
        env.set_done(5000)
        while not done:
            action, _states = model.predict(obs)
            obs, rewards, done, info = env.step(action)
            # env.render()
            print(obs)
Example No. 5
def train_a2c(seed):
    """
    Train and evaluate A2C on the uav_env (cartesian, discrete).
    :param seed: (int) random seed for A2C
    """
    """
    A2C(policy, env, gamma=0.99, n_steps=5, vf_coef=0.25, ent_coef=0.01, 
    max_grad_norm=0.5, learning_rate=0.0007, alpha=0.99, epsilon=1e-05,
    lr_schedule='linear', verbose=0,tensorboard_log=None, _init_setup_model=True)
    """
    algo = 'A2C'
    num_timesteps = 3000000

    env = set_up_env(seed)

    global best_mean_reward, n_steps
    best_mean_reward, n_steps = -np.inf, 0

    model = A2C(policy=MlpPolicy, env=env, gamma=0.99, n_steps=5, vf_coef=0.25,
                ent_coef=0.01, max_grad_norm=0.5, learning_rate=0.0007, alpha=0.99,
                epsilon=1e-05, lr_schedule='linear', verbose=0,
                tensorboard_log="./logs/{}/tensorboard/{}/".format(EXPERIMENT_NATURE, algo))

    model.learn(total_timesteps=num_timesteps, callback=callback, seed=seed,
                log_interval=500, tb_log_name="seed_{}".format(seed))

    model = A2C.load(log_dir + 'best_model.pkl')

    evaluation = evaluate_model(env, model, 100)
    os.makedirs('./logs/{}/csv/{}/'.format(EXPERIMENT_NATURE, algo), exist_ok=True)
    os.rename('/tmp/gym/monitor.csv', "./logs/{}/csv/{}/seed_{}.csv".format(EXPERIMENT_NATURE, algo, seed))
    env.close()
    del model, env
    gc.collect()
    return evaluation
Example No. 6
def main():
    alg_input = input("Select algorithm (PPO2 or A2C only):")
    # Keep prompting until a valid algorithm name is entered
    while alg_input not in ("PPO2", "A2C", "ppo2", "a2c"):
        print("Not an option (PPO2 or A2C only)!")
        alg_input = input("Select algorithm (PPO2 or A2C only):")
    model_input = "trained_agents\\" + input(
        "Select model to test(input filename, eg. a2c_wf_2):")

    env = gym.make("WARFLEET-v0")
    # The algorithms require a vectorized environment to run
    env = DummyVecEnv([lambda: env])
    log_dir = "./logs/"

    done = False
    stage_reward = 0
    turns = 0

    if alg_input == "PPO2" or alg_input == "ppo2":
        model = PPO2.load(model_input, env=env, tensorboard_log=log_dir)
    elif alg_input == "A2C" or alg_input == "a2c":
        model = A2C.load(model_input, env=env, tensorboard_log=log_dir)

    obs = env.reset()

    while not done:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        stage_reward += reward
        turns = turns + 1
        # env.render()

    print("Reward: {} /42".format(stage_reward))
    print("Turns: {}".format(turns))
    env.close()
def load_a2c():
    loaded_model = A2C.load(save_dir + "/A2C_tutorial")
    print("loaded", loaded_model.predict(obs, deterministic=True))
    print("load gamma=", loaded_model.gamma, ", n_steps=", loaded_model.n_steps)
    # Saving a model stores its hyperparameters and network parameters, but not the environment env. After loading the model, the environment must be set again.
    loaded_model.set_env(DummyVecEnv([lambda: gym.make("Pendulum-v0")]))
    loaded_model.learn(8000)
def load_model(tickers):
    '''Load in the pretrained model from the trained models folder '''
    # model = run_model(tickers,start="2020-01-01T09:30:00-04:00", end="2020-12-31T09:30:00-04:00")
    model = A2C.load(
        "trained_models/2021-03-22 18:25:09.528982/A2C_30k_dow_120.zip")

    return model
Example No. 9
def train_agent_with_a2c(load=False):
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines.common.vec_env import SubprocVecEnv
    from stable_baselines import A2C

    # multiprocess environment
    n_cpu = 4
    env = SubprocVecEnv([lambda: gym.make('F16GCAS-v0') for i in range(n_cpu)])
    env = gym.make("F16GCAS-v0")

    class CustomPolicy(MlpPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs,
                                               layers=[128, 128])
    if not load:
        model = A2C(env=env, verbose=1, policy=CustomPolicy)
        # model.learn(total_timesteps=1000000)
        ExpData = ExpertDataset("./lqr_export.npz")
        model.pretrain(ExpData, n_epochs=100)
    else:
        model = A2C.load(ROOT+"/trained_models/TDRL/f16/a2c/128_128", env=env)
        with model.graph.as_default():
            for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model/pi/'):
                print(i)

    return model
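A possible follow-up after the behavioural-cloning pretraining above: save the model to the same path that the load=True branch expects (assuming ROOT is defined in the surrounding module).

model = train_agent_with_a2c(load=False)
model.save(ROOT + "/trained_models/TDRL/f16/a2c/128_128")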
Example No. 10
def NewPotential(current_window, algorithm='PPO'):

    # Determine the pretrained agent
    if algorithm == 'A2C':
        model = A2C.load("pretrained_A2C")
    elif algorithm == 'PPO':
        model = PPO2.load("pretrained_PPO")
    elif algorithm == 'ACKTR':
        model = ACKTR.load("pretrained_ACKTR")
    elif algorithm == 'ACER':
        model = ACER.load("pretrained_ACER")
    else:
        raise ValueError("%s is not a valid algorithm." % algorithm)

    if len(current_window) != model.observation_space.shape[0]:
        raise ValueError("%s is does not match the model's window size." %
                         len(current_window))

    action, _states = model.predict(current_window, deterministic=False)

    voltages = np.linspace(0, 1, num=model.action_space.n)
    if action >= 0 and action <= model.action_space.n - 1:
        voltage = voltages[action]
    else:
        raise ValueError(
            "Received invalid action={} which is not part of the action space".
            format(action))

    return voltage
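A hypothetical call; the window length below is a placeholder and must equal the pretrained model's observation size.

import numpy as np
current_window = np.zeros(10)  # placeholder: length must equal model.observation_space.shape[0]
voltage = NewPotential(current_window, algorithm='A2C')
print("commanded voltage:", voltage)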
    def test_save_callback(self):
        '''
        Test that the model performance can be monitored and results can be 
        checked and saved as the model improves. This test trains an agent
        for a short period of time, without loading a pre-trained model. 
        Therefore, this test also checks that an RL agent from stable-baselines 
        can be trained.
        
        '''
        # Define logging directory. Monitoring data and agent model will be stored here
        log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents',
                               'monitored_A2C')

        # Perform a short training example with callback
        env, _, _ = run_save_callback.train_A2C_with_callback(
            log_dir=log_dir, tensorboard_log=None)

        # Load the trained agent
        model = A2C.load(os.path.join(log_dir, 'best_model'))

        # Test one step with the trained model
        obs = env.reset()
        df = pd.DataFrame([model.predict(obs)[0][0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references', 'save_callback.csv')
        self.compare_ref_values_df(df, ref_filepath)

        # Remove the model directory to leave a clean state for further testing
        shutil.rmtree(log_dir, ignore_errors=True)
Example No. 12
def run_agent(envs, parameters):
    '''Load a pre-trained agent, run it, and yield the weights from the info dict at each step.'''
    path = Path(parameters['path'])
    dummy_env = OptVecEnv(envs)
    set_global_seeds(parameters.setdefault('seed'))
    save_path = str(path / 'model.pkl')
    alg = parameters['alg']
    if alg == 'PPO':
        with open(save_path, 'rb') as pkl:
            model = PPO2.load(pkl, env=dummy_env)
    elif alg == 'A2C':
        with open(save_path, 'rb') as pkl:
            model = A2C.load(pkl, env=dummy_env)
    try:
        done = False
        observations = dummy_env.reset()
        while not done:
            action = model.predict(observations)
            print(action[0].ravel().tolist())
            observations, rewards, dones, infos = dummy_env.step(action[0])
            done = any(dones)
            info = infos[0]
            yield info['weights']
    finally:
        dummy_env.close()
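Since run_agent is a generator, the caller iterates over it to receive the weights step by step. The parameter values below are hypothetical, envs is assumed to be built for OptVecEnv as in the surrounding code, and 'path' must contain the saved model.pkl.

parameters = {'path': 'results/run_0', 'alg': 'A2C', 'seed': 0}  # hypothetical values
for weights in run_agent(envs, parameters):
    print(weights)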
Example No. 13
def train(game, num_timesteps, num_envs, dir_name, model_name,
          prev_model_name):
    dir_name = get_valid_filename(dir_name)
    model_name = get_valid_filename(model_name)

    log_dir = f"logs/{dir_name}/{model_name}-training"
    model_dir = f"models/{dir_name}"
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    env = make_vec_envs(game, False, num_envs)
    prev_model_path = f"{model_dir}/{prev_model_name}.zip"
    if prev_model_name is not None and os.path.exists(prev_model_path):
        model = A2C.load(prev_model_path, env=env)
        model.tensorboard_log = log_dir
    else:
        model = A2C(policy="MlpPolicy",
                    env=env,
                    gamma=0.8,
                    n_steps=64,
                    learning_rate=0.00025,
                    verbose=1,
                    tensorboard_log=log_dir)
    model.learn(num_timesteps)
    model.save(f"{model_dir}/{model_name}.zip")
    env.close()
Example No. 14
def train_agent(train, pickle_file, agent_type, env_kwargs, parms):

    bin_path = "bin/" + pickle_file

    if path.exists(bin_path):
        if agent_type == "a2c":
            print("Loading A2C Agent")
            RL_model = A2C.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ddpg":
            print("Loading DDPG Agent")
            RL_model = DDPG.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ppo":
            print("Loading PPO2 Agent")
            RL_model = PPO2.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
    else:
        e_train_gym = ipenv.PortfolioAllocEnv(df=train, **env_kwargs)
        env_train, _ = e_train_gym.get_sb_env()

        agent = ipagent.IPRLAgent(env=env_train)

        model = agent.get_model(model_name=agent_type, model_kwargs=parms)

        RL_model = agent.train_model(model=model,
                                     tb_log_name=agent_type,
                                     total_timesteps=1000000)

        RL_model.save(bin_path)

    return RL_model
    def test_variable_episode(self):
        '''
        Test that a model can be trained using variable episode length. 
        The method that is used to determine whether the episode is 
        terminated or not is defined by the user. This test trains an agent
        for a short period of time, without loading a pre-trained model. 
        Therefore, this test also checks that an RL agent from stable-baselines 
        can be trained. This test also uses the save callback to check that
        the variable episode length is being effectively used. 
        Notice that this test also checks that child classes can be nested
        since the example redefines the `compute_reward` and the 
        `compute_done` methods. 
        
        '''
        # Define logging directory. Monitoring data and agent model will be stored here
        log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents',
                               'variable_episode_A2C')

        # Perform a short training example with callback
        env, _, _ = run_variable_episode.train_A2C_with_variable_episode(
            log_dir=log_dir, tensorboard_log=None)

        # Load the trained agent
        model = A2C.load(os.path.join(log_dir, 'best_model'))

        # Test one step with the trained model
        obs = env.reset()
        df = pd.DataFrame([model.predict(obs)[0][0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references', 'variable_episode_step.csv')
        self.compare_ref_values_df(df, ref_filepath)

        # Check variable lengths
        monitor = pd.read_csv(os.path.join(log_dir, 'monitor.csv'),
                              index_col=None)
        monitor = monitor.iloc[1:]
        monitor.reset_index(inplace=True)
        monitor.columns = ['reward', 'episode_length', 'time']

        # Time may vary from one computer to another
        monitor.drop(labels='time', axis=1, inplace=True)

        # Utilities require the index to be named 'time' (even though this is not the case here)
        monitor.index.name = 'time'

        # Transform to numeric
        monitor = monitor.apply(
            lambda col: pd.to_numeric(col, errors='coerce'))

        # Check that we obtain always same monitoring parameters
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references',
                                    'variable_episode_monitoring.csv')
        self.compare_ref_timeseries_df(monitor, ref_filepath)

        # Remove the model directory to leave a clean state for further testing
        shutil.rmtree(log_dir, ignore_errors=True)
Example No. 16
 def __init__(self, env, mode="random", agent=''):
     super().__init__(env)
     self.mode = mode
     if self.mode == "agent":
         self.agent = A2C.load(agent)
     self.facings = [1, 1, 1, 1]
     # Put in place to keep the first turn simple: both players will just pass until then
     self.first_turn = 0
     self.last_infos = {}
def load_agent():
    model = A2C.load("a2c_agent.zip")
    obs = env.reset()
    for _ in range(10000):
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)  # Gym returns (obs, reward, done, info)
        if done:
            obs = env.reset()
        env.render()
    env.close()
 def _setup(self):
     # Game parameters
     self.env = gym.make(self.ENV_NAME)
     self.env.play_type = PLAY_TYPE.MACHINE
     self.env.render_mode = 'human'
     self.env.MAX_TURNS = self.max_turns
     self.model = A2C.load(self.MODEL_FILENAME)
     self.env.reset()
     # Report success
     print('Created new environment {0} with GameID: {1}'.format(self.ENV_NAME, self.GAME_ID))
Example No. 19
def loader(algo, env_name):
    if algo == 'dqn':
        return DQN.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'ppo2':
        return PPO2.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'a2c':
        return A2C.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'acer':
        return ACER.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'trpo':
        return TRPO.load("trained_agents/" + algo + "/" + env_name + ".pkl")
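The if/elif chain above can also be written as a table lookup; a minimal equivalent sketch using the same stable-baselines classes:

ALGOS = {'dqn': DQN, 'ppo2': PPO2, 'a2c': A2C, 'acer': ACER, 'trpo': TRPO}

def loader(algo, env_name):
    # Look up the algorithm class and load its saved agent
    return ALGOS[algo].load("trained_agents/" + algo + "/" + env_name + ".pkl")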
Example No. 20
def run_illegal_move_training(
                    exp_name,exp_path,
                    basicdate,
                    model_type='PPO2',
                    n_eval_episodes=10,
                    training_intervals=100,
                    max_steps=10000,
                    reward_margin=10,
                    log_to_tb=False,
                    pelican_agent_filepath=False):
    
    # Set up logging
    if log_to_tb:
        writer = SummaryWriter(exp_path)
        tb_log_name = 'Illegal_move_prevention_training'
    else:
        writer = None
        tb_log_name = None
    
    if pelican_agent_filepath:
        logger.info('Loading agent from file: ' + pelican_agent_filepath)
        # env = plark_env_illegal_move.PlarkEnvIllegalMove( config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json')
        env = gym.make('plark-env-illegal-move-v0')

        if model_type.lower() == 'dqn':
            model = DQN.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'ppo2':
            model = PPO2.load(pelican_agent_filepath)
            model.set_env(DummyVecEnv([lambda: env]))
            
        elif model_type.lower() == 'a2c':
            model = A2C.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'acktr':
            model = ACKTR.load(pelican_agent_filepath)
            model.set_env(env)

    else:   
        # Instantiate the env and model
        env = gym.make('plark-env-illegal-move-v0')
        model = PPO2('CnnPolicy', env)

    # Start training 
    train_agent(exp_path,model,env,training_intervals,max_steps,model_type,basicdate,writer,tb_log_name,reward_margin)
                
    # Evaluate
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes, deterministic=False, render=False, callback=None, reward_threshold=None, return_episode_rewards=False)
    logger.info('Evaluation finished')
    logger.info('Mean Reward is ' + str(mean_reward))
    logger.info('Std of Reward is ' + str(std_reward))
Example No. 21
def read_model(model_type):

    if model_type == "A2C":
        model = A2C.load(
            "./model_saved/Selected/A2C_ModelMar-05-2021_0815/A2C_ModelMar-05-2021_0815"
        )
    if model_type == "TD3":
        model = TD3.load(
            "./model_saved/Selected/TD3_ModelMar-05-2021_1442/TD3_ModelMar-05-2021_1442"
        )

    return model
Example No. 22
def load_model(config):
    model = None
    if config["algo_name"] == "TD3":
        model = TD3.load("agents/{}".format(args["test_agent_path"]))
    if config["algo_name"] == "A2C":
        model = A2C.load("agents/{}".format(args["test_agent_path"]))
    if config["algo_name"] == "SAC":
        model = SAC.load("agents/{}".format(args["test_agent_path"]))
    if config["algo_name"] == "PPO2":
        model = PPO2.load("agents/{}".format(args["test_agent_path"]))
    assert model is not None, "Alg name not found, cannot load model, exiting. "
    return model
def main():
    model_dir = './models'
    model_name = model_dir + '/' + MODEL_NAME

    """ Generate & Check environment """
    env_name = 'myenv-v2'
    env = gym.make(env_name)
    # env = gym.wrappers.Monitor(env, "./videos", force=True)  # For video making

    """ Vectorize environment """
    # Unnecessary to vectorize environment
    # env = DummyVecEnv([lambda: env])

    """ Load model and set environment """
    if ALGORITHM == 'ppo2':
        model = PPO2.load(model_name)
    elif ALGORITHM == 'a2c':
        model = A2C.load(model_name)
    else:
        raise Exception('Load error.  Specify proper name')

    for idx in range(NUM_TRIALS):
        """ Initialization """
        observation = env.reset()
        frames = []

        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
            action_index, _ = model.predict(observation)

            # Step the environment once
            observation, reward, done, _ = env.step(action_index)

            # Render the environment and record the video frames
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Pause with the space key (for debugging)
            pause_for_debug()

            # Slow down rendering
            pygame.time.wait(10)

            # End-of-episode handling
            if done:
                status_print(env, observation, reward, done, fighter_0, jammer_0)
                video_name = ALGORITHM + '_' + env.mission_condition + '-' + str(idx)
                make_video(video_name, frames)
                make_jason(env, video_name, fighter_0, jammer_0, reward)
                break
Example No. 24
def run_sonobuoy_training(
                    exp_name,exp_path,
                    basicdate,
                    model_type='PPO2',
                    n_eval_episodes=10,
                    training_intervals=100,
                    max_steps=10000,
                    reward_margin=10,
                    log_to_tb=False,
                    pelican_agent_filepath=False):

    # set up logging 
    if log_to_tb:
        writer = SummaryWriter(exp_path)
        tb_log_name = 'sonobuoy_training'
    else:
        writer = None
        tb_log_name = None

        
    env = gym.make('plark-env-v0', panther_agent_filepath='/data/agents/models/PPO2_20200429_073132_panther/')
    
    if pelican_agent_filepath:
        logger.info('Loading agent from file: ' + pelican_agent_filepath)

        if model_type.lower() == 'dqn':
            model = DQN.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'ppo2':
            model = PPO2.load(pelican_agent_filepath)
            model.set_env(DummyVecEnv([lambda: env]))
            
        elif model_type.lower() == 'a2c':
            model = A2C.load(pelican_agent_filepath)
            model.set_env(env)
            
        elif model_type.lower() == 'acktr':
            model = ACKTR.load(pelican_agent_filepath)
            model.set_env(env)

    else:   
        # Instantiate the env and model
        model = PPO2('CnnPolicy', env)

    # Start training 
    train_agent(exp_path,model,env,training_intervals,max_steps,model_type,basicdate,writer,tb_log_name,reward_margin)
                
    # Evaluate
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes, deterministic=False, render=False, callback=None, reward_threshold=None, return_episode_rewards=False)
    logger.info('Evaluation finished')
    logger.info('Mean Reward is ' + str(mean_reward))
    logger.info('Std of Reward is ' + str(std_reward))
def get_pretrained_agents():
    """
    Get the agents from the saved_agents/ directory
    :return:
    """
    agents = []
    dir_name = "saved_agents"
    for filename in os.listdir(dir_name):
        agent = A2C.load(dir_name + '/' + filename)
        weights = np.array([float(w) for w in filename.split('_')])
        agents.append([weights, agent])

    return agents
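For reference, get_pretrained_agents expects filenames that encode the reward weights separated by underscores. A hypothetical save following that convention (assuming model is a trained A2C agent; note that newer stable-baselines versions append '.zip' on save, which would then need stripping before the float() parse above):

weights = [1.0, 0.5, 2.0]  # hypothetical reward weights
model.save(os.path.join("saved_agents", "_".join(str(w) for w in weights)))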
def run_baseline(params, LOAD_POLICY, VARIABLE_EVAL):
    # Evaluate the agent
    env = env_fun(animate=params["animate"],
                  max_steps=params["max_steps"],
                  action_input=False,
                  latent_input=False,
                  is_variable=VARIABLE_EVAL)
    policy = A2C('MlpPolicy', env)
    if LOAD_POLICY:
        policy_dir = "agents/xxx.zip"
        policy = A2C.load(policy_dir)  # 2Q5
    regressor = PyTorchMlpCst(env.obs_dim + env.act_dim, 24, env.obs_dim)
    return evaluate_model(params, env, policy, regressor)
Example No. 27
 def loadAgent(self, filepath, algorithm_type):
     try:
         if algorithm_type.lower() == 'dqn':
             self.model = DQN.load(filepath)
         elif algorithm_type.lower() == 'ppo2':
             self.model = PPO2.load(filepath)
         elif algorithm_type.lower() == 'a2c':
             self.model = A2C.load(filepath)
         elif algorithm_type.lower() == 'acktr':
             self.model = ACKTR.load(filepath)
     except Exception:
         raise ValueError('Error loading pelican agent. File: "' +
                          filepath + '" does not exist')
Example No. 28
    def __init__(self, config: Dict[str, Any]) -> None:
        """ Initialize agent.

        Args:
            config (Dict[str, Any]): Agent configuration.
        """
        from stable_baselines import A2C
        self.model = A2C.load(config["weights"])
        self.state = None

        # Number of environments used to train model
        # to which stable-baselines input tensor size is fixed
        self.n_train_envs = self.model.n_envs
def load_model(env, model_dir, log_dir):
    model_name = model_dir + '/' + LOAD_MODEL_NAME + '.zip'
    print(f'----- model will be loaded from {model_name} \n')
    """ Load trained model, then continue training """
    if ALGORITHM == 'ppo2':
        model = PPO2.load(model_name, verbose=0, tensorboard_log=log_dir)
    elif ALGORITHM == 'a2c':
        model = A2C.load(model_name, verbose=0, tensorboard_log=log_dir)
    else:
        raise Exception('Specify Algorithm')

    model.set_env(env)

    return model
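A short continuation sketch (the env id 'myenv-v2' is taken from the earlier example; the timestep count is arbitrary):

env = DummyVecEnv([lambda: gym.make('myenv-v2')])
model = load_model(env, './models', './logs')
model.learn(total_timesteps=50000)
model.save('./models/' + LOAD_MODEL_NAME + '_continued')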
Example No. 30
def load_a2c_model(env, learning_rate, batch_size, algorithm):
    from stable_baselines.common.policies import MlpPolicy
    model = None
    existing_pickle_files = get_files_with_pattern(pickle_dir, 'a2c_recent_model.pkl')
    
    for file_name in existing_pickle_files:
        search = re.search('a2c_recent_model.pkl', file_name)
        if search:
            model = A2C.load(file_name, env=env, verbose=0, tensorboard_log=log_dir)
            logger.info("Loading existing pickle file for environment {} with algorithm {} and policy '{}'.".format(env, algorithm, model.policy))
            return model
    
    logger.debug("No pickle was found for environment {}. Creating new model with algorithm {} and policy 'MlpPolicy'...".format(env, algorithm))
    model = A2C(policy='MlpPolicy', env=env, verbose=0, tensorboard_log=log_dir, learning_rate=learning_rate, n_steps = batch_size)
    return model
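A possible call pattern: train the returned model and write it back so the next run finds a recent pickle. The hyperparameters are illustrative, and env and pickle_dir are assumed to be defined in the surrounding module.

model = load_a2c_model(env, learning_rate=7e-4, batch_size=5, algorithm='A2C')
model.learn(total_timesteps=100000)
model.save(os.path.join(pickle_dir, 'a2c_recent_model.pkl'))  # name should match the pattern searched for above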