Example #1
def train_agent_with_a2c(load=False):
    import gym
    import tensorflow as tf
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines.common.vec_env import SubprocVecEnv
    from stable_baselines.gail import ExpertDataset
    from stable_baselines import A2C

    # multiprocess environment
    n_cpu = 4
    env = SubprocVecEnv([lambda: gym.make('F16GCAS-v0') for i in range(n_cpu)])
    env = gym.make("F16GCAS-v0")

    class CustomPolicy(MlpPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs,
                                               layers=[128, 128])
    if not load:
        model = A2C(env=env, verbose=1, policy=CustomPolicy)
        # model.learn(total_timesteps=1000000)
        ExpData = ExpertDataset("./lqr_export.npz")
        model.pretrain(ExpData, n_epochs=100)
    else:
        model = A2C.load(ROOT+"/trained_models/TDRL/f16/a2c/128_128", env=env)
        with model.graph.as_default():
            for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model/pi/'):
                print(i)

    return model
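Example #1 pretrains the A2C policy from an expert dataset stored in ./lqr_export.npz.
A minimal sketch of how such a file could be recorded with generate_expert_traj from
stable_baselines.gail; the expert controller below is a placeholder (the real file was
presumably produced by an LQR controller for F16GCAS-v0):

import gym
from stable_baselines.gail import generate_expert_traj

env = gym.make('F16GCAS-v0')

def expert_policy(obs):
    # Placeholder: substitute the LQR controller that produced lqr_export.npz
    return env.action_space.sample()

# Writes observations/actions/rewards/episode returns to lqr_export.npz
generate_expert_traj(expert_policy, 'lqr_export', env=env, n_episodes=10)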
Example #2
def train(game, num_timesteps, num_envs, dir_name, model_name,
          prev_model_name):
    dir_name = get_valid_filename(dir_name)
    model_name = get_valid_filename(model_name)

    log_dir = f"logs/{dir_name}/{model_name}-training"
    model_dir = f"models/{dir_name}"
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    env = make_vec_envs(game, False, num_envs)
    prev_model_path = f"{model_dir}/{prev_model_name}.zip"
    if prev_model_name is not None and os.path.exists(prev_model_path):
        model = A2C.load(prev_model_path, env=env)
        model.tensorboard_log = log_dir
    else:
        model = A2C(policy="MlpPolicy",
                    env=env,
                    gamma=0.8,
                    n_steps=64,
                    learning_rate=0.00025,
                    verbose=1,
                    tensorboard_log=log_dir)
    model.learn(num_timesteps)
    model.save(f"{model_dir}/{model_name}.zip")
    env.close()
Example #3
def train_a2c(seed):
    """
    test A2C on the uav_env(cartesian,discrete) 
    :param seed: (int) random seed for A2C
    """
    """
    A2C(policy, env, gamma=0.99, n_steps=5, vf_coef=0.25, ent_coef=0.01, 
    max_grad_norm=0.5, learning_rate=0.0007, alpha=0.99, epsilon=1e-05,
    lr_schedule='linear', verbose=0,tensorboard_log=None, _init_setup_model=True)
    """
    algo = 'A2C'
    num_timesteps = 3000000

    env = set_up_env(seed)

    global best_mean_reward, n_steps
    best_mean_reward, n_steps = -np.inf, 0

    model = A2C(policy=MlpPolicy, env=env, gamma=0.99, n_steps=5, vf_coef=0.25,
                ent_coef=0.01, max_grad_norm=0.5, learning_rate=0.0007, alpha=0.99,
                epsilon=1e-05, lr_schedule='linear', verbose=0,
                tensorboard_log="./logs/{}/tensorboard/{}/".format(EXPERIMENT_NATURE, algo))

    model.learn(total_timesteps=num_timesteps, callback=callback, seed=seed,
                log_interval=500, tb_log_name="seed_{}".format(seed))

    model = A2C.load(log_dir + 'best_model.pkl')

    evaluation = evaluate_model(env, model, 100)
    os.makedirs('./logs/{}/csv/{}/'.format(EXPERIMENT_NATURE, algo), exist_ok=True)
    os.rename('/tmp/gym/monitor.csv', "./logs/{}/csv/{}/seed_{}.csv".format(EXPERIMENT_NATURE, algo, seed))
    env.close()
    del model, env
    gc.collect()
    return evaluation
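The callback passed to model.learn() above, together with the best_mean_reward and
n_steps globals, is not shown in this example. A minimal sketch in the usual
stable-baselines v2 callback style, assuming the environment was wrapped in a Monitor
writing to a log directory such as "/tmp/gym/" (both names are assumptions here):

import os
import numpy as np
from stable_baselines.results_plotter import load_results, ts2xy

best_mean_reward, n_steps = -np.inf, 0
log_dir = "/tmp/gym/"  # assumed Monitor log directory

def callback(_locals, _globals):
    """Called at every training step; saves the model when the mean reward improves."""
    global best_mean_reward, n_steps
    if (n_steps + 1) % 1000 == 0:
        # Read the monitor files and compute the mean reward over the last 100 episodes
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                _locals['self'].save(os.path.join(log_dir, 'best_model.pkl'))
    n_steps += 1
    return True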
Example #4
def _train(env_id, agent, model_params, total_steps, is_evaluation=False):
    if is_evaluation:  # evaluate_policy() must only take one environment
        envs = SubprocVecEnv([make_env(env_id)])
    else:
        envs = SubprocVecEnv([make_env(env_id) for _ in range(NUM_CPU)])
    envs = VecNormalize(
        envs)  # normalize the envs during training and evaluation

    # Load pretrained model during training.
    if not is_evaluation and os.path.exists(agent + '_' + env_id):
        if agent == 'ppo2':
            model = PPO2.load(agent + '_' + env_id)
        elif agent == 'a2c':
            model = A2C.load(agent + '_' + env_id)
    else:
        if agent == 'ppo2':
            model = PPO2(MlpLstmPolicy,
                         envs,
                         nminibatches=1,
                         verbose=1,
                         **model_params)
        elif agent == 'a2c':
            model = A2C(MlpLstmPolicy, envs, verbose=1, **model_params)

    model.learn(total_timesteps=total_steps)
    return envs, model
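A possible usage sketch for _train() above, evaluating the trained model on a single
normalized environment with evaluate_policy; the environment id, hyperparameters and
number of evaluation episodes are illustrative only, and make_env comes from the
surrounding script:

from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.vec_env import SubprocVecEnv, VecNormalize

# Train (illustrative env id and hyperparameters)
train_envs, model = _train('Walker2d-v2', 'a2c', {'gamma': 0.99}, total_steps=100000)

# evaluate_policy() must only take one environment (see the comment above)
eval_env = VecNormalize(SubprocVecEnv([make_env('Walker2d-v2')]), training=False)
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
print("mean reward: {:.1f} +/- {:.1f}".format(mean_reward, std_reward))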
Example #5
def attention_render(model_name, env_name, num_cpu, log_dir):
    if not os.path.exists(log_dir):
        raise FileNotFoundError('log_dir does not exist: {}'.format(log_dir))

    env_id = env_name + 'NoFrameskip-v4'
    env = SubprocVecEnv([make_env(env_id, i, log_dir) for i in range(num_cpu)])
    # env = Monitor(env, log_dir, allow_early_resets=True)

    if model_name == 'A2C_Attention':
        model = A2C(AttentionPolicy,
                    env,
                    verbose=1,
                    tensorboard_log=log_dir + 'tensorboard/')
    elif model_name == 'A2C_Attention2':
        model = A2C(Attention2Policy,
                    env,
                    verbose=1,
                    tensorboard_log=log_dir + 'tensorboard/')
    elif model_name == 'A2C':
        model = A2C(LstmPolicy,
                    env,
                    verbose=1,
                    tensorboard_log=log_dir + 'tensorboard/')
    else:
        model = None
    model = model.load(log_dir + model_name + '_' + env_name, env=env)

    obs = env.reset()
    # print(env.observation_space)
    # cv2.imshow('test', RGB2BGR(obs[0]))
    # cv2.waitKey(0)
    while True:
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        attentions = model.get_attention(obs, _states, done)[0]
        attentions_img = []
        # print('attention', np.array(attention).shape)
        for i, attention in enumerate(attentions):
            attention = np.array(attention)
            attention = np.reshape(attention, [
                env.observation_space.shape[0] // 10,
                env.observation_space.shape[1] // 10, 1
            ])
            attention = np.repeat(attention, [10] * attention.shape[0], axis=0)
            attention = np.repeat(attention, [10] * attention.shape[1], axis=1)
            attention = attention * 255
            attentions_img.append(attention)
            # print(np.sum(attention))
        attentions = tile_images(attentions_img)
        cv2.imshow('attention', attentions)
        cv2.waitKey(1)
        # break
        env.render()
    return model
Example #6
def train_A2C(start_time_tests    = [31*24*3600, 304*24*3600], 
              episode_length_test = 14*24*3600, 
              load                = False):
    '''Method to train (or load a pre-trained) A2C agent. Testing periods 
    have to be introduced already here to not use these during training. 
    
    Parameters
    ----------
    start_time_tests : list of integers
        Time in seconds from the beginning of the year that will be used 
        for testing. These periods should be excluded in the training 
        process. By default the first day of February and the first day of
        November are used. 
    episode_length_test : integer
        Number of seconds indicating the length of the testing periods. By
        default two weeks are reserved for testing. 
    load : boolean
        Boolean indicating whether the algorithm is loaded (True) or 
        needs to be trained (False)
     
    '''
    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append((start_time_test,start_time_test+episode_length_test))
    # Summer period (from June 21st till September 22nd). 
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173*24*3600, 266*24*3600))  
    
    env = BoptestGymEnvRewardWeightCost(url                   = url,
                                        actions               = ['oveHeaPumY_u'],
                                        observations          = {'reaTZon_y':(280.,310.)}, 
                                        random_start_time     = True,
                                        excluding_periods     = excluding_periods,
                                        max_episode_length    = 1*24*3600,
                                        warmup_period         = 3*3600,
                                        Ts                    = 900)
    
    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env)  
    
    model = A2C('MlpPolicy', env, verbose=1, gamma=0.99, seed=seed,
                tensorboard_log=os.path.join('results'))
    
    if not load: 
        model.learn(total_timesteps=int(1e5))
        # Save the agent
        model.save(os.path.join(utilities.get_root_path(), 'examples',
                                'agents', 'a2c_bestest_hydronic_heatpump'))
    else:
        # Load the trained agent
        model = A2C.load(os.path.join(utilities.get_root_path(), 'examples',
                                      'agents', 'a2c_bestest_hydronic_heatpump'))
    
    return env, model, start_time_tests
Example #7
    def build_model(self):
        if self.is_stack:
            if self.game_type == "box":
                self.env = DummyVecEnv([lambda: self.env])
                self.model = A2C(MlpPolicy, self.env, verbose=0, gamma=self.gamma,
                                 learning_rate=self.actor_lr, ent_coef=self.c2,
                                 vf_coef=self.critic_lr)
            if self.game_type == "atari":
                self.model = A2C(CnnPolicy, self.env, verbose=0, gamma=self.gamma,
                                 learning_rate=self.actor_lr, ent_coef=self.c2,
                                 vf_coef=self.critic_lr)
        else:
            if self.game_type == "box":
                self.env = DummyVecEnv([lambda: self.env])
                self.model = A2C(MlpPolicy, self.env, verbose=0, gamma=self.gamma,
                                 learning_rate=self.actor_lr, ent_coef=self.c2,
                                 vf_coef=self.critic_lr)
            if self.game_type == "atari":
                self.model = A2C(CnnLstmPolicy, self.env, verbose=0, gamma=self.gamma,
                                 learning_rate=self.actor_lr, ent_coef=self.c2,
                                 vf_coef=self.critic_lr)
Example #8
def run_baseline(params, LOAD_POLICY, VARIABLE_EVAL):
    # Evaluate the agent
    env = env_fun(animate=params["animate"],
                  max_steps=params["max_steps"],
                  action_input=False,
                  latent_input=False,
                  is_variable=VARIABLE_EVAL)
    policy = A2C('MlpPolicy', env)
    if LOAD_POLICY:
        policy_dir = "agents/xxx.zip"
        policy = A2C.load(policy_dir)  # 2Q5
    regressor = PyTorchMlpCst(env.obs_dim + env.act_dim, 24, env.obs_dim)
    return evaluate_model(params, env, policy, regressor)
Example #9
def get_model(model_name, env, log_dir):
    if model_name == "A2C_DualAttention":
        model = A2C(DualAttentionLstmPolicy, env, verbose=1)
    elif model_name == "A2C_SelfAttention_Cin":
        model = A2C(SelfAttentionCinLstmPolicy, env, verbose=1)
    elif model_name == "A2C_SelfAttention":
        model = A2C(SelfAttentionLstmPolicy, env, verbose=1)
    elif model_name == 'A2C_Attention':
        model = A2C(AttentionPolicy,
                    env,
                    verbose=1,
                    tensorboard_log=log_dir + 'tensorboard/')
    elif model_name == 'A2C_Attention2':
        model = A2C(Attention2Policy,
                    env,
                    verbose=1,
                    tensorboard_log=log_dir + 'tensorboard/')
    elif model_name == 'A2C_Attention3':
        model = A2C(Attention3Policy, env, verbose=1)
    elif model_name == 'A2C_Attention4':
        model = A2C(Attention4Policy, env, verbose=1)
    elif model_name == 'A2C':
        model = A2C(CnnLstmPolicy, env, verbose=1)
    else:
        raise ValueError('{} does not exist'.format(model_name))
    return model
Example #10
def load_a2c_model(env, learning_rate, batch_size, algorithm):
    from stable_baselines.common.policies import MlpPolicy
    model = None
    existing_pickle_files = get_files_with_pattern(pickle_dir, 'ppo2_recent_model.pkl')
    
    for file_name in existing_pickle_files:
        search = re.search('ppo2_recent_model.pkl', file_name)
        if search:
            model = A2C.load(file_name, env=env, verbose=0, tensorboard_log=log_dir)
            logger.info("Loading existing pickle file for environment {} with algorithm {} and policy '{}'.".format(env, algorithm, model.policy))
            return model
    
    logger.debug("No pickle was found for environment {}. Creating new model with algorithm {} and policy 'MlpPolicy'...".format(env, algorithm))
    model = A2C(policy='MlpPolicy', env=env, verbose=0, tensorboard_log=log_dir, learning_rate=learning_rate, n_steps = batch_size)
    return model  
Example #11
def train():
    """Trains an A2C policy """
    env = create_env()

    model = A2C(
        policy = CnnPolicy,
        env = env,
        gamma = 0.99,
        n_steps = 5,
        vf_coef=0.25, 
        ent_coef=0.01,
        max_grad_norm=0.5,
        learning_rate=7e-4,
        alpha=0.99,
        epsilon=1e-05,
        lr_schedule='constant',
        verbose=1,
        tensorboard_log="./tb"  
    )

    model.learn(
        total_timesteps=int(1e7), 
        callback=callback, 
        tb_log_name="a2c"
    )

    model.save("models/pacman_a2c.pkl")
Example #12
def load_model(tickers):
    '''Load in the pretrained model from the trained models folder '''
    # model = run_model(tickers,start="2020-01-01T09:30:00-04:00", end="2020-12-31T09:30:00-04:00")
    model = A2C.load(
        "trained_models/2021-03-22 18:25:09.528982/A2C_30k_dow_120.zip")

    return model
Example #13
def run_agent(envs, parameters):
    '''Train an agent.'''
    alg = parameters['alg']
    learning_rate = parameters['learning_rate']
    gamma = parameters['gamma']
    model_path = parameters['model_path']
    set_global_seeds(parameters.get('seed'))
    dummy_env = OptVecEnv(envs)
    if alg == 'PPO':
        model = PPO2(MlpPolicy,
                     dummy_env,
                     gamma=gamma,
                     learning_rate=learning_rate,
                     verbose=1,
                     nminibatches=dummy_env.num_envs)
    elif alg == 'A2C':
        model = A2C(MlpPolicy,
                    dummy_env,
                    gamma=gamma,
                    learning_rate=learning_rate,
                    verbose=1)
    else:
        model = DDPG(ddpg.MlpPolicy,
                     dummy_env,
                     gamma=gamma,
                     verbose=1,
                     actor_lr=learning_rate / 10,
                     critic_lr=learning_rate)
    try:
        model.learn(total_timesteps=parameters.get('total_timesteps', 10**6))
    except tf.errors.InvalidArgumentError:
        LOGGER.error('Possible Nan, %s', str((alg, learning_rate, gamma)))
    finally:
        dummy_env.close()
        model.save(str(model_path))
Example #14
def define_model(env, log_dir):
    if DEFAULT:
        policy_kwargs = dict()
    else:
        policy_kwargs = dict(act_fun=ACT_FUN, net_arch=NET_ARCH)

    if ALGORITHM == 'ppo2':
        model = PPO2(policy=MlpPolicy,
                     env=env,
                     policy_kwargs=policy_kwargs,
                     verbose=0,
                     tensorboard_log=log_dir)

    elif ALGORITHM == 'a2c':
        model = A2C(policy=MlpPolicy,
                    env=env,
                    policy_kwargs=policy_kwargs,
                    verbose=0,
                    tensorboard_log=log_dir)
    else:
        raise Exception('Specify proper algorithm')

    model_arch = model.get_parameter_list()
    print('\n--------------- Summary of archs ---------------')
    for model_param in model_arch:
        print(model_param)
    print('\n')

    return model
Example #15
def train(env_id, num_timesteps, seed, policy, lr_schedule, num_env):
    """
    Train A2C model for atari environment, for testing purposes

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
    :param seed: (int) The initial seed for training
    :param policy: (A2CPolicy) The policy model to use (MLP, CNN, LSTM, ...)
    :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
                                 'double_linear_con', 'middle_drop' or 'double_middle_drop')
    :param num_env: (int) The number of environments
    """
    policy_fn = None
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = CnnLstmPolicy
    elif policy == 'lnlstm':
        policy_fn = CnnLnLstmPolicy
    if policy_fn is None:
        raise ValueError("Error: policy {} not implemented".format(policy))

    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)

    model = A2C(policy_fn, env, lr_schedule=lr_schedule, seed=seed)
    model.learn(total_timesteps=int(num_timesteps * 1.1))
    env.close()
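A possible invocation of the helper above, with purely illustrative arguments (any
Atari environment id registered in gym would work):

train('BreakoutNoFrameskip-v4', num_timesteps=40000, seed=0,
      policy='cnn', lr_schedule='constant', num_env=4)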
Example #16
def main():
    alg_input = input("Select algorithm (PPO2 or A2C only):")
    if alg_input != "PPO2" and alg_input != "A2C" and alg_input != "ppo2" and alg_input != "a2c":
        print("Not an option (PPO2 or A2C only) !")
        alg_input = input("Select algorithm (PPO2 or A2C only):")
    model_input = "trained_agents\\" + input(
        "Select model to test(input filename, eg. a2c_wf_2):")

    env = gym.make("WARFLEET-v0")
    # The algorithms require a vectorized environment to run
    env = DummyVecEnv([lambda: env])
    log_dir = "./logs/"

    done = False
    stage_reward = 0
    turns = 0

    if alg_input == "PPO2" or alg_input == "ppo2":
        model = PPO2.load(model_input, env=env, tensorboard_log=log_dir)
    elif alg_input == "A2C" or alg_input == "a2c":
        model = A2C.load(model_input, env=env, tensorboard_log=log_dir)

    obs = env.reset()

    while not done:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        stage_reward += reward
        turns = turns + 1
        # env.render()

    print("Reward: {} /42".format(stage_reward))
    print("Turns: {}".format(turns))
    env.close()
Example #17
def test_a2c_update_n_batch_on_load(tmp_path):
    env = make_vec_env("CartPole-v1", n_envs=2)
    model = A2C("MlpPolicy", env, n_steps=10)

    model.learn(total_timesteps=100)
    model.save(os.path.join(str(tmp_path), "a2c_cartpole.zip"))

    del model

    model = A2C.load(os.path.join(str(tmp_path), "a2c_cartpole.zip"))
    test_env = DummyVecEnv([lambda: gym.make("CartPole-v1")])

    model.set_env(test_env)
    assert model.n_batch == 10
    model.learn(100)
    os.remove(os.path.join(str(tmp_path), "a2c_cartpole.zip"))
Example #18
def run_agent(envs, parameters):
    '''Train an agent.'''
    path = Path(parameters['path'])
    dummy_env = OptVecEnv(envs)
    set_global_seeds(parameters.setdefault('seed'))
    save_path = str(path / 'model.pkl')
    alg = parameters['alg']
    if alg == 'PPO':
        with open(save_path, 'rb') as pkl:
            model = PPO2.load(pkl, env=dummy_env)
    elif alg == 'A2C':
        with open(save_path, 'rb') as pkl:
            model = A2C.load(pkl, env=dummy_env)
    try:
        done = False
        observations = dummy_env.reset()
        while not done:
            action = model.predict(observations)
            print(action[0].ravel().tolist())
            observations, rewards, dones, infos = dummy_env.step(action[0])
            done = any(dones)
            info = infos[0]
            yield info['weights']
    finally:
        dummy_env.close()
Example #19
def evaluate(modelname, env):
    n_cores = 4
    obs = env.reset()
    model = A2C.load(modelname)
    wr = 0
    win = 0
    total_health_diff = 0
    loss = 0
    episodes = 0
    total_episodes = 100
    while episodes < total_episodes:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        #print(rewards[0])
        time.sleep(.04)
        # print(rewards[0])
        env.render(mode="human")
        for i in range(4):

            if dones[i]:
                if info[i]["p1_health"] < info[i]["p2_health"]:
                    loss += 1
                else:
                    win += 1
                total_health_diff += info[i]["p1_health"] - info[i]["p2_health"]
                wr = win / (win + loss)
                episodes += 1
    return wr, total_health_diff / total_episodes
Example #20
def model_training_learning(env_train, model_name, timesteps=100000):

    # train model
    os.chdir("./model_saved/" + model_name)
    start = time.time()
    print("Train ", model_name, " Model with MlpPolicy: ")

    if model_name == "A2C_Model":
        model = A2C('MlpPolicy', env_train, verbose=0)
    elif model_name == "PPO_Model":
        model = PPO2('MlpPolicy', env_train, verbose=0)
    elif model_name == "TD3_Model":
        model = TD3('MlpPolicy', env_train, verbose=0)
    elif model_name == "SAC_Model":
        model = SAC('MlpPolicy', env_train, verbose=0)

    print("Learning ", model_name, " time steps: ", timesteps)

    model.learn(total_timesteps=timesteps)
    print("TD3 Model learning completed: ")
    end = time.time()
    timestamp = time.strftime('%b-%d-%Y_%H%M')
    model_file_name = (model_name + timestamp)
    model.save(model_file_name)
    print("- ", model_name, " save finish     :")
    print("Training time  ", model_name, " : ", (end - start) / 60, " minutes")

    os.chdir("./..")
    os.chdir("./..")
    return model
Example #21
def get_a2c(vec_env=None,
            policy='CnnPolicy',
            learning_rate=7e-4,
            momentum=0.0,
            alpha=0.99,
            epsilon=1e-5,
            max_grad_norm=0.5,
            lr_schedule='constant') -> A2C:
    """
    Parameter's default values are taken from stable_baselines.a2c.a2c.py
    """
    if vec_env is None:
        vec_env = create_training_env(1)
    return A2C(policy=policy,
               env=vec_env,
               gamma=0.99,
               n_steps=5,
               vf_coef=0.25,
               ent_coef=0.01,
               max_grad_norm=max_grad_norm,
               learning_rate=learning_rate,
               alpha=alpha,
               momentum=momentum,
               epsilon=epsilon,
               lr_schedule=lr_schedule,
               verbose=2)
Example #22
def test_evaluate_policy():
    model = A2C('MlpPolicy', 'Pendulum-v0', seed=0)
    n_steps_per_episode, n_eval_episodes = 200, 2
    model.n_callback_calls = 0

    def dummy_callback(locals_, _globals):
        locals_['model'].n_callback_calls += 1

    _, episode_lengths = evaluate_policy(model,
                                         model.get_env(),
                                         n_eval_episodes,
                                         deterministic=True,
                                         render=False,
                                         callback=dummy_callback,
                                         reward_threshold=None,
                                         return_episode_rewards=True)

    n_steps = sum(episode_lengths)
    assert n_steps == n_steps_per_episode * n_eval_episodes
    assert n_steps == model.n_callback_calls

    # Reaching a mean reward of zero is impossible with the Pendulum env
    with pytest.raises(AssertionError):
        evaluate_policy(model,
                        model.get_env(),
                        n_eval_episodes,
                        reward_threshold=0.0)

    episode_rewards, _ = evaluate_policy(model,
                                         model.get_env(),
                                         n_eval_episodes,
                                         return_episode_rewards=True)
    assert len(episode_rewards) == n_eval_episodes
Example #23
def train_agent(train, pickle_file, agent_type, env_kwargs, parms):

    bin_path = "bin/" + pickle_file

    if (path.exists(bin_path)):
        if agent_type == "a2c":
            print("Loading A2C Agent")
            RL_model = A2C.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ddpg":
            print("Loading DDPG Agent")
            RL_model = DDPG.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ppo":
            print("Loading PPO2 Agent")
            RL_model = PPO2.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
    else:
        e_train_gym = ipenv.PortfolioAllocEnv(df=train, **env_kwargs)
        env_train, _ = e_train_gym.get_sb_env()

        agent = ipagent.IPRLAgent(env=env_train)

        model = agent.get_model(model_name=agent_type, model_kwargs=parms)

        RL_model = agent.train_model(model=model,
                                     tb_log_name=agent_type,
                                     total_timesteps=1000000)

        RL_model.save(bin_path)

    return RL_model
Example #24
    def __init__(self, method, K=5, P=0.95):
        self.method = method
        self.K = K
        self.state_size = self.K + 1
        self.action_size = self.K + 1
        self.reward = []

        env_name = 'ErdosAttack-v0'

        self.log_dir = "/tmp/gym_attack/"
        os.makedirs(self.log_dir, exist_ok=True)

        env = gym.make(env_name)
        env.init_params(K, P)
        env = Monitor(env, self.log_dir, allow_early_resets=True)
        self.envs = DummyVecEnv([lambda: env])

        if method == 'PPO':
            self.model = PPO2(MLP_PPO, self.envs, verbose=0)
        elif method == 'DQN':
            self.model = DQN(MLP_DQN, self.envs, verbose=0)
        elif method == 'A2C':
            self.model = A2C(MLP_A2C, self.envs, verbose=0)
        else:
            raise Exception("Erreur ! Méthode: 'PPO' ou 'DQN' ou 'A2C")
        print("Model Initialized !")

        self.best_mean_reward, self.n_steps = -np.inf, 0
Example #25
    def test_save_callback(self):
        '''
        Test that the model performance can be monitored and results can be 
        checked and saved as the model improves. This test trains an agent
        for a short period of time, without loading a pre-trained model. 
        Therefore, this test also checks that an RL agent from stable-baselines
        can be trained.
        
        '''
        # Define logging directory. Monitoring data and agent model will be stored here
        log_dir = os.path.join(utilities.get_root_path(), 'examples', 'agents',
                               'monitored_A2C')

        # Perform a short training example with callback
        env, _, _ = run_save_callback.train_A2C_with_callback(
            log_dir=log_dir, tensorboard_log=None)

        # Load the trained agent
        model = A2C.load(os.path.join(log_dir, 'best_model'))

        # Test one step with the trained model
        obs = env.reset()
        df = pd.DataFrame([model.predict(obs)[0][0]], columns=['value'])
        df.index.name = 'keys'
        ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                    'references', 'save_callback.csv')
        self.compare_ref_values_df(df, ref_filepath)

        # Remove the model so it does not interfere with further tests
        shutil.rmtree(log_dir, ignore_errors=True)
Example #26
def load(config, agent, epoch, from_disk=True):
    config = config['ai']
    if not config['enabled']:
        logging.info("ai disabled")
        return False

    logging.info("[ai] bootstrapping dependencies ...")

    from stable_baselines import A2C
    from stable_baselines.common.policies import MlpLstmPolicy
    from stable_baselines.common.vec_env import DummyVecEnv

    import pwnagotchi.ai.gym as wrappers

    env = wrappers.Environment(agent, epoch)
    env = DummyVecEnv([lambda: env])

    logging.info("[ai] bootstrapping model ...")

    a2c = A2C(MlpLstmPolicy, env, **config['params'])

    if from_disk and os.path.exists(config['path']):
        logging.info("[ai] loading %s ..." % config['path'])
        a2c = A2C.load(config['path'], env)  # A2C.load is a classmethod that returns a new model
    else:
        logging.info("[ai] model created:")
        for key, value in config['params'].items():
            logging.info("      %s: %s" % (key, value))

    return a2c
Example #27
def load_a2c():
    loaded_model = A2C.load(save_dir + "/A2C_tutorial")
    print("loaded", loaded_model.predict(obs, deterministic=True))
    print("load gamma=", loaded_model.gamma, ", n_steps=", loaded_model.n_steps)
    # Saving a model stores its hyperparameters and network weights, but not the environment env; after loading, the environment must be set again.
    loaded_model.set_env(DummyVecEnv([lambda: gym.make("Pendulum-v0")]))
    loaded_model.learn(8000)
Example #28
def test_monitor():
    env = gym.make('Pendulum-v0')
    env = Monitor(gym.make('Pendulum-v0'), filename=None, allow_early_resets=True)
    normalized_env = NormalizeActionWrapper(env)
    normalized_env = DummyVecEnv([lambda: normalized_env])
    # model
    model_2 = A2C('MlpPolicy', normalized_env, verbose=1).learn(1000)
Example #29
def NewPotential(current_window, algorithm='PPO'):

    # Determine the pretrained agent
    if algorithm == 'A2C':
        model = A2C.load("pretrained_A2C")
    elif algorithm == 'PPO':
        model = PPO2.load("pretrained_PPO")
    elif algorithm == 'ACKTR':
        model = ACKTR.load("pretrained_ACKTR")
    elif algorithm == 'ACER':
        model = ACER.load("pretrained_ACER")
    else:
        raise ValueError("%s is not a valid algorithm." % algorithm)

    if len(current_window) != model.observation_space.shape[0]:
        raise ValueError("%s is does not match the model's window size." %
                         len(current_window))

    action, _states = model.predict(current_window, deterministic=False)

    voltages = np.linspace(0, 1, num=model.action_space.n)
    if action >= 0 and action <= model.action_space.n - 1:
        voltage = voltages[action]
    else:
        raise ValueError(
            "Received invalid action={} which is not part of the action space".
            format(action))

    return voltage
Example #30
def a2c(env, seed):
    return A2C('MlpPolicy',
               env,
               learning_rate=0.001,
               verbose=1,
               tensorboard_log="./data/runs",
               seed=seed)
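A short usage sketch for the factory above; the CartPole environment and the timestep
budget are illustrative only:

import gym
from stable_baselines.common.vec_env import DummyVecEnv

env = DummyVecEnv([lambda: gym.make('CartPole-v1')])
model = a2c(env, seed=0)
model.learn(total_timesteps=10000)
model.save('a2c_cartpole')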