Code Example #1
def main(training=True):
    #load an environment
    env = Environment(opticalTweezers(), 4)
    env.reset()

    #state action placeholders
    States = tf.placeholder(tf.float32, shape=[None, 4, 6], name='States')
    Actions = tf.placeholder(tf.int32, shape=[None], name='Actions')
    Rewards = tf.placeholder(tf.float32, shape=[None, 1], name='Rewards')
    Advantages = tf.placeholder(tf.float32, shape=[None, 1], name='Advantages')
    Entropy_coefficient = tf.placeholder(tf.float32,
                                         shape=(),
                                         name='Entropy_coefficient')

    #load a model or else init
    if os.path.isfile(os.path.join(os.getcwd(), 'model')):
        #load model
        pass
    else:
        max_grad_norm = 0.5
        actor = Actor(States, Actions, Advantages, Rewards,
                      Entropy_coefficient, max_grad_norm)
        critic = Critic(Rewards, actor)
    if training:
        train_model2(env, actor, critic, States, Actions, Rewards, Advantages,
                     Entropy_coefficient)
    else:
        pass
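The snippet above only defines the graph inputs; the Actor/Critic losses and train_model2 live elsewhere in that project. As a minimal, self-contained sketch (an assumption on my part, using TF1-style graph execution via tf.compat.v1 and a dummy loss in place of the project-specific objectives), batches would be fed into placeholders with these shapes roughly like this:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Same shapes as the States/Advantages placeholders above
states = tf.placeholder(tf.float32, shape=[None, 4, 6], name='States')
advantages = tf.placeholder(tf.float32, shape=[None, 1], name='Advantages')

# Dummy objective standing in for the actor/critic losses
pooled = tf.reduce_mean(states, axis=[1, 2])               # shape [None]
dummy_loss = tf.reduce_mean(pooled[:, None] * advantages)

with tf.Session() as sess:
    feed = {states: np.zeros((8, 4, 6), np.float32),
            advantages: np.ones((8, 1), np.float32)}
    print(sess.run(dummy_loss, feed_dict=feed))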
Code Example #2
File: ddpglander.py  Project: siddharthchd/openai-gym
def main(args=None):

    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)
    env = Environment(gym.make('LunarLanderContinuous-v2'))
    env.reset()
    state_dim = env.get_state_size()
    action_space = gym.make(args.env).action_space
    action_dim = action_space.high.shape[0]
    act_range = action_space.high

    algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    stats = algo.train(env, args, summary_writer)

    df = pd.DataFrame(np.array(stats))
    df.to_csv(args.type + "/logs.csv",
              header=['Episode', 'Mean', 'Stddev'],
              float_format='%10.5f')

    # Save weights and close environments
    exp_dir = 'models/'
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    algo.save_weights(exp_dir)
    env.env.close()
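Note that this main() dereferences args immediately, so it cannot be called with the default args=None. A small usage sketch (the field names follow the attributes read here and in the other snippets; the values are illustrative assumptions):

from argparse import Namespace

args = Namespace(type="DDPG",
                 env="LunarLanderContinuous-v2",
                 consecutive_frames=4,
                 nb_episodes=250,
                 batch_size=64,
                 render=False,
                 gather_stats=True)
main(args)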
Code Example #3
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if(args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if(args.type=="DDQN"):
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif(args.type=="A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif(args.type=="A3C"):
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif(args.type=="DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done: env.reset()

    env.env.close()
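parse_args is defined elsewhere in these projects. A minimal argparse-based sketch covering the flags the snippets above and below actually read (flag names match the attributes used; the defaults are illustrative assumptions):

import argparse

def parse_args(args):
    parser = argparse.ArgumentParser(description="Training / evaluation parameters")
    parser.add_argument('--type', type=str, default='DDQN', help="DDQN / A2C / A3C / DDPG")
    parser.add_argument('--env', type=str, default='CartPole-v1', help="Gym environment id")
    parser.add_argument('--is_atari', action='store_true', help="Use the Atari wrapper")
    parser.add_argument('--consecutive_frames', type=int, default=4, help="Stacked frames per state")
    parser.add_argument('--nb_episodes', type=int, default=5000, help="Number of training episodes")
    parser.add_argument('--batch_size', type=int, default=64, help="Minibatch size")
    parser.add_argument('--training_interval', type=int, default=30, help="Steps between A3C updates")
    parser.add_argument('--n_threads', type=int, default=8, help="Number of A3C worker threads")
    parser.add_argument('--gather_stats', action='store_true', help="Export logs.csv after training")
    parser.add_argument('--render', action='store_true', help="Render the environment")
    parser.add_argument('--gpu', type=str, default='', help="GPU id for CUDA_VISIBLE_DEVICES")
    parser.add_argument('--model_path', type=str, default='', help="Path to DDQN weights (.h5)")
    parser.add_argument('--actor_path', type=str, default='', help="Path to actor weights (.h5)")
    parser.add_argument('--critic_path', type=str, default='', help="Path to critic weights (.h5)")
    return parser.parse_args(args)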
Code Example #4
File: run.py  Project: geniustom/DRL_Keras
def main():
	args=Arg()
	env = Environment(gym.make(args.env), args.consecutive_frames)
	env.reset()
	state_dim = env.get_state_size()
	action_dim = gym.make(args.env).action_space.n
	algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
	set_session(get_session())
	summary_writer = tf.summary.FileWriter("./tensorboard_" + args.env)
	stats = algo.train(env, args, summary_writer)
	
	
	print(stats)
	algo.save_weights('./'+args.env+'.h5')
	env.env.close()
Code Example #5
File: a3c.py  Project: HenrikMettler/MasterThesis
    def train(self, args, summary_writer):

        # Instantiate one environment per thread
        envs = [
            Environment(gym.make(args.env), args.consecutive_frames)
            for i in range(args.n_threads)
        ]
        [e.reset() for e in envs]
        state_dim = envs[0].get_state_size()
        action_dim = gym.make(args.env).action_space.n
        # Create threads
        tqdm_e = tqdm(range(int(args.nb_episodes)),
                      desc='Score',
                      leave=True,
                      unit=" episodes")

        threads = [
            threading.Thread(target=training_thread,
                             daemon=True,
                             args=(self, args.nb_episodes, envs[i], action_dim,
                                   args.training_interval, summary_writer,
                                   tqdm_e, args.render))
            for i in range(args.n_threads)
        ]

        for t in threads:
            t.start()
            time.sleep(0.5)
        try:
            [t.join() for t in threads]
        except KeyboardInterrupt:
            print("Exiting all threads...")
        return None
Code Example #6
    def train(self, env, args, summary_writer):

        # Instantiate one environment per thread
        if(args.is_atari):
            envs = [AtariEnvironment(args) for i in range(args.n_threads)]
            state_dim = envs[0].get_state_size()
            action_dim = envs[0].get_action_size()
        else:
            envs = [Environment(gym.make(args.env), args.consecutive_frames) for i in range(args.n_threads)]
            [e.reset() for e in envs]
            state_dim = envs[0].get_state_size()
            action_dim = gym.make(args.env).action_space.n

        # Create threads
        factor = 100.0 / (args.nb_episodes)
        tqdm_e = tqdm(range(args.nb_episodes), desc='Score', leave=True, unit=" episodes")

        threads = [threading.Thread(
                target=training_thread,
                args=(self,
                    args.nb_episodes,
                    envs[i],
                    action_dim,
                    args.training_interval,
                    summary_writer,
                    tqdm_e,
                    factor)) for i in range(args.n_threads)]

        for t in threads:
            t.start()
            time.sleep(1)
        [t.join() for t in threads]

        return None
Code Example #7
    def train(self, env, args, summary_writer):

        # Instantiate one environment per thread
        if (args.is_ai2thor):
            config_dict = {'max_episode_length': 500}
            envs = [
                AI2ThorEnv(config_dict=config_dict)
                for i in range(args.n_threads)
            ]
            env.reset()
            state = envs[0].reset()
            state_dim = state.shape
            action_dim = envs[0].action_space.n
        elif (args.is_atari):
            envs = [AtariEnvironment(args) for i in range(args.n_threads)]
            state_dim = envs[0].get_state_size()
            action_dim = envs[0].get_action_size()
        else:
            envs = [
                Environment(gym.make(args.env), args.consecutive_frames)
                for i in range(args.n_threads)
            ]
            [e.reset() for e in envs]
            state_dim = envs[0].get_state_size()
            action_dim = gym.make(args.env).action_space.n

        # Create threads
        tqdm_e = tqdm(range(int(args.nb_episodes)),
                      desc='Score',
                      leave=True,
                      unit=" episodes")

        threads = [
            threading.Thread(target=training_thread,
                             daemon=True,
                             args=(self, args.nb_episodes, envs[i], action_dim,
                                   args.training_interval, summary_writer,
                                   tqdm_e, args.render))
            for i in range(args.n_threads)
        ]

        for t in threads:
            t.start()
            time.sleep(0.5)
        try:
            [t.join() for t in threads]
        except KeyboardInterrupt:
            print("Exiting all threads...")
        return None
Code Example #8
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)
    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 2000}
        env = AI2ThorEnv(config_dict=config_dict)
        env.reset()
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
    elif (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari, is_ai2thor=args.is_ai2thor)
    algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            print('----- done, resetting env ----')
            env.reset()
Code Example #9
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    if args.wandb:
        wandb.init(entity=args.wandb_id, project=args.wandb_project)

    # Environment Initialization
    if (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif (args.type == "DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if (args.type == "DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif (args.type == "A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif (args.type == "A3C"):
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif (args.type == "DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args)  # list of (episode, mean, stddev) entries

    # Export results to CSV
    if (args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type,
                                                      args.env,
                                                      args.nb_episodes,
                                                      args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
Code Example #10
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    summary_writer = tf.summary.FileWriter("{}/tensorboard_M1_{}_M1_{}_snr1_{}_snr2_{}".format(args.out_dir, args.M1, args.M1, args.snr_M1, args.snr_M2))

    # Initialize the wireless environment
    users_env = UsersEnvCluster(args.M1, args.M2, args.snr_M1, args.snr_M2, fixed_channel=False)
    print(users_env)

    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()

    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    act_range = 1
    act_min = 0

    # Initialize the DQN algorithm for the clustering optimization
    n_clusters = users_env.n_clusters
    algo_clustering = DDQN(n_clusters, state_dim, args)

    # Initialize the DDPG algorithm for the beamforming optimization
    algo = DDPG(action_dim, state_dim, act_range, act_min, args.consecutive_frames, algo_clustering, episode_length=args.episode_length)

    if args.step == "train":
        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if(args.gather_stats):
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

        # Save weights and close environments
        exp_dir = '{}/models_M1_{}_M2_{}_snr1_{}_snr2_{}/'.format(args.out_dir, args.M1, args.M2, args.snr_M1, args.snr_M2)
        if not os.path.exists(exp_dir):
            os.makedirs(exp_dir)
        # Save DDPG
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDPG", args.nb_episodes, args.batch_size)
        algo.save_weights(export_path)

        # Save DDQN
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDQN", args.nb_episodes, args.batch_size)
        algo.ddqn_clustering.save_weights(export_path)

    elif args.step == "inference":
        print("Loading the DDPG networks (actor and critic) and the DDQN policy network ...")
        path_actor = '<add the path of the .h5 file of the DDPG actor>'
        path_critic = '<add the path of the .h5 file of the DDPG critic>'
        path_ddqn = '<add the path of the .h5 file of the DDQN actor>'
        algo.load_weights(path_actor, path_critic, path_ddqn)

        # run a random policy during inference as an example
        s = np.random.rand(1, args.Nr)
        s_1 = np.zeros_like(s)
        s = np.vstack((s_1, s))

        while True:
            W = algo.policy_action(s)
            cluster_index = algo.ddqn_clustering.policy_action(s)
            a_and_c = {'a': W, 'c': cluster_index}
            new_state, r, done, _ = env.step(a_and_c)
            print("RL min rate = {}".format(r))
            print("RL state = {}".format(np.log(1 + new_state)))
            s = new_state
            input('Press Enter to continue ...')
Code Example #11
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if(args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if(args.type=="DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif(args.type=="A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif(args.type=="A3C"):
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif(args.type=="DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if(args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir,
        args.type,
        args.env,
        args.nb_episodes,
        args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
Code Example #12
import os
import sys
import gym
import argparse
import numpy as np
import pandas as pd
import tensorflow as tf
gym.logger.set_level(40)
from utils.continuous_environments import Environment

# args is assumed to be the namespace produced by the script's argument parser
env = Environment(gym.make("LunarLanderContinuous-v2"),
                  args.consecutive_frames)
env.reset()
state_dim = env.get_state_size()
action_space = gym.make("LunarLanderContinuous-v2").action_space
action_dim = action_space.high.shape[0]
act_range = action_space.high
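This fragment stops after computing the environment dimensions. A hedged continuation in the style of Code Example #2 (the DDPG constructor and train() signature are taken from the other snippets; DDPG is assumed importable from the same project, and the paths and the value 4 for consecutive frames are illustrative assumptions):

summary_writer = tf.summary.FileWriter("DDPG/tensorboard_LunarLanderContinuous-v2")

algo = DDPG(action_dim, state_dim, act_range, 4)   # 4 consecutive frames
stats = algo.train(env, args, summary_writer)      # args as parsed by the host script

df = pd.DataFrame(np.array(stats))
df.to_csv("DDPG/logs.csv",
          header=['Episode', 'Mean', 'Stddev'],
          float_format='%10.5f')
env.env.close()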
Code Example #13
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif (args.type == "DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high

    else:
        if args.env == 'cell':
            # Custom optical tweezers environment
            env = Environment(opticalTweezers(), args.consecutive_frames)
            # env = opticalTweezers(consecutive_frames=args.consecutive_frames)
            env.reset()
            state_dim = (6, )
            action_dim = 4  # note: the reshape code must change for a 2D agent
        else:
            # Standard Environments
            env = Environment(gym.make(args.env), args.consecutive_frames)
            env.reset()
            state_dim = env.get_state_size()
            print(state_dim)
            action_dim = gym.make(args.env).action_space.n
            print(action_dim)
    # Pick algorithm to train
    if (args.type == "DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif (args.type == "A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif (args.type == "A3C"):
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif (args.type == "DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if (args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    # all_old_states=[old_state for i in range(args.consecutive_frames)]
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done: env.reset()
Code Example #14
File: main.py  Project: gungui98/deeprl-a3c-ai2thor
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 500}
        env = AI2ThorEnv(config_dict=config_dict)
        env.reset()
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
        args.env = 'ai2thor'
    elif (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print(state_dim)
        print(action_dim)
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)
    algo = A3C(action_dim,
               state_dim,
               args.consecutive_frames,
               is_atari=args.is_atari,
               is_ai2thor=args.is_ai2thor)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type,
                                                      args.env,
                                                      args.nb_episodes,
                                                      args.batch_size)

    algo.save_weights(export_path)
    env.close()
Code Example #15
    def fit(self,
            env,
            summary_writer,
            debug=False,
            num_cpus=4,
            is_market=False,
            env_args={},
            test_env_args=None,
            env_version='v1'):
        stagnation = 1
        best_so_far = 0

        # Init test env
        test_env = None
        if env_version == 'v1':
            test_env = MarketEnvironmentV1(
                **test_env_args) if test_env_args else None
        if env_version == 'v2':
            test_env = MarketEnvironmentV2(
                **test_env_args) if test_env_args else None

        envs = []

        # Create environements for all population
        if is_market:
            if env_version == 'v1':
                envs = [
                    MarketEnvironmentV1(**env_args)
                    for i in range(self.population_size)
                ]
            if env_version == 'v2':
                envs = [
                    MarketEnvironmentV2(**env_args)
                    for i in range(self.population_size)
                ]
        else:
            envs = [
                Environment(**env_args) for i in range(self.population_size)
            ]

        # Iterating over all generations
        tqdm_e = tqdm(total=self.generations,
                      desc='Generation',
                      leave=True,
                      unit=" gen")
        for gen_i in range(self.generations):

            # Doing our evaluations
            args = [(self, self.networks[i], envs[i])
                    for i in range(self.population_size)]
            with Pool(num_cpus) as p:
                rewards = np.array(p.map(_run_par_evaluate, args))

            # Tracking best score per generation
            self.fitness.append(np.max(rewards))

            # Selecting the best network
            best_network = np.argmax(rewards)

            # Selecting top n networks
            n = int(self.survival_ratio * self.population_size)
            top_n_index = np.argsort(rewards)[-n:]

            # Creating our child networks
            new_networks = []
            for _ in range(self.population_size - n):
                # origin: 0 -> crossover of two parents, 1 -> mutation of a single
                # parent, 2 -> reuse another network from the previous generation
                origin = np.random.choice([0, 1, 2],
                                          p=[
                                              self.both_parent_percentage,
                                              self.one_parent_percentage,
                                              1 - self.both_parent_percentage -
                                              self.one_parent_percentage
                                          ])

                # both parents
                if origin == 0:
                    new_net = NeuralNet(parent1=self.networks[random.randint(
                        0,
                        len(top_n_index) - 1)],
                                        parent2=self.networks[random.randint(
                                            0,
                                            len(top_n_index) - 1)],
                                        var=self.mutation_variance)
                # One parent
                elif origin == 1:
                    new_net = NeuralNet(parent1=self.networks[random.randint(
                        0,
                        len(top_n_index) - 1)],
                                        parent2=None,
                                        var=self.mutation_variance)
                else:
                    # Copy another network from the previous generation (aside from the chosen best)
                    index = top_n_index[0]
                    while index in top_n_index:
                        index = random.randint(0, len(self.networks) - 1)
                    new_net = self.networks[index]

                new_networks.append(new_net)

            # Setting our new networks
            maintain_best_n = [self.networks[i] for i in top_n_index]
            self.networks = maintain_best_n + new_networks

            # Export results for Tensorboard
            r_max = rewards.max()
            r_mean = rewards.mean()
            r_std = rewards.std()
            self.insert_info(r_max, r_mean, r_std)
            summary_writer.add_summary(tfSummary('Max rewards', r_max),
                                       global_step=gen_i)
            summary_writer.add_summary(tfSummary('Mean rewards', r_mean),
                                       global_step=gen_i)
            summary_writer.add_summary(tfSummary('STD rewards', r_std),
                                       global_step=gen_i)

            # Update stagnation
            if r_max > best_so_far:
                best_so_far = r_max
                stagnation = 1
            else:
                stagnation += 1

            # Update tqdm
            tqdm_e.set_description('Generation:' + str(gen_i + 1) +
                                   '| Highest Reward:' + str(r_max) +
                                   '| Average Reward:' + str(r_mean) +
                                   '| std Reward: ' + str(r_std) +
                                   '| Stagnation: ' + str(stagnation) +
                                   '| Population size: ' +
                                   str(len(self.networks)))

            # Save current weights
            self.best_network = self.networks[best_network]
            if debug:
                self._log_best_network_env_info(maintain_best_n[0],
                                                summary_writer, envs[0],
                                                test_env, gen_i)
            self.save_weights(gen_i, maintain_best_n[0], self.save_path)

            # Update logs
            summary_writer.flush()
            tqdm_e.update(1)
            tqdm_e.refresh()

            # If stagnation lasts too long, stop early
            if stagnation > 10 and self.stagnation_end: break

        # Close the environments
        [e.close() for e in envs]

        # Returning the best network
        self.best_network = self.networks[best_network]

        return self.global_info
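The survivor selection and parent-origin sampling used inside the loop above can be isolated into a small, self-contained sketch (the reward values and ratios below are made up for illustration):

import numpy as np

rewards = np.array([3.0, 7.5, 1.2, 9.1, 4.4, 6.8])   # fitness of six illustrative networks
survival_ratio = 0.5
n = int(survival_ratio * len(rewards))

# Indices of the n best networks (argsort is ascending, so take the last n)
top_n_index = np.argsort(rewards)[-n:]
print("survivors:", top_n_index)

# Sample how each child is created: 0 = two parents, 1 = one parent, 2 = reuse
both_parent_percentage, one_parent_percentage = 0.5, 0.3
origin = np.random.choice(
    [0, 1, 2],
    p=[both_parent_percentage,
       one_parent_percentage,
       1 - both_parent_percentage - one_parent_percentage])
print("child origin:", origin)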