Example #1
def run_env_once(phenotype):
    # Note: env, env_name, envs_size, make_env and test_organism appear to be
    # module-level names in the original cartpole.py; only this function is shown.
    single_envs = SubprocVecEnv([make_env(env_name, envs_size)])
    test_organism([phenotype], single_envs, render=True)

    # Visualize().update(phenotype)
    feedforward_highest = FeedforwardCUDA([phenotype])
    states = env.reset()
    done = False
    distance = 0.0
    last_distance = 0.0
    distance_stagnation = 0
    while not done:
        actions = feedforward_highest.update(np.array([states]))
        states, reward, done, info = env.step(actions[0])
        distance += np.around(states[2], decimals=2)

        if distance <= last_distance:
            distance_stagnation += 1
        else:
            distance_stagnation = 0

        if distance_stagnation >= 100:
            done = True

        last_distance = distance

        env.render()
    env.close()
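None of the snippets on this page carry their import statements. Below is a minimal sketch of what an example file would need to import; the exact module paths inside the valorcurse/neat project, and whether SubprocVecEnv comes from stable-baselines or from a project-local copy, are assumptions rather than facts taken from this page.

import math
import numpy as np
import networkx as nx

# Assumed paths -- the project's actual layout is not shown here.
from neat.phenotypes import Phenotype, FeedforwardCUDA
from neat.neatTypes import NeuronType
from stable_baselines.common.vec_env import SubprocVecEnv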
Example #2
    def run_env_once(phenotype, env):
        feedforward_highest = FeedforwardCUDA()
        states = env.reset()

        done = False
        last_distance = 0.0
        distance_stagnation = 0

        final_reward = 0.0
        while not done:
            actions = feedforward_highest.update([phenotype], np.array([states]))

            states, reward, done, info = env.step(actions[0])
            pos = info["pos"]

            final_reward += reward
            if pos <= last_distance:
                distance_stagnation += 1
            else:
                distance_stagnation = 0

            if distance_stagnation >= 100:
                done = True
            #
            last_distance = pos

            env.render()

        print("Final rewards: {}".format(final_reward))
Example #3
File: cartpole.py  Project: valorcurse/neat
        def __init__(self):
            print("Creating envs...")
            self.envs = SubprocVecEnv([make_env(env_name, seed) for seed in range(envs_size)])
            self.num_of_envs = envs_size
            self.feedforward = FeedforwardCUDA()

            print("Done.")
Example #4
    def run_env_once(phenotype, env):
        feedforward_highest = FeedforwardCUDA()
        states = env.reset()

        done = False
        distance = 0.0
        last_distance = 0.0
        distance_stagnation = 0

        # image = env.render(mode='rgb_array')
        # images = []
        # activations = []
        while not done:
            actions = feedforward_highest.update([phenotype],
                                                 np.array([states]))

            # print(actions)
            states, reward, done, info = env.step(actions[0])
            distance += np.around(states[2], decimals=2)

            if distance <= last_distance:
                distance_stagnation += 1
            else:
                distance_stagnation = 0

            if distance_stagnation >= 100:
                done = True

            last_distance = distance

            # activations.append(feedforward_highest.mem[0])
            # images.append(env.render(mode='rgb_array'))
            env.render()
Example #5
def test_different_input():
    G = nx.DiGraph()
    G.add_nodes_from([(10, {
        "activation": np.tanh,
        "type": NeuronType.INPUT,
        "pos": (-1.0, -1.0)
    }),
                      (20, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "pos": (0.0, -1.0)
                      }),
                      (30, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "pos": (1.0, -1.0)
                      }),
                      (40, {
                          "activation": np.tanh,
                          "type": NeuronType.HIDDEN
                      }),
                      (60, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "pos": (-1.0, 1.0)
                      }),
                      (70, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "pos": (1.0, 1.0)
                      })])

    G.add_weighted_edges_from([
        (10, 40, 0.7),
        (20, 40, 0.4),
        (40, 60, 0.1),
        (40, 70, 0.25),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([0.5, 0.2, 0.7])

    feedforward_highest = FeedforwardCUDA()

    result = feedforward_highest.update([phenotype], [inputs])[0]

    answers = [0.0, 0.0]
    hidden_node = math.tanh(
        math.tanh(inputs[0]) * 0.7 + math.tanh(inputs[1]) * 0.4)
    answers[0] = math.tanh(hidden_node * 0.1)
    answers[1] = math.tanh(hidden_node * 0.25)

    np.testing.assert_array_almost_equal(result, answers)
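The hand-computed reference above spells out the convention the test expects from FeedforwardCUDA: tanh is applied to the raw inputs themselves, and each hidden and output node applies tanh to the weighted sum of its activated inputs; node 30 has no outgoing edge, so it contributes nothing. The same expectation in NumPy-only form (a restatement of what the test already asserts, not additional behaviour):

x = np.tanh(np.array([0.5, 0.2, 0.7]))                 # activated inputs: nodes 10, 20, 30
h40 = np.tanh(x[0] * 0.7 + x[1] * 0.4)                 # hidden node 40
expected = np.tanh(np.array([h40 * 0.1, h40 * 0.25]))  # outputs: nodes 60, 70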
Example #6
def test_single_edges():
    G = nx.DiGraph()
    G.add_nodes_from([(10, {
        "activation": np.tanh,
        "type": NeuronType.INPUT,
        "bias": 0.0,
        "pos": (-1.0, -1.0)
    }),
                      (20, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "bias": 0.0,
                          "pos": (0.0, -1.0)
                      }),
                      (30, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "bias": 0.0,
                          "pos": (1.0, -1.0)
                      }),
                      (60, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "bias": 0.0,
                          "pos": (-1.0, 1.0)
                      }),
                      (70, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "bias": 0.0,
                          "pos": (1.0, 1.0)
                      })])

    G.add_weighted_edges_from([
        (20, 60, 0.4),
        (30, 70, 0.1),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([0.5, 0.5, 0.5])

    feedforward_highest = FeedforwardCUDA()

    result = feedforward_highest.update([phenotype], [inputs])[0]

    norm_inputs = np.tanh(inputs)
    # Nodes 20 and 30 are the second and third inputs (indices 1 and 2); the
    # indices below only work out because all three inputs are equal here.
    answers = np.tanh([norm_inputs[0] * 0.4, norm_inputs[1] * 0.1])

    np.testing.assert_array_almost_equal(result, answers)
Example #7
def test_hidden_nodes():
    G = nx.DiGraph()
    G.add_nodes_from([(10, {
        "activation": np.tanh,
        "type": NeuronType.INPUT,
        "pos": (-1.0, -1.0)
    }),
                      (20, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "pos": (0.0, -1.0)
                      }),
                      (30, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "pos": (1.0, -1.0)
                      }),
                      (40, {
                          "activation": np.tanh,
                          "type": NeuronType.HIDDEN
                      }),
                      (60, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "pos": (-1.0, 1.0)
                      }),
                      (70, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "pos": (1.0, 1.0)
                      })])

    G.add_weighted_edges_from([
        (20, 40, 0.4),
        (40, 60, 0.1),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([1, 1, 1])

    feedforward_highest = FeedforwardCUDA()

    result = feedforward_highest.update([phenotype], [inputs])[0]

    norm_inputs = np.tanh(inputs)
    hidden = math.tanh(norm_inputs[1] * 0.4)
    answers = np.tanh([hidden * 0.1, 0.0])

    np.testing.assert_array_almost_equal(result, answers, decimal=4)
Example #8
File: cartpole.py  Project: valorcurse/neat
    class TestOrganism(Evaluation):

        def __init__(self):
            print("Creating envs...")
            self.envs = SubprocVecEnv([make_env(env_name, seed) for seed in range(envs_size)])
            self.num_of_envs = envs_size
            self.feedforward = FeedforwardCUDA()

            print("Done.")

        def evaluate(self, phenotypes: List[Phenotype]) -> Tuple[np.ndarray, np.ndarray]:

            states = self.envs.reset()

            num_of_runs = 3

            fitnesses = np.zeros(len(self.envs.remotes), dtype=np.float64)


            done = False
            done_tracker = np.zeros(len(self.envs.remotes), dtype=np.int32)

            diff = abs(len(phenotypes) - len(self.envs.remotes))
            # Note: abs() makes the branch below unreachable; the later revisions in
            # Examples #14 and #15 compute a signed diff instead.
            if diff < 0:
                done_tracker[diff:] = num_of_runs

            while not done:

                actions = self.feedforward.update(phenotypes, states[:len(phenotypes)])
                actions = np.pad(actions, ((0, diff), (0, 0)), 'constant')

                states, rewards, dones, info = self.envs.step(np.argmax(actions, axis=1))

                fitnesses[done_tracker < num_of_runs] += rewards[done_tracker < num_of_runs]

                # Finish run if the robot fell
                envs_run_done = dones == True
                done_tracker[envs_run_done] += dones[envs_run_done]
                done = all(r >= num_of_runs for r in done_tracker)

                # Reset the done envs
                for i in np.where(dones == True)[0]:
                    remote = self.envs.remotes[i]
                    remote.send(('reset', None))
                    # If we don't receive, the remote will not reset properly
                    reset_obs = remote.recv()[0]
                    states[i] = reset_obs

                # self.envs.render()

            final_fitnesses = []
            fitnesses_t = fitnesses.T
            for i in range(fitnesses_t.shape[0]):
                fitness = fitnesses_t[i]
                mean = np.sum(fitness)/num_of_runs

                final_fitnesses.append(mean)

            return (np.array(final_fitnesses[:len(phenotypes)]), np.zeros((len(phenotypes), 0)))
Example #9
File: cartpole.py  Project: valorcurse/neat
    def run_env_once(phenotype, env):
        feedforward_highest = FeedforwardCUDA()
        states = env.reset()

        done = False

        final_reward = 0.0
        while not done:
            actions = feedforward_highest.update([phenotype], np.array([states]))

            states, reward, done, info = env.step(np.argmax(actions[0]))

            final_reward += reward

            env.render()

        print("Final rewards: {}".format(final_reward))
Example #10
def test_multiple_edges():
    G = nx.DiGraph()
    G.add_nodes_from([(1, {
        "activation": np.tanh,
        "type": NeuronType.INPUT,
        "pos": (-1.0, -1.0)
    }),
                      (2, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "pos": (0.0, -1.0)
                      }),
                      (3, {
                          "activation": np.tanh,
                          "type": NeuronType.INPUT,
                          "pos": (1.0, -1.0)
                      }),
                      (6, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "pos": (-1.0, 1.0)
                      }),
                      (7, {
                          "activation": np.tanh,
                          "type": NeuronType.OUTPUT,
                          "pos": (1.0, 1.0)
                      })])

    G.add_weighted_edges_from([(2, 6, 0.4), (3, 6, 0.1)])

    phenotype = Phenotype(G, 0)

    inputs = np.array([1, 1, 1])

    feedforward_highest = FeedforwardCUDA()

    result = feedforward_highest.update([phenotype], [inputs])[0]

    norm_inputs = np.tanh(inputs)
    answer = norm_inputs[1] * 0.4 + norm_inputs[2] * 0.1
    answers = np.tanh([answer, 0.0])

    np.testing.assert_array_almost_equal(result, answers)
Example #11
def test_organism(phenotypes, envs, render=False):
    feedforward = FeedforwardCUDA(phenotypes)

    observations = envs.reset()

    obs_32 = np.float32(observations)
    actions = feedforward.update(obs_32)

    fitnesses = np.zeros(len(envs.remotes), dtype=np.float64)

    done = False
    done_tracker = np.array([False for _ in range(len(envs.remotes))])

    diff = len(phenotypes) - len(envs.remotes)
    if diff < 0:
        done_tracker[diff:] = True

    distances = np.zeros(len(envs.remotes))
    last_distances = np.zeros(len(envs.remotes))
    stagnations = np.zeros(len(envs.remotes))

    all_states = []

    max_steps = 50
    steps = max_steps
    while not done:
        actions = np.pad(actions, (0, abs(diff)), 'constant')
        states, rewards, dones, info = envs.step(actions)

        # if render:
        #     envs.remotes[0].send(('render', None))
        #     envs.remotes[0].recv()

        actions = feedforward.update(states)

        fitnesses[done_tracker == False] += np.around(
            rewards[done_tracker == False], decimals=4)
        # fitnesses[done_tracker == False] = np.around(rewards[done_tracker == False], decimals=2)

        envs_done = dones == True
        done_tracker[envs_done] = dones[envs_done]
        envs_running = len([d for d in done_tracker if d == False])

        # print("\r"+" "* 100, end='', flush=True)
        # print("\rEnvs running: {}/{}".format(envs_running, len(phenotypes)), end='')

        done = envs_running == 0

        distances += np.around(states.T[2], decimals=2)

        stagnations += distances == last_distances

        done_tracker[stagnations >= 100] = True

        last_distances = distances

        if steps == max_steps:
            steps = 0
            all_states.append(states[:, [0, 4, 6, 8, 9, 11, 13]])

        steps += 1

    all_states = np.array(all_states)
    flattened_states = []
    for row_i in range(all_states.shape[1]):
        flattened_states.append(all_states[:, row_i].flatten())

    flattened_states = pad_matrix(np.array(flattened_states),
                                  behavior_matrix_size)

    return (fitnesses, flattened_states)
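pad_matrix and behavior_matrix_size are defined elsewhere in the project and are not shown on this page. Judging from the call site, the helper pads each flattened state row out to a fixed width; a plausible stand-in (a sketch under that assumption, not the project's actual implementation) is:

def pad_matrix(matrix, width):
    # Zero-pad (or truncate) every row of a 2-D array to exactly `width` columns.
    padded = np.zeros((matrix.shape[0], width), dtype=matrix.dtype)
    cols = min(matrix.shape[1], width)
    padded[:, :cols] = matrix[:, :cols]
    return padded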
Example #12
def test_custom():
    G = nx.DiGraph()
    G.add_nodes_from([
        (1, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (-1.0, -1.0)
        }),
        (2, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (3, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (4, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (5, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (6, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (7, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (8, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (9, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (10, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),  #
        (11, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),  #
        (12, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (13, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (14, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (15, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (16, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (17, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (18, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (19, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (20, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (21, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (22, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (23, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (24, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "pos": (1.0, -1.0)
        }),
        (25, {
            "activation": np.tanh,
            "type": NeuronType.HIDDEN,
            "pos": (1.0, -1.0)
        }),
        (26, {
            "activation": np.tanh,
            "type": NeuronType.OUTPUT,
            "pos": (-1.0, 1.0)
        }),
        (27, {
            "activation": np.tanh,
            "type": NeuronType.OUTPUT,
            "pos": (-1.0, 1.0)
        }),
        (28, {
            "activation": np.tanh,
            "type": NeuronType.OUTPUT,
            "pos": (-1.0, 1.0)
        }),
        (29, {
            "activation": np.tanh,
            "type": NeuronType.OUTPUT,
            "pos": (-1.0, 1.0)
        }),
    ])

    G.add_weighted_edges_from([
        (10, 25, 1.0),
        (11, 25, 1.0),
        (25, 27, 1.0),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0.89, 0.996, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0
    ])

    feedforward_highest = FeedforwardCUDA()

    result = feedforward_highest.update([phenotype], [inputs])[0]
    print(result)
    print(feedforward_highest.mem[0])
Example #13
def test_multiple_phenotypes():
    G1 = nx.DiGraph()
    G1.add_nodes_from([
        (0, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "bias": 0.0,
            "pos": (-1.0, -1.0)
        }),
        (-1, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "bias": 0.0,
            "pos": (0.0, -1.0)
        }),
        (6, {
            "activation": np.tanh,
            "type": NeuronType.HIDDEN,
            "bias": 0.0,
            "pos": (-1.0, 0.0)
        }),
        (10, {
            "activation": np.tanh,
            "type": NeuronType.HIDDEN,
            "bias": 0.0,
            "pos": (0.0, 0.0)
        }),
        (3, {
            "activation": np.tanh,
            "type": NeuronType.HIDDEN,
            "bias": 0.0,
            "pos": (1.0, 0.0)
        }),
        (-2, {
            "activation": np.tanh,
            "type": NeuronType.OUTPUT,
            "bias": 0.0,
            "pos": (-1.0, 1.0)
        }),
    ])

    G1.add_weighted_edges_from([(0, -2, 0.15286614111378544),
                                (-1, -2, 0.8345164457693686),
                                (0, 3, 0.15286614111378544), (3, -2, 1.0),
                                (0, 6, 0.15286614111378544), (6, 3, 1.0),
                                (6, 10, 1.0), (10, 3, 1.0)])

    phenotype1 = Phenotype(G1, 0)

    G2 = nx.DiGraph()
    G2.add_nodes_from([
        (0, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "bias": 0.0,
            "pos": (-1.0, -1.0)
        }),
        (-1, {
            "activation": np.tanh,
            "type": NeuronType.INPUT,
            "bias": 0.0,
            "pos": (0.0, -1.0)
        }),
        (6, {
            "activation": np.tanh,
            "type": NeuronType.HIDDEN,
            "bias": 0.0,
            "pos": (-1.0, 0.0)
        }),
        (3, {
            "activation": np.tanh,
            "type": NeuronType.HIDDEN,
            "bias": 0.0,
            "pos": (1.0, 0.0)
        }),
        (-2, {
            "activation": np.tanh,
            "type": NeuronType.OUTPUT,
            "bias": 0.0,
            "pos": (-1.0, 1.0)
        }),
    ])

    G2.add_weighted_edges_from([(0, -2, 0.15286614111378544),
                                (-1, -2, 0.8345164457693686),
                                (0, 3, 0.15286614111378544), (3, -2, 1.0),
                                (0, 6, 0.15286614111378544), (6, 3, 1.0)])

    phenotype2 = Phenotype(G2, 0)

    inputs = np.array([1, 1])

    feedforward_highest = FeedforwardCUDA()

    result_one = feedforward_highest.update([phenotype1], [inputs])

    result_both = feedforward_highest.update([phenotype1, phenotype2],
                                             [inputs, inputs])

    assert result_one[0] == result_both[0]
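This example checks that FeedforwardCUDA is deterministic across batch sizes: the output of phenotype1 evaluated on its own must equal its row when evaluated in the same batch as phenotype2. The test-style examples on this page follow pytest conventions, so assuming they live in a test module on the usual pytest search path (the file name is not given here), a single one can be run with, for example, python -m pytest -k test_multiple_phenotypes.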
Example #14
    class TestOrganism(Evaluation):

        def __init__(self):
            print("Creating envs...")
            self.envs = SubprocVecEnv([make_env(env_name, seed) for seed in range(envs_size)])
            self.num_of_envs = envs_size
            self.feedforward = FeedforwardCUDA()
            print("Done.")

        def evaluate(self, phenotypes: List[Phenotype]) -> Tuple[np.ndarray, np.ndarray]:

            states = self.envs.reset()

            num_of_runs = 1

            fitnesses = np.zeros(len(self.envs.remotes), dtype=np.float64)

            done = False
            done_tracker = np.zeros(len(self.envs.remotes), dtype=np.int32)

            diff = len(phenotypes) - len(self.envs.remotes)
            if diff < 0:
                done_tracker[diff:] = num_of_runs

            # distances = np.zeros(len(self.envs.remotes))
            last_distances = np.zeros(len(self.envs.remotes))
            stagnations = np.zeros(len(self.envs.remotes))

            while not done:

                actions = self.feedforward.update(phenotypes, states[:len(phenotypes)])
                actions = np.pad(actions, (0, abs(diff)), 'constant')

                states, rewards, dones, info = self.envs.step(actions)

                pos = np.round(np.array([i['pos'] for i in info]), 2)

                # Only keep track of rewards for the right run
                # padded_rewards = np.pad(np.asmatrix(rewards), [(0, num_of_runs - 1), (0, 0)], mode='constant')
                # rolled_rewards = np.array(
                #     [np.roll(padded_rewards[:, i], done_tracker[i]) for i in range(len(done_tracker))]).T
                # fitnesses += rolled_rewards
                fitnesses[done_tracker < num_of_runs] += rewards[done_tracker < num_of_runs]

                # Finish run if it has not moved for a certain amount of frames
                stagnated_distances = pos == last_distances
                stagnations += stagnated_distances


                stopped_moving = stagnations >= 100
                dones[stopped_moving == True] = stopped_moving[stopped_moving == True]

                # Reset stagnations
                stagnations[stopped_moving == True] = 0
                last_distances = pos

                # Finish run if the robot fell
                envs_run_done = dones == True
                done_tracker[envs_run_done] += dones[envs_run_done]
                done = all(r >= num_of_runs for r in done_tracker)

                # Reset the done envs
                for i in np.where(dones == True)[0]:
                    remote = self.envs.remotes[i]
                    remote.send(('reset', None))
                    # If we don't receive, the remote will not reset properly
                    reset_obs = remote.recv()[0]
                    states[i] = reset_obs

                # print(done_tracker)
                # print(done)
                # self.envs.render()

            final_fitnesses = []

            fitnesses_t = fitnesses.T
            for i in range(fitnesses_t.shape[0]):
                fitness = fitnesses_t[i]
                mean = np.sum(fitness)/num_of_runs

                final_fitnesses.append(mean)

            return (np.array(final_fitnesses[:len(phenotypes)]), np.zeros((len(phenotypes), 0)))
Example #15
    class TestOrganism(Evaluation):
        def __init__(self):
            print("Creating envs...")
            self.envs = SubprocVecEnv(
                [make_env(env_name, seed) for seed in range(envs_size)])
            self.num_of_envs = envs_size
            self.feedforward = FeedforwardCUDA()
            print("Done.")

        def evaluate(
                self,
                phenotypes: List[Phenotype]) -> Tuple[np.ndarray, np.ndarray]:

            states = self.envs.reset()

            num_of_runs = 1

            states_32 = np.float32(states)
            actions = self.feedforward.update(phenotypes, states_32)

            fitnesses = np.zeros((num_of_runs, len(self.envs.remotes)),
                                 dtype=np.float64)
            all_states = np.zeros(
                (num_of_runs, len(self.envs.remotes), behavior_dimensions))
            state_indexes = np.zeros((num_of_runs, len(self.envs.remotes)),
                                     dtype=np.int32)

            done = False
            done_tracker = np.zeros(len(self.envs.remotes), dtype=np.int32)

            diff = len(phenotypes) - len(self.envs.remotes)
            if diff < 0:
                done_tracker[diff:] = num_of_runs

            last_distances = np.zeros(len(self.envs.remotes))
            stagnations = np.zeros(len(self.envs.remotes))

            max_steps = 10
            steps = max_steps

            while not done:
                states_32 = np.float32(states)
                actions = self.feedforward.update(phenotypes,
                                                  states_32[:len(phenotypes)])
                actions = np.pad(actions, (0, abs(diff)), 'constant')

                states, rewards, dones, info = self.envs.step(actions)

                pos = np.round(np.array([i['pos'] for i in info]), 2)

                # Only keep track of rewards for the right run
                padded_rewards = np.pad(np.asmatrix(rewards),
                                        [(0, num_of_runs - 1), (0, 0)],
                                        mode='constant')
                rolled_rewards = np.array([
                    np.roll(padded_rewards[:, i], done_tracker[i])
                    for i in range(len(done_tracker))
                ]).T
                fitnesses += rolled_rewards

                # Finish run if it has not moved for a certain amount of frames
                stagnated_distances = pos == last_distances
                stagnations += stagnated_distances

                stopped_moving = stagnations >= 100
                dones[stopped_moving == True] = stopped_moving[stopped_moving
                                                               == True]

                # Reset stagnations
                stagnations[stopped_moving == True] = 0
                last_distances = pos

                # Finish run if the robot fell
                envs_run_done = dones == True
                done_tracker[envs_run_done] += dones[envs_run_done]
                done = all(r >= num_of_runs for r in done_tracker)

                # Reset the done envs
                for i in np.where(dones == True)[0]:
                    remote = self.envs.remotes[i]
                    remote.send(('reset', None))
                    # If we don't receive, the remote will not reset properly
                    reset_obs = remote.recv()[0]
                    states[i] = reset_obs

                if steps == max_steps:
                    steps = 0
                    relevant_states = states[:, :features_dimensions]
                    for i in range(done_tracker.size):
                        for row in range(relevant_states.shape[0]):
                            level = done_tracker[i]

                            if level >= num_of_runs:
                                continue

                            s = relevant_states[row]

                            start = state_indexes[level][row]
                            end = start + len(s)

                            all_states[level][row][start:end] = s

                            # print(all_states[level][row].shape)
                            # print(all_states[level][row])

                            state_indexes[level][
                                row] = end % behavior_dimensions

                steps += 1

            final_fitnesses = []
            final_states = []

            fitnesses_t = fitnesses.T
            for i in range(fitnesses_t.shape[0]):
                fitness = fitnesses_t[i]
                index = np.argmax(fitness)

                states = all_states[index]

                final_fitnesses.append(fitness[index])
                # Note: `states[index]` indexes by run rather than by environment;
                # `states[i]` may have been intended.
                final_states.append(states[index])

            return (np.array(final_fitnesses[:len(phenotypes)]),
                    np.array(final_states[:len(phenotypes)]))