Ejemplo n.º 1
0
    def test_make_child(self):
        """Check that make_child takes each first-layer W column from the expected parent."""
        parents = [Individual(NNAgent((10, 10), 5)) for _ in range(2)]
        parent_weights = [p.agent.get_weights() for p in parents]

        # Seed right before the crossover so the parent draws are reproducible.
        np.random.seed(42)
        child_weights = make_child(parents).agent.get_weights()

        # Parent index per column, generated from np.random.randint(0, 2) when seed = 42
        expected_parents = [0, 1, 0, 0, 0, 1, 0, 0, 0, 1]
        for column, i_parent in enumerate(expected_parents):
            # Index 0 of a weight list is the W matrix of the first layer, so this
            # compares one column of the child's W with the same column of the
            # parent that is expected to have donated it.
            np.testing.assert_equal(child_weights[0][:, column],
                                    parent_weights[i_parent][0][:, column])
Ejemplo n.º 2
0
def make_child(parents, simulation_params=None):
    """
    Make a single child from a list of parents using per-column crossover.

    The child's weight list is expected to alternate W, b, W, b, ...
    For every column of every W matrix one parent is drawn uniformly at
    random; that parent's column of W and the matching entry of the
    following b vector are copied into the child.

    :param parents: a list of parents which will make a child. The first
        parent's agent supplies the state/action space sizes for the child.
    :param simulation_params: not used by this crossover; accepted (and
        optional) so the function can be called with or without it.
    :return: the child
    """
    template = parents[0].agent
    child = Individual(
        NNAgent(template.state_space_shape, template.action_space_size))
    weights = child.agent.get_weights()

    parent_weights = [parent.agent.get_weights() for parent in parents]

    # Step over the (W, b) pairs: i_w indexes W, i_w + 1 the matching b.
    for i_w in range(0, len(weights), 2):
        i_b = i_w + 1
        for i_col in range(np.shape(weights[i_w])[1]):
            # One draw per column, so W's column and b's entry always come
            # from the same parent.
            donor = parent_weights[np.random.randint(0, len(parents))]

            weights[i_w][:, i_col] = donor[i_w][:, i_col]
            weights[i_b][i_col] = donor[i_b][i_col]

    child.agent.set_weights(weights)

    return child
Ejemplo n.º 3
0
    def test_make_child_random_subsequence(self):
        """Check that every copied subsequence of the child matches the expected parent."""
        parents = [Individual(NNAgent((10, 10), 5)) for _ in range(2)]

        np.random.seed(42)
        child_weights = make_child_random_subsequence(parents).agent.get_weights()

        # Parent weights looks like this: [[W1, b1, W2, b2, W3, b3], [W1, b1, W2, b2, W3, b3]]
        parent_weights = [p.agent.get_weights() for p in parents]

        # Generated by alternating np.random.randint(1, 10) and np.random.randint(0, 2) when seed = 42
        expected_lengths = iter([7, 8, 5, 3, 8, 4, 8, 6, 2, 6, 5, 6, 9, 3, 7, 9, 5, 7, 9, 2, 9, 2, 9, 5, 4, 7, 8, 3, 1,
                                 2, 4, 6, 4, 2, 2, 4, 8, 7, 9, 8, 2, 5, 9, 9, 1, 7, 8, 8, 3, 8, 3, 5, 7, 9, 7, 8, 2, 7,
                                 8, 3, 8, 3, 3, 3, 5, 7, 7, 9, 3, 7, 4, 4, 5, 7, 4, 7, 3, 2, 9, 6, 4, 7, 9, 1, 9, 9, 9,
                                 7, 8, 9, 5, 3, 8, 6, 8, 4, 1, 4, 7, 3])
        expected_parents = iter([1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
                                 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1,
                                 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0])

        # Walk every W and b matrix: index 0 is W1, 1 is b1, 2 is W2, etc.
        for i_m, child_matrix in enumerate(child_weights):
            # Compare column-major flattened sequences of child and parents.
            child_seq = child_matrix.flatten(order='F')
            parent_seqs = [pw[i_m].flatten(order='F') for pw in parent_weights]

            start = 0  # end position of the previous subsequence
            while start < child_seq.size:
                try:
                    length = next(expected_lengths)
                    i_parent = next(expected_parents)  # parent the subsequence should come from
                except StopIteration:
                    # NOTE(review): once the expected tables are exhausted the
                    # remaining positions are not verified.
                    break
                stop = start + length

                np.testing.assert_equal(child_seq[start:stop],
                                        parent_seqs[i_parent][start:stop],
                                        err_msg='Mismatch for matrix {} sequence, subseq [{}, {}]'
                                                .format(i_m, start, stop))

                start = stop
Ejemplo n.º 4
0
def new_gen_with_challenger(filename, simulation_params):
    """
    Build a fresh first generation and append one externally stored individual.

    :param filename: path of the model file the challenger is loaded from
    :param simulation_params: all of the simulation params; the state and
        action space shapes are read from it
    :return: the new generation, including the challenger
    """
    agent_shapes = (simulation_params.state_space_shape,
                    simulation_params.action_space_shape)

    generation = make_first_generation(simulation_params)

    # The challenger is built like any other agent, then has its model
    # loaded from the given file.
    challenger_agent = NNAgent(*agent_shapes)
    challenger_agent.load_model(filename)

    generation.add_individual(Individual(challenger_agent))
    return generation
Ejemplo n.º 5
0
def make_child_random_subsequence(parents, simulation_params):
    """
    Make a single child by splicing random-length runs of the parents' weights.

    Each weight array (W1, b1, W2, b2, ...) is flattened in column-major
    order and filled left to right: a run length and a parent index are
    drawn, and that parent's values for the run are copied into the child.

    :param parents: list of parents; the first parent's agent supplies the
        state/action space sizes for the child
    :param simulation_params: provides ``max_subseq_length``, used as the
        upper bound of ``np.random.randint`` (which excludes the upper bound)
    :return: a child
    """
    template = parents[0].agent
    child = Individual(
        NNAgent(template.state_space_shape, template.action_space_size))

    # Child weights: [W1, b1, W2, b2, ...]; donor_weights holds one such list per parent.
    child_weights = child.agent.get_weights()
    donor_weights = [p.agent.get_weights() for p in parents]

    for idx, matrix in enumerate(child_weights):
        original_shape = matrix.shape
        flat = matrix.flatten(order='F')
        donor_flats = [dw[idx].flatten(order='F') for dw in donor_weights]

        start = 0
        while start < flat.size:
            # Draw order matters for seeded reproducibility: length first, then parent.
            run_length = np.random.randint(
                1, simulation_params.max_subseq_length)
            i_donor = np.random.randint(0, len(parents))

            # Slices past the end are clipped, so the final run may be shorter.
            stop = start + run_length
            flat[start:stop] = donor_flats[i_donor][start:stop]
            start = stop

        # Restore the original shape using the same column-major order.
        child_weights[idx] = flat.reshape(original_shape, order='F')

    child.agent.set_weights(child_weights)
    return child
Ejemplo n.º 6
0
def continue_gen(path, simulation_params):
    """
    Creates a generation based on files from a directory.

    Every regular file directly inside ``path`` except ``.gitkeep`` is
    loaded as the model of one agent. Subdirectories are ignored.

    :param path: where the directory is
    :param simulation_params: all of the simulation params; the state and
        action space shapes are read from it
    :return: the loaded generation (created with generation number 1)
    """
    state_space_shape = simulation_params.state_space_shape
    action_space_shape = simulation_params.action_space_shape

    individuals = []
    for name in listdir(path):
        # Build the path once and use the same value for the file check and
        # the load, instead of mixing join() with manual "/" concatenation.
        file_path = join(path, name)
        if name == ".gitkeep" or not isfile(file_path):
            continue
        agent = NNAgent(state_space_shape, action_space_shape)
        agent.load_model(file_path)
        individuals.append(Individual(agent))
    return Generation(1, individuals)
Ejemplo n.º 7
0
def make_first_generation(simulation_params):
    """
    Creates the first generation from newly constructed agents.

    :param simulation_params: all of the simulation params; supplies the
        state/action space shapes and ``num_individuals_per_gen``
    :return: a Generation (created with generation number 1) holding the new individuals
    """
    # NOTE: an earlier draft sketched a custom Keras kernel initializer
    # (K.random_normal) here; agents are currently built with whatever
    # NNAgent does by default.
    individuals = []
    for _ in range(simulation_params.num_individuals_per_gen):
        agent = NNAgent(simulation_params.state_space_shape,
                        simulation_params.action_space_shape)
        individuals.append(Individual(agent))
    return Generation(1, individuals)
Ejemplo n.º 8
0
    def _simulate_individual(self, individual: Individual, render):
        """
        Simulates a single individual and assigns its fitness score.
        This involves letting the individual play a game of Mario,
        and assigning the resulting fitness to the individual.

        The run ends when one of the following happens:
        ``self.max_steps`` steps were taken, the x position changed by at
        most 1 for 200 consecutive steps, or ``info["life"]`` is <= 2.

        :param individual: the individual to simulate; its ``fitness``
            attribute is overwritten with the final x position (plus the
            distance accumulated on earlier flag captures)
        :param render: if truthy, the environment is rendered after each step
        """
        self.env.reset()

        stall_limit = 200  # consecutive near-stationary steps before aborting

        x_pos = 0
        last_x_pos = 0
        accumulated_fitness = 0
        steps_standing_still = 0
        frames = 0
        died = False
        stalled = False

        start_time = time.time()

        for _ in range(self.max_steps):
            self.state_downscaled = get_sensor_map(self.env_expanded)

            # The agent returns one score per action; take the best one.
            action = np.argmax(individual.agent.act(self.state_downscaled))

            # Only the info dict is used. An accumulated-reward fitness was
            # an alternative at one point but is not currently used.
            _, _, _, info = self.env.step(action)

            if info['flag_get']:
                # NOTE(review): x_pos already includes accumulated_fitness, so
                # a second flag capture counts the earlier distance twice.
                # Confirm whether that is intended.
                accumulated_fitness += x_pos

            x_pos = info['x_pos'] + accumulated_fitness

            # Treat a change of at most 1 in x position as standing still.
            if last_x_pos - 1 <= x_pos <= last_x_pos + 1:
                steps_standing_still += 1
                if steps_standing_still >= stall_limit:
                    stalled = True
                    break
            else:
                steps_standing_still = 0

            last_x_pos = x_pos

            if render:
                self.env.render()

            if info["life"] <= 2:
                died = True
                break

            frames += 1

        # Guard against a zero elapsed time (no steps taken, or a coarse clock).
        elapsed = time.time() - start_time
        fps = frames / elapsed if elapsed > 0 else 0.0
        self._log.debug('Steps per second: {:.2f}'.format(fps))

        individual.fitness = x_pos

        if died:
            self._log.debug(
                'Individual {} died. It achieved fitness {}'.format(
                    individual.id, individual.fitness))
        elif stalled:
            # Previously reported as "ran out of simulation steps", which
            # was misleading for runs aborted due to standing still.
            self._log.debug(
                'Individual {} stood still for {} steps and was stopped. It achieved fitness {}'
                .format(individual.id, stall_limit, individual.fitness))
        else:
            self._log.debug(
                'Individual {} ran out of simulation steps. It achieved fitness {}'
                .format(individual.id, individual.fitness))