Example #1
def generate_child(model1, model2, tensor_size, layers_info, cfg):
    # Cross the parents' flattened weights, then apply random mutations
    child_weights = crossingover(model1, model2)
    mutation(child_weights, cfg.NUM_MUTATION_WEIGHTS, cfg.MUTATION_FACTOR)
    # Fresh model with the same architecture; its weights are overwritten below
    child_model = network.generate_model(tensor_size)

    # Fold the flat gene vector back into per-layer weight matrices,
    # consuming the vector slice by slice
    for layer_idx in range(len(child_model.layers)):
        weights_mtrx = layers_info[layer_idx].get_weights_mtrx(
            child_weights[:layers_info[layer_idx].size()])
        child_model.layers[layer_idx].set_weights(weights_mtrx)
        child_weights = child_weights[layers_info[layer_idx].size():]

    return child_model
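
The crossingover and mutation helpers are defined elsewhere in the module and are not shown in these examples. A minimal sketch of what they might look like, assuming the genome is both parents' weights flattened into a single 1-D NumPy vector; uniform crossover and additive mutation are assumptions here, not necessarily the module's actual operators:

import numpy as np

def crossingover(model1, model2):
    # Flatten each parent's weights into one gene vector (assumed layout)
    genes1 = np.concatenate([w.flatten() for w in model1.get_weights()])
    genes2 = np.concatenate([w.flatten() for w in model2.get_weights()])
    # Uniform crossover: each gene comes from either parent with equal chance
    mask = np.random.rand(genes1.size) < 0.5
    return np.where(mask, genes1, genes2)

def mutation(child_weights, num_mutation_weights, mutation_factor):
    # Perturb a few randomly chosen genes in place
    idxs = np.random.randint(0, child_weights.size, num_mutation_weights)
    child_weights[idxs] += np.random.uniform(
        -mutation_factor, mutation_factor, num_mutation_weights)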
Example #2
def selection(num_networks, rewards, num_selected, num_random, num_new_random,
              tensor_size):
    result = []  # kept for API compatibility; the caller ignores this value
    nn_name_tmpl = 'nn{}.h5'

    # Copy the top-scoring networks into the first file slots; each selected
    # network's reward is zeroed so it cannot be picked twice
    for i in range(num_selected - num_random - num_new_random):
        best_network_idx = rewards.index(max(rewards))
        nn = models.load_model(nn_name_tmpl.format(best_network_idx))
        nn.save(nn_name_tmpl.format(i))
        rewards[best_network_idx] = 0

    # Copy randomly chosen networks into the next slots
    for i in range(num_random):
        random_idx = random.randint(0, num_networks - 1)
        nn = models.load_model(nn_name_tmpl.format(random_idx))
        nn.save(
            nn_name_tmpl.format(num_selected - num_random - num_new_random +
                                i))

    # Fill the remaining slots with freshly generated random networks
    for i in range(num_new_random):
        new_model = network.generate_model(tensor_size)
        new_model.save(nn_name_tmpl.format(num_selected - num_new_random + i))

    return result
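
For illustration only (the parameter values below are assumed, not from the source): with num_selected = 10, num_random = 2 and num_new_random = 2, the three loops fill slots nn0.h5 through nn5.h5 with the six top-scoring networks, nn6.h5 and nn7.h5 with copies of randomly picked networks, and nn8.h5 and nn9.h5 with freshly generated models.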
Example #3
def main():
    # Loading configuration
    cfg = config.Config('ga_cfg.json')
    cfg.update()

    # Creating the environment
    env = cartpole.CartPole(img_mode=True,
                            img_size=(100, 150),
                            num_prev_states=cfg.NUM_PREVIOUS_USING_STATES)

    # Generate the starting population of random networks
    for i in range(cfg.NUM_START_POPULATION):
        nn = network.generate_model(env.tensor_shape)
        nn.save(NN_NAME_TMPL.format(i))
        utils.clear_session()

    # Load the first network (used only to read the layer structure)
    nn = models.load_model(NN_NAME_TMPL.format(0))

    # Collect weight-shape information for every network layer
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(utils.Weights(nn.layers[i]))

    # Best reward seen across all generations
    max_reward = 0

    # Main loop over generations
    for gen_idx in range(cfg.NUM_GENERATION):
        print('#### GENERATION {} ####'.format(gen_idx))

        # Read updated configuration
        cfg.update()

        # In the first generation there are no parents yet, so no children are bred
        if gen_idx == 0:
            num_networks = cfg.NUM_START_POPULATION
        # Otherwise breed children from the parent networks
        else:
            num_tasks = cfg.NUM_PARENT_NETWORKS * cfg.CHILDREN_PER_PARENT

            for net_idx in range(cfg.NUM_PARENT_NETWORKS):
                for child_idx in range(cfg.CHILDREN_PER_PARENT):
                    partner_idx = geneticalg.get_partner(
                        net_idx, cfg.NUM_PARENT_NETWORKS)

                    nn_parent1 = models.load_model(
                        NN_NAME_TMPL.format(net_idx))
                    nn_parent2 = models.load_model(
                        NN_NAME_TMPL.format(partner_idx))

                    child_model = geneticalg.generate_child(
                        nn_parent1, nn_parent2, env.tensor_shape, layers_info,
                        cfg)

                    # File slot where the child is saved
                    save_idx = (cfg.NUM_PARENT_NETWORKS + child_idx +
                                net_idx * cfg.CHILDREN_PER_PARENT)

                    child_model.save(NN_NAME_TMPL.format(save_idx))

                    print('Generating: {}%\r'.format(
                        (save_idx - cfg.NUM_PARENT_NETWORKS) / num_tasks *
                        100),
                          end='')

                    utils.clear_session()
                utils.clear_session()
            print('')
            num_networks = (cfg.NUM_PARENT_NETWORKS +
                            cfg.CHILDREN_PER_PARENT * cfg.NUM_PARENT_NETWORKS)

        # Rewards for every network in the current generation
        gen_rewards = [0 for i in range(num_networks)]

        # Evaluate each neural network in the population
        for network_idx in range(num_networks):
            current_nn = models.load_model(NN_NAME_TMPL.format(network_idx))

            # Rewards collected over several runs of the current network
            nn_rewards = np.array([])
            # Average over several attempts to smooth out randomness
            for start_id in range(cfg.NUM_STARTS_FOR_AVRG):
                env.prepare_env()

                while not env.is_done():
                    obs = env.get_obs()

                    predict = current_nn.predict(obs)
                    # Threshold the network's single output at 0.5 to choose between the two actions
                    action = 0 if predict[0][0] < 0.5 else 1

                    env.step(action)

                nn_rewards = np.append(nn_rewards, env.get_reward())

            # Store the average reward for the current network
            gen_rewards[network_idx] = int(np.mean(nn_rewards))
            # If this network beats the best reward so far, persist the reward and the model
            if max_reward < gen_rewards[network_idx]:
                max_reward = gen_rewards[network_idx]
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')

            print('Network {}: {}'.format(network_idx,
                                          gen_rewards[network_idx]))
            utils.clear_session()

        # Report the results of the current generation
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(gen_rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        # Select the networks that will parent the next generation
        # (selection() currently returns an empty list, so nnetworks is unused)
        nnetworks = geneticalg.selection(num_networks, gen_rewards,
                                         cfg.NUM_PARENT_NETWORKS,
                                         cfg.RANDOM_SELECTED_NETWORKS,
                                         cfg.NEW_GENERATED_RANDOM_NETWORK,
                                         env.tensor_shape)

        utils.clear_session()
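
main() relies on a utils.Weights helper exposing size() and get_weights_mtrx() that is not shown in these examples. A plausible sketch, inferred from the call sites above (the class and method names match the calls; the body itself is an assumption): it records one layer's weight shapes so a flat gene vector can be folded back into set_weights()-compatible arrays.

import numpy as np

class Weights:
    # Hypothetical reconstruction of utils.Weights, inferred from its call sites
    def __init__(self, layer):
        # Remember the shape of every weight array in the layer
        self._shapes = [w.shape for w in layer.get_weights()]

    def size(self):
        # Total number of scalar weights in the layer
        return int(sum(np.prod(s) for s in self._shapes))

    def get_weights_mtrx(self, flat):
        # Cut the flat gene vector into chunks and reshape each to its array
        arrays, offset = [], 0
        for shape in self._shapes:
            n = int(np.prod(shape))
            arrays.append(np.asarray(flat[offset:offset + n]).reshape(shape))
            offset += n
        return arrays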