Example #1
def crossingover1(model1, model2):
    result = []

    for layer_idx in range(len(model1.layers)):
        m1_weights = Weights(model1.layers[layer_idx])
        m2_weights = Weights(model2.layers[layer_idx])
        m1_weights_list = m1_weights.get_weights_list()
        m2_weights_list = m2_weights.get_weights_list()

        model_idx = random.randint(0, 1)
        if model_idx == 0:
            result.extend(m1_weights_list)
        else:
            result.extend(m2_weights_list)
    
    return result
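
Both crossover variants depend on a Weights helper that is not part of this listing. A minimal sketch of what it presumably does, assuming standard Keras layers and NumPy; the restore method is a hypothetical counterpart for turning a flat genome back into weight arrays:

import numpy as np

class Weights:
    def __init__(self, layer):
        # keep the layer's weight arrays and remember their shapes
        self._arrays = layer.get_weights()
        self.shapes = [a.shape for a in self._arrays]

    def get_weights_list(self):
        # flatten all of the layer's weight arrays into one flat list
        return [v for a in self._arrays for v in a.flatten()]

    def restore(self, flat_list):
        # hypothetical inverse of get_weights_list: cut the flat list
        # back into arrays of the remembered shapes
        arrays, pos = [], 0
        for shape in self.shapes:
            n = int(np.prod(shape))
            arrays.append(np.asarray(flat_list[pos:pos + n],
                                     dtype='float32').reshape(shape))
            pos += n
        return arrays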
Example #2
def crossingover3(model1, model2):
    result = []

    for layer_idx in range(len(model1.layers)):
        m1_weights = Weights(model1.layers[layer_idx])
        m2_weights = Weights(model2.layers[layer_idx])
        m1_weights_list = m1_weights.get_weights_list()
        m2_weights_list = m2_weights.get_weights_list()
        
        if len(m1_weights_list) == 0:
            continue

        separate_idx = random.randint(0, len(m1_weights_list) - 1)
        for weight_idx in range(0, separate_idx):
            result.append(m1_weights_list[weight_idx])
        for weight_idx in range(separate_idx, len(m1_weights_list)):
            result.append(m2_weights_list[weight_idx])
    
    return result
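
crossingover1 inherits each layer wholesale from a randomly chosen parent, while crossingover3 performs single-point crossover inside every layer. Neither listing shows how the resulting flat genome becomes a child model; below is a hedged sketch of the generate_child used by the main loops that follow, reusing the hypothetical Weights.restore from above and assuming numpy as np, random, and the GA constants from the surrounding examples:

def generate_child(model1, model2, input_shape, layers_info):
    # crossover yields a flat genome, laid out layer by layer
    genome = crossingover3(model1, model2)

    # mutate a few randomly chosen genes
    for _ in range(NUM_MUTATION_WEIGHTS):
        idx = random.randint(0, len(genome) - 1)
        genome[idx] += random.uniform(-MUTATION_FACTOR, MUTATION_FACTOR)

    # write the genome into a fresh model of the same architecture
    child = generate_model(input_shape)
    pos = 0
    for layer, info in zip(child.layers, layers_info):
        n = sum(int(np.prod(s)) for s in info.shapes)
        if n > 0:
            layer.set_weights(info.restore(genome[pos:pos + n]))
        pos += n
    return child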
Example #3
def main():
    env = gym.make('CartPole-v1')
    env.reset()
    env.render()
    time.sleep(0.5)
    gym_img = current_window_img(WINDOW_OFFSET)

    img_tensor = np.array(gym_img, dtype='float')

    conv_base = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=img_tensor.shape)

    nnetworks = [generate_model((46080, )) for i in range(NUM_PARENT_NETWORKS)]

    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))

    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))

        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx], (46080, ),
                                             layers_info)
                nnetworks.append(child_model)

        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            reward = 0
            env.reset()

            last_tensor = None
            while reward < MAX_REWARD:
                env.render()
                gym_img = current_window_img(WINDOW_OFFSET)
                gym_tensor = np.array(gym_img, dtype='float')
                gym_tensor = np.expand_dims(gym_tensor, axis=0)

                a = time.time()
                conv_predict = conv_base.predict(gym_tensor)
                print('conv_base :' + str(time.time() - a))
                conv_predict = conv_predict.reshape((1, 46080))
                a = time.time()
                predict = nnetworks[network_idx].predict(conv_predict)
                print('nnetworks :' + str(time.time() - a))

                action = 0 if predict[0][0] < 0.5 else 1
                _, _, done, _ = env.step(action)
                reward += 1

                last_tensor = gym_tensor

                if done:
                    rewards[network_idx] = reward
                    break

            print('Network {}: {}'.format(network_idx, reward))
        print('MAX REWARD: {}'.format(max(rewards)))
        print('-' * 40)

        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)

        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')

        nnetworks.clear()

        K.clear_session()
        gc.collect()

        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
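
The selection call above is also external to these listings. Given its two count arguments, one plausible reading is elitism plus a few random survivors for diversity; this is a sketch, not the author's implementation (the signature changes again in the disk-based examples further down):

def selection(networks, rewards, num_parents, num_random):
    # rank the population by reward, best first
    order = sorted(range(len(networks)),
                   key=lambda i: rewards[i], reverse=True)
    # keep the elite ...
    keep = [networks[i] for i in order[:num_parents - num_random]]
    # ... plus a few random non-elite networks for diversity
    rest = order[num_parents - num_random:]
    keep += [networks[i] for i in random.sample(rest, num_random)]
    return keep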
Example #4
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR

    env = gym.make('CartPole-v1')
    env.reset()
    
    # time.sleep(0.5)
    
    gym_img = env.render(mode='rgb_array') #current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[150:350, 100:500]
    gym_img = resize(gym_img, (100, 200))
    # exit()
    gym_img = gym_img.astype('float32') / 255.0

    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape((img_tensor.shape[0],
                                    img_tensor.shape[1],
                                    1))

    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:,:,0:1], axis=2)

    nnetworks = [generate_model(img_tensor.shape) 
                 for i in range(NUM_PARENT_NETWORKS)]

    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
            print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT, NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)
        
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape,
                                             layers_info)
                nnetworks.append(child_model)

        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()

                prev_states = np.zeros((img_tensor.shape[0],
                                        img_tensor.shape[1],
                                        img_tensor.shape[2] - 1))
                # for i in range(img_tensor.shape[2] - 1):
                #     prev_states[:,:,i:i+1] = img_tensor[:,:,0:1]

                while reward < MAX_REWARD:
                    env.render()
                    gym_img = env.render(mode='rgb_array') #current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[150:350, 100:500]
                    gym_img = resize(gym_img, (100, 200))
                    gym_img = gym_img.astype('float32') / 255.0

                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape((gym_tensor.shape[0],
                                                    gym_tensor.shape[1],
                                                    1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor, prev_states[:,:,i:i+1], axis=2)

                    gym_tensor = np.expand_dims(gym_tensor, axis=0)

                    predict = nnetworks[network_idx].predict(gym_tensor)
                    action = 0 if predict[0][0] < 0.5 else 1
                    _, _, done, _ = env.step(action)
                    reward += 1

                    if done:
                        run_results = np.append(run_results, reward)
                        break
                    else:
                        # if reward % 2 == 0:
                        update_prev_states(prev_states, gym_tensor[:,:,:,0:1])

            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                nnetworks[network_idx].save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
        
        print('-'*40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-'*40)

        nnetworks = selection(nnetworks, 
                              rewards, 
                              NUM_PARENT_NETWORKS, 
                              RANDOM_SELECTED_NETWORKS)

        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp'+str(i) + '.h5')
        
        nnetworks.clear()
        
        K.clear_session()
        gc.collect()

        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
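
GAConfig.txt is re-read at the top of every generation, so the GA hyperparameters can be tuned while a run is in progress. From the reading order above it is a four-line file holding NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT, NUM_MUTATION_WEIGHTS and MUTATION_FACTOR; the values here are illustrative only:

10
5
20
0.1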
Example #5
def main():
    env = gym.make('CartPole-v1')
    env.reset()
    # env.render()
    # time.sleep(0.5)

    gym_img = env.render(mode='rgb_array') #current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape((img_tensor.shape[0],
                                     img_tensor.shape[1],
                                     1))


    nnetworks = [generate_model(img_tensor.shape) 
                 for i in range(NUM_PARENT_NETWORKS)]

    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))

    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))

        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape,
                                             layers_info)
                nnetworks.append(child_model)

        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            reward = 0
            env.reset()

            while reward < MAX_REWARD:
                env.render()
                gym_img = current_window_img(WINDOW_OFFSET)
                gym_tensor = np.array(gym_img, dtype='float')
                gym_tensor = gym_tensor.reshape((gym_tensor.shape[0],
                                                 gym_tensor.shape[1],
                                                 1))
                gym_tensor = np.expand_dims(gym_tensor, axis=0)

                predict = nnetworks[network_idx].predict(gym_tensor)
                action = 0 if predict[0][0] < 0.5 else 1
                _, _, done, _ = env.step(action)
                reward += 1

                if done:
                    rewards[network_idx] = reward
                    break
            
            print('Network {}: {}'.format(network_idx, reward))
        print('MAX REWARD: {}'.format(max(rewards)))
        print('-'*40)

        nnetworks = selection(nnetworks, 
                              rewards, 
                              NUM_PARENT_NETWORKS, 
                              RANDOM_SELECTED_NETWORKS)

        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp'+str(i) + '.h5')
        
        nnetworks.clear()
        
        K.clear_session()
        gc.collect()

        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
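
This variant captures frames with current_window_img instead of env.render(mode='rgb_array'). The helper is not shown anywhere in these listings; a sketch of what it plausibly does, assuming Pillow's ImageGrab and that WINDOW_OFFSET is a (left, top, width, height) tuple:

from PIL import ImageGrab

def current_window_img(offset):
    left, top, width, height = offset  # assumed tuple layout
    # grab the screen region where the gym window is rendered
    return ImageGrab.grab(bbox=(left, top, left + width, top + height))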
Example #6
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR

    env = gym.make('CarRacing-v0')  # assumed env: the state_pixels render
                                    # mode, [steer, gas, brake] actions and
                                    # HUD crop below are CarRacing-specific
    # exit()
    env.reset()


    gym_img = env.render(mode='state_pixels')
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[:-12, :]
    
    # print(gym_img.shape)
    gym_img = resize(gym_img, gym_img.shape)  # no-op: resizes to the current shape

    gym_img = gym_img.astype('float32') / 255.0

    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape((img_tensor.shape[0],
                                     img_tensor.shape[1],
                                     1))

    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:,:,0:1], axis=2)
    
    print(img_tensor.shape)
    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(img_tensor.shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()
    
    K.clear_session()
    gc.collect()

    nn = models.load_model('nn0.h5')

    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
            print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT, NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)
        
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1,
                                             nn_parent2,
                                             img_tensor.shape,
                                             layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                K.clear_session()
                gc.collect()
            K.clear_session()
            gc.collect()

        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS
        
        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = MAX_REWARD
                samples = 0
                env.reset()
                for i in range(50): env.step([0, 0, 0]) 
                
                prev_states = np.zeros((img_tensor.shape[0],
                                        img_tensor.shape[1],
                                        img_tensor.shape[2] - 1))

                while reward > 0:
                    # time.sleep(0.01)
                    env.render()
                    gym_img = env.render(mode='state_pixels')
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[:-12, :]
                    gym_img = gym_img.astype('float32') / 255.0

                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape((gym_tensor.shape[0],
                                                    gym_tensor.shape[1],
                                                    1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor, prev_states[:,:,i:i+1], axis=2)

                    gym_tensor = np.expand_dims(gym_tensor, axis=0)
                    
                    action = [0, 0, 0]
                    predict = current_nn.predict(gym_tensor)
                    action[0] = (predict[0][0] - 0.5) * 2.0
                    if predict[0][1] > predict[0][2]:
                        action[1], action[2] = 0.6, 0
                    else:
                        action[1], action[2] = 0, 0.6
                    # action[1] = 0 if predict[0][1] < 0.5 else 0.6
                    # action[2] = 0 if predict[0][2] < 0.5 else 0.6
                    # action[0] = (predict[0][0] - 0.5) * 2.0
                    # action[1] = predict[0][1]
                    # action[2] = predict[0][2]
                    # print(action)
                    _, rew, done, _ = env.step(action)
                    reward += rew
                    reward = clamp(reward, -10, MAX_REWARD)
                    samples += 0.1

                    # print(rew, done, lives)
                    # if done:
                    #     # run_results = np.append(run_results, rew)
                    #     break
                    # else:
                    #     # if reward % 2 == 0:
                    update_prev_states(prev_states, gym_tensor[:,:,:,0:1])
                run_results = np.append(run_results, samples)

            rewards[network_idx] = float(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
            
            K.clear_session()
            gc.collect()

        print('-'*40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-'*40)

        nnetworks = selection(num_networks,
                              rewards,
                              NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)

        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp'+str(i) + '.h5')
        
        # nnetworks.clear()
        
        K.clear_session()
        gc.collect()
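
In this variant the population lives on disk (nn<i>.h5): models are loaded one at a time, and K.clear_session() plus gc.collect() run after each use so TensorFlow's graph does not grow across hundreds of model instantiations. Two small helpers used above are also external; sketches of their likely behavior, with the frame layout of update_prev_states inferred from how gym_tensor is assembled:

def clamp(value, lowest, highest):
    # keep value inside [lowest, highest]
    return max(lowest, min(highest, value))

def update_prev_states(prev_states, newest):
    # shift the stored frames one channel deeper and put the newest
    # frame, passed as a (1, H, W, 1) tensor, into channel 0
    prev_states[:, :, 1:] = prev_states[:, :, :-1].copy()
    prev_states[:, :, 0:1] = newest[0]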
Example #7
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR

    env = cartpole.CartPole(img_mode=True, img_size=(25, 25))

    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(env.tensor_shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()

    K.clear_session()
    gc.collect()

    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]

    nn = models.load_model('nn0.h5')

    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
            print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
                  NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)

        num_tasks = NUM_PARENT_NETWORKS * CHILDREN_PER_PARENT
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             env.tensor_shape, layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                print('Generating: {}%\r'.format(
                    int(
                        float(net_idx * CHILDREN_PER_PARENT + child_idx) /
                        num_tasks * 100)),
                      end='')
                K.clear_session()
                gc.collect()
            K.clear_session()
            gc.collect()
            # nnetworks.append(child_model)
        print('')

        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS

        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                env.prepare_env()

                while not env.is_done():
                    obs = env.get_obs()

                    predict = current_nn.predict(obs)
                    action = 0 if predict[0][0] < 0.5 else 1

                    env.step(action)

                run_results = np.append(run_results, env.get_reward())
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
            K.clear_session()
            gc.collect()

        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS,
                              NEW_GENERATED_RANDOM_NETWORK, env.tensor_shape)

        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp'+str(i) + '.h5')

        # nnetworks.clear()

        K.clear_session()
        gc.collect()
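
get_partner_idx is called with the population list in the in-memory examples and with NUM_PARENT_NETWORKS in the disk-based ones; a sketch that accepts both and merely avoids pairing a network with itself:

def get_partner_idx(own_idx, pool):
    size = pool if isinstance(pool, int) else len(pool)
    partner_idx = random.randint(0, size - 1)
    while partner_idx == own_idx:  # assumes size > 1
        partner_idx = random.randint(0, size - 1)
    return partner_idx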
Example #8
def main():
    # Create the gym environment
    env = gym.make('CartPole-v1')
    env.reset()
    env.render()
    time.sleep(0.5)

    # Capture the first frame to size the network layers
    gym_img = current_window_img(WINDOW_OFFSET)

    # Convert the image to a tensor
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))

    # Triple the tensor depth to account for the two previous frames
    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)

    # Create the initial population of neural networks
    nnetworks = [
        generate_model(img_tensor.shape) for i in range(NUM_PARENT_NETWORKS)
    ]

    # Collect per-layer weight metadata
    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))

    max_reward = 0
    # Main loop: iterate over the generations
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))

        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape, layers_info)
                nnetworks.append(child_model)

        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()

                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))

                while reward < MAX_REWARD:
                    env.render()
                    gym_img = current_window_img(WINDOW_OFFSET)
                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)

                    gym_tensor = np.expand_dims(gym_tensor, axis=0)

                    predict = nnetworks[network_idx].predict(gym_tensor)
                    action = 0 if predict[0][0] < 0.5 else 1
                    _, _, done, _ = env.step(action)
                    reward += 1

                    if done:
                        run_results = np.append(run_results, reward)
                        break
                    else:
                        if reward % 3 == 0:
                            update_prev_states(prev_states, gym_tensor[:, :, :,
                                                                       0:1])
                    # if (reward >= 9):
                    #     save_states(prev_states)
                    #     exit()

            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                nnetworks[network_idx].save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))

        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)

        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')

        nnetworks.clear()

        K.clear_session()
        gc.collect()

        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
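
generate_model is the last undefined piece. For the image-based CartPole runs it presumably builds a small untrained CNN whose single sigmoid output is thresholded at 0.5 to pick the action; a minimal sketch in Keras:

from keras import layers, models

def generate_model(input_shape):
    model = models.Sequential()
    model.add(layers.Conv2D(8, (3, 3), activation='relu',
                            input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model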
Example #9
def main():
    env = gym.make('Pendulum-v0')
    env.reset()

    gym_img = env.render(mode='rgb_array')  #current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[125:375, 125:375]
    gym_img = resize(gym_img, (25, 25))
    gym_img = gym_img.astype('float32') / 255.0

    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))

    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)

    nnetworks = [
        generate_model(img_tensor.shape) for i in range(NUM_PARENT_NETWORKS)
    ]

    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))

        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape, layers_info)
                nnetworks.append(child_model)

        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()

                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))
                left_costs = START_COSTS
                while True:
                    env.render()
                    gym_img = env.render(
                        mode='rgb_array')  #current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[125:375, 125:375]
                    gym_img = resize(gym_img, (25, 25))
                    gym_img = gym_img.astype('float32') / 255.0

                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)

                    gym_tensor = np.expand_dims(gym_tensor, axis=0)

                    predict = nnetworks[network_idx].predict(gym_tensor)
                    print(predict)
                    action = [(predict[0][0] - 0.5) * 4.0]
                    _, costs, _, _ = env.step(action)

                    left_costs += costs
                    reward += 1

                    if left_costs < 0:
                        run_results = np.append(run_results, reward)
                        break
                    else:
                        # if reward % 2 == 0:
                        update_prev_states(prev_states, gym_tensor[:, :, :,
                                                                   0:1])

            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                nnetworks[network_idx].save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))

        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)

        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')

        nnetworks.clear()

        K.clear_session()
        gc.collect()

        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
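
Pendulum-v0 takes a single torque in [-2, 2], so (predict[0][0] - 0.5) * 4.0 rescales the sigmoid output into that range. Pendulum's per-step rewards are negative costs, so each episode runs until the accumulated costs exhaust the START_COSTS budget, and the number of steps survived becomes the fitness.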
Example #10
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR

    env = gym.make('Breakout-v0')
    env.frameskip = 0
    env.reset()
    env.step(1)

    # print(env.get_action_meanings())
    # exit()
    # time.sleep(0.5)

    gym_img = env.render(mode='rgb_array')
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[33:-17, :]
    gym_img = resize(gym_img, (70, 70))
    gym_img = gym_img.astype('float32') / 255.0

    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))

    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)

    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(img_tensor.shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()

    K.clear_session()
    gc.collect()

    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]

    nn = models.load_model('nn0.h5')

    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
            print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
                  NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)

        num_tasks = NUM_PARENT_NETWORKS * CHILDREN_PER_PARENT
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             img_tensor.shape, layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                K.clear_session()
                gc.collect()
                print('Generating: {}%\r'.format(
                    int(
                        float(net_idx * CHILDREN_PER_PARENT + child_idx) /
                        num_tasks * 100)),
                      end='')
            K.clear_session()
            gc.collect()
            # nnetworks.append(child_model)

        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS

        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                sample = 0
                env.reset()
                # env.seed()

                for i in range(10):
                    env.step(0)
                env.step(1)
                env.step(1)
                env.step(1)

                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))
                # for i in range(img_tensor.shape[2] - 1):
                #     prev_states[:,:,i:i+1] = img_tensor[:,:,0:1]

                prev_lives = 5
                while True:
                    # time.sleep(0.01)
                    env.render()
                    gym_img = env.render(
                        mode='rgb_array')  #current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    # gym_img = gym_img[150:350, 200:400]
                    gym_img = gym_img[33:-17, :]
                    gym_img = resize(gym_img, (70, 70))
                    gym_img = gym_img.astype('float32') / 255.0

                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)

                    gym_tensor = np.expand_dims(gym_tensor, axis=0)

                    predict = current_nn.predict(gym_tensor)
                    predict = np.argmax(predict)
                    if predict == 0:
                        action = 0
                    elif predict == 1:
                        action = 2
                    elif predict == 2:
                        action = 3
                    # else:
                    #     action = 3

                    obs, rew, done, lives = env.step(action)
                    reward += rew
                    sample += 1

                    if lives['ale.lives'] < prev_lives:
                        break

                    # print(rew, done, lives)
                    if done:
                        # run_results = np.append(run_results, rew)
                        break
                    else:
                        # if reward % 2 == 0:
                        update_prev_states(prev_states, gym_tensor[:, :, :,
                                                                   0:1])
                # print(reward)
                # reward = int(input())
                run_results = np.append(run_results, sample)

            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))

            K.clear_session()
            gc.collect()

        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS,
                              NEW_GENERATED_RANDOM_NETWORK, img_tensor.shape)

        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp'+str(i) + '.h5')

        # nnetworks.clear()

        K.clear_session()
        gc.collect()
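
Breakout-v0's action set is ['NOOP', 'FIRE', 'RIGHT', 'LEFT'], so the mapping above translates the network's argmax to stay/right/left, with FIRE issued explicitly after each reset to launch the ball. Fitness is the number of frames survived (sample), and an episode ends as soon as ale.lives drops below prev_lives, i.e. after the first lost life.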
Example #11
def main():
    env = gym.make('CartPole-v1')
    env.reset()
    env.render()
    time.sleep(0.5)
    gym_img = current_window_img(WINDOW_OFFSET)

    img_tensor = np.array(gym_img, dtype='float')

    # vgg16 = VGG16(weights='imagenet',
    #               include_top=False,
    #               input_shape=img_tensor.shape)

    # conv_base = models.Sequential()
    # conv_base.add(vgg16.layers[1])
    # conv_base.add(vgg16.layers[3])
    # conv_base.add(vgg16.layers[4])
    # conv_base.add(vgg16.layers[6])
    # conv_base.add(vgg16.layers[7])
    # conv_base.add(vgg16.layers[10])

    # conv_base.trainable = False

    # conv_base.compile(optimizer=optimizers.RMSprop(lr=1e-4),
    #                   loss='binary_crossentropy',
    #                   metrics=['acc'])

    # gym_img = current_window_img(WINDOW_OFFSET)
    # gym_tensor = np.array(gym_img, dtype='float')
    # gym_tensor = np.expand_dims(gym_tensor, axis=0)

    # a = conv_base.predict(gym_tensor)
    # conv_base.summary()
    # exit()

    nnetworks = [generate_model((1575, )) for i in range(NUM_PARENT_NETWORKS)]

    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))

    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))

        vgg16 = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=img_tensor.shape)
        conv_base = models.Sequential()
        conv_base.add(vgg16.layers[1])
        conv_base.add(vgg16.layers[3])
        conv_base.add(vgg16.layers[4])
        conv_base.add(vgg16.layers[6])
        conv_base.add(vgg16.layers[7])
        conv_base.add(vgg16.layers[10])

        conv_base.trainable = False

        conv_base.compile(optimizer=optimizers.RMSprop(lr=1e-4),
                          loss='binary_crossentropy',
                          metrics=['acc'])

        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx], (1575, ),
                                             layers_info)
                nnetworks.append(child_model)

        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            reward = 0
            env.reset()

            last_tensor = None
            while reward < MAX_REWARD:
                env.render()
                gym_img = current_window_img(WINDOW_OFFSET)
                gym_tensor = np.array(gym_img, dtype='float')
                gym_tensor = np.expand_dims(gym_tensor, axis=0)

                # a = time.time()
                conv_predict = conv_base.predict(gym_tensor)
                # print('conv_base :' + str(time.time() - a))
                conv_predict = conv_predict[:, :, :, 0:1]
                conv_predict = conv_predict.reshape((1, 1575))

                # a = time.time()
                predict = nnetworks[network_idx].predict(conv_predict)
                # print('nnetworks :' + str(time.time() - a))

                action = 0 if predict[0][0] < 0.5 else 1
                _, _, done, _ = env.step(action)
                reward += 1

                last_tensor = gym_tensor

                if done:
                    rewards[network_idx] = reward
                    break

            print('Network {}: {}'.format(network_idx, reward))
        print('MAX REWARD: {}'.format(max(rewards)))
        print('-' * 40)

        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)

        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')

        nnetworks.clear()

        K.clear_session()
        gc.collect()

        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
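
Unlike Example #3, which feeds the full VGG16 convolutional base, this variant stacks only a subset of its layers and keeps just the first output channel, shrinking the feature vector to 1575 values. The conv base has to be rebuilt at the top of each generation because K.clear_session() at the end of the previous one destroys every model in the session, the feature extractor included.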
Example #12
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR

    env = gym.make('Pendulum-v0')
    env.reset()

    # time.sleep(0.5)

    gym_img = env.render(mode='rgb_array')  #current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[125:375, 125:375]
    gym_img = resize(gym_img, (25, 25))
    # exit()
    gym_img = gym_img.astype('float32') / 255.0

    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))

    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)

    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(img_tensor.shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()

    K.clear_session()
    gc.collect()

    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]

    nn = models.load_model('nn0.h5')

    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
            print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
                  NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)

        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             img_tensor.shape, layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                K.clear_session()
                gc.collect()
            K.clear_session()
            gc.collect()
            # nnetworks.append(child_model)

        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS

        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()

                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))
                # for i in range(img_tensor.shape[2] - 1):
                #     prev_states[:,:,i:i+1] = img_tensor[:,:,0:1]

                left_costs = START_COSTS
                while True:
                    env.render()
                    gym_img = env.render(
                        mode='rgb_array')  #current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[125:375, 125:375]
                    gym_img = resize(gym_img, (25, 25))
                    gym_img = gym_img.astype('float32') / 255.0

                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)

                    gym_tensor = np.expand_dims(gym_tensor, axis=0)

                    predict = current_nn.predict(gym_tensor)
                    action = [(predict[0][0] - 0.5) * 4.0]
                    _, costs, _, _ = env.step(action)
                    reward += 1

                    left_costs += costs

                    if left_costs < 0:
                        run_results = np.append(run_results, reward)
                        break
                    else:
                        # if reward % 2 == 0:
                        update_prev_states(prev_states, gym_tensor[:, :, :,
                                                                   0:1])

            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))

        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)

        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp'+str(i) + '.h5')

        # nnetworks.clear()

        K.clear_session()
        gc.collect()