def crossingover1(model1, model2):
    # Per-layer uniform crossover: for every layer, take the whole weight
    # list from one randomly chosen parent.
    result = []
    for layer_idx in range(len(model1.layers)):
        m1_weights = Weights(model1.layers[layer_idx])
        m2_weights = Weights(model2.layers[layer_idx])
        m1_weights_list = m1_weights.get_weights_list()
        m2_weights_list = m2_weights.get_weights_list()
        model_idx = random.randint(0, 1)
        if model_idx == 0:
            result.extend(m1_weights_list)
        else:
            result.extend(m2_weights_list)
    return result
def crossingover3(model1, model2):
    # Single-point crossover within each layer: weights before a random
    # split index come from model1, the rest from model2.
    result = []
    for layer_idx in range(len(model1.layers)):
        m1_weights = Weights(model1.layers[layer_idx])
        m2_weights = Weights(model2.layers[layer_idx])
        m1_weights_list = m1_weights.get_weights_list()
        m2_weights_list = m2_weights.get_weights_list()
        if len(m1_weights_list) == 0:
            continue
        separate_idx = random.randint(0, len(m1_weights_list) - 1)
        for weight_idx in range(0, separate_idx):
            result.append(m1_weights_list[weight_idx])
        for weight_idx in range(separate_idx, len(m1_weights_list)):
            result.append(m2_weights_list[weight_idx])
    return result
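# Both crossover variants rely on a `Weights` helper that is not shown in
# this section. Below is a minimal sketch of the interface the call sites
# imply (wrap a Keras layer, expose its parameters as one flat list of
# scalars); the real class may carry more state and methods.
class Weights:
    def __init__(self, layer):
        self.layer = layer
        # Remember each array's shape so a flat list can be restored later.
        self.shapes = [w.shape for w in layer.get_weights()]

    def get_weights_list(self):
        # Flatten every weight array of the layer into a single 1-D list.
        return [float(v)
                for w in self.layer.get_weights()
                for v in w.flatten()]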
def main():
    env = gym.make('CartPole-v1')
    env.reset()
    env.render()
    time.sleep(0.5)
    gym_img = current_window_img(WINDOW_OFFSET)
    img_tensor = np.array(gym_img, dtype='float')
    # Frozen VGG16 convolutional base used as a fixed feature extractor.
    conv_base = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=img_tensor.shape)
    nnetworks = [generate_model((46080, )) for i in range(NUM_PARENT_NETWORKS)]
    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             (46080, ), layers_info)
                nnetworks.append(child_model)
        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            reward = 0
            env.reset()
            while reward < MAX_REWARD:
                env.render()
                gym_img = current_window_img(WINDOW_OFFSET)
                gym_tensor = np.array(gym_img, dtype='float')
                gym_tensor = np.expand_dims(gym_tensor, axis=0)
                a = time.time()
                conv_predict = conv_base.predict(gym_tensor)
                print('conv_base :' + str(time.time() - a))
                conv_predict = conv_predict.reshape((1, 46080))
                a = time.time()
                predict = nnetworks[network_idx].predict(conv_predict)
                print('nnetworks :' + str(time.time() - a))
                action = 0 if predict[0][0] < 0.5 else 1
                _, _, done, _ = env.step(action)
                reward += 1
                if done:
                    break
            # Record the score after the loop so networks that reach
            # MAX_REWARD without `done` are counted too (the original wrote
            # it only inside the `if done:` branch, leaving such runs at 0).
            rewards[network_idx] = reward
            print('Network {}: {}'.format(network_idx, reward))
        print('MAX REWARD: {}'.format(max(rewards)))
        print('-' * 40)
        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        # Keras models cannot survive clear_session(), so round-trip the
        # survivors through disk.
        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')
        nnetworks.clear()
        K.clear_session()
        gc.collect()
        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
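# `current_window_img` is not defined in this section. A plausible sketch,
# assuming WINDOW_OFFSET is a (left, top, right, bottom) bounding box of the
# rendered gym window and that Pillow's ImageGrab is available (Windows /
# macOS); the original may capture the screen with a different library.
from PIL import ImageGrab

def current_window_img(window_offset):
    # Grab the region of the screen occupied by the gym render window.
    return ImageGrab.grab(bbox=window_offset)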
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR
    env = gym.make('CartPole-v1')
    env.reset()
    # time.sleep(0.5)
    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[150:350, 100:500]
    gym_img = resize(gym_img, (100, 200))
    # Note: skimage's rgb2gray/resize already return floats in [0, 1], so
    # this division scales the values down further; kept as in the original.
    gym_img = gym_img.astype('float32') / 255.0
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape((img_tensor.shape[0], img_tensor.shape[1], 1))
    # Stack NUM_PREVIOUS_USING_STATES extra channels for previous frames.
    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)
    nnetworks = [generate_model(img_tensor.shape)
                 for i in range(NUM_PARENT_NETWORKS)]
    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))
    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        # Re-read the GA hyperparameters each generation so they can be
        # tuned while the run is in progress.
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
        print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
              NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape, layers_info)
                nnetworks.append(child_model)
        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()
                prev_states = np.zeros((img_tensor.shape[0],
                                        img_tensor.shape[1],
                                        img_tensor.shape[2] - 1))
                # for i in range(img_tensor.shape[2] - 1):
                #     prev_states[:,:,i:i+1] = img_tensor[:,:,0:1]
                while reward < MAX_REWARD:
                    env.render()
                    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[150:350, 100:500]
                    gym_img = resize(gym_img, (100, 200))
                    gym_img = gym_img.astype('float32') / 255.0
                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape((gym_tensor.shape[0],
                                                     gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)
                    gym_tensor = np.expand_dims(gym_tensor, axis=0)
                    predict = nnetworks[network_idx].predict(gym_tensor)
                    action = 0 if predict[0][0] < 0.5 else 1
                    _, _, done, _ = env.step(action)
                    reward += 1
                    if done:
                        break
                    # if reward % 2 == 0:
                    update_prev_states(prev_states, gym_tensor[:, :, :, 0:1])
                # Append after the loop so runs that hit MAX_REWARD without
                # `done` still contribute (the original appended only on
                # `done`, leaving run_results empty in that case).
                run_results = np.append(run_results, reward)
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                nnetworks[network_idx].save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)
        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')
        nnetworks.clear()
        K.clear_session()
        gc.collect()
        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
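# The GAConfig.txt re-read at the top of each generation (here and in the
# variants below) must hold exactly four bare values, one per line, in the
# order they are read: NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
# NUM_MUTATION_WEIGHTS, MUTATION_FACTOR. An illustrative example (the
# numbers are placeholders, not recommended settings):
#
#     10
#     3
#     20
#     0.05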
def main():
    env = gym.make('CartPole-v1')
    env.reset()
    # env.render()
    # time.sleep(0.5)
    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape((img_tensor.shape[0], img_tensor.shape[1], 1))
    nnetworks = [generate_model(img_tensor.shape)
                 for i in range(NUM_PARENT_NETWORKS)]
    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape, layers_info)
                nnetworks.append(child_model)
        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            reward = 0
            env.reset()
            while reward < MAX_REWARD:
                env.render()
                # Note: the setup frame above comes from env.render(
                # mode='rgb_array') and is grayscaled, while this loop grabs
                # the raw window; the capture must match the (H, W, 1) shape
                # the reshape below expects.
                gym_img = current_window_img(WINDOW_OFFSET)
                gym_tensor = np.array(gym_img, dtype='float')
                gym_tensor = gym_tensor.reshape((gym_tensor.shape[0],
                                                 gym_tensor.shape[1], 1))
                gym_tensor = np.expand_dims(gym_tensor, axis=0)
                predict = nnetworks[network_idx].predict(gym_tensor)
                action = 0 if predict[0][0] < 0.5 else 1
                _, _, done, _ = env.step(action)
                reward += 1
                if done:
                    break
            # Written after the loop so reaching MAX_REWARD is scored too.
            rewards[network_idx] = reward
            print('Network {}: {}'.format(network_idx, reward))
        print('MAX REWARD: {}'.format(max(rewards)))
        print('-' * 40)
        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')
        nnetworks.clear()
        K.clear_session()
        gc.collect()
        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
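# `update_prev_states` (used in the frame-stacking variants above and
# below) is not defined in this section. A minimal sketch of the behaviour
# the call sites imply: shift the stored frames back one slot and put the
# newest frame in front. The real helper may differ.
def update_prev_states(prev_states, new_frame):
    # Shift older frames toward the back of the channel axis...
    for i in range(prev_states.shape[2] - 1, 0, -1):
        prev_states[:, :, i:i + 1] = prev_states[:, :, i - 1:i]
    # ...and store the newest frame (passed in with shape (1, H, W, 1))
    # in slot 0.
    prev_states[:, :, 0:1] = new_frame[0]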
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR
    env = gym.make('Ant-v2')
    # Deliberate early exit left in the original: this variant is
    # unfinished, and everything below (mode='state_pixels' rendering, a
    # [steer, gas, brake] action triple) targets a CarRacing-style
    # interface, not Ant-v2.
    exit()
    env.reset()
    gym_img = env.render(mode='state_pixels')
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[:-12, :]  # crop the indicator strip at the bottom
    # print(gym_img.shape)
    gym_img = resize(gym_img, gym_img.shape)
    gym_img = gym_img.astype('float32') / 255.0
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape((img_tensor.shape[0], img_tensor.shape[1], 1))
    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)
    print(img_tensor.shape)
    # Keep only one model in memory at a time: save each to disk and clear
    # the session immediately.
    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(img_tensor.shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()
    K.clear_session()
    gc.collect()
    nn = models.load_model('nn0.h5')
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))
    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
        print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
              NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             img_tensor.shape, layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                K.clear_session()
                gc.collect()
        K.clear_session()
        gc.collect()
        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS
        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                # Start with a reward budget that negative step rewards
                # drain; the run ends when the budget is spent.
                reward = MAX_REWARD
                samples = 0
                env.reset()
                # Let the initial zoom-in animation finish before capturing.
                for i in range(50):
                    env.step([0, 0, 0])
                prev_states = np.zeros((img_tensor.shape[0],
                                        img_tensor.shape[1],
                                        img_tensor.shape[2] - 1))
                while reward > 0:
                    # time.sleep(0.01)
                    env.render()
                    gym_img = env.render(mode='state_pixels')
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[:-12, :]
                    gym_img = gym_img.astype('float32') / 255.0
                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape((gym_tensor.shape[0],
                                                     gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)
                    gym_tensor = np.expand_dims(gym_tensor, axis=0)
                    action = [0, 0, 0]
                    predict = current_nn.predict(gym_tensor)
                    # Map the outputs to [steering, gas, brake]: steering in
                    # [-1, 1], then either gas or brake, whichever output
                    # is larger.
                    action[0] = (predict[0][0] - 0.5) * 2.0
                    if predict[0][1] > predict[0][2]:
                        action[1], action[2] = 0.6, 0
                    else:
                        action[1], action[2] = 0, 0.6
                    # action[1] = 0 if predict[0][1] < 0.5 else 0.6
                    # action[2] = 0 if predict[0][2] < 0.5 else 0.6
                    # action[0] = (predict[0][0] - 0.5) * 2.0
                    # action[1] = predict[0][1]
                    # action[2] = predict[0][2]
                    # print(action)
                    _, rew, done, _ = env.step(action)
                    reward += rew
                    reward = clamp(reward, -10, MAX_REWARD)
                    samples += 0.1
                    # print(rew, done, lives)
                    # if done:
                    #     run_results = np.append(run_results, rew)
                    #     break
                    # else:
                    #     if reward % 2 == 0:
                    update_prev_states(prev_states, gym_tensor[:, :, :, 0:1])
                # Fitness is survival time (in tenths of a step), not the
                # raw environment reward.
                run_results = np.append(run_results, samples)
            rewards[network_idx] = float(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
            K.clear_session()
            gc.collect()
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)
        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp' + str(i) + '.h5')
        # nnetworks.clear()
        K.clear_session()
        gc.collect()
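# `clamp` (used in the variant above) is not defined in this section; the
# standard definition the call implies:
def clamp(value, lowest, highest):
    # Restrict value to the closed interval [lowest, highest].
    return max(lowest, min(value, highest))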
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR
    # Custom CartPole wrapper that handles rendering, preprocessing and
    # frame stacking internally and exposes image observations directly.
    env = cartpole.CartPole(img_mode=True, img_size=(25, 25))
    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(env.tensor_shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()
    K.clear_session()
    gc.collect()
    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]
    nn = models.load_model('nn0.h5')
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))
    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
        print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
              NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)
        num_tasks = NUM_PARENT_NETWORKS * CHILDREN_PER_PARENT
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             env.tensor_shape, layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                print('Generating: {}%\r'.format(
                    int(float(net_idx * CHILDREN_PER_PARENT + child_idx)
                        / num_tasks * 100)), end='')
                K.clear_session()
                gc.collect()
        K.clear_session()
        gc.collect()
        # nnetworks.append(child_model)
        print('')
        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS
        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                env.prepare_env()
                while not env.is_done():
                    obs = env.get_obs()
                    predict = current_nn.predict(obs)
                    action = 0 if predict[0][0] < 0.5 else 1
                    env.step(action)
                run_results = np.append(run_results, env.get_reward())
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
            K.clear_session()
            gc.collect()
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)
        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS,
                              NEW_GENERATED_RANDOM_NETWORK, env.tensor_shape)
        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp' + str(i) + '.h5')
        # nnetworks.clear()
        K.clear_session()
        gc.collect()
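# `selection` is not shown in this section. In the file-based variants it
# receives the population size and must leave the chosen parents saved as
# nn0.h5 .. nn{N-1}.h5 for the next generation. A minimal sketch of that
# contract (keep the top scorers, a few random survivors, and optionally a
# few fresh random networks); the real implementation may differ:
import shutil

def selection(num_networks, rewards, num_parents, num_random_selected,
              num_new_random=0, tensor_shape=None):
    # Rank population indices by reward, best first.
    ranked = sorted(range(num_networks), key=lambda i: rewards[i],
                    reverse=True)
    num_best = num_parents - num_random_selected - num_new_random
    survivors = ranked[:num_best]
    # Add a few randomly chosen non-elite survivors for diversity.
    survivors += random.sample(ranked[num_best:], num_random_selected)
    # Copy to temporary names first so parent slots are not clobbered
    # before they are read.
    for slot, idx in enumerate(survivors):
        shutil.copyfile('nn' + str(idx) + '.h5', 'sel' + str(slot) + '.h5')
    for slot in range(len(survivors)):
        shutil.copyfile('sel' + str(slot) + '.h5', 'nn' + str(slot) + '.h5')
    # Fill any remaining parent slots with fresh random networks.
    for slot in range(len(survivors), num_parents):
        generate_model(tensor_shape).save('nn' + str(slot) + '.h5')
        K.clear_session()
        gc.collect()
    return survivors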
def main():
    # Create the gym environment
    env = gym.make('CartPole-v1')
    env.reset()
    env.render()
    time.sleep(0.5)
    # Capture the first frame to size the network layers
    gym_img = current_window_img(WINDOW_OFFSET)
    # Convert the image to a tensor
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))
    # Triple the tensor depth to account for the two previous frames
    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)
    # Create the initial population of neural networks
    nnetworks = [
        generate_model(img_tensor.shape) for i in range(NUM_PARENT_NETWORKS)
    ]
    # Build the layer-info descriptors
    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))
    max_reward = 0
    # Main loop: iterate over the generations
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape, layers_info)
                nnetworks.append(child_model)
        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()
                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))
                while reward < MAX_REWARD:
                    env.render()
                    gym_img = current_window_img(WINDOW_OFFSET)
                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)
                    gym_tensor = np.expand_dims(gym_tensor, axis=0)
                    predict = nnetworks[network_idx].predict(gym_tensor)
                    action = 0 if predict[0][0] < 0.5 else 1
                    _, _, done, _ = env.step(action)
                    reward += 1
                    if done:
                        break
                    # Refresh the stored frames every third step
                    if reward % 3 == 0:
                        update_prev_states(prev_states,
                                           gym_tensor[:, :, :, 0:1])
                    # if (reward >= 9):
                    #     save_states(prev_states)
                    #     exit()
                # Count the run whether it ended via `done` or by reaching
                # MAX_REWARD (the original appended only on `done`).
                run_results = np.append(run_results, reward)
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                nnetworks[network_idx].save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)
        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')
        nnetworks.clear()
        K.clear_session()
        gc.collect()
        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
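# `get_partner_idx` is not shown in this section. The call sites use two
# forms: the in-memory variants pass the population list, the file-based
# variants pass a count. A minimal sketch covering both (pick a random mate
# different from the current parent); the real helper may differ:
def get_partner_idx(net_idx, networks_or_count):
    if isinstance(networks_or_count, int):
        count = networks_or_count
    else:
        count = len(networks_or_count)
    partner_idx = random.randint(0, count - 1)
    while partner_idx == net_idx:
        partner_idx = random.randint(0, count - 1)
    return partner_idx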
def main():
    env = gym.make('Pendulum-v0')
    env.reset()
    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[125:375, 125:375]
    gym_img = resize(gym_img, (25, 25))
    gym_img = gym_img.astype('float32') / 255.0
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))
    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)
    nnetworks = [
        generate_model(img_tensor.shape) for i in range(NUM_PARENT_NETWORKS)
    ]
    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))
    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             img_tensor.shape, layers_info)
                nnetworks.append(child_model)
        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()
                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))
                # Pendulum's per-step rewards are negative costs: start with
                # a budget and run until it is exhausted.
                left_costs = START_COSTS
                while True:
                    env.render()
                    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[125:375, 125:375]
                    gym_img = resize(gym_img, (25, 25))
                    gym_img = gym_img.astype('float32') / 255.0
                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)
                    gym_tensor = np.expand_dims(gym_tensor, axis=0)
                    predict = nnetworks[network_idx].predict(gym_tensor)
                    print(predict)  # debug output
                    # Map the sigmoid output to a torque in [-2, 2].
                    action = [(predict[0][0] - 0.5) * 4.0]
                    _, costs, _, _ = env.step(action)
                    left_costs += costs
                    reward += 1
                    if left_costs < 0:
                        run_results = np.append(run_results, reward)
                        break
                    else:
                        # if reward % 2 == 0:
                        update_prev_states(prev_states,
                                           gym_tensor[:, :, :, 0:1])
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                nnetworks[network_idx].save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)
        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')
        nnetworks.clear()
        K.clear_session()
        gc.collect()
        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
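# `generate_child` is not shown in this section. A minimal sketch of what
# the call sites imply: cross the two parents' weights (e.g. with
# crossingover3 above), mutate a few of them, and load the result into a
# fresh model. The `set_weights_list` method on the Weights helper is a
# hypothetical restore step assumed here; the real implementation may
# differ.
def generate_child(parent1, parent2, input_shape, layers_info):
    child_weights = crossingover3(parent1, parent2)
    # Perturb NUM_MUTATION_WEIGHTS randomly chosen weights.
    for _ in range(NUM_MUTATION_WEIGHTS):
        idx = random.randint(0, len(child_weights) - 1)
        child_weights[idx] += random.uniform(-MUTATION_FACTOR,
                                             MUTATION_FACTOR)
    # Build a fresh model and copy the flat weight list into its layers,
    # using layers_info to know how many weights each layer takes.
    child = generate_model(input_shape)
    offset = 0
    for layer_idx, info in enumerate(layers_info):
        n = len(info.get_weights_list())
        if n == 0:
            continue
        # Hypothetical helper: reshape the flat slice back into the
        # layer's weight arrays and assign them.
        info.set_weights_list(child.layers[layer_idx],
                              child_weights[offset:offset + n])
        offset += n
    return child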
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR
    env = gym.make('Breakout-v0')
    env.frameskip = 0
    env.reset()
    env.step(1)  # FIRE to launch the ball
    # print(env.get_action_meanings())
    # exit()
    # time.sleep(0.5)
    gym_img = env.render(mode='rgb_array')
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[33:-17, :]  # crop the score bar and bottom border
    gym_img = resize(gym_img, (70, 70))
    gym_img = gym_img.astype('float32') / 255.0
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))
    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)
    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(img_tensor.shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()
    K.clear_session()
    gc.collect()
    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]
    nn = models.load_model('nn0.h5')
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))
    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
        print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
              NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)
        num_tasks = NUM_PARENT_NETWORKS * CHILDREN_PER_PARENT
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             img_tensor.shape, layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                K.clear_session()
                gc.collect()
                print('Generating: {}%\r'.format(
                    int(float(net_idx * CHILDREN_PER_PARENT + child_idx)
                        / num_tasks * 100)), end='')
        K.clear_session()
        gc.collect()
        # nnetworks.append(child_model)
        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS
        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                sample = 0
                env.reset()
                # env.seed()
                for i in range(10):
                    env.step(0)  # NOOP to let the game settle
                env.step(1)  # FIRE to launch the ball
                env.step(1)
                env.step(1)
                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))
                # for i in range(img_tensor.shape[2] - 1):
                #     prev_states[:,:,i:i+1] = img_tensor[:,:,0:1]
                prev_lives = 5
                while True:
                    # time.sleep(0.01)
                    env.render()
                    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    # gym_img = gym_img[150:350, 200:400]
                    gym_img = gym_img[33:-17, :]
                    gym_img = resize(gym_img, (70, 70))
                    gym_img = gym_img.astype('float32') / 255.0
                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)
                    gym_tensor = np.expand_dims(gym_tensor, axis=0)
                    predict = current_nn.predict(gym_tensor)
                    predict = np.argmax(predict)
                    # Map the argmax to Breakout actions: NOOP / RIGHT / LEFT.
                    if predict == 0:
                        action = 0
                    elif predict == 1:
                        action = 2
                    elif predict == 2:
                        action = 3
                    # else:
                    #     action = 3
                    obs, rew, done, info = env.step(action)
                    reward += rew
                    sample += 1
                    # Fitness is survival time on the first life: stop as
                    # soon as a life is lost.
                    if info['ale.lives'] < prev_lives:
                        break
                    # print(rew, done, info)
                    if done:
                        # run_results = np.append(run_results, rew)
                        break
                    else:
                        # if reward % 2 == 0:
                        update_prev_states(prev_states,
                                           gym_tensor[:, :, :, 0:1])
                # print(reward)
                # reward = int(input())
                run_results = np.append(run_results, sample)
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
            K.clear_session()
            gc.collect()
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)
        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS,
                              NEW_GENERATED_RANDOM_NETWORK, img_tensor.shape)
        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp' + str(i) + '.h5')
        # nnetworks.clear()
        K.clear_session()
        gc.collect()
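# The capture pipeline above (rgb2gray -> crop -> resize -> scale) repeats
# in every variant; rgb2gray and resize are presumably skimage's. A
# hypothetical helper that factors it out (crop bounds and target size are
# parameters, e.g. Breakout uses rows 33:-17 and (70, 70)):
from skimage.color import rgb2gray
from skimage.transform import resize

def preprocess_frame(rgb_frame, crop, size):
    # crop is a (row_slice, col_slice) pair selecting the play area.
    gray = rgb2gray(rgb_frame)[crop]
    gray = resize(gray, size)
    gray = gray.astype('float32') / 255.0
    # Add the trailing channel axis expected by the networks.
    return gray.reshape((gray.shape[0], gray.shape[1], 1))

# Usage sketch:
#     preprocess_frame(env.render(mode='rgb_array'),
#                      (slice(33, -17), slice(None)), (70, 70))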
def main():
    env = gym.make('CartPole-v1')
    env.reset()
    env.render()
    time.sleep(0.5)
    gym_img = current_window_img(WINDOW_OFFSET)
    img_tensor = np.array(gym_img, dtype='float')
    # The trimmed VGG16 feature extractor is (re)built inside the
    # generation loop below; building it here would not survive the
    # per-generation K.clear_session().
    nnetworks = [generate_model((1575, )) for i in range(NUM_PARENT_NETWORKS)]
    layers_info = []
    for i in range(len(nnetworks[0].layers)):
        layers_info.append(Weights(nnetworks[0].layers[i]))
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        # Rebuild a trimmed VGG16 convolutional base each generation: only
        # a subset of the early conv/pooling layers is kept, to shrink the
        # feature map.
        vgg16 = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=img_tensor.shape)
        conv_base = models.Sequential()
        conv_base.add(vgg16.layers[1])
        conv_base.add(vgg16.layers[3])
        conv_base.add(vgg16.layers[4])
        conv_base.add(vgg16.layers[6])
        conv_base.add(vgg16.layers[7])
        conv_base.add(vgg16.layers[10])
        conv_base.trainable = False
        conv_base.compile(optimizer=optimizers.RMSprop(lr=1e-4),
                          loss='binary_crossentropy',
                          metrics=['acc'])
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, nnetworks)
                child_model = generate_child(nnetworks[net_idx],
                                             nnetworks[partner_idx],
                                             (1575, ), layers_info)
                nnetworks.append(child_model)
        rewards = [0 for i in range(len(nnetworks))]
        for network_idx in range(len(nnetworks)):
            reward = 0
            env.reset()
            while reward < MAX_REWARD:
                env.render()
                gym_img = current_window_img(WINDOW_OFFSET)
                gym_tensor = np.array(gym_img, dtype='float')
                gym_tensor = np.expand_dims(gym_tensor, axis=0)
                # a = time.time()
                conv_predict = conv_base.predict(gym_tensor)
                # print('conv_base :' + str(time.time() - a))
                # Keep only the first feature-map channel and flatten it.
                conv_predict = conv_predict[:, :, :, 0:1]
                conv_predict = conv_predict.reshape((1, 1575))
                # a = time.time()
                predict = nnetworks[network_idx].predict(conv_predict)
                # print('nnetworks :' + str(time.time() - a))
                action = 0 if predict[0][0] < 0.5 else 1
                _, _, done, _ = env.step(action)
                reward += 1
                if done:
                    break
            # Written after the loop so reaching MAX_REWARD is scored too.
            rewards[network_idx] = reward
            print('Network {}: {}'.format(network_idx, reward))
        print('MAX REWARD: {}'.format(max(rewards)))
        print('-' * 40)
        nnetworks = selection(nnetworks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        for i in range(len(nnetworks)):
            nnetworks[i].save('tmp' + str(i) + '.h5')
        nnetworks.clear()
        K.clear_session()
        gc.collect()
        nnetworks = []
        for i in range(NUM_PARENT_NETWORKS):
            nnetworks.append(load_model('tmp' + str(i) + '.h5'))
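# `generate_model` is not shown in this section. It must accept either a
# flat feature shape like (1575,) or an image shape like (H, W, C), and the
# output width varies by variant (one sigmoid thresholded at 0.5 for
# CartPole, three outputs for Breakout/CarRacing). A minimal sketch of the
# single-output form, assuming `from keras import layers` alongside the
# models/optimizers imports used elsewhere; the real architecture may
# differ:
def generate_model(input_shape):
    model = models.Sequential()
    if len(input_shape) == 1:
        # Flat feature vector (e.g. flattened VGG16 activations).
        model.add(layers.Dense(32, activation='relu',
                               input_shape=input_shape))
    else:
        # Stacked grayscale frames: a small conv stack, then flatten.
        model.add(layers.Conv2D(8, (3, 3), activation='relu',
                                input_shape=input_shape))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Flatten())
        model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    # No gradient training happens (weights evolve genetically), but
    # compiling keeps the model saveable/loadable in this Keras version.
    model.compile(optimizer='rmsprop', loss='binary_crossentropy')
    return model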
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR
    env = gym.make('Pendulum-v0')
    env.reset()
    # time.sleep(0.5)
    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
    gym_img = rgb2gray(gym_img)
    gym_img = gym_img[125:375, 125:375]
    gym_img = resize(gym_img, (25, 25))
    gym_img = gym_img.astype('float32') / 255.0
    img_tensor = np.array(gym_img, dtype='float')
    img_tensor = img_tensor.reshape(
        (img_tensor.shape[0], img_tensor.shape[1], 1))
    for _ in range(NUM_PREVIOUS_USING_STATES):
        img_tensor = np.append(img_tensor, img_tensor[:, :, 0:1], axis=2)
    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(img_tensor.shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()
    K.clear_session()
    gc.collect()
    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]
    nn = models.load_model('nn0.h5')
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))
    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
        print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
              NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             img_tensor.shape, layers_info)
                safe_idx = NUM_PARENT_NETWORKS + net_idx * CHILDREN_PER_PARENT + child_idx
                child_model.save('nn' + str(safe_idx) + '.h5')
                K.clear_session()
                gc.collect()
        K.clear_session()
        gc.collect()
        # nnetworks.append(child_model)
        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS
        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                reward = 0
                env.reset()
                prev_states = np.zeros(
                    (img_tensor.shape[0], img_tensor.shape[1],
                     img_tensor.shape[2] - 1))
                # for i in range(img_tensor.shape[2] - 1):
                #     prev_states[:,:,i:i+1] = img_tensor[:,:,0:1]
                left_costs = START_COSTS
                while True:
                    env.render()
                    gym_img = env.render(mode='rgb_array')  # current_window_img(WINDOW_OFFSET)
                    gym_img = rgb2gray(gym_img)
                    gym_img = gym_img[125:375, 125:375]
                    gym_img = resize(gym_img, (25, 25))
                    gym_img = gym_img.astype('float32') / 255.0
                    gym_tensor = np.array(gym_img, dtype='float')
                    gym_tensor = gym_tensor.reshape(
                        (gym_tensor.shape[0], gym_tensor.shape[1], 1))
                    for i in range(NUM_PREVIOUS_USING_STATES):
                        gym_tensor = np.append(gym_tensor,
                                               prev_states[:, :, i:i + 1],
                                               axis=2)
                    gym_tensor = np.expand_dims(gym_tensor, axis=0)
                    predict = current_nn.predict(gym_tensor)
                    # Map the sigmoid output to a torque in [-2, 2].
                    action = [(predict[0][0] - 0.5) * 4.0]
                    _, costs, _, _ = env.step(action)
                    reward += 1
                    left_costs += costs
                    if left_costs < 0:
                        run_results = np.append(run_results, reward)
                        break
                    else:
                        # if reward % 2 == 0:
                        update_prev_states(prev_states,
                                           gym_tensor[:, :, :, 0:1])
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open("max_reward.txt", "w") as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)
        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS)
        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp' + str(i) + '.h5')
        # nnetworks.clear()
        K.clear_session()
        gc.collect()
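# Each variant in this collection defines its own main(); a standard entry
# point (assumed, not shown in the original) would be:
if __name__ == '__main__':
    main()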