def fitness_car_race(w, render: bool=False, steps=1000):
    """Score a weight vector by driving the module-level ``env`` with ``net``.

    The weights ``w`` are loaded into the module-level ``net`` and two
    episodes are played.  At every step the network is fed the previous and
    the current frame, each scaled by 1/255, subsampled every 8th row and
    column, restricted to channel index 1 and flattened, then concatenated
    into one batch row.  The network output is rescaled with ``x * 2 - 1``
    and passed to ``env.step`` unchanged as the action.

    Args:
        w: Weight vector handed to ``nn.set_vectorized_weights``.
        render: When true, render every step, print each episode's reward,
            close the environment after the episode, and stop early if
            ``env.render()`` returns a falsy value.
        steps: Not used by this function.

    Returns:
        The summed episode rewards divided by 2 (the configured episode
        count), even if rendering was interrupted before both episodes ran.
    """
    nn.set_vectorized_weights(net, w, outs)

    n = 2
    score = 0
    for _ in range(n):
        obs = env.reset()
        last_obs = np.array(obs) / 255.0

        s = 0
        done = False
        close = False
        while not (done or close):
            # A falsy render() result is treated as a request to stop.
            close = (not env.render()) if render else False

            obs = obs / 255.0
            features = np.concatenate([
                last_obs[::8, ::8, 1].flatten(),
                obs[::8, ::8, 1].flatten(),
            ])
            res = net.predict(np.expand_dims(features, 0))[0]
            # Presumably maps outputs in [0, 1] onto [-1, 1] -- TODO confirm
            # against the network's output activation.
            action = res * 2 - 1

            last_obs = obs
            obs, reward, done, _ = env.step(action)
            s += reward

        score += s
        if render:
            print(s)
            env.close()
        if close:
            break

    return score / n
def fitness_xor(w: np.ndarray):
    """Score a weight vector on the four-row XOR truth table.

    The weights ``w`` are loaded into the module-level ``net``, which then
    predicts on the inputs (0,0), (0,1), (1,0), (1,1).  The squared
    difference between the first output column and the XOR targets
    0, 1, 1, 0 is accumulated row by row.

    Args:
        w: Weight vector handed to ``nn.set_vectorized_weights``.

    Returns:
        ``4`` minus the summed squared error, so a perfect fit scores 4.
    """
    nn.set_vectorized_weights(net, w, outs)

    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    targets = (0, 1, 1, 0)
    p = 2

    out = net.predict(inputs)

    total = 0
    for row, target in enumerate(targets):
        total += np.power(target - out[row, 0], p)
    return 4 - total
def fitness_pong(w, render: bool = False, steps=1000):
    """Score a weight vector by playing one episode of the module-level ``env``.

    The weights ``w`` are loaded into the module-level ``net``.  At every
    step the observation is divided by 256, passed to the network as a
    batch of one, and the index of the largest output is used as the
    action.

    Args:
        w: Weight vector handed to ``nn.set_vectorized_weights``.
        render: When true, render every step, stop as soon as
            ``env.render()`` returns a falsy value, and afterwards print the
            episode reward and close the environment.
        steps: Not used by this function.

    Returns:
        The total reward collected during the single episode.
    """
    nn.set_vectorized_weights(net, w, outs)

    obs = env.reset()
    s = 0
    done = False
    close = False
    while not (done or close):
        # A falsy render() result is treated as a request to stop.
        close = (not env.render()) if render else False

        scaled = obs / 256
        res = net.predict(np.expand_dims(scaled, 0))[0]
        action = np.argmax(res)

        obs, reward, done, _ = env.step(action)
        s += reward

    if render:
        print(s)
        env.close()

    return s
def fitness_cartpole(w: np.ndarray, render: bool = False, steps=1000):
    """Score a weight vector by its average episode reward on ``env``.

    The weights ``w`` are loaded into the module-level ``net``.  Ten
    episodes are played, or a single one when rendering.  At every step the
    raw observation is passed to the network as a batch of one and the
    index of the largest output is used as the action.

    Args:
        w: Weight vector handed to ``nn.set_vectorized_weights``.
        render: When true, play one episode only, render every step, end the
            episode if ``env.render()`` returns a falsy value, then print
            the episode reward and close the environment.
        steps: Value written to ``env._max_episode_steps`` before each
            reset.

    Returns:
        The summed episode rewards divided by the number of episodes.
    """
    nn.set_vectorized_weights(net, w, outs)

    n = 1 if render else 10
    score = 0
    for _ in range(n):
        env._max_episode_steps = steps
        obs = env.reset()

        s = 0
        done = False
        close = False
        while not (done or close):
            # A falsy render() result is treated as a request to stop.
            close = (not env.render()) if render else False

            res = net.predict(np.expand_dims(obs, 0))[0]
            action = np.argmax(res)

            obs, reward, done, _ = env.step(action)
            s += reward

        score += s
        if render:
            print(s)
            env.close()

    return score / n
1.0, 500, 10, min_sigma=1e-3, big_sigma=1e1, wait_iter=100) try: for i in range(1000): scores = [] pop = e.ask() # eval population for ind in pop: scores.append(fitness_xor(ind)) e.tell(scores) max_score = np.max(scores) # if max_score >= 3.9: if i == 99: ind = pop[np.argmax(scores)] print(ind) nn.set_vectorized_weights(net, ind, outs) out = net.predict(np.array([[0, 0], [0, 1], [1, 0], [1, 1]])) print(out) break finally: pass