import os
import pickle
import time

import gym
import numpy as np
import neat

# Assumed imports: register() comes from the gym-puyopuyo package and
# visualize is the plotting helper shipped with the neat-python examples.
from gym_puyopuyo import register
import visualize

# fn_results, DRAW_NETS and multiplyMatrices() are expected to be defined
# elsewhere in this module.


def run():
    # Load the winning genome produced by the NEAT training run.
    with open("results/winner-pickle-" + fn_results, 'rb') as f:
        c = pickle.load(f)

    print('loaded genome:')
    print(c)

    # Rebuild the NEAT config and the recurrent network from the genome.
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config-recurrent-small')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)
    net = neat.nn.recurrent.RecurrentNetwork.create(c, config)

    register()
    env = gym.make("PuyoPuyoEndlessSmall-v2")
    ob = env.reset()
    ticks = 0
    total_reward = 0

    # Play one episode, rendering each step.
    while True:
        env.render()
        time.sleep(0.5)

        # Collapse the per-color observation planes into single matrices,
        # then flatten next piece + field into the network's input vector.
        pieces_sum, field_sum = multiplyMatrices(ob[0], ob[1])
        next_piece = pieces_sum[0]
        inp_piece = next_piece.flatten()
        inp_field = field_sum.flatten()
        inputs = np.hstack([inp_piece, inp_field])

        nn_output = net.activate(inputs)
        action = np.argmax(nn_output)

        ob, rew, done, info = env.step(action)
        total_reward += rew
        ticks += 1
        if done:
            break

    print("Game played for", ticks, "turns.")
    print("Total score:", total_reward + ticks)

    if DRAW_NETS:
        visualize.draw_net(config, c, view=True,
                           filename="results/winner-" + fn_results + ".net")
        visualize.draw_net(config, c, view=True,
                           filename="results/winner-" + fn_results + "-enabled.net",
                           show_disabled=False)
        visualize.draw_net(config, c, view=True,
                           filename="results/winner-" + fn_results + "-pruned.net",
                           show_disabled=False, prune_unused=True)
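# The helper below is a hypothetical sketch of the multiplyMatrices() called
# in run() above; it is not taken from the original source. It assumes the
# gym-puyopuyo observation is a pair of per-color binary arrays (color channel
# on axis 0) and collapses each stack into one color-coded matrix by weighting
# every channel with its 1-based color index and summing over channels.
def multiplyMatrices(pieces, field):
    pieces = np.asarray(pieces, dtype=float)
    field = np.asarray(field, dtype=float)
    piece_colors = np.arange(1, pieces.shape[0] + 1, dtype=float)
    field_colors = np.arange(1, field.shape[0] + 1, dtype=float)
    # tensordot contracts the color axis; the remaining spatial axes survive,
    # e.g. (num_deals, 2) for the piece queue and (height, width) for the field
    pieces_sum = np.tensordot(piece_colors, pieces, axes=(0, 0))
    field_sum = np.tensordot(field_colors, field, axes=(0, 0))
    return pieces_sum, field_sum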
import logging

import gym
import tensorflow as tf
import tensorflow.keras.losses as kls

# Assumed import: register() comes from the gym-puyopuyo package.
from gym_puyopuyo import register

# log_path and Model are expected to be defined elsewhere in this module.


class A2CAgent:
    # (constructor, hyperparameters and training loop omitted in this excerpt;
    #  the method signature and the weighted_sparse_ce definition below are
    #  reconstructed from context and may differ from the original file)

    def _logits_loss(self, actions, advantages, logits):
        weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
        # note: we only calculate the loss on the actions we've actually taken
        actions = tf.cast(actions, tf.int32)
        policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)
        # entropy loss can be calculated via CE over itself; strictly, CE(p, p)
        # equals the entropy H(p) only when y_true holds probabilities, so
        # passing raw logits as y_true approximates the entropy bonus
        entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True)
        # signs are flipped because the optimizer minimizes: minimizing
        # -entropy maximizes the entropy bonus
        return policy_loss - self.params['entropy'] * entropy_loss


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    summary_writer = tf.summary.create_file_writer(log_path)

    # Optional: cap GPU memory usage (left disabled).
    '''
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            tf.config.experimental.set_virtual_device_configuration(
                gpus[0],
                [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
        except RuntimeError as e:
            print(e)
    '''

    register()
    env = gym.make('PuyoPuyoEndlessTsu-v2')
    model = Model(num_actions=env.action_space.n)
    agent = A2CAgent(model)
    with summary_writer.as_default():
        rewards_history = agent.train(env)
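# Minimal sketch (an assumption, not the original code) of the critic-side
# counterpart to _logits_loss: in A2C the value head is regressed onto the
# discounted returns with mean-squared error, scaled by a small coefficient
# so it does not dominate the policy gradient. The name and default
# coefficient here are illustrative.
def _value_loss_sketch(returns, value, coeff=0.5):
    # returns: discounted n-step returns; value: critic predictions
    return coeff * kls.mean_squared_error(returns, value)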
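# Standalone sanity check (not from the original source) of the "CE over
# itself" identity used in _logits_loss above: with probabilities as y_true,
# the categorical cross-entropy of a distribution with itself is its entropy,
# H(p) = -sum(p * log p).
def _entropy_identity_demo():
    logits = tf.constant([[2.0, 0.5, -1.0]])
    probs = tf.nn.softmax(logits)
    ce = kls.categorical_crossentropy(probs, logits, from_logits=True)
    entropy = -tf.reduce_sum(probs * tf.math.log(probs), axis=-1)
    print(float(ce[0]), float(entropy[0]))  # both print the same value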