Example #1
def run():
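    # load the winning genome saved by the training run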
    with open("results/winner-pickle-"+fn_results, 'rb') as f:
        c = pickle.load(f)
        
    print('loaded genome:')
    print(c)

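    # rebuild the NEAT config that was used during training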
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config-recurrent-small')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

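    # build a recurrent network from the genome and start an endless game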
    net = neat.nn.recurrent.RecurrentNetwork.create(c, config)
    register()
    env = gym.make("PuyoPuyoEndlessSmall-v2")
    done = False
    ob = env.reset()
    ticks = 0
    total_reward = 0

    while True:
        env.render()
        time.sleep(0.5)
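        # collapse the observation planes and flatten them into the network input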
        pieces_sum, field_sum = multiplyMatrices(ob[0], ob[1])
        next_piece = pieces_sum[0]
            
        inp_piece = np.ndarray.flatten(next_piece)
        inp_field = np.ndarray.flatten(field_sum)
        inputs = np.hstack([inp_piece, inp_field])
        
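        # activate the network and take the highest-scoring action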
        nn_output = net.activate(inputs)
        action = np.argmax(nn_output)
        
        ob, rew, done, info = env.step(action)
        
        total_reward += rew
        ticks += 1
        
        if done:
            break

    print("Game played for ", ticks, " turns.")
    print("Total score: ", total_reward+ticks)

    if DRAW_NETS:
        visualize.draw_net(config, c, view=True,
                           filename="results/winner-"+fn_results+".net")

        visualize.draw_net(config, c, view=True,
                           filename="results/winner-"+fn_results+"-enabled.net",
                           show_disabled=False)

        visualize.draw_net(config, c, view=True,
                           filename="results/winner-"+fn_results+"-pruned.net",
                           show_disabled=False, prune_unused=True)
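This function relies on several module-level names defined elsewhere in the project: fn_results, DRAW_NETS, register, multiplyMatrices and visualize. A minimal, hypothetical scaffold under which run() would execute might look like the following; the constant values and the body of multiplyMatrices are illustrative assumptions, not the original project's code.

import os
import pickle
import time

import gym
import neat
import numpy as np

from gym_puyopuyo import register   # assumed source of register(), which adds the PuyoPuyoEndless* envs to gym
import visualize                    # NEAT-Python's example visualization module

fn_results = "recurrent-small"      # assumed: suffix used when the winner genome was pickled
DRAW_NETS = False                   # assumed: set True to render the genome with Graphviz

def multiplyMatrices(pieces, field):
    # Assumed helper: collapse the per-colour binary planes of the
    # gym-puyopuyo observation into single 2-D matrices by weighting
    # each colour plane with a distinct integer and summing.
    pieces = np.asarray(pieces, dtype=np.float64)
    field = np.asarray(field, dtype=np.float64)
    pieces_sum = sum((i + 1) * pieces[i] for i in range(pieces.shape[0]))
    field_sum = sum((i + 1) * field[i] for i in range(field.shape[0]))
    return pieces_sum, field_sum

if __name__ == '__main__':
    run()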
Example #2
        # note: we only calculate the loss on the actions we've actually taken
        actions = tf.cast(actions, tf.int32)
        policy_loss = weighted_sparse_ce(actions,
                                         logits,
                                         sample_weight=advantages)
        # entropy can be calculated as the cross-entropy of the policy
        # distribution with itself; y_true must be probabilities, so
        # apply a softmax to the logits first
        entropy_loss = kls.categorical_crossentropy(tf.nn.softmax(logits),
                                                    logits,
                                                    from_logits=True)
        # here signs are flipped because optimizer minimizes
        return policy_loss - self.params['entropy'] * entropy_loss


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    summary_writer = tf.summary.create_file_writer(log_path)
    # Optional: cap GPU memory usage (left disabled here)
    # gpus = tf.config.experimental.list_physical_devices('GPU')
    # if gpus:
    #     try:
    #         tf.config.experimental.set_virtual_device_configuration(
    #             gpus[0],
    #             [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
    #     except RuntimeError as e:
    #         print(e)
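    # register the gym-puyopuyo environments, build the model and train the A2C agent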
    register()
    env = gym.make('PuyoPuyoEndlessTsu-v2')
    model = Model(num_actions=env.action_space.n)
    agent = A2CAgent(model)
    with summary_writer.as_default():
        rewards_history = agent.train(env)
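The returned value is the standard entropy-regularised A2C policy loss: a sparse categorical cross-entropy over the actions that were actually taken, weighted by their advantages, minus a small entropy bonus. weighted_sparse_ce and self.params['entropy'] are created elsewhere in the class (not shown); a kls.SparseCategoricalCrossentropy(from_logits=True) instance behaves as used above. A self-contained sketch of the same computation on dummy data could look like this; the batch values and the 1e-4 entropy coefficient are made up for illustration.

import tensorflow as tf
import tensorflow.keras.losses as kls

logits = tf.constant([[2.0, 0.5, -1.0],
                      [0.1, 0.2, 0.3]])      # policy logits, shape (batch, num_actions)
actions = tf.constant([0, 2])                # actions actually taken
advantages = tf.constant([1.5, -0.3])        # advantage estimates for those actions

weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)

# -log pi(a|s), weighted by the advantage of the action that was taken
policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)

# cross-entropy of the policy distribution with itself equals its entropy
probs = tf.nn.softmax(logits)
entropy = kls.categorical_crossentropy(probs, logits, from_logits=True)

# the optimizer minimises, so the entropy bonus enters with a minus sign
loss = policy_loss - 1e-4 * tf.reduce_mean(entropy)
print(float(loss))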