# Command-line driver: build a QSarsa agent from the parsed options, run it
# step by step, and write one JSON document per line to stdout (the parsed
# options, the initial state, then one evaluation record every
# `--iter-step` iterations).
parser = argparse.ArgumentParser(description='Combine Sarsa with Neural Networks.')
parser.add_argument('--size', type=int, dest="size",
                    help='Size of the barn.', default=10)
parser.add_argument('--cache-size', type=int, dest="cache_size",
                    help='Size of the cache.', default=1000)
parser.add_argument('--hidden-size', type=int, dest="hidden_size",
                    help='Size of the hidden layer of neurons.', default=25)
parser.add_argument('--epsilon', type=float, dest="Epsilon",
                    help="Epsilo value, close to one is complete random, 0 is greedy",
                    default=0.6)
parser.add_argument('--alpha', type=float, dest="Alpha",
                    help="Step paramenter, 1 is fast close to zero is slow but secure.",
                    default=0.1)
parser.add_argument('--gamma', type=float, dest="Gamma",
                    help="0 is no memory, 1 is funes.", default=0.2)
parser.add_argument('--lambda', type=float, dest="Lambda",
                    help="Control the eligibility traces.", default=0.2)
parser.add_argument('--iter-step', type=int, dest="iter_step",
                    help="Evaluate and print each number of steps.", default=100)
# This option used to share dest="iter_step" with the one above, so passing
# --iter-total overwrote the evaluation interval and the total itself was
# never stored. It now has its own destination.
parser.add_argument('--iter-total', type=int, dest="iter_total",
                    help="Stop at this steps", default=50000)
args = parser.parse_args()

# Initial state.
from state import BarnState
initial_state = BarnState.initial_state(args.size)

# Print to stdout. A single parenthesised argument prints the same text under
# the Python 2 print statement and the Python 3 print function.
print(json.dumps(vars(args)))
print(json.dumps({"initial": {"food": initial_state.food,
                              "position": initial_state.position}}))

# Create a single sarsa.
sarsa = QSarsa(initial_state,
               cache_size=args.cache_size,
               hidden_size=args.hidden_size,
               Epsilon=args.Epsilon,
               Alpha=args.Alpha,
               Gamma=args.Gamma,
               Lambda=args.Lambda,
               Sigma=1)

# The loop bound was a hard-coded 100000, which ignored --iter-total.
# NOTE(review): with no flag given the run now stops after the declared
# default of 50000 iterations instead of 100000.
for i in range(args.iter_total):
    sarsa.run(i)
    if i % args.iter_step == 0:
        reward, history = sarsa.evaluate()
        # Each history entry is reported as (position, number of food items).
        trace = [(s.position, len(s.food)) for s in history]
        print(json.dumps({"iteration": i,
                          "eval": {"reward": reward, "history": trace}}))
# NOTE(review): the statements down to pylab.savefig(...) are the tail of a
# method whose header lies above this view. They are reproduced unchanged and
# must be re-indented to the enclosing method's body level when merged; `self`,
# `history` and `counter` come from that enclosing method.

# Show the food.
for f in self.initial_state.food:
    pylab.annotate('food', xy=f, size=5,
                   bbox=dict(boxstyle="round4,pad=.5", fc="0.8"),
                   ha='center')

# Create the x and y locations.
x = [s.position[0] for s in history]
y = [s.position[1] for s in history]
pylab.plot(x, y)

# Save the figure; the file name is "<prefix>-<counter>.png".
pylab.savefig("%s-%d.png" % (self.prefix, counter))


if __name__ == '__main__':
    # Usage: <script> SIZE PREFIX
    # Both positional arguments are required. Exit with a usage line rather
    # than an IndexError traceback when one is missing; extra arguments are
    # still ignored, as before.
    if len(sys.argv) < 3:
        sys.exit("usage: %s SIZE PREFIX" % sys.argv[0])

    # Size.
    size = int(sys.argv[1])
    # Passed to Sarsa as its `prefix` keyword argument.
    prefix = sys.argv[2]

    # Initial state.
    initial_state = BarnState.initial_state(size)

    # Create a single sarsa.
    sarsa = Sarsa(initial_state, Epsilon=0.6, Alpha=0.1, Gamma=0.8, prefix=prefix)

    # Seed all the agents.
    sarsa.seed_all()

    sarsa.run()