コード例 #1
0
    
    # Command-line interface: every hyper-parameter of the run is an option.
    parser = argparse.ArgumentParser(description='Combine Sarsa with Neural Networks.')
    parser.add_argument('--size', type=int, dest="size", help='Size of the barn.', default=10)
    parser.add_argument('--cache-size', type=int, dest="cache_size", help='Size of the cache.', default=1000)
    parser.add_argument('--hidden-size', type=int, dest="hidden_size", help='Size of the hidden layer of neurons.', default=25)
    parser.add_argument('--epsilon', type=float, dest="Epsilon", help="Epsilo value, close to one is complete random, 0 is greedy", default=0.6)
    parser.add_argument('--alpha', type=float, dest="Alpha", help="Step paramenter, 1 is fast close to zero is slow but secure.", default=0.1)
    parser.add_argument('--gamma', type=float, dest="Gamma", help="0 is no memory, 1 is funes.", default=0.2)
    parser.add_argument('--lambda', type=float, dest="Lambda", help="Control the eligibility traces.", default=0.2)
    parser.add_argument('--iter-step', type=int, dest="iter_step", help="Evaluate and print each number of steps.", default=100)
    # --iter-total needs its own destination: sharing dest="iter_step" would
    # let it overwrite the evaluation interval and leave the total unreadable.
    parser.add_argument('--iter-total', type=int, dest="iter_total", help="Stop at this steps", default=50000)
    args = parser.parse_args()

    # Initial state.
    from state import BarnState
    initial_state = BarnState.initial_state(args.size)

    # Print the parsed options and the initial state to stdout, one JSON
    # document per line.  Single-argument print(...) behaves the same under
    # the Python 2 print statement and the Python 3 print function.
    print(json.dumps(vars(args)))
    print(json.dumps({"initial": {"food": initial_state.food, "position": initial_state.position}}))

    # Create a single sarsa.
    sarsa = QSarsa(initial_state, cache_size=args.cache_size, hidden_size=args.hidden_size, Epsilon=args.Epsilon, Alpha=args.Alpha,
                   Gamma=args.Gamma, Lambda=args.Lambda, Sigma=1)

    # Run for the number of steps requested with --iter-total, evaluating and
    # reporting every --iter-step steps (including step 0).
    for i in range(args.iter_total):
        sarsa.run(i)
        if i % args.iter_step == 0:
            reward, history = sarsa.evaluate()
            print(json.dumps({"iteration": i, "eval": {"reward": reward, "history": [(s.position, len(s.food)) for s in history]}}))
コード例 #2
0
ファイル: newsarsa.py プロジェクト: jorgeecardona/neurothesis
        # Show the food.
        for f in self.initial_state.food:
            pylab.annotate('food', xy=f, size=5, bbox=dict(boxstyle="round4,pad=.5", fc="0.8"), ha='center')

        # Create the x and y locations.
        x = [s.position[0] for s in history]
        y = [s.position[1] for s in history]    
        pylab.plot(x, y)

        # Save the figure        
        pylab.savefig("%s-%d.png" % (self.prefix, counter))
    
        

if __name__ == '__main__':

    # Two positional arguments are required: the barn size (an integer) and
    # the prefix handed to Sarsa.  Validate them up front so that a wrong
    # invocation produces a usage message instead of a bare traceback.
    # Extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: %s SIZE PREFIX" % sys.argv[0])

    # Size.
    try:
        size = int(sys.argv[1])
    except ValueError:
        sys.exit("SIZE must be an integer, got %r" % sys.argv[1])
    prefix = sys.argv[2]

    # Initial state.
    initial_state = BarnState.initial_state(size)

    # Create a single sarsa.
    sarsa = Sarsa(initial_state, Epsilon=0.6, Alpha=0.1, Gamma=0.8, prefix=prefix)

    # Seed all the agents.
    sarsa.seed_all()

    sarsa.run()