Example #1
from __future__ import print_function  # print(...) is used with multiple arguments below

import theano

# Project modules (ale_game, dqn, teacher), imported as in the other examples.
import ale_game as ag
import dqn
import teacher as q


def main(**kargs):
    # `latest` is a project helper (not shown here) that presumably returns the
    # newest weights file in kargs['weights_dir'] and the frame count it was saved at.
    initial_weights_file, initial_i_frame = latest(kargs['weights_dir'])

    print("Continuing using weights from file:", initial_weights_file, "from frame", initial_i_frame)

    if kargs['theano_verbose']:
        theano.config.compute_test_value = 'warn'
        theano.config.exception_verbosity = 'high'
        theano.config.optimizer = 'fast_compile'

    ale = ag.init(display_screen=(kargs['visualize'] == 'ale'), record_dir=kargs['record_dir'])
    game = ag.SpaceInvadersGame(ale)


    def new_game():
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        game.lives = 4
        return game

    replay_memory = dqn.ReplayMemory(size=kargs['dqn.replay_memory_size']) if not kargs['dqn.no_replay'] else None
    # dqn_algo = q.ConstAlgo([3])
    dqn_algo = dqn.DQNAlgo(game.n_actions(),
                           replay_memory=replay_memory,
                           initial_weights_file=initial_weights_file,
                           build_network=kargs['dqn.network'],
                           updates=kargs['dqn.updates'])

    dqn_algo.replay_start_size = kargs['dqn.replay_start_size']
    dqn_algo.final_epsilon = kargs['dqn.final_epsilon']
    dqn_algo.initial_epsilon = kargs['dqn.initial_epsilon']
    dqn_algo.i_frames = initial_i_frame

    dqn_algo.log_frequency = kargs['dqn.log_frequency']


    import Queue
    dqn_algo.mood_q = Queue.Queue() if kargs['show_mood'] else None

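    # When show_mood is set, a daemon worker thread drains mood_q and renders each
    # item, so plotting happens off the training thread.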
    if kargs['show_mood'] is not None:
        plot = kargs['show_mood']()

        def worker():
            while True:
                item = dqn_algo.mood_q.get()
                plot.show(item)
                dqn_algo.mood_q.task_done()

        import threading
        t = threading.Thread(target=worker)
        t.daemon = True
        t.start()

    print(str(dqn_algo))

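    # The Teacher drives games produced by new_game with the DQN algorithm:
    # Phi(skip_every=4) preprocesses frames and each chosen action is repeated for 4 frames.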
    visualizer = ag.SpaceInvadersGameCombined2Visualizer() if kargs['visualize'] == 'q' else q.GameNoVisualizer()
    teacher = q.Teacher(new_game, dqn_algo, visualizer,
                        ag.Phi(skip_every=4), repeat_action=4, sleep_seconds=0)
    teacher.teach(500000)
Example #2
def random_on_space_invaders():
    import q_learning as q
    import numpy as np
    import ale_game as ag
    reload(q)
    reload(ag)
    ale = ag.init()
    game = ag.SpaceInvadersGame(ale)
    #game.show_vectorized(game.vectorized(ale.getScreen()))
    teacher = q.Teacher(game, q.RandomAlgo(game.get_actions()),
                        ag.SpaceInvadersGameVectorizedVisualizer())
    teacher.teach(1)
Example #3
def const_on_space_invaders():
    import teacher as q
    import ale_game as ag
    import dqn
    reload(q)
    reload(ag)
    reload(dqn)

    ale = ag.init()
    game = ag.SpaceInvadersGame(ale)

    def new_game():
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        return game

    const_algo = q.ConstAlgo([2, 2, 2, 2, 2, 0, 0, 0, 0])
    teacher = q.Teacher(new_game, const_algo, ag.SpaceInvadersGameCombined2Visualizer(),
                        ag.Phi(skip_every=6), repeat_action=6)
    teacher.teach(1)
Example #4
def sarsa_gd_on_space_invaders():
    import q_learning as q
    import numpy as np
    import ale_game as ag
    import matplotlib.pyplot as plt
    plt.ion()
    reload(q)
    reload(ag)
    ale = ag.init()
    run = '1'

    n_colors = 5

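    # state_adapter turns the raw ALE screen into a sparse state: the 14x20 grid is
    # vectorized over n_colors channels, flattened, and the indices of the nonzero
    # entries are returned.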
    def state_adapter(scr):
        vect = np.reshape(ag.vectorized(scr, 14, 20), 14 * 20 * n_colors)
        return np.where(vect)[0]

    game = ag.SpaceInvadersGame(ale)
    q_algo1 = q.SARSALambdaGradientDescent(game.get_actions(),
                                           game.get_state(),
                                           initial_q=5,
                                           initial_theta=[1] * 14 * 20 * n_colors,
                                           be_positive=False,
                                           state_adapter=state_adapter)
    q_algo1.epsilon = 0.05
    q_algo1.lmbda = 0.99  # 0.9
    q_algo1.gamma = 0.999
    q_algo1.alpha = 0.5

    def new_game():
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        return game

    teacher = q.Teacher(new_game,
                        q_algo1,
                        ag.SpaceInvadersGameVectorizedVisualizer(),
                        repeat_action=3)

    #  teacher.single_step(Game)
    q_algo1.epsilon = 0
    q_algo1.log_freq = 1
    teacher.teach(1)

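    # Training schedule: `initial_training` games with epsilon = 1 (pure exploration),
    # then `training_decay_from` batches of `training_decay_ex` games with epsilon
    # annealed from 1 towards 0, each batch followed by one greedy (epsilon = 0) test game.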
    initial_training = 1000
    training_decay_from = 95
    training_decay_ex = 50

    result_test = []
    result_1 = []
    result_2 = []

    teacher = q.Teacher(new_game,
                        q_algo1,
                        q.GameNoVisualizer(),
                        repeat_action=3)
    q_algo1.log_freq = 0.05
    q_algo1.epsilon = 1
    result_1 = teacher.teach(initial_training)

    q_algo1.epsilon = 0
    q_algo1.log_freq = 0.05
    result_test.append(teacher.teach(1))

    for i in range(training_decay_from):
        q_algo1.epsilon = 1 - i / 100.0  # float division; integer division would leave epsilon stuck at 1
        teacher = q.Teacher(new_game,
                            q_algo1,
                            q.GameNoVisualizer(),
                            repeat_action=3)
        result_2.append(teacher.teach(training_decay_ex))
        q_algo1.epsilon = 0
        result_test.append(teacher.teach(1))

    import cPickle as pickle
    with open('gradient_descent.theta' + run, 'wb') as handle:
        pickle.dump(q_algo1.theta, handle)

    with open('gradient_descent.gamma' + run, 'wb') as handle:
        pickle.dump(q_algo1.gamma, handle)

    with open('gradient_descent.lmbda' + run, 'wb') as handle:
        pickle.dump(q_algo1.lmbda, handle)

    with open('gradient_descent.alpha' + run, 'wb') as handle:
        pickle.dump(q_algo1.alpha, handle)

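    # The plots below show moving averages of per-game reward: 200-game windows for
    # the training results and a 50-game window for the greedy test games.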
    r1 = [a[1] for a in result_1]
    plt.plot(
        np.array(
            [x[1] - x[0] for x in zip(np.cumsum(r1),
                                      np.cumsum(r1)[200:])]) / 200)

    r2 = [a[1] for r in result_2 for a in r]
    plt.plot(
        np.array(
            [x[1] - x[0] for x in zip(np.cumsum(r2),
                                      np.cumsum(r2)[200:])]) / 200)

    r_test = [a[1] for r in result_test for a in r]
    plt.plot(
        np.array([
            x[1] - x[0] for x in zip(np.cumsum(r_test),
                                     np.cumsum(r_test)[50:])
        ]) / 50)

    # The following block references `result_4`, which is never produced in this
    # snippet (and mixes it with `r_test`); it is left commented out so the function
    # remains runnable.
    # r_4 = [a[1] for a in result_4]
    # plt.plot(
    #     np.array(
    #         [x[1] - x[0] for x in zip(np.cumsum(r_test),
    #                                   np.cumsum(r_4)[2:])]) / 2)

    q_algo1.epsilon = 0.1
    teacher = q.Teacher(new_game,
                        q_algo1,
                        q.GameNoVisualizer(),
                        repeat_action=3)
    teacher.teach(100)