コード例 #1
0
ファイル: Main.py プロジェクト: fA1sEr/ADRQN2
                # Training-loop interior (scraped fragment — enclosing loop headers
                # are outside this view): pick an action, step the environment,
                # store the transition, and periodically train / sync the target net.
                action = agent.act(state)
                img_state, reward, done = game.make_action(action)
                # Terminal states are stored as None so the learner can mask them.
                if not done:
                    state_new = img_state
                else:
                    state_new = None
                agent.add_transition(state, action, reward, state_new, done)
                state = state_new

                # NOTE(review): learning_step is defined outside this fragment —
                # presumably incremented once per environment step; confirm.
                if learning_step % UPDATE_FREQUENCY == 0:
                    agent.learn_from_memory()
                if learning_step % COPY_FREQUENCY == 0:
                    updateTarget(targetOps, SESSION)

                if done:
                    print("Epoch %d Train Game %d get %.1f" % (epoch, games_cnt, game.get_total_reward()))
                    break
            # Checkpoint the model every 10th game when saving is enabled.
            if SAVE_MODEL and games_cnt % 10 == 0:
                saver.save(SESSION, model_savefile)
                # NOTE(review): "weigths" is a typo for "weights" in this log
                # message; left unchanged here since it is runtime output.
                print("Saving the network weigths to:", model_savefile)

        print("\nTesting...")

        # Evaluation: play EPISODES_TO_TEST episodes with exploration disabled.
        test_scores = []
        for test_step in range(EPISODES_TO_TEST):
            game.reset()
            agent.reset_cell_state()
            # NOTE(review): is_terminared looks like a typo for is_terminated,
            # but it is the project API's actual method name.
            while not game.is_terminared():
                state = game.get_state()
                action = agent.act(state, train=False)
                game.make_action(action)
コード例 #2
0
ファイル: Main.py プロジェクト: fA1sEr/ADRQN2-pong-test
# Evaluation-only script fragment: restore a trained model from disk, play
# EPISODES_TO_TEST greedy episodes, and append summary statistics to a log.
trainables = tf.trainable_variables()

# Ops that copy the online network's weights into the target network.
targetOps = updateTargetGraph(trainables, TAU)

print("Loading model from: ", model_savefile)
saver.restore(SESSION, model_savefile)

##########################################
print("\nTesting...")

test_scores = []

# Play full episodes with exploration disabled (train=False) and record scores.
for test_step in range(EPISODES_TO_TEST):
    game.reset()
    agent.reset_cell_state()
    # NOTE(review): is_terminared looks like a typo for is_terminated, but it
    # is the project API's actual method name.
    while not game.is_terminared():
        state = game.get_state()
        action = agent.act(state, train=False)
        game.make_action(action)
    now_score = game.get_total_reward()
    saveScore(now_score)
    test_scores.append(now_score)

test_scores = np.array(test_scores)
# Append mean/std/min/max to the reward log. The context manager guarantees the
# file is closed even if the write raises (the original used bare open/close,
# which would leak the handle on an exception).
with open(reward_savefile, 'a') as my_file:  # Name and path of the reward text file
    my_file.write("%.1f (±%.1f)  min:%.1f  max:%.1f\n" %
                  (test_scores.mean(), test_scores.std(), test_scores.min(),
                   test_scores.max()))
コード例 #3
0
ファイル: Main.py プロジェクト: fA1sEr/ADRQN2-pong0.5
                # Training-loop interior (scraped fragment — the action selection
                # and enclosing loop headers are outside this view): step the
                # environment, store the transition, and periodically train.
                img_state, reward, done = game.make_action(action)
                # Terminal states are stored as None so the learner can mask them.
                if not done:
                    state_new = img_state
                else:
                    state_new = None
                agent.add_transition(state, action, reward, state_new, done)
                state = state_new

                # NOTE(review): learning_step is defined outside this fragment —
                # presumably incremented once per environment step; confirm.
                if learning_step % UPDATE_FREQUENCY == 0:
                    agent.learn_from_memory()
                if learning_step % COPY_FREQUENCY == 0:
                    updateTarget(targetOps, SESSION)

                if done:
                    print("Epoch %d Train Game %d get %.1f" %
                          (epoch, games_cnt, game.get_total_reward()))
                    break
            # Checkpoint the model every 10th game when saving is enabled.
            if SAVE_MODEL and games_cnt % 10 == 0:
                saver.save(SESSION, model_savefile)
                # NOTE(review): "weigths" is a typo for "weights" in this log
                # message; left unchanged here since it is runtime output.
                print("Saving the network weigths to:", model_savefile)

        print("\nTesting...")

        # Evaluation: play EPISODES_TO_TEST episodes with exploration disabled.
        test_scores = []
        for test_step in range(EPISODES_TO_TEST):
            game.reset()
            agent.reset_cell_state()
            # NOTE(review): is_terminared looks like a typo for is_terminated,
            # but it is the project API's actual method name.
            while not game.is_terminared():
                state = game.get_state()
                action = agent.act(state, train=False)
                game.make_action(action)
コード例 #4
0
            # Training-loop interior (scraped fragment — enclosing loop headers
            # are outside this view). NOTE(review): `s` and `d` returned by
            # make_action are unused; `done` is re-queried from the game and the
            # next state is re-fetched via get_state() + preprocess() — confirm
            # this is intentional in the full file.
            s, reward, d = game.make_action(action)
            done = game.is_terminared()
            if not done:
                state_new = preprocess(game.get_state())
            else:
                # Terminal transitions store None as the successor state.
                state_new = None

            agent.add_transition(state, action, reward, state_new, done)
            state = state_new

            # Train and sync the target network every UPDATE_FREQUENCY steps.
            # NOTE(review): unlike the other project variants, the target-network
            # copy shares the learning frequency here (no separate COPY_FREQUENCY).
            if learning_step % UPDATE_FREQUENCY == 0:
                agent.learn_from_memory()
                updateTarget(targetOps, SESSION)

            if done:
                # Episode finished: record the score and start a fresh episode
                # without leaving the training loop.
                train_scores.append(game.get_total_reward())
                train_episodes_finished += 1
                game.reset()
                agent.reset_cell_state()
                state = preprocess(game.get_state())

        # End-of-epoch summary statistics over the finished training episodes.
        print("%d training episodes played." % train_episodes_finished)
        train_scores = np.array(train_scores)

        print(
            "Results: mean: %.1f±%.1f," %
            (train_scores.mean(), train_scores.std()),
            "min: %.1f," % train_scores.min(),
            "max: %.1f," % train_scores.max())

        print("\nTesting...")