# Code example #1
    tf.multiply(tf.log(tf.gather_nd(l_out, actions_pl)), advantages_pl))

# Adam optimizer with tuned decay rates; the learning rate itself is fed
# at run time through the learning_rate_pl placeholder.
optimizer = tf.train.AdamOptimizer(
    learning_rate=learning_rate_pl, beta1=0.8, beta2=0.92)

# One gradient-descent step on the policy-gradient loss.
train_f = optimizer.minimize(loss_f)

# Saver used further down to checkpoint the trained model.
saver = tf.train.Saver()

# Smoke-test the forward pass: build a small 6x6 board with 7 mines and
# push a single state through the network once.
reward_spec = {
    "win": 1,
    "loss": -1,
    "progress": 0.9,
    "noprogress": -0.3,
    "YOLO": -0.3,
}
env = Minesweeper(display=False, ROWS=6, COLS=6, MINES=7,
                  OUT="CONDENSED", rewards=reward_spec)
state = env.stateConverter(env.get_state()).flatten()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed a batch of one state and read the policy head's output.
    action_probabilities = sess.run(fetches=l_out,
                                    feed_dict={states_pl: [state]})
print(action_probabilities)

# helper functions

# Code example #2
# Board geometry comes from the command line; use the condensed (2-channel)
# observation encoding.
rows = args.size
cols = args.size
mines = args.mine
FULL = False

# Reward shaping for the Minesweeper environment.
rewards = {
    "win": 0.9,
    "loss": -1,
    "progress": 0.9,
    "noprogress": -0.3,
    "YOLO": -0.3,
}
env = Minesweeper(
    display=False, FULL=FULL, ROWS=rows, COLS=cols,
    MINES=mines, rewards=rewards)

# Network dimensions: 10 channels per cell for the full encoding, 2 for the
# condensed one; one output unit per board cell.
n_inputs = rows * cols * (10 if FULL else 2)
n_hidden = [rows * cols * 10, 250, 250, 250, 250]
n_outputs = rows * cols

# Model
model = Sequential()
model.add(
    Dense(
        input_shape=(1, n_inputs),
        units=n_hidden[0],
        activation='relu',
        kernel_initializer='glorot_uniform',
# Code example #3
    # CLI flag: when given, render the game so a human can watch the agent
    # play, advancing move-by-move by pressing enter.
    parser.add_argument(
        "-d",
        "--display",
        help=
        "run with this argument to display the game and see the agent play by pressing enter"
    )
    args = parser.parse_args()
    if args.display:
        display = True

    # 6x6 board with 6 mines using the full observation encoding.
    # NOTE(review): the "YOLO" reward is -0.4 here but -0.3 in the other
    # examples in this file — confirm the difference is intentional.
    env = Minesweeper(display=display,
                      ROWS=6,
                      COLS=6,
                      MINES=6,
                      OUT="FULL",
                      rewards={
                          "win": 1,
                          "loss": -1,
                          "progress": 0.9,
                          "noprogress": -0.3,
                          "YOLO": -0.4
                      })
    with tf.Session() as sess:
        #Restore Model
        saver = tf.train.Saver()
        saver.restore(sess, "{}/{}.ckpt".format(model, model))

        #Initialize test parameters
        games = 0
        moves = 0
        stuck = 0