Code example #1
import dimod
from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite
from board24 import Board
import morris
import pygame
import numpy as np
import sys

game = morris.GameState()

num_spots = 24
max_checkers = 18
our = np.zeros(num_spots, dtype=int)
enemy = np.zeros(num_spots, dtype=int)
previous_enemy = np.zeros(num_spots, dtype=int)
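# Objective weights: the names suggest Ising linear (h) and coupling (J) terms,
# a constraint penalty, and mill / anti-mill incentives.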
h_const = 100
j_const = 1
constraint_const = 3
mill_constant = 0.1
anti_mill_constant = 0.5

#our   = np.array([0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0])
#enemy = np.array([0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0])
#previous_enemy=enemy

b = Board()
b.board_array = our + 2 * enemy
# FOR BOARD MARKERS:
# OURS IS 1
# ENEMY IS 2
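A minimal sketch of the sampling step these imports and constants set up: build an Ising model (linear biases h, couplings J) over the 24 board spots and submit it through EmbeddingComposite. The bias and coupling terms below are illustrative placeholders, not the project's actual move-selection objective.

# Sketch only: placeholder Ising objective over the 24 spots.
h = {i: -h_const * our[i] for i in range(num_spots)}          # linear biases (placeholder)
J = {(i, j): j_const for i in range(num_spots)
     for j in range(i + 1, num_spots)}                        # couplings (placeholder)

bqm = dimod.BinaryQuadraticModel.from_ising(h, J)

# Requires D-Wave cloud access; EmbeddingComposite handles minor-embedding
# of the 24 logical variables onto physical qubits.
sampler = EmbeddingComposite(DWaveSampler())
sampleset = sampler.sample(bqm, num_reads=100)
print(sampleset.first.sample, sampleset.first.energy)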
Code example #2
File: dqn.py  Project: Scemic/9MANMORRIS
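The excerpt below relies on module-level imports and hyperparameters defined elsewhere in dqn.py; a plausible set is sketched here. The constant values are illustrative assumptions, not the project's actual settings.

# Assumed module-level context for trainNetwork (values are placeholders).
import random
from collections import deque

import numpy as np
import tensorflow as tf
from tensorflow.contrib.session_bundle import exporter   # legacy TF 1.x exporter

import morris as game            # Nine Men's Morris environment (assumed alias)

GAMMA = 0.99                     # discount factor for future rewards
OBSERVE = 10000                  # timesteps observed before training starts
EXPLORE = 200000                 # timesteps over which epsilon is annealed
INITIAL_EPSILON = 1.0            # starting exploration rate
FINAL_EPSILON = 0.05             # final exploration rate
REPLAY_MEMORY = 50000            # max transitions kept in the deque D
BATCH = 32                       # minibatch size sampled from D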
def trainNetwork(s, readout, sess):
    # define the cost function (TODO!!!)
    a = tf.placeholder("float", [None, 24])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # store the previous observations in replay memory
    D = deque()

    # get the first state by doing nothing
    do_nothing = np.zeros(24)
    s_t, r_0, terminal = game_state.frame_step(do_nothing)

    # saving and loading networks
    saver = tf.train.Saver()
    # Merge all summaries and write them out
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('morris/train', sess.graph)
    tf.global_variables_initializer().run()

    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print(("Successfully loaded:", checkpoint.model_checkpoint_path))
    else:
        print("Could not find old network weights")

    epsilon = INITIAL_EPSILON
    t = 0
    try:
        while "pigs" != "fly":
            # choose an action epsilon greedily
            a_t = readout.eval(feed_dict={s: [s_t]})[0]
            if random.random() <= epsilon or t <= OBSERVE:
                a_t = np.random.rand(24)

            # scale down epsilon
            if epsilon > FINAL_EPSILON and t > OBSERVE:
                epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

            # run the selected action and observe next state and reward
            s_t1, r_t, terminal = game_state.frame_step(a_t)

            # store the transition in D
            D.append((s_t, a_t, r_t, s_t1, terminal))
            if len(D) > REPLAY_MEMORY:
                D.popleft()

            # only train if done observing
            if t > OBSERVE:
                # sample a minibatch to train on
                minibatch = random.sample(D, BATCH)

                # get the batch variables
                s_j_batch = [d[0] for d in minibatch]
                a_batch = [d[1] for d in minibatch]
                r_batch = [d[2] for d in minibatch]
                s_j1_batch = [d[3] for d in minibatch]

                y_batch = []
                readout_j1_batch = readout.eval(feed_dict={s: s_j1_batch})
                for i in range(0, len(minibatch)):
                    # if terminal only equals reward
                    if minibatch[i][4]:
                        y_batch.append(r_batch[i])
                    else:
                        y_batch.append(r_batch[i] +
                                       GAMMA * np.max(readout_j1_batch[i]))

                # perform gradient step
                '''
                train_step.run(feed_dict = {
                    y : y_batch,
                    a : a_batch,
                    s : s_j_batch})
                '''
                if t % 100 == 0:  # Record execution stats
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    summary, _ = sess.run([merged, train_step],
                                          feed_dict={
                                              y: y_batch,
                                              a: a_batch,
                                              s: s_j_batch
                                          },
                                          options=run_options,
                                          run_metadata=run_metadata)
                    train_writer.add_run_metadata(run_metadata, 'step%03d' % t)
                    train_writer.add_summary(summary, t)
                    print('Adding run metadata for', t)
                else:  # Record a summary
                    summary, _ = sess.run([merged, train_step],
                                          feed_dict={
                                              y: y_batch,
                                              a: a_batch,
                                              s: s_j_batch
                                          })
                    train_writer.add_summary(summary, t)

            # update the old values
            s_t = s_t1
            t += 1

            # save progress every 1000 iterations
            if t % 1000 == 0:
                saver.save(sess, 'morris/checkpoint morris-dqn', global_step=t)

            # print info
            state = ""
            if t <= OBSERVE:
                state = "observe"
            elif t > OBSERVE and t <= OBSERVE + EXPLORE:
                state = "explore"
            else:
                state = "train"
            print(("TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon,
                   "/ REWARD", r_t))
    except KeyboardInterrupt:
        train_writer.close()

        export_path = 'morris/out'
        print('Exporting trained model to', export_path)
        saver = tf.train.Saver(sharded=True)
        model_exporter = exporter.Exporter(saver)
        model_exporter.init(sess.graph.as_graph_def(),
                            named_graph_signatures={
                                'inputs':
                                exporter.generic_signature({'board': s}),
                                'outputs':
                                exporter.generic_signature({'values': readout})
                            })
        model_exporter.export(export_path, tf.constant(1), sess)
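
A hedged sketch of how trainNetwork could be driven: build a small Q-network over the 24-spot board (the two dense layers and their sizes are assumptions, not the project's actual network), register at least one summary op so tf.summary.merge_all() has something to write, and pass the input placeholder, readout tensor, and session in.

def playGame():
    # Sketch only: a tiny fully connected Q-network (architecture assumed).
    s = tf.placeholder("float", [None, 24])                       # board state input
    W1 = tf.Variable(tf.truncated_normal([24, 64], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.01, shape=[64]))
    h1 = tf.nn.relu(tf.matmul(s, W1) + b1)
    W2 = tf.Variable(tf.truncated_normal([64, 24], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.01, shape=[24]))
    readout = tf.matmul(h1, W2) + b2                              # Q-value per spot

    # trainNetwork merges all summaries, so define at least one summary op.
    tf.summary.histogram("q_values", readout)

    sess = tf.InteractiveSession()
    trainNetwork(s, readout, sess)


if __name__ == "__main__":
    playGame()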