Example 1
def load_model(self):
    self.data_node = tf.placeholder(tf.float32, shape=(1, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
    self.sess=tf.Session()
    if self.is_value_net :
        from valuenet import ValueNet
        self.vnet=ValueNet()
        self.keep_prob_node=tf.placeholder(tf.float32)
        self.value=self.vnet.model(self.data_node, keep_prob_node=self.keep_prob_node)
        self.position_values=np.ndarray(dtype=np.float32, shape=(BOARD_SIZE**2,))
    else:
        self.net = SLNetwork()
        self.net.declare_layers(num_hidden_layer=5)
        self.logit=self.net.model(self.data_node)
    saver = tf.train.Saver()
    saver.restore(self.sess, self.model_path)
Example 2
    def selfplay(self):
        data_node=tf.placeholder(tf.float32, shape=(1, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
        slnet=SLNetwork()
        slnet.declare_layers(num_hidden_layer=8)

        this_logits=slnet.model(data_node)
        saver=tf.train.Saver(max_to_keep=MAX_NUM_MODEL_TO_KEEP)
        print(this_logits.name)
        sl_model = os.path.join(MODELS_DIR, SLMODEL_NAME)
        sess = tf.Session()
        saver.restore(sess, sl_model)

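        # Build a second copy of the network under its own variable scope so the
        # opponent's weights can live in the same graph as the learner's; the
        # checkpoint stores un-scoped names, hence the explicit name remapping below.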
        with tf.variable_scope("other_nn_player"):
            slnet2=SLNetwork()
            slnet2.declare_layers(num_hidden_layer=8)
            other_logits=slnet2.model(data_node)
            #use non-scoped name to restore those variables
            var_dict2 = {slnet.input_layer.weight.op.name: slnet2.input_layer.weight,
                    slnet.input_layer.bias.op.name: slnet2.input_layer.bias}
            for i in range(slnet2.num_hidden_layer):
                var_dict2[slnet.conv_layer[i].weight.op.name] = slnet2.conv_layer[i].weight
                var_dict2[slnet.conv_layer[i].bias.op.name] = slnet2.conv_layer[i].bias
            saver2 = tf.train.Saver(var_list=var_dict2)

            otherSess = tf.Session()
            saver2.restore(otherSess, sl_model)

        batch_game_size=128

        batch_reward_node = tf.placeholder(dtype=np.float32, shape=(PG_STATE_BATCH_SIZE,))
        batch_data_node = tf.placeholder(dtype=np.float32, shape=(PG_STATE_BATCH_SIZE, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
        batch_label_node = tf.placeholder(shape=(PG_STATE_BATCH_SIZE,), dtype=np.int32)

        batch_rewards = np.ndarray(dtype=np.float32, shape=(PG_STATE_BATCH_SIZE,))
        batch_data = np.ndarray(dtype=np.float32, shape=(PG_STATE_BATCH_SIZE, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
        batch_labels = np.ndarray(shape=(PG_STATE_BATCH_SIZE,), dtype=np.int32)

        game_rewards=np.ndarray(shape=(batch_game_size,), dtype=np.float32)

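        # Reuse the learner's weights on the training placeholders and build the
        # policy-gradient objective: the cross-entropy of each sampled move is
        # weighted by its discounted, signed reward before averaging.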
        tf.get_variable_scope().reuse_variables()
        logit = slnet.model(batch_data_node)
        entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=batch_label_node)
        loss = tf.reduce_mean(tf.multiply(batch_reward_node, entropy))
        opt = tf.train.GradientDescentOptimizer(FLAGS.alpha / batch_game_size)
        opt_op=opt.minimize(loss)

        win1=0
        win2=0
        ite=0
        g_step=0

        if not os.path.exists(MODELS_DIR):
            os.makedirs(MODELS_DIR)
            print("no SL model? creating pg model dir")
        else:
            for f in os.listdir(MODELS_DIR):
                if f.startswith(PGMODEL_NAME):
                    try:
                        os.remove(os.path.join(MODELS_DIR,f))
                    except OSError as e:
                        print(e.strerror)
            print("removing old models in pg dir")

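        # Main self-play loop: play a batch of games against the (periodically
        # refreshed) opponent network, then run policy-gradient updates over the
        # recorded positions.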
        while ite < FLAGS.max_iterations:
            start_batch_time = time.time()
            games,_tmp1,_tmp2=self.play_one_batch_games(sess,otherSess, this_logits,other_logits,data_node,batch_game_size, game_rewards)
            win1 += _tmp1
            win2 += _tmp2
            data_tool=data_util(games, PG_STATE_BATCH_SIZE, batch_data, batch_labels)
            data_tool.disable_symmetry_checking()
            offset1=0; offset2=0; nepoch=0
            while nepoch <1 :
                o1,o2,next_epoch=data_tool.prepare_batch(offset1,offset2)
                if(next_epoch):
                    nepoch += 1
                k=0
                batch_rewards.fill(0)
                new_o1, new_o2=0,0
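                # Fill batch_rewards: each stored position of game i gets the game
                # result R discounted by gamma per remaining move, with the sign
                # flipped on every move since the two players alternate.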
                while k < PG_STATE_BATCH_SIZE:
                    for i in range(offset1, batch_game_size):
                        R=game_rewards[i]
                        sign=1 if offset2 % 2 ==0 else -1
                        for j in range(offset2, len(games[i])-1):
                            batch_rewards[k]=R*sign*(FLAGS.gamma**(len(games[i])-2-j))
                            sign=-sign
                            k += 1
                            if(k>=PG_STATE_BATCH_SIZE):
                                new_o1=i
                                new_o2=j+1
                                break
                        offset2=0
                        if(k>=PG_STATE_BATCH_SIZE):
                            break
                    if k<PG_STATE_BATCH_SIZE:
                        offset1, offset2 = 0,0
                assert(new_o1==o1 and new_o2==o2)
                offset1, offset2=o1,o2
                sess.run(opt_op, feed_dict={batch_data_node:batch_data, batch_label_node:batch_labels, batch_reward_node: batch_rewards})

            print("time cost for one batch of %d games"%PG_GAME_BATCH_SIZE, time.time()-start_batch_time)
            ite += 1
            if ite % FLAGS.frequency==0:
                saver.save(sess, os.path.join(MODELS_DIR, PGMODEL_NAME), global_step=g_step)
                pg_model=self.select_model(MODELS_DIR)
                saver2.restore(otherSess, pg_model)
                g_step +=1
                print("Replce opponenet with new model, ", g_step)
                print(pg_model)
        print("In total, this win", win1, "opponenet win", win2)
        otherSess.close()
        sess.close()
Example 3
class NNAgent(object):

    def __init__(self, model_location, name, is_value_net=False):
        self.model_path=model_location
        self.agent_name=name
        self.is_value_net=is_value_net
        self.initialize_game()

    def initialize_game(self):
        self.game_state=[]
        self.boardtensor=np.zeros(dtype=np.float32, shape=(1, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
        make_empty_board_tensor(self.boardtensor)
        self.load_model()

    def load_model(self):
        self.data_node = tf.placeholder(tf.float32, shape=(1, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
        self.sess=tf.Session()
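        # A value net scores whole positions (dropout keep_prob is fed at run time);
        # otherwise an SL policy network produces per-move logits.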
        if self.is_value_net :
            from valuenet import ValueNet
            self.vnet=ValueNet()
            self.keep_prob_node=tf.placeholder(tf.float32)
            self.value=self.vnet.model(self.data_node, keep_prob_node=self.keep_prob_node)
            self.position_values=np.ndarray(dtype=np.float32, shape=(BOARD_SIZE**2,))
        else:
            self.net = SLNetwork()
            self.net.declare_layers(num_hidden_layer=5)
            self.logit=self.net.model(self.data_node)
        saver = tf.train.Saver()
        saver.restore(self.sess, self.model_path)

    def reinitialize(self):
        self.game_state = []
        make_empty_board_tensor(self.boardtensor)

    #0-black player, 1-white player
    def play_move(self, intplayer, intmove):
        update_tensor(self.boardtensor, intplayer, intmove)
        self.game_state.append(intmove)

    def generate_move(self, intplayer=None):
        if self.is_value_net :
            s=list(self.game_state)
            empty_positions=[i for i in range(BOARD_SIZE**2) if i not in s]
            self.position_values.fill(0.0)
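            # One-ply lookahead: tentatively play every empty cell, score the
            # resulting position with the value net, undo the move, then sample
            # from the collected position values.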
            for intmove in empty_positions:
                update_tensor(self.boardtensor, intplayer, intmove)
                v=self.sess.run(self.value, feed_dict={self.data_node:self.boardtensor})
                undo_update_tensor(self.boardtensor,intplayer, intmove)
                self.position_values[intmove]=v
            im=softmax_selection(self.position_values, self.game_state, temperature=0.1)
            #im=max_selection(self.position_values, self.game_state)
            return im
        else:
            logits=self.sess.run(self.logit, feed_dict={self.data_node:self.boardtensor})
            intmove=softmax_selection(logits, self.game_state)
            #intmove=max_selection(logits, self.game_state)
            raw_move=intmove_to_raw(intmove)
            assert(ord('a') <= ord(raw_move[0]) <= ord('z') and 0<= int(raw_move[1:]) <BOARD_SIZE**2)
            return raw_move

    def close_all(self):
        self.sess.close()
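
A minimal usage sketch for NNAgent (not from the source): it assumes the surrounding project's modules and constants are importable, and the checkpoint path and agent name below are hypothetical.

agent = NNAgent("savedModels/sl_model.ckpt", "nn_player")  # hypothetical checkpoint path and name
agent.play_move(0, 5)            # black (player 0) occupies cell 5
reply = agent.generate_move(1)   # policy-net branch returns a raw move string
agent.close_all()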