import numpy as np
import tensorflow as tf

# SLNetwork, the board-tensor helpers (make_empty_board_tensor, update_tensor,
# undo_update_tensor), the selection utilities (softmax_selection, max_selection,
# intmove_to_raw), and the constants INPUT_WIDTH, INPUT_DEPTH, BOARD_SIZE are
# assumed to be provided by the surrounding project package.


class NNAgent(object):
    """Agent that generates moves from either a supervised policy net or a value net."""

    def __init__(self, model_location, name, is_value_net=False):
        self.model_path = model_location
        self.agent_name = name
        self.is_value_net = is_value_net
        self.initialize_game()

    def initialize_game(self):
        self.game_state = []
        self.boardtensor = np.zeros(
            dtype=np.float32, shape=(1, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
        make_empty_board_tensor(self.boardtensor)
        self.load_model()

    def load_model(self):
        self.data_node = tf.placeholder(
            tf.float32, shape=(1, INPUT_WIDTH, INPUT_WIDTH, INPUT_DEPTH))
        self.sess = tf.Session()
        if self.is_value_net:
            from valuenet import ValueNet
            self.vnet = ValueNet()
            self.keep_prob_node = tf.placeholder(tf.float32)
            self.value = self.vnet.model(self.data_node, keep_prob_node=self.keep_prob_node)
            self.position_values = np.ndarray(dtype=np.float32, shape=(BOARD_SIZE ** 2,))
        else:
            self.net = SLNetwork()
            self.net.declare_layers(num_hidden_layer=5)
            self.logit = self.net.model(self.data_node)
        saver = tf.train.Saver()
        saver.restore(self.sess, self.model_path)

    def reinitialize(self):
        self.game_state = []
        make_empty_board_tensor(self.boardtensor)

    # 0 - black player, 1 - white player
    def play_move(self, intplayer, intmove):
        update_tensor(self.boardtensor, intplayer, intmove)
        self.game_state.append(intmove)

    def generate_move(self, intplayer=None):
        if self.is_value_net:
            # Evaluate every empty position with the value net (one forward pass each),
            # then sample a move from the resulting value distribution.
            empty_positions = [i for i in range(BOARD_SIZE ** 2) if i not in self.game_state]
            self.position_values.fill(0.0)
            for intmove in empty_positions:
                update_tensor(self.boardtensor, intplayer, intmove)
                v = self.sess.run(self.value, feed_dict={
                    self.data_node: self.boardtensor,
                    self.keep_prob_node: 1.0})  # disable dropout at evaluation time
                undo_update_tensor(self.boardtensor, intplayer, intmove)
                self.position_values[intmove] = v
            # Returns an integer board index.
            im = softmax_selection(self.position_values, self.game_state, temperature=0.1)
            # im = max_selection(self.position_values, self.game_state)
            return im
        else:
            # Single forward pass of the policy net over the current board tensor.
            logits = self.sess.run(self.logit, feed_dict={self.data_node: self.boardtensor})
            intmove = softmax_selection(logits, self.game_state)
            # intmove = max_selection(logits, self.game_state)
            raw_move = intmove_to_raw(intmove)
            assert ord('a') <= ord(raw_move[0]) <= ord('z') and 0 <= int(raw_move[1:]) < BOARD_SIZE ** 2
            # Returns a raw move string such as "a5" (unlike the value-net branch above).
            return raw_move

    def close_all(self):
        self.sess.close()
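
# Minimal usage sketch. Assumptions: "saved_models/slnet.ckpt" is a hypothetical
# checkpoint path, and the project constants/helpers referenced above are importable.
# A policy-net agent answers with a raw move string such as "a5"; a value-net agent
# answers with an integer board index instead.
if __name__ == "__main__":
    agent = NNAgent("saved_models/slnet.ckpt", name="slnet-agent", is_value_net=False)
    agent.play_move(0, 0)    # black plays board index 0
    agent.play_move(1, 12)   # white plays board index 12
    print(agent.generate_move(intplayer=0))
    agent.close_all()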