Ejemplo n.º 1
0
    def train(self, train_dat_file, test_dat_file, max_epochs=None):
        """Train on chunks streamed from the given data files.

        Args:
            train_dat_file: source handed to ``DatasetLoader`` for training data.
            test_dat_file: source handed to ``DatasetLoader`` for test data.
            max_epochs: stop after this many epochs; ``None`` (the default)
                keeps the original behaviour of looping forever.
        """
        self.loader_train = DatasetLoader(train_dat_file)
        self.loader_test = DatasetLoader(test_dat_file)

        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            print('epoch:', epoch)
            epoch += 1

            # One "part" is one chunk loaded by adapt(); adapt() updates
            # self._has_more_data from the training loader.
            ith_part = 0
            while self._has_more_data:
                ith_part += 1
                self.adapt()
                self.train_part(ith_part)

            # Re-arm the flag so the next epoch enters the inner loop again.
            self._has_more_data = True
Ejemplo n.º 2
0
    def train(self, train_dat_file, test_dat_file, max_epochs=None):
        """Train on chunks streamed from the given data files.

        Args:
            train_dat_file: source handed to ``DatasetLoader`` for training data.
            test_dat_file: source handed to ``DatasetLoader`` for test data.
            max_epochs: stop after this many epochs; ``None`` (the default)
                keeps the original behaviour of looping forever.
        """
        self.loader_train = DatasetLoader(train_dat_file)
        self.loader_test = DatasetLoader(test_dat_file)

        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            print('epoch:', epoch)
            epoch += 1

            # One "part" is one chunk loaded by adapt(); adapt() updates
            # self._has_more_data from the training loader.
            ith_part = 0
            while self._has_more_data:
                ith_part += 1
                self.adapt()
                self.train_part(ith_part)

            # Re-arm the flag so the next epoch enters the inner loop again.
            self._has_more_data = True
Ejemplo n.º 3
0
class ValueNet(object):
    """Convolutional value network regressing a scalar reward from a board.

    The constructor builds the TensorFlow graph, session, saver and summary
    writer. ``train`` streams data in chunks through ``DatasetLoader`` and
    ``get_state_values`` runs inference. The network output passes through
    ``tanh`` and therefore lies in [-1, 1].
    """

    # Feature planes produced by adapt_state: black stones, white stones,
    # empty points, and a constant "black to move" plane.
    NUM_CHANNELS = 4

    def __init__(self, brain_dir, summary_dir):
        """Build the graph and start a session.

        Args:
            brain_dir: directory that holds the ``model.ckpt`` checkpoints.
            summary_dir: directory the summary writer logs to.
        """
        self.brain_dir = brain_dir
        self.brain_file = os.path.join(self.brain_dir, 'model.ckpt')
        self.summary_dir = summary_dir

        # Set by adapt() from the training loader; drives the loop in train().
        self._has_more_data = True

        self.ds_train = None
        self.ds_test = None

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.states_pl, self.rewards_pl = self.placeholder_inputs()
            (self.value_outputs, self.opt_op, self.global_step,
             self.mse) = self.model(self.states_pl, self.rewards_pl)
            self.summary_op = tf.merge_all_summaries()
            init = tf.initialize_all_variables()
            # Only the trainable variables under "value_net" are checkpointed.
            self.saver = tf.train.Saver(
                tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope="value_net"))
        self.summary_writer = tf.train.SummaryWriter(self.summary_dir,
                                                     self.graph)
        self.sess = tf.Session(graph=self.graph)
        self.sess.run(init)

    def get_input_shape(self):
        """Return the (height, width, channels) of one network input."""
        return Board.BOARD_SIZE, Board.BOARD_SIZE, self.NUM_CHANNELS

    def placeholder_inputs(self):
        """Create and return the (states, rewards) placeholders."""
        h, w, c = self.get_input_shape()
        states = tf.placeholder(tf.float32, [None, h, w, c])  # NHWC
        rewards = tf.placeholder(tf.float32, shape=[None])
        return states, rewards

    def weight_variable(self, shape):
        """Return a variable initialised from a truncated normal (stddev 0.01)."""
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(self, shape):
        """Return a variable initialised to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def create_value_net(self, states_pl):
        """Build four 3x3 conv layers plus a tanh output unit.

        Args:
            states_pl: NHWC input tensor.

        Returns:
            Tensor of shape [batch, 1] holding the predicted values.
        """
        ch1 = 32
        W_1 = self.weight_variable([3, 3, self.NUM_CHANNELS, ch1])
        b_1 = self.bias_variable([ch1])

        ch = 32
        W_2 = self.weight_variable([3, 3, ch1, ch])
        b_2 = self.bias_variable([ch])
        W_21 = self.weight_variable([3, 3, ch, ch])
        b_21 = self.bias_variable([ch])
        W_22 = self.weight_variable([3, 3, ch, ch])
        b_22 = self.bias_variable([ch])

        h_conv1 = tf.nn.relu(
            tf.nn.conv2d(states_pl, W_1, [1, 1, 1, 1], padding='SAME') + b_1)
        h_conv2 = tf.nn.relu(
            tf.nn.conv2d(h_conv1, W_2, [1, 1, 1, 1], padding='SAME') + b_2)
        h_conv21 = tf.nn.relu(
            tf.nn.conv2d(h_conv2, W_21, [1, 1, 1, 1], padding='SAME') + b_21)
        h_conv22 = tf.nn.relu(
            tf.nn.conv2d(h_conv21, W_22, [1, 1, 1, 1], padding='SAME') + b_22)

        # Flatten the conv features for the fully connected output layer.
        conv_out_dim = h_conv22.get_shape()[1:].num_elements()
        conv_out = tf.reshape(h_conv22, [-1, conv_out_dim])

        num_hidden = 1

        W_3 = tf.Variable(tf.zeros([conv_out_dim, num_hidden], tf.float32))
        b_3 = tf.Variable(tf.zeros([num_hidden], tf.float32))

        fc_out = tf.tanh(tf.matmul(conv_out, W_3) + b_3)
        return fc_out

    def model(self, states_pl, rewards_pl):
        """Build the network, loss, optimizer and scalar summaries.

        Returns:
            Tuple ``(value_outputs, opt_op, global_step, mean_square_loss)``.
        """
        global_step = tf.Variable(0, name='global_step', trainable=False)

        with tf.variable_scope("value_net"):
            value_outputs = self.create_value_net(states_pl)
        value_net_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope="value_net")

        # value_outputs is [batch, 1] while rewards_pl is [batch]. Flatten
        # before subtracting; otherwise broadcasting produces a [batch, batch]
        # matrix and the loss compares every prediction with every reward.
        flat_values = tf.reshape(value_outputs, [-1])
        mean_square_loss = tf.reduce_mean(
            tf.squared_difference(rewards_pl, flat_values))
        # L2 penalty over all trainable variables of the value net.
        value_reg_loss = tf.reduce_sum(
            [tf.reduce_sum(tf.square(x)) for x in value_net_vars])
        value_loss = mean_square_loss + 0.001 * value_reg_loss

        optimizer = tf.train.AdamOptimizer(0.0001)
        value_opt_op = optimizer.minimize(value_loss, global_step=global_step)

        tf.scalar_summary("raw_value_loss", mean_square_loss)
        tf.scalar_summary("reg_value_loss", value_reg_loss)
        tf.scalar_summary("all_value_loss", value_loss)
        return value_outputs, value_opt_op, global_step, mean_square_loss

    def get_state_values(self, states, players):
        """Return the network output for each (state, player) pair.

        Args:
            states: iterable of boards accepted by ``adapt_state``.
            players: iterable of the player to move for each board.

        Returns:
            Result of running ``value_outputs`` (one row per input state).
        """
        h, w, c = self.get_input_shape()
        images = np.array(
            [self.adapt_state(s, p)[0] for s, p in zip(states, players)])
        feed_dict = {self.states_pl: images.reshape((-1, h, w, c))}
        return self.sess.run(self.value_outputs, feed_dict=feed_dict)

    def save(self):
        """Write the current variables to ``brain_file``."""
        self.saver.save(self.sess, self.brain_file)

    def load(self):
        """Restore the latest checkpoint in ``brain_dir``; no-op if none exists."""
        ckpt = tf.train.get_checkpoint_state(self.brain_dir)
        if ckpt and ckpt.model_checkpoint_path:
            self.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def close(self):
        """Close the TensorFlow session."""
        self.sess.close()

    def train(self, train_dat_file, test_dat_file, max_epochs=None):
        """Train on chunks streamed from the given data files.

        Args:
            train_dat_file: source handed to ``DatasetLoader`` for training data.
            test_dat_file: source handed to ``DatasetLoader`` for test data.
            max_epochs: stop after this many epochs; ``None`` (the default)
                keeps the original behaviour of looping forever.
        """
        self.loader_train = DatasetLoader(train_dat_file)
        self.loader_test = DatasetLoader(test_dat_file)

        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            print('epoch:', epoch)
            epoch += 1

            ith_part = 0
            while self._has_more_data:
                ith_part += 1
                self.adapt()
                self.train_part(ith_part)

            # Re-arm the flag so the next epoch enters the inner loop again.
            self._has_more_data = True

    def fill_feed_dict(self, data_set, states_pl, rewards_pl, batch_size=None):
        """Fetch the next batch from ``data_set`` and map it onto the placeholders.

        Args:
            data_set: object exposing ``next_batch(batch_size)``.
            states_pl: key under which the states batch is stored.
            rewards_pl: key under which the rewards batch is stored.
            batch_size: batch size; falls back to ``BATCH_SIZE`` when falsy.
        """
        batch_size = batch_size or BATCH_SIZE
        states_feed, rewards_feed = data_set.next_batch(batch_size)
        return {states_pl: states_feed, rewards_pl: rewards_feed}

    def train_part(self, ith_part):
        """Run one pass over the currently loaded training chunk.

        Writes summaries every 1000 steps, saves a checkpoint after the last
        step and prints train/test MSE for the chunk.
        """
        num_steps = self.ds_train.num_examples // BATCH_SIZE
        print('total num steps:', num_steps)
        start_time = time.time()
        train_mse = 0.
        for step in range(1, num_steps + 1):
            feed_dict = self.fill_feed_dict(self.ds_train, self.states_pl,
                                            self.rewards_pl)
            _, train_mse = self.sess.run([self.opt_op, self.mse],
                                         feed_dict=feed_dict)

            if step % 1000 == 0:
                summary_str, gstep = self.sess.run(
                    [self.summary_op, self.global_step], feed_dict=feed_dict)
                self.summary_writer.add_summary(summary_str, gstep)
                self.summary_writer.flush()

        # Checkpoint once per chunk, only if at least one step ran.
        if num_steps > 0:
            self.saver.save(self.sess,
                            self.brain_file,
                            global_step=self.global_step)

        duration = time.time() - start_time
        test_mse = self.do_eval(self.mse, self.states_pl, self.rewards_pl,
                                self.ds_test)
        # Both reported numbers are mean squared errors, not accuracies.
        print(
            'part: %d, train mse: %.3f, test mse: %.3f, time cost: %.3f sec'
            % (ith_part, train_mse, test_mse, duration))

    def do_eval(self, mse, states_pl, rewards_pl, data_set):
        """Return the mean of per-batch ``mse`` values over ``data_set``."""
        accum_mse = 0.
        batch_size = BATCH_SIZE
        assert batch_size != 0
        # float() + int() keep the step count an int under either division
        # semantics, so it is always valid for range().
        steps_per_epoch = int(
            math.ceil(data_set.num_examples / float(batch_size)))
        for _ in range(steps_per_epoch):
            feed_dict = self.fill_feed_dict(data_set, states_pl, rewards_pl,
                                            batch_size)
            accum_mse += self.sess.run(mse, feed_dict=feed_dict)
        avg_mse = accum_mse / (steps_per_epoch or 1)
        return avg_mse

    def forge(self, row):
        """Split a raw data row into ``(image, reward)``.

        The first ``Board.BOARD_SIZE_SQ`` entries are the board, the second to
        last entry is the player and the last entry is the reward.
        """
        board = row[:Board.BOARD_SIZE_SQ]
        player = row[-2]
        image, _ = self.adapt_state(board, player)
        reward = row[-1]
        return image, reward

    def adapt_state(self, board, player):
        """Encode a board as flattened feature planes.

        Args:
            board: array of stone codes (compared against ``Board.STONE_*``).
            player: the player to move.

        Returns:
            ``(image, legal)``: the raveled stack of the four planes and a
            boolean mask of the empty points.
        """
        black = (board == Board.STONE_BLACK).astype(float)
        white = (board == Board.STONE_WHITE).astype(float)
        empty = (board == Board.STONE_EMPTY).astype(float)
        if player == Board.STONE_BLACK:
            is_black_move = np.ones_like(black, float)
        else:
            is_black_move = np.zeros_like(black, float)

        image = np.dstack((black, white, empty, is_black_move)).ravel()
        legal = empty.astype(bool)
        return image, legal

    def adapt(self):
        """Load the next training/test chunks into ``ds_train``/``ds_test``.

        A data set is dropped and reloaded when its loader's ``is_wane`` flag
        is false (flag semantics are defined by ``DatasetLoader``).
        """
        gc.collect()

        if self.ds_train is not None and not self.loader_train.is_wane:
            self.ds_train = None
        if self.ds_test is not None and not self.loader_test.is_wane:
            self.ds_test = None

        # Collect again now that the old data sets are unreferenced.
        gc.collect()

        h, w, c = self.get_input_shape()

        def f(dat):
            # Keep images and rewards in separate lists: packing
            # (array, scalar) tuples into one np.array builds a ragged
            # object array, which newer NumPy versions reject.
            images, rewards = [], []
            for row in dat:
                s, r = self.forge(row)
                images.append(s)
                rewards.append(r)
            return DataSet(
                np.vstack(images).reshape((-1, h, w, c)), np.array(rewards))

        if self.ds_train is None:
            ds_train, self._has_more_data = self.loader_train.load(
                DATASET_CAPACITY)
            self.ds_train = f(ds_train)
        if self.ds_test is None:
            ds_test, _ = self.loader_test.load(DATASET_CAPACITY // 2)
            self.ds_test = f(ds_test)

        print(self.ds_train.images.shape, self.ds_train.labels.shape)
        print(self.ds_test.images.shape, self.ds_test.labels.shape)
Ejemplo n.º 4
0
 def __init__(self, is_train=True, is_revive=False, is_rl=False):
     """Initialise the base class, then create one loader per data split."""
     super(DCNN3, self).__init__(is_train, is_revive, is_rl)
     train_src, valid_src, test_src = (
         Pre.DATA_SET_TRAIN, Pre.DATA_SET_VALID, Pre.DATA_SET_TEST)
     self.loader_train = DatasetLoader(train_src)
     self.loader_valid = DatasetLoader(valid_src)
     self.loader_test = DatasetLoader(test_src)
Ejemplo n.º 5
0
class DCNN3(Pre):
    """Network with separate policy and value towers built on ``Pre``.

    Each tower has its own convolutional stack (``create_conv_net``) under the
    variable scopes "policy_net" and "value_net".
    """

    def __init__(self, is_train=True, is_revive=False, is_rl=False):
        """Initialise the base class, then create one loader per data split."""
        super(DCNN3, self).__init__(is_train, is_revive, is_rl)
        self.loader_train = DatasetLoader(Pre.DATA_SET_TRAIN)
        self.loader_valid = DatasetLoader(Pre.DATA_SET_VALID)
        self.loader_test = DatasetLoader(Pre.DATA_SET_TEST)

    def placeholder_inputs(self):
        """Create and return the (states, actions) placeholders."""
        h, w, c = self.get_input_shape()
        states = tf.placeholder(tf.float32, [None, h, w, c])  # NHWC
        actions = tf.placeholder(tf.float32, [None, Pre.NUM_ACTIONS])
        return states, actions

    def model(self, states_pl, actions_pl):
        """Build both towers, the policy loss, optimizer and accuracy op.

        Results are stored on ``self`` (``predictions``, ``value_outputs``,
        ``loss``, ``opt_op``, ``predict_probs``, ``eval_correct``); nothing is
        returned. Finishes by calling ``self.rl_op`` (defined outside this
        class).
        """
        with tf.variable_scope("policy_net"):
            self.predictions = self.create_policy_net(states_pl)
        with tf.variable_scope("value_net"):
            self.value_outputs = self.create_value_net(states_pl)

        self.policy_net_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_net")

        pg_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.predictions,
                                                    labels=actions_pl))
        # L2 penalty over the policy tower only.
        reg_loss = tf.reduce_sum(
            [tf.reduce_sum(tf.square(x)) for x in self.policy_net_vars])
        self.loss = pg_loss + 0.001 * reg_loss

        self.optimizer = tf.train.AdamOptimizer(0.0001)
        self.opt_op = self.optimizer.minimize(self.loss)

        self.predict_probs = tf.nn.softmax(self.predictions)
        # A sample counts as correct when the arg-max move matches the label.
        eq = tf.equal(tf.argmax(self.predict_probs, 1),
                      tf.argmax(actions_pl, 1))
        self.eval_correct = tf.reduce_sum(tf.cast(eq, tf.int32))

        self.rl_op(actions_pl)

    def create_conv_net(self, states_pl):
        """Build the shared-architecture conv stack and return it flattened.

        Side effect: sets ``self.conv_out_dim`` to the flattened feature size,
        which the fully connected layers built right afterwards rely on.
        """
        ch1 = 32
        W_1 = self.weight_variable([3, 3, Pre.NUM_CHANNELS, ch1])
        b_1 = self.bias_variable([ch1])

        ch = 32
        W_2 = self.weight_variable([3, 3, ch1, ch])
        b_2 = self.bias_variable([ch])
        W_21 = self.weight_variable([3, 3, ch, ch])
        b_21 = self.bias_variable([ch])
        W_22 = self.weight_variable([3, 3, ch, ch])
        b_22 = self.bias_variable([ch])
        W_23 = self.weight_variable([1, 1, ch, 1024])
        b_23 = self.bias_variable([1024])

        # Only the first convolution uses VALID padding.
        h_conv1 = tf.nn.relu(
            tf.nn.conv2d(states_pl, W_1, [1, 1, 1, 1], padding='VALID') + b_1)
        h_conv2 = tf.nn.relu(
            tf.nn.conv2d(h_conv1, W_2, [1, 1, 1, 1], padding='SAME') + b_2)
        h_conv21 = tf.nn.relu(
            tf.nn.conv2d(h_conv2, W_21, [1, 1, 1, 1], padding='SAME') + b_21)
        h_conv22 = tf.nn.relu(
            tf.nn.conv2d(h_conv21, W_22, [1, 1, 1, 1], padding='SAME') + b_22)
        h_conv23 = tf.nn.relu(
            tf.nn.conv2d(h_conv22, W_23, [1, 1, 1, 1], padding='SAME') + b_23)

        self.conv_out_dim = h_conv23.get_shape()[1:].num_elements()
        conv_out = tf.reshape(h_conv23, [-1, self.conv_out_dim])
        return conv_out

    def create_policy_net(self, states_pl):
        """Return the policy logits of shape [batch, Pre.NUM_ACTIONS]."""
        conv = self.create_conv_net(states_pl)
        conv = tf.identity(conv, 'policy_net_conv')
        W_3 = self.weight_variable([self.conv_out_dim, Pre.NUM_ACTIONS])
        b_3 = self.bias_variable([Pre.NUM_ACTIONS])

        fc_out = tf.matmul(conv, W_3) + b_3
        return fc_out

    def create_value_net(self, states_pl):
        """Return the value output of shape [batch, 1].

        The fully connected weights are randomly initialised. With both weight
        matrices at zero the hidden ReLU layer outputs zero and the gradient
        into it is zero as well, so no weight could ever leave zero and only
        the final bias would learn.
        """
        conv = self.create_conv_net(states_pl)
        conv = tf.identity(conv, 'value_net_conv')
        num_hidden = 128
        W_3 = self.weight_variable([self.conv_out_dim, num_hidden])
        b_3 = tf.Variable(tf.zeros([num_hidden], tf.float32))
        W_4 = self.weight_variable([num_hidden, 1])
        b_4 = tf.Variable(tf.zeros([1], tf.float32))

        hidden = tf.nn.relu(tf.matmul(conv, W_3) + b_3)
        fc_out = tf.matmul(hidden, W_4) + b_4
        return fc_out

    def forge(self, row):
        """Split a raw data row into ``(image, move_distribution)``.

        The first ``Board.BOARD_SIZE_SQ`` entries are the board; every second
        entry after that is a visit count. The counts are normalised to sum
        to 1.
        """
        board = row[:Board.BOARD_SIZE_SQ]
        image, _ = self.adapt_state(board)

        # Work on a float copy. The slice is a view into ``row``, so dividing
        # in place would overwrite the loaded data and fail for integer rows.
        win_rate = np.array(row[Board.BOARD_SIZE_SQ::2], dtype=float)
        total = np.sum(win_rate)
        # Leave an all-zero row untouched rather than producing NaN labels.
        if total != 0:
            win_rate /= total
        return image, win_rate

    def adapt(self, filename):
        """Load the next train/valid/test chunks into the ``ds_*`` attributes.

        Args:
            filename: unused; kept for interface compatibility.

        A data set is dropped and reloaded when its loader's ``is_wane`` flag
        is false (flag semantics are defined by ``DatasetLoader``).
        """
        gc.collect()

        if self.ds_train is not None and not self.loader_train.is_wane:
            self.ds_train = None
        if self.ds_valid is not None and not self.loader_valid.is_wane:
            self.ds_valid = None
        if self.ds_test is not None and not self.loader_test.is_wane:
            self.ds_test = None

        # Collect again now that the old data sets are unreferenced.
        gc.collect()

        h, w, c = self.get_input_shape()

        def f(dat):
            # Keep images and labels in separate lists: packing two arrays of
            # different lengths into one np.array builds a ragged object
            # array, which newer NumPy versions reject.
            images, labels = [], []
            for row in dat:
                s, a = self.forge(row)
                images.append(s)
                labels.append(a)
            return DataSet(np.vstack(images).reshape((-1, h, w, c)),
                           np.vstack(labels))

        if self.ds_train is None:
            ds_train, self._has_more_data = self.loader_train.load(
                Pre.DATASET_CAPACITY)
            self.ds_train = f(ds_train)
        if self.ds_valid is None:
            ds_valid, _ = self.loader_valid.load(Pre.DATASET_CAPACITY // 2)
            self.ds_valid = f(ds_valid)
        if self.ds_test is None:
            ds_test, _ = self.loader_test.load(Pre.DATASET_CAPACITY // 2)
            self.ds_test = f(ds_test)

        print(self.ds_train.images.shape, self.ds_train.labels.shape)
        print(self.ds_valid.images.shape, self.ds_valid.labels.shape)
        print(self.ds_test.images.shape, self.ds_test.labels.shape)

    def get_input_shape(self):
        """Return the (height, width, channels) of one network input."""
        return Board.BOARD_SIZE, Board.BOARD_SIZE, Pre.NUM_CHANNELS

    def mid_vis(self, feed_dict):
        """Hook for intermediate visualisation; intentionally does nothing."""
        pass
Ejemplo n.º 6
0
class DCNN2(Pre):
    """Policy network whose input stacks the board's rows, columns and diagonals.

    ``regulate`` turns a square board into a tall matrix of line vectors, so
    the first convolution (1x5 kernel) slides along individual lines.
    """

    def __init__(self, is_train=True, is_revive=False):
        """Initialise the base class, then create one loader per data split."""
        super().__init__(is_train, is_revive)
        self.loader_train = DatasetLoader(Pre.DATA_SET_TRAIN)
        self.loader_valid = DatasetLoader(Pre.DATA_SET_VALID)
        self.loader_test = DatasetLoader(Pre.DATA_SET_TEST)

    def diags(self, a):
        """Return the diagonals of square matrix ``a`` that have length >= 5.

        Each diagonal occupies one row of the result, left-aligned and padded
        with -1 up to ``a.shape[0]`` columns.
        """
        assert len(a.shape) == 2 and a.shape[0] == a.shape[1]
        size = a.shape[0]
        valid = size - 5

        vecs = [a.diagonal(i) for i in range(-valid, valid + 1)]
        c = np.full((len(vecs), size), -1.0)
        for i, v in enumerate(vecs):
            c[i, :v.shape[0]] = v
        return c

    def regulate(self, a):
        """Stack rows, columns, main diagonals and anti-diagonals of ``a``."""
        md = self.diags(a)
        # Rotating by 90 degrees turns anti-diagonals into main diagonals.
        ad = self.diags(np.rot90(a))
        return np.vstack((a, a.T, md, ad))

    def placeholder_inputs(self):
        """Create and return the (states, actions) placeholders."""
        h, w, c = self.get_input_shape()
        states = tf.placeholder(tf.float32, [None, h, w, c])  # NHWC
        actions = tf.placeholder(tf.float32, [None, Pre.NUM_ACTIONS])
        return states, actions

    def model(self, states_pl, actions_pl):
        """Build the network, loss, optimizer and accuracy op.

        Results are stored on ``self``; nothing is returned. Finishes by
        calling ``self.rl_op`` (defined outside this class).
        """
        ch1 = 32
        W_1 = self.weight_variable([1, 5, Pre.NUM_CHANNELS, ch1])
        b_1 = self.bias_variable([ch1])

        ch = 32
        W_2 = self.weight_variable([3, 3, ch1, ch])
        b_2 = self.bias_variable([ch])
        W_21 = self.weight_variable([3, 3, ch, ch])
        b_21 = self.bias_variable([ch])

        self.h_conv1 = tf.nn.relu(
            tf.nn.conv2d(states_pl, W_1, [1, 1, 1, 1], padding='VALID') + b_1)
        self.h_conv2 = tf.nn.relu(
            tf.nn.conv2d(self.h_conv1, W_2, [1, 1, 1, 1], padding='SAME') +
            b_2)
        self.h_conv21 = tf.nn.relu(
            tf.nn.conv2d(self.h_conv2, W_21, [1, 1, 1, 1], padding='SAME') +
            b_21)

        # Flatten everything but the batch dimension.
        shape = self.h_conv21.get_shape().as_list()
        dim = int(np.prod(shape[1:]))
        h_conv_out = tf.reshape(self.h_conv21, [-1, dim])

        num_hidden = 128
        W_3 = self.weight_variable([dim, num_hidden])
        b_3 = self.bias_variable([num_hidden])
        W_4 = self.weight_variable([num_hidden, Pre.NUM_ACTIONS])
        b_4 = self.bias_variable([Pre.NUM_ACTIONS])

        self.hidden = tf.matmul(h_conv_out, W_3) + b_3
        self.predictions = tf.matmul(self.hidden, W_4) + b_4

        # Named arguments: the positional order of this function changed
        # between TensorFlow releases and newer ones reject positional use.
        self.cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.predictions, labels=actions_pl)
        # The loss is written out explicitly from log-softmax.
        self.loss = tf.reduce_mean(
            -tf.reduce_sum(tf.nn.log_softmax(self.predictions) * actions_pl,
                           reduction_indices=1))
        print("states_pl shape:", states_pl.get_shape())
        print("actions_pl shape:", actions_pl.get_shape())
        print("predictions shape:", self.predictions.get_shape())
        print("cross_entropy shape:", self.cross_entropy.get_shape())
        print("loss shape:", self.loss.get_shape())

        tf.scalar_summary("loss", self.loss)
        self.optimizer = tf.train.AdamOptimizer()
        self.opt_op = self.optimizer.minimize(self.loss)

        self.predict_probs = tf.nn.softmax(self.predictions)
        # A sample counts as correct when the arg-max move matches the label.
        eq = tf.equal(tf.argmax(self.predict_probs, 1),
                      tf.argmax(actions_pl, 1))
        self.eval_correct = tf.reduce_sum(tf.cast(eq, tf.int32))

        self.rl_op(actions_pl)

    def forge(self, row):
        """Split a raw data row into ``(image, move_distribution)``.

        The first ``Board.BOARD_SIZE_SQ`` entries are the board; every second
        entry after that is a visit count. The counts are normalised to sum
        to 1.
        """
        board = row[:Board.BOARD_SIZE_SQ]
        image, _ = self.adapt_state(board)

        # Work on a float copy. The slice is a view into ``row``, so dividing
        # in place would overwrite the loaded data and fail for integer rows.
        win_rate = np.array(row[Board.BOARD_SIZE_SQ::2], dtype=float)
        total = np.sum(win_rate)
        # Leave an all-zero row untouched rather than producing NaN labels.
        if total != 0:
            win_rate /= total
        return image, win_rate

    def adapt(self, filename):
        """Load the next train/valid/test chunks into the ``ds_*`` attributes.

        Args:
            filename: unused; kept for interface compatibility.

        A data set is dropped and reloaded when its loader's ``is_wane`` flag
        is false (flag semantics are defined by ``DatasetLoader``).
        """
        gc.collect()

        if self.ds_train is not None and not self.loader_train.is_wane:
            self.ds_train = None
        if self.ds_valid is not None and not self.loader_valid.is_wane:
            self.ds_valid = None
        if self.ds_test is not None and not self.loader_test.is_wane:
            self.ds_test = None

        # Collect again now that the old data sets are unreferenced.
        gc.collect()

        h, w, c = self.get_input_shape()

        def f(dat):
            # Keep images and labels in separate lists: packing two arrays of
            # different lengths into one np.array builds a ragged object
            # array, which newer NumPy versions reject.
            images, labels = [], []
            for row in dat:
                s, a = self.forge(row)
                images.append(s)
                labels.append(a)
            return DataSet(np.vstack(images).reshape((-1, h, w, c)),
                           np.vstack(labels))

        if self.ds_train is None:
            ds_train, self._has_more_data = self.loader_train.load(
                Pre.DATASET_CAPACITY)
            self.ds_train = f(ds_train)
        if self.ds_valid is None:
            ds_valid, _ = self.loader_valid.load(Pre.DATASET_CAPACITY // 2)
            self.ds_valid = f(ds_valid)
        if self.ds_test is None:
            ds_test, _ = self.loader_test.load(Pre.DATASET_CAPACITY // 2)
            self.ds_test = f(ds_test)

        print(self.ds_train.images.shape, self.ds_train.labels.shape)
        print(self.ds_valid.images.shape, self.ds_valid.labels.shape)
        print(self.ds_test.images.shape, self.ds_test.labels.shape)

    def adapt_state(self, board):
        """Reshape a flat board to 2-D, expand it via ``regulate``, then defer to ``Pre``."""
        board = board.reshape(-1, Board.BOARD_SIZE)
        board = self.regulate(board)
        return super(DCNN2, self).adapt_state(board)

    def get_input_shape(self):
        """Return the (height, width, channels) of the regulated input."""
        assert Board.BOARD_SIZE >= 5
        # rows + columns + main diagonals (len >= 5) + anti-diagonals (len >= 5)
        # = N + N + (2N - 9) + (2N - 9)
        height = 6 * Board.BOARD_SIZE - 18
        return height, Board.BOARD_SIZE, Pre.NUM_CHANNELS

    def mid_vis(self, feed_dict):
        """Hook for intermediate visualisation; intentionally does nothing."""
        pass
Ejemplo n.º 7
0
class ValueNet(object):
    """Convolutional value network regressing a scalar reward from a board.

    The constructor builds the TensorFlow graph, session, saver and summary
    writer. ``train`` streams data in chunks through ``DatasetLoader`` and
    ``get_state_values`` runs inference. The network output passes through
    ``tanh`` and therefore lies in [-1, 1].
    """

    # Feature planes produced by adapt_state: black stones, white stones,
    # empty points, and a constant "black to move" plane.
    NUM_CHANNELS = 4

    def __init__(self, brain_dir, summary_dir):
        """Build the graph and start a session.

        Args:
            brain_dir: directory that holds the ``model.ckpt`` checkpoints.
            summary_dir: directory the summary writer logs to.
        """
        self.brain_dir = brain_dir
        self.brain_file = os.path.join(self.brain_dir, 'model.ckpt')
        self.summary_dir = summary_dir

        # Set by adapt() from the training loader; drives the loop in train().
        self._has_more_data = True

        self.ds_train = None
        self.ds_test = None

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.states_pl, self.rewards_pl = self.placeholder_inputs()
            (self.value_outputs, self.opt_op, self.global_step,
             self.mse) = self.model(self.states_pl, self.rewards_pl)
            self.summary_op = tf.summary.merge_all()
            init = tf.initialize_all_variables()
            # Only the trainable variables under "value_net" are checkpointed.
            self.saver = tf.train.Saver(
                tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope="value_net"))
        self.summary_writer = tf.summary.FileWriter(self.summary_dir,
                                                    self.graph)
        self.sess = tf.Session(graph=self.graph)
        self.sess.run(init)

    def get_input_shape(self):
        """Return the (height, width, channels) of one network input."""
        return Board.BOARD_SIZE, Board.BOARD_SIZE, self.NUM_CHANNELS

    def placeholder_inputs(self):
        """Create and return the (states, rewards) placeholders."""
        h, w, c = self.get_input_shape()
        states = tf.placeholder(tf.float32, [None, h, w, c])  # NHWC
        rewards = tf.placeholder(tf.float32, shape=[None])
        return states, rewards

    def weight_variable(self, shape):
        """Return a variable initialised from a truncated normal (stddev 0.01)."""
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(self, shape):
        """Return a variable initialised to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def create_value_net(self, states_pl):
        """Build four 3x3 conv layers plus a tanh output unit.

        Args:
            states_pl: NHWC input tensor.

        Returns:
            Tensor of shape [batch, 1] holding the predicted values.
        """
        ch1 = 32
        W_1 = self.weight_variable([3, 3, self.NUM_CHANNELS, ch1])
        b_1 = self.bias_variable([ch1])

        ch = 32
        W_2 = self.weight_variable([3, 3, ch1, ch])
        b_2 = self.bias_variable([ch])
        W_21 = self.weight_variable([3, 3, ch, ch])
        b_21 = self.bias_variable([ch])
        W_22 = self.weight_variable([3, 3, ch, ch])
        b_22 = self.bias_variable([ch])

        h_conv1 = tf.nn.relu(
            tf.nn.conv2d(states_pl, W_1, [1, 1, 1, 1], padding='SAME') + b_1)
        h_conv2 = tf.nn.relu(
            tf.nn.conv2d(h_conv1, W_2, [1, 1, 1, 1], padding='SAME') + b_2)
        h_conv21 = tf.nn.relu(
            tf.nn.conv2d(h_conv2, W_21, [1, 1, 1, 1], padding='SAME') + b_21)
        h_conv22 = tf.nn.relu(
            tf.nn.conv2d(h_conv21, W_22, [1, 1, 1, 1], padding='SAME') + b_22)

        # Flatten the conv features for the fully connected output layer.
        conv_out_dim = h_conv22.get_shape()[1:].num_elements()
        conv_out = tf.reshape(h_conv22, [-1, conv_out_dim])

        num_hidden = 1

        W_3 = tf.Variable(tf.zeros([conv_out_dim, num_hidden], tf.float32))
        b_3 = tf.Variable(tf.zeros([num_hidden], tf.float32))

        fc_out = tf.tanh(tf.matmul(conv_out, W_3) + b_3)
        return fc_out

    def model(self, states_pl, rewards_pl):
        """Build the network, loss, optimizer and scalar summaries.

        Returns:
            Tuple ``(value_outputs, opt_op, global_step, mean_square_loss)``.
        """
        global_step = tf.Variable(0, name='global_step', trainable=False)

        with tf.variable_scope("value_net"):
            value_outputs = self.create_value_net(states_pl)
        value_net_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope="value_net")

        # value_outputs is [batch, 1] while rewards_pl is [batch]. Flatten
        # before subtracting; otherwise broadcasting produces a [batch, batch]
        # matrix and the loss compares every prediction with every reward.
        flat_values = tf.reshape(value_outputs, [-1])
        mean_square_loss = tf.reduce_mean(
            tf.squared_difference(rewards_pl, flat_values))
        # L2 penalty over all trainable variables of the value net.
        value_reg_loss = tf.reduce_sum(
            [tf.reduce_sum(tf.square(x)) for x in value_net_vars])
        value_loss = mean_square_loss + 0.001 * value_reg_loss

        optimizer = tf.train.AdamOptimizer(0.0001)
        value_opt_op = optimizer.minimize(value_loss, global_step=global_step)

        tf.summary.scalar("raw_value_loss", mean_square_loss)
        tf.summary.scalar("reg_value_loss", value_reg_loss)
        tf.summary.scalar("all_value_loss", value_loss)
        return value_outputs, value_opt_op, global_step, mean_square_loss

    def get_state_values(self, states, players):
        """Return the network output for each (state, player) pair.

        Args:
            states: iterable of boards accepted by ``adapt_state``.
            players: iterable of the player to move for each board.

        Returns:
            Result of running ``value_outputs`` (one row per input state).
        """
        h, w, c = self.get_input_shape()
        images = np.array(
            [self.adapt_state(s, p)[0] for s, p in zip(states, players)])
        feed_dict = {self.states_pl: images.reshape((-1, h, w, c))}
        return self.sess.run(self.value_outputs, feed_dict=feed_dict)

    def save(self):
        """Write the current variables to ``brain_file``."""
        self.saver.save(self.sess, self.brain_file)

    def load(self):
        """Restore the latest checkpoint in ``brain_dir``; no-op if none exists."""
        ckpt = tf.train.get_checkpoint_state(self.brain_dir)
        if ckpt and ckpt.model_checkpoint_path:
            self.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def close(self):
        """Close the TensorFlow session."""
        self.sess.close()

    def train(self, train_dat_file, test_dat_file, max_epochs=None):
        """Train on chunks streamed from the given data files.

        Args:
            train_dat_file: source handed to ``DatasetLoader`` for training data.
            test_dat_file: source handed to ``DatasetLoader`` for test data.
            max_epochs: stop after this many epochs; ``None`` (the default)
                keeps the original behaviour of looping forever.
        """
        self.loader_train = DatasetLoader(train_dat_file)
        self.loader_test = DatasetLoader(test_dat_file)

        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            print('epoch:', epoch)
            epoch += 1

            ith_part = 0
            while self._has_more_data:
                ith_part += 1
                self.adapt()
                self.train_part(ith_part)

            # Re-arm the flag so the next epoch enters the inner loop again.
            self._has_more_data = True

    def fill_feed_dict(self, data_set, states_pl, rewards_pl, batch_size=None):
        """Fetch the next batch from ``data_set`` and map it onto the placeholders.

        Args:
            data_set: object exposing ``next_batch(batch_size)``.
            states_pl: key under which the states batch is stored.
            rewards_pl: key under which the rewards batch is stored.
            batch_size: batch size; falls back to ``BATCH_SIZE`` when falsy.
        """
        batch_size = batch_size or BATCH_SIZE
        states_feed, rewards_feed = data_set.next_batch(batch_size)
        return {states_pl: states_feed, rewards_pl: rewards_feed}

    def train_part(self, ith_part):
        """Run one pass over the currently loaded training chunk.

        Writes summaries every 1000 steps, saves a checkpoint after the last
        step and prints train/test MSE for the chunk.
        """
        num_steps = self.ds_train.num_examples // BATCH_SIZE
        print('total num steps:', num_steps)
        start_time = time.time()
        train_mse = 0.
        for step in range(1, num_steps + 1):
            feed_dict = self.fill_feed_dict(self.ds_train, self.states_pl,
                                            self.rewards_pl)
            _, train_mse = self.sess.run([self.opt_op, self.mse],
                                         feed_dict=feed_dict)

            if step % 1000 == 0:
                summary_str, gstep = self.sess.run(
                    [self.summary_op, self.global_step], feed_dict=feed_dict)
                self.summary_writer.add_summary(summary_str, gstep)
                self.summary_writer.flush()

        # Checkpoint once per chunk, only if at least one step ran.
        if num_steps > 0:
            self.saver.save(self.sess,
                            self.brain_file,
                            global_step=self.global_step)

        duration = time.time() - start_time
        test_mse = self.do_eval(self.mse, self.states_pl, self.rewards_pl,
                                self.ds_test)
        # Both reported numbers are mean squared errors, not accuracies.
        print('part: %d, train mse: %.3f, test mse: %.3f, time cost: %.3f sec' %
              (ith_part, train_mse, test_mse, duration))

    def do_eval(self, mse, states_pl, rewards_pl, data_set):
        """Return the mean of per-batch ``mse`` values over ``data_set``."""
        accum_mse = 0.
        batch_size = BATCH_SIZE
        assert batch_size != 0
        # float() + int() keep the step count an int under either division
        # semantics, so it is always valid for range().
        steps_per_epoch = int(
            math.ceil(data_set.num_examples / float(batch_size)))
        for _ in range(steps_per_epoch):
            feed_dict = self.fill_feed_dict(data_set, states_pl, rewards_pl,
                                            batch_size)
            accum_mse += self.sess.run(mse, feed_dict=feed_dict)
        avg_mse = accum_mse / (steps_per_epoch or 1)
        return avg_mse

    def forge(self, row):
        """Split a raw data row into ``(image, reward)``.

        The first ``Board.BOARD_SIZE_SQ`` entries are the board, the second to
        last entry is the player and the last entry is the reward.
        """
        board = row[:Board.BOARD_SIZE_SQ]
        player = row[-2]
        image, _ = self.adapt_state(board, player)
        reward = row[-1]
        return image, reward

    def adapt_state(self, board, player):
        """Encode a board as flattened feature planes.

        Args:
            board: array of stone codes (compared against ``Board.STONE_*``).
            player: the player to move.

        Returns:
            ``(image, legal)``: the raveled stack of the four planes and a
            boolean mask of the empty points.
        """
        black = (board == Board.STONE_BLACK).astype(float)
        white = (board == Board.STONE_WHITE).astype(float)
        empty = (board == Board.STONE_EMPTY).astype(float)
        if player == Board.STONE_BLACK:
            is_black_move = np.ones_like(black, float)
        else:
            is_black_move = np.zeros_like(black, float)

        image = np.dstack((black, white, empty, is_black_move)).ravel()
        legal = empty.astype(bool)
        return image, legal

    def adapt(self):
        """Load the next training/test chunks into ``ds_train``/``ds_test``.

        A data set is dropped and reloaded when its loader's ``is_wane`` flag
        is false (flag semantics are defined by ``DatasetLoader``).
        """
        gc.collect()

        if self.ds_train is not None and not self.loader_train.is_wane:
            self.ds_train = None
        if self.ds_test is not None and not self.loader_test.is_wane:
            self.ds_test = None

        # Collect again now that the old data sets are unreferenced.
        gc.collect()

        h, w, c = self.get_input_shape()

        def f(dat):
            # Keep images and rewards in separate lists: packing
            # (array, scalar) tuples into one np.array builds a ragged
            # object array, which newer NumPy versions reject.
            images, rewards = [], []
            for row in dat:
                s, r = self.forge(row)
                images.append(s)
                rewards.append(r)
            return DataSet(
                np.vstack(images).reshape((-1, h, w, c)), np.array(rewards))

        if self.ds_train is None:
            ds_train, self._has_more_data = self.loader_train.load(
                DATASET_CAPACITY)
            self.ds_train = f(ds_train)
        if self.ds_test is None:
            ds_test, _ = self.loader_test.load(DATASET_CAPACITY // 2)
            self.ds_test = f(ds_test)

        print(self.ds_train.images.shape, self.ds_train.labels.shape)
        print(self.ds_test.images.shape, self.ds_test.labels.shape)
Ejemplo n.º 8
0
 def __init__(self, is_train=True, is_revive=False):
     """Initialise the base class, then create one loader per data split."""
     super().__init__(is_train, is_revive)
     train_src, valid_src, test_src = (
         Pre.DATA_SET_TRAIN, Pre.DATA_SET_VALID, Pre.DATA_SET_TEST)
     self.loader_train = DatasetLoader(train_src)
     self.loader_valid = DatasetLoader(valid_src)
     self.loader_test = DatasetLoader(test_src)
Ejemplo n.º 9
0
class DCNN2(Pre):
    def __init__(self, is_train=True, is_revive=False):
        """Initialise the parent class, then create the train/valid/test loaders.

        ``is_train`` and ``is_revive`` are forwarded positionally to the parent
        constructor.  One ``DatasetLoader`` is built from each of
        ``Pre.DATA_SET_TRAIN``, ``Pre.DATA_SET_VALID`` and ``Pre.DATA_SET_TEST``.
        """
        super().__init__(is_train, is_revive)
        sources = (Pre.DATA_SET_TRAIN, Pre.DATA_SET_VALID, Pre.DATA_SET_TEST)
        self.loader_train, self.loader_valid, self.loader_test = map(DatasetLoader, sources)

    def diags(self, a):
        """Collect the diagonals of square matrix ``a`` that have length >= 5.

        For an ``n x n`` input the diagonal offsets run from ``-(n - 5)`` to
        ``n - 5``.  Each diagonal is written to the start of one output row and
        the remainder of that row keeps the padding value ``-1``.

        Returns:
            A float array with one row per offset and ``n`` columns.
        """
        assert len(a.shape) == 2 and a.shape[0] == a.shape[1]
        side = a.shape[0]
        reach = side - 5

        offsets = range(-reach, reach + 1)
        padded = np.full((len(offsets), side), -1.0)
        for row_idx, offset in enumerate(offsets):
            diagonal = a.diagonal(offset)
            padded[row_idx, :diagonal.shape[0]] = diagonal
        return padded

    def regulate(self, a):
        md = self.diags(a)
        ad = self.diags(np.rot90(a))
        m = np.vstack((a, a.T, md, ad))
        return m

    def placeholder_inputs(self):
        """Create the float32 placeholders for input states and target actions.

        Returns:
            ``(states, actions)``: ``states`` has shape ``[None, h, w, c]`` with
            ``h, w, c`` taken from ``get_input_shape()``; ``actions`` has shape
            ``[None, Pre.NUM_ACTIONS]``.
        """
        height, width, channels = self.get_input_shape()
        state_shape = [None, height, width, channels]  # NHWC
        action_shape = [None, Pre.NUM_ACTIONS]
        states = tf.placeholder(tf.float32, state_shape)
        actions = tf.placeholder(tf.float32, action_shape)
        return states, actions

    def model(self, states_pl, actions_pl):
        """Build the network graph, loss, optimizer and evaluation ops.

        Three ReLU convolutions feed two dense layers that produce one logit
        per action.  Results are stored on ``self`` (``h_conv1``, ``h_conv2``,
        ``h_conv21``, ``hidden``, ``predictions``, ``cross_entropy``, ``loss``,
        ``optimizer``, ``opt_op``, ``predict_probs``, ``eval_correct``); the
        method itself has no ``return`` statement.

        Args:
            states_pl: Input tensor fed to the first convolution.
            actions_pl: Target tensor, multiplied element-wise with the
                log-softmax of the logits and arg-maxed along axis 1.
        """
        # First conv layer: filter shape [1, 5, NUM_CHANNELS, 32].
        ch1 = 32
        W_1 = self.weight_variable([1, 5, Pre.NUM_CHANNELS, ch1])
        b_1 = self.bias_variable([ch1])

        # Two further conv layers with 3x3 filters and 32 output channels each.
        ch = 32
        W_2 = self.weight_variable([3, 3, ch1, ch])
        b_2 = self.bias_variable([ch])
        W_21 = self.weight_variable([3, 3, ch, ch])
        b_21 = self.bias_variable([ch])

        # Stride 1 everywhere; only the first convolution uses VALID padding.
        self.h_conv1 = tf.nn.relu(tf.nn.conv2d(states_pl, W_1, [1, 1, 1, 1], padding='VALID') + b_1)
        self.h_conv2 = tf.nn.relu(tf.nn.conv2d(self.h_conv1, W_2, [1, 1, 1, 1], padding='SAME') + b_2)
        self.h_conv21 = tf.nn.relu(tf.nn.conv2d(self.h_conv2, W_21, [1, 1, 1, 1], padding='SAME') + b_21)

        # Flatten everything except the batch axis; the last element of the
        # cumulative product is the product of all non-batch dimensions.
        shape = self.h_conv21.get_shape().as_list()
        dim = np.cumprod(shape[1:])[-1]
        h_conv_out = tf.reshape(self.h_conv21, [-1, dim])

        num_hidden = 128
        W_3 = self.weight_variable([dim, num_hidden])
        b_3 = self.bias_variable([num_hidden])
        W_4 = self.weight_variable([num_hidden, Pre.NUM_ACTIONS])
        b_4 = self.bias_variable([Pre.NUM_ACTIONS])

        # NOTE(review): no activation is applied between the two dense layers
        # here, so they compose to a single affine map -- confirm this is intended.
        self.hidden = tf.matmul(h_conv_out, W_3) + b_3
        self.predictions = tf.matmul(self.hidden, W_4) + b_4  # logits

        # cross_entropy is kept as an attribute and only used for the shape
        # print below; the loss that is optimised is the hand-written
        # expression mean(-sum(log_softmax(logits) * targets, axis=1)).
        self.cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=actions_pl, logits=self.predictions)
#         self.loss = tf.reduce_mean(self.cross_entropy)
        self.loss = tf.reduce_mean(-tf.reduce_sum(tf.nn.log_softmax(self.predictions) * actions_pl, axis=1))
        # Static-shape printout emitted once, while the graph is being built.
        print("states_pl shape:", states_pl.get_shape())
        print("actions_pl shape:", actions_pl.get_shape())
        print("predictions shape:", self.predictions.get_shape())
        print("cross_entropy shape:", self.cross_entropy.get_shape())
        print("loss shape:", self.loss.get_shape())

        tf.summary.scalar("loss", self.loss)
        # Adam with the library's default hyper-parameters.
        self.optimizer = tf.train.AdamOptimizer()
        self.opt_op = self.optimizer.minimize(self.loss)

        # eval_correct counts the examples whose arg-max prediction matches
        # the arg-max of the target row.
        self.predict_probs = tf.nn.softmax(self.predictions)
        eq = tf.equal(tf.argmax(self.predict_probs, 1), tf.argmax(actions_pl, 1))
        self.eval_correct = tf.reduce_sum(tf.cast(eq, tf.int32))

        # rl_op is defined outside this block; presumably it adds the
        # reinforcement-learning ops -- TODO confirm against its definition.
        self.rl_op(actions_pl)


    def forge(self, row):
        """Turn one raw dataset row into an ``(image, win_rate)`` training pair.

        The first ``Board.BOARD_SIZE_SQ`` entries of ``row`` are the board and
        are encoded with ``adapt_state``.  Every second entry from index
        ``Board.BOARD_SIZE_SQ`` onwards (the "visit" values) is normalised to
        sum to one and returned as the target.

        The target is computed on a float copy of those entries.  Slicing an
        ndarray yields a view, so dividing it in place would overwrite the
        caller's row and would raise for integer rows.  When the entries sum
        to zero the division is skipped and they are returned unchanged
        instead of being turned into NaNs.

        Returns:
            ``(image, win_rate)`` where ``win_rate`` is a new float array.
        """
        board = row[:Board.BOARD_SIZE_SQ]
        image, _ = self.adapt_state(board)

        # astype() copies, so normalising below never touches ``row`` itself.
        win_rate = row[Board.BOARD_SIZE_SQ::2].astype(float)
        total = win_rate.sum()
        if total != 0:
            win_rate /= total
        return image, win_rate

    def adapt(self, filename):
        """Make sure the train/valid/test datasets each hold a loaded chunk.

        A cached dataset is dropped when its loader's ``is_wane`` flag is
        falsy; any dataset that is ``None`` afterwards is reloaded from its
        loader and converted row by row with ``forge``.  Loading the training
        chunk also refreshes ``self._has_more_data`` from the loader's second
        return value.  The shapes of the resulting datasets are printed.

        Args:
            filename: Not used by this method; kept in the signature so
                existing callers that pass it keep working.
        """
        gc.collect()

        if self.ds_train is not None and not self.loader_train.is_wane:
            self.ds_train = None
        if self.ds_valid is not None and not self.loader_valid.is_wane:
            self.ds_valid = None
        if self.ds_test is not None and not self.loader_test.is_wane:
            self.ds_test = None

        # Second pass so datasets released just above can be reclaimed
        # before the next chunk is materialised.
        gc.collect()

        h, w, c = self.get_input_shape()

        def build(rows):
            # Keep images and targets in separate lists.  Packing the
            # (image, target) pairs into a single np.array creates a ragged
            # array whenever the two lengths differ, which NumPy >= 1.24
            # rejects with ValueError.
            images, targets = [], []
            for row in rows:
                image, target = self.forge(row)
                images.append(image)
                targets.append(target)
            return DataSet(np.vstack(images).reshape((-1, h, w, c)), np.vstack(targets))

        if self.ds_train is None:
            ds_train, self._has_more_data = self.loader_train.load(Pre.DATASET_CAPACITY)
            self.ds_train = build(ds_train)
        # Validation and test chunks are half the size of the training chunk.
        if self.ds_valid is None:
            ds_valid, _ = self.loader_valid.load(Pre.DATASET_CAPACITY // 2)
            self.ds_valid = build(ds_valid)
        if self.ds_test is None:
            ds_test, _ = self.loader_test.load(Pre.DATASET_CAPACITY // 2)
            self.ds_test = build(ds_test)

        print(self.ds_train.images.shape, self.ds_train.labels.shape)
        print(self.ds_valid.images.shape, self.ds_valid.labels.shape)
        print(self.ds_test.images.shape, self.ds_test.labels.shape)


    def adapt_state(self, board):
        """Expand a flat board with ``regulate`` and encode it via the parent.

        ``board`` is reshaped to rows of ``Board.BOARD_SIZE`` cells, passed
        through ``regulate`` and handed to the parent class's ``adapt_state``,
        whose return value is returned unchanged.
        """
        grid = board.reshape(-1, Board.BOARD_SIZE)
        return super().adapt_state(self.regulate(grid))

    def get_input_shape(self):
        """Return the ``(height, width, channels)`` shape of one network input.

        The height counts one line per board row and per board column plus,
        for each of the two diagonal directions, the diagonals of length >= 5.
        """
        size = Board.BOARD_SIZE
        assert size >= 5
        diagonals_per_direction = 2 * (size - 5) + 1
        height = 2 * size + 2 * diagonals_per_direction  # equals 6 * size - 18
        return height, size, Pre.NUM_CHANNELS

    def mid_vis(self, feed_dict):
        """Do nothing; ``feed_dict`` is accepted but ignored."""