Exemple #1
0
 def start(self):
     """Play one game in which the network and a human alternate moves.

     Opens a TensorFlow session, calls ``restore_from_last_checkpoint``
     with a fresh saver, wraps ``net.NN`` in a ``tree.MCTS_Batch`` stored
     on ``self.mcts_batch``, and loops until ``self.current_node`` is
     terminal.  The network moves first in every round.  The outcome is
     reported through ``print_winner`` while the session is still open.
     """
     with tf.Session() as session:
         restore_from_last_checkpoint(session, tf.train.Saver())
         self.mcts_batch = tree.MCTS_Batch(net.NN(session))
         self.moves_num = 0

         while True:
             gc.collect()
             self.moves_num += 1

             # Network turn: search from the current node, then play the
             # move that pick_move_greedily selects from the result.
             policy = self.mcts_batch.alpha(
                 [self.current_node],
                 get_temperature(self.moves_num),
             )[0]
             self.make_move(pick_move_greedily(policy))
             if self.current_node.is_terminal:
                 break

             # Human turn: the search result is discarded.
             # NOTE(review): presumably run only for its effect on the
             # search tree while the human decides -- confirm.
             _ = self.mcts_batch.alpha(
                 [self.current_node],
                 get_temperature(self.moves_num),
             )[0]
             self.make_move(self.get_human_input())
             if self.current_node.is_terminal:
                 break

         # Announce the result for the final position.
         print_winner(self.current_node)
Exemple #2
0
 def start(self):
     """Run one training session over the loaded data.

     Returns immediately (after logging) when ``self.data_len`` is zero.
     Otherwise opens a TensorFlow session, calls ``self.restore``, and
     makes ``self.echo_max`` passes of ``self.batch_num`` batches each.
     Every batch appends a ``global_step,p_loss,v_loss`` row to
     ``loss_log.csv`` under ``config.log_path``.  A checkpoint is saved
     after every pass, and ``self.clear()`` is called before the session
     closes.
     """
     if self.data_len == 0:
         log("no data for training.")
         return

     with tf.Session() as session:
         saver = tf.train.Saver(
             max_to_keep=config.train_checkpoint_max_to_keep)
         self.restore(session, saver)
         nn = net.NN(session)
         log("training version:", self.version, "global step:",
             self.global_step, "session start.")

         loss_log_path = config.log_path + "loss_log.csv"
         with open(loss_log_path, "a+") as loss_log_file:
             for echo in range(self.echo_max):
                 for batch_index in range(self.batch_num):
                     self.global_step += 1
                     states, policies, outcomes = self.get_next_batch(
                         batch_index, self.batch_size)
                     policy_loss, value_loss = nn.train(
                         states, policies, outcomes)
                     row = "{0},{1},{2}\n".format(
                         self.global_step, policy_loss, value_loss)
                     loss_log_file.write(row)

                 log("training echo:", echo, "global step:",
                     self.global_step)
                 # Checkpoint files are prefixed with the zero-padded
                 # version and suffixed with the global step by the saver.
                 checkpoint_prefix = (config.checkpoint_path +
                                      "v{0:03d}".format(self.version))
                 saver.save(session,
                            checkpoint_prefix,
                            global_step=self.global_step)

         self.clear()
         log("training session end.")
Exemple #3
0
 def start(self):
     """Run self-play rounds until ``self.echo`` reaches ``self.echo_max``.

     The TensorFlow session is created with the per-process GPU memory
     fraction taken from ``config``.  After ``self.restore`` is called,
     each round logs a banner, calls ``self.play`` with the shared
     ``tree.MCTS_Batch``, calls ``self.save()``, and increments
     ``self.echo``.
     """
     memory_fraction = config.self_play_woker_gpu_memory_fraction
     session_config = tf.ConfigProto(
         gpu_options=tf.GPUOptions(
             per_process_gpu_memory_fraction=memory_fraction))

     with tf.Session(config=session_config) as session:
         self.restore(session, tf.train.Saver())
         mcts_batch = tree.MCTS_Batch(net.NN(session))

         while self.echo < self.echo_max:
             log("selfplay worker", self.worker_id, "version:",
                 self.version, "echo:", self.echo, "session start.")
             self.play(mcts_batch)
             self.save()
             self.echo += 1

         log("selfplay worker", self.worker_id, "session end.")
Exemple #4
0
def play_game(player):
    """Play one game between the network and ``player``, showing each move.

    The game starts from a fresh root node built on ``board.Board()`` with
    ``config.black``.  In every round the network moves first, then
    ``player.make_move`` is asked for a reply.  Each resulting node is
    drawn with ``gui.print_node``.  The loop ends as soon as the current
    node is terminal, and the result is reported via ``print_winner``.

    Args:
        player: object exposing ``make_move(node)`` that returns a move.
    """

    def advance_and_show(node, move):
        # Apply the move, draw the resulting node, and hand it back.
        successor = make_move(node, move)
        gui.print_node(successor)
        return successor

    current_node = tree.Node(tree.FakeNode(), 0, config.black, board.Board())
    current_node.is_game_root = True
    current_node.is_search_root = True

    with tf.Session() as session:
        restore_from_last_checkpoint(session, tf.train.Saver())
        mcts_batch = tree.MCTS_Batch(net.NN(session))
        moves_num = 0

        while True:
            gc.collect()
            moves_num += 1

            # Network turn: search, then play the greedily picked move.
            policy = mcts_batch.alpha(
                [current_node],
                get_temperature(moves_num),
            )[0]
            current_node = advance_and_show(current_node,
                                            pick_move_greedily(policy))
            if current_node.is_terminal:
                break

            # Player turn: the search result is discarded.
            # NOTE(review): presumably run only for its effect on the
            # search tree before the player answers -- confirm.
            _ = mcts_batch.alpha(
                [current_node],
                get_temperature(moves_num),
            )[0]
            player_move = player.make_move(current_node)
            print("player move: {}".format(player_move))
            current_node = advance_and_show(current_node, player_move)
            if current_node.is_terminal:
                break

        # Announce the result for the final position.
        print_winner(current_node)
Exemple #5
0
# Category vocabularies, listed in feature order.  Every value is encoded as
# its index within its OWN tuple, so the shared "?" marker gets a separate
# code per column instead of colliding with a real category.
_CATEGORY_VOCABULARIES = (
    ("workclass", (
        "Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
        "Local-gov", "State-gov", "Without-pay", "Never-worked", "?",
    )),
    ("education", (
        "Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
        "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters",
        "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool", "?",
    )),
    ("occupation", (
        "Tech-support", "Craft-repair", "Other-service", "Sales",
        "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
        "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
        "Transport-moving", "Priv-house-serv", "Protective-serv",
        "Armed-Forces", "?",
    )),
    ("sex", ("Female", "Male")),
)
_LABEL_CODES = {'>50K': 0, '<=50K': 1}


def _encode_column(series, codes):
    """Map ``series`` through ``codes``; raise ValueError on unmapped values."""
    encoded = series.map(codes)
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted({str(value) for value in series[unmapped]})
        raise ValueError("unexpected value(s) in column {!r}: {}".format(
            series.name, ", ".join(unknown)))
    return encoded


def _encode_census_frame(data):
    """Turn the raw table into ``(features, labels)`` NumPy arrays.

    ``features`` is float32 with the columns age, workclass, education,
    occupation, sex (in that order); ``labels`` is int32 taken from ``Y``.
    Columns are looked up by name, and each categorical column is encoded
    with its own vocabulary.
    """
    feature_columns = [data["age"]]
    for column, vocabulary in _CATEGORY_VOCABULARIES:
        codes = {value: index for index, value in enumerate(vocabulary)}
        feature_columns.append(_encode_column(data[column], codes))
    features = np.column_stack(
        [np.asarray(column, dtype=np.float32) for column in feature_columns])
    labels = np.asarray(_encode_column(data["Y"], _LABEL_CODES),
                        dtype=np.int32)
    return features, labels


def main():
    """Train a Chainer classifier on ``train.tsv`` and save it as ``NN.npz``.

    Parses the command-line options, builds ``net.NN`` wrapped in an
    ``L.Classifier`` with an Adam optimizer, loads and encodes the data,
    holds out 20% of it for validation, runs a Chainer ``Trainer`` for
    ``--epoch`` epochs, and finally serializes the classifier.

    Categorical columns are encoded per column.  The previous whole-frame
    replacement mapped every "?" to the workclass code 8, so an unknown
    occupation was indistinguishable from "Adm-clerical" and an unknown
    education from "7th-8th".  Models trained with the old encoding see
    different codes for those rows.

    Raises:
        ValueError: if a categorical or label column holds a value that is
            not in its vocabulary.
    """
    parser = argparse.ArgumentParser(description='Chainer: NN')
    parser.add_argument('--initmodel',
                        '-m',
                        default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        default=0,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='path/to/output',
                        help='Directory to output the result')
    parser.add_argument('--epoch',
                        '-e',
                        default=200,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='learning minibatch size')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # NOTE(review): the leading 5 presumably is the input width and matches
    # the five feature columns built below -- confirm against net.NN.
    model = net.NN(5, 50, 30, 2)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # move the model to the selected GPU
    model = L.Classifier(model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Optionally start from saved weights (npz is NumPy's archive format).
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)

    # Load only the columns that are used as features or as the label.
    data = pd.read_table(
        "train.tsv",
        sep="\t",
        usecols=["age", "workclass", "education", "occupation", "sex", "Y"])
    print(data)
    train, lab = _encode_census_frame(data)
    print(train)
    print(lab)

    dataset = list(zip(train, lab))

    train, test = train_test_split(dataset, test_size=0.2)

    # ---- dataset iterators ----
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(log_name='my_log_data'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/accuracy', 'validation/main/accuracy', 'main/loss',
            'validation/main/loss', 'elapsed_time'
        ]))
    # NOTE(review): this plots accuracy but writes to 'loss.png'; the file
    # name is kept because other tooling may depend on it.
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              'epoch',
                              file_name='loss.png'))
    # trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training loop.
    trainer.run()

    serializers.save_npz("NN.npz", model)