def start(self):
    """Play one interactive game in which the network and a human alternate.

    Inside a TensorFlow session this calls ``restore_from_last_checkpoint``,
    wraps the session in ``net.NN`` and stores a ``tree.MCTS_Batch`` on
    ``self.mcts_batch``. Each round the network moves first, then the human;
    the loop stops as soon as ``self.current_node.is_terminal`` is true, and
    the result is reported through ``print_winner``.
    """
    with tf.Session() as sess:
        checkpoint_saver = tf.train.Saver()
        restore_from_last_checkpoint(sess, checkpoint_saver)
        network = net.NN(sess)
        self.mcts_batch = tree.MCTS_Batch(network)
        self.moves_num = 0

        finished = False
        while not finished:
            gc.collect()
            self.moves_num += 1

            # Network's turn: search from the current node and play the move
            # selected by pick_move_greedily from the returned distribution.
            policy = self.mcts_batch.alpha(
                [self.current_node], get_temperature(self.moves_num))[0]
            self.make_move(pick_move_greedily(policy))
            finished = self.current_node.is_terminal
            if finished:
                continue

            # Human's turn: the search is still run but its result is not
            # used here (presumably needed for its effect on the tree --
            # TODO confirm).
            self.mcts_batch.alpha(
                [self.current_node], get_temperature(self.moves_num))[0]
            self.make_move(self.get_human_input())
            finished = self.current_node.is_terminal

        # Report the outcome of the finished game.
        print_winner(self.current_node)
def start(self):
    """Run one training session over the loaded data and save a checkpoint.

    Returns immediately (after logging) when ``self.data_len`` is 0.
    Otherwise, for ``self.echo_max`` passes over ``self.batch_num`` batches,
    it trains ``net.NN`` on each batch, bumps ``self.global_step`` and appends
    ``global_step,p_loss,v_loss`` rows to ``loss_log.csv`` under
    ``config.log_path``. A checkpoint named after ``self.version`` is then
    written and ``self.clear()`` is called.
    """
    if self.data_len == 0:
        log("no data for training.")
        return

    with tf.Session() as sess:
        checkpoint_saver = tf.train.Saver(
            max_to_keep=config.train_checkpoint_max_to_keep)
        self.restore(sess, checkpoint_saver)
        network = net.NN(sess)
        log("training version:", self.version, "global step:",
            self.global_step, "session start.")

        loss_log_path = config.log_path + "loss_log.csv"
        with open(loss_log_path, "a+") as loss_log_file:
            for echo in range(self.echo_max):
                for batch_index in range(self.batch_num):
                    self.global_step += 1
                    batch = self.get_next_batch(batch_index, self.batch_size)
                    state_batch, pi_batch, z_batch = batch
                    p_loss, v_loss = network.train(
                        state_batch, pi_batch, z_batch)
                    row = "{0},{1},{2}\n".format(
                        self.global_step, p_loss, v_loss)
                    loss_log_file.write(row)
                log("training echo:", echo, "global step:", self.global_step)

        # NOTE(review): the nesting of the save below was reconstructed from
        # a layout with no indentation; it is placed after all passes and
        # inside the session -- confirm against the original file.
        checkpoint_prefix = (
            config.checkpoint_path + "v{0:03d}".format(self.version))
        checkpoint_saver.save(
            sess, checkpoint_prefix, global_step=self.global_step)
        self.clear()
        log("training session end.")
def start(self):
    """Run self-play rounds in a GPU-memory-limited TensorFlow session.

    The session is capped at the per-process GPU memory fraction given by
    ``config.self_play_woker_gpu_memory_fraction``. After ``self.restore``,
    each round calls ``self.play`` with a shared ``tree.MCTS_Batch`` and then
    ``self.save``, until ``self.echo`` reaches ``self.echo_max``.
    """
    memory_fraction = config.self_play_woker_gpu_memory_fraction
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=memory_fraction)
    session_config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=session_config) as sess:
        checkpoint_saver = tf.train.Saver()
        self.restore(sess, checkpoint_saver)
        searcher = tree.MCTS_Batch(net.NN(sess))

        # A while loop is kept on purpose: self.echo is instance state that
        # is re-read on every iteration.
        while self.echo < self.echo_max:
            log("selfplay worker", self.worker_id, "version:", self.version,
                "echo:", self.echo, "session start.")
            self.play(searcher)
            self.save()
            self.echo += 1

        log("selfplay worker", self.worker_id, "session end.")
def play_game(player):
    """Play one game between the network and ``player``, drawing each position.

    A fresh root node is created for ``config.black`` on an empty
    ``board.Board``. In every round the network moves first, then
    ``player.make_move`` is asked for a reply; each resulting node is shown
    with ``gui.print_node``. The loop ends when the current node is terminal
    and the outcome is reported through ``print_winner``.
    """

    def advance(node, move):
        # Apply the move, display the resulting position, and hand it back.
        next_node = make_move(node, move)
        gui.print_node(next_node)
        return next_node

    current_node = tree.Node(tree.FakeNode(), 0, config.black, board.Board())
    current_node.is_game_root = True
    current_node.is_search_root = True

    with tf.Session() as sess:
        checkpoint_saver = tf.train.Saver()
        restore_from_last_checkpoint(sess, checkpoint_saver)
        searcher = tree.MCTS_Batch(net.NN(sess))

        moves_num = 0
        finished = False
        while not finished:
            gc.collect()
            moves_num += 1

            # Network's turn: search, then play the move chosen by
            # pick_move_greedily.
            policy = searcher.alpha(
                [current_node], get_temperature(moves_num))[0]
            current_node = advance(current_node, pick_move_greedily(policy))
            finished = current_node.is_terminal
            if finished:
                continue

            # Opponent's turn: the search result is discarded here
            # (presumably run for its effect on the tree -- TODO confirm).
            searcher.alpha([current_node], get_temperature(moves_num))[0]
            player_move = player.make_move(current_node)
            print("player move: {}".format(player_move))
            current_node = advance(current_node, player_move)
            finished = current_node.is_terminal

        # Report the outcome of the finished game.
        print_winner(current_node)
def main():
    """Train a Chainer classifier on ``train.tsv`` and save it as ``NN.npz``.

    Command-line flags choose the GPU, batch size, epoch count, output
    directory, an optional initial model file and an optional trainer
    snapshot to resume from. The table's categorical columns are encoded as
    integer codes, 20% of the rows are held out for evaluation, and training
    runs through a ``chainer.training.Trainer`` with reporting extensions.

    Raises:
        ValueError: if a feature or label column still holds a value that
            has no integer code when the arrays are cast to numeric dtypes.
    """
    parser = argparse.ArgumentParser(description='Chainer: NN')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=0, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='path/to/output',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=200, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='learning minibatch size')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = net.NN(5, 50, 30, 2)
    if args.gpu >= 0:
        # Select the requested device and move the model onto the GPU.
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    model = L.Classifier(model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load initial model weights; .npz is the NumPy archive format.
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)

    # Vocabularies for the categorical columns; a value's position in its
    # list is the integer code it is encoded as.
    workclass = [
        "Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
        "Local-gov", "State-gov", "Without-pay", "Never-worked", "?"
    ]
    education = [
        "Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
        "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters",
        "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool", "?"
    ]
    occupation = [
        "Tech-support", "Craft-repair", "Other-service", "Sales",
        "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
        "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
        "Transport-moving", "Priv-house-serv", "Protective-serv",
        "Armed-Forces", "?"
    ]

    data = pd.read_table(
        "train.tsv",
        sep="\t",
        usecols=["age", "workclass", "education", "occupation", "sex", "Y"])

    # Encode each categorical column with its OWN mapping. A whole-table
    # replace would rewrite "?" using the first vocabulary that contains it
    # (workclass -> 8), so an unknown occupation would collide with
    # "Adm-clerical" instead of receiving its own code.
    category_codes = {
        "sex": {'Female': 0, 'Male': 1},
        "workclass": {name: code for code, name in enumerate(workclass)},
        "education": {name: code for code, name in enumerate(education)},
        "occupation": {name: code for code, name in enumerate(occupation)},
    }
    for column, codes in category_codes.items():
        data[column] = data[column].replace(codes)
    print(data)

    # Take every non-label column explicitly (table order is kept) rather
    # than filtering by dtype: a column that still contains an unmapped
    # string then fails loudly in the float cast below instead of silently
    # disappearing from the feature matrix.
    feature_columns = [name for name in data.columns if name != "Y"]
    train = data[feature_columns].values
    print(train)

    # Select the label by name; usecols does not reorder columns, so a
    # positional index would depend on the file's column order.
    lab = data["Y"].replace({'>50K': 0, '<=50K': 1})
    print(lab)

    lab = np.array(lab.astype('int32'))
    train = np.array(train.astype('float32'))

    dataset = list(zip(train, lab))
    train, test = train_test_split(dataset, test_size=0.2)

    # ---- Dataset iterators ----
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # ---- Trainer and reporting extensions ----
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(log_name='my_log_data'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/accuracy', 'validation/main/accuracy',
            'main/loss', 'validation/main/loss', 'elapsed_time'
        ]))
    # NOTE(review): this plot tracks accuracy although the output file is
    # named loss.png; the name is kept so existing consumers are unaffected.
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              'epoch',
                              file_name='loss.png'))
    # trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training loop, then persist the trained classifier.
    trainer.run()
    chainer.serializers.save_npz("NN.npz", model)