def main(args):
    train_data_prefix = args.pop(0)

    chunks = get_chunks(train_data_prefix)
    print("Found {0} chunks".format(len(chunks)))

    if not chunks:
        return

    parser = ChunkParser(chunks)

    run_test(parser)
    #benchmark(parser)

    dataset = tf.data.Dataset.from_generator(
        parser.parse_chunk, output_types=(tf.string))
    dataset = dataset.shuffle(65536)
    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(16)
    iterator = dataset.make_one_shot_iterator()
    next_batch = iterator.get_next()

    tfprocess = TFProcess(next_batch)
    if args:
        restore_file = args.pop(0)
        tfprocess.restore(restore_file)
    while True:
        tfprocess.process(BATCH_SIZE)
def main():
    batch = [
        tf.placeholder(tf.float32, [None, 120, 8 * 8]),
        tf.placeholder(tf.float32, [None, 1924]),
        tf.placeholder(tf.float32, [None, 1]),
    ]
    tfprocess = TFProcess(batch)
    tfprocess.save_leelaz_weights('weights.txt')
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    x = [
        tf.placeholder(tf.float32, [None, 112, 8 * 8]),
        tf.placeholder(tf.float32, [None, 1858]),
        tf.placeholder(tf.float32, [None, 3]),
        tf.placeholder(tf.float32, [None, 3]),
    ]

    tfprocess = TFProcess(cfg)
    tfprocess.init_net(x)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = tf.train.latest_checkpoint(root_dir)
        reader = tf.train.NewCheckpointReader(cp)
        saved_shapes = reader.get_variable_to_shape_map()
        new_names = sorted(
            [var.name.split(':')[0] for var in tf.global_variables()
             if var.name.split(':')[0] not in saved_shapes])
        for saved_var_name in new_names:
            print("New name {} will use default value".format(saved_var_name))
        var_names = sorted(
            [(var.name, var.name.split(':')[0]) for var in tf.global_variables()
             if var.name.split(':')[0] in saved_shapes])
        restore_vars = []
        restore_names = []
        for var_name, saved_var_name in var_names:
            curr_var = tf.get_default_graph().get_tensor_by_name(var_name)
            var_shape = curr_var.get_shape().as_list()
            if var_shape == saved_shapes[saved_var_name]:
                restore_vars.append(curr_var)
                restore_names.append(saved_var_name)
            else:
                print("Dropping {} due to shape change".format(saved_var_name))
        legacy_names = sorted(
            [name for name in saved_shapes.keys() if name not in restore_names])
        for saved_var_name in legacy_names:
            print("Dropping {} as no longer used".format(saved_var_name))
        opt_saver = tf.train.Saver(restore_vars)
        opt_saver.restore(tfprocess.session, cp)
    else:
        print("No checkpoint to upgrade!")
        exit(1)

    steps = tf.train.global_step(tfprocess.session, tfprocess.global_step)
    path = os.path.join(root_dir, cfg['name'])
    save_path = tfprocess.saver.save(tfprocess.session, path, global_step=steps)
    tfprocess.session.close()
def main(args):
    train_data_prefix = args.pop(0)

    chunks = get_chunks(train_data_prefix)
    print("Found {0} chunks".format(len(chunks)))

    if chunks:
        tfprocess = TFProcess()
        if args:
            restore_file = args.pop(0)
            tfprocess.restore(restore_file)
        do_train_loop(chunks, tfprocess)
def main(args):
    training = GameArchive('../../data/train1.data')
    train_parser = ChunkParser(training.games)
    dataset = tf.data.Dataset.from_generator(train_parser.parse_chunk,
                                             output_types=(tf.string))
    dataset = dataset.shuffle(1 << 18)
    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    testing = GameArchive('../../data/test1.data')
    test_parser = ChunkParser(testing.games)
    dataset = tf.data.Dataset.from_generator(test_parser.parse_chunk,
                                             output_types=(tf.string))
    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess()
    tfprocess.init(dataset, train_iterator, test_iterator)

    if args:
        restore_file = args.pop(0)
        tfprocess.restore(restore_file)
    while True:
        tfprocess.process(BATCH_SIZE)
def main():
    if len(sys.argv) != 2:
        print("Usage: {} config.yaml".format(sys.argv[0]))
        return 1

    cfg = yaml.safe_load(open(sys.argv[1], 'r').read())
    print(yaml.dump(cfg, default_flow_style=False))

    batch = [
        tf.placeholder(tf.float32, [None, 120, 8 * 8]),
        tf.placeholder(tf.float32, [None, 1924]),
        tf.placeholder(tf.float32, [None, 1]),
    ]

    tfprocess = TFProcess(cfg, batch)
    tfprocess.save_leelaz_weights('weights.txt')
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)

    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    test_parser = ChunkParser(FileDataSrc(chunks[num_train:]),
                              shuffle_size=shuffle_size,
                              sample=SKIP,
                              batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = get_checkpoint(root_dir)
        tfprocess.restore(cp)

    # Sweeps through all test chunks statistically
    num_evals = (num_chunks - num_train) * 10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    for _ in range(cfg['training']['total_steps']):
        tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

    tfprocess.save_leelaz_weights(cmd.output)
    tfprocess.session.close()
    train_parser.shutdown()
    test_parser.shutdown()
def main(args):
    chunks = latest_chunks()
    parser, next_batch = chunks2batches(chunks)
    tfprocess = TFProcess(next_batch)
    if args:
        restore_file = args.pop(0)
        print("Restoring weights ....")
        tfprocess.restore(restore_file)

    print("Training starts ....")
    while True:
        change_data, run_val = tfprocess.process()
        if change_data:
            chunks = latest_chunks()
            parser.chunk_switch(chunks)
        if run_val:
            best_net = leela_conf.SAVE_DIR + "/best.txt"
            last_net = leela_conf.SAVE_DIR + "/latest.txt"
            cmd = leela_conf.VALIDATION_COMMAND % (last_net, best_net)
            print(cmd)
            subprocess.call(cmd.split(" "))  #, stdout=subprocess.PIPE)
            with open(leela_conf.VALIDATION_LOG, "r") as f:
                better = int(f.readlines()[-1].split("\t")[0])
            if better:
                print("---------------- Better Network Found! --------------")
                copy2(last_net, best_net)
            else:
                print("------------- Checkout best net so far. -------------")
                tfprocess.replace_weights(get_weights(best_net))
def main():
    if len(sys.argv) != 2:
        print("Usage: {} config.yaml".format(sys.argv[0]))
        return 1

    cfg = yaml.safe_load(open(sys.argv[1], 'r').read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
    num_train = int(num_chunks * cfg['dataset']['train_ratio'])
    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    #bench_parser = ChunkParser(FileDataSrc(chunks[:1000]), shuffle_size=1<<14, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE)
    #benchmark(bench_parser)

    train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=ChunkParser.BATCH_SIZE)
    #benchmark(train_parser)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    test_parser = ChunkParser(FileDataSrc(chunks[num_train:]),
                              batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = get_checkpoint(root_dir)
        tfprocess.restore(cp)

    # Sweeps through all test chunks statistically
    num_evals = int(round(((num_chunks - num_train) * (200 / SKIP)) / ChunkParser.BATCH_SIZE))
    print("Using {} evaluation batches".format(num_evals))

    # while True:
    for _ in range(cfg['training']['total_steps']):
        tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)
def main():
    parser = argparse.ArgumentParser(
        description='Train network from game data.')
    parser.add_argument("trainpref",
                        help='Training file prefix', nargs='?', type=str)
    parser.add_argument("restorepref",
                        help='Training snapshot prefix', nargs='?', type=str)
    parser.add_argument("--train", '-t',
                        help="Training file prefix", type=str)
    parser.add_argument("--test", help="Test file prefix", type=str)
    parser.add_argument("--restore", type=str,
                        help="Prefix of tensorflow snapshot to restore from")
    parser.add_argument("--logbase", default='leelalogs', type=str,
                        help="Log file prefix (for tensorboard)")
    parser.add_argument("--sample", default=DOWN_SAMPLE, type=int,
                        help="Rate of data down-sampling to use")
    args = parser.parse_args()

    train_data_prefix = args.train or args.trainpref
    restore_prefix = args.restore or args.restorepref

    training = get_chunks(train_data_prefix)
    if not args.test:
        # Generate test by taking 10% of the training chunks.
        random.shuffle(training)
        training, test = split_chunks(training, 0.1)
    else:
        test = get_chunks(args.test)

    if not training:
        print("No data to train on!")
        return

    print("Training with {0} chunks, validating on {1} chunks".format(
        len(training), len(test)))

    train_parser = ChunkParser(FileDataSrc(training),
                               shuffle_size=1 << 20,  # 2.2GB of RAM.
                               sample=args.sample,
                               batch_size=RAM_BATCH_SIZE).parse()

    test_parser = ChunkParser(FileDataSrc(test),
                              shuffle_size=1 << 19,
                              sample=args.sample,
                              batch_size=RAM_BATCH_SIZE).parse()

    tfprocess = TFProcess()
    tfprocess.init(RAM_BATCH_SIZE,
                   logbase=args.logbase,
                   macrobatch=BATCH_SIZE // RAM_BATCH_SIZE)

    #benchmark1(tfprocess)

    if restore_prefix:
        tfprocess.restore(restore_prefix)
    tfprocess.process(train_parser, test_parser)
def main(args):
    train_data_prefix = args.pop(0)

    chunks = get_chunks(train_data_prefix)
    print("Found {0} chunks".format(len(chunks)))

    if not chunks:
        return

    # The following assumes positions from one game are not
    # spread through chunks.
    random.shuffle(chunks)
    training, test = split_chunks(chunks, 0.1)
    print("Training with {0} chunks, validating on {1} chunks".format(
        len(training), len(test)))

    train_parser = ChunkParser(FileDataSrc(training),
                               shuffle_size=1 << 19,
                               sample=DOWN_SAMPLE,
                               batch_size=BATCH_SIZE)
    #benchmark(train_parser)

    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(_parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    test_parser = ChunkParser(FileDataSrc(test),
                              shuffle_size=1 << 19,
                              sample=DOWN_SAMPLE,
                              batch_size=BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(_parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess()
    tfprocess.init(dataset, train_iterator, test_iterator)

    #benchmark1(tfprocess)

    if args:
        restore_file = args.pop(0)
        tfprocess.restore(restore_file)
    while True:
        tfprocess.process(BATCH_SIZE)
def main(args):
    train_data_prefix = args.pop(0)

    chunks = get_chunks(train_data_prefix)
    print("Found {0} chunks".format(len(chunks)))

    if not chunks:
        return

    # The following assumes positions from one game are not
    # spread through chunks.
    random.shuffle(chunks)
    training, test = split_chunks(chunks, 0.1)
    print("Training with {0} chunks, validating on {1} chunks".format(
        len(training), len(test)))

    #run_test(parser)
    #benchmark(parser)

    train_parser = ChunkParser(training)
    dataset = tf.data.Dataset.from_generator(train_parser.parse_chunk,
                                             output_types=(tf.string))
    dataset = dataset.shuffle(1 << 18)
    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    test_parser = ChunkParser(test)
    dataset = tf.data.Dataset.from_generator(test_parser.parse_chunk,
                                             output_types=(tf.string))
    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess()
    tfprocess.init(dataset, train_iterator, test_iterator)

    if args:
        restore_file = args.pop(0)
        tfprocess.restore(restore_file)

    for _ in range(12001):
        tfprocess.process(BATCH_SIZE)

    for x in train_parser.mp_instances:
        x.terminate()
        x.join()
    os.killpg(0, signal.SIGTERM)
def main(args):
    train_data_prefix = args.pop(0)

    chunks = get_chunks(train_data_prefix)
    print("Found {0} chunks".format(len(chunks)))

    if not chunks:
        return

    parser = ChunkParser(chunks)

    dataset = tf.data.Dataset.from_generator(
        parser.parse_chunk, output_types=(tf.float32, tf.float32, tf.float32))
    dataset = dataset.shuffle(65536)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(16)
    iterator = dataset.make_one_shot_iterator()
    next_batch = iterator.get_next()

    tfprocess = TFProcess(next_batch)
    if args:
        restore_file = args.pop(0)
        tfprocess.restore(restore_file)
    while True:
        tfprocess.process(BATCH_SIZE)
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    x = [
        tf.placeholder(tf.float32, [None, 112, 8 * 8]),
        tf.placeholder(tf.float32, [None, 1858]),
        tf.placeholder(tf.float32, [None, 3]),
        tf.placeholder(tf.float32, [None, 3]),
    ]

    tfprocess = TFProcess(cfg)
    tfprocess.init_net(x)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = tf.train.latest_checkpoint(root_dir)
        tfprocess.restore(cp)

    START_FROM = cmd.start
    update_global_step = tfprocess.global_step.assign(START_FROM)
    tfprocess.session.run(update_global_step)

    path = os.path.join(root_dir, cfg['name'])
    save_path = tfprocess.saver.save(tfprocess.session, path,
                                     global_step=START_FROM)
    tfprocess.session.close()
def main():
    if len(sys.argv) != 2:
        print("Usage: {} config.yaml".format(sys.argv[0]))
        return 1

    cfg = yaml.safe_load(open(sys.argv[1], 'r').read())
    print(yaml.dump(cfg, default_flow_style=False))

    batch_size = cfg['training']['batch_size']

    filename = os.path.join(cfg['dataset']['path'], 'train.bin')
    train_next_batch, parser = dataset_iterator(filename, batch_size)
    print("Creating trainingset from {}".format(filename))
    num_eval = parser.num_samples() // batch_size
    print("Train epoch in {} steps".format(num_eval))

    filename = os.path.join(cfg['dataset']['path'], 'test.bin')
    test_next_batch, parser = dataset_iterator(filename, batch_size)
    print("Creating testset from {}".format(filename))
    num_eval = parser.num_samples() // batch_size
    print("Test epoch in {} steps".format(num_eval))

    tfprocess = TFProcess(cfg, train_next_batch, test_next_batch, num_eval)

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        checkpoint = parse.get_checkpoint(root_dir)
        tfprocess.restore(checkpoint)

    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
        print("Created output directory: {}".format(root_dir))

    while True:
        tfprocess.process(batch_size)
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    tfprocess = TFProcess(cfg)
    tfprocess.init_net_v2()
    tfprocess.restore_v2()

    START_FROM = cmd.start
    tfprocess.global_step.assign(START_FROM)
    tfprocess.manager.save(checkpoint_number=START_FROM)
def main():
    if len(sys.argv) != 2:
        print("Usage: {} config.yaml".format(sys.argv[0]))
        return 1

    cfg = yaml.safe_load(open(sys.argv[1], 'r').read())
    print(yaml.dump(cfg, default_flow_style=False))

    chunks = get_chunks(cfg['dataset']['input'])
    print("Found {0} chunks".format(len(chunks)))

    if not chunks:
        return

    parser = ChunkParser(chunks)

    run_test(parser)
    #benchmark(parser)

    dataset = tf.data.Dataset.from_generator(parser.parse_chunk,
                                             output_types=(tf.string))
    dataset = dataset.shuffle(65536)
    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(cfg['training']['batch_size'])
    dataset = dataset.prefetch(16)
    iterator = dataset.make_one_shot_iterator()
    next_batch = iterator.get_next()

    tfprocess = TFProcess(cfg, next_batch)

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        checkpoint = get_checkpoint(root_dir)
        tfprocess.restore(checkpoint)

    while True:
        tfprocess.process(cfg['training']['batch_size'])
with open(sys.argv[1], 'r') as f:
    weights = []
    for e, line in enumerate(f):
        if e == 0:
            #Version
            print("Version", line.strip())
            if line != '1\n':
                raise ValueError("Unknown version {}".format(line.strip()))
        else:
            weights.append(list(map(float, line.split(' '))))
            if e == 2:
                channels = len(line.split(' '))
                print("Channels", channels)
    blocks = e - (4 + 14)
    if blocks % 8 != 0:
        raise ValueError("Inconsistent number of weights in the file")
    blocks //= 8
    print("Blocks", blocks)

x = [
    tf.placeholder(tf.float32, [None, 18, 19 * 19]),
    tf.placeholder(tf.float32, [None, 362]),
    tf.placeholder(tf.float32, [None, 1])
]
tfprocess = TFProcess(x)
tfprocess.replace_weights(weights)
path = os.path.join(os.getcwd(), "leelaz-model")
save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0)
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'], num_train)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'], num_test)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total batch size in tfprocess.
    ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    '''train_parser = ChunkParser(FileDataSrc(train_chunks),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(2)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size*(1.0-train_ratio))
    test_parser = ChunkParser(FileDataSrc(test_chunks),
                              shuffle_size=shuffle_size,
                              sample=SKIP,
                              batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(2)
    test_iterator = dataset.make_one_shot_iterator()'''

    filenames = {'train': 'test_bytes', 'test': 'test_bytes'}

    def extract(example):
        features = {
            'x': tf.FixedLenFeature((), tf.string),
            '_y': tf.FixedLenFeature((), tf.string),
            '_z': tf.FixedLenFeature((), tf.string)
        }
        parsed_example = tf.parse_single_example(example, features)
        x = tf.decode_raw(parsed_example['x'], tf.float32)
        _y = tf.decode_raw(parsed_example['_y'], tf.float32)
        _z = tf.decode_raw(parsed_example['_z'], tf.float32)
        x.set_shape([112 * 64])
        _y.set_shape([1858])
        _z.set_shape([1])
        x = tf.reshape(x, [112, 64])
        return x, _y, _z

    dataset = tf.data.TFRecordDataset(filenames=[filenames['train']],
                                      compression_type='GZIP')
    dataset = dataset.map(extract)
    dataset = dataset.batch(total_batch_size)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    dataset = tf.data.TFRecordDataset(filenames=[filenames['test']],
                                      compression_type='GZIP')
    dataset = dataset.map(extract)
    dataset = dataset.batch(total_batch_size)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = tf.train.latest_checkpoint(root_dir)

    # Sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = num_test * 10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    tfprocess.process_loop(total_batch_size, num_evals)

    tfprocess.session.close()
    train_parser.shutdown()
    test_parser.shutdown()
version, blocks, filters, weights = read_net(sys.argv[2])

if data_format == 'NHWC':
    planes = tf.placeholder(tf.float32,
                            [None, BOARD_SIZE, BOARD_SIZE, FEATURES],
                            name='x')
    probs = tf.placeholder(tf.float32, [None, BOARD_SIZE * BOARD_SIZE + 1])
    winner = tf.placeholder(tf.float32, [None, 1])
else:
    planes = tf.placeholder(tf.float32,
                            [None, FEATURES, BOARD_SIZE, BOARD_SIZE],
                            name='x')
    probs = tf.placeholder(tf.float32, [None, BOARD_SIZE * BOARD_SIZE + 1])
    winner = tf.placeholder(tf.float32, [None, 1])

tfprocess = TFProcess()
tfprocess.TFCOREML = True
tfprocess.DATA_FORMAT = data_format
tfprocess.BOARD_SIZE = BOARD_SIZE
tfprocess.INPUT_DIM = 2
tfprocess.FEATURES = FEATURES
tfprocess.RESIDUAL_FILTERS = filters
tfprocess.RESIDUAL_BLOCKS = blocks
if BOARD_SIZE == 9:
    tfprocess.VALUE_FULLY_CONNECTED = 64
tfprocess.training = False  # so that batch normalization gets converted
tfprocess.init_net(planes, probs, winner)
tfprocess.replace_weights(weights)

tf.train.write_graph(tf.get_default_graph(),
                     os.path.dirname(sys.argv[3]),
                     os.path.basename(sys.argv[3]),
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    allow_less = cfg['dataset'].get('allow_less_chunks', False)
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train
    sort_type = cfg['dataset'].get('sort_type', 'mtime')
    if sort_type == 'mtime':
        sort_key_fn = os.path.getmtime
    elif sort_type == 'number':
        sort_key_fn = game_number_for_name
    elif sort_type == 'name':
        sort_key_fn = identity_function
    else:
        raise ValueError('Unknown dataset sort_type: {}'.format(sort_type))
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'],
                                         num_train, allow_less, sort_key_fn)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'],
                                        num_test, allow_less, sort_key_fn)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks,
                                   allow_less, sort_key_fn)
        if allow_less:
            num_train = int(len(chunks) * train_ratio)
            num_test = len(chunks) - num_train
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    train_workers = cfg['dataset'].get('train_workers', None)
    test_workers = cfg['dataset'].get('test_workers', None)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits

    diff_focus_min = cfg['training'].get('diff_focus_min', 1)
    diff_focus_slope = cfg['training'].get('diff_focus_slope', 0)
    diff_focus_q_weight = cfg['training'].get('diff_focus_q_weight', 6.0)
    diff_focus_pol_scale = cfg['training'].get('diff_focus_pol_scale', 3.5)

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(train_chunks,
                               get_input_mode(cfg),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=split_batch_size,
                               diff_focus_min=diff_focus_min,
                               diff_focus_slope=diff_focus_slope,
                               diff_focus_q_weight=diff_focus_q_weight,
                               diff_focus_pol_scale=diff_focus_pol_scale,
                               workers=train_workers)

    test_shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    # no diff focus for test_parser
    test_parser = ChunkParser(test_chunks,
                              get_input_mode(cfg),
                              shuffle_size=test_shuffle_size,
                              sample=SKIP,
                              batch_size=split_batch_size,
                              workers=test_workers)

    if 'input_validation' in cfg['dataset']:
        valid_chunks = get_all_chunks(cfg['dataset']['input_validation'])
        validation_parser = ChunkParser(valid_chunks,
                                        get_input_mode(cfg),
                                        sample=1,
                                        batch_size=split_batch_size,
                                        workers=0)

    import tensorflow as tf
    from chunkparsefunc import parse_function
    from tfprocess import TFProcess
    tfprocess = TFProcess(cfg)
    train_dataset = tf.data.Dataset.from_generator(
        train_parser.parse,
        output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
    train_dataset = train_dataset.map(parse_function)
    test_dataset = tf.data.Dataset.from_generator(
        test_parser.parse,
        output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
    test_dataset = test_dataset.map(parse_function)

    validation_dataset = None
    if 'input_validation' in cfg['dataset']:
        validation_dataset = tf.data.Dataset.from_generator(
            validation_parser.sequential,
            output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
        validation_dataset = validation_dataset.map(parse_function)

    if tfprocess.strategy is None:
        # Mirrored strategy appends prefetch itself with a value
        # depending on number of replicas
        train_dataset = train_dataset.prefetch(4)
        test_dataset = test_dataset.prefetch(4)
        if validation_dataset is not None:
            validation_dataset = validation_dataset.prefetch(4)
    else:
        options = tf.data.Options()
        options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
        train_dataset = train_dataset.with_options(options)
        test_dataset = test_dataset.with_options(options)
        if validation_dataset is not None:
            validation_dataset = validation_dataset.with_options(options)

    tfprocess.init(train_dataset, test_dataset, validation_dataset)

    tfprocess.restore()

    # If number of test positions is not given
    # sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = cfg['training'].get('num_test_positions',
                                    len(test_chunks) * 10)
    num_evals = max(1, num_evals // split_batch_size)
    print("Using {} evaluation batches".format(num_evals))
    tfprocess.total_batch_size = total_batch_size
    tfprocess.process_loop(total_batch_size,
                           num_evals,
                           batch_splits=batch_splits)

    if cmd.output is not None:
        if cfg['training'].get('swa_output', False):
            tfprocess.save_swa_weights(cmd.output)
        else:
            tfprocess.save_leelaz_weights(cmd.output)

    train_parser.shutdown()
    test_parser.shutdown()
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'], num_train)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'], num_test)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total batch size in tfprocess.
    ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(FileDataSrc(train_chunks),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse,
        output_types=(tf.string, tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    test_parser = ChunkParser(FileDataSrc(test_chunks),
                              shuffle_size=shuffle_size,
                              sample=SKIP,
                              batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse,
        output_types=(tf.string, tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = tf.train.latest_checkpoint(root_dir)
        tfprocess.restore(cp)

    # If number of test positions is not given
    # sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = cfg['training'].get('num_test_positions', num_test * 10)
    num_evals = max(1, num_evals // ChunkParser.BATCH_SIZE)
    print("Using {} evaluation batches".format(num_evals))

    tfprocess.process_loop(total_batch_size, num_evals, batch_splits=batch_splits)

    if cmd.output is not None:
        tfprocess.save_leelaz_weights(cmd.output)

    tfprocess.session.close()
    train_parser.shutdown()
    test_parser.shutdown()
def main():
    parser = argparse.ArgumentParser(
        description='Train network from game data.')
    parser.add_argument("blockspref",
                        help="Number of blocks", nargs='?', type=int)
    parser.add_argument("filterspref",
                        help="Number of filters", nargs='?', type=int)
    parser.add_argument("trainpref",
                        help='Training file prefix', nargs='?', type=str)
    parser.add_argument("restorepref",
                        help='Training snapshot prefix', nargs='?', type=str)
    parser.add_argument("--blocks", '-b',
                        help="Number of blocks", type=int)
    parser.add_argument("--filters", '-f',
                        help="Number of filters", type=int)
    parser.add_argument("--train", '-t',
                        help="Training file prefix", type=str)
    parser.add_argument("--test", help="Test file prefix", type=str)
    parser.add_argument("--restore", type=str,
                        help="Prefix of tensorflow snapshot to restore from")
    parser.add_argument("--logbase", default='leelalogs', type=str,
                        help="Log file prefix (for tensorboard) (default: %(default)s)")
    parser.add_argument("--sample", default=DOWN_SAMPLE, type=int,
                        help="Rate of data down-sampling to use (default: %(default)d)")
    parser.add_argument("--bufferbits", default=TRAIN_SHUFFLE_BITS, type=int,
                        help="Train shuffle-buffer size in bits (default: %(default)d)")
    parser.add_argument("--rate", default=LEARN_RATE, type=float,
                        help="Learning rate (default: %(default)f)")
    parser.add_argument("--steps", default=TRAINING_STEPS, type=int,
                        help="Training step before writing a network (default: %(default)d)")
    parser.add_argument("--maxsteps", default=MAX_TRAINING_STEPS, type=int,
                        help="Terminates after this many steps (default: %(default)d)")
    parser.add_argument("--maxkeep", default=MAX_SAVER_TO_KEEP, type=int,
                        help="Keeps meta files for at most this many networks (default: %(default)d)")
    parser.add_argument("--policyloss", default=POLICY_LOSS_WT, type=float,
                        help="Coefficient for policy term in loss function (default: %(default)f)")
    parser.add_argument("--mseloss", default=MSE_LOSS_WT, type=float,
                        help="Coefficient for mse term in loss function (default: %(default)f)")
    parser.add_argument("--regloss", default=REG_LOSS_WT, type=float,
                        help="Coefficient for regularizing term in loss function (default: %(default)f)")
    args = parser.parse_args()

    blocks = args.blocks or args.blockspref
    filters = args.filters or args.filterspref
    train_data_prefix = args.train or args.trainpref
    restore_prefix = args.restore or args.restorepref

    if not blocks or not filters:
        print("Must supply number of blocks and filters")
        return

    training = get_chunks(train_data_prefix)
    if not args.test:
        # Generate test by taking 10% of the training chunks.
        random.shuffle(training)
        training, test = split_chunks(training, 0.1)
    else:
        test = get_chunks(args.test)

    if not training:
        print("No data to train on!")
        return

    print("Training with {0} chunks, validating on {1} chunks".format(
        len(training), len(test)))

    train_parser = ChunkParser(FileDataSrc(training),
                               shuffle_size=1 << args.bufferbits,  # was 20 -- 2.2GB of RAM.
                               sample=args.sample,
                               batch_size=RAM_BATCH_SIZE).parse()

    test_parser = ChunkParser(FileDataSrc(test),
                              shuffle_size=1 << (args.bufferbits - 3),  # was 19
                              sample=args.sample,
                              batch_size=RAM_BATCH_SIZE).parse()

    tfprocess = TFProcess(blocks, filters,
                          args.rate, args.steps, args.maxsteps, args.maxkeep,
                          args.policyloss, args.mseloss, args.regloss)
    tfprocess.init(RAM_BATCH_SIZE,
                   logbase=args.logbase,
                   macrobatch=BATCH_SIZE // RAM_BATCH_SIZE)

    #benchmark1(tfprocess)

    if restore_prefix:
        tfprocess.restore(restore_prefix)
    tfprocess.process(train_parser, test_parser)
            print("Version", line.strip())
            if line != '1\n':
                raise ValueError("Unknown version {}".format(line.strip()))
        else:
            weights.append(list(map(float, line.split(' '))))
            if e == 2:
                filters = len(line.split(' '))
                print("Channels", filters)

    blocks = e - (4 + 14)
    if blocks % 8 != 0:
        raise ValueError("Inconsistent number of weights in the file")
    blocks //= 8
    print("Blocks", blocks)

    cfg['model']['filters'] = filters
    cfg['model']['residual_blocks'] = blocks
    print(yaml.dump(cfg, default_flow_style=False))

x = [
    tf.placeholder(tf.float32, [None, 120, 8 * 8]),
    tf.placeholder(tf.float32, [None, 1924]),
    tf.placeholder(tf.float32, [None, 1])
]
tfprocess = TFProcess(cfg)
tfprocess.init_net(x)
tfprocess.replace_weights(weights)
path = os.path.join(os.getcwd(), cfg['name'])
save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0)
print("Wrote model to {}".format(path))
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    allow_less = cfg['dataset'].get('allow_less_chunks', False)
    train_ratio = cfg['dataset']['train_ratio']
    experimental_parser = cfg['dataset'].get('experimental_v5_only_dataset', False)
    # num_train = int(num_chunks * train_ratio)
    # we just need to use one data loader, just put everything into train
    num_train = int(num_chunks)
    num_test = num_chunks - num_train
    sort_type = cfg['dataset'].get('sort_type', 'mtime')
    if sort_type == 'mtime':
        sort_key_fn = os.path.getmtime
    elif sort_type == 'number':
        sort_key_fn = game_number_for_name
    elif sort_type == 'name':
        sort_key_fn = identity_function
    else:
        raise ValueError('Unknown dataset sort_type: {}'.format(sort_type))
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'],
                                         num_train, allow_less, sort_key_fn)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'],
                                        num_test, allow_less, sort_key_fn)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks,
                                   allow_less, sort_key_fn)
        if allow_less:
            num_train = int(len(chunks) * train_ratio)
            num_test = len(chunks) - num_train
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    # shuffle_size = cfg['training']['shuffle_size']
    shuffle_size = 1
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    train_workers = cfg['dataset'].get('train_workers', None)
    test_workers = cfg['dataset'].get('test_workers', None)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total batch size in tfprocess.
    ChunkParser.BATCH_SIZE = split_batch_size

    value_focus_min = cfg['training'].get('value_focus_min', 1)
    value_focus_slope = cfg['training'].get('value_focus_slope', 0)

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
    tfprocess = TFProcess(cfg)
    experimental_reads = max(2, mp.cpu_count() - 2) // 2
    extractor = select_extractor(tfprocess.INPUT_MODE)

    if experimental_parser and (value_focus_min != 1 or value_focus_slope != 0):
        raise ValueError(
            'Experimental parser does not support non-default value focus parameters.')

    def read(x):
        return tf.data.FixedLengthRecordDataset(
            x,
            8308,
            compression_type='GZIP',
            num_parallel_reads=experimental_reads)

    if experimental_parser:
        # train_dataset = tf.data.Dataset.from_tensor_slices(train_chunks).shuffle(len(train_chunks)).repeat().batch(256)\
        train_dataset = tf.data.Dataset.from_tensor_slices(train_chunks).repeat().batch(256)\
            .interleave(read, num_parallel_calls=1)\
            .batch(SKIP_MULTIPLE*SKIP).map(semi_sample).unbatch()\
            .batch(split_batch_size).map(extractor)
        #   .shuffle(shuffle_size)\
        #   .batch(split_batch_size).map(extractor)
    else:
        train_parser = ChunkParser(train_chunks,
                                   tfprocess.INPUT_MODE,
                                   shuffle_size=shuffle_size,
                                   sample=SKIP,
                                   batch_size=ChunkParser.BATCH_SIZE,
                                   value_focus_min=value_focus_min,
                                   value_focus_slope=value_focus_slope,
                                   workers=train_workers)
        train_dataset = tf.data.Dataset.from_generator(
            train_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
        train_dataset = train_dataset.map(ChunkParser.parse_function)

    shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    if experimental_parser:
        # test_dataset = tf.data.Dataset.from_tensor_slices(test_chunks).shuffle(len(test_chunks)).repeat().batch(256)\
        test_dataset = tf.data.Dataset.from_tensor_slices(test_chunks).repeat().batch(256)\
            .interleave(read, num_parallel_calls=2)\
            .batch(SKIP_MULTIPLE*SKIP).map(semi_sample).unbatch()\
            .batch(split_batch_size).map(extractor)
        #   .shuffle(shuffle_size)\
        #   .batch(split_batch_size).map(extractor)
    else:
        # no value focus for test_parser
        test_parser = ChunkParser(test_chunks,
                                  tfprocess.INPUT_MODE,
                                  shuffle_size=shuffle_size,
                                  sample=SKIP,
                                  batch_size=ChunkParser.BATCH_SIZE,
                                  workers=test_workers)
        test_dataset = tf.data.Dataset.from_generator(
            test_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
        test_dataset = test_dataset.map(ChunkParser.parse_function)

    validation_dataset = None
    if 'input_validation' in cfg['dataset']:
        valid_chunks = get_all_chunks(cfg['dataset']['input_validation'])
        validation_dataset = tf.data.FixedLengthRecordDataset(
            valid_chunks,
            8308,
            compression_type='GZIP',
            num_parallel_reads=experimental_reads)\
            .batch(split_batch_size, drop_remainder=True).map(extractor)

    if tfprocess.strategy is None:
        # Mirrored strategy appends prefetch itself with a value
        # depending on number of replicas
        train_dataset = train_dataset.prefetch(4)
        test_dataset = test_dataset.prefetch(4)
        if validation_dataset is not None:
            validation_dataset = validation_dataset.prefetch(4)
    else:
        options = tf.data.Options()
        options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
        train_dataset = train_dataset.with_options(options)
        test_dataset = test_dataset.with_options(options)
        if validation_dataset is not None:
            validation_dataset = validation_dataset.with_options(options)

    ##########################
    #   Custom Additions     #
    ##########################
    tfprocess.init_v2(train_dataset, test_dataset, validation_dataset)

    # load net from weights file given in yaml config
    tfprocess.replace_weights_v2(proto_filename=cmd.net, ignore_errors=False)
    tfprocess.model.summary()

    for layer_name, path in zip(cmd.layer, cmd.path):
        # sort data files
        train_chunks = sorted(train_chunks)

        # create predictor that gives access to specific intermediate layer
        layer = tfprocess.model.get_layer(layer_name)
        earlyPredictor = tf.keras.models.Model(
            tfprocess.model.inputs,
            [tfprocess.model.inputs, tfprocess.model.outputs, layer.output])

        # create custom iterator which doesn't shuffle the data etc
        custom_parse_gen = train_parser.custom_parse(train_chunks)
        turn_counter = 0
        custom_iter = iter(custom_parse_gen)

        # prepare dataframe
        df = pd.DataFrame()

        # iterate entire dataset generator / iterator
        for data in custom_iter:  #i in range(30):
            # data = next(custom_iter)
            planes, probs, winner, best_q = train_parser.custom_get_batch(data)
            x = planes
            print('predicting...')
            _, _, layer_results = earlyPredictor.predict(x)
            # append to dataframe
            # df = df.append(pd.DataFrame(activation_31.reshape(-1,128*8*8)))
            shape_tuple = (-1, np.prod(layer.output_shape[1:]))
            df = df.append(pd.DataFrame(layer_results.reshape(shape_tuple)))
            turn_counter += len(x)

        df.info()
        df.to_csv(path)
        print('done')

    train_parser.shutdown()
    test_parser.shutdown()
    weights = []
    for e, line in enumerate(f):
        if e == 0:
            #Version
            print("Version", line.strip())
            if line != '1\n':
                raise ValueError("Unknown version {}".format(line.strip()))
        else:
            weights.append(list(map(float, line.split(' '))))
            if e == 2:
                channels = len(line.split(' '))
                print("Channels", channels)
    blocks = e - (4 + 14)
    if blocks % 8 != 0:
        raise ValueError("Inconsistent number of weights in the file")
    blocks //= 8
    print("Blocks", blocks)

tfprocess = TFProcess()
tfprocess.init(batch_size=1)

if tfprocess.RESIDUAL_BLOCKS != blocks:
    raise ValueError("Number of blocks in tensorflow model doesn't match "
                     "number of blocks in input network")
if tfprocess.RESIDUAL_FILTERS != channels:
    raise ValueError("Number of filters in tensorflow model doesn't match "
                     "number of filters in input network")

tfprocess.replace_weights(weights)
path = os.path.join(os.getcwd(), "leelaz-model")
save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0)
with open(sys.argv[2], 'r') as f:
    weights = []
    for e, line in enumerate(f):
        if e == 0:
            #Version
            print("Version", line.strip())
            if line != '1\n':
                raise ValueError("Unknown version {}".format(line.strip()))
        else:
            weights.append(list(map(float, line.split(' '))))
            if e == 2:
                channels = len(line.split(' '))
                print("Channels", channels)
    blocks = e - (4 + 14)
    if blocks % 8 != 0:
        raise ValueError("Inconsistent number of weights in the file")
    blocks //= 8
    print("Blocks", blocks)

tfprocess = TFProcess(cfg)
tfprocess.init(1)

if tfprocess.RESIDUAL_BLOCKS != blocks:
    raise ValueError("Number of blocks in tensorflow model doesn't match "
                     "number of blocks in input network")
if tfprocess.RESIDUAL_FILTERS != channels:
    raise ValueError("Number of filters in tensorflow model doesn't match "
                     "number of filters in input network")

tfprocess.replace_weights(weights)
path = os.path.join(os.getcwd(), cfg['name'])
save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0)
import yaml  # needed for yaml.safe_load below
from tfprocess import TFProcess
from net import Net
from chunkparser import ChunkParser
import multiprocessing as mp

cfg_path = "128x10-t60-2.yaml"
net_path = "128x10-t60-2-5300.pb.gz"
ignore_errors = False

with open(cfg_path, 'r') as f:
    cfg = yaml.safe_load(f.read())
print(yaml.dump(cfg, default_flow_style=False))

# START_FROM = args.start

tfp = TFProcess(cfg)
tfp.init_net_v2()

tfp.replace_weights_v2(net_path, ignore_errors)
# tfp.global_step.assign(START_FROM)

# root_dir = os.path.join(cfg['training']['path'], cfg['name'])
# if not os.path.exists(root_dir):
#     os.makedirs(root_dir)
#
# tfp.manager.save(checkpoint_number=START_FROM)
# print("Wrote model to {}".format(tfp.manager.latest_checkpoint))

cfg['dataset']['input_train'] = "tf/data/*/"
cfg['dataset']['input_test'] = "tf/data/*/"

import glob
for d in glob.glob(cfg['dataset']['input_train']):
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks)

    train_ratio = cfg['dataset']['train_ratio']
    num_train = int(num_chunks * train_ratio)
    shuffle_size = cfg['training']['shuffle_size']
    ChunkParser.BATCH_SIZE = cfg['training']['batch_size']

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    train_parser = ChunkParser(FileDataSrc(chunks[:num_train]),
                               shuffle_size=shuffle_size,
                               sample=SKIP,
                               batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        train_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    train_iterator = dataset.make_one_shot_iterator()

    shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    test_parser = ChunkParser(FileDataSrc(chunks[num_train:]),
                              shuffle_size=shuffle_size,
                              sample=SKIP,
                              batch_size=ChunkParser.BATCH_SIZE)
    dataset = tf.data.Dataset.from_generator(
        test_parser.parse, output_types=(tf.string, tf.string, tf.string))
    dataset = dataset.map(ChunkParser.parse_function)
    dataset = dataset.prefetch(4)
    test_iterator = dataset.make_one_shot_iterator()

    tfprocess = TFProcess(cfg)
    tfprocess.init(dataset, train_iterator, test_iterator)

    if os.path.exists(os.path.join(root_dir, 'checkpoint')):
        cp = get_checkpoint(root_dir)
        tfprocess.restore(cp)

    # Sweeps through all test chunks statistically
    num_evals = (num_chunks - num_train) * 10 // ChunkParser.BATCH_SIZE
    print("Using {} evaluation batches".format(num_evals))

    for _ in range(cfg['training']['total_steps']):
        tfprocess.process(ChunkParser.BATCH_SIZE, num_evals)

    tfprocess.save_leelaz_weights('/tmp/weights.txt')

    with open('/tmp/weights.txt', 'rb') as f:
        m = hashlib.sha256()
        w = f.read()
        m.update(w)
        digest = m.hexdigest()

    filename = '/tmp/{}.gz'.format(digest)
    with gzip.open(filename, 'wb') as f:
        f.write(w)

    if cmd.upload:
        metadata = {'training_id': '1',
                    'layers': cfg['model']['residual_blocks'],
                    'filters': cfg['model']['filters']}
        print("\nUploading `{}'...".format(digest[:8]), end='')
        upload(cmd.upload, metadata, filename)
        print("[done]\n")
    else:
        print("\nStored `{}'\n".format(filename))
def main():
    parser = argparse.ArgumentParser(
        description='Train network from game data.')
    parser.add_argument("blockspref",
                        help="Number of blocks", nargs='?', type=int)
    parser.add_argument("filterspref",
                        help="Number of filters", nargs='?', type=int)
    parser.add_argument("trainpref",
                        help='Training file prefix', nargs='?', type=str)
    parser.add_argument("restorepref",
                        help='Training snapshot prefix', nargs='?', type=str)
    parser.add_argument("--blocks", '-b',
                        help="Number of blocks", type=int)
    parser.add_argument("--filters", '-f',
                        help="Number of filters", type=int)
    parser.add_argument("--train", '-t',
                        help="Training file prefix", type=str)
    parser.add_argument("--test", help="Test file prefix", type=str)
    parser.add_argument("--restore", type=str,
                        help="Prefix of tensorflow snapshot to restore from")
    parser.add_argument(
        "--logbase", default='leelalogs', type=str,
        help="Log file prefix (for tensorboard) (default: %(default)s)")
    parser.add_argument(
        "--sample", default=DOWN_SAMPLE, type=int,
        help="Rate of data down-sampling to use (default: %(default)d)")
    args = parser.parse_args()

    blocks = args.blocks or args.blockspref
    filters = args.filters or args.filterspref
    train_data_prefix = args.train or args.trainpref
    restore_prefix = args.restore or args.restorepref

    if not blocks or not filters:
        print("Must supply number of blocks and filters")
        return

    training = get_chunks(train_data_prefix)
    if not args.test:
        # Generate test by taking 10% of the training chunks.
        random.shuffle(training)
        print("here1")
        training, test = split_chunks(training, 0.1)
    else:
        test = get_chunks(args.test)

    if not training:
        print("No data to train on!")
        return

    print("Training with {0} chunks, validating on {1} chunks".format(
        len(training), len(test)))

    train_parser = ChunkParser(
        FileDataSrc(training),
        shuffle_size=1 << 20,  # 2.2GB of RAM.
        sample=args.sample,
        batch_size=RAM_BATCH_SIZE).parse()

    test_parser = ChunkParser(FileDataSrc(test),
                              shuffle_size=1 << 19,
                              sample=args.sample,
                              batch_size=RAM_BATCH_SIZE).parse()

    tfprocess = TFProcess(blocks, filters)
    tfprocess.init(RAM_BATCH_SIZE,
                   logbase=args.logbase,
                   macrobatch=BATCH_SIZE // RAM_BATCH_SIZE)

    #benchmark1(tfprocess)

    if restore_prefix:
        tfprocess.restore(restore_prefix)
    tfprocess.process(train_parser, test_parser)
def main(cmd):
    cfg = yaml.safe_load(cmd.cfg.read())
    print(yaml.dump(cfg, default_flow_style=False))

    num_chunks = cfg['dataset']['num_chunks']
    allow_less = cfg['dataset'].get('allow_less_chunks', False)
    train_ratio = cfg['dataset']['train_ratio']
    experimental_parser = cfg['dataset'].get('experimental_v5_only_dataset', False)
    num_train = int(num_chunks * train_ratio)
    num_test = num_chunks - num_train
    if 'input_test' in cfg['dataset']:
        train_chunks = get_latest_chunks(cfg['dataset']['input_train'],
                                         num_train, allow_less)
        test_chunks = get_latest_chunks(cfg['dataset']['input_test'],
                                        num_test, allow_less)
    else:
        chunks = get_latest_chunks(cfg['dataset']['input'], num_chunks,
                                   allow_less)
        if allow_less:
            num_train = int(len(chunks) * train_ratio)
            num_test = len(chunks) - num_train
        train_chunks = chunks[:num_train]
        test_chunks = chunks[num_train:]

    shuffle_size = cfg['training']['shuffle_size']
    total_batch_size = cfg['training']['batch_size']
    batch_splits = cfg['training'].get('num_batch_splits', 1)
    train_workers = cfg['dataset'].get('train_workers', None)
    test_workers = cfg['dataset'].get('test_workers', None)
    if total_batch_size % batch_splits != 0:
        raise ValueError('num_batch_splits must divide batch_size evenly')
    split_batch_size = total_batch_size // batch_splits
    # Load data with split batch size, which will be combined to the total batch size in tfprocess.
    ChunkParser.BATCH_SIZE = split_batch_size

    root_dir = os.path.join(cfg['training']['path'], cfg['name'])
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
    tfprocess = TFProcess(cfg)
    experimental_reads = max(2, mp.cpu_count() - 2) // 2
    extractor = select_extractor(tfprocess.INPUT_MODE)

    def read(x):
        return tf.data.FixedLengthRecordDataset(
            x,
            8308,
            compression_type='GZIP',
            num_parallel_reads=experimental_reads)

    if experimental_parser:
        train_dataset = tf.data.Dataset.from_tensor_slices(train_chunks).shuffle(len(train_chunks)).repeat().batch(256)\
            .interleave(read, num_parallel_calls=2)\
            .batch(SKIP_MULTIPLE*SKIP).map(semi_sample).unbatch()\
            .shuffle(shuffle_size)\
            .batch(split_batch_size).map(extractor).prefetch(4)
    else:
        train_parser = ChunkParser(train_chunks,
                                   tfprocess.INPUT_MODE,
                                   shuffle_size=shuffle_size,
                                   sample=SKIP,
                                   batch_size=ChunkParser.BATCH_SIZE,
                                   workers=train_workers)
        train_dataset = tf.data.Dataset.from_generator(
            train_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
        train_dataset = train_dataset.map(ChunkParser.parse_function)
        train_dataset = train_dataset.prefetch(4)

    shuffle_size = int(shuffle_size * (1.0 - train_ratio))
    if experimental_parser:
        test_dataset = tf.data.Dataset.from_tensor_slices(test_chunks).shuffle(len(test_chunks)).repeat().batch(256)\
            .interleave(read, num_parallel_calls=2)\
            .batch(SKIP_MULTIPLE*SKIP).map(semi_sample).unbatch()\
            .shuffle(shuffle_size)\
            .batch(split_batch_size).map(extractor).prefetch(4)
    else:
        test_parser = ChunkParser(test_chunks,
                                  tfprocess.INPUT_MODE,
                                  shuffle_size=shuffle_size,
                                  sample=SKIP,
                                  batch_size=ChunkParser.BATCH_SIZE,
                                  workers=test_workers)
        test_dataset = tf.data.Dataset.from_generator(
            test_parser.parse,
            output_types=(tf.string, tf.string, tf.string, tf.string, tf.string))
        test_dataset = test_dataset.map(ChunkParser.parse_function)
        test_dataset = test_dataset.prefetch(4)

    validation_dataset = None
    if 'input_validation' in cfg['dataset']:
        valid_chunks = get_all_chunks(cfg['dataset']['input_validation'])
        validation_dataset = tf.data.FixedLengthRecordDataset(
            valid_chunks,
            8308,
            compression_type='GZIP',
            num_parallel_reads=experimental_reads)\
            .batch(split_batch_size, drop_remainder=True).map(extractor).prefetch(4)

    tfprocess.init_v2(train_dataset, test_dataset, validation_dataset)

    tfprocess.restore_v2()

    # If number of test positions is not given
    # sweeps through all test chunks statistically
    # Assumes average of 10 samples per test game.
    # For simplicity, testing can use the split batch size instead of total batch size.
    # This does not affect results, because test results are simple averages that are independent of batch size.
    num_evals = cfg['training'].get('num_test_positions',
                                    len(test_chunks) * 10)
    num_evals = max(1, num_evals // ChunkParser.BATCH_SIZE)
    print("Using {} evaluation batches".format(num_evals))

    tfprocess.process_loop_v2(total_batch_size,
                              num_evals,
                              batch_splits=batch_splits)

    if cmd.output is not None:
        if cfg['training'].get('swa_output', False):
            tfprocess.save_swa_weights_v2(cmd.output)
        else:
            tfprocess.save_leelaz_weights_v2(cmd.output)

    train_parser.shutdown()
    test_parser.shutdown()
#!/usr/bin/env python3
import os
import sys
from tfprocess import TFProcess

with open(sys.argv[1], 'r') as f:
    weights = []
    for e, line in enumerate(f):
        if e == 0:
            #Version
            print("Version", line.strip())
            if line != '1\n':
                raise ValueError("Unknown version {}".format(line.strip()))
        else:
            weights.append(list(map(float, line.split(' '))))
            if e == 2:
                channels = len(line.split(' '))
                print("Channels", channels)
    blocks = e - (4 + 14)
    if blocks % 8 != 0:
        raise ValueError("Inconsistent number of weights in the file")
    blocks //= 8
    print("Blocks", blocks)

tfprocess = TFProcess(blocks, channels)
tfprocess.init(batch_size=1, gpus_num=1)
tfprocess.replace_weights(weights)
path = os.path.join(os.getcwd(), "leelaz-model")
save_path = tfprocess.saver.save(tfprocess.session, path, global_step=0)