Example #1
    def test_3_alphago_value(self):

        print("TEST 3\n=====================================================")
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            # Restrict TensorFlow to only use the first GPU
            try:
                tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
                tf.config.experimental.set_memory_growth(gpus[0], True)
                tf.config.set_soft_device_placement(True)
            except RuntimeError as e:
                print(e)

        rows, cols = 19, 19
        encoder = AlphaGoEncoder()
        input_shape = (encoder.num_planes, rows, cols)
        # value network: alphago_model defaults to is_policy_net=False
        alphago_value_network = alphago_model(input_shape)

        alphago_value = ValueAgent(alphago_value_network, encoder)

        experience = load_experience(
            h5py.File('test_alphago_rl_experience.h5', 'r'))

        alphago_value.train(experience)

        with h5py.File('test_alphago_value.h5', 'w') as value_agent_out:
            alphago_value.serialize(value_agent_out)
Example #2
    def test_1_supervised_learning(self):
        print("TEST 1\n=====================================================")

        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            # Restrict TensorFlow to only use the first GPU
            try:
                tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
                tf.config.experimental.set_memory_growth(gpus[0], True)
                tf.config.set_soft_device_placement(True)
            except RuntimeError as e:
                print(e)

        rows, cols = 19, 19
        encoder = AlphaGoEncoder()

        input_shape = (encoder.num_planes, rows, cols)
        alphago_sl_policy = alphago_model(input_shape, is_policy_net=True)

        alphago_sl_policy.compile('sgd',
                                  'categorical_crossentropy',
                                  metrics=['accuracy'])

        alphago_sl_agent = DeepLearningAgent(alphago_sl_policy, encoder)

        inputs = np.ones((10,) + input_shape)
        outputs = alphago_sl_policy.predict(inputs)
        # policy output: one probability per board point (19 * 19 = 361)
        assert outputs.shape == (10, 361)

        with h5py.File('test_alphago_sl_policy.h5', 'w') as sl_agent_out:
            alphago_sl_agent.serialize(sl_agent_out)
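
For later use, the serialized agent can be restored from the same HDF5 file. A minimal sketch, assuming dlgo's load_prediction_agent helper in dlgo.agent.predict (the deserialization counterpart of the DeepLearningAgent.serialize call above):

import h5py
from dlgo.agent.predict import load_prediction_agent

# Restore the supervised-learning policy agent serialized above
with h5py.File('test_alphago_sl_policy.h5', 'r') as sl_agent_in:
    restored_agent = load_prediction_agent(sl_agent_in)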
Example #3
def main():
    # SL data
    encoder = AlphaGoEncoder()
    processor = GoDataProcessor(encoder=encoder.name())

    # Parallel processor (generator-based loading)
    generator = processor.load_go_data('train', NUM_GAMES, use_generator=True)
    test_generator = processor.load_go_data('test',
                                            NUM_GAMES,
                                            use_generator=True)

    # Data Processor
    # todo: does not have use_generator capability
    # generator = processor.load_go_data('train', NUM_GAMES)
    # test_generator = processor.load_go_data('test', NUM_GAMES)

    # SL policy model
    input_shape = (encoder.num_planes, ROWS, COLS)
    alphago_sl_policy = alphago_model(input_shape=input_shape,
                                      is_policy_net=True)

    # read earlier trained bot
    bot_filepath = 'alphago/alpha_sl_policy_e13_1k.h5'
    alphago_sl_policy.load_weights(bot_filepath)

    alphago_sl_policy.compile(optimizer='sgd',
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])

    # SL training
    epochs = 200
    batch_size = 128
    alphago_sl_policy.fit_generator(
        generator=generator.generate(batch_size, NUM_CLASSES),
        epochs=epochs,
        steps_per_epoch=generator.get_num_samples() // batch_size,
        validation_data=test_generator.generate(batch_size, NUM_CLASSES),
        validation_steps=test_generator.get_num_samples() // batch_size,
        callbacks=[ModelCheckpoint('alphago_sl_policy_load_train_{epoch}.h5')])
    alphago_sl_agent = DeepLearningAgent(alphago_sl_policy, encoder)

    # save model
    with h5py.File('alphago_sl_policy_load_train.h5', 'w') as sl_agent_out:
        alphago_sl_agent.serialize(sl_agent_out)

    # evaluate on held-out games and report the results
    loss, accuracy = alphago_sl_policy.evaluate_generator(
        generator=test_generator.generate(batch_size, NUM_CLASSES),
        steps=test_generator.get_num_samples() // batch_size)
    print('test loss: %.4f, test accuracy: %.4f' % (loss, accuracy))
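
Note that fit_generator and evaluate_generator are deprecated in TensorFlow 2.1+, where Model.fit and Model.evaluate accept Python generators directly. A minimal tf.keras equivalent of the training call above, assuming the same generators and constants:

alphago_sl_policy.fit(
    generator.generate(batch_size, NUM_CLASSES),
    epochs=epochs,
    steps_per_epoch=generator.get_num_samples() // batch_size,
    validation_data=test_generator.generate(batch_size, NUM_CLASSES),
    validation_steps=test_generator.get_num_samples() // batch_size,
    callbacks=[ModelCheckpoint('alphago_sl_policy_load_train_{epoch}.h5')])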
Example #4
    def test_3_alphago_value(self):
        rows, cols = 19, 19
        encoder = AlphaGoEncoder()
        input_shape = (encoder.num_planes, rows, cols)
        # value network: alphago_model defaults to is_policy_net=False
        alphago_value_network = alphago_model(input_shape)

        alphago_value = ValueAgent(alphago_value_network, encoder)

        experience = load_experience(
            h5py.File('test_alphago_rl_experience.h5', 'r'))

        alphago_value.train(experience)

        with h5py.File('test_alphago_value.h5', 'w') as value_agent_out:
            alphago_value.serialize(value_agent_out)
Example #5
    def test_1_supervised_learning(self):
        rows, cols = 19, 19
        encoder = AlphaGoEncoder()

        input_shape = (encoder.num_planes, rows, cols)
        alphago_sl_policy = alphago_model(input_shape, is_policy_net=True)

        alphago_sl_policy.compile('sgd',
                                  'categorical_crossentropy',
                                  metrics=['accuracy'])

        alphago_sl_agent = DeepLearningAgent(alphago_sl_policy, encoder)

        inputs = np.ones((10,) + input_shape)
        outputs = alphago_sl_policy.predict(inputs)
        # policy output: one probability per board point (19 * 19 = 361)
        assert outputs.shape == (10, 361)

        with h5py.File('test_alphago_sl_policy.h5', 'w') as sl_agent_out:
            alphago_sl_agent.serialize(sl_agent_out)
Example #6
# tag::init_value[]
from dlgo.networks.alphago import alphago_model
from dlgo.encoders.alphago import AlphaGoEncoder
from dlgo.rl import ValueAgent, load_experience
import h5py

rows, cols = 19, 19
encoder = AlphaGoEncoder()
input_shape = (encoder.num_planes, rows, cols)
alphago_value_network = alphago_model(input_shape)

alphago_value = ValueAgent(alphago_value_network, encoder)
# end::init_value[]

# tag::train_value[]
experience = load_experience(h5py.File('alphago_rl_experience.h5', 'r'))

alphago_value.train(experience)

with h5py.File('alphago_value.h5', 'w') as value_agent_out:
    alphago_value.serialize(value_agent_out)
# end::train_value[]
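
The serialized value agent can be loaded back the same way the experience buffer was. A minimal sketch, assuming dlgo.rl also exports a load_value_agent helper (the counterpart of the ValueAgent.serialize call above):

import h5py
from dlgo.rl import load_value_agent

with h5py.File('alphago_value.h5', 'r') as value_agent_in:
    alphago_value = load_value_agent(value_agent_in)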
Example #7
from dlgo.data.parallel_processor import GoDataProcessor
from dlgo.encoders.alphago import AlphaGoEncoder
from dlgo.agent.predict import DeepLearningAgent
from dlgo.networks.alphago import alphago_model
from keras.callbacks import ModelCheckpoint

rows, cols = 19, 19
num_classes = rows * cols
num_games = 10000

encoder = AlphaGoEncoder()
processor = GoDataProcessor(encoder=encoder.name())
generator = processor.load_go_data('train', num_games, use_generator=True)
test_generator = processor.load_go_data('test', num_games, use_generator=True)

input_shape = (encoder.num_planes, rows, cols)
alphago_sl_policy = alphago_model(input_shape, is_policy_net=True)
alphago_sl_policy.compile('sgd',
                          'categorical_crossentropy',
                          metrics=['accuracy'])

epochs = 200
batch_size = 128
alphago_sl_policy.fit_generator(
    generator=generator.generate(batch_size, num_classes),
    epochs=epochs,
    steps_per_epoch=generator.get_num_samples() // batch_size,
    validation_data=test_generator.generate(batch_size, num_classes),
    validation_steps=test_generator.get_num_samples() // batch_size,
    callbacks=[ModelCheckpoint('alphago_sl_policy_{epoch}.h5')])

alphago_sl_agent = DeepLearningAgent(alphago_sl_policy, encoder)
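
This example stops after wrapping the trained policy in a DeepLearningAgent. Persisting the agent would follow the same serialization pattern used in the earlier examples:

import h5py

# Save the SL agent to HDF5 for later play or reinforcement learning
with h5py.File('alphago_sl_policy.h5', 'w') as sl_agent_out:
    alphago_sl_agent.serialize(sl_agent_out)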