Example #1
def pretrain():
    # with open('pretrained/results.csv', 'w+') as results_file:
    #     results_file.write('mmr,train_size,test_size,roc_auc\n')

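    # Train one model per MMR bracket: the base MMR sweeps from 2000 to 5000
    # in steps of 100, with a wider bracket (offset) at higher MMR.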
    for i in range(31):
        if i < 20:
            offset = 200
        elif i < 25:
            offset = 250
        else:
            offset = 300

        mmr_low = 2000 + 100 * i - offset
        mmr_base = 2000 + 100 * i
        mmr_high = 2000 + 100 * i + offset

        train_features, _ = read_dataset('706e_train_dataset.csv',
                                         low_mmr=mmr_low,
                                         high_mmr=mmr_high)
        test_features, _ = read_dataset('706e_test_dataset.csv',
                                        low_mmr=mmr_low,
                                        high_mmr=mmr_high)

        (train_size, test_size,
         roc_auc) = evaluate(train_features,
                             test_features,
                             cv=0,
                             save_model='pretrained/' + str(mmr_base) + '.pkl')

        with open('pretrained/results.csv', 'a+') as results_file:
            results_file.write('%s,%d,%d,%.3f\n' %
                               (str(mmr_base), train_size, test_size, roc_auc))
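
A hypothetical usage sketch (not part of the original example): load one of the per-bracket models saved by pretrain() and score drafts from the matching MMR range. This assumes the .pkl files are scikit-learn classifiers serialized with joblib and that read_dataset returns a (features, labels) pair as its first value.

import joblib

def predict_with_pretrained(mmr_base=2500, offset=250):
    # load the bracket model written by pretrain() above (hypothetical path layout)
    model = joblib.load('pretrained/%d.pkl' % mmr_base)
    features, _ = read_dataset('706e_test_dataset.csv',
                               low_mmr=mmr_base - offset,
                               high_mmr=mmr_base + offset)
    # features[0]: draft feature matrix, features[1]: match outcome labels
    return model.predict_proba(features[0])[:, 1]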
Example #2
def training_example():
    dataset_train, _ = read_dataset('706e_train_dataset.csv', low_mmr=4500)
    dataset_test, _ = read_dataset('706e_test_dataset.csv', low_mmr=4500)

    # cv is the number of folds to be used when cross validating (default is 5)
    # save_model is the path where the model should be saved (default None)
    evaluate(dataset_train, dataset_test, cv=7, save_model='test.pkl')
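
For context, a minimal sketch of the contract implied by evaluate() in Examples #1 and #2; this is an assumption about its behavior, not the project's actual implementation, and the real model type may differ.

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
import joblib

def evaluate_sketch(train, test, cv=5, save_model=None):
    x_train, y_train = train
    x_test, y_test = test
    model = LogisticRegression(max_iter=1000)
    # cross-validate on the training set with the requested number of folds
    cv_scores = cross_val_score(model, x_train, y_train, cv=cv, scoring='roc_auc')
    print('mean CV ROC AUC: %.3f' % cv_scores.mean())
    # fit on the full training set and score on the held-out test set
    model.fit(x_train, y_train)
    roc_auc = roc_auc_score(y_test, model.predict_proba(x_test)[:, 1])
    if save_model is not None:
        joblib.dump(model, save_model)
    return len(x_train), len(x_test), roc_auc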
Example #3
def visualize_data_example():
    # in order to use plotly (for most of these examples), you need to create an account and
    # configure your credentials; the plots will be saved to your online account
    # see https://plot.ly/python/getting-started/

    # plot learning curve for a loaded dataset with either matplotlib or plotly
    # subsets represents the number of points where the accuracies are evaluated
    # cv represents the number of folds for each point of the evaluation
    features, _ = read_dataset('706e_train_dataset.csv', low_mmr=3000, high_mmr=3500)
    plot_learning_curve(features[0], features[1], subsets=20, cv=3, mmr=3250, tool='matplotlib')

    # the rest of the plots were implemented only for plotly because of their size

    # plot win rate statistics
    winrate_statistics(features, '3000 - 3500')

    # plot pick rate statistics
    pick_statistics(features, '3000 - 3500')

    # plot mmr distribution
    mmr_distribution('706e_train_dataset.csv')

    # plot synergies and counters for hero combinations
    # they are loaded from the pretrained folder
    plot_synergies()
    plot_counters()

    # plot hero map containing the heroes grouped by the similarity of their role
    # the heroes are clustered by roles: support, offlane, mid, carry
    plot_hero_map('706e_train_dataset.csv')
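
The credentials setup mentioned at the top of this example could look like the following. This assumes the legacy plotly cloud client (pre-4.0, later split out into chart_studio); the exact call depends on the installed plotly version.

import plotly

# one-time setup; writes the credentials file used by the online plotting calls
plotly.tools.set_credentials_file(username='your_username', api_key='your_api_key')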
Example #4
def load_dataset_example():
    # load dataset from csv using precomputed advantages from the entire train dataset
    dataset_simple, advantages = read_dataset('706e_train_dataset.csv', low_mmr=4500)
    logger.info("The features have shape: %s", dataset_simple[0].shape)
    logger.info("The labels have shape: %s", dataset_simple[1].shape)
    logger.info("Synergies for Anti-Mage (index 1): \n%s", advantages[0][0, :])
    logger.info("Counters for Monkey King (index 114): \n%s", advantages[1][113, :])

    # load dataset from csv and recompute advantages for this specific dataset
    dataset_advanced, advantages_computed = read_dataset('706e_train_dataset.csv',
                                                         low_mmr=2000,
                                                         high_mmr=2500,
                                                         advantages=True)
    logger.info("The features have shape: %s", dataset_advanced[0].shape)
    logger.info("The labels have shape: %s", dataset_advanced[1].shape)
    logger.info("Synergies for Anti-Mage (index 1): \n%s", advantages_computed[0][0, :])
    logger.info("Counters for Monkey King (index 114): \n%s", advantages_computed[1][113, :])
Example #5
def train_model(config_path, data_path):

    cfg = ModelConfig(config_path)

    discriminator = Discriminator(cfg)
    generator = Generator(cfg)

    gen_optimizer = tf.keras.optimizers.Adam(
        learning_rate=cfg.generator_base_learning_rate,
        beta_1=cfg.generator_beta_1,
        beta_2=cfg.generator_beta_2,
        epsilon=1e-8)

    disc_optimizer = tf.keras.optimizers.Adam(
        learning_rate=cfg.discriminator_base_learning_rate,
        beta_1=cfg.discriminator_beta_1,
        beta_2=cfg.discriminator_beta_2,
        epsilon=1e-8)

    dataset = read_dataset(data_path, cfg)

    # Initialize checkpoint and checkpoint manager
    ckpt_models = tf.train.Checkpoint(generator=generator,
                                      discriminator=discriminator)

    ckpt_optimizers = tf.train.Checkpoint(gen_optimizer=gen_optimizer,
                                          disc_optimizer=disc_optimizer)

    ckpt_manager_models = tf.train.CheckpointManager(
        checkpoint=ckpt_models,
        directory=cfg.checkpoint_path + '/models/',
        max_to_keep=cfg.max_checkpoints_to_keep)

    ckpt_manager_optimizers = tf.train.CheckpointManager(
        checkpoint=ckpt_optimizers,
        directory=cfg.checkpoint_path + '/optimizers/',
        max_to_keep=cfg.max_checkpoints_to_keep)

    # Initialize log writer
    train_summary_writer = tf.summary.create_file_writer(cfg.log_dir)

    # Initialize training bookkeeping (losses are produced by train_step below)
    start_time = time.time()
    num_images_before = 0
    num_minibatch = 0

    for example in dataset:

        disc_regularization = (num_minibatch % cfg.disc_reg_intervall == 0)
        gen_loss, disc_loss = train_step(
            real_images=example['data'],
            real_labels=example['label'],
            generator=generator,
            discriminator=discriminator,
            gen_optimizer=gen_optimizer,
            disc_optimizer=disc_optimizer,
            cfg=cfg,
            disc_regularization=disc_regularization)

        num_minibatch = gen_optimizer.iterations.numpy()
        num_images = num_minibatch * cfg.batch_size

        # Print Metrics
        if (num_images %
            (cfg.print_metrics_intervall_kimg * 1000)) < cfg.batch_size:
            images_per_second = (num_images - num_images_before) / (
                time.time() - start_time)
            print('minibatch {} images {} gen loss {:.4f} disc loss {:.4f}'
                  ' images per second {:.2f}'.format(num_minibatch, num_images,
                                                     gen_loss, disc_loss,
                                                     images_per_second))
            num_images_before = num_images
            start_time = time.time()

        # Save checkpoint
        if (num_images %
            (cfg.checkpoint_intervall_kimg * 1000)) < cfg.batch_size:
            save_checkpoint(ckpt_manager_models, num_images)
            save_checkpoint(ckpt_manager_optimizers, num_images)

        # Log metrics
        if (num_images %
            (cfg.log_metrics_intervall_kimg * 1000)) < cfg.batch_size:
            with train_summary_writer.as_default():
                tf.summary.scalar('gen_loss', gen_loss, step=num_images)
                tf.summary.scalar('disc_loss', disc_loss, step=num_images)

        if (num_images % (cfg.max_num_images_kimg * 1000)) < cfg.batch_size:
            # Save final state if not already done
            if not (num_images %
                    (cfg.checkpoint_intervall_kimg * 1000)) < cfg.batch_size:
                save_checkpoint(ckpt_manager_models, num_images)
                save_checkpoint(ckpt_manager_optimizers, num_images)
            break
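
The save_checkpoint helper used above is not shown here; a minimal sketch, assuming it simply forwards to the checkpoint manager and labels the checkpoint with the step value it is given:

def save_checkpoint(ckpt_manager, step):
    # tf.train.CheckpointManager.save accepts an explicit checkpoint_number
    path = ckpt_manager.save(checkpoint_number=step)
    print('Saved checkpoint for step {} at {}'.format(step, path))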