def pretrain():
    # uncomment to recreate results.csv with a header before appending results
    # with open('pretrained/results.csv', 'w+') as results_file:
    #     results_file.write('mmr,train_size,test_size,roc_auc\n')
    for i in range(31):
        # use a wider MMR window for the higher brackets
        if i < 20:
            offset = 200
        elif i < 25:
            offset = 250
        else:
            offset = 300

        mmr_low = 2000 + 100 * i - offset
        mmr_base = 2000 + 100 * i
        mmr_high = 2000 + 100 * i + offset

        train_features, _ = read_dataset('706e_train_dataset.csv',
                                         low_mmr=mmr_low,
                                         high_mmr=mmr_high)
        test_features, _ = read_dataset('706e_test_dataset.csv',
                                        low_mmr=mmr_low,
                                        high_mmr=mmr_high)

        # train on this bucket and save the model to pretrained/<mmr_base>.pkl
        (train_size, test_size, roc_auc) = evaluate(
            train_features,
            test_features,
            cv=0,
            save_model='pretrained/' + str(mmr_base) + '.pkl')

        with open('pretrained/results.csv', 'a+') as results_file:
            results_file.write('%s,%d,%d,%.3f\n' %
                               (str(mmr_base), train_size, test_size, roc_auc))
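# Hedged usage sketch (not part of the original code): after pretrain() has run,
# each 100-MMR bucket between 2000 and 5000 has a model pickled under pretrained/.
# load_pretrained_model is a hypothetical helper; it assumes the .pkl files are
# joblib/pickle compatible (how evaluate() serializes them is not shown here) and
# snaps a query MMR to the nearest trained bucket.
def load_pretrained_model(mmr):
    import joblib  # assumption: the models were saved in a joblib-readable format
    # clamp to the trained range and snap to the nearest 100-MMR bucket
    mmr_base = min(max(int(round(mmr / 100.0)) * 100, 2000), 5000)
    return joblib.load('pretrained/%d.pkl' % mmr_base)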
def training_example():
    dataset_train, _ = read_dataset('706e_train_dataset.csv', low_mmr=4500)
    dataset_test, _ = read_dataset('706e_test_dataset.csv', low_mmr=4500)

    # cv is the number of folds to be used when cross validating (default is 5)
    # save_model is the path where the model should be saved (default None)
    evaluate(dataset_train, dataset_test, cv=7, save_model='test.pkl')
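# Hedged follow-up sketch: evaluate() above persists the trained model to test.pkl.
# Assuming a scikit-learn style estimator was pickled (the serialization format is
# not shown in this file), it could be reloaded and used to score new games:
def prediction_example():
    import joblib  # assumption: the .pkl is joblib/pickle compatible
    model = joblib.load('test.pkl')
    dataset_test, _ = read_dataset('706e_test_dataset.csv', low_mmr=4500)
    # dataset_test[0] is the feature matrix; column 1 of predict_proba is assumed
    # to be the probability of the positive (win) class
    win_probabilities = model.predict_proba(dataset_test[0])[:, 1]
    logger.info("First five predicted win probabilities: %s", win_probabilities[:5])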
def visualize_data_example():
    # in order to use plotly (for most of these examples), you need to create an
    # account and configure your credentials; the plots will be saved to your
    # online account, see https://plot.ly/python/getting-started/

    # plot the learning curve for a loaded dataset with either matplotlib or plotly
    # subsets is the number of points where the accuracies are evaluated
    # cv is the number of folds for each point of the evaluation
    features, _ = read_dataset('706e_train_dataset.csv', low_mmr=3000, high_mmr=3500)
    plot_learning_curve(features[0], features[1], subsets=20, cv=3, mmr=3250,
                        tool='matplotlib')

    # the remaining plots are implemented only for plotly because of their size

    # plot win rate statistics
    winrate_statistics(features, '3000 - 3500')

    # plot pick rate statistics
    pick_statistics(features, '3000 - 3500')

    # plot the MMR distribution
    mmr_distribution('706e_train_dataset.csv')

    # plot synergies and counters for hero combinations,
    # loaded from the pretrained folder
    plot_synergies()
    plot_counters()

    # plot a hero map with the heroes grouped by the similarity of their roles
    # (the heroes are clustered by role: support, offlane, mid, carry)
    plot_hero_map('706e_train_dataset.csv')
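# The credentials setup mentioned in the comments above is a one-time step. This is
# a sketch for the plotly 2.x/3.x online API that the linked getting-started guide
# describes; 'your_username' and 'your_api_key' are placeholders for your own
# account credentials:
def configure_plotly_credentials():
    import plotly
    plotly.tools.set_credentials_file(username='your_username',
                                      api_key='your_api_key')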
def load_dataset_example():
    # load the dataset from csv, using precomputed advantages from the entire train dataset
    dataset_simple, advantages = read_dataset('706e_train_dataset.csv', low_mmr=4500)

    logger.info("The features have shape: %s", dataset_simple[0].shape)
    logger.info("The labels have shape: %s", dataset_simple[1].shape)
    logger.info("Synergies for Anti-Mage (index 1): \n%s", advantages[0][0, :])
    logger.info("Counters for Monkey King (index 114): \n%s", advantages[1][113, :])

    # load the dataset from csv and recompute the advantages for this specific dataset
    dataset_advanced, advantages_computed = read_dataset('706e_train_dataset.csv',
                                                         low_mmr=2000,
                                                         high_mmr=2500,
                                                         advantages=True)

    logger.info("The features have shape: %s", dataset_advanced[0].shape)
    logger.info("The labels have shape: %s", dataset_advanced[1].shape)
    logger.info("Synergies for Anti-Mage (index 1): \n%s", advantages_computed[0][0, :])
    logger.info("Counters for Monkey King (index 114): \n%s", advantages_computed[1][113, :])
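# The log statements above imply that 1-based hero ids map to 0-based matrix rows
# (Anti-Mage id 1 -> row 0, Monkey King id 114 -> row 113). A hypothetical helper
# that makes the convention explicit; the function and variable names are assumptions:
def hero_advantages(advantages, hero_id, other_hero_id):
    synergy_matrix, counter_matrix = advantages
    synergy = synergy_matrix[hero_id - 1, other_hero_id - 1]
    counter = counter_matrix[hero_id - 1, other_hero_id - 1]
    return synergy, counter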
import time

import tensorflow as tf


def train_model(config_path, data_path):
    cfg = ModelConfig(config_path)

    discriminator = Discriminator(cfg)
    generator = Generator(cfg)

    gen_optimizer = tf.keras.optimizers.Adam(
        learning_rate=cfg.generator_base_learning_rate,
        beta_1=cfg.generator_beta_1,
        beta_2=cfg.generator_beta_2,
        epsilon=1e-8)
    disc_optimizer = tf.keras.optimizers.Adam(
        learning_rate=cfg.discriminator_base_learning_rate,
        beta_1=cfg.discriminator_beta_1,
        beta_2=cfg.discriminator_beta_2,
        epsilon=1e-8)

    dataset = read_dataset(data_path, cfg)

    # Initialize checkpoints and checkpoint managers
    ckpt_models = tf.train.Checkpoint(generator=generator,
                                      discriminator=discriminator)
    ckpt_optimizers = tf.train.Checkpoint(gen_optimizer=gen_optimizer,
                                          disc_optimizer=disc_optimizer)
    ckpt_manager_models = tf.train.CheckpointManager(
        checkpoint=ckpt_models,
        directory=cfg.checkpoint_path + '/models/',
        max_to_keep=cfg.max_checkpoints_to_keep)
    ckpt_manager_optimizers = tf.train.CheckpointManager(
        checkpoint=ckpt_optimizers,
        directory=cfg.checkpoint_path + '/optimizers/',
        max_to_keep=cfg.max_checkpoints_to_keep)

    # Initialize log writer
    train_summary_writer = tf.summary.create_file_writer(cfg.log_dir)

    start_time = time.time()
    num_images_before = 0
    num_minibatch = 0

    for example in dataset:
        # Apply the discriminator regularization term only every
        # disc_reg_intervall minibatches (lazy regularization)
        disc_regularization = (num_minibatch % cfg.disc_reg_intervall == 0)

        gen_loss, disc_loss = train_step(
            real_images=example['data'],
            real_labels=example['label'],
            generator=generator,
            discriminator=discriminator,
            gen_optimizer=gen_optimizer,
            disc_optimizer=disc_optimizer,
            cfg=cfg,
            disc_regularization=disc_regularization)

        num_minibatch = gen_optimizer.iterations.numpy()
        num_images = num_minibatch * cfg.batch_size

        # Print metrics
        if (num_images % (cfg.print_metrics_intervall_kimg * 1000)) < cfg.batch_size:
            images_per_second = (num_images - num_images_before) / (
                time.time() - start_time)
            print('minibatch {} images {} gen loss {:.4f} disc loss {:.4f}'
                  ' images per second {:.2f}'.format(num_minibatch, num_images,
                                                     gen_loss, disc_loss,
                                                     images_per_second))
            num_images_before = num_images
            start_time = time.time()

        # Save checkpoint
        if (num_images % (cfg.checkpoint_intervall_kimg * 1000)) < cfg.batch_size:
            save_checkpoint(ckpt_manager_models, num_images)
            save_checkpoint(ckpt_manager_optimizers, num_images)

        # Log metrics
        if (num_images % (cfg.log_metrics_intervall_kimg * 1000)) < cfg.batch_size:
            with train_summary_writer.as_default():
                tf.summary.scalar('gen_loss', gen_loss, step=num_images)
                tf.summary.scalar('disc_loss', disc_loss, step=num_images)

        # Stop after max_num_images_kimg * 1000 images
        if (num_images % (cfg.max_num_images_kimg * 1000)) < cfg.batch_size:
            # Save the final state if the checkpoint branch above did not already do so
            if not (num_images % (cfg.checkpoint_intervall_kimg * 1000)) < cfg.batch_size:
                save_checkpoint(ckpt_manager_models, num_images)
                save_checkpoint(ckpt_manager_optimizers, num_images)
            break
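# save_checkpoint is called above but not defined in this file. A minimal sketch of
# what it might look like, assuming it simply delegates to the passed
# tf.train.CheckpointManager and tags the checkpoint with the current image count
# (the helper name and signature are taken from the call sites; the body is an
# assumption):
def save_checkpoint(ckpt_manager, checkpoint_number):
    # CheckpointManager.save accepts an explicit checkpoint_number
    path = ckpt_manager.save(checkpoint_number=checkpoint_number)
    print('Saved checkpoint: {}'.format(path))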