def one_run(env, n_turns, steepness, noise):
    """Compare the MCTS agent with a random agent on ``env`` and plot both.

    The environment is configured in place (``max_turns``, ``steepness``,
    ``noise_factor``); then each agent plays the same number of episodes.
    The per-step rewards of every episode are stored in a ``Metric``, the
    per-episode metrics of each agent are combined with the built-in
    ``sum``, and both aggregates are handed to ``plot_group``.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns a 4-tuple whose second item is the reward and whose
            third item is the "episode finished" flag.
        n_turns: Written to ``env.max_turns``; also scales the number of
            episodes so that ``trials * n_turns`` stays close to 8000.
        steepness: Written to ``env.steepness``.
        noise: Written to ``env.noise_factor``.
    """
    import random

    env.max_turns = n_turns
    env.steepness = steepness
    env.noise_factor = noise
    trials = int(20 * 400 / n_turns)

    # --- MCTS agent -------------------------------------------------------
    started = time.time()
    mcts_metrics = []
    for _ in range(trials):
        env.reset()
        episode_metric = Metric('step', 'score')
        root = Node(0, 10)
        planner = Mcts(root)
        while True:
            _, reward, finished, _ = env.step(planner.decide())
            planner.register(reward)
            if finished:
                break
        # The recorded rewards are read back from the root node, not from
        # the values returned by env.step above.
        for step, reward in enumerate(root.results):
            episode_metric.add_record(step, reward)
        mcts_metrics.append(episode_metric)
    mcts_total = sum(mcts_metrics)
    print('Time for MCTSv3:', time.time() - started)

    # --- Random agent: actions drawn as random.random() * 10 --------------
    started = time.time()
    random_metrics = []
    for _ in range(trials):
        env.reset()
        episode_metric = Metric('step', 'score')
        rewards = []
        finished = False
        while not finished:
            _, reward, finished, _ = env.step(random.random() * 10)
            rewards.append(reward)
        for step, reward in enumerate(rewards):
            episode_metric.add_record(step, reward)
        random_metrics.append(episode_metric)
    print('Time for RND:', time.time() - started)

    plot_group(
        {'mcts_v3': mcts_total, 'random': sum(random_metrics)},
        'temp',
        name=f'{n_turns}_st{steepness}_n{noise}',
    )
def one_run(ga):
    """Run ``ga`` from a fresh reset until it is done and return its trace.

    Args:
        ga: Object providing ``reset()``, ``step()``, a ``done`` flag and a
            ``ctr`` attribute.

    Returns:
        A ``Metric('steps', 'score')`` with one record per ``step()`` call,
        keyed by the value of ``ga.ctr`` read right after that call.
    """
    ga.reset()
    trace = Metric('steps', 'score')
    while not ga.done:
        score = ga.step()
        # ga.ctr is read only after step() has returned.
        trace.add_record(ga.ctr, score)
    return trace
def one_try(env, agent_constructor, episodes):
    """Train a new agent for ``episodes`` episodes, testing it periodically.

    Every episode runs ``train_episode``; every third episode (0, 3, 6, ...)
    additionally runs ``trial_episode``. Both rewards are printed and
    recorded against the episode index.

    Args:
        env: Environment passed to the agent constructor and to both
            episode runners.
        agent_constructor: Callable invoked once as ``agent_constructor(env)``.
        episodes: Number of training episodes to run.

    Returns:
        A ``(training_metric, test_metric)`` tuple of ``Metric`` objects.
    """
    agent = agent_constructor(env)
    train_curve = Metric('episode', 'training reward')
    test_curve = Metric('episode', 'test reward')

    for episode in range(episodes):
        train_reward = train_episode(agent, env)
        print(episode, f"Train: reward: {train_reward:.2g}")
        train_curve.add_record(episode, train_reward)

        # Only every third episode is followed by a test run.
        if episode % 3 != 0:
            continue
        test_reward = trial_episode(agent, env)
        print(episode, f"Test: reward: {test_reward:.2f}")
        test_curve.add_record(episode, test_reward)

    return train_curve, test_curve
def run_trials():
    """Run MCTS trials on the enclosing scope's ``env`` and print each score.

    Reads ``trials``, ``env`` and ``run_action`` from the enclosing scope.
    For every trial the environment is reset, a new search tree is built and
    stepped until ``Mcts.step()`` returns a truthy value, and the results
    stored on the root node are copied into a ``Metric``.

    Returns:
        None. The per-trial metrics are collected in a local list that is
        not returned.
    """
    collected = []
    for _ in range(trials):
        env.reset()
        trial_metric = Metric('step', 'score')
        root = Node(0, 10)
        search = Mcts(run_action, root)
        # Keep stepping the search until it reports completion.
        while not search.step():
            pass
        for step, reward in enumerate(root.results):
            trial_metric.add_record(step, reward)
        collected.append(trial_metric)
        # NOTE(review): per-trial placement of this print is inferred from
        # unindented source - confirm it was not meant to follow the loop.
        print('Score by MCTS:', sum(root.results))
def train_model(epochs: int, tag: str, train_epoch_callable, model_callable):
    """Build a model and train it for ``epochs`` epochs, logging as it goes.

    Per epoch the function:

    1. stores the epoch index in ``_globals.epoch`` and calls
       ``train_epoch_callable(model)``; a truthy return value is unpacked
       as ``(loss_chronicle, acc_chronicle)`` and merged into the batch
       metrics via ``Metric.add_dict``;
    2. increments ``_globals.epoch`` before any ``calc_n_images(0)`` call
       that supplies the x value of the epoch-level records;
    3. evaluates the model on the training and test sets and records both
       accuracies;
    4. computes losses with ``cross_entropy.call`` on both sets, plots them
       with ``plot_statistic`` and records their ``np.mean``;
    5. when ``tag == Tags.Prio``, passes the training losses to
       ``cifar_sampler.update_chances`` and plots ``cifar_sampler._chances``.

    After the last epoch the metrics are saved; the two batch-level metrics
    are saved only when ``tag == Tags.Prio``.

    The data sets (``train_images``, ``train_labels``, ``test_images``,
    ``test_labels``) and the helpers used here are resolved from the
    enclosing scope.

    NOTE(review): the nesting of ``_globals.epoch += 1`` (unconditional, per
    epoch) and of the save calls (after the loop) was reconstructed from
    unindented source - confirm against the intended behavior.

    Args:
        epochs: Number of epochs to run.
        tag: Label used in metric names and plot folders; compared against
            ``Tags.Prio``.
        train_epoch_callable: Called once per epoch with the model. May
            return a falsy value or a two-item chronicle pair.
        model_callable: Zero-argument factory whose result must provide
            ``evaluate`` and ``predict``.
    """

    def new_metric(name, y_label):
        # Every metric here shares the same x axis label.
        return Metric(name=name, x_label="images trained", y_label=y_label)

    def plot_dir(kind):
        return os.path.join("plots", tag, kind)

    model = model_callable()

    batch_loss_metric = new_metric(f"batch_loss_{tag}", "loss_batches")
    batch_acc_metric = new_metric(f"batch_acc_{tag}", "acc_batches")
    train_loss_metric = new_metric(f"train_loss_{tag}", "loss_training_set")
    test_loss_metric = new_metric(f"test_loss_{tag}", "loss_test_set")
    train_acc_metric = new_metric(f"train_acc_{tag}", "acc_training_set")
    test_acc_metric = new_metric(f"test_acc_{tag}", "acc_test_set")

    print("Started training")
    for epoch in range(epochs):
        _globals.epoch = epoch
        print("Epoch", epoch)
        epoch_start = time.time()

        chronicles = train_epoch_callable(model)
        if chronicles:
            batch_losses, batch_accs = chronicles
            batch_loss_metric.add_dict(batch_losses)
            batch_acc_metric.add_dict(batch_accs)
        _globals.epoch += 1

        # Only the second value returned by evaluate() is used.
        _, train_acc = model.evaluate(train_images, train_labels, verbose=0)
        train_acc_metric.add_record(calc_n_images(0), train_acc)
        _, test_acc = model.evaluate(test_images, test_labels, verbose=0)
        test_acc_metric.add_record(calc_n_images(0), test_acc)
        print(f"Epoch {epoch} done in {time.time() - epoch_start:.3f}")

        train_pred = tf.convert_to_tensor(model.predict(train_images))
        train_losses = cross_entropy.call(train_labels, train_pred)
        plot_statistic(
            train_losses, f"losses_{epoch}", folder=plot_dir("losses")
        )
        train_loss_metric.add_record(calc_n_images(0), np.mean(train_losses))

        if tag == Tags.Prio:
            cifar_sampler.update_chances(train_losses)
            plot_statistic(
                cifar_sampler._chances,
                f"chances_{epoch}",
                folder=plot_dir("chances"),
            )

        test_pred = tf.convert_to_tensor(model.predict(test_images))
        test_losses = cross_entropy.call(test_labels, test_pred)
        plot_statistic(
            test_losses, f"test_losses_{epoch}", folder=plot_dir("test_losses")
        )
        test_loss_metric.add_record(calc_n_images(0), np.mean(test_losses))

        print(np.mean(train_losses), np.mean(test_losses), train_acc, test_acc)
        print(f"Chances changed in {time.time() - epoch_start:.3f}")

    if tag == Tags.Prio:
        batch_loss_metric.save()
        batch_acc_metric.save()
    for metric in (
        train_loss_metric,
        test_loss_metric,
        train_acc_metric,
        test_acc_metric,
    ):
        metric.save()