Exemple #1
0
def one_run(env, n_turns, steepness, noise):
    """Benchmark MCTSv3 against a random policy on *env* and plot both.

    Configures the environment with the given episode length, steepness and
    noise factor, runs each policy for a number of trials chosen so the total
    step budget stays roughly constant across episode lengths, then writes a
    comparison plot via ``plot_group``.

    Parameters:
        env: environment exposing ``max_turns``/``steepness``/``noise_factor``
            attributes and a gym-style ``reset()`` / ``step(action)`` API
            returning ``(obs, reward, done, info)``.
        n_turns: episode length (steps per trial).
        steepness: environment difficulty parameter (passed through).
        noise: environment noise factor (passed through).
    """
    import random  # hoisted from mid-function; kept function-local on purpose

    env.max_turns = n_turns
    env.steepness = steepness
    env.noise_factor = noise

    # Keep total environment steps (~20 * 400) constant across episode lengths.
    trials = int(20 * 400 / n_turns)

    def _scores_to_metric(scores):
        # Pack a per-step score sequence into a fresh Metric.
        m = Metric('step', 'score')
        for step, score in enumerate(scores):
            m.add_record(step, score)
        return m

    t = time.time()
    metrics_mcts_v3 = []
    for _ in range(trials):
        env.reset()
        root = Node(0, 10)
        mcts = Mcts(root)

        done = False
        while not done:
            action = mcts.decide()
            _, r, done, _ = env.step(action)
            mcts.register(r)

        # Per-step scores accumulated by the search live on the root node.
        metrics_mcts_v3.append(_scores_to_metric(root.results))

    metrics_mcts_v3 = sum(metrics_mcts_v3)
    print('Time for MCTSv3:', time.time() - t)

    t = time.time()
    metrics_rnd = []
    for _ in range(trials):
        env.reset()
        rand_results = []
        done = False
        while not done:
            action = random.random() * 10  # uniform action in [0, 10)
            _, r, done, _ = env.step(action)
            rand_results.append(r)

        metrics_rnd.append(_scores_to_metric(rand_results))

    print('Time for RND:', time.time() - t)

    plot_group({
        'mcts_v3': metrics_mcts_v3,
        'random': sum(metrics_rnd)
    },
        'temp', name=f'{n_turns}_st{steepness}_n{noise}')
Exemple #2
0
    def one_run(ga):
        """Run *ga* from a fresh reset to completion.

        Records the reward returned by every step against the runner's
        step counter and returns the resulting Metric.
        """
        ga.reset()
        metric = Metric('steps', 'score')

        while not ga.done:
            reward = ga.step()
            metric.add_record(ga.ctr, reward)

        return metric
Exemple #3
0
def one_try(env, agent_constructor, episodes):
    """Train a freshly constructed agent on *env* for *episodes* episodes.

    Every episode is trained and logged; every third episode is additionally
    evaluated with ``trial_episode``. Returns a pair of Metric objects:
    ``(training_reward, test_reward)``.
    """
    agent = agent_constructor(env)
    training_reward = Metric('episode', 'training reward')
    test_reward = Metric('episode', 'test reward')

    for episode in range(episodes):
        r_train = train_episode(agent, env)
        print(episode, f"Train: reward: {r_train:.2g}")
        training_reward.add_record(episode, r_train)

        # Only evaluate on every third episode; skip the rest.
        if episode % 3 != 0:
            continue
        r_test = trial_episode(agent, env)
        print(episode, f"Test: reward: {r_test:.2f}")
        test_reward.add_record(episode, r_test)

    return training_reward, test_reward
Exemple #4
0
def run_trials():
    """Run ``trials`` MCTS episodes against the module-level ``env``.

    For each trial the environment is reset, an MCTS search is stepped to
    completion, and the per-step scores collected on the root node are
    packed into a Metric.

    Relies on module-level names: ``trials``, ``env``, ``run_action``,
    ``Metric``, ``Node`` and ``Mcts``.

    Returns:
        list: one Metric per trial. (Bug fix: the original accumulated
        this list but never returned it, so the results were discarded.)
    """
    metrics_mcts = []

    for _ in range(trials):
        env.reset()
        m = Metric('step', 'score')
        root = Node(0, 10)
        mcts = Mcts(run_action, root)

        # Step the search until it reports completion.
        done = False
        while not done:
            done = mcts.step()

        # Per-step scores accumulated by the search live on the root node.
        for step, score in enumerate(root.results):
            m.add_record(step, score)

        metrics_mcts.append(m)
        print('Score by MCTS:', sum(root.results))

    return metrics_mcts
Exemple #5
0
def train_model(epochs: int, tag: str, train_epoch_callable, model_callable):
    """Train a freshly built model for *epochs* epochs, logging metrics.

    Per epoch: runs one training epoch, evaluates loss/accuracy on both the
    training and test sets, plots the per-image cross-entropy loss
    distributions, and — when *tag* equals ``Tags.Prio`` — updates the
    sampler's per-image chances from the training losses. All metrics are
    saved at the end.

    Parameters:
        epochs: number of training epochs to run.
        tag: label used in metric names and plot folder paths; ``Tags.Prio``
            additionally enables sampler-chance updates and batch-metric saves.
        train_epoch_callable: callable(model) that trains one epoch and may
            return a ``(loss_chronicle, acc_chronicle)`` pair of dicts.
        model_callable: zero-argument factory returning the model to train.

    Relies on module-level ``train_images``/``train_labels``/``test_images``/
    ``test_labels``, ``cross_entropy``, ``cifar_sampler``, ``calc_n_images``,
    ``_globals``, ``Tags``, ``Metric`` and ``plot_statistic``.
    """

    # Build the model instance to train.
    d = model_callable()

    # Per-batch and per-epoch metrics, all indexed by "images trained".
    m_batch_loss = Metric(
        name=f"batch_loss_{tag}", x_label="images trained", y_label="loss_batches"
    )
    m_batch_acc = Metric(
        name=f"batch_acc_{tag}", x_label="images trained", y_label="acc_batches"
    )
    m_train_loss = Metric(
        name=f"train_loss_{tag}", x_label="images trained", y_label="loss_training_set"
    )
    m_test_loss = Metric(
        name=f"test_loss_{tag}", x_label="images trained", y_label="loss_test_set"
    )
    m_train_acc = Metric(
        name=f"train_acc_{tag}", x_label="images trained", y_label="acc_training_set"
    )
    m_test_acc = Metric(
        name=f"test_acc_{tag}", x_label="images trained", y_label="acc_test_set"
    )

    print("Started training")
    for epoch in range(epochs):
        # Publish the current epoch for other modules that read it.
        _globals.epoch = epoch
        print("Epoch", epoch)

        t = time.time()

        # One training epoch; may return per-batch (loss, acc) histories.
        chronicles = train_epoch_callable(d)
        if chronicles:
            loss_chronicle, acc_chronicle = chronicles
            m_batch_loss.add_dict(loss_chronicle)
            m_batch_acc.add_dict(acc_chronicle)

        # NOTE(review): incremented here but overwritten at the top of the
        # next iteration — presumably marks "epoch finished" for readers of
        # _globals during the evaluation below; confirm intent.
        _globals.epoch += 1

        # Post-epoch accuracy on the training and test sets.
        loss, tr_acc = d.evaluate(train_images, train_labels, verbose=0)
        m_train_acc.add_record(calc_n_images(0), tr_acc)

        loss, tst_acc = d.evaluate(test_images, test_labels, verbose=0)
        m_test_acc.add_record(calc_n_images( 0), tst_acc)

        print(f"Epoch {epoch} done in {time.time() - t:.3f}")

        # Per-image cross-entropy losses on the training set, plotted as a
        # distribution for this epoch.
        y_pred = tf.convert_to_tensor(d.predict(train_images))
        losses = cross_entropy.call(train_labels, y_pred)
        plot_statistic(
            losses,
            f"losses_{epoch}",
            folder=os.path.join("plots", tag, "losses"),
        )
        m_train_loss.add_record(calc_n_images( 0), np.mean(losses))

        # Prioritized-sampling mode: reweight sampler chances from the
        # per-image training losses and plot the resulting distribution.
        if tag == Tags.Prio:
            cifar_sampler.update_chances(losses)
            plot_statistic(
                cifar_sampler._chances,
                f"chances_{epoch}",
                folder=os.path.join("plots", tag, "chances"),
            )

        # Same per-image loss diagnostics for the test set.
        test_pred = tf.convert_to_tensor(d.predict(test_images))
        test_losses = cross_entropy.call(test_labels, test_pred)
        plot_statistic(
            test_losses,
            f"test_losses_{epoch}",
            folder=os.path.join("plots", tag, "test_losses"),
        )
        m_test_loss.add_record(calc_n_images( 0), np.mean(test_losses))

        print(np.mean(losses), np.mean(test_losses), tr_acc, tst_acc)
        # NOTE(review): `t` was reset at epoch start, so this is total epoch
        # time, not just the chance-update time the message suggests.
        print(f"Chances changed in {time.time() - t:.3f}")

    # Batch-level metrics are only saved in Prio mode.
    if tag == Tags.Prio:
        m_batch_loss.save()
        m_batch_acc.save()

    m_train_loss.save()
    m_test_loss.save()
    m_train_acc.save()
    m_test_acc.save()