Beispiel #1
0
def loop(n):
    """Run the training loop for the bit-flipping game of size ``n``.

    Per epoch, the loop:

    1. collects ``EPISODES`` epsilon-greedy episodes with ``actor`` and stores
       them in a replay buffer (plus goal-relabelled copies when ``HER`` is
       truthy);
    2. calls ``train(critic, actor, minibatch)`` ``TRAINING_STEPS`` times on
       minibatches sampled from the buffer;
    3. every ``UPDATE_ACTOR`` epochs calls ``actor.update(critic)``, evaluates
       the actor, logs buffer statistics, and stops early once the measured
       success rate is (numerically) 1.

    Args:
        n: size handed to ``QModel``, ``State.sample_status`` and ``_sample``
            (logged as the number of bits).
    """
    for banner_line in (
        "***************************",
        "**** Bit flipping game ****",
        "***************************",
    ):
        logger_her.info(banner_line)

    logger_her.info("Start main loop with size {}".format(n))
    logger_her.info("HER STATUS: {}".format(HER))

    actor, critic = QModel(n, HER), QModel(n, HER)

    if TRAIN_FROM_SCRATCH:
        logger_her.info("Training QNetworks from scratch")
    else:
        actor.load()
        critic.load()

    replay = Buffer(BUFFER_SIZE)

    for epoch_number in range(1, EPOCHS + 1):
        logger_her.info("Start epoch {}".format(epoch_number))

        # --- experience collection ---
        for _ in range(EPISODES):
            # Goal is drawn before start; keep this order so the sequence of
            # sampling calls is unchanged.
            goal_bits = State.sample_status(n)
            start_bits = State.sample_status(n)

            # A State bundles the current status together with its goal.
            _, trajectory = sample_episode(
                actor, State(start_bits, goal_bits), epsilon_greedy=True
            )
            replay.add(trajectory)

            if HER:
                replay.add(_hindsight_transitions(trajectory, n))

        # --- optimisation ---
        for _ in range(TRAINING_STEPS):
            train(critic, actor, replay.sample(BATCH_SIZE))

        # --- periodic actor refresh and evaluation ---
        if epoch_number % UPDATE_ACTOR != 0:
            continue

        actor.update(critic)
        success_rate = evaluate_actor(actor)
        replay.log_stats()

        # Tolerance guards against float rounding in the success ratio.
        if success_rate >= 1. - 1e-9:
            logger_her.info(
                "Learned policy (QAction-Value) for {} bits in {} epochs".format(n, epoch_number)
            )
            break


def _hindsight_transitions(episode, n):
    """Build goal-relabelled copies of every transition in ``episode``.

    For each ``(state, action, reward, next_state)`` transition, and for each
    index yielded by ``_sample(n, HER_NEW_GOALS)``, the status of the next
    state stored at that index of the episode becomes the substitute goal.
    The original reward is discarded and recomputed as ``0`` when the
    relabelled next state reports ``is_final`` and ``-1`` otherwise.

    Returns:
        A list of ``(State, action, reward, State)`` tuples.
    """
    relabelled = []
    for state, action, _reward, next_state in episode:
        # NOTE(review): indices come from _sample(n, ...), not from
        # len(episode) -- presumably every episode has n transitions; confirm
        # against sample_episode, otherwise episode[step] can raise IndexError.
        for step in _sample(n, HER_NEW_GOALS):
            achieved = episode[step][-1].status

            # Copies keep the relabelled states independent of the originals.
            new_next = State(next_state.status.copy(), achieved.copy())
            new_state = State(state.status.copy(), achieved.copy())
            reward = 0 if new_next.is_final else -1

            relabelled.append((new_state, action, reward, new_next))
    return relabelled
Beispiel #2
0
def evaluate_actor(actor, episodes_count=TESTING_EPISODES, verbose=0, pause=0):
    """Measure how often ``actor`` succeeds on freshly sampled episodes.

    Each episode draws a start and a goal status of size ``actor.n`` and runs
    ``sample_episode`` with ``epsilon_greedy=False``.

    Args:
        actor: model passed to ``sample_episode``; its ``n`` attribute sets the
            size of the sampled statuses.
        episodes_count: number of evaluation episodes to run.
        verbose: forwarded unchanged to ``sample_episode``.
        pause: when truthy, block on ``input`` after every episode.

    Returns:
        ``successes / episodes_count`` as a float, or ``0.0`` when
        ``episodes_count`` is zero (no episodes were run, so there is nothing
        to divide by).
    """
    success_counter = 0

    for _ in range(episodes_count):
        start = State.sample_status(actor.n)
        goal = State.sample_status(actor.n)
        success, _ = sample_episode(actor, State(start, goal), epsilon_greedy=False, verbose=verbose)
        success_counter += int(success)

        if pause:
            input("Press <Enter> to continue...")

    # Guard the ratio: with zero episodes the original division raised
    # ZeroDivisionError even though nothing had gone wrong.
    success_rate = success_counter / episodes_count if episodes_count else 0.0

    logger_her.info("Success/Total {}/{}".format(success_counter, episodes_count))
    logger_her.info("Success rate: {}".format(success_rate))

    return success_rate